diff --git a/patches/gcc10.1/gcc-Improve-initialization-of-objects-when-the-initializ.patch b/patches/gcc10.1/gcc-Improve-initialization-of-objects-when-the-initializ.patch new file mode 100644 index 0000000..00fdb45 --- /dev/null +++ b/patches/gcc10.1/gcc-Improve-initialization-of-objects-when-the-initializ.patch @@ -0,0 +1,39 @@ +From a2cde0c6443c440c2a2b72b5eea060229a0cff57 Mon Sep 17 00:00:00 2001 +From: Jeff Law +Date: Sat, 9 Jul 2022 11:11:00 -0400 +Subject: [PATCH] [RFA] Improve initialization of objects when the initializer + +gcc/ + + * expr.c (store_expr): Identify trailing NULs in a STRING_CST + initializer and use clear_storage rather than copying the + NULs to the destination array. +--- + gcc/expr.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f33..6ff393462 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -5723,6 +5723,17 @@ store_expr (tree exp, rtx target, int call_param_p, + } + + str_copy_len = TREE_STRING_LENGTH (str); ++ ++ /* Trailing NUL bytes in EXP will be handled by the call to ++ clear_storage, which is more efficient than copying them from ++ the STRING_CST, so trim those from STR_COPY_LEN. 
*/ ++ while (str_copy_len) ++ { ++ if (TREE_STRING_POINTER (str)[str_copy_len - 1]) ++ break; ++ str_copy_len--; ++ } ++ + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) + { + str_copy_len += STORE_MAX_PIECES - 1; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch b/patches/gcc10.1/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch new file mode 100644 index 0000000..4c5418f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch @@ -0,0 +1,44 @@ +From 2065a3fccb11e28ebcc42aa46c52a40b0fae9bea Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Sun, 21 Nov 2021 20:18:31 -0600 +Subject: [PATCH 01/31] xtensa: Fix non-robust split condition in + define_insn_and_split + +This patch is to fix some non-robust split conditions in some +define_insn_and_splits, to make each of them applied on top of +the corresponding condition for define_insn part, otherwise the +splitting could perform unexpectedly. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi_internal, movdf_internal): Fix split + condition. 
+--- + gcc/config/xtensa/xtensa.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a8e59ee9..123916957 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -782,7 +782,7 @@ + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +@@ -1058,7 +1058,7 @@ + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc10.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc10.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! 
optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 - diff --git a/patches/gcc10.1/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch b/patches/gcc10.1/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch deleted file mode 100644 index eb06969..0000000 --- a/patches/gcc10.1/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch +++ /dev/null @@ -1,3186 +0,0 @@ -From 989fc2c516206d7cf70177a416815f91998e2131 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Fri, 27 May 2022 21:34:37 +0900 -Subject: [PATCH 1/3] xtensa: Backport patches from upstream/master - -2b5b8610e985e23a0c2e0272339ab074a750e240 "xtensa: Fix non-robust split condition in define_insn_and_split" -7e5baa7e6f4caced6bdaef6d866d19e7656d8a16 "xtensa: fix -Wformat-diag warnings." -d543bac1631700f0da30d5ca555296f4938a82c6 "xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi" -112447f8564c0307c5da99a4094a3a99f204239f "xtensa: Reflect the 32-bit Integer Divide Option" -b753405a5f0d45eea97f4cc7df2c2089401b08bf "xtensa: Simplify EXTUI instruction maskimm validations" -9b251fe2e39a49c0d3ecd34cf8c5d55544efd159 "xtensa: Make use of IN_RANGE macro where appropriate" -3397563ad6c8fc5d9675faf507e52dd2ed284202 "xtensa: Fix instruction counting regarding block move expansion" -6454b4a8f5d90dd355c3c7e31a592a439223b645 "xtensa: Add setmemsi insn pattern" -9aad2b22436d5346fa224e5c14439dcef36cf3dd "xtensa: Improve bswap[sd]i2 insn patterns" -e94c6dbfb57a862dd8a8685eabc4886ad1aaea25 "xtensa: fix PR target/105879" -2fcc69d8ce4eddf6dea878a5383254d366e1bb14 "xtensa: Implement bswaphi2 insn pattern" -9777d446e2148ef9a6e9f35db3f4eab99ee8812c "xtensa: Make one_cmplsi2 optimizer-friendly" -e44e7face13f38f9b228e2619786ba0add9ef77b "xtensa: Optimize '(~x & y)' to '((x & y) ^ y)'" -29dc90a580bf45f503ed89eb1dc63b5676db776b "xtensa: Add clrsbsi2 insn 
pattern" -9489a1ab05ad1bda7126da5513f08282da3e531d "xtensa: Tweak some widen multiplications" -fddf0e1057fe24eff0d894fbc2959b4086464a96 "xtensa: Consider the Loop Option when setmemsi is expanded to small loop" -ccd02e734e0f1742629403b46e5b1c650b00fd65 "xtensa: Improve instruction cost estimation and suggestion" -cd02f15f1aecc45b2c2feae16840503549508619 "xtensa: Improve constant synthesis for both integer and floating-point" -1c68ec1f8ab531fba56cccf549ffe592bf622821 "xtensa: Improve shift operations more" -e1b193c1cce3a975a9ed60dd0f30182fe0255d7c "xtensa: Simplify conditional branch/move insn patterns" -70ce04ca353bb0cda8321b91a77c2477e26d339b "xtensa: Make use of BALL/BNALL instructions" -077438933cf94f00cc5edf974338c11ba4bf7a39 "xtensa: Optimize bitwise AND operation with some specific forms of constants" -96518f714e3fab53a966a05b8d48011e27c1a718 "xtensa: Document new -mextra-l32r-costs= Xtensa-specific option" -43b0c56fda4bc990e8ee8d6a0b376de7b663bb06 "xtensa: Add support for sibling call optimization" -c95e307e3a978166cd5d6817ec9d8293825ff3fb "xtensa: Add some dedicated patterns that correspond to GIMPLE canonicalizations" -cfad4856fa46abc878934a9433d0bfc2482ccf00 "xtensa: Eliminate unwanted reg-reg moves during DFmode input reloads" -ce3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9 "xtensa: Eliminate [DS]Cmode hard register clobber that is immediately followed by whole overwrite the register" -479b6f449ee999501ad6eff0b7db8d0cd5b2d28d "xtensa: Defer storing integer constants into litpool until reload" ---- - gcc/config/xtensa/constraints.md | 10 +- - gcc/config/xtensa/predicates.md | 41 +- - gcc/config/xtensa/xtensa-protos.h | 11 +- - gcc/config/xtensa/xtensa.c | 733 +++++++++--- - gcc/config/xtensa/xtensa.h | 7 +- - gcc/config/xtensa/xtensa.md | 1024 +++++++++++++---- - gcc/config/xtensa/xtensa.opt | 6 +- - gcc/doc/invoke.texi | 11 +- - gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 + - gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 + - 
gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 + - gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 + - .../gcc.target/xtensa/check_zero_byte.c | 9 + - .../gcc.target/xtensa/constsynth_2insns.c | 44 + - .../gcc.target/xtensa/constsynth_3insns.c | 24 + - .../gcc.target/xtensa/constsynth_double.c | 11 + - .../gcc.target/xtensa/funnel_shifter.c | 17 + - .../gcc.target/xtensa/one_cmpl_abs.c | 9 + - gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 + - libgcc/config/xtensa/lib1funcs.S | 23 + - libgcc/config/xtensa/t-xtensa | 2 +- - 21 files changed, 1796 insertions(+), 350 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c - -diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md -index 2062c8816..13b3daafc 100644 ---- a/gcc/config/xtensa/constraints.md -+++ b/gcc/config/xtensa/constraints.md -@@ -92,7 +92,7 @@ - "An integer constant in the range @minus{}32-95 for use with MOVI.N - instructions." - (and (match_code "const_int") -- (match_test "ival >= -32 && ival <= 95"))) -+ (match_test "IN_RANGE (ival, -32, 95)"))) - - (define_constraint "N" - "An unsigned 8-bit integer constant shifted left by 8 bits for use -@@ -103,7 +103,7 @@ - (define_constraint "O" - "An integer constant that can be used in ADDI.N instructions." 
- (and (match_code "const_int") -- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) -+ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) - - (define_constraint "P" - "An integer constant that can be used as a mask value in an EXTUI -@@ -113,8 +113,10 @@ - - (define_constraint "Y" - "A constant that can be used in relaxed MOVI instructions." -- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -- (match_test "TARGET_AUTO_LITPOOLS"))) -+ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -+ (match_test "TARGET_AUTO_LITPOOLS")) -+ (and (match_code "const_int") -+ (match_test "can_create_pseudo_p ()")))) - - ;; Memory constraints. Do not use define_memory_constraint here. Doing so - ;; causes reload to force some constants into the constant pool, but since -diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md -index eb52b05aa..633cc6264 100644 ---- a/gcc/config/xtensa/predicates.md -+++ b/gcc/config/xtensa/predicates.md -@@ -25,8 +25,7 @@ - - (define_predicate "addsubx_operand" - (and (match_code "const_int") -- (match_test "INTVAL (op) >= 1 -- && INTVAL (op) <= 3"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) - - (define_predicate "arith_operand" - (ior (and (match_code "const_int") -@@ -53,9 +52,19 @@ - (match_test "xtensa_mask_immediate (INTVAL (op))")) - (match_operand 0 "register_operand"))) - -+(define_predicate "shifted_mask_operand" -+ (match_code "const_int") -+{ -+ HOST_WIDE_INT mask = INTVAL (op); -+ int shift = ctz_hwi (mask); -+ -+ return IN_RANGE (shift, 1, 31) -+ && xtensa_mask_immediate ((uint32_t)mask >> shift); -+}) -+ - (define_predicate "extui_fldsz_operand" - (and (match_code "const_int") -- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) - - (define_predicate "sext_operand" - (if_then_else (match_test "TARGET_SEXT") -@@ -64,7 +73,7 @@ - - (define_predicate "sext_fldsz_operand" - (and (match_code 
"const_int") -- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) -+ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) - - (define_predicate "lsbitnum_operand" - (and (match_code "const_int") -@@ -138,8 +147,9 @@ - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) - (ior (and (match_code "const_int") -- (match_test "GET_MODE_CLASS (mode) == MODE_INT -- && xtensa_simm12b (INTVAL (op))")) -+ (match_test "(GET_MODE_CLASS (mode) == MODE_INT -+ && xtensa_simm12b (INTVAL (op))) -+ || can_create_pseudo_p ()")) - (and (match_code "const_int,const_double,const,symbol_ref,label_ref") - (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) - && CONSTANT_P (op) -@@ -156,6 +166,19 @@ - (and (match_code "const_int") - (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) - -+(define_predicate "reload_operand" -+ (match_code "mem") -+{ -+ const_rtx addr = XEXP (op, 0); -+ if (REG_P (addr)) -+ return REGNO (addr) == A1_REG; -+ if (GET_CODE (addr) == PLUS) -+ return REG_P (XEXP (addr, 0)) -+ && REGNO (XEXP (addr, 0)) == A1_REG -+ && CONST_INT_P (XEXP (addr, 1)); -+ return false; -+}) -+ - (define_predicate "branch_operator" - (match_code "eq,ne,lt,ge")) - -@@ -165,9 +188,15 @@ - (define_predicate "boolean_operator" - (match_code "eq,ne")) - -+(define_predicate "logical_shift_operator" -+ (match_code "ashift,lshiftrt")) -+ - (define_predicate "xtensa_cstoresi_operator" - (match_code "eq,ne,gt,ge,lt,le")) - -+(define_predicate "xtensa_shift_per_byte_operator" -+ (match_code "ashift,ashiftrt,lshiftrt")) -+ - (define_predicate "tls_symbol_operand" - (and (match_code "symbol_ref") - (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) -diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h -index 18d803581..75ed3bfb0 100644 ---- a/gcc/config/xtensa/xtensa-protos.h -+++ b/gcc/config/xtensa/xtensa-protos.h -@@ -41,18 +41,23 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); - extern int 
xtensa_expand_conditional_move (rtx *, int); - extern int xtensa_expand_scc (rtx *, machine_mode); - extern int xtensa_expand_block_move (rtx *); -+extern int xtensa_expand_block_set_unrolled_loop (rtx *); -+extern int xtensa_expand_block_set_small_loop (rtx *); - extern void xtensa_split_operand_pair (rtx *, machine_mode); -+extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); - extern int xtensa_emit_move_sequence (rtx *, machine_mode); - extern rtx xtensa_copy_incoming_a7 (rtx); - extern void xtensa_expand_nonlocal_goto (rtx *); - extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); - extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); - extern void xtensa_emit_loop_end (rtx_insn *, rtx *); --extern char *xtensa_emit_branch (bool, bool, rtx *); --extern char *xtensa_emit_bit_branch (bool, bool, rtx *); -+extern char *xtensa_emit_branch (bool, rtx *); - extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); -+extern void xtensa_prepare_expand_call (int, rtx *); - extern char *xtensa_emit_call (int, rtx *); -+extern char *xtensa_emit_sibcall (int, rtx *); - extern bool xtensa_tls_referenced_p (rtx); -+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); - - #ifdef TREE_CODE - extern void init_cumulative_args (CUMULATIVE_ARGS *, int); -@@ -70,7 +75,7 @@ extern int xtensa_dbx_register_number (int); - extern long compute_frame_size (poly_int64); - extern bool xtensa_use_return_instruction_p (void); - extern void xtensa_expand_prologue (void); --extern void xtensa_expand_epilogue (void); -+extern void xtensa_expand_epilogue (bool); - extern void order_regs_for_local_alloc (void); - extern enum reg_class xtensa_regno_to_class (int regno); - extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 6cd9d5528..5b1aa9b23 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -55,6 +55,7 @@ along with GCC; see the 
file COPYING3. If not see - #include "dumpfile.h" - #include "hw-doloop.h" - #include "rtl-iter.h" -+#include "insn-attr.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -117,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = - - static void xtensa_option_override (void); - static enum internal_test map_test_to_internal_test (enum rtx_code); --static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); -+static rtx gen_int_relational (enum rtx_code, rtx, rtx); - static rtx gen_float_relational (enum rtx_code, rtx, rtx); - static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); - static rtx fixup_subreg_mem (rtx); -@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, - static section *xtensa_select_rtx_section (machine_mode, rtx, - unsigned HOST_WIDE_INT); - static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); -+static int xtensa_insn_cost (rtx_insn *, bool); - static int xtensa_register_move_cost (machine_mode, reg_class_t, - reg_class_t); - static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); -@@ -185,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); - static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); - static HOST_WIDE_INT xtensa_starting_frame_offset (void); - static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); -+static bool xtensa_function_ok_for_sibcall (tree, tree); - - - -@@ -208,6 +211,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS xtensa_rtx_costs -+#undef TARGET_INSN_COST -+#define TARGET_INSN_COST xtensa_insn_cost - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 - -@@ -333,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #undef 
TARGET_HAVE_SPECULATION_SAFE_VALUE - #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed - -+#undef TARGET_FUNCTION_OK_FOR_SIBCALL -+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - -@@ -341,42 +349,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; - bool - xtensa_simm8 (HOST_WIDE_INT v) - { -- return v >= -128 && v <= 127; -+ return IN_RANGE (v, -128, 127); - } - - - bool - xtensa_simm8x256 (HOST_WIDE_INT v) - { -- return (v & 255) == 0 && (v >= -32768 && v <= 32512); -+ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); - } - - - bool - xtensa_simm12b (HOST_WIDE_INT v) - { -- return v >= -2048 && v <= 2047; -+ return IN_RANGE (v, -2048, 2047); - } - - - static bool - xtensa_uimm8 (HOST_WIDE_INT v) - { -- return v >= 0 && v <= 255; -+ return IN_RANGE (v, 0, 255); - } - - - static bool - xtensa_uimm8x2 (HOST_WIDE_INT v) - { -- return (v & 1) == 0 && (v >= 0 && v <= 510); -+ return (v & 1) == 0 && IN_RANGE (v, 0, 510); - } - - - static bool - xtensa_uimm8x4 (HOST_WIDE_INT v) - { -- return (v & 3) == 0 && (v >= 0 && v <= 1020); -+ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); - } - - -@@ -446,19 +454,7 @@ xtensa_b4constu (HOST_WIDE_INT v) - bool - xtensa_mask_immediate (HOST_WIDE_INT v) - { --#define MAX_MASK_SIZE 16 -- int mask_size; -- -- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) -- { -- if ((v & 1) == 0) -- return false; -- v = v >> 1; -- if (v == 0) -- return true; -- } -- -- return false; -+ return IN_RANGE (exact_log2 (v + 1), 1, 16); - } - - -@@ -539,7 +535,7 @@ smalloffset_mem_p (rtx op) - return FALSE; - - val = INTVAL (offset); -- return (val & 3) == 0 && (val >= 0 && val <= 60); -+ return (val & 3) == 0 && IN_RANGE (val, 0, 60); - } - } - return FALSE; -@@ -678,8 +674,7 @@ map_test_to_internal_test (enum rtx_code test_code) - static rtx - gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - rtx cmp0, /* 
first operand to compare */ -- rtx cmp1, /* second operand to compare */ -- int *p_invert /* whether branch needs to reverse test */) -+ rtx cmp1 /* second operand to compare */) - { - struct cmp_info - { -@@ -711,6 +706,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - enum internal_test test; - machine_mode mode; - struct cmp_info *p_info; -+ int invert; - - test = map_test_to_internal_test (test_code); - gcc_assert (test != ITEST_MAX); -@@ -747,9 +743,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - } - - /* See if we need to invert the result. */ -- *p_invert = ((GET_CODE (cmp1) == CONST_INT) -- ? p_info->invert_const -- : p_info->invert_reg); -+ invert = ((GET_CODE (cmp1) == CONST_INT) -+ ? p_info->invert_const -+ : p_info->invert_reg); - - /* Comparison to constants, may involve adding 1 to change a LT into LE. - Comparison between two registers, may involve switching operands. */ -@@ -766,7 +762,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - cmp1 = temp; - } - -- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); -+ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) -+ : p_info->test_code, -+ VOIDmode, cmp0, cmp1); - } - - -@@ -825,45 +823,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) - enum rtx_code test_code = GET_CODE (operands[0]); - rtx cmp0 = operands[1]; - rtx cmp1 = operands[2]; -- rtx cmp; -- int invert; -- rtx label1, label2; -+ rtx cmp, label; - - switch (mode) - { -+ case E_SFmode: -+ if (TARGET_HARD_FLOAT) -+ { -+ cmp = gen_float_relational (test_code, cmp0, cmp1); -+ break; -+ } -+ /* FALLTHRU */ -+ - case E_DFmode: - default: - fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); - - case E_SImode: -- invert = FALSE; -- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); -- break; -- -- case E_SFmode: -- if (!TARGET_HARD_FLOAT) -- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, -- cmp0, cmp1)); -- invert = FALSE; -- cmp = gen_float_relational (test_code, cmp0, cmp1); -+ cmp = gen_int_relational (test_code, cmp0, cmp1); - break; - } - - /* Generate the branch. */ -- -- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); -- label2 = pc_rtx; -- -- if (invert) -- { -- label2 = label1; -- label1 = pc_rtx; -- } -- -+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); - emit_jump_insn (gen_rtx_SET (pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, -- label1, -- label2))); -+ label, -+ pc_rtx))); - } - - -@@ -1035,6 +1021,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) - } - - -+/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) -+ into dst with synthesizing a such constant value from a sequence of -+ load-immediate / arithmetic ones, instead of a L32R instruction -+ (plus a constant in litpool). 
*/ -+ -+static void -+xtensa_emit_constantsynth (rtx dst, enum rtx_code code, -+ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT imm2) -+{ -+ gcc_assert (REG_P (dst)); -+ emit_move_insn (dst, GEN_INT (imm0)); -+ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, -+ dst, GEN_INT (imm1))); -+ if (gen_op) -+ emit_move_insn (dst, gen_op (dst, imm2)); -+} -+ -+static int -+xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT op_imm) -+{ -+ int shift = exact_log2 (srcval + 1); -+ -+ if (IN_RANGE (shift, 1, 31)) -+ { -+ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) -+ { -+ HOST_WIDE_INT imm0, imm1; -+ -+ if (srcval < -32768) -+ imm1 = -32768; -+ else if (srcval > 32512) -+ imm1 = 32512; -+ else -+ imm1 = srcval & ~255; -+ imm0 = srcval - imm1; -+ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) -+ imm0 -= 256, imm1 += 256; -+ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); -+ return 1; -+ } -+ -+ shift = ctz_hwi (srcval); -+ if (xtensa_simm12b (srcval >> shift)) -+ { -+ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static rtx -+xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) -+{ -+ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); -+} -+ -+static rtx -+xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) -+{ -+ return imm == 7 -+ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), -+ reg) -+ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, -+ GEN_INT (floor_log2 (imm - 1))), -+ reg); -+} -+ -+int -+xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) -+{ -+ /* No need for synthesizing for what fits into MOVI instruction. */ -+ if (xtensa_simm12b (srcval)) -+ return 0; -+ -+ /* 2-insns substitution. 
*/ -+ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) -+ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) -+ return 1; -+ -+ /* 3-insns substitution. */ -+ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) -+ { -+ int shift, divisor; -+ -+ /* 2-insns substitution followed by SLLI. */ -+ shift = ctz_hwi (srcval); -+ if (IN_RANGE (shift, 1, 31) && -+ xtensa_constantsynth_2insn (dst, srcval >> shift, -+ xtensa_constantsynth_rtx_SLLI, -+ shift)) -+ return 1; -+ -+ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ -+ if (TARGET_ADDX) -+ for (divisor = 3; divisor <= 9; divisor += 2) -+ if (srcval % divisor == 0 && -+ xtensa_constantsynth_2insn (dst, srcval / divisor, -+ xtensa_constantsynth_rtx_ADDSUBX, -+ divisor)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+ - /* Emit insns to move operands[1] into operands[0]. - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move -@@ -1070,24 +1173,9 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - return 1; - } - -- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) -+ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 -+ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) - { -- /* Try to emit MOVI + SLLI sequence, that is smaller -- than L32R + literal. */ -- if (optimize >= 1 && ! 
optimize_debug && mode == SImode -- && CONST_INT_P (src) && register_operand (dst, mode)) -- { -- HOST_WIDE_INT srcval = INTVAL (src); -- int shift = ctz_hwi (srcval); -- -- if (xtensa_simm12b (srcval >> shift)) -- { -- emit_move_insn (dst, GEN_INT (srcval >> shift)); -- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); -- return 1; -- } -- } -- - src = force_const_mem (SImode, src); - operands[1] = src; - } -@@ -1315,7 +1403,7 @@ xtensa_expand_block_move (rtx *operands) - move_ratio = 4; - if (optimize > 2) - move_ratio = LARGEST_MOVE_RATIO; -- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ -+ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); - if (num_pieces > move_ratio) - return 0; - -@@ -1352,7 +1440,7 @@ xtensa_expand_block_move (rtx *operands) - temp[next] = gen_reg_rtx (mode[next]); - - x = adjust_address (src_mem, mode[next], offset_ld); -- emit_insn (gen_rtx_SET (temp[next], x)); -+ emit_move_insn (temp[next], x); - - offset_ld += next_amount; - bytes -= next_amount; -@@ -1362,9 +1450,9 @@ xtensa_expand_block_move (rtx *operands) - if (active[phase]) - { - active[phase] = false; -- -+ - x = adjust_address (dst_mem, mode[phase], offset_st); -- emit_insn (gen_rtx_SET (x, temp[phase])); -+ emit_move_insn (x, temp[phase]); - - offset_st += amount[phase]; - } -@@ -1375,6 +1463,246 @@ xtensa_expand_block_move (rtx *operands) - } - - -+/* Try to expand a block set operation to a sequence of RTL move -+ instructions. If not optimizing, or if the block size is not a -+ constant, or if the block is too large, or if the value to -+ initialize the block with is not a constant, the expansion -+ fails and GCC falls back to calling memset(). -+ -+ operands[0] is the destination -+ operands[1] is the length -+ operands[2] is the initialization value -+ operands[3] is the alignment */ -+ -+static int -+xtensa_sizeof_MOVI (HOST_WIDE_INT imm) -+{ -+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; -+} -+ -+int -+xtensa_expand_block_set_unrolled_loop (rtx *operands) -+{ -+ rtx dst_mem = operands[0]; -+ HOST_WIDE_INT bytes, value, align; -+ int expand_len, funccall_len; -+ rtx x, reg; -+ int offset; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ /* Insn expansion: a series of aligned memory stores. -+ Consist of S8I, S16I or S32I(.N). */ -+ expand_len += (bytes / align) * (TARGET_DENSITY -+ && align == 4 ? 2 : 3); -+ /* Insn expansion: the remainder, sub-aligned memory stores. -+ A combination of S8I and S16I as needed. */ -+ expand_len += ((bytes % align + 1) / 2) * 3; -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (dst_mem, 0); -+ if (!REG_P (x)) -+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); -+ switch (align) -+ { -+ case 1: -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (SImode, GEN_INT (value)); -+ -+ offset = 0; -+ do -+ { -+ int unit_size = MIN (bytes, align); -+ machine_mode unit_mode = (unit_size >= 4 ? SImode : -+ (unit_size >= 2 ? HImode : -+ QImode)); -+ unit_size = GET_MODE_SIZE (unit_mode); -+ -+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), -+ unit_mode == SImode ? reg -+ : convert_to_mode (unit_mode, reg, true)); -+ -+ offset += unit_size; -+ bytes -= unit_size; -+ } -+ while (bytes > 0); -+ -+ return 1; -+} -+ -+int -+xtensa_expand_block_set_small_loop (rtx *operands) -+{ -+ HOST_WIDE_INT bytes, value, align, count; -+ int expand_len, funccall_len; -+ rtx x, dst, end, reg; -+ machine_mode unit_mode; -+ rtx_code_label *label; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Totally-aligned block only. */ -+ if (bytes % align != 0) -+ return 0; -+ count = bytes / align; -+ -+ /* If the Loop Option (zero-overhead looping) is configured and active, -+ almost no restrictions about the length of the block. */ -+ if (! (TARGET_LOOPS && optimize)) -+ { -+ /* If 4-byte aligned, small loop substitution is almost optimal, -+ thus limited to only offset to the end address for ADDI/ADDMI -+ instruction. */ -+ if (align == 4 -+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) -+ return 0; -+ -+ /* If no 4-byte aligned, loop count should be treated as the -+ constraint. 
*/ -+ if (align != 4 -+ && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) -+ return 0; -+ } -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ -+ { -+ /* Insn translation: Either MOV(.N) or L32R w/litpool for the -+ loop count. */ -+ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) -+ : 3 + 4; -+ /* Insn translation: LOOP, the zero-overhead looping setup -+ instruction. */ -+ expand_len += 3; -+ /* Insn expansion: the loop body instructions. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3); -+ } -+ else /* NO zero-overhead looping */ -+ { -+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ -+ expand_len += bytes > 127 ? 3 -+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; -+ /* Insn expansion: the loop body and branch instruction. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). -+ For branch, BNE. */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3) + 3; -+ } -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (operands[0], 0); -+ if (!REG_P (x)) -+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); -+ dst = gen_reg_rtx (SImode); -+ emit_move_insn (dst, x); -+ end = gen_reg_rtx (SImode); -+ if (TARGET_LOOPS && optimize) -+ x = force_reg (SImode, operands[1] /* the length */); -+ else -+ x = operands[1]; -+ emit_insn (gen_addsi3 (end, dst, x)); -+ switch (align) -+ { -+ case 1: -+ unit_mode = QImode; -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ unit_mode = HImode; -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ unit_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (unit_mode, GEN_INT (value)); -+ -+ label = gen_label_rtx (); -+ emit_label (label); -+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); -+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); -+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); -+ -+ return 1; -+} -+ -+ - void - xtensa_expand_nonlocal_goto (rtx *operands) - { -@@ -1725,21 +2053,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) - - - char * --xtensa_emit_branch (bool inverted, bool immed, rtx *operands) -+xtensa_emit_branch (bool immed, rtx *operands) - { - static char result[64]; -- enum rtx_code code; -+ enum rtx_code code = GET_CODE (operands[3]); - const char *op; - -- code = GET_CODE (operands[3]); - switch (code) - { -- case EQ: op = inverted ? "ne" : "eq"; break; -- case NE: op = inverted ? "eq" : "ne"; break; -- case LT: op = inverted ? "ge" : "lt"; break; -- case GE: op = inverted ? "lt" : "ge"; break; -- case LTU: op = inverted ? "geu" : "ltu"; break; -- case GEU: op = inverted ? 
"ltu" : "geu"; break; -+ case EQ: op = "eq"; break; -+ case NE: op = "ne"; break; -+ case LT: op = "lt"; break; -+ case GE: op = "ge"; break; -+ case LTU: op = "ltu"; break; -+ case GEU: op = "geu"; break; - default: gcc_unreachable (); - } - -@@ -1758,32 +2085,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) - } - - --char * --xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) --{ -- static char result[64]; -- const char *op; -- -- switch (GET_CODE (operands[3])) -- { -- case EQ: op = inverted ? "bs" : "bc"; break; -- case NE: op = inverted ? "bc" : "bs"; break; -- default: gcc_unreachable (); -- } -- -- if (immed) -- { -- unsigned bitnum = INTVAL (operands[1]) & 0x1f; -- operands[1] = GEN_INT (bitnum); -- sprintf (result, "b%si\t%%0, %%d1, %%2", op); -- } -- else -- sprintf (result, "b%s\t%%0, %%1, %%2", op); -- -- return result; --} -- -- - char * - xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { -@@ -1792,12 +2093,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - const char *op; - - code = GET_CODE (operands[4]); -+ if (inverted) -+ code = reverse_condition (code); - if (isbool) - { - switch (code) - { -- case EQ: op = inverted ? "t" : "f"; break; -- case NE: op = inverted ? "f" : "t"; break; -+ case EQ: op = "f"; break; -+ case NE: op = "t"; break; - default: gcc_unreachable (); - } - } -@@ -1805,10 +2108,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { - switch (code) - { -- case EQ: op = inverted ? "nez" : "eqz"; break; -- case NE: op = inverted ? "eqz" : "nez"; break; -- case LT: op = inverted ? "gez" : "ltz"; break; -- case GE: op = inverted ? 
"ltz" : "gez"; break; -+ case EQ: op = "eqz"; break; -+ case NE: op = "nez"; break; -+ case LT: op = "ltz"; break; -+ case GE: op = "gez"; break; - default: gcc_unreachable (); - } - } -@@ -1819,6 +2122,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - } - - -+void -+xtensa_prepare_expand_call (int callop, rtx *operands) -+{ -+ rtx addr = XEXP (operands[callop], 0); -+ -+ if (flag_pic && SYMBOL_REF_P (addr) -+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -+ addr = gen_sym_PLT (addr); -+ -+ if (!call_insn_operand (addr, VOIDmode)) -+ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); -+} -+ -+ - char * - xtensa_emit_call (int callop, rtx *operands) - { -@@ -1837,6 +2154,24 @@ xtensa_emit_call (int callop, rtx *operands) - } - - -+char * -+xtensa_emit_sibcall (int callop, rtx *operands) -+{ -+ static char result[64]; -+ rtx tgt = operands[callop]; -+ -+ if (GET_CODE (tgt) == CONST_INT) -+ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", -+ INTVAL (tgt)); -+ else if (register_operand (tgt, VOIDmode)) -+ sprintf (result, "jx\t%%%d", callop); -+ else -+ sprintf (result, "j.l\t%%%d, a9", callop); -+ -+ return result; -+} -+ -+ - bool - xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) - { -@@ -2061,6 +2396,20 @@ xtensa_tls_referenced_p (rtx x) - } - - -+/* Helper function for "*shlrd_..." patterns. */ -+ -+enum rtx_code -+xtensa_shlrd_which_direction (rtx op0, rtx op1) -+{ -+ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) -+ return ASHIFT; /* shld */ -+ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) -+ return LSHIFTRT; /* shrd */ -+ -+ return UNKNOWN; -+} -+ -+ - /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ - - static bool -@@ -2364,7 +2713,7 @@ static void - printx (FILE *file, signed int val) - { - /* Print a hexadecimal value in a nice way. 
*/ -- if ((val > -0xa) && (val < 0xa)) -+ if (IN_RANGE (val, -9, 9)) - fprintf (file, "%d", val); - else if (val < 0) - fprintf (file, "-0x%x", -val); -@@ -2379,7 +2728,7 @@ void - print_operand (FILE *file, rtx x, int letter) - { - if (!x) -- error ("PRINT_OPERAND null pointer"); -+ error ("% null pointer"); - - switch (letter) - { -@@ -2424,17 +2773,11 @@ print_operand (FILE *file, rtx x, int letter) - case 'K': - if (GET_CODE (x) == CONST_INT) - { -- int num_bits = 0; - unsigned val = INTVAL (x); -- while (val & 1) -- { -- num_bits += 1; -- val = val >> 1; -- } -- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) -+ if (!xtensa_mask_immediate (val)) - fatal_insn ("invalid mask", x); - -- fprintf (file, "%d", num_bits); -+ fprintf (file, "%d", floor_log2 (val + 1)); - } - else - output_operand_lossage ("invalid %%K value"); -@@ -2584,7 +2927,7 @@ void - print_operand_address (FILE *file, rtx addr) - { - if (!addr) -- error ("PRINT_OPERAND_ADDRESS, null pointer"); -+ error ("%, null pointer"); - - switch (GET_CODE (addr)) - { -@@ -2750,7 +3093,7 @@ xtensa_call_save_reg(int regno) - return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || - df_regs_ever_live_p (regno); - -- if (crtl->calls_eh_return && regno >= 2 && regno < 4) -+ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) - return true; - - return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); -@@ -2870,7 +3213,7 @@ xtensa_expand_prologue (void) - int callee_save_size = cfun->machine->callee_save_size; - - /* -128 is a limit of single addi instruction. 
*/ -- if (total_size > 0 && total_size <= 128) -+ if (IN_RANGE (total_size, 1, 128)) - { - insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-total_size))); -@@ -2999,7 +3342,7 @@ xtensa_expand_prologue (void) - } - - void --xtensa_expand_epilogue (void) -+xtensa_expand_epilogue (bool sibcall_p) - { - if (!TARGET_WINDOWED_ABI) - { -@@ -3033,10 +3376,13 @@ xtensa_expand_epilogue (void) - if (xtensa_call_save_reg(regno)) - { - rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ rtx reg; - - offset -= UNITS_PER_WORD; -- emit_move_insn (gen_rtx_REG (SImode, regno), -+ emit_move_insn (reg = gen_rtx_REG (SImode, regno), - gen_frame_mem (SImode, x)); -+ if (regno == A0_REG && sibcall_p) -+ emit_use (reg); - } - } - -@@ -3071,7 +3417,8 @@ xtensa_expand_epilogue (void) - EH_RETURN_STACKADJ_RTX)); - } - cfun->machine->epilogue_done = true; -- emit_jump_insn (gen_return ()); -+ if (!sibcall_p) -+ emit_jump_insn (gen_return ()); - } - - bool -@@ -3697,7 +4044,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) - flags |= SECTION_BSS; /* @nobits */ - else - warning (0, "only uninitialized variables can be placed in a " -- ".bss section"); -+ "%<.bss%> section"); - } - - return flags; -@@ -3750,7 +4097,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - static bool - xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - int opno ATTRIBUTE_UNUSED, -- int *total, bool speed ATTRIBUTE_UNUSED) -+ int *total, bool speed) - { - int code = GET_CODE (x); - -@@ -3838,9 +4185,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case CLZ: -+ case CLRSB: - *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); - return true; - -+ case BSWAP: -+ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); -+ return true; -+ - case NOT: - *total = COSTS_N_INSNS (mode == DImode ? 
3 : 2); - return true; -@@ -3864,13 +4216,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case ABS: -+ case NEG: - { - if (mode == SFmode) - *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); - else if (mode == DFmode) - *total = COSTS_N_INSNS (50); -- else -+ else if (mode == DImode) - *total = COSTS_N_INSNS (4); -+ else -+ *total = COSTS_N_INSNS (1); - return true; - } - -@@ -3886,10 +4241,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - } - -- case NEG: -- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); -- return true; -- - case MULT: - { - if (mode == SFmode) -@@ -3929,11 +4280,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - case UMOD: - { - if (mode == DImode) -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - else if (TARGET_DIV32) - *total = COSTS_N_INSNS (32); - else -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - return true; - } - -@@ -3966,6 +4317,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - } - } - -+static bool -+xtensa_is_insn_L32R_p(const rtx_insn *insn) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = XEXP (x, 1); -+ if (GET_CODE (x) == MEM) -+ { -+ x = XEXP (x, 0); -+ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) -+ && CONSTANT_POOL_ADDRESS_P (x); -+ } -+ } -+ -+ return false; -+} -+ -+/* Compute a relative costs of RTL insns. This is necessary in order to -+ achieve better RTL insn splitting/combination result. */ -+ -+static int -+xtensa_insn_cost (rtx_insn *insn, bool speed) -+{ -+ if (!(recog_memoized (insn) < 0)) -+ { -+ int len = get_attr_length (insn), n = (len + 2) / 3; -+ -+ if (len == 0) -+ return COSTS_N_INSNS (0); -+ -+ if (speed) /* For speed cost. */ -+ { -+ /* "L32R" may be particular slow (implementation-dependent). 
*/ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); -+ -+ /* Cost based on the pipeline model. */ -+ switch (get_attr_type (insn)) -+ { -+ case TYPE_STORE: -+ case TYPE_MOVE: -+ case TYPE_ARITH: -+ case TYPE_MULTI: -+ case TYPE_NOP: -+ case TYPE_FSTORE: -+ return COSTS_N_INSNS (n); -+ -+ case TYPE_LOAD: -+ return COSTS_N_INSNS (n - 1 + 2); -+ -+ case TYPE_JUMP: -+ case TYPE_CALL: -+ return COSTS_N_INSNS (n - 1 + 3); -+ -+ case TYPE_FCONV: -+ case TYPE_FLOAD: -+ case TYPE_MUL16: -+ case TYPE_MUL32: -+ case TYPE_RSR: -+ return COSTS_N_INSNS (n * 2); -+ -+ case TYPE_FMADD: -+ return COSTS_N_INSNS (n * 4); -+ -+ case TYPE_DIV32: -+ return COSTS_N_INSNS (n * 16); -+ -+ default: -+ break; -+ } -+ } -+ else /* For size cost. */ -+ { -+ /* Cost based on the instruction length. */ -+ if (get_attr_type (insn) != TYPE_UNKNOWN) -+ { -+ /* "L32R" itself plus constant in litpool. */ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (2) + 1; -+ -+ /* Consider ".n" short instructions. */ -+ return COSTS_N_INSNS (n) - (n * 3 - len); -+ } -+ } -+ } -+ -+ /* Fall back. */ -+ return pattern_cost (PATTERN (insn), speed); -+} -+ - /* Worker function for TARGET_RETURN_IN_MEMORY. */ - - static bool -@@ -4491,4 +4934,16 @@ xtensa_asan_shadow_offset (void) - return HOST_WIDE_INT_UC (0x10000000); - } - -+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ -+static bool -+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Do not allow sibcalls if the Windowed Register Option is -+ configured. */ -+ if (TARGET_WINDOWED_ABI) -+ return false; -+ -+ return true; -+} -+ - #include "gt-xtensa.h" -diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h -index fa86a245e..3e9cbc943 100644 ---- a/gcc/config/xtensa/xtensa.h -+++ b/gcc/config/xtensa/xtensa.h -@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. 
If not see - #define HAVE_AS_TLS 0 - #endif - -+/* Define this if the target has no hardware divide instructions. */ -+#if !TARGET_DIV32 -+#define TARGET_HAS_NO_HW_DIVIDE -+#endif -+ - - /* Target CPU builtins. */ - #define TARGET_CPU_CPP_BUILTINS() \ -@@ -488,7 +493,7 @@ enum reg_class - used for this purpose since all function arguments are pushed on - the stack. */ - #define FUNCTION_ARG_REGNO_P(N) \ -- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) -+ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) - - /* Record the number of argument words seen so far, along with a flag to - indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 2a8e59ee9..124548dfe 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -25,6 +25,7 @@ - (A7_REG 7) - (A8_REG 8) - (A9_REG 9) -+ (A10_REG 10) - - (UNSPEC_NOP 2) - (UNSPEC_PLT 3) -@@ -83,6 +84,13 @@ - ;; the same template. - (define_mode_iterator HQI [HI QI]) - -+;; This code iterator is for *shlrd and its variants. -+(define_code_iterator ior_op [ior plus]) -+ -+;; This mode iterator allows the DC and SC patterns to be defined from -+;; the same template. -+(define_mode_iterator DSC [DC SC]) -+ - - ;; Attributes. - -@@ -98,7 +106,10 @@ - - ;; Describe a user's asm statement. - (define_asm_attributes -- [(set_attr "type" "multi")]) -+ [(set_attr "type" "multi") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) ;; Should be the maximum possible length -+ ;; of a single machine instruction. - - - ;; Pipeline model. -@@ -224,20 +235,42 @@ - - ;; Multiplication. 
- --(define_expand "mulsidi3" -+(define_expand "mulsidi3" - [(set (match_operand:DI 0 "register_operand") -- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) -- (any_extend:DI (match_operand:SI 2 "register_operand"))))] -+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] - "TARGET_MUL32_HIGH" - { - rtx temp = gen_reg_rtx (SImode); - emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -- emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -- operands[1], operands[2])); -+ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); - emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); - DONE; - }) - -+(define_expand "umulsidi3" -+ [(set (match_operand:DI 0 "register_operand") -+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] -+ "" -+{ -+ if (TARGET_MUL32_HIGH) -+ { -+ rtx temp = gen_reg_rtx (SImode); -+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -+ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); -+ } -+ else -+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], SImode, -+ operands[2], SImode); -+ DONE; -+}) -+ - (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=a") - (truncate:SI -@@ -261,30 +294,16 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_insn "mulhisi3" -- [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (sign_extend:SI -- (match_operand:HI 1 "register_operand" "%r,r")) -- (sign_extend:SI -- (match_operand:HI 2 "register_operand" "r,r"))))] -- "TARGET_MUL16 || TARGET_MAC16" -- "@ -- mul16s\t%0, %1, %2 -- mul.aa.ll\t%1, %2" -- [(set_attr "type" 
"mul16,mac16") -- (set_attr "mode" "SI") -- (set_attr "length" "3,3")]) -- --(define_insn "umulhisi3" -+(define_insn "mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (zero_extend:SI -+ (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) -- (zero_extend:SI -+ (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] - "TARGET_MUL16 || TARGET_MAC16" - "@ -- mul16u\t%0, %1, %2 -- umul.aa.ll\t%1, %2" -+ mul16\t%0, %1, %2 -+ mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) -@@ -429,7 +448,17 @@ - (set_attr "length" "3")]) - - --;; Count leading/trailing zeros and find first bit. -+;; Count redundant leading sign bits and leading/trailing zeros, -+;; and find first bit. -+ -+(define_insn "clrsbsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] -+ "TARGET_NSA" -+ "nsa\t%0, %1" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "3")]) - - (define_insn "clzsi2" - [(set (match_operand:SI 0 "register_operand" "=a") -@@ -471,23 +500,78 @@ - - ;; Byte swap. 
- --(define_insn "bswapsi2" -- [(set (match_operand:SI 0 "register_operand" "=&a") -- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "15")]) -+(define_insn "bswaphi2" -+ [(set (match_operand:HI 0 "register_operand" "=a") -+ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) -+ (clobber (match_scratch:HI 2 "=&a"))] -+ "" -+ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "HI") -+ (set_attr "length" "9")]) - --(define_insn "bswapdi2" -- [(set (match_operand:DI 0 "register_operand" "=&a") -- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" -- [(set_attr "type" "arith") -- (set_attr "mode" "DI") -- (set_attr "length" "27")]) -+(define_expand "bswapsi2" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (bswap:SI (match_operand:SI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1" -+{ -+ /* GIMPLE manual byte-swapping recognition is now activated. -+ For both built-in and manual bswaps, emit corresponding library call -+ if optimizing for size, or a series of dedicated machine instructions -+ if otherwise. 
*/ -+ if (optimize_size) -+ emit_library_call_value (optab_libfunc (bswap_optab, SImode), -+ operands[0], LCT_NORMAL, SImode, -+ operands[1], SImode); -+ else -+ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "bswapsi2_internal" -+ [(set (match_operand:SI 0 "register_operand" "=a,&a") -+ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) -+ (clobber (match_scratch:SI 2 "=&a,X"))] -+ "!optimize_debug && optimize > 1 && !optimize_size" -+{ -+ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); -+ const char *init = "ssai\t8\;"; -+ static char result[64]; -+ if (prev_insn && NONJUMP_INSN_P (prev_insn)) -+ { -+ rtx x = PATTERN (prev_insn); -+ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 -+ && GET_CODE (XVECEXP (x, 0, 0)) == SET -+ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) -+ { -+ x = XEXP (XVECEXP (x, 0, 0), 1); -+ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) -+ init = ""; -+ } -+ } -+ sprintf (result, -+ (which_alternative == 0) -+ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" -+ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", -+ init); -+ return result; -+} -+ [(set_attr "type" "arith,arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "15,15")]) -+ -+(define_expand "bswapdi2" -+ [(set (match_operand:DI 0 "register_operand" "") -+ (bswap:DI (match_operand:DI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1 && optimize_size" -+{ -+ /* Replace with a single DImode library call. -+ Without this, two SImode library calls are emitted. */ -+ emit_library_call_value (optab_libfunc (bswap_optab, DImode), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], DImode); -+ DONE; -+}) - - - ;; Negation and one's complement. 
-@@ -501,16 +585,26 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "one_cmplsi2" -- [(set (match_operand:SI 0 "register_operand" "") -- (not:SI (match_operand:SI 1 "register_operand" "")))] -+(define_insn_and_split "one_cmplsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (not:SI (match_operand:SI 1 "register_operand" "r")))] - "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (const_int -1)) -+ (set (match_dup 0) -+ (xor:SI (match_dup 1) -+ (match_dup 2)))] - { -- rtx temp = gen_reg_rtx (SImode); -- emit_insn (gen_movsi (temp, constm1_rtx)); -- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); -- DONE; --}) -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) - - (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") -@@ -536,6 +630,103 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -+(define_insn_and_split "*andsi3_bitcmpl" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r")))] -+ "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 3) -+ (and:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (xor:SI (match_dup 3) -+ (match_dup 2)))] -+{ -+ operands[3] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_pow2_minus_one" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ashift:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (lshiftrt:SI (match_dup 0) -+ 
(match_dup 2)))] -+{ -+ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[2]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*andsi3_const_negative_pow2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (lshiftrt:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_shifted_mask" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "shifted_mask_operand" "i")))] -+ "! 
xtensa_simm12b (INTVAL (operands[2]))" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (zero_extract:SI (match_dup 1) -+ (match_dup 3) -+ (match_dup 4))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[2]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[2] = GEN_INT (shift); -+ operands[3] = GEN_INT (mask_size); -+ operands[4] = GEN_INT (mask_pos); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && ctz_hwi (INTVAL (operands[2])) == 1") -+ (const_int 5) -+ (const_int 6)))]) -+ - (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=a") - (ior:SI (match_operand:SI 1 "register_operand" "%r") -@@ -634,7 +825,7 @@ - - ;; Field extract instructions. - --(define_expand "extv" -+(define_expand "extvsi" - [(set (match_operand:SI 0 "register_operand" "") - (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -649,12 +840,12 @@ - if (!lsbitnum_operand (operands[3], SImode)) - FAIL; - -- emit_insn (gen_extv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extv_internal" -+(define_insn "extvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (sign_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "sext_fldsz_operand" "i") -@@ -669,7 +860,7 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "extzv" -+(define_expand "extzvsi" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -678,12 +869,12 @@ - 
{ - if (!extui_fldsz_operand (operands[2], SImode)) - FAIL; -- emit_insn (gen_extzv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extzvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extzv_internal" -+(define_insn "extzvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (zero_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "extui_fldsz_operand" "i") -@@ -757,11 +948,14 @@ - because of offering further optimization opportunities. */ - if (register_operand (operands[0], DImode)) - { -- rtx first, second; -- -- split_double (operands[1], &first, &second); -- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); -- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); -+ rtx lowpart, highpart; -+ -+ if (TARGET_BIG_ENDIAN) -+ split_double (operands[1], &highpart, &lowpart); -+ else -+ split_double (operands[1], &lowpart, &highpart); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); -+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); - DONE; - } - -@@ -782,7 +976,7 @@ - "register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -831,6 +1025,19 @@ - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) - -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ rtx x = avoid_constant_pool_reference (operands[1]); -+ if (! CONST_INT_P (x)) -+ FAIL; -+ if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) -+ emit_move_insn (operands[0], x); -+}) -+ - ;; 16-bit Integer moves - - (define_expand "movhi" -@@ -1035,6 +1242,43 @@ - (set_attr "mode" "SF") - (set_attr "length" "3")]) - -+(define_split -+ [(set (match_operand:SF 0 "register_operand") -+ (match_operand:SF 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ int i = 0; -+ rtx x = XEXP (operands[1], 0); -+ long l[2]; -+ if (GET_CODE (x) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (x)) -+ x = get_pool_constant (x); -+ else if (GET_CODE (x) == CONST) -+ { -+ x = XEXP (x, 0); -+ gcc_assert (GET_CODE (x) == PLUS -+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) -+ && CONST_INT_P (XEXP (x, 1))); -+ i = INTVAL (XEXP (x, 1)); -+ gcc_assert (i == 0 || i == 4); -+ i /= 4; -+ x = get_pool_constant (XEXP (x, 0)); -+ } -+ else -+ gcc_unreachable (); -+ if (GET_MODE (x) == SFmode) -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); -+ else if (GET_MODE (x) == DFmode) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ else -+ FAIL; -+ x = gen_rtx_REG (SImode, REGNO (operands[0])); -+ if (! 
xtensa_constantsynth (x, l[i])) -+ emit_move_insn (x, GEN_INT (l[i])); -+}) -+ - ;; 64-bit floating point moves - - (define_expand "movdf" -@@ -1058,7 +1302,7 @@ - "register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -1085,6 +1329,22 @@ - DONE; - }) - -+;; Block sets -+ -+(define_expand "setmemsi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:SI 1 "") -+ (match_operand:SI 2 "") -+ (match_operand:SI 3 "const_int_operand")] -+ "!optimize_debug && optimize" -+{ -+ if (xtensa_expand_block_set_unrolled_loop (operands)) -+ DONE; -+ if (xtensa_expand_block_set_small_loop (operands)) -+ DONE; -+ FAIL; -+}) -+ - - ;; Shift instructions. - -@@ -1097,16 +1357,6 @@ - operands[1] = xtensa_copy_incoming_a7 (operands[1]); - }) - --(define_insn "*ashlsi3_1" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (const_int 1)))] -- "TARGET_DENSITY" -- "add.n\t%0, %1, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "2")]) -- - (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1119,16 +1369,14 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashlsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8b\t%2\;sll\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (ashift:SI (match_operand:SI 1 "register_operand") -+ (const_int 1)))] -+ "TARGET_DENSITY" -+ [(set (match_dup 0) -+ (plus:SI (match_dup 1) -+ (match_dup 1)))]) - - 
(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") -@@ -1142,17 +1390,6 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashrsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8l\t%2\;sra\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -- - (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1162,9 +1399,9 @@ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) -- return "srli\t%0, %1, %R2"; -+ return "srli\t%0, %1, %R2"; - else -- return "extui\t%0, %1, %R2, %L2"; -+ return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; - } -@@ -1172,13 +1409,170 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*lshrsi3_3x" -+(define_insn "*shift_per_byte" - [(set (match_operand:SI 0 "register_operand" "=a") -- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -+ (match_operator:SI 3 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]))] -+ "!optimize_debug && optimize" -+{ -+ switch (GET_CODE (operands[3])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; -+ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; -+ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_0" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 
"register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i"))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_1" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i")))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 5) -+ (neg:SI (match_dup 2))) -+ (set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 5) -+ (const_int 3))]))] -+{ -+ operands[5] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "9")]) -+ -+(define_insn "*shlrd_reg_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (match_dup 2))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr 
"length" "6")]) -+ -+(define_insn "*shlrd_const_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 3 "const_int_operand" "i")]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 2 "register_operand" "r") -+ (match_operand:SI 4 "const_int_operand" "i")])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && IN_RANGE (INTVAL (operands[3]), 1, 31) -+ && IN_RANGE (INTVAL (operands[4]), 1, 31) -+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" -+{ -+ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) -+ { -+ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; -+ case LSHIFTRT: return "ssai\t%R3\;src\t%0, %2, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn "*shlrd_per_byte_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shlrd_per_byte__omit_AND" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ 
[(match_operand:SI 1 "register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 4 "const_int_operand" "i"))]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_dup 2) -+ (const_int 3)) -+ (match_dup 4)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ior_op:SI (match_op_dup 5 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]) -+ (match_op_dup 6 -+ [(match_dup 3) -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] - "" -- "ssa8l\t%2\;srl\t%0, %1" - [(set_attr "type" "arith") - (set_attr "mode" "SI") - (set_attr "length" "6")]) -@@ -1239,28 +1633,13 @@ - (define_insn "*btrue" - [(set (pc) - (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "branch_operand" "K,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*bfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1269,28 +1648,13 @@ - (define_insn "*ubtrue" - [(set (pc) - (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 
"register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "ubranch_operand" "L,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*ubfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1301,80 +1665,178 @@ - (define_insn "*bittrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") -+ (const_int 1) -+ (match_operand:SI 1 "arith_operand" "J,r")) - (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); -+ static char result[64]; -+ char op; -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: op = 'c'; break; -+ case NE: op = 's'; break; -+ default: gcc_unreachable (); -+ } -+ if (which_alternative == 0) -+ { -+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); -+ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); -+ } -+ else -+ sprintf (result, "bb%c\t%%0, %%1, %%2", op); -+ return result; - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*bitfalse" -+(define_insn "*masktrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- 
[(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "register_operand" "r")) - (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] - "" - { -- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: return "bnone\t%0, %1, %2"; -+ case NE: return "bany\t%0, %1, %2"; -+ default: gcc_unreachable (); -+ } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*masktrue" -+(define_insn "*masktrue_bitcmpl" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -+ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) -+ (match_operand:SI 1 "register_operand" "r")) -+ (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - switch (GET_CODE (operands[3])) - { -- case EQ: return "bnone\t%0, %1, %2"; -- case NE: return "bany\t%0, %1, %2"; -- default: gcc_unreachable (); -+ case EQ: return "ball\t%0, %1, %2"; -+ case NE: return "bnall\t%0, %1, %2"; -+ default: gcc_unreachable (); - } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*maskfalse" -+(define_insn_and_split "*masktrue_const_pow2_minus_one" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE 
(exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (ashift:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] - { -- switch (GET_CODE (operands[3])) -- { -- case EQ: return "bany\t%0, %1, %2"; -- case NE: return "bnone\t%0, %1, %2"; -- default: gcc_unreachable (); -- } -+ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); -+ operands[4] = gen_reg_rtx (SImode); - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[1]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*masktrue_const_negative_pow2" -+ [(set (pc) -+ (if_then_else (match_operator 3 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (lshiftrt:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] -+{ -+ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); -+ operands[4] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*masktrue_const_shifted_mask" -+ [(set (pc) -+ (if_then_else (match_operator 4 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "shifted_mask_operand" "i")) -+ (match_operand:SI 2 "const_int_operand" "i")]) -+ (label_ref (match_operand 3 "" "")) -+ (pc)))] -+ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 -+ && 
xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 6) -+ (zero_extract:SI (match_dup 0) -+ (match_dup 5) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 4 -+ [(match_dup 6) -+ (match_dup 2)]) -+ (label_ref (match_dup 3)) -+ (pc)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[1]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[1] = GEN_INT (mask_pos); -+ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); -+ operands[5] = GEN_INT (mask_size); -+ operands[6] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") -+ (const_int 5) -+ (const_int 6)))]) - - - ;; Zero-overhead looping support. 
-@@ -1696,18 +2158,13 @@ - (match_operand 1 "" ""))] - "" - { -- rtx addr = XEXP (operands[0], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (0, operands); - }) - - (define_insn "call_internal" - [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) - (match_operand 1 "" "i"))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (0, operands); - } -@@ -1721,19 +2178,14 @@ - (match_operand 2 "" "")))] - "" - { -- rtx addr = XEXP (operands[1], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (1, operands); - }) - - (define_insn "call_value_internal" - [(set (match_operand 0 "register_operand" "=a") - (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) - (match_operand 2 "" "i")))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (1, operands); - } -@@ -1741,6 +2193,70 @@ - (set_attr "mode" "none") - (set_attr "length" "3")]) - -+(define_expand "sibcall" -+ [(call (match_operand 0 "memory_operand" "") -+ (match_operand 1 "" ""))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (0, operands); -+}) -+ -+(define_insn "sibcall_internal" -+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) -+ (match_operand 1 "" "i"))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (0, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(call (mem:SI (match_operand:SI 0 "register_operand")) -+ (match_operand 1 ""))] -+ "reload_completed -+ && 
!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[0]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 0)) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 1))]) -+ -+(define_expand "sibcall_value" -+ [(set (match_operand 0 "register_operand" "") -+ (call (match_operand 1 "memory_operand" "") -+ (match_operand 2 "" "")))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (1, operands); -+}) -+ -+(define_insn "sibcall_value_internal" -+ [(set (match_operand 0 "register_operand" "=a") -+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) -+ (match_operand 2 "" "i")))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (1, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(set (match_operand 0 "register_operand") -+ (call (mem:SI (match_operand:SI 1 "register_operand")) -+ (match_operand 2 "")))] -+ "reload_completed -+ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[1]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 1)) -+ (set (match_dup 0) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 2)))]) -+ - (define_insn "entry" - [(set (reg:SI A1_REG) - (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] -@@ -1762,7 +2278,10 @@ - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "2")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - - ;; Miscellaneous instructions. 
-@@ -1805,7 +2324,15 @@ - [(return)] - "" - { -- xtensa_expand_epilogue (); -+ xtensa_expand_epilogue (false); -+ DONE; -+}) -+ -+(define_expand "sibcall_epilogue" -+ [(return)] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_expand_epilogue (true); - DONE; - }) - -@@ -1817,7 +2344,10 @@ - } - [(set_attr "type" "nop") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - (define_expand "nonlocal_goto" - [(match_operand:SI 0 "general_operand" "") -@@ -1881,8 +2411,9 @@ - [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] - "" - "" -- [(set_attr "length" "0") -- (set_attr "type" "nop")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - ;; Do not schedule instructions accessing memory before this point. - -@@ -1901,7 +2432,9 @@ - (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] - "" - "" -- [(set_attr "length" "0")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] -@@ -1914,7 +2447,10 @@ - } - [(set_attr "type" "trap") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't - ;; know if a frame pointer is required until the reload pass, and -@@ -2177,3 +2713,103 @@ - xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], true); - DONE; - }) -+ -+(define_insn_and_split "*round_up_to_even" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 1)) -+ (const_int -2)))] -+ "" -+ "#" -+ "can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (and:SI (match_dup 1) -+ (const_int 1))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 2) -+
(match_dup 1)))] -+{ -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*signed_ge_zero" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ge:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 0)))] -+ "" -+ "#" -+ "" -+ [(set (match_dup 0) -+ (ashiftrt:SI (match_dup 1) -+ (const_int 31))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 0) -+ (const_int 1)))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_peephole2 -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 6 "reload_operand")) -+ (set (match_operand:SI 1 "register_operand") -+ (match_operand:SI 7 "reload_operand")) -+ (set (match_operand:SF 2 "register_operand") -+ (match_operand:SF 4 "register_operand")) -+ (set (match_operand:SF 3 "register_operand") -+ (match_operand:SF 5 "register_operand"))] -+ "REGNO (operands[0]) == REGNO (operands[4]) -+ && REGNO (operands[1]) == REGNO (operands[5]) -+ && peep2_reg_dead_p (4, operands[0]) -+ && peep2_reg_dead_p (4, operands[1])" -+ [(set (match_dup 2) -+ (match_dup 6)) -+ (set (match_dup 3) -+ (match_dup 7))] -+{ -+ uint32_t check = 0; -+ int i; -+ for (i = 0; i <= 3; ++i) -+ { -+ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); -+ if (check & mask) -+ FAIL; -+ check |= mask; -+ } -+ operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); -+ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); -+}) -+ -+(define_split -+ [(clobber (match_operand:DSC 0 "register_operand"))] -+ "GP_REG_P (REGNO (operands[0]))" -+ [(const_int 0)] -+{ -+ unsigned int regno = REGNO (operands[0]); -+ machine_mode inner_mode = GET_MODE_INNER (<MODE>mode); -+ rtx_insn *insn; -+ rtx x; -+ if (!
((insn = next_nonnote_nondebug_insn (curr_insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno -+ && (insn = next_nonnote_nondebug_insn (insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) -+ FAIL; -+}) -diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt -index aef67970b..97aa44f92 100644 ---- a/gcc/config/xtensa/xtensa.opt -+++ b/gcc/config/xtensa/xtensa.opt -@@ -27,9 +27,13 @@ Target Report Mask(FORCE_NO_PIC) - Disable position-independent code (PIC) for use in OS kernel code. - - mlongcalls --Target -+Target Mask(LONGCALLS) - Use indirect CALLXn instructions for large programs. - -+mextra-l32r-costs= -+Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) -+Set extra memory access cost for L32R instruction, in clock-cycle units. -+ - mtarget-align - Target - Automatically align branch targets to reduce branch penalties. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index eabeec944..c35f51afb 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. - -mtext-section-literals -mno-text-section-literals @gol - -mauto-litpools -mno-auto-litpools @gol - -mtarget-align -mno-target-align @gol ---mlongcalls -mno-longcalls} -+-mlongcalls -mno-longcalls @gol -+-mextra-l32r-costs=@var{cycles}} - - @emph{zSeries Options} - See S/390 and zSeries Options. -@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call - instructions---look at the disassembled object code to see the actual - instructions. Note that the assembler uses an indirect call for - every cross-file call, not just those that really are out of range. 
-+ -+@item -mextra-l32r-costs=@var{n} -+@opindex mextra-l32r-costs -+Specify an extra cost of instruction RAM/ROM access for @code{L32R} -+instructions, in clock cycles. This affects, when optimizing for speed, -+whether loading a constant from literal pool using @code{L32R} or -+synthesizing the constant from a small one with a couple of arithmetic -+instructions. The default value is 0. - @end table - - @node zSeries Options -diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -new file mode 100644 -index 000000000..ba61c6f37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+extern void foo(void); -+ -+void BNONE_test(int a, int b) -+{ -+ if (a & b) -+ foo(); -+} -+ -+void BANY_test(int a, int b) -+{ -+ if (!(a & b)) -+ foo(); -+} -+ -+void BALL_test(int a, int b) -+{ -+ if (~a & b) -+ foo(); -+} -+ -+void BNALL_test(int a, int b) -+{ -+ if (!(~a & b)) -+ foo(); -+} -+ -+/* { dg-final { scan-assembler-times "bnone" 1 } } */ -+/* { dg-final { scan-assembler-times "bany" 1 } } */ -+/* { dg-final { scan-assembler-times "ball" 1 } } */ -+/* { dg-final { scan-assembler-times "bnall" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -new file mode 100644 -index 000000000..a0c885baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return 
__builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -new file mode 100644 -index 000000000..4cf95b925 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "ssai" 4 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -new file mode 100644 -index 000000000..1e010fd62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 4 } } */ 
-diff --git a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -new file mode 100644 -index 000000000..6a04aaeef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int check_zero_byte(int v) -+{ -+ return (v - 0x01010101) & ~v & 0x80808080; -+} -+ -+/* { dg-final { scan-assembler-not "movi" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -new file mode 100644 -index 000000000..ec2606ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+int test_0(void) -+{ -+ return 4095; -+} -+ -+int test_1(void) -+{ -+ return 2147483647; -+} -+ -+int test_2(void) -+{ -+ return -34816; -+} -+ -+int test_3(void) -+{ -+ return -2049; -+} -+ -+int test_4(void) -+{ -+ return 2048; -+} -+ -+int test_5(void) -+{ -+ return 34559; -+} -+ -+int test_6(void) -+{ -+ return 43680; -+} -+ -+void test_7(int *p) -+{ -+ *p = -1432354816; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -new file mode 100644 -index 000000000..f3c4a1c7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mextra-l32r-costs=3" } */ -+ -+int test_0(void) -+{ -+ return 134217216; -+} -+ -+int test_1(void) -+{ -+ return -27604992; -+} -+ -+int test_2(void) -+{ -+ return -162279; -+} -+ -+void test_3(int *p) -+{ -+ *p = 192437; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -new file mode 100644 -index 000000000..11e5d5242 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+void test(unsigned int count, double array[]) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ array[i] = 1.0; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -new file mode 100644 -index 000000000..c8f987ccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(const void *addr) -+{ -+ unsigned int n = (unsigned int)addr; -+ const unsigned int *a = (const unsigned int*)(n & ~3); -+ n = (n & 3) * 8; -+ return (a[0] >> n) | (a[1] << (32 - n)); -+} -+ -+unsigned int test_1(unsigned int a, unsigned int b) -+{ -+ return (a >> 16) + (b << 16); -+} -+ -+/* { dg-final { scan-assembler-times "src" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -new file mode 100644 -index 000000000..608f65fd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+int one_cmpl_abs(int a) -+{ -+ return a < 0 ? 
~a : a; -+} -+ -+/* { dg-final { scan-assembler-not "bgez" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -new file mode 100644 -index 000000000..7a4018796 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -foptimize-sibling-calls" } */ -+ -+extern int foo(int); -+extern void bar(int); -+ -+int test_0(int a) { -+ return foo(a); -+} -+ -+void test_1(int a) { -+ bar(a); -+} -+ -+int test_2(int (*a)(void)) { -+ bar(0); -+ return a(); -+} -+ -+/* { dg-final { scan-assembler-not "ret" } } */ -diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S -index b19deae14..ad9072c40 100644 ---- a/libgcc/config/xtensa/lib1funcs.S -+++ b/libgcc/config/xtensa/lib1funcs.S -@@ -456,6 +456,29 @@ __nsau_data: - #endif /* L_clz */ - - -+#ifdef L_clrsbsi2 -+ .align 4 -+ .global __clrsbsi2 -+ .type __clrsbsi2, @function -+__clrsbsi2: -+ leaf_entry sp, 16 -+#if XCHAL_HAVE_NSA -+ nsa a2, a2 -+#else -+ srai a3, a2, 31 -+ xor a3, a3, a2 -+ movi a2, 31 -+ beqz a3, .Lreturn -+ do_nsau a2, a3, a4, a5 -+ addi a2, a2, -1 -+.Lreturn: -+#endif -+ leaf_return -+ .size __clrsbsi2, . 
- __clrsbsi2 -+ -+#endif /* L_clrsbsi2 */ -+ -+ - #ifdef L_clzsi2 - .align 4 - .global __clzsi2 -diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa -index 9836c96ae..084618b38 100644 ---- a/libgcc/config/xtensa/t-xtensa -+++ b/libgcc/config/xtensa/t-xtensa -@@ -1,6 +1,6 @@ - LIB1ASMSRC = xtensa/lib1funcs.S - LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ -- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ -+ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _bswapsi2 _bswapdi2 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ --- -2.20.1 - diff --git a/patches/gcc10.1/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch b/patches/gcc10.1/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch new file mode 100644 index 0000000..5aebddc --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch @@ -0,0 +1,48 @@ +From 76ee6b24125c885150e5b493b26b594801998b74 Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Tue, 18 Jan 2022 14:51:40 +0100 +Subject: [PATCH 02/31] xtensa: fix -Wformat-diag warnings. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (print_operand): Fix warnings. + (print_operand_address): Likewise. + (xtensa_multibss_section_type_flags): Likewise. 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 37c6ac1fd..b1dbe8520 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2379,7 +2379,7 @@ void + print_operand (FILE *file, rtx x, int letter) + { + if (!x) +- error ("PRINT_OPERAND null pointer"); ++ error ("%<PRINT_OPERAND%> null pointer"); + + switch (letter) + { +@@ -2584,7 +2584,7 @@ void + print_operand_address (FILE *file, rtx addr) + { + if (!addr) +- error ("PRINT_OPERAND_ADDRESS, null pointer"); ++ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); + + switch (GET_CODE (addr)) + { +@@ -3697,7 +3697,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " +- ".bss section"); ++ "%<.bss%> section"); + } + + return flags; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch b/patches/gcc10.1/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch new file mode 100644 index 0000000..46260ef --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch @@ -0,0 +1,74 @@ +From b5b9fd01c4db135893c44e82a9f33c2411e993d0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:06 +0900 +Subject: [PATCH 03/31] xtensa: Rename deprecated extv/extzv insn patterns to + extvsi/extzvsi + +These patterns were deprecated since GCC 4.8. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi, + extzvsi_internal): Rename from extv, extv_internal, extzv and + extzv_internal, respectively. 
+--- + gcc/config/xtensa/xtensa.md | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 123916957..251c313d5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -634,7 +634,7 @@ + + ;; Field extract instructions. + +-(define_expand "extv" ++(define_expand "extvsi" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -649,12 +649,12 @@ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + +- emit_insn (gen_extv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extv_internal" ++(define_insn "extvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") +@@ -669,7 +669,7 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "extzv" ++(define_expand "extzvsi" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -678,12 +678,12 @@ + { + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; +- emit_insn (gen_extzv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extzvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extzv_internal" ++(define_insn "extzvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch 
b/patches/gcc10.1/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch new file mode 100644 index 0000000..607367c --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch @@ -0,0 +1,41 @@ +From 12fa0b13b6f0c52e5c4d75f39822771a7f780f94 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:19 +0900 +Subject: [PATCH 04/31] xtensa: Reflect the 32-bit Integer Divide Option + +On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider), +this patch reduces the size of each: + + __moddi3() @ libgcc.a : 969 -> 301 (saves 668) + __divmoddi4() : 1111 -> 426 (saves 685) + __udivmoddi4() : 1043 -> 319 (saves 724) + +in bytes, respectively. + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro + definition. +--- + gcc/config/xtensa/xtensa.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index fa86a245e..5b102de51 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. If not see + #define HAVE_AS_TLS 0 + #endif + ++/* Define this if the target has no hardware divide instructions. */ ++#if !TARGET_DIV32 ++#define TARGET_HAS_NO_HW_DIVIDE ++#endif ++ + + /* Target CPU builtins. 
*/ + #define TARGET_CPU_CPP_BUILTINS() \ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch b/patches/gcc10.1/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch new file mode 100644 index 0000000..8d257cd --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch @@ -0,0 +1,78 @@ +From 49383c9381a937b360adeb14f5e7bd4472f7c386 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:26:30 +0900 +Subject: [PATCH 05/31] xtensa: Simplify EXTUI instruction maskimm validations + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (extui_fldsz_operand): Simplify. + * config/xtensa/xtensa.c (xtensa_mask_immediate, print_operand): + Ditto. +--- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa.c | 24 +++--------------------- + 2 files changed, 4 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index eb52b05aa..3f84859b6 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -55,7 +55,7 @@ + + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") +- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) + + (define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b1dbe8520..4043f40ce 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -446,19 +446,7 @@ xtensa_b4constu (HOST_WIDE_INT v) + bool + xtensa_mask_immediate (HOST_WIDE_INT v) + { +-#define MAX_MASK_SIZE 16 +- int mask_size; +- +- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) +- { +- if ((v & 1) == 0) +- return false; +- v = v >> 1; +- if (v == 0) +- return true; +- } +- +- return false; ++ return IN_RANGE (exact_log2 
(v + 1), 1, 16); + } + + +@@ -2424,17 +2412,11 @@ print_operand (FILE *file, rtx x, int letter) + case 'K': + if (GET_CODE (x) == CONST_INT) + { +- int num_bits = 0; + unsigned val = INTVAL (x); +- while (val & 1) +- { +- num_bits += 1; +- val = val >> 1; +- } +- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) ++ if (!xtensa_mask_immediate (val)) + fatal_insn ("invalid mask", x); + +- fprintf (file, "%d", num_bits); ++ fprintf (file, "%d", floor_log2 (val + 1)); + } + else + output_operand_lossage ("invalid %%K value"); +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch b/patches/gcc10.1/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch new file mode 100644 index 0000000..419ebfe --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch @@ -0,0 +1,174 @@ +From fa7073ff572c248896057a5a7841a3e1d98380ad Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:27:36 +0900 +Subject: [PATCH 06/31] xtensa: Make use of IN_RANGE macro where appropriate + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/constraints.md (M, O): Use the macro. + * config/xtensa/predicates.md (addsubx_operand, extui_fldsz_operand, + sext_fldsz_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_simm8, xtensa_simm8x256, + xtensa_simm12b, xtensa_uimm8, xtensa_uimm8x2, xtensa_uimm8x4, + xtensa_mask_immediate, smalloffset_mem_p, printx, xtensa_call_save_reg, + xtensa_expand_prologue): Ditto. + * config/xtensa/xtensa.h (FUNCTION_ARG_REGNO_P): Ditto. 
+--- + gcc/config/xtensa/constraints.md | 4 ++-- + gcc/config/xtensa/predicates.md | 5 ++--- + gcc/config/xtensa/xtensa.c | 20 ++++++++++---------- + gcc/config/xtensa/xtensa.h | 2 +- + 4 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 2062c8816..9a8caab4f 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -92,7 +92,7 @@ + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") +- (match_test "ival >= -32 && ival <= 95"))) ++ (match_test "IN_RANGE (ival, -32, 95)"))) + + (define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use +@@ -103,7 +103,7 @@ + (define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") +- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) ++ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) + + (define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 3f84859b6..91b9343a2 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -25,8 +25,7 @@ + + (define_predicate "addsubx_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 1 +- && INTVAL (op) <= 3"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + + (define_predicate "arith_operand" + (ior (and (match_code "const_int") +@@ -64,7 +63,7 @@ + + (define_predicate "sext_fldsz_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) + + (define_predicate "lsbitnum_operand" + (and (match_code "const_int") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 4043f40ce..02dc5799a 100644 +--- a/gcc/config/xtensa/xtensa.c 
++++ b/gcc/config/xtensa/xtensa.c +@@ -341,42 +341,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; + bool + xtensa_simm8 (HOST_WIDE_INT v) + { +- return v >= -128 && v <= 127; ++ return IN_RANGE (v, -128, 127); + } + + + bool + xtensa_simm8x256 (HOST_WIDE_INT v) + { +- return (v & 255) == 0 && (v >= -32768 && v <= 32512); ++ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); + } + + + bool + xtensa_simm12b (HOST_WIDE_INT v) + { +- return v >= -2048 && v <= 2047; ++ return IN_RANGE (v, -2048, 2047); + } + + + static bool + xtensa_uimm8 (HOST_WIDE_INT v) + { +- return v >= 0 && v <= 255; ++ return IN_RANGE (v, 0, 255); + } + + + static bool + xtensa_uimm8x2 (HOST_WIDE_INT v) + { +- return (v & 1) == 0 && (v >= 0 && v <= 510); ++ return (v & 1) == 0 && IN_RANGE (v, 0, 510); + } + + + static bool + xtensa_uimm8x4 (HOST_WIDE_INT v) + { +- return (v & 3) == 0 && (v >= 0 && v <= 1020); ++ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); + } + + +@@ -527,7 +527,7 @@ smalloffset_mem_p (rtx op) + return FALSE; + + val = INTVAL (offset); +- return (val & 3) == 0 && (val >= 0 && val <= 60); ++ return (val & 3) == 0 && IN_RANGE (val, 0, 60); + } + } + return FALSE; +@@ -2352,7 +2352,7 @@ static void + printx (FILE *file, signed int val) + { + /* Print a hexadecimal value in a nice way. */ +- if ((val > -0xa) && (val < 0xa)) ++ if (IN_RANGE (val, -9, 9)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); +@@ -2732,7 +2732,7 @@ xtensa_call_save_reg(int regno) + return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || + df_regs_ever_live_p (regno); + +- if (crtl->calls_eh_return && regno >= 2 && regno < 4) ++ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; + + return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); +@@ -2852,7 +2852,7 @@ xtensa_expand_prologue (void) + int callee_save_size = cfun->machine->callee_save_size; + + /* -128 is a limit of single addi instruction. 
*/ +- if (total_size > 0 && total_size <= 128) ++ if (IN_RANGE (total_size, 1, 128)) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-total_size))); +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 5b102de51..3e9cbc943 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -493,7 +493,7 @@ enum reg_class + used for this purpose since all function arguments are pushed on + the stack. */ + #define FUNCTION_ARG_REGNO_P(N) \ +- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) ++ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) + + /* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch b/patches/gcc10.1/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch new file mode 100644 index 0000000..dae4a21 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch @@ -0,0 +1,54 @@ +From 5cda5b41a7646d220f7351226b5da78955b0fc7f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:29:22 +0900 +Subject: [PATCH 07/31] xtensa: Fix instruction counting regarding block move + expansion + +This patch makes counting the number of instructions of the remainder +(modulo 4) part more accurate. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_move): + Make instruction counting more accurate, and simplify emitting insns. 
+--- + gcc/config/xtensa/xtensa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 02dc5799a..0fe8b73ad 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1303,7 +1303,7 @@ xtensa_expand_block_move (rtx *operands) + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; +- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ ++ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); + if (num_pieces > move_ratio) + return 0; + +@@ -1340,7 +1340,7 @@ xtensa_expand_block_move (rtx *operands) + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); +- emit_insn (gen_rtx_SET (temp[next], x)); ++ emit_move_insn (temp[next], x); + + offset_ld += next_amount; + bytes -= next_amount; +@@ -1350,9 +1350,9 @@ xtensa_expand_block_move (rtx *operands) + if (active[phase]) + { + active[phase] = false; +- ++ + x = adjust_address (dst_mem, mode[phase], offset_st); +- emit_insn (gen_rtx_SET (x, temp[phase])); ++ emit_move_insn (x, temp[phase]); + + offset_st += amount[phase]; + } +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch new file mode 100644 index 0000000..a7212ce --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch @@ -0,0 +1,303 @@ +From 02572a935a2cbabc96387289300fb78d61dde555 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 24 May 2022 00:52:44 +0900 +Subject: [PATCH 08/31] xtensa: Add setmemsi insn pattern + +This patch introduces setmemsi insn pattern of two kinds, unrolled loop and +small loop, for fixed small length and constant initialization value. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New prototypes. 
+ * config/xtensa/xtensa.c (xtensa_sizeof_MOVI, + xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New functions. + * config/xtensa/xtensa.md (setmemsi): New expansion pattern. + * config/xtensa/xtensa.opt (mlongcalls): Add target mask. +--- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 211 ++++++++++++++++++++++++++++++ + gcc/config/xtensa/xtensa.md | 16 +++ + gcc/config/xtensa/xtensa.opt | 2 +- + 4 files changed, 230 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 18d803581..80b1da2bb 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); + extern int xtensa_expand_conditional_move (rtx *, int); + extern int xtensa_expand_scc (rtx *, machine_mode); + extern int xtensa_expand_block_move (rtx *); ++extern int xtensa_expand_block_set_unrolled_loop (rtx *); ++extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0fe8b73ad..a6d76a953 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1363,6 +1363,217 @@ xtensa_expand_block_move (rtx *operands) + } + + ++/* Try to expand a block set operation to a sequence of RTL move ++ instructions. If not optimizing, or if the block size is not a ++ constant, or if the block is too large, or if the value to ++ initialize the block with is not a constant, the expansion ++ fails and GCC falls back to calling memset(). 
++ ++ operands[0] is the destination ++ operands[1] is the length ++ operands[2] is the initialization value ++ operands[3] is the alignment */ ++ ++static int ++xtensa_sizeof_MOVI (HOST_WIDE_INT imm) ++{ ++ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3; ++} ++ ++int ++xtensa_expand_block_set_unrolled_loop (rtx *operands) ++{ ++ rtx dst_mem = operands[0]; ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, reg; ++ int offset; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. */ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: a series of aligned memory stores. ++ Consist of S8I, S16I or S32I(.N). */ ++ expand_len += (bytes / align) * (TARGET_DENSITY ++ && align == 4 ? 2 : 3); ++ /* Insn expansion: the remainder, sub-aligned memory stores. ++ A combination of S8I and S16I as needed. */ ++ expand_len += ((bytes % align + 1) / 2) * 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (dst_mem, 0); ++ if (!REG_P (x)) ++ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); ++ switch (align) ++ { ++ case 1: ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (SImode, GEN_INT (value)); ++ ++ offset = 0; ++ do ++ { ++ int unit_size = MIN (bytes, align); ++ machine_mode unit_mode = (unit_size >= 4 ? SImode : ++ (unit_size >= 2 ? HImode : ++ QImode)); ++ unit_size = GET_MODE_SIZE (unit_mode); ++ ++ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), ++ unit_mode == SImode ? reg ++ : convert_to_mode (unit_mode, reg, true)); ++ ++ offset += unit_size; ++ bytes -= unit_size; ++ } ++ while (bytes > 0); ++ ++ return 1; ++} ++ ++int ++xtensa_expand_block_set_small_loop (rtx *operands) ++{ ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, dst, end, reg; ++ machine_mode unit_mode; ++ rtx_code_label *label; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Totally-aligned block only. */ ++ if (bytes % align != 0) ++ return 0; ++ ++ /* If 4-byte aligned, small loop substitution is almost optimal, thus ++ limited to only offset to the end address for ADDI/ADDMI instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; ++ ++ /* If no 4-byte aligned, loop count should be treated as the constraint. */ ++ if (align != 4 ++ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) ++ return 0; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. 
*/ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (operands[0], 0); ++ if (!REG_P (x)) ++ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); ++ dst = gen_reg_rtx (SImode); ++ emit_move_insn (dst, x); ++ end = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ switch (align) ++ { ++ case 1: ++ unit_mode = QImode; ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ unit_mode = HImode; ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ unit_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (unit_mode, GEN_INT (value)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); ++ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); ++ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); ++ ++ return 1; ++} ++ ++ + void + xtensa_expand_nonlocal_goto (rtx *operands) + { +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 251c313d5..9eb689efa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1085,6 +1085,22 @@ + DONE; + }) + ++;; Block sets ++ ++(define_expand "setmemsi" ++ [(match_operand:BLK 0 "memory_operand") ++ (match_operand:SI 1 "") ++ (match_operand:SI 2 "") ++ (match_operand:SI 3 "const_int_operand")] ++ "!optimize_debug && optimize" ++{ ++ if (xtensa_expand_block_set_unrolled_loop (operands)) ++ DONE; ++ if (xtensa_expand_block_set_small_loop (operands)) ++ DONE; ++ FAIL; ++}) ++ + + ;; Shift instructions. + +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index aef67970b..e1d992f5d 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -27,7 +27,7 @@ Target Report Mask(FORCE_NO_PIC) + Disable position-independent code (PIC) for use in OS kernel code. 
+ + mlongcalls +-Target ++Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + + mtarget-align +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch b/patches/gcc10.1/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch new file mode 100644 index 0000000..a5fb6f1 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch @@ -0,0 +1,254 @@ +From be1ca3aa6e9754ed16d1b7a60657912af02844da Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:33:59 +0900 +Subject: [PATCH 09/31] xtensa: Improve bswap[sd]i2 insn patterns + +This patch makes bswap[sd]i2 better register allocation, and reconstructs +bswapsi2 in order to take advantage of GIMPLE manual byte-swapping +recognition. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2): New expansion pattern. + (bswapsi2_internal): Revise the template and condition, and add + detection code for preceding the same insn in order to omit a + "SSAI 8" instruction of the latter. + (bswapdi2): Suppress built-in insn expansion with the corresponding + library call when optimizing for size. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/bswap-O1.c: New. + * gcc.target/xtensa/bswap-O2.c: Ditto. + * gcc.target/xtensa/bswap-Os.c: Ditto. 
+--- + gcc/config/xtensa/xtensa.md | 77 +++++++++++++++++----- + gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 +++++++++++ + 4 files changed, 172 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eb689efa..cea280061 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,23 +471,68 @@ + + ;; Byte swap. + +-(define_insn "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=&a") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "15")]) ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1" ++{ ++ /* GIMPLE manual byte-swapping recognition is now activated. ++ For both built-in and manual bswaps, emit corresponding library call ++ if optimizing for size, or a series of dedicated machine instructions ++ if otherwise. 
*/ ++ if (optimize_size) ++ emit_library_call_value (optab_libfunc (bswap_optab, SImode), ++ operands[0], LCT_NORMAL, SImode, ++ operands[1], SImode); ++ else ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++}) + +-(define_insn "bswapdi2" +- [(set (match_operand:DI 0 "register_operand" "=&a") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "length" "27")]) ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=a,&a") ++ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) ++ (clobber (match_scratch:SI 2 "=&a,X"))] ++ "!optimize_debug && optimize > 1 && !optimize_size" ++{ ++ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); ++ const char *init = "ssai\t8\;"; ++ static char result[64]; ++ if (prev_insn && NONJUMP_INSN_P (prev_insn)) ++ { ++ rtx x = PATTERN (prev_insn); ++ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 ++ && GET_CODE (XVECEXP (x, 0, 0)) == SET ++ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) ++ { ++ x = XEXP (XVECEXP (x, 0, 0), 1); ++ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) ++ init = ""; ++ } ++ } ++ sprintf (result, ++ (which_alternative == 0) ++ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" ++ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", ++ init); ++ return result; ++} ++ [(set_attr "type" "arith,arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "15,15")]) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (bswap:DI (match_operand:DI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1 && optimize_size" ++{ ++ /* Replace with a single DImode library call. 
++ Without this, two SImode library calls are emitted. */ ++ emit_library_call_value (optab_libfunc (bswap_optab, DImode), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], DImode); ++ DONE; ++}) + + + ;; Negation and one's complement. +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +new file mode 100644 +index 000000000..a0c885baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +new file mode 100644 +index 000000000..4cf95b925 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} 
++ ++/* { dg-final { scan-assembler-times "ssai" 4 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +new file mode 100644 +index 000000000..1e010fd62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 4 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0015-fix-PR-target-105879.patch b/patches/gcc10.1/gcc-xtensa-0015-fix-PR-target-105879.patch new file mode 100644 index 0000000..2c21f47 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0015-fix-PR-target-105879.patch @@ -0,0 +1,48 @@ +From 1848b547a6ac69a002d068239a5bc9463f3fae25 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 7 Jun 2022 21:01:01 -0700 +Subject: [PATCH 10/31] gcc: xtensa: fix PR target/105879 + +split_double operates with the 'word that comes first in memory in the +target' terminology, while gen_lowpart operates with the 'value +representing some low-order bits of X' terminology. They are not +equivalent and must be dealt with differently on little- and big-endian +targets. + +gcc/ + PR target/105879 + * config/xtensa/xtensa.md (movdi): Rename 'first' and 'second' + to 'lowpart' and 'highpart' so that they match 'gen_lowpart' and + 'gen_highpart' bitwise semantics and fix order of highpart and + lowpart depending on target endianness. 
+--- + gcc/config/xtensa/xtensa.md | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index cea280061..30d8ef96c 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -802,11 +802,14 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx first, second; +- +- split_double (operands[1], &first, &second); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); ++ rtx lowpart, highpart; ++ ++ if (TARGET_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); ++ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); + DONE; + } + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch new file mode 100644 index 0000000..3a31e62 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch @@ -0,0 +1,39 @@ +From f47a902c9a94d2e9df879de4613dae62c8e9cc4f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:44:32 +0900 +Subject: [PATCH 11/31] xtensa: Implement bswaphi2 insn pattern + +This patch adds bswaphi2 insn pattern that is one instruction less than the +default expansion. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswaphi2): New insn pattern. +--- + gcc/config/xtensa/xtensa.md | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 30d8ef96c..c1f44777d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,6 +471,16 @@ + + ;; Byte swap. 
+ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=a") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++ (clobber (match_scratch:HI 2 "=&a"))] ++ "" ++ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "HI") ++ (set_attr "length" "9")]) ++ + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch b/patches/gcc10.1/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch new file mode 100644 index 0000000..017a30f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch @@ -0,0 +1,86 @@ +From 22b5756399ef63a4102334724b12a4c186075227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:46:16 +0900 +Subject: [PATCH 12/31] xtensa: Make one_cmplsi2 optimizer-friendly + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. But a few optimizers assume that bitwise negation can be +done by a single insn. + +As a result, '((x < 0) ? ~x : x)' cannot be optimized to '(x ^ (x >> 31))' +ever before, for example. + +This patch relaxes such limitation, by putting the insn expansion off till +the split pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (one_cmplsi2): + Rearrange as an insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/one_cmpl_abs.c: New. 
+--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++------ + .../gcc.target/xtensa/one_cmpl_abs.c | 9 +++++++ + 2 files changed, 27 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c1f44777d..2f6d48d03 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -556,16 +556,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "") +- (not:SI (match_operand:SI 1 "register_operand" "")))] ++(define_insn_and_split "one_cmplsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (const_int -1)) ++ (set (match_dup 0) ++ (xor:SI (match_dup 1) ++ (match_dup 2)))] + { +- rtx temp = gen_reg_rtx (SImode); +- emit_insn (gen_movsi (temp, constm1_rtx)); +- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); +- DONE; +-}) ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +new file mode 100644 +index 000000000..608f65fd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++int one_cmpl_abs(int a) ++{ ++ return a < 0 ? 
~a : a; ++} ++ ++/* { dg-final { scan-assembler-not "bgez" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch b/patches/gcc10.1/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch new file mode 100644 index 0000000..d1167a1 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch @@ -0,0 +1,71 @@ +From cc259b2801c8d04c39169214041305fdd5b87acd Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:55:44 +0900 +Subject: [PATCH 13/31] xtensa: Optimize '(~x & y)' to '((x & y) ^ y)' + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*andsi3_bitcmpl): + New insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/check_zero_byte.c: New. +--- + gcc/config/xtensa/xtensa.md | 20 +++++++++++++++++++ + .../gcc.target/xtensa/check_zero_byte.c | 9 +++++++++ + 2 files changed, 29 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2f6d48d03..28ed1d34e 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -601,6 +601,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + ++(define_insn_and_split "*andsi3_bitcmpl" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 3) ++ (and:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (xor:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +diff --git 
a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +new file mode 100644 +index 000000000..6a04aaeef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++int check_zero_byte(int v) ++{ ++ return (v - 0x01010101) & ~v & 0x80808080; ++} ++ ++/* { dg-final { scan-assembler-not "movi" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch new file mode 100644 index 0000000..ebaa985 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch @@ -0,0 +1,98 @@ +From ebd48d915076589f04b5c1ed50f9f5ddfae088e8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:57:35 +0900 +Subject: [PATCH 14/31] xtensa: Add clrsbsi2 insn pattern + +> (clrsb:m x) +> Represents the number of redundant leading sign bits in x, represented +> as an integer of mode m, starting at the most significant bit position. + +This explanation is just what the NSA instruction (not ever emitted before) +calculates in Xtensa ISA. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (clrsbsi2): New insn pattern. + +libgcc/ChangeLog: + + * config/xtensa/lib1funcs.S (__clrsbsi2): New function. + * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _clrsbsi2. +--- + gcc/config/xtensa/xtensa.md | 12 +++++++++++- + libgcc/config/xtensa/lib1funcs.S | 23 +++++++++++++++++++++++ + libgcc/config/xtensa/t-xtensa | 2 +- + 3 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 28ed1d34e..6c76fb942 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -429,7 +429,17 @@ + (set_attr "length" "3")]) + + +-;; Count leading/trailing zeros and find first bit. 
++;; Count redundant leading sign bits and leading/trailing zeros, ++;; and find first bit. ++ ++(define_insn "clrsbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_NSA" ++ "nsa\t%0, %1" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "3")]) + + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") +diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S +index b19deae14..ad9072c40 100644 +--- a/libgcc/config/xtensa/lib1funcs.S ++++ b/libgcc/config/xtensa/lib1funcs.S +@@ -456,6 +456,29 @@ __nsau_data: + #endif /* L_clz */ + + ++#ifdef L_clrsbsi2 ++ .align 4 ++ .global __clrsbsi2 ++ .type __clrsbsi2, @function ++__clrsbsi2: ++ leaf_entry sp, 16 ++#if XCHAL_HAVE_NSA ++ nsa a2, a2 ++#else ++ srai a3, a2, 31 ++ xor a3, a3, a2 ++ movi a2, 31 ++ beqz a3, .Lreturn ++ do_nsau a2, a3, a4, a5 ++ addi a2, a2, -1 ++.Lreturn: ++#endif ++ leaf_return ++ .size __clrsbsi2, . 
- __clrsbsi2 ++ ++#endif /* L_clrsbsi2 */ ++ ++ + #ifdef L_clzsi2 + .align 4 + .global __clzsi2 +diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa +index 9836c96ae..084618b38 100644 +--- a/libgcc/config/xtensa/t-xtensa ++++ b/libgcc/config/xtensa/t-xtensa +@@ -1,6 +1,6 @@ + LIB1ASMSRC = xtensa/lib1funcs.S + LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ +- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ ++ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _bswapsi2 _bswapdi2 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch b/patches/gcc10.1/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch new file mode 100644 index 0000000..8de8a89 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch @@ -0,0 +1,110 @@ +From 1ba9369255749ccf9ec82565a192b1a523b0e374 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:17:40 +0900 +Subject: [PATCH 15/31] xtensa: Tweak some widen multiplications + +umulsidi3 is faster than umuldi3 even if library call, and is also +prerequisite for fast constant division by multiplication. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (mulsidi3, umulsidi3): + Split into individual signedness, in order to use libcall + "__umulsidi3" but not the other. + (mulhisi3): Merge into one by using code iterator. + (mulsidi3, mulhisi3, umulhisi3): Remove. +--- + gcc/config/xtensa/xtensa.md | 56 +++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6c76fb942..3314b3fd6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -224,20 +224,42 @@ + + ;; Multiplication. 
+ +-(define_expand "<u>mulsidi3" ++(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") +- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) +- (any_extend:DI (match_operand:SI 2 "register_operand"))))] ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); +- emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), +- operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; + }) + ++(define_expand "umulsidi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "" ++{ ++ if (TARGET_MUL32_HIGH) ++ { ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); ++ } ++ else ++ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], SImode, ++ operands[2], SImode); ++ DONE; ++}) ++ + (define_insn "<u>mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI +@@ -261,30 +283,16 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "mulhisi3" +- [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (sign_extend:SI +- (match_operand:HI 1 "register_operand" "%r,r")) +- (sign_extend:SI +- (match_operand:HI 2 "register_operand" "r,r"))))] +- "TARGET_MUL16 || TARGET_MAC16" +- "@ +- mul16s\t%0, %1, %2 +- mul.aa.ll\t%1, %2" +- [(set_attr "type" 
"mul16,mac16") +- (set_attr "mode" "SI") +- (set_attr "length" "3,3")]) +- +-(define_insn "umulhisi3" ++(define_insn "<u>mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (zero_extend:SI ++ (mult:SI (any_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) +- (zero_extend:SI ++ (any_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ +- mul16u\t%0, %1, %2 +- umul.aa.ll\t%1, %2" ++ mul16<su>\t%0, %1, %2 ++ <u>mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch b/patches/gcc10.1/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch new file mode 100644 index 0000000..491da47 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch @@ -0,0 +1,125 @@ +From bc108c84544d5a0e6289628e8749a92c9695f006 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:18:24 +0900 +Subject: [PATCH 16/31] xtensa: Consider the Loop Option when setmemsi is + expanded to small loop + +Now apply to almost any size of aligned block under such circumstances. 
+ +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_set_small_loop): + Pass through the block length / loop count conditions if + zero-overhead looping is configured and active, +--- + gcc/config/xtensa/xtensa.c | 71 +++++++++++++++++++++++++++----------- + 1 file changed, 50 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a6d76a953..e2f97b79c 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1473,7 +1473,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) + int + xtensa_expand_block_set_small_loop (rtx *operands) + { +- HOST_WIDE_INT bytes, value, align; ++ HOST_WIDE_INT bytes, value, align, count; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; +@@ -1493,17 +1493,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; ++ count = bytes / align; + +- /* If 4-byte aligned, small loop substitution is almost optimal, thus +- limited to only offset to the end address for ADDI/ADDMI instruction. */ +- if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) +- return 0; ++ /* If the Loop Option (zero-overhead looping) is configured and active, ++ almost no restrictions about the length of the block. */ ++ if (! (TARGET_LOOPS && optimize)) ++ { ++ /* If 4-byte aligned, small loop substitution is almost optimal, ++ thus limited to only offset to the end address for ADDI/ADDMI ++ instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; + +- /* If no 4-byte aligned, loop count should be treated as the constraint. */ +- if (align != 4 +- && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) +- return 0; ++ /* If no 4-byte aligned, loop count should be treated as the ++ constraint. */ ++ if (align != 4 ++ && count > ((optimize > 1 && !optimize_size) ? 
8 : 15)) ++ return 0; ++ } + + /* Insn expansion: holding the init value. + Either MOV(.N) or L32R w/litpool. */ +@@ -1513,16 +1521,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; +- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ +- expand_len += bytes > 127 ? 3 +- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; +- +- /* Insn expansion: the loop body and branch instruction. +- For store, one of S8I, S16I or S32I(.N). +- For advance, ADDI(.N). +- For branch, BNE. */ +- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) +- + (TARGET_DENSITY ? 2 : 3) + 3; ++ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ ++ { ++ /* Insn translation: Either MOV(.N) or L32R w/litpool for the ++ loop count. */ ++ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) ++ : 3 + 4; ++ /* Insn translation: LOOP, the zero-overhead looping setup ++ instruction. */ ++ expand_len += 3; ++ /* Insn expansion: the loop body instructions. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3); ++ } ++ else /* NO zero-overhead looping */ ++ { ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ } + + /* Function call: preparing two arguments. 
*/ + funccall_len = xtensa_sizeof_MOVI (value); +@@ -1545,7 +1570,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); +- emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ if (TARGET_LOOPS && optimize) ++ x = force_reg (SImode, operands[1] /* the length */); ++ else ++ x = operands[1]; ++ emit_insn (gen_addsi3 (end, dst, x)); + switch (align) + { + case 1: +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch b/patches/gcc10.1/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch new file mode 100644 index 0000000..5792a6f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch @@ -0,0 +1,325 @@ +From de854e2348b8159bc389471e68023986c8878c92 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:19:32 +0900 +Subject: [PATCH 17/31] xtensa: Improve instruction cost estimation and + suggestion + +This patch implements a new target-specific relative RTL insn cost function +because of suboptimal cost estimation by default, and fixes several "length" +insn attributes (related to the cost estimation). + +And also introduces a new machine-dependent option "-mextra-l32r-costs=" +that tells implementation-specific InstRAM/ROM access penalty for L32R +instruction to the compiler (in clock-cycle units, 0 by default). + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): Correct wrong case + for ABS and NEG, add missing case for BSWAP and CLRSB, and + double the costs for integer divisions using libfuncs if + optimizing for speed, in order to take advantage of fast constant + division by multiplication. + (TARGET_INSN_COST): New macro definition. + (xtensa_is_insn_L32R_p, xtensa_insn_cost): New functions for + calculating relative costs of a RTL insns, for both of speed and + size. 
+ * config/xtensa/xtensa.md (return, nop, trap): Correct values of + the attribute "length" that depends on TARGET_DENSITY. + (define_asm_attributes, blockage, frame_blockage): Add missing + attributes. + * config/xtensa/xtensa.opt (-mextra-l32r-costs=): New machine- + dependent option, however, preparatory work for now. +--- + gcc/config/xtensa/xtensa.c | 116 ++++++++++++++++++++++++++++++++--- + gcc/config/xtensa/xtensa.md | 29 ++++++--- + gcc/config/xtensa/xtensa.opt | 4 ++ + 3 files changed, 134 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e2f97b79c..94ff901c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "hw-doloop.h" + #include "rtl-iter.h" ++#include "insn-attr.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + static section *xtensa_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); ++static int xtensa_insn_cost (rtx_insn *, bool); + static int xtensa_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); +@@ -208,6 +210,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS xtensa_rtx_costs ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST xtensa_insn_cost + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +@@ -3972,7 +3976,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + static bool + xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, +- int *total, bool 
speed ATTRIBUTE_UNUSED) ++ int *total, bool speed) + { + int code = GET_CODE (x); + +@@ -4060,9 +4064,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case CLZ: ++ case CLRSB: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + ++ case BSWAP: ++ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); ++ return true; ++ + case NOT: + *total = COSTS_N_INSNS (mode == DImode ? 3 : 2); + return true; +@@ -4086,13 +4095,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case ABS: ++ case NEG: + { + if (mode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (mode == DFmode) + *total = COSTS_N_INSNS (50); +- else ++ else if (mode == DImode) + *total = COSTS_N_INSNS (4); ++ else ++ *total = COSTS_N_INSNS (1); + return true; + } + +@@ -4108,10 +4120,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + } + +- case NEG: +- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); +- return true; +- + case MULT: + { + if (mode == SFmode) +@@ -4151,11 +4159,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case UMOD: + { + if (mode == DImode) +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + return true; + } + +@@ -4188,6 +4196,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + ++static bool ++xtensa_is_insn_L32R_p(const rtx_insn *insn) ++{ ++ rtx x = PATTERN (insn); ++ ++ if (GET_CODE (x) == SET) ++ { ++ x = XEXP (x, 1); ++ if (GET_CODE (x) == MEM) ++ { ++ x = XEXP (x, 0); ++ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); ++ } ++ } ++ ++ return false; ++} ++ ++/* Compute a relative costs of RTL insns. This is necessary in order to ++ achieve better RTL insn splitting/combination result. 
*/ ++ ++static int ++xtensa_insn_cost (rtx_insn *insn, bool speed) ++{ ++ if (!(recog_memoized (insn) < 0)) ++ { ++ int len = get_attr_length (insn), n = (len + 2) / 3; ++ ++ if (len == 0) ++ return COSTS_N_INSNS (0); ++ ++ if (speed) /* For speed cost. */ ++ { ++ /* "L32R" may be particular slow (implementation-dependent). */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); ++ ++ /* Cost based on the pipeline model. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_STORE: ++ case TYPE_MOVE: ++ case TYPE_ARITH: ++ case TYPE_MULTI: ++ case TYPE_NOP: ++ case TYPE_FSTORE: ++ return COSTS_N_INSNS (n); ++ ++ case TYPE_LOAD: ++ return COSTS_N_INSNS (n - 1 + 2); ++ ++ case TYPE_JUMP: ++ case TYPE_CALL: ++ return COSTS_N_INSNS (n - 1 + 3); ++ ++ case TYPE_FCONV: ++ case TYPE_FLOAD: ++ case TYPE_MUL16: ++ case TYPE_MUL32: ++ case TYPE_RSR: ++ return COSTS_N_INSNS (n * 2); ++ ++ case TYPE_FMADD: ++ return COSTS_N_INSNS (n * 4); ++ ++ case TYPE_DIV32: ++ return COSTS_N_INSNS (n * 16); ++ ++ default: ++ break; ++ } ++ } ++ else /* For size cost. */ ++ { ++ /* Cost based on the instruction length. */ ++ if (get_attr_type (insn) != TYPE_UNKNOWN) ++ { ++ /* "L32R" itself plus constant in litpool. */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (2) + 1; ++ ++ /* Consider ".n" short instructions. */ ++ return COSTS_N_INSNS (n) - (n * 3 - len); ++ } ++ } ++ } ++ ++ /* Fall back. */ ++ return pattern_cost (PATTERN (insn), speed); ++} ++ + /* Worker function for TARGET_RETURN_IN_MEMORY. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3314b3fd6..da6b71d1d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -98,7 +98,10 @@ + + ;; Describe a user's asm statement. 
+ (define_asm_attributes +- [(set_attr "type" "multi")]) ++ [(set_attr "type" "multi") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ;; Should be the maximum possible length ++ ;; of a single machine instruction. + + + ;; Pipeline model. +@@ -1884,7 +1887,10 @@ + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "2")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + + ;; Miscellaneous instructions. +@@ -1939,7 +1945,10 @@ + } + [(set_attr "type" "nop") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + (define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") +@@ -2003,8 +2012,9 @@ + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" +- [(set_attr "length" "0") +- (set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + ;; Do not schedule instructions accessing memory before this point. 
+ +@@ -2023,7 +2033,9 @@ + (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" +- [(set_attr "length" "0")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + (define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] +@@ -2036,7 +2048,10 @@ + } + [(set_attr "type" "trap") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't + ;; know if a frame pointer is required until the reload pass, and +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index e1d992f5d..97aa44f92 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -30,6 +30,10 @@ mlongcalls + Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + ++mextra-l32r-costs= ++Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) ++Set extra memory access cost for L32R instruction, in clock-cycle units. ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch b/patches/gcc10.1/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch new file mode 100644 index 0000000..0e14673 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch @@ -0,0 +1,400 @@ +From ed2c4b57807470b386e9abdf145282e197d9da65 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 11 Jun 2022 00:26:17 +0900 +Subject: [PATCH 18/31] xtensa: Improve constant synthesis for both integer and + floating-point + +This patch revises the previous implementation of constant synthesis. 
+ +First, changed to use define_split machine description pattern and to run +after reload pass, in order not to interfere some optimizations such as +the loop invariant motion. + +Second, not only integer but floating-point is subject to processing. + +Third, several new synthesis patterns - when the constant cannot fit into +a "MOVI Ax, simm12" instruction, but: + +I. can be represented as a power of two minus one (eg. 32767, 65535 or + 0x7fffffffUL) + => "MOVI(.N) Ax, -1" + "SRLI Ax, Ax, 1 ... 31" (or "EXTUI") +II. is between -34816 and 34559 + => "MOVI(.N) Ax, -2048 ... 2047" + "ADDMI Ax, Ax, -32768 ... 32512" +III. (existing case) can fit into a signed 12-bit if the trailing zero bits + are stripped + => "MOVI(.N) Ax, -2048 ... 2047" + "SLLI Ax, Ax, 1 ... 31" + +The above sequences consist of 5 or 6 bytes and have latency of 2 clock cycles, +in contrast with "L32R Ax, <litpool>" (3 bytes and one clock latency, but may +suffer additional one clock pipeline stall and implementation-specific +InstRAM/ROM access penalty) plus 4 bytes of constant value. + +In addition, 3-instructions synthesis patterns (8 or 9 bytes, 3 clock latency) +are also provided when optimizing for speed and L32R instruction has +considerable access penalty: + +IV. 2-instructions synthesis (any of I ... III) followed by + "SLLI Ax, Ax, 1 ... 31" +V. 2-instructions synthesis followed by either "ADDX[248] Ax, Ax, Ax" + or "SUBX8 Ax, Ax, Ax" (multiplying by 3, 5, 7 or 9) + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_constantsynth): + New prototype. + * config/xtensa/xtensa.c (xtensa_emit_constantsynth, + xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI, + xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): + New backend functions that process the abovementioned logic. + (xtensa_emit_move_sequence): Revert the previous changes. + * config/xtensa/xtensa.md: New split patterns for integer + and floating-point, as the frontend part. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_2insns.c: New. + * gcc.target/xtensa/constsynth_3insns.c: Ditto. + * gcc.target/xtensa/constsynth_double.c: Ditto. +--- + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 133 +++++++++++++++--- + gcc/config/xtensa/xtensa.md | 50 +++++++ + .../gcc.target/xtensa/constsynth_2insns.c | 44 ++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 24 ++++ + .../gcc.target/xtensa/constsynth_double.c | 11 ++ + 6 files changed, 247 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 80b1da2bb..d65bc2954 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -44,6 +44,7 @@ extern int xtensa_expand_block_move (rtx *); + extern int xtensa_expand_block_set_unrolled_loop (rtx *); + extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); ++extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); + extern void xtensa_expand_nonlocal_goto (rtx *); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 94ff901c5..ba36d7244 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1027,6 +1027,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + } + + ++/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) ++ into dst by synthesizing such a constant value from a sequence of ++ load-immediate / arithmetic ones, instead of an L32R instruction ++ (plus a constant in litpool). 
*/ ++ ++static void ++xtensa_emit_constantsynth (rtx dst, enum rtx_code code, ++ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT imm2) ++{ ++ gcc_assert (REG_P (dst)); ++ emit_move_insn (dst, GEN_INT (imm0)); ++ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, ++ dst, GEN_INT (imm1))); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, imm2)); ++} ++ ++static int ++xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT op_imm) ++{ ++ int shift = exact_log2 (srcval + 1); ++ ++ if (IN_RANGE (shift, 1, 31)) ++ { ++ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ { ++ HOST_WIDE_INT imm0, imm1; ++ ++ if (srcval < -32768) ++ imm1 = -32768; ++ else if (srcval > 32512) ++ imm1 = 32512; ++ else ++ imm1 = srcval & ~255; ++ imm0 = srcval - imm1; ++ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) ++ imm0 -= 256, imm1 += 256; ++ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); ++ return 1; ++ } ++ ++ shift = ctz_hwi (srcval); ++ if (xtensa_simm12b (srcval >> shift)) ++ { ++ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static rtx ++xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) ++{ ++ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); ++} ++ ++static rtx ++xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) ++{ ++ return imm == 7 ++ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), ++ reg) ++ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, ++ GEN_INT (floor_log2 (imm - 1))), ++ reg); ++} ++ ++int ++xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) ++{ ++ /* No need for synthesizing for what fits into MOVI instruction. */ ++ if (xtensa_simm12b (srcval)) ++ return 0; ++ ++ /* 2-insns substitution. 
*/ ++ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) ++ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) ++ return 1; ++ ++ /* 3-insns substitution. */ ++ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) ++ { ++ int shift, divisor; ++ ++ /* 2-insns substitution followed by SLLI. */ ++ shift = ctz_hwi (srcval); ++ if (IN_RANGE (shift, 1, 31) && ++ xtensa_constantsynth_2insn (dst, srcval >> shift, ++ xtensa_constantsynth_rtx_SLLI, ++ shift)) ++ return 1; ++ ++ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ ++ if (TARGET_ADDX) ++ for (divisor = 3; divisor <= 9; divisor += 2) ++ if (srcval % divisor == 0 && ++ xtensa_constantsynth_2insn (dst, srcval / divisor, ++ xtensa_constantsynth_rtx_ADDSUBX, ++ divisor)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++ + /* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move +@@ -1064,22 +1181,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + + if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) + { +- /* Try to emit MOVI + SLLI sequence, that is smaller +- than L32R + literal. 
*/ +- if (optimize_size && mode == SImode && CONST_INT_P (src) +- && register_operand (dst, mode)) +- { +- HOST_WIDE_INT srcval = INTVAL (src); +- int shift = ctz_hwi (srcval); +- +- if (xtensa_simm12b (srcval >> shift)) +- { +- emit_move_insn (dst, GEN_INT (srcval >> shift)); +- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); +- return 1; +- } +- } +- + src = force_const_mem (SImode, src); + operands[1] = src; + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index da6b71d1d..ddc3087fa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -940,6 +940,19 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "constantpool_operand"))] ++ "! optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ if (! CONST_INT_P (x)) ++ FAIL; ++ if (! xtensa_constantsynth (operands[0], INTVAL (x))) ++ emit_move_insn (operands[0], x); ++}) ++ + ;; 16-bit Integer moves + + (define_expand "movhi" +@@ -1144,6 +1157,43 @@ + (set_attr "mode" "SF") + (set_attr "length" "3")]) + ++(define_split ++ [(set (match_operand:SF 0 "register_operand") ++ (match_operand:SF 1 "constantpool_operand"))] ++ "! 
optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ int i = 0; ++ rtx x = XEXP (operands[1], 0); ++ long l[2]; ++ if (GET_CODE (x) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (x)) ++ x = get_pool_constant (x); ++ else if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ gcc_assert (GET_CODE (x) == PLUS ++ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) ++ && CONST_INT_P (XEXP (x, 1))); ++ i = INTVAL (XEXP (x, 1)); ++ gcc_assert (i == 0 || i == 4); ++ i /= 4; ++ x = get_pool_constant (XEXP (x, 0)); ++ } ++ else ++ gcc_unreachable (); ++ if (GET_MODE (x) == SFmode) ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); ++ else if (GET_MODE (x) == DFmode) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ else ++ FAIL; ++ x = gen_rtx_REG (SImode, REGNO (operands[0])); ++ if (! xtensa_constantsynth (x, l[i])) ++ emit_move_insn (x, GEN_INT (l[i])); ++}) ++ + ;; 64-bit floating point moves + + (define_expand "movdf" +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +new file mode 100644 +index 000000000..43c85a250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++int test_0(void) ++{ ++ return 4095; ++} ++ ++int test_1(void) ++{ ++ return 2147483647; ++} ++ ++int test_2(void) ++{ ++ return -34816; ++} ++ ++int test_3(void) ++{ ++ return -2049; ++} ++ ++int test_4(void) ++{ ++ return 2048; ++} ++ ++int test_5(void) ++{ ++ return 34559; ++} ++ ++int test_6(void) ++{ ++ return 43680; ++} ++ ++void test_7(int *p) ++{ ++ *p = -1432354816; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +new file mode 100644 +index 000000000..f3c4a1c7c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mextra-l32r-costs=3" } */ ++ ++int test_0(void) ++{ ++ return 134217216; ++} ++ ++int test_1(void) ++{ ++ return -27604992; ++} ++ ++int test_2(void) ++{ ++ return -162279; ++} ++ ++void test_3(int *p) ++{ ++ *p = 192437; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +new file mode 100644 +index 000000000..890ca5047 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void test(unsigned int count, double array[]) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ array[i] = 1.0; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0024-Improve-shift-operations-more.patch b/patches/gcc10.1/gcc-xtensa-0024-Improve-shift-operations-more.patch new file mode 100644 index 0000000..9c44b89 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0024-Improve-shift-operations-more.patch @@ -0,0 +1,383 @@ +From fd3771fcc13b8712c91cec70f4533760f72b54e1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:38:31 +0900 +Subject: [PATCH 19/31] xtensa: Improve shift operations more + +This patch introduces funnel shifter utilization, and rearranges existing +"per-byte shift" insn patterns. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (logical_shift_operator, + xtensa_shift_per_byte_operator): New predicates. + * config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction): + New prototype. + * config/xtensa/xtensa.c (xtensa_shlrd_which_direction): + New helper function for funnel shift patterns. + * config/xtensa/xtensa.md (ior_op): New code iterator. + (*ashlsi3_1): Replace with new split pattern. 
+ (*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x. + (*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1): + New insn-and-split patterns that redirect to *xtensa_shift_per_byte, + in order to omit unnecessary bitwise AND operation. + (*shlrd_reg_, *shlrd_const_, *shlrd_per_byte_, + *shlrd_per_byte__omit_AND): + New insn patterns for funnel shifts. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/funnel_shifter.c: New. +--- + gcc/config/xtensa/predicates.md | 6 + + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 14 ++ + gcc/config/xtensa/xtensa.md | 213 ++++++++++++++---- + .../gcc.target/xtensa/funnel_shifter.c | 17 ++ + 5 files changed, 213 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 91b9343a2..e7836f0ec 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -164,9 +164,15 @@ + (define_predicate "boolean_operator" + (match_code "eq,ne")) + ++(define_predicate "logical_shift_operator" ++ (match_code "ashift,lshiftrt")) ++ + (define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + ++(define_predicate "xtensa_shift_per_byte_operator" ++ (match_code "ashift,ashiftrt,lshiftrt")) ++ + (define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index d65bc2954..32743bc67 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); ++extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + + #ifdef TREE_CODE + extern void 
init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba36d7244..473cfaf9d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2394,6 +2394,20 @@ xtensa_tls_referenced_p (rtx x) + } + + ++/* Helper function for "*shlrd_..." patterns. */ ++ ++enum rtx_code ++xtensa_shlrd_which_direction (rtx op0, rtx op1) ++{ ++ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) ++ return ASHIFT; /* shld */ ++ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) ++ return LSHIFTRT; /* shrd */ ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ddc3087fa..58bba89af 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -83,6 +83,9 @@ + ;; the same template. + (define_mode_iterator HQI [HI QI]) + ++;; This code iterator is for *shlrd and its variants. ++(define_code_iterator ior_op [ior plus]) ++ + + ;; Attributes. 
+ +@@ -1272,16 +1275,6 @@ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + }) + +-(define_insn "*ashlsi3_1" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (const_int 1)))] +- "TARGET_DENSITY" +- "add.n\t%0, %1, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "2")]) +- + (define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1294,16 +1287,14 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashlsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8b\t%2\;sll\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "register_operand") ++ (const_int 1)))] ++ "TARGET_DENSITY" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 1) ++ (match_dup 1)))]) + + (define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") +@@ -1317,17 +1308,6 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashrsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8l\t%2\;sra\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) +- + (define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1337,9 +1317,9 @@ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) +- return "srli\t%0, %1, %R2"; ++ return "srli\t%0, %1, %R2"; + else +- 
return "extui\t%0, %1, %R2, %L2"; ++ return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; + } +@@ -1347,13 +1327,170 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*lshrsi3_3x" ++(define_insn "*shift_per_byte" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 3 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]))] ++ "!optimize_debug && optimize" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; ++ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; ++ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_0" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i"))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_1" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i")))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 
5) ++ (neg:SI (match_dup 2))) ++ (set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 5) ++ (const_int 3))]))] ++{ ++ operands[5] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "9")]) ++ ++(define_insn "*shlrd_reg_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (match_dup 2))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_const_" + [(set (match_operand:SI 0 "register_operand" "=a") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i")])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && IN_RANGE (INTVAL (operands[3]), 1, 31) ++ && IN_RANGE (INTVAL (operands[4]), 1, 31) ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" ++{ ++ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) ++ { ++ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; ++ case LSHIFTRT: return 
"ssai\t%R3\;src\t%0, %2, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_per_byte_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shlrd_per_byte__omit_AND" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 4 "const_int_operand" "i"))]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_dup 2) ++ (const_int 3)) ++ (match_dup 4)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ior_op:SI (match_op_dup 5 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]) ++ (match_op_dup 6 ++ [(match_dup 3) ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] + "" +- "ssa8l\t%2\;srl\t%0, %1" + [(set_attr "type" "arith") + 
(set_attr "mode" "SI") + (set_attr "length" "6")]) +diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +new file mode 100644 +index 000000000..c8f987ccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(const void *addr) ++{ ++ unsigned int n = (unsigned int)addr; ++ const unsigned int *a = (const unsigned int*)(n & ~3); ++ n = (n & 3) * 8; ++ return (a[0] >> n) | (a[1] << (32 - n)); ++} ++ ++unsigned int test_1(unsigned int a, unsigned int b) ++{ ++ return (a >> 16) + (b << 16); ++} ++ ++/* { dg-final { scan-assembler-times "src" 2 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch new file mode 100644 index 0000000..cdb96ff --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch @@ -0,0 +1,427 @@ +From 0690bcdd42d0aa6671f9ec3ccbbe70faa04ffb6b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 31 Jan 2022 09:56:21 +0900 +Subject: [PATCH 20/31] xtensa: Simplify conditional branch/move insn patterns + +No need to describe the "false side" conditional insn patterns anymore. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_emit_branch): + Remove the first argument. + (xtensa_emit_bit_branch): Remove it because now called only from the + output statement of *bittrue insn pattern. + * config/xtensa/xtensa.c (gen_int_relational): Remove the last + argument 'p_invert', and make so that the condition is reversed by + itself as needed. + (xtensa_expand_conditional_branch): Share the common path, and remove + condition inversion code. + (xtensa_emit_branch, xtensa_emit_movcc): Simplify by removing the + "false side" pattern. 
+ (xtensa_emit_bit_branch): Remove it because of the abovementioned + reason, and move the function body to *bittrue insn pattern. + * config/xtensa/xtensa.md (*bittrue): Transplant the output + statement from removed xtensa_emit_bit_branch(). + (*bfalse, *ubfalse, *bitfalse, *maskfalse): Remove the "false side" + insn patterns. +--- + gcc/config/xtensa/xtensa-protos.h | 3 +- + gcc/config/xtensa/xtensa.c | 111 ++++++++++------------------ + gcc/config/xtensa/xtensa.md | 117 ++++++++---------------------- + 3 files changed, 70 insertions(+), 161 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 32743bc67..e4b2d2f06 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -51,8 +51,7 @@ extern void xtensa_expand_nonlocal_goto (rtx *); + extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); + extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); +-extern char *xtensa_emit_branch (bool, bool, rtx *); +-extern char *xtensa_emit_bit_branch (bool, bool, rtx *); ++extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 473cfaf9d..8deae3d51 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -118,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = + + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); +-static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); ++static rtx gen_int_relational (enum rtx_code, rtx, rtx); + static rtx gen_float_relational (enum rtx_code, rtx, rtx); + static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); + static rtx 
fixup_subreg_mem (rtx); +@@ -670,8 +670,7 @@ map_test_to_internal_test (enum rtx_code test_code) + static rtx + gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ +- rtx cmp1, /* second operand to compare */ +- int *p_invert /* whether branch needs to reverse test */) ++ rtx cmp1 /* second operand to compare */) + { + struct cmp_info + { +@@ -703,6 +702,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + enum internal_test test; + machine_mode mode; + struct cmp_info *p_info; ++ int invert; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); +@@ -739,9 +739,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- *p_invert = ((GET_CODE (cmp1) == CONST_INT) +- ? p_info->invert_const +- : p_info->invert_reg); ++ invert = ((GET_CODE (cmp1) == CONST_INT) ++ ? p_info->invert_const ++ : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. */ +@@ -758,7 +758,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + cmp1 = temp; + } + +- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); ++ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) ++ : p_info->test_code, ++ VOIDmode, cmp0, cmp1); + } + + +@@ -817,45 +819,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; +- rtx cmp; +- int invert; +- rtx label1, label2; ++ rtx cmp, label; + + switch (mode) + { ++ case E_SFmode: ++ if (TARGET_HARD_FLOAT) ++ { ++ cmp = gen_float_relational (test_code, cmp0, cmp1); ++ break; ++ } ++ /* FALLTHRU */ ++ + case E_DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case E_SImode: +- invert = FALSE; +- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); +- break; +- +- case E_SFmode: +- if (!TARGET_HARD_FLOAT) +- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, +- cmp0, cmp1)); +- invert = FALSE; +- cmp = gen_float_relational (test_code, cmp0, cmp1); ++ cmp = gen_int_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ +- +- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); +- label2 = pc_rtx; +- +- if (invert) +- { +- label2 = label1; +- label1 = pc_rtx; +- } +- ++ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, +- label1, +- label2))); ++ label, ++ pc_rtx))); + } + + +@@ -2058,21 +2048,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + + + char * +-xtensa_emit_branch (bool inverted, bool immed, rtx *operands) ++xtensa_emit_branch (bool immed, rtx *operands) + { + static char result[64]; +- enum rtx_code code; ++ enum rtx_code code = GET_CODE (operands[3]); + const char *op; + +- code = GET_CODE (operands[3]); + switch (code) + { +- case EQ: op = inverted ? "ne" : "eq"; break; +- case NE: op = inverted ? "eq" : "ne"; break; +- case LT: op = inverted ? "ge" : "lt"; break; +- case GE: op = inverted ? "lt" : "ge"; break; +- case LTU: op = inverted ? 
"geu" : "ltu"; break; +- case GEU: op = inverted ? "ltu" : "geu"; break; ++ case EQ: op = "eq"; break; ++ case NE: op = "ne"; break; ++ case LT: op = "lt"; break; ++ case GE: op = "ge"; break; ++ case LTU: op = "ltu"; break; ++ case GEU: op = "geu"; break; + default: gcc_unreachable (); + } + +@@ -2091,32 +2080,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) + } + + +-char * +-xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +-{ +- static char result[64]; +- const char *op; +- +- switch (GET_CODE (operands[3])) +- { +- case EQ: op = inverted ? "bs" : "bc"; break; +- case NE: op = inverted ? "bc" : "bs"; break; +- default: gcc_unreachable (); +- } +- +- if (immed) +- { +- unsigned bitnum = INTVAL (operands[1]) & 0x1f; +- operands[1] = GEN_INT (bitnum); +- sprintf (result, "b%si\t%%0, %%d1, %%2", op); +- } +- else +- sprintf (result, "b%s\t%%0, %%1, %%2", op); +- +- return result; +-} +- +- + char * + xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { +@@ -2125,12 +2088,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + const char *op; + + code = GET_CODE (operands[4]); ++ if (inverted) ++ code = reverse_condition (code); + if (isbool) + { + switch (code) + { +- case EQ: op = inverted ? "t" : "f"; break; +- case NE: op = inverted ? "f" : "t"; break; ++ case EQ: op = "f"; break; ++ case NE: op = "t"; break; + default: gcc_unreachable (); + } + } +@@ -2138,10 +2103,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { + switch (code) + { +- case EQ: op = inverted ? "nez" : "eqz"; break; +- case NE: op = inverted ? "eqz" : "nez"; break; +- case LT: op = inverted ? "gez" : "ltz"; break; +- case GE: op = inverted ? 
"ltz" : "gez"; break; ++ case EQ: op = "eqz"; break; ++ case NE: op = "nez"; break; ++ case LT: op = "ltz"; break; ++ case GE: op = "gez"; break; + default: gcc_unreachable (); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 58bba89af..40000859d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1551,28 +1551,13 @@ + (define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*bfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1581,28 +1566,13 @@ + (define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*ubfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "ubranch_operator" +- 
[(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1613,75 +1583,50 @@ + (define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) +- (pc)))] +- "" +-{ +- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*bitfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) ++ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") ++ (const_int 1) ++ (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*masktrue" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { ++ static char result[64]; ++ char op; + switch (GET_CODE (operands[3])) + { +- case EQ: return "bnone\t%0, %1, %2"; +- case NE: return "bany\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: op = 'c'; break; ++ case NE: op = 's'; break; ++ default: 
gcc_unreachable (); + } ++ if (which_alternative == 0) ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); ++ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); ++ } ++ else ++ sprintf (result, "bb%c\t%%0, %%1, %%2", op); ++ return result; + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_insn "*maskfalse" ++(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] + "" + { + switch (GET_CODE (operands[3])) + { +- case EQ: return "bany\t%0, %1, %2"; +- case NE: return "bnone\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: return "bnone\t%0, %1, %2"; ++ case NE: return "bany\t%0, %1, %2"; ++ default: gcc_unreachable (); + } + } + [(set_attr "type" "jump") +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch b/patches/gcc10.1/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch new file mode 100644 index 0000000..e1d2790 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch @@ -0,0 +1,101 @@ +From a7cf439409089eab17341a1a24fb9be2b967ca7c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 27 May 2021 19:04:12 +0900 +Subject: [PATCH 21/31] xtensa: Make use of BALL/BNALL instructions + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation, but a few similar fused instructions are exist: + + "BALL Ax, Ay, label" // if ((~Ax & Ay) == 0) goto label; + "BNALL Ax, Ay, label" // if ((~Ax & Ay) != 0) goto label; + +These instructions have never been emitted before, but it seems no reason 
not +to make use of them. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_bitcmpl): New insn pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/BALL-BNALL.c: New. +--- + gcc/config/xtensa/xtensa.md | 21 +++++++++++++ + gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 ++++++++++++++++++++ + 2 files changed, 54 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 40000859d..b34b2afb6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1633,6 +1633,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn "*masktrue_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case EQ: return "ball\t%0, %1, %2"; ++ case NE: return "bnall\t%0, %1, %2"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ + + ;; Zero-overhead looping support. 
+ +diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +new file mode 100644 +index 000000000..ba61c6f37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++extern void foo(void); ++ ++void BNONE_test(int a, int b) ++{ ++ if (a & b) ++ foo(); ++} ++ ++void BANY_test(int a, int b) ++{ ++ if (!(a & b)) ++ foo(); ++} ++ ++void BALL_test(int a, int b) ++{ ++ if (~a & b) ++ foo(); ++} ++ ++void BNALL_test(int a, int b) ++{ ++ if (!(~a & b)) ++ foo(); ++} ++ ++/* { dg-final { scan-assembler-times "bnone" 1 } } */ ++/* { dg-final { scan-assembler-times "bany" 1 } } */ ++/* { dg-final { scan-assembler-times "ball" 1 } } */ ++/* { dg-final { scan-assembler-times "bnall" 1 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch b/patches/gcc10.1/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch new file mode 100644 index 0000000..b13350f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch @@ -0,0 +1,252 @@ +From 43c7f8333028ff03d8a4681ab62de2febcc43f5c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:28:43 +0900 +Subject: [PATCH 22/31] xtensa: Optimize bitwise AND operation with some + specific forms of constants + +This patch offers several insn-and-split patterns for bitwise AND with +register and constant that can be represented as: + +i. 1's least significant N bits and the others 0's (17 <= N <= 31) +ii. 1's most significant N bits and the others 0's (12 <= N <= 31) +iii. M 1's sequence of bits and trailing N 0's bits, that cannot fit into a + "MOVI Ax, simm12" instruction (1 <= M <= 16, 1 <= N <= 30) + +And also offers shortcuts for conditional branch if each of the abovementioned +operations is (not) equal to zero. 
+ +gcc/ChangeLog: + + * config/xtensa/predicates.md (shifted_mask_operand): + New predicate. + * config/xtensa/xtensa.md (*andsi3_const_pow2_minus_one): + New insn-and-split pattern. + (*andsi3_const_negative_pow2, *andsi3_const_shifted_mask, + *masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): Ditto. +--- + gcc/config/xtensa/predicates.md | 10 ++ + gcc/config/xtensa/xtensa.md | 179 ++++++++++++++++++++++++++++++++ + 2 files changed, 189 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index e7836f0ec..367fc17f3 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -52,6 +52,16 @@ + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + ++(define_predicate "shifted_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT mask = INTVAL (op); ++ int shift = ctz_hwi (mask); ++ ++ return IN_RANGE (shift, 1, 31) ++ && xtensa_mask_immediate ((uint32_t)mask >> shift); ++}) ++ + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b34b2afb6..355fb7742 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -645,6 +645,83 @@ + (set_attr "mode" "SI") + (set_attr "length" "6")]) + ++(define_insn_and_split "*andsi3_const_pow2_minus_one" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ashift:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" 
"SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[2]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*andsi3_const_negative_pow2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (lshiftrt:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*andsi3_const_shifted_mask" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "shifted_mask_operand" "i")))] ++ "! xtensa_simm12b (INTVAL (operands[2]))" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (zero_extract:SI (match_dup 1) ++ (match_dup 3) ++ (match_dup 4))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[2]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[2] = GEN_INT (shift); ++ operands[3] = GEN_INT (mask_size); ++ operands[4] = GEN_INT (mask_pos); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && ctz_hwi (INTVAL (operands[2])) == 1") ++ (const_int 5) ++ (const_int 6)))]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +@@ -1654,6 +1731,108 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split 
"*masktrue_const_pow2_minus_one" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (ashift:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[1]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*masktrue_const_negative_pow2" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*masktrue_const_shifted_mask" ++ [(set (pc) ++ (if_then_else (match_operator 4 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "shifted_mask_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 
"" "")) ++ (pc)))] ++ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 ++ && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 6) ++ (zero_extract:SI (match_dup 0) ++ (match_dup 5) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 6) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) ++ (pc)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[1]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[1] = GEN_INT (mask_pos); ++ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); ++ operands[5] = GEN_INT (mask_size); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") ++ (const_int 5) ++ (const_int 6)))]) ++ + + ;; Zero-overhead looping support. + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch b/patches/gcc10.1/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch new file mode 100644 index 0000000..ebe9eb0 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch @@ -0,0 +1,44 @@ +From 7856e5d6344828b2a72aeef671a169dbd1a85a55 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:34:48 +0900 +Subject: [PATCH 23/31] xtensa: Document new -mextra-l32r-costs= + Xtensa-specific option + +gcc/ChangeLog: + * doc/invoke.texi: Document -mextra-l32r-costs= option. 
+--- + gcc/doc/invoke.texi | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index eabeec944..c35f51afb 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. + -mtext-section-literals -mno-text-section-literals @gol + -mauto-litpools -mno-auto-litpools @gol + -mtarget-align -mno-target-align @gol +--mlongcalls -mno-longcalls} ++-mlongcalls -mno-longcalls @gol ++-mextra-l32r-costs=@var{cycles}} + + @emph{zSeries Options} + See S/390 and zSeries Options. +@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call + instructions---look at the disassembled object code to see the actual + instructions. Note that the assembler uses an indirect call for + every cross-file call, not just those that really are out of range. ++ ++@item -mextra-l32r-costs=@var{cycles} ++@opindex mextra-l32r-costs ++Specify an extra cost of instruction RAM/ROM access for @code{L32R} ++instructions, in clock cycles. When optimizing for speed, this affects ++whether a constant is loaded from the literal pool using @code{L32R} or ++synthesized from a small one with a couple of arithmetic ++instructions. The default value is 0. 
+ @end table + + @node zSeries Options +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch b/patches/gcc10.1/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch new file mode 100644 index 0000000..f5c0f78 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch @@ -0,0 +1,354 @@ +From c985f67f0b9a35ca5f22647c326c6b43a2b237fa Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 15 Jun 2022 21:21:21 +0900 +Subject: [PATCH 24/31] xtensa: Add support for sibling call optimization + +This patch introduces support for sibling call optimization, when the Windowed +Register Option is NOT configured. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call, + xtensa_emit_sibcall): New prototypes. + (xtensa_expand_epilogue): Add new argument that specifies whether + or not sibling call. + * config/xtensa/xtensa.c (TARGET_FUNCTION_OK_FOR_SIBCALL): + New macro definition. + (xtensa_prepare_expand_call): New function in order to share + the common code. + (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall): + New functions. + (xtensa_expand_epilogue): Add new argument sibcall_p and use it + for sibling call handling. + * config/xtensa/xtensa.md (call, call_value): + Use xtensa_prepare_expand_call. + (call_internal, call_value_internal): + Add the condition in order to be disabled if sibling call. + (sibcall, sibcall_value, sibcall_epilogue): New expansions. + (sibcall_internal, sibcall_value_internal): New insn patterns, + and split ones in order to take care of the indirect sibcalls. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: New. 
+--- + gcc/config/xtensa/xtensa-protos.h | 4 +- + gcc/config/xtensa/xtensa.c | 57 ++++++++++++- + gcc/config/xtensa/xtensa.md | 93 ++++++++++++++++++---- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++ + 4 files changed, 155 insertions(+), 19 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index e4b2d2f06..75ed3bfb0 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); + extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); ++extern void xtensa_prepare_expand_call (int, rtx *); + extern char *xtensa_emit_call (int, rtx *); ++extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + +@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int); + extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); +-extern void xtensa_expand_epilogue (void); ++extern void xtensa_expand_epilogue (bool); + extern void order_regs_for_local_alloc (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8deae3d51..a714b980a 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -187,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); + static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); 
++static bool xtensa_function_ok_for_sibcall (tree, tree); + + + +@@ -337,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2117,6 +2121,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + } + + ++void ++xtensa_prepare_expand_call (int callop, rtx *operands) ++{ ++ rtx addr = XEXP (operands[callop], 0); ++ ++ if (flag_pic && SYMBOL_REF_P (addr) ++ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) ++ addr = gen_sym_PLT (addr); ++ ++ if (!call_insn_operand (addr, VOIDmode)) ++ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++} ++ ++ + char * + xtensa_emit_call (int callop, rtx *operands) + { +@@ -2135,6 +2153,24 @@ xtensa_emit_call (int callop, rtx *operands) + } + + ++char * ++xtensa_emit_sibcall (int callop, rtx *operands) ++{ ++ static char result[64]; ++ rtx tgt = operands[callop]; ++ ++ if (GET_CODE (tgt) == CONST_INT) ++ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", ++ INTVAL (tgt)); ++ else if (register_operand (tgt, VOIDmode)) ++ sprintf (result, "jx\t%%%d", callop); ++ else ++ sprintf (result, "j.l\t%%%d, a9", callop); ++ ++ return result; ++} ++ ++ + bool + xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) + { +@@ -3305,7 +3341,7 @@ xtensa_expand_prologue (void) + } + + void +-xtensa_expand_epilogue (void) ++xtensa_expand_epilogue (bool sibcall_p) + { + if (!TARGET_WINDOWED_ABI) + { +@@ -3339,10 +3375,13 @@ xtensa_expand_epilogue (void) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), ++ emit_move_insn 
(reg = gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); ++ if (regno == A0_REG && sibcall_p) ++ emit_use (reg); + } + } + +@@ -3377,7 +3416,8 @@ xtensa_expand_epilogue (void) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- emit_jump_insn (gen_return ()); ++ if (!sibcall_p) ++ emit_jump_insn (gen_return ()); + } + + bool +@@ -4893,4 +4933,15 @@ xtensa_asan_shadow_offset (void) + return HOST_WIDE_INT_UC (0x10000000); + } + ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ ++static bool ++xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Do not allow sibcalls when windowed registers ABI is in effect. */ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 355fb7742..2a11d1c86 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,6 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++ (A10_REG 10) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2153,18 +2154,13 @@ + (match_operand 1 "" ""))] + "" + { +- rtx addr = XEXP (operands[0], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (0, operands); + }) + + (define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (0, operands); + } +@@ -2178,19 +2174,14 @@ + (match_operand 2 "" "")))] + "" + { +- rtx addr = XEXP (operands[1], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[1], 0) = 
copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (1, operands); + }) + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); + } +@@ -2198,6 +2189,70 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "sibcall" ++ [(call (match_operand 0 "memory_operand" "") ++ (match_operand 1 "" ""))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (0, operands); ++}) ++ ++(define_insn "sibcall_internal" ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ (match_operand 1 "" "i"))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (0, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(call (mem:SI (match_operand:SI 0 "register_operand")) ++ (match_operand 1 ""))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 0)) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 1))]) ++ ++(define_expand "sibcall_value" ++ [(set (match_operand 0 "register_operand" "") ++ (call (match_operand 1 "memory_operand" "") ++ (match_operand 2 "" "")))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (1, operands); ++}) ++ ++(define_insn "sibcall_value_internal" ++ [(set (match_operand 0 "register_operand" "=a") ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (1, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (call (mem:SI (match_operand:SI 1 "register_operand")) 
++ (match_operand 2 "")))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 1)) ++ (set (match_dup 0) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 2)))]) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +@@ -2265,7 +2320,15 @@ + [(return)] + "" + { +- xtensa_expand_epilogue (); ++ xtensa_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_expand_epilogue (true); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +new file mode 100644 +index 000000000..d2b3fccf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */ ++ ++extern int foo(int); ++extern void bar(int); ++ ++int test_0(int a) { ++ return foo(a); ++} ++ ++void test_1(int a) { ++ bar(a); ++} ++ ++int test_2(int (*a)(void)) { ++ bar(0); ++ return a(); ++} ++ ++/* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch b/patches/gcc10.1/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch new file mode 100644 index 0000000..ad60202 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch @@ -0,0 +1,81 @@ +From 16878066a57f917814a8d6fe45f7f7d2eebdbbc0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:37:54 +0900 +Subject: [PATCH 25/31] xtensa: Add some dedicated patterns that correspond to + GIMPLE canonicalizations + +This patch offers better RTL representations against straightforward +derivations from some tree optimizers' canonicalized forms. 
+ +- rounding up to even, such as '(x + (x & 1))', is canonicalized to + '((x + 1) & -2)', but the former is one instruction less than the latter + in Xtensa ISA. +- signed greater or equal to zero as logical value '((signed)x >= 0)', + is canonicalized to '((unsigned)(x ^ -1) >> 31)', but the equivalent + '(((signed)x >> 31) + 1)' is one instruction less. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*round_up_to_even): + New insn-and-split pattern. + (*signed_ge_zero): Ditto. +--- + gcc/config/xtensa/xtensa.md | 45 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a11d1c86..3e8e2e76f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2709,3 +2709,48 @@ + xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); + DONE; + }) ++ ++(define_insn_and_split "*round_up_to_even" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 1)) ++ (const_int -2)))] ++ "" ++ "#" ++ "can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (and:SI (match_dup 1) ++ (const_int 1))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*signed_ge_zero" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ge:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 0)))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) ++ (ashiftrt:SI (match_dup 1) ++ (const_int 31))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int 1)))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) +-- +2.20.1 + 
diff --git a/patches/gcc10.1/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch b/patches/gcc10.1/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch new file mode 100644 index 0000000..28bb494 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch @@ -0,0 +1,90 @@ +From a0f2dfa2e952111dbd85d2b2f1caaf570facce8a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:39:49 +0900 +Subject: [PATCH 26/31] xtensa: Eliminate unwanted reg-reg moves during DFmode + input reloads + +When spilled DFmode registers are reloaded, they are first loaded into a pair of +SImode regs and then copied out of those regs. Such unwanted reg-reg moves +seem not to be eliminated at the "cprop_hardreg" stage, although output reloads +have no such problem. + +Luckily it is easy to resolve such inefficiencies with the use of a peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (reload_operand): + New predicate. + * config/xtensa/xtensa.md: New peephole2 pattern. 
+--- + gcc/config/xtensa/predicates.md | 13 +++++++++++++ + gcc/config/xtensa/xtensa.md | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 367fc17f3..c1cddb733 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -165,6 +165,19 @@ + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + ++(define_predicate "reload_operand" ++ (match_code "mem") ++{ ++ const_rtx addr = XEXP (op, 0); ++ if (REG_P (addr)) ++ return REGNO (addr) == A1_REG; ++ if (GET_CODE (addr) == PLUS) ++ return REG_P (XEXP (addr, 0)) ++ && REGNO (XEXP (addr, 0)) == A1_REG ++ && CONST_INT_P (XEXP (addr, 1)); ++ return false; ++}) ++ + (define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3e8e2e76f..2598c09c9 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2754,3 +2754,34 @@ + (if_then_else (match_test "TARGET_DENSITY") + (const_int 5) + (const_int 6)))]) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 6 "reload_operand")) ++ (set (match_operand:SI 1 "register_operand") ++ (match_operand:SI 7 "reload_operand")) ++ (set (match_operand:SF 2 "register_operand") ++ (match_operand:SF 4 "register_operand")) ++ (set (match_operand:SF 3 "register_operand") ++ (match_operand:SF 5 "register_operand"))] ++ "REGNO (operands[0]) == REGNO (operands[4]) ++ && REGNO (operands[1]) == REGNO (operands[5]) ++ && peep2_reg_dead_p (4, operands[0]) ++ && peep2_reg_dead_p (4, operands[1])" ++ [(set (match_dup 2) ++ (match_dup 6)) ++ (set (match_dup 3) ++ (match_dup 7))] ++{ ++ uint64_t check = 0; /* one bit per REGNO; FAIL if operands 0..3 share a register */ ++ int i; ++ for (i = 0; i <= 3; ++i) ++ { ++ uint64_t mask = (uint64_t)1 << REGNO (operands[i]); /* 64-bit so REGNO >= 32 
cannot over-shift; assumes hard REGNO < 64 - TODO confirm */ ++ if (check & mask) ++ FAIL; ++ check |= mask; ++ } ++ operands[6] = gen_rtx_MEM 
(SFmode, XEXP (operands[6], 0)); ++ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch b/patches/gcc10.1/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch new file mode 100644 index 0000000..7c4a869 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch @@ -0,0 +1,99 @@ +From d6c2b11e9ce88f3b1a7ddcf9a2712b070ad4dbfb Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:53:04 +0900 +Subject: [PATCH 27/31] xtensa: Eliminate [DS]Cmode hard register clobber that + is immediately followed by whole overwrite the register + +RTL expansion of substitution to [DS]Cmode hard register includes obstructive +register clobber. + +A simplest example: + + double _Complex test(double _Complex c) { + return c; + } + +will be converted to: + + (set (reg:DF 42 [ c ]) (reg:DF 2 a2)) + (set (reg:DF 43 [ c+8 ]) (reg:DF 4 a4)) + (clobber (reg:DC 2 a2)) + (set (reg:DF 2 a2) (reg:DF 42 [ c ])) + (set (reg:DF 4 a4) (reg:DF 43 [ c+8 ])) + (use (reg:DC 2 a2)) + (return) + +and then finally: + + test: + mov a8, a2 + mov a9, a3 + mov a6, a4 + mov a7, a5 + mov a2, a8 + mov a3, a9 + mov a4, a6 + mov a5, a7 + ret + +As you see, it is so ridiculous. + +This patch eliminates such clobber in order to prune away the wasted move +instructions by the optimizer: + + test: + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (DSC): New split pattern and mode iterator. +--- + gcc/config/xtensa/xtensa.md | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2598c09c9..124548dfe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -87,6 +87,10 @@ + ;; This code iterator is for *shlrd and its variants. 
+ (define_code_iterator ior_op [ior plus]) + ++;; This mode iterator allows the DC and SC patterns to be defined from ++;; the same template. ++(define_mode_iterator DSC [DC SC]) ++ + + ;; Attributes. + +@@ -2785,3 +2789,27 @@ + operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); + operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); + }) ++ ++(define_split ++ [(clobber (match_operand:DSC 0 "register_operand"))] ++ "GP_REG_P (REGNO (operands[0]))" ++ [(const_int 0)] ++{ ++ unsigned int regno = REGNO (operands[0]); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx_insn *insn; ++ rtx x; ++ if (! ((insn = next_nonnote_nondebug_insn (curr_insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno ++ && (insn = next_nonnote_nondebug_insn (insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) ++ FAIL; ++}) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch b/patches/gcc10.1/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch new file mode 100644 index 0000000..6007b49 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch @@ -0,0 +1,111 @@ +From e37c151ca3beacb7f4f116a94c9c80223b0c6fbf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 17 Jun 2022 22:47:49 +0900 +Subject: [PATCH 28/31] xtensa: Defer storing integer constants into litpool + until reload + +Storing integer constants into litpool in the early stage of compilation +hinders some integer optimizations. In fact, such integer constants are +not subject to the constant folding process. 
+ +For example: + + extern unsigned short value; + extern void foo(void); + void test(void) { + if (value == 30001) + foo(); + } + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + extui a2, a2, 0, 16 // runtime zero-extension despite constant + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +This patch defers the placement of integer constants into litpool until +the start of reload: + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/constraints.md (Y): + Change to include integer constants until reload begins. + * config/xtensa/predicates.md (move_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Change to allow storing integer constants into litpool only after + reload begins. +--- + gcc/config/xtensa/constraints.md | 6 ++++-- + gcc/config/xtensa/predicates.md | 5 +++-- + gcc/config/xtensa/xtensa.c | 3 ++- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 9a8caab4f..13b3daafc 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -113,8 +113,10 @@ + + (define_constraint "Y" + "A constant that can be used in relaxed MOVI instructions." +- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") +- (match_test "TARGET_AUTO_LITPOOLS"))) ++ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") ++ (match_test "TARGET_AUTO_LITPOOLS")) ++ (and (match_code "const_int") ++ (match_test "can_create_pseudo_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. 
Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index c1cddb733..633cc6264 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -147,8 +147,9 @@ + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") +- (match_test "GET_MODE_CLASS (mode) == MODE_INT +- && xtensa_simm12b (INTVAL (op))")) ++ (match_test "(GET_MODE_CLASS (mode) == MODE_INT ++ && xtensa_simm12b (INTVAL (op))) ++ || can_create_pseudo_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a714b980a..1d64e2c76 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1173,7 +1173,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + return 1; + } + +- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) ++ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 ++ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + { + src = force_const_mem (SImode, src); + operands[1] = src; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch b/patches/gcc10.1/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch new file mode 100644 index 0000000..5ecac42 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch @@ -0,0 +1,129 @@ +From dfaefed18297218392071039325baabac59d5c43 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 19 Jun 2022 22:32:45 +0900 +Subject: [PATCH 29/31] xtensa: Apply a few minor fixes + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Use can_create_pseudo_p(), instead of using individual + reload_in_progress and reload_completed. 
+ (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), + the existing predicate function. + (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): + Use the standard RTX code predicate macros such as MEM_P, + SYMBOL_REF_P and/or CONST_INT_P. + * config/xtensa/xtensa.md: Avoid using numeric literals to determine + if callee-saved register, at the split patterns for indirect sibcall + fixups. +--- + gcc/config/xtensa/xtensa.c | 16 ++++++++-------- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 2 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 1d64e2c76..595c5f96f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -743,7 +743,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- invert = ((GET_CODE (cmp1) == CONST_INT) ++ invert = (CONST_INT_P (cmp1) + ? p_info->invert_const + : p_info->invert_reg); + +@@ -1200,7 +1200,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + } + } + +- if (!(reload_in_progress | reload_completed) ++ if (can_create_pseudo_p () + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + +@@ -1603,7 +1603,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) + return 0; + + /* If no 4-byte aligned, loop count should be treated as the +@@ -2160,7 +2160,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) + static char result[64]; + rtx tgt = operands[callop]; + +- if (GET_CODE (tgt) == CONST_INT) ++ if (CONST_INT_P (tgt)) + sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", + INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) +@@ -4318,17 +4318,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + static bool +-xtensa_is_insn_L32R_p(const rtx_insn *insn) ++xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + + if (GET_CODE (x) == SET) + { +- x = XEXP (x, 1); +- if (GET_CODE (x) == MEM) ++ x = SET_SRC (x); ++ if (MEM_P (x)) + { + x = XEXP (x, 0); +- return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) + && CONSTANT_POOL_ADDRESS_P (x); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 124548dfe..6f51a5357 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1251,14 +1251,14 @@ + int i = 0; + rtx x = XEXP (operands[1], 0); + long l[2]; +- if (GET_CODE (x) == SYMBOL_REF ++ if (SYMBOL_REF_P (x) + && CONSTANT_POOL_ADDRESS_P (x)) + x = get_pool_constant (x); + else if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS +- && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_P (XEXP (x, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))); + i = INTVAL (XEXP (x, 1)); +@@ -2217,7 +2217,7 @@ + (match_operand 1 ""))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" + [(set (reg:SI A10_REG) + (match_dup 0)) + (call (mem:SI (reg:SI A10_REG)) +@@ -2250,7 +2250,7 @@ + (match_operand 2 "")))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" + [(set (reg:SI A10_REG) + (match_dup 1)) + (set (match_dup 0) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch b/patches/gcc10.1/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch new file mode 100644 index 0000000..d65c44d --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch @@ -0,0 +1,56 @@ +From 48c657f23a61a41a46842b25bce4f287a56223a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 20 Jun 2022 01:56:16 +0900 +Subject: [PATCH 30/31] xtensa: Fix RTL insn cost estimation about relaxed MOVI + instructions + +These instructions will all be converted to L32R ones with litpool entries +by the assembler. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_is_insn_L32R_p): + Consider relaxed MOVI instructions as L32R. 
+--- + gcc/config/xtensa/xtensa.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 595c5f96f..b92ec9caa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4322,17 +4322,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + +- if (GET_CODE (x) == SET) ++ if (GET_CODE (x) != SET) ++ return false; ++ ++ x = XEXP (x, 1); ++ if (MEM_P (x)) + { +- x = SET_SRC (x); +- if (MEM_P (x)) +- { +- x = XEXP (x, 0); +- return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +- && CONSTANT_POOL_ADDRESS_P (x); +- } ++ x = XEXP (x, 0); ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); + } + ++ /* relaxed MOVI instructions, that will be converted to L32R by the ++ assembler. */ ++ if (CONST_INT_P (x) ++ && ! xtensa_simm12b (INTVAL (x))) ++ return true; ++ + return false; + } + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0036-Fix-buffer-overflow.patch b/patches/gcc10.1/gcc-xtensa-0036-Fix-buffer-overflow.patch new file mode 100644 index 0000000..35f9f10 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0036-Fix-buffer-overflow.patch @@ -0,0 +1,33 @@ +From 75c341c7de5c6f325d6ded7bd91d77793fe358d5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 22 Jun 2022 04:04:45 +0900 +Subject: [PATCH 31/31] xtensa: Fix buffer overflow + +Fortify buffer overflow message reported. +(see https://github.com/earlephilhower/esp-quick-toolchain/issues/36) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2_internal): + Enlarge the buffer that is obviously smaller than the template + string given to sprintf(). 
+--- + gcc/config/xtensa/xtensa.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6f51a5357..81b016859 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -536,7 +536,7 @@ + { + rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); + const char *init = "ssai\t8\;"; +- static char result[64]; ++ static char result[128]; + if (prev_insn && NONJUMP_INSN_P (prev_insn)) + { + rtx x = PATTERN (prev_insn); +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch b/patches/gcc10.1/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch new file mode 100644 index 0000000..0ea6d48 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch @@ -0,0 +1,95 @@ +From 9308911796a46bd689bbcc1cedef1b63ae9b871e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 26 Jun 2022 14:07:56 +0900 +Subject: [PATCH] xtensa: Optimize integer constant addition that is + between -32896 and 32639 + +Such constants are often subject to the constant synthesis: + + int test(int a) { + return a - 31999; + } + + test: + movi a3, 1 + addmi a3, a3, -0x7d00 + add a2, a2, a3 + ret + +This patch optimizes such case as follows: + + test: + addi a2, a2, 1 + addmi a2, a2, -0x7d00 + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Suppress unnecessary emitting nop insn in the split patterns for + integer/FP constant synthesis, and add new peephole2 pattern that + folds such synthesized additions. +--- + gcc/config/xtensa/xtensa.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 81b016859..b697e16db 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1036,6 +1036,7 @@ + FAIL; + if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) + emit_move_insn (operands[0], x); ++ DONE; + }) + + ;; 16-bit Integer moves +@@ -1277,6 +1278,7 @@ + x = gen_rtx_REG (SImode, REGNO (operands[0])); + if (! xtensa_constantsynth (x, l[i])) + emit_move_insn (x, GEN_INT (l[i])); ++ DONE; + }) + + ;; 64-bit floating point moves +@@ -2813,3 +2815,36 @@ + && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) + FAIL; + }) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand")) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (match_operand:SI 2 "const_int_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (plus:SI (match_operand:SI 4 "register_operand") ++ (match_dup 0)))] ++ "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]), ++ (-128 - 32768), (127 + 32512)) ++ && REGNO (operands[0]) != REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[4]) ++ && peep2_reg_dead_p (3, operands[0])" ++ [(set (match_dup 3) ++ (plus:SI (match_dup 4) ++ (match_dup 1))) ++ (set (match_dup 3) ++ (plus:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]); ++ int imm0, imm1; ++ value += 128; ++ if (value > 32512) ++ imm1 = 32512; ++ else ++ imm1 = value & ~255; ++ imm0 = value - imm1 - 128; ++ operands[1] = GEN_INT (imm0); ++ operands[2] = GEN_INT (imm1); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch b/patches/gcc10.1/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch new file mode 100644 index 0000000..8fc23d8 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch @@ -0,0 +1,92 @@ +From 7bed998154345cb072cd425b5d61734d3e0bac5d Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 1 Jul 2022 13:39:34 +0900 +Subject: [PATCH] xtensa: Minor fix for FP constant synthesis + +This patch fixes an non-fatal issue about negative 
constant values derived +from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'. + +And also replaces the dedicated code in FP constant synthesis split +pattern with the appropriate existing function call. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + In FP constant synthesis split pattern, subcontract to + avoid_constant_pool_reference() as in the case of integer, + because it can handle well too. And cast to int32_t before + calling xtensa_constantsynth() in order to ignore upper 32-bit. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_double.c: + Modify in order to catch the issue. +--- + gcc/config/xtensa/xtensa.md | 35 +++++-------------- + .../gcc.target/xtensa/constsynth_double.c | 2 +- + 2 files changed, 9 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b697e16db..6ef84b4f2 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1249,35 +1249,16 @@ + "! optimize_debug && reload_completed" + [(const_int 0)] + { +- int i = 0; +- rtx x = XEXP (operands[1], 0); +- long l[2]; +- if (SYMBOL_REF_P (x) +- && CONSTANT_POOL_ADDRESS_P (x)) +- x = get_pool_constant (x); +- else if (GET_CODE (x) == CONST) +- { +- x = XEXP (x, 0); +- gcc_assert (GET_CODE (x) == PLUS +- && SYMBOL_REF_P (XEXP (x, 0)) +- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) +- && CONST_INT_P (XEXP (x, 1))); +- i = INTVAL (XEXP (x, 1)); +- gcc_assert (i == 0 || i == 4); +- i /= 4; +- x = get_pool_constant (XEXP (x, 0)); +- } +- else +- gcc_unreachable (); +- if (GET_MODE (x) == SFmode) +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); +- else if (GET_MODE (x) == DFmode) +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); +- else ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ long l; ++ HOST_WIDE_INT value; ++ if (! 
CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode) + FAIL; ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + x = gen_rtx_REG (SImode, REGNO (operands[0])); +- if (! xtensa_constantsynth (x, l[i])) +- emit_move_insn (x, GEN_INT (l[i])); ++ value = (int32_t)l; ++ if (! xtensa_constantsynth (x, value)) ++ emit_move_insn (x, GEN_INT (value)); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +index 890ca5047..5fba6a986 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -5,7 +5,7 @@ void test(unsigned int count, double array[]) + { + unsigned int i; + for (i = 0; i < count; ++i) +- array[i] = 1.0; ++ array[i] = 8.988474246316506e+307; + } + + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch b/patches/gcc10.1/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch new file mode 100644 index 0000000..fcb3c72 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch @@ -0,0 +1,132 @@ +From afcf727f9c4174b104b594cbd14cba9c57de71d1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 15 Jul 2022 08:46:55 +0900 +Subject: [PATCH] xtensa: constantsynth: Make try to find shorter + instruction + +This patch allows the constant synthesis to choose shorter instruction +if possible. + + /* example */ + int test(void) { + return 128 << 8; + } + + ;; before + test: + movi a2, 0x100 + addmi a2, a2, 0x7f00 + ret.n + + ;; after + test: + movi.n a2, 1 + slli a2, a2, 15 + ret.n + +When the Code Density Option is configured, the latter is one byte smaller +than the former. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_constantsynth): Remove. 
+ (xtensa_constantsynth_2insn): Change to try all three synthetic + methods and to use the one that fits the immediate value of + the seed into a Narrow Move Immediate instruction "MOVI.N" + when the Code Density Option is configured. +--- + gcc/config/xtensa/xtensa.c | 58 +++++++++++++++++++------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b92ec9caa..a5330e52b 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1026,35 +1026,35 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + load-immediate / arithmetic ones, instead of a L32R instruction + (plus a constant in litpool). */ + +-static void +-xtensa_emit_constantsynth (rtx dst, enum rtx_code code, +- HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, +- rtx (*gen_op)(rtx, HOST_WIDE_INT), +- HOST_WIDE_INT imm2) +-{ +- gcc_assert (REG_P (dst)); +- emit_move_insn (dst, GEN_INT (imm0)); +- emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, +- dst, GEN_INT (imm1))); +- if (gen_op) +- emit_move_insn (dst, gen_op (dst, imm2)); +-} +- + static int + xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + rtx (*gen_op)(rtx, HOST_WIDE_INT), + HOST_WIDE_INT op_imm) + { +- int shift = exact_log2 (srcval + 1); ++ HOST_WIDE_INT imm = INT_MAX; ++ rtx x = NULL_RTX; ++ int shift; + ++ gcc_assert (REG_P (dst)); ++ ++ shift = exact_log2 (srcval + 1); + if (IN_RANGE (shift, 1, 31)) + { +- xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, +- gen_op, op_imm); +- return 1; ++ imm = -1; ++ x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift)); + } + +- if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ ++ shift = ctz_hwi (srcval); ++ if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95))) ++ && xtensa_simm12b (srcval >> shift)) ++ { ++ imm = srcval >> shift; ++ x = gen_ashlsi3 (dst, dst, GEN_INT (shift)); ++ } ++ ++ if ((!x || (TARGET_DENSITY && ! 
IN_RANGE (imm, -32, 95))) ++ && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) + { + HOST_WIDE_INT imm0, imm1; + +@@ -1067,19 +1067,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + imm0 = srcval - imm1; + if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) + imm0 -= 256, imm1 += 256; +- xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); +- return 1; ++ imm = imm0; ++ x = gen_addsi3 (dst, dst, GEN_INT (imm1)); + } + +- shift = ctz_hwi (srcval); +- if (xtensa_simm12b (srcval >> shift)) +- { +- xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, +- gen_op, op_imm); +- return 1; +- } ++ if (!x) ++ return 0; + +- return 0; ++ emit_move_insn (dst, GEN_INT (imm)); ++ emit_insn (x); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, op_imm)); ++ ++ return 1; + } + + static rtx +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch b/patches/gcc10.1/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch new file mode 100644 index 0000000..acf6d99 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch @@ -0,0 +1,177 @@ +From 5776497b68fcce6bf31835cf0a4d693e336bb2ca Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 14 Jul 2022 20:47:46 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND with imm1" followed by + "branch if (not) equal to imm2" + +This patch enhances the effectiveness of the previously posted one: +"xtensa: Optimize bitwise AND operation with some specific forms of constants". 
+ + /* example */ + extern void foo(int); + void test(int a) { + if ((a & (-1U << 8)) == (128 << 8)) /* 0 or one of "b4const" */ + foo(a); + } + + ;; before + .global test + test: + movi a3, -0x100 + movi.n a4, 1 + and a3, a2, a3 + slli a4, a4, 15 + bne a3, a4, .L3 + j.l foo, a9 + .L1: + ret.n + + ;; after + .global test + test: + srli a3, a2, 8 + bnei a3, 128, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): If the immediate for bitwise AND is + represented as '-(1 << N)', decrease the lower bound of N from 12 + to 1. And the other immediate for conditional branch is now no + longer limited to zero, but also one of some positive integers. + Finally, remove the checks of some conditions, because the comparison + expressions that don't satisfy such checks are determined as + compile-time constants and thus will be optimized away before + RTL expansion. +--- + gcc/config/xtensa/xtensa.md | 73 ++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6ef84b4f2..ca8b3913d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1721,63 +1721,78 @@ + + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && 
xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL (operands[1]) + 1)))" + "#" + "&& can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (ashift:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) << shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set (attr "length") +- (if_then_else (match_test "TARGET_DENSITY +- && INTVAL (operands[1]) == 0x7FFFFFFF") +- (const_int 5) +- (const_int 6)))]) ++ (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 0x7FFFFFFF) ++ && INTVAL (operands[2]) == 0") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[1]) == 0x7FFFFFFF ++ || INTVAL (operands[2]) == 0)") ++ (const_int 5) ++ (const_int 6))))]) + + (define_insn_and_split "*masktrue_const_negative_pow2" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && xtensa_b4const_or_zero (INTVAL (operands[2]) >> floor_log2 (-INTVAL (operands[1])))" + "#" + "&& 
can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (lshiftrt:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = floor_log2 (-INTVAL (operands[1])); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "6")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[2]) == 0") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn_and_split "*masktrue_const_shifted_mask" + [(set (pc) +@@ -1787,8 +1802,8 @@ + (match_operand:SI 2 "const_int_operand" "i")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +- "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 +- && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "/* (INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 // can be omitted ++ && */ xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-Improve-initialization-of-objects-when-the-initializ.patch b/patches/gcc10.2/gcc-Improve-initialization-of-objects-when-the-initializ.patch new file mode 100644 index 0000000..00fdb45 --- /dev/null +++ b/patches/gcc10.2/gcc-Improve-initialization-of-objects-when-the-initializ.patch @@ -0,0 +1,39 @@ +From a2cde0c6443c440c2a2b72b5eea060229a0cff57 Mon Sep 17 00:00:00 2001 +From: Jeff Law +Date: Sat, 9 Jul 2022 11:11:00 -0400 +Subject: [PATCH] [RFA] Improve initialization of 
objects when the initializer + +gcc/ + + * expr.c (store_expr): Identify trailing NULs in a STRING_CST + initializer and use clear_storage rather than copying the + NULs to the destination array. +--- + gcc/expr.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f33..6ff393462 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -5723,6 +5723,17 @@ store_expr (tree exp, rtx target, int call_param_p, + } + + str_copy_len = TREE_STRING_LENGTH (str); ++ ++ /* Trailing NUL bytes in EXP will be handled by the call to ++ clear_storage, which is more efficient than copying them from ++ the STRING_CST, so trim those from STR_COPY_LEN. */ ++ while (str_copy_len) ++ { ++ if (TREE_STRING_POINTER (str)[str_copy_len - 1]) ++ break; ++ str_copy_len--; ++ } ++ + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) + { + str_copy_len += STORE_MAX_PIECES - 1; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch b/patches/gcc10.2/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch new file mode 100644 index 0000000..4c5418f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch @@ -0,0 +1,44 @@ +From 2065a3fccb11e28ebcc42aa46c52a40b0fae9bea Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Sun, 21 Nov 2021 20:18:31 -0600 +Subject: [PATCH 01/31] xtensa: Fix non-robust split condition in + define_insn_and_split + +This patch is to fix some non-robust split conditions in some +define_insn_and_splits, to make each of them applied on top of +the corresponding condition for define_insn part, otherwise the +splitting could perform unexpectedly. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi_internal, movdf_internal): Fix split + condition. 
+--- + gcc/config/xtensa/xtensa.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a8e59ee9..123916957 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -782,7 +782,7 @@ + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +@@ -1058,7 +1058,7 @@ + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc10.2/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc10.2/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! 
optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 - diff --git a/patches/gcc10.2/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch b/patches/gcc10.2/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch deleted file mode 100644 index eb06969..0000000 --- a/patches/gcc10.2/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch +++ /dev/null @@ -1,3186 +0,0 @@ -From 989fc2c516206d7cf70177a416815f91998e2131 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Fri, 27 May 2022 21:34:37 +0900 -Subject: [PATCH 1/3] xtensa: Backport patches from upstream/master - -2b5b8610e985e23a0c2e0272339ab074a750e240 "xtensa: Fix non-robust split condition in define_insn_and_split" -7e5baa7e6f4caced6bdaef6d866d19e7656d8a16 "xtensa: fix -Wformat-diag warnings." -d543bac1631700f0da30d5ca555296f4938a82c6 "xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi" -112447f8564c0307c5da99a4094a3a99f204239f "xtensa: Reflect the 32-bit Integer Divide Option" -b753405a5f0d45eea97f4cc7df2c2089401b08bf "xtensa: Simplify EXTUI instruction maskimm validations" -9b251fe2e39a49c0d3ecd34cf8c5d55544efd159 "xtensa: Make use of IN_RANGE macro where appropriate" -3397563ad6c8fc5d9675faf507e52dd2ed284202 "xtensa: Fix instruction counting regarding block move expansion" -6454b4a8f5d90dd355c3c7e31a592a439223b645 "xtensa: Add setmemsi insn pattern" -9aad2b22436d5346fa224e5c14439dcef36cf3dd "xtensa: Improve bswap[sd]i2 insn patterns" -e94c6dbfb57a862dd8a8685eabc4886ad1aaea25 "xtensa: fix PR target/105879" -2fcc69d8ce4eddf6dea878a5383254d366e1bb14 "xtensa: Implement bswaphi2 insn pattern" -9777d446e2148ef9a6e9f35db3f4eab99ee8812c "xtensa: Make one_cmplsi2 optimizer-friendly" -e44e7face13f38f9b228e2619786ba0add9ef77b "xtensa: Optimize '(~x & y)' to '((x & y) ^ y)'" -29dc90a580bf45f503ed89eb1dc63b5676db776b "xtensa: Add clrsbsi2 insn 
pattern" -9489a1ab05ad1bda7126da5513f08282da3e531d "xtensa: Tweak some widen multiplications" -fddf0e1057fe24eff0d894fbc2959b4086464a96 "xtensa: Consider the Loop Option when setmemsi is expanded to small loop" -ccd02e734e0f1742629403b46e5b1c650b00fd65 "xtensa: Improve instruction cost estimation and suggestion" -cd02f15f1aecc45b2c2feae16840503549508619 "xtensa: Improve constant synthesis for both integer and floating-point" -1c68ec1f8ab531fba56cccf549ffe592bf622821 "xtensa: Improve shift operations more" -e1b193c1cce3a975a9ed60dd0f30182fe0255d7c "xtensa: Simplify conditional branch/move insn patterns" -70ce04ca353bb0cda8321b91a77c2477e26d339b "xtensa: Make use of BALL/BNALL instructions" -077438933cf94f00cc5edf974338c11ba4bf7a39 "xtensa: Optimize bitwise AND operation with some specific forms of constants" -96518f714e3fab53a966a05b8d48011e27c1a718 "xtensa: Document new -mextra-l32r-costs= Xtensa-specific option" -43b0c56fda4bc990e8ee8d6a0b376de7b663bb06 "xtensa: Add support for sibling call optimization" -c95e307e3a978166cd5d6817ec9d8293825ff3fb "xtensa: Add some dedicated patterns that correspond to GIMPLE canonicalizations" -cfad4856fa46abc878934a9433d0bfc2482ccf00 "xtensa: Eliminate unwanted reg-reg moves during DFmode input reloads" -ce3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9 "xtensa: Eliminate [DS]Cmode hard register clobber that is immediately followed by whole overwrite the register" -479b6f449ee999501ad6eff0b7db8d0cd5b2d28d "xtensa: Defer storing integer constants into litpool until reload" ---- - gcc/config/xtensa/constraints.md | 10 +- - gcc/config/xtensa/predicates.md | 41 +- - gcc/config/xtensa/xtensa-protos.h | 11 +- - gcc/config/xtensa/xtensa.c | 733 +++++++++--- - gcc/config/xtensa/xtensa.h | 7 +- - gcc/config/xtensa/xtensa.md | 1024 +++++++++++++---- - gcc/config/xtensa/xtensa.opt | 6 +- - gcc/doc/invoke.texi | 11 +- - gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 + - gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 + - 
gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 + - gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 + - .../gcc.target/xtensa/check_zero_byte.c | 9 + - .../gcc.target/xtensa/constsynth_2insns.c | 44 + - .../gcc.target/xtensa/constsynth_3insns.c | 24 + - .../gcc.target/xtensa/constsynth_double.c | 11 + - .../gcc.target/xtensa/funnel_shifter.c | 17 + - .../gcc.target/xtensa/one_cmpl_abs.c | 9 + - gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 + - libgcc/config/xtensa/lib1funcs.S | 23 + - libgcc/config/xtensa/t-xtensa | 2 +- - 21 files changed, 1796 insertions(+), 350 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c - -diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md -index 2062c8816..13b3daafc 100644 ---- a/gcc/config/xtensa/constraints.md -+++ b/gcc/config/xtensa/constraints.md -@@ -92,7 +92,7 @@ - "An integer constant in the range @minus{}32-95 for use with MOVI.N - instructions." - (and (match_code "const_int") -- (match_test "ival >= -32 && ival <= 95"))) -+ (match_test "IN_RANGE (ival, -32, 95)"))) - - (define_constraint "N" - "An unsigned 8-bit integer constant shifted left by 8 bits for use -@@ -103,7 +103,7 @@ - (define_constraint "O" - "An integer constant that can be used in ADDI.N instructions." 
- (and (match_code "const_int") -- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) -+ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) - - (define_constraint "P" - "An integer constant that can be used as a mask value in an EXTUI -@@ -113,8 +113,10 @@ - - (define_constraint "Y" - "A constant that can be used in relaxed MOVI instructions." -- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -- (match_test "TARGET_AUTO_LITPOOLS"))) -+ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -+ (match_test "TARGET_AUTO_LITPOOLS")) -+ (and (match_code "const_int") -+ (match_test "can_create_pseudo_p ()")))) - - ;; Memory constraints. Do not use define_memory_constraint here. Doing so - ;; causes reload to force some constants into the constant pool, but since -diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md -index eb52b05aa..633cc6264 100644 ---- a/gcc/config/xtensa/predicates.md -+++ b/gcc/config/xtensa/predicates.md -@@ -25,8 +25,7 @@ - - (define_predicate "addsubx_operand" - (and (match_code "const_int") -- (match_test "INTVAL (op) >= 1 -- && INTVAL (op) <= 3"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) - - (define_predicate "arith_operand" - (ior (and (match_code "const_int") -@@ -53,9 +52,19 @@ - (match_test "xtensa_mask_immediate (INTVAL (op))")) - (match_operand 0 "register_operand"))) - -+(define_predicate "shifted_mask_operand" -+ (match_code "const_int") -+{ -+ HOST_WIDE_INT mask = INTVAL (op); -+ int shift = ctz_hwi (mask); -+ -+ return IN_RANGE (shift, 1, 31) -+ && xtensa_mask_immediate ((uint32_t)mask >> shift); -+}) -+ - (define_predicate "extui_fldsz_operand" - (and (match_code "const_int") -- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) - - (define_predicate "sext_operand" - (if_then_else (match_test "TARGET_SEXT") -@@ -64,7 +73,7 @@ - - (define_predicate "sext_fldsz_operand" - (and (match_code 
"const_int") -- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) -+ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) - - (define_predicate "lsbitnum_operand" - (and (match_code "const_int") -@@ -138,8 +147,9 @@ - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) - (ior (and (match_code "const_int") -- (match_test "GET_MODE_CLASS (mode) == MODE_INT -- && xtensa_simm12b (INTVAL (op))")) -+ (match_test "(GET_MODE_CLASS (mode) == MODE_INT -+ && xtensa_simm12b (INTVAL (op))) -+ || can_create_pseudo_p ()")) - (and (match_code "const_int,const_double,const,symbol_ref,label_ref") - (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) - && CONSTANT_P (op) -@@ -156,6 +166,19 @@ - (and (match_code "const_int") - (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) - -+(define_predicate "reload_operand" -+ (match_code "mem") -+{ -+ const_rtx addr = XEXP (op, 0); -+ if (REG_P (addr)) -+ return REGNO (addr) == A1_REG; -+ if (GET_CODE (addr) == PLUS) -+ return REG_P (XEXP (addr, 0)) -+ && REGNO (XEXP (addr, 0)) == A1_REG -+ && CONST_INT_P (XEXP (addr, 1)); -+ return false; -+}) -+ - (define_predicate "branch_operator" - (match_code "eq,ne,lt,ge")) - -@@ -165,9 +188,15 @@ - (define_predicate "boolean_operator" - (match_code "eq,ne")) - -+(define_predicate "logical_shift_operator" -+ (match_code "ashift,lshiftrt")) -+ - (define_predicate "xtensa_cstoresi_operator" - (match_code "eq,ne,gt,ge,lt,le")) - -+(define_predicate "xtensa_shift_per_byte_operator" -+ (match_code "ashift,ashiftrt,lshiftrt")) -+ - (define_predicate "tls_symbol_operand" - (and (match_code "symbol_ref") - (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) -diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h -index 18d803581..75ed3bfb0 100644 ---- a/gcc/config/xtensa/xtensa-protos.h -+++ b/gcc/config/xtensa/xtensa-protos.h -@@ -41,18 +41,23 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); - extern int 
xtensa_expand_conditional_move (rtx *, int); - extern int xtensa_expand_scc (rtx *, machine_mode); - extern int xtensa_expand_block_move (rtx *); -+extern int xtensa_expand_block_set_unrolled_loop (rtx *); -+extern int xtensa_expand_block_set_small_loop (rtx *); - extern void xtensa_split_operand_pair (rtx *, machine_mode); -+extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); - extern int xtensa_emit_move_sequence (rtx *, machine_mode); - extern rtx xtensa_copy_incoming_a7 (rtx); - extern void xtensa_expand_nonlocal_goto (rtx *); - extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); - extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); - extern void xtensa_emit_loop_end (rtx_insn *, rtx *); --extern char *xtensa_emit_branch (bool, bool, rtx *); --extern char *xtensa_emit_bit_branch (bool, bool, rtx *); -+extern char *xtensa_emit_branch (bool, rtx *); - extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); -+extern void xtensa_prepare_expand_call (int, rtx *); - extern char *xtensa_emit_call (int, rtx *); -+extern char *xtensa_emit_sibcall (int, rtx *); - extern bool xtensa_tls_referenced_p (rtx); -+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); - - #ifdef TREE_CODE - extern void init_cumulative_args (CUMULATIVE_ARGS *, int); -@@ -70,7 +75,7 @@ extern int xtensa_dbx_register_number (int); - extern long compute_frame_size (poly_int64); - extern bool xtensa_use_return_instruction_p (void); - extern void xtensa_expand_prologue (void); --extern void xtensa_expand_epilogue (void); -+extern void xtensa_expand_epilogue (bool); - extern void order_regs_for_local_alloc (void); - extern enum reg_class xtensa_regno_to_class (int regno); - extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 6cd9d5528..5b1aa9b23 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -55,6 +55,7 @@ along with GCC; see the 
file COPYING3. If not see - #include "dumpfile.h" - #include "hw-doloop.h" - #include "rtl-iter.h" -+#include "insn-attr.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -117,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = - - static void xtensa_option_override (void); - static enum internal_test map_test_to_internal_test (enum rtx_code); --static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); -+static rtx gen_int_relational (enum rtx_code, rtx, rtx); - static rtx gen_float_relational (enum rtx_code, rtx, rtx); - static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); - static rtx fixup_subreg_mem (rtx); -@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, - static section *xtensa_select_rtx_section (machine_mode, rtx, - unsigned HOST_WIDE_INT); - static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); -+static int xtensa_insn_cost (rtx_insn *, bool); - static int xtensa_register_move_cost (machine_mode, reg_class_t, - reg_class_t); - static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); -@@ -185,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); - static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); - static HOST_WIDE_INT xtensa_starting_frame_offset (void); - static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); -+static bool xtensa_function_ok_for_sibcall (tree, tree); - - - -@@ -208,6 +211,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS xtensa_rtx_costs -+#undef TARGET_INSN_COST -+#define TARGET_INSN_COST xtensa_insn_cost - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 - -@@ -333,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #undef 
TARGET_HAVE_SPECULATION_SAFE_VALUE - #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed - -+#undef TARGET_FUNCTION_OK_FOR_SIBCALL -+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - -@@ -341,42 +349,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; - bool - xtensa_simm8 (HOST_WIDE_INT v) - { -- return v >= -128 && v <= 127; -+ return IN_RANGE (v, -128, 127); - } - - - bool - xtensa_simm8x256 (HOST_WIDE_INT v) - { -- return (v & 255) == 0 && (v >= -32768 && v <= 32512); -+ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); - } - - - bool - xtensa_simm12b (HOST_WIDE_INT v) - { -- return v >= -2048 && v <= 2047; -+ return IN_RANGE (v, -2048, 2047); - } - - - static bool - xtensa_uimm8 (HOST_WIDE_INT v) - { -- return v >= 0 && v <= 255; -+ return IN_RANGE (v, 0, 255); - } - - - static bool - xtensa_uimm8x2 (HOST_WIDE_INT v) - { -- return (v & 1) == 0 && (v >= 0 && v <= 510); -+ return (v & 1) == 0 && IN_RANGE (v, 0, 510); - } - - - static bool - xtensa_uimm8x4 (HOST_WIDE_INT v) - { -- return (v & 3) == 0 && (v >= 0 && v <= 1020); -+ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); - } - - -@@ -446,19 +454,7 @@ xtensa_b4constu (HOST_WIDE_INT v) - bool - xtensa_mask_immediate (HOST_WIDE_INT v) - { --#define MAX_MASK_SIZE 16 -- int mask_size; -- -- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) -- { -- if ((v & 1) == 0) -- return false; -- v = v >> 1; -- if (v == 0) -- return true; -- } -- -- return false; -+ return IN_RANGE (exact_log2 (v + 1), 1, 16); - } - - -@@ -539,7 +535,7 @@ smalloffset_mem_p (rtx op) - return FALSE; - - val = INTVAL (offset); -- return (val & 3) == 0 && (val >= 0 && val <= 60); -+ return (val & 3) == 0 && IN_RANGE (val, 0, 60); - } - } - return FALSE; -@@ -678,8 +674,7 @@ map_test_to_internal_test (enum rtx_code test_code) - static rtx - gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - rtx cmp0, /* 
first operand to compare */ -- rtx cmp1, /* second operand to compare */ -- int *p_invert /* whether branch needs to reverse test */) -+ rtx cmp1 /* second operand to compare */) - { - struct cmp_info - { -@@ -711,6 +706,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - enum internal_test test; - machine_mode mode; - struct cmp_info *p_info; -+ int invert; - - test = map_test_to_internal_test (test_code); - gcc_assert (test != ITEST_MAX); -@@ -747,9 +743,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - } - - /* See if we need to invert the result. */ -- *p_invert = ((GET_CODE (cmp1) == CONST_INT) -- ? p_info->invert_const -- : p_info->invert_reg); -+ invert = ((GET_CODE (cmp1) == CONST_INT) -+ ? p_info->invert_const -+ : p_info->invert_reg); - - /* Comparison to constants, may involve adding 1 to change a LT into LE. - Comparison between two registers, may involve switching operands. */ -@@ -766,7 +762,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - cmp1 = temp; - } - -- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); -+ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) -+ : p_info->test_code, -+ VOIDmode, cmp0, cmp1); - } - - -@@ -825,45 +823,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) - enum rtx_code test_code = GET_CODE (operands[0]); - rtx cmp0 = operands[1]; - rtx cmp1 = operands[2]; -- rtx cmp; -- int invert; -- rtx label1, label2; -+ rtx cmp, label; - - switch (mode) - { -+ case E_SFmode: -+ if (TARGET_HARD_FLOAT) -+ { -+ cmp = gen_float_relational (test_code, cmp0, cmp1); -+ break; -+ } -+ /* FALLTHRU */ -+ - case E_DFmode: - default: - fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); - - case E_SImode: -- invert = FALSE; -- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); -- break; -- -- case E_SFmode: -- if (!TARGET_HARD_FLOAT) -- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, -- cmp0, cmp1)); -- invert = FALSE; -- cmp = gen_float_relational (test_code, cmp0, cmp1); -+ cmp = gen_int_relational (test_code, cmp0, cmp1); - break; - } - - /* Generate the branch. */ -- -- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); -- label2 = pc_rtx; -- -- if (invert) -- { -- label2 = label1; -- label1 = pc_rtx; -- } -- -+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); - emit_jump_insn (gen_rtx_SET (pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, -- label1, -- label2))); -+ label, -+ pc_rtx))); - } - - -@@ -1035,6 +1021,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) - } - - -+/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) -+ into dst with synthesizing a such constant value from a sequence of -+ load-immediate / arithmetic ones, instead of a L32R instruction -+ (plus a constant in litpool). 
*/ -+ -+static void -+xtensa_emit_constantsynth (rtx dst, enum rtx_code code, -+ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT imm2) -+{ -+ gcc_assert (REG_P (dst)); -+ emit_move_insn (dst, GEN_INT (imm0)); -+ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, -+ dst, GEN_INT (imm1))); -+ if (gen_op) -+ emit_move_insn (dst, gen_op (dst, imm2)); -+} -+ -+static int -+xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT op_imm) -+{ -+ int shift = exact_log2 (srcval + 1); -+ -+ if (IN_RANGE (shift, 1, 31)) -+ { -+ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) -+ { -+ HOST_WIDE_INT imm0, imm1; -+ -+ if (srcval < -32768) -+ imm1 = -32768; -+ else if (srcval > 32512) -+ imm1 = 32512; -+ else -+ imm1 = srcval & ~255; -+ imm0 = srcval - imm1; -+ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) -+ imm0 -= 256, imm1 += 256; -+ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); -+ return 1; -+ } -+ -+ shift = ctz_hwi (srcval); -+ if (xtensa_simm12b (srcval >> shift)) -+ { -+ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static rtx -+xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) -+{ -+ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); -+} -+ -+static rtx -+xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) -+{ -+ return imm == 7 -+ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), -+ reg) -+ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, -+ GEN_INT (floor_log2 (imm - 1))), -+ reg); -+} -+ -+int -+xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) -+{ -+ /* No need for synthesizing for what fits into MOVI instruction. */ -+ if (xtensa_simm12b (srcval)) -+ return 0; -+ -+ /* 2-insns substitution. 
*/ -+ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) -+ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) -+ return 1; -+ -+ /* 3-insns substitution. */ -+ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) -+ { -+ int shift, divisor; -+ -+ /* 2-insns substitution followed by SLLI. */ -+ shift = ctz_hwi (srcval); -+ if (IN_RANGE (shift, 1, 31) && -+ xtensa_constantsynth_2insn (dst, srcval >> shift, -+ xtensa_constantsynth_rtx_SLLI, -+ shift)) -+ return 1; -+ -+ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ -+ if (TARGET_ADDX) -+ for (divisor = 3; divisor <= 9; divisor += 2) -+ if (srcval % divisor == 0 && -+ xtensa_constantsynth_2insn (dst, srcval / divisor, -+ xtensa_constantsynth_rtx_ADDSUBX, -+ divisor)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+ - /* Emit insns to move operands[1] into operands[0]. - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move -@@ -1070,24 +1173,9 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - return 1; - } - -- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) -+ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 -+ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) - { -- /* Try to emit MOVI + SLLI sequence, that is smaller -- than L32R + literal. */ -- if (optimize >= 1 && ! 
optimize_debug && mode == SImode -- && CONST_INT_P (src) && register_operand (dst, mode)) -- { -- HOST_WIDE_INT srcval = INTVAL (src); -- int shift = ctz_hwi (srcval); -- -- if (xtensa_simm12b (srcval >> shift)) -- { -- emit_move_insn (dst, GEN_INT (srcval >> shift)); -- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); -- return 1; -- } -- } -- - src = force_const_mem (SImode, src); - operands[1] = src; - } -@@ -1315,7 +1403,7 @@ xtensa_expand_block_move (rtx *operands) - move_ratio = 4; - if (optimize > 2) - move_ratio = LARGEST_MOVE_RATIO; -- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ -+ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); - if (num_pieces > move_ratio) - return 0; - -@@ -1352,7 +1440,7 @@ xtensa_expand_block_move (rtx *operands) - temp[next] = gen_reg_rtx (mode[next]); - - x = adjust_address (src_mem, mode[next], offset_ld); -- emit_insn (gen_rtx_SET (temp[next], x)); -+ emit_move_insn (temp[next], x); - - offset_ld += next_amount; - bytes -= next_amount; -@@ -1362,9 +1450,9 @@ xtensa_expand_block_move (rtx *operands) - if (active[phase]) - { - active[phase] = false; -- -+ - x = adjust_address (dst_mem, mode[phase], offset_st); -- emit_insn (gen_rtx_SET (x, temp[phase])); -+ emit_move_insn (x, temp[phase]); - - offset_st += amount[phase]; - } -@@ -1375,6 +1463,246 @@ xtensa_expand_block_move (rtx *operands) - } - - -+/* Try to expand a block set operation to a sequence of RTL move -+ instructions. If not optimizing, or if the block size is not a -+ constant, or if the block is too large, or if the value to -+ initialize the block with is not a constant, the expansion -+ fails and GCC falls back to calling memset(). -+ -+ operands[0] is the destination -+ operands[1] is the length -+ operands[2] is the initialization value -+ operands[3] is the alignment */ -+ -+static int -+xtensa_sizeof_MOVI (HOST_WIDE_INT imm) -+{ -+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; -+} -+ -+int -+xtensa_expand_block_set_unrolled_loop (rtx *operands) -+{ -+ rtx dst_mem = operands[0]; -+ HOST_WIDE_INT bytes, value, align; -+ int expand_len, funccall_len; -+ rtx x, reg; -+ int offset; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ /* Insn expansion: a series of aligned memory stores. -+ Consist of S8I, S16I or S32I(.N). */ -+ expand_len += (bytes / align) * (TARGET_DENSITY -+ && align == 4 ? 2 : 3); -+ /* Insn expansion: the remainder, sub-aligned memory stores. -+ A combination of S8I and S16I as needed. */ -+ expand_len += ((bytes % align + 1) / 2) * 3; -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (dst_mem, 0); -+ if (!REG_P (x)) -+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); -+ switch (align) -+ { -+ case 1: -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (SImode, GEN_INT (value)); -+ -+ offset = 0; -+ do -+ { -+ int unit_size = MIN (bytes, align); -+ machine_mode unit_mode = (unit_size >= 4 ? SImode : -+ (unit_size >= 2 ? HImode : -+ QImode)); -+ unit_size = GET_MODE_SIZE (unit_mode); -+ -+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), -+ unit_mode == SImode ? reg -+ : convert_to_mode (unit_mode, reg, true)); -+ -+ offset += unit_size; -+ bytes -= unit_size; -+ } -+ while (bytes > 0); -+ -+ return 1; -+} -+ -+int -+xtensa_expand_block_set_small_loop (rtx *operands) -+{ -+ HOST_WIDE_INT bytes, value, align, count; -+ int expand_len, funccall_len; -+ rtx x, dst, end, reg; -+ machine_mode unit_mode; -+ rtx_code_label *label; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Totally-aligned block only. */ -+ if (bytes % align != 0) -+ return 0; -+ count = bytes / align; -+ -+ /* If the Loop Option (zero-overhead looping) is configured and active, -+ almost no restrictions about the length of the block. */ -+ if (! (TARGET_LOOPS && optimize)) -+ { -+ /* If 4-byte aligned, small loop substitution is almost optimal, -+ thus limited to only offset to the end address for ADDI/ADDMI -+ instruction. */ -+ if (align == 4 -+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) -+ return 0; -+ -+ /* If no 4-byte aligned, loop count should be treated as the -+ constraint. 
*/ -+ if (align != 4 -+ && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) -+ return 0; -+ } -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ -+ { -+ /* Insn translation: Either MOV(.N) or L32R w/litpool for the -+ loop count. */ -+ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) -+ : 3 + 4; -+ /* Insn translation: LOOP, the zero-overhead looping setup -+ instruction. */ -+ expand_len += 3; -+ /* Insn expansion: the loop body instructions. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3); -+ } -+ else /* NO zero-overhead looping */ -+ { -+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ -+ expand_len += bytes > 127 ? 3 -+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; -+ /* Insn expansion: the loop body and branch instruction. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). -+ For branch, BNE. */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3) + 3; -+ } -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (operands[0], 0); -+ if (!REG_P (x)) -+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); -+ dst = gen_reg_rtx (SImode); -+ emit_move_insn (dst, x); -+ end = gen_reg_rtx (SImode); -+ if (TARGET_LOOPS && optimize) -+ x = force_reg (SImode, operands[1] /* the length */); -+ else -+ x = operands[1]; -+ emit_insn (gen_addsi3 (end, dst, x)); -+ switch (align) -+ { -+ case 1: -+ unit_mode = QImode; -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ unit_mode = HImode; -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ unit_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (unit_mode, GEN_INT (value)); -+ -+ label = gen_label_rtx (); -+ emit_label (label); -+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); -+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); -+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); -+ -+ return 1; -+} -+ -+ - void - xtensa_expand_nonlocal_goto (rtx *operands) - { -@@ -1725,21 +2053,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) - - - char * --xtensa_emit_branch (bool inverted, bool immed, rtx *operands) -+xtensa_emit_branch (bool immed, rtx *operands) - { - static char result[64]; -- enum rtx_code code; -+ enum rtx_code code = GET_CODE (operands[3]); - const char *op; - -- code = GET_CODE (operands[3]); - switch (code) - { -- case EQ: op = inverted ? "ne" : "eq"; break; -- case NE: op = inverted ? "eq" : "ne"; break; -- case LT: op = inverted ? "ge" : "lt"; break; -- case GE: op = inverted ? "lt" : "ge"; break; -- case LTU: op = inverted ? "geu" : "ltu"; break; -- case GEU: op = inverted ? 
"ltu" : "geu"; break; -+ case EQ: op = "eq"; break; -+ case NE: op = "ne"; break; -+ case LT: op = "lt"; break; -+ case GE: op = "ge"; break; -+ case LTU: op = "ltu"; break; -+ case GEU: op = "geu"; break; - default: gcc_unreachable (); - } - -@@ -1758,32 +2085,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) - } - - --char * --xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) --{ -- static char result[64]; -- const char *op; -- -- switch (GET_CODE (operands[3])) -- { -- case EQ: op = inverted ? "bs" : "bc"; break; -- case NE: op = inverted ? "bc" : "bs"; break; -- default: gcc_unreachable (); -- } -- -- if (immed) -- { -- unsigned bitnum = INTVAL (operands[1]) & 0x1f; -- operands[1] = GEN_INT (bitnum); -- sprintf (result, "b%si\t%%0, %%d1, %%2", op); -- } -- else -- sprintf (result, "b%s\t%%0, %%1, %%2", op); -- -- return result; --} -- -- - char * - xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { -@@ -1792,12 +2093,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - const char *op; - - code = GET_CODE (operands[4]); -+ if (inverted) -+ code = reverse_condition (code); - if (isbool) - { - switch (code) - { -- case EQ: op = inverted ? "t" : "f"; break; -- case NE: op = inverted ? "f" : "t"; break; -+ case EQ: op = "f"; break; -+ case NE: op = "t"; break; - default: gcc_unreachable (); - } - } -@@ -1805,10 +2108,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { - switch (code) - { -- case EQ: op = inverted ? "nez" : "eqz"; break; -- case NE: op = inverted ? "eqz" : "nez"; break; -- case LT: op = inverted ? "gez" : "ltz"; break; -- case GE: op = inverted ? 
"ltz" : "gez"; break; -+ case EQ: op = "eqz"; break; -+ case NE: op = "nez"; break; -+ case LT: op = "ltz"; break; -+ case GE: op = "gez"; break; - default: gcc_unreachable (); - } - } -@@ -1819,6 +2122,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - } - - -+void -+xtensa_prepare_expand_call (int callop, rtx *operands) -+{ -+ rtx addr = XEXP (operands[callop], 0); -+ -+ if (flag_pic && SYMBOL_REF_P (addr) -+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -+ addr = gen_sym_PLT (addr); -+ -+ if (!call_insn_operand (addr, VOIDmode)) -+ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); -+} -+ -+ - char * - xtensa_emit_call (int callop, rtx *operands) - { -@@ -1837,6 +2154,24 @@ xtensa_emit_call (int callop, rtx *operands) - } - - -+char * -+xtensa_emit_sibcall (int callop, rtx *operands) -+{ -+ static char result[64]; -+ rtx tgt = operands[callop]; -+ -+ if (GET_CODE (tgt) == CONST_INT) -+ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", -+ INTVAL (tgt)); -+ else if (register_operand (tgt, VOIDmode)) -+ sprintf (result, "jx\t%%%d", callop); -+ else -+ sprintf (result, "j.l\t%%%d, a9", callop); -+ -+ return result; -+} -+ -+ - bool - xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) - { -@@ -2061,6 +2396,20 @@ xtensa_tls_referenced_p (rtx x) - } - - -+/* Helper function for "*shlrd_..." patterns. */ -+ -+enum rtx_code -+xtensa_shlrd_which_direction (rtx op0, rtx op1) -+{ -+ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) -+ return ASHIFT; /* shld */ -+ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) -+ return LSHIFTRT; /* shrd */ -+ -+ return UNKNOWN; -+} -+ -+ - /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ - - static bool -@@ -2364,7 +2713,7 @@ static void - printx (FILE *file, signed int val) - { - /* Print a hexadecimal value in a nice way. 
*/ -- if ((val > -0xa) && (val < 0xa)) -+ if (IN_RANGE (val, -9, 9)) - fprintf (file, "%d", val); - else if (val < 0) - fprintf (file, "-0x%x", -val); -@@ -2379,7 +2728,7 @@ void - print_operand (FILE *file, rtx x, int letter) - { - if (!x) -- error ("PRINT_OPERAND null pointer"); -+ error ("% null pointer"); - - switch (letter) - { -@@ -2424,17 +2773,11 @@ print_operand (FILE *file, rtx x, int letter) - case 'K': - if (GET_CODE (x) == CONST_INT) - { -- int num_bits = 0; - unsigned val = INTVAL (x); -- while (val & 1) -- { -- num_bits += 1; -- val = val >> 1; -- } -- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) -+ if (!xtensa_mask_immediate (val)) - fatal_insn ("invalid mask", x); - -- fprintf (file, "%d", num_bits); -+ fprintf (file, "%d", floor_log2 (val + 1)); - } - else - output_operand_lossage ("invalid %%K value"); -@@ -2584,7 +2927,7 @@ void - print_operand_address (FILE *file, rtx addr) - { - if (!addr) -- error ("PRINT_OPERAND_ADDRESS, null pointer"); -+ error ("%, null pointer"); - - switch (GET_CODE (addr)) - { -@@ -2750,7 +3093,7 @@ xtensa_call_save_reg(int regno) - return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || - df_regs_ever_live_p (regno); - -- if (crtl->calls_eh_return && regno >= 2 && regno < 4) -+ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) - return true; - - return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); -@@ -2870,7 +3213,7 @@ xtensa_expand_prologue (void) - int callee_save_size = cfun->machine->callee_save_size; - - /* -128 is a limit of single addi instruction. 
*/ -- if (total_size > 0 && total_size <= 128) -+ if (IN_RANGE (total_size, 1, 128)) - { - insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-total_size))); -@@ -2999,7 +3342,7 @@ xtensa_expand_prologue (void) - } - - void --xtensa_expand_epilogue (void) -+xtensa_expand_epilogue (bool sibcall_p) - { - if (!TARGET_WINDOWED_ABI) - { -@@ -3033,10 +3376,13 @@ xtensa_expand_epilogue (void) - if (xtensa_call_save_reg(regno)) - { - rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ rtx reg; - - offset -= UNITS_PER_WORD; -- emit_move_insn (gen_rtx_REG (SImode, regno), -+ emit_move_insn (reg = gen_rtx_REG (SImode, regno), - gen_frame_mem (SImode, x)); -+ if (regno == A0_REG && sibcall_p) -+ emit_use (reg); - } - } - -@@ -3071,7 +3417,8 @@ xtensa_expand_epilogue (void) - EH_RETURN_STACKADJ_RTX)); - } - cfun->machine->epilogue_done = true; -- emit_jump_insn (gen_return ()); -+ if (!sibcall_p) -+ emit_jump_insn (gen_return ()); - } - - bool -@@ -3697,7 +4044,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) - flags |= SECTION_BSS; /* @nobits */ - else - warning (0, "only uninitialized variables can be placed in a " -- ".bss section"); -+ "%<.bss%> section"); - } - - return flags; -@@ -3750,7 +4097,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - static bool - xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - int opno ATTRIBUTE_UNUSED, -- int *total, bool speed ATTRIBUTE_UNUSED) -+ int *total, bool speed) - { - int code = GET_CODE (x); - -@@ -3838,9 +4185,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case CLZ: -+ case CLRSB: - *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); - return true; - -+ case BSWAP: -+ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); -+ return true; -+ - case NOT: - *total = COSTS_N_INSNS (mode == DImode ? 
3 : 2); - return true; -@@ -3864,13 +4216,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case ABS: -+ case NEG: - { - if (mode == SFmode) - *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); - else if (mode == DFmode) - *total = COSTS_N_INSNS (50); -- else -+ else if (mode == DImode) - *total = COSTS_N_INSNS (4); -+ else -+ *total = COSTS_N_INSNS (1); - return true; - } - -@@ -3886,10 +4241,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - } - -- case NEG: -- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); -- return true; -- - case MULT: - { - if (mode == SFmode) -@@ -3929,11 +4280,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - case UMOD: - { - if (mode == DImode) -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - else if (TARGET_DIV32) - *total = COSTS_N_INSNS (32); - else -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - return true; - } - -@@ -3966,6 +4317,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - } - } - -+static bool -+xtensa_is_insn_L32R_p(const rtx_insn *insn) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = XEXP (x, 1); -+ if (GET_CODE (x) == MEM) -+ { -+ x = XEXP (x, 0); -+ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) -+ && CONSTANT_POOL_ADDRESS_P (x); -+ } -+ } -+ -+ return false; -+} -+ -+/* Compute a relative costs of RTL insns. This is necessary in order to -+ achieve better RTL insn splitting/combination result. */ -+ -+static int -+xtensa_insn_cost (rtx_insn *insn, bool speed) -+{ -+ if (!(recog_memoized (insn) < 0)) -+ { -+ int len = get_attr_length (insn), n = (len + 2) / 3; -+ -+ if (len == 0) -+ return COSTS_N_INSNS (0); -+ -+ if (speed) /* For speed cost. */ -+ { -+ /* "L32R" may be particular slow (implementation-dependent). 
*/ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); -+ -+ /* Cost based on the pipeline model. */ -+ switch (get_attr_type (insn)) -+ { -+ case TYPE_STORE: -+ case TYPE_MOVE: -+ case TYPE_ARITH: -+ case TYPE_MULTI: -+ case TYPE_NOP: -+ case TYPE_FSTORE: -+ return COSTS_N_INSNS (n); -+ -+ case TYPE_LOAD: -+ return COSTS_N_INSNS (n - 1 + 2); -+ -+ case TYPE_JUMP: -+ case TYPE_CALL: -+ return COSTS_N_INSNS (n - 1 + 3); -+ -+ case TYPE_FCONV: -+ case TYPE_FLOAD: -+ case TYPE_MUL16: -+ case TYPE_MUL32: -+ case TYPE_RSR: -+ return COSTS_N_INSNS (n * 2); -+ -+ case TYPE_FMADD: -+ return COSTS_N_INSNS (n * 4); -+ -+ case TYPE_DIV32: -+ return COSTS_N_INSNS (n * 16); -+ -+ default: -+ break; -+ } -+ } -+ else /* For size cost. */ -+ { -+ /* Cost based on the instruction length. */ -+ if (get_attr_type (insn) != TYPE_UNKNOWN) -+ { -+ /* "L32R" itself plus constant in litpool. */ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (2) + 1; -+ -+ /* Consider ".n" short instructions. */ -+ return COSTS_N_INSNS (n) - (n * 3 - len); -+ } -+ } -+ } -+ -+ /* Fall back. */ -+ return pattern_cost (PATTERN (insn), speed); -+} -+ - /* Worker function for TARGET_RETURN_IN_MEMORY. */ - - static bool -@@ -4491,4 +4934,16 @@ xtensa_asan_shadow_offset (void) - return HOST_WIDE_INT_UC (0x10000000); - } - -+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ -+static bool -+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Do not allow sibcalls if the Windowed Register Option is -+ configured. */ -+ if (TARGET_WINDOWED_ABI) -+ return false; -+ -+ return true; -+} -+ - #include "gt-xtensa.h" -diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h -index fa86a245e..3e9cbc943 100644 ---- a/gcc/config/xtensa/xtensa.h -+++ b/gcc/config/xtensa/xtensa.h -@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. 
If not see - #define HAVE_AS_TLS 0 - #endif - -+/* Define this if the target has no hardware divide instructions. */ -+#if !TARGET_DIV32 -+#define TARGET_HAS_NO_HW_DIVIDE -+#endif -+ - - /* Target CPU builtins. */ - #define TARGET_CPU_CPP_BUILTINS() \ -@@ -488,7 +493,7 @@ enum reg_class - used for this purpose since all function arguments are pushed on - the stack. */ - #define FUNCTION_ARG_REGNO_P(N) \ -- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) -+ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) - - /* Record the number of argument words seen so far, along with a flag to - indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 2a8e59ee9..124548dfe 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -25,6 +25,7 @@ - (A7_REG 7) - (A8_REG 8) - (A9_REG 9) -+ (A10_REG 10) - - (UNSPEC_NOP 2) - (UNSPEC_PLT 3) -@@ -83,6 +84,13 @@ - ;; the same template. - (define_mode_iterator HQI [HI QI]) - -+;; This code iterator is for *shlrd and its variants. -+(define_code_iterator ior_op [ior plus]) -+ -+;; This mode iterator allows the DC and SC patterns to be defined from -+;; the same template. -+(define_mode_iterator DSC [DC SC]) -+ - - ;; Attributes. - -@@ -98,7 +106,10 @@ - - ;; Describe a user's asm statement. - (define_asm_attributes -- [(set_attr "type" "multi")]) -+ [(set_attr "type" "multi") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) ;; Should be the maximum possible length -+ ;; of a single machine instruction. - - - ;; Pipeline model. -@@ -224,20 +235,42 @@ - - ;; Multiplication. 
- --(define_expand "mulsidi3" -+(define_expand "mulsidi3" - [(set (match_operand:DI 0 "register_operand") -- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) -- (any_extend:DI (match_operand:SI 2 "register_operand"))))] -+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] - "TARGET_MUL32_HIGH" - { - rtx temp = gen_reg_rtx (SImode); - emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -- emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -- operands[1], operands[2])); -+ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); - emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); - DONE; - }) - -+(define_expand "umulsidi3" -+ [(set (match_operand:DI 0 "register_operand") -+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] -+ "" -+{ -+ if (TARGET_MUL32_HIGH) -+ { -+ rtx temp = gen_reg_rtx (SImode); -+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -+ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); -+ } -+ else -+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], SImode, -+ operands[2], SImode); -+ DONE; -+}) -+ - (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=a") - (truncate:SI -@@ -261,30 +294,16 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_insn "mulhisi3" -- [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (sign_extend:SI -- (match_operand:HI 1 "register_operand" "%r,r")) -- (sign_extend:SI -- (match_operand:HI 2 "register_operand" "r,r"))))] -- "TARGET_MUL16 || TARGET_MAC16" -- "@ -- mul16s\t%0, %1, %2 -- mul.aa.ll\t%1, %2" -- [(set_attr "type" 
"mul16,mac16") -- (set_attr "mode" "SI") -- (set_attr "length" "3,3")]) -- --(define_insn "umulhisi3" -+(define_insn "mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (zero_extend:SI -+ (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) -- (zero_extend:SI -+ (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] - "TARGET_MUL16 || TARGET_MAC16" - "@ -- mul16u\t%0, %1, %2 -- umul.aa.ll\t%1, %2" -+ mul16\t%0, %1, %2 -+ mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) -@@ -429,7 +448,17 @@ - (set_attr "length" "3")]) - - --;; Count leading/trailing zeros and find first bit. -+;; Count redundant leading sign bits and leading/trailing zeros, -+;; and find first bit. -+ -+(define_insn "clrsbsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] -+ "TARGET_NSA" -+ "nsa\t%0, %1" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "3")]) - - (define_insn "clzsi2" - [(set (match_operand:SI 0 "register_operand" "=a") -@@ -471,23 +500,78 @@ - - ;; Byte swap. 
- --(define_insn "bswapsi2" -- [(set (match_operand:SI 0 "register_operand" "=&a") -- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "15")]) -+(define_insn "bswaphi2" -+ [(set (match_operand:HI 0 "register_operand" "=a") -+ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) -+ (clobber (match_scratch:HI 2 "=&a"))] -+ "" -+ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "HI") -+ (set_attr "length" "9")]) - --(define_insn "bswapdi2" -- [(set (match_operand:DI 0 "register_operand" "=&a") -- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" -- [(set_attr "type" "arith") -- (set_attr "mode" "DI") -- (set_attr "length" "27")]) -+(define_expand "bswapsi2" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (bswap:SI (match_operand:SI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1" -+{ -+ /* GIMPLE manual byte-swapping recognition is now activated. -+ For both built-in and manual bswaps, emit corresponding library call -+ if optimizing for size, or a series of dedicated machine instructions -+ if otherwise. 
*/ -+ if (optimize_size) -+ emit_library_call_value (optab_libfunc (bswap_optab, SImode), -+ operands[0], LCT_NORMAL, SImode, -+ operands[1], SImode); -+ else -+ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "bswapsi2_internal" -+ [(set (match_operand:SI 0 "register_operand" "=a,&a") -+ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) -+ (clobber (match_scratch:SI 2 "=&a,X"))] -+ "!optimize_debug && optimize > 1 && !optimize_size" -+{ -+ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); -+ const char *init = "ssai\t8\;"; -+ static char result[64]; -+ if (prev_insn && NONJUMP_INSN_P (prev_insn)) -+ { -+ rtx x = PATTERN (prev_insn); -+ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 -+ && GET_CODE (XVECEXP (x, 0, 0)) == SET -+ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) -+ { -+ x = XEXP (XVECEXP (x, 0, 0), 1); -+ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) -+ init = ""; -+ } -+ } -+ sprintf (result, -+ (which_alternative == 0) -+ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" -+ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", -+ init); -+ return result; -+} -+ [(set_attr "type" "arith,arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "15,15")]) -+ -+(define_expand "bswapdi2" -+ [(set (match_operand:DI 0 "register_operand" "") -+ (bswap:DI (match_operand:DI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1 && optimize_size" -+{ -+ /* Replace with a single DImode library call. -+ Without this, two SImode library calls are emitted. */ -+ emit_library_call_value (optab_libfunc (bswap_optab, DImode), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], DImode); -+ DONE; -+}) - - - ;; Negation and one's complement. 
-@@ -501,16 +585,26 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "one_cmplsi2" -- [(set (match_operand:SI 0 "register_operand" "") -- (not:SI (match_operand:SI 1 "register_operand" "")))] -+(define_insn_and_split "one_cmplsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (not:SI (match_operand:SI 1 "register_operand" "r")))] - "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (const_int -1)) -+ (set (match_dup 0) -+ (xor:SI (match_dup 1) -+ (match_dup 2)))] - { -- rtx temp = gen_reg_rtx (SImode); -- emit_insn (gen_movsi (temp, constm1_rtx)); -- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); -- DONE; --}) -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) - - (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") -@@ -536,6 +630,103 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -+(define_insn_and_split "*andsi3_bitcmpl" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r")))] -+ "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 3) -+ (and:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (xor:SI (match_dup 3) -+ (match_dup 2)))] -+{ -+ operands[3] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_pow2_minus_one" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ashift:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (lshiftrt:SI (match_dup 0) -+ 
(match_dup 2)))] -+{ -+ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[2]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*andsi3_const_negative_pow2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (lshiftrt:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_shifted_mask" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "shifted_mask_operand" "i")))] -+ "! 
xtensa_simm12b (INTVAL (operands[2]))" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (zero_extract:SI (match_dup 1) -+ (match_dup 3) -+ (match_dup 4))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[2]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[2] = GEN_INT (shift); -+ operands[3] = GEN_INT (mask_size); -+ operands[4] = GEN_INT (mask_pos); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && ctz_hwi (INTVAL (operands[2])) == 1") -+ (const_int 5) -+ (const_int 6)))]) -+ - (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=a") - (ior:SI (match_operand:SI 1 "register_operand" "%r") -@@ -634,7 +825,7 @@ - - ;; Field extract instructions. - --(define_expand "extv" -+(define_expand "extvsi" - [(set (match_operand:SI 0 "register_operand" "") - (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -649,12 +840,12 @@ - if (!lsbitnum_operand (operands[3], SImode)) - FAIL; - -- emit_insn (gen_extv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extv_internal" -+(define_insn "extvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (sign_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "sext_fldsz_operand" "i") -@@ -669,7 +860,7 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "extzv" -+(define_expand "extzvsi" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -678,12 +869,12 @@ - 
{ - if (!extui_fldsz_operand (operands[2], SImode)) - FAIL; -- emit_insn (gen_extzv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extzvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extzv_internal" -+(define_insn "extzvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (zero_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "extui_fldsz_operand" "i") -@@ -757,11 +948,14 @@ - because of offering further optimization opportunities. */ - if (register_operand (operands[0], DImode)) - { -- rtx first, second; -- -- split_double (operands[1], &first, &second); -- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); -- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); -+ rtx lowpart, highpart; -+ -+ if (TARGET_BIG_ENDIAN) -+ split_double (operands[1], &highpart, &lowpart); -+ else -+ split_double (operands[1], &lowpart, &highpart); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); -+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); - DONE; - } - -@@ -782,7 +976,7 @@ - "register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -831,6 +1025,19 @@ - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) - -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ rtx x = avoid_constant_pool_reference (operands[1]); -+ if (! CONST_INT_P (x)) -+ FAIL; -+ if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) -+ emit_move_insn (operands[0], x); -+}) -+ - ;; 16-bit Integer moves - - (define_expand "movhi" -@@ -1035,6 +1242,43 @@ - (set_attr "mode" "SF") - (set_attr "length" "3")]) - -+(define_split -+ [(set (match_operand:SF 0 "register_operand") -+ (match_operand:SF 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ int i = 0; -+ rtx x = XEXP (operands[1], 0); -+ long l[2]; -+ if (GET_CODE (x) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (x)) -+ x = get_pool_constant (x); -+ else if (GET_CODE (x) == CONST) -+ { -+ x = XEXP (x, 0); -+ gcc_assert (GET_CODE (x) == PLUS -+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) -+ && CONST_INT_P (XEXP (x, 1))); -+ i = INTVAL (XEXP (x, 1)); -+ gcc_assert (i == 0 || i == 4); -+ i /= 4; -+ x = get_pool_constant (XEXP (x, 0)); -+ } -+ else -+ gcc_unreachable (); -+ if (GET_MODE (x) == SFmode) -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); -+ else if (GET_MODE (x) == DFmode) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ else -+ FAIL; -+ x = gen_rtx_REG (SImode, REGNO (operands[0])); -+ if (! 
xtensa_constantsynth (x, l[i])) -+ emit_move_insn (x, GEN_INT (l[i])); -+}) -+ - ;; 64-bit floating point moves - - (define_expand "movdf" -@@ -1058,7 +1302,7 @@ - "register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -1085,6 +1329,22 @@ - DONE; - }) - -+;; Block sets -+ -+(define_expand "setmemsi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:SI 1 "") -+ (match_operand:SI 2 "") -+ (match_operand:SI 3 "const_int_operand")] -+ "!optimize_debug && optimize" -+{ -+ if (xtensa_expand_block_set_unrolled_loop (operands)) -+ DONE; -+ if (xtensa_expand_block_set_small_loop (operands)) -+ DONE; -+ FAIL; -+}) -+ - - ;; Shift instructions. - -@@ -1097,16 +1357,6 @@ - operands[1] = xtensa_copy_incoming_a7 (operands[1]); - }) - --(define_insn "*ashlsi3_1" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (const_int 1)))] -- "TARGET_DENSITY" -- "add.n\t%0, %1, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "2")]) -- - (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1119,16 +1369,14 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashlsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8b\t%2\;sll\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (ashift:SI (match_operand:SI 1 "register_operand") -+ (const_int 1)))] -+ "TARGET_DENSITY" -+ [(set (match_dup 0) -+ (plus:SI (match_dup 1) -+ (match_dup 1)))]) - - 
(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") -@@ -1142,17 +1390,6 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashrsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8l\t%2\;sra\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -- - (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1162,9 +1399,9 @@ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) -- return "srli\t%0, %1, %R2"; -+ return "srli\t%0, %1, %R2"; - else -- return "extui\t%0, %1, %R2, %L2"; -+ return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; - } -@@ -1172,13 +1409,170 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*lshrsi3_3x" -+(define_insn "*shift_per_byte" - [(set (match_operand:SI 0 "register_operand" "=a") -- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -+ (match_operator:SI 3 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]))] -+ "!optimize_debug && optimize" -+{ -+ switch (GET_CODE (operands[3])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; -+ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; -+ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_0" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 
"register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i"))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_1" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i")))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 5) -+ (neg:SI (match_dup 2))) -+ (set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 5) -+ (const_int 3))]))] -+{ -+ operands[5] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "9")]) -+ -+(define_insn "*shlrd_reg_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (match_dup 2))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr 
"length" "6")]) -+ -+(define_insn "*shlrd_const_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 3 "const_int_operand" "i")]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 2 "register_operand" "r") -+ (match_operand:SI 4 "const_int_operand" "i")])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && IN_RANGE (INTVAL (operands[3]), 1, 31) -+ && IN_RANGE (INTVAL (operands[4]), 1, 31) -+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" -+{ -+ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) -+ { -+ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; -+ case LSHIFTRT: return "ssai\t%R3\;src\t%0, %2, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn "*shlrd_per_byte_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shlrd_per_byte__omit_AND" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ 
[(match_operand:SI 1 "register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 4 "const_int_operand" "i"))]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_dup 2) -+ (const_int 3)) -+ (match_dup 4)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ior_op:SI (match_op_dup 5 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]) -+ (match_op_dup 6 -+ [(match_dup 3) -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] - "" -- "ssa8l\t%2\;srl\t%0, %1" - [(set_attr "type" "arith") - (set_attr "mode" "SI") - (set_attr "length" "6")]) -@@ -1239,28 +1633,13 @@ - (define_insn "*btrue" - [(set (pc) - (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "branch_operand" "K,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*bfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1269,28 +1648,13 @@ - (define_insn "*ubtrue" - [(set (pc) - (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 
"register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "ubranch_operand" "L,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*ubfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1301,80 +1665,178 @@ - (define_insn "*bittrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") -+ (const_int 1) -+ (match_operand:SI 1 "arith_operand" "J,r")) - (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); -+ static char result[64]; -+ char op; -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: op = 'c'; break; -+ case NE: op = 's'; break; -+ default: gcc_unreachable (); -+ } -+ if (which_alternative == 0) -+ { -+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); -+ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); -+ } -+ else -+ sprintf (result, "bb%c\t%%0, %%1, %%2", op); -+ return result; - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*bitfalse" -+(define_insn "*masktrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- 
[(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "register_operand" "r")) - (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] - "" - { -- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: return "bnone\t%0, %1, %2"; -+ case NE: return "bany\t%0, %1, %2"; -+ default: gcc_unreachable (); -+ } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*masktrue" -+(define_insn "*masktrue_bitcmpl" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -+ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) -+ (match_operand:SI 1 "register_operand" "r")) -+ (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - switch (GET_CODE (operands[3])) - { -- case EQ: return "bnone\t%0, %1, %2"; -- case NE: return "bany\t%0, %1, %2"; -- default: gcc_unreachable (); -+ case EQ: return "ball\t%0, %1, %2"; -+ case NE: return "bnall\t%0, %1, %2"; -+ default: gcc_unreachable (); - } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*maskfalse" -+(define_insn_and_split "*masktrue_const_pow2_minus_one" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE 
(exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (ashift:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] - { -- switch (GET_CODE (operands[3])) -- { -- case EQ: return "bany\t%0, %1, %2"; -- case NE: return "bnone\t%0, %1, %2"; -- default: gcc_unreachable (); -- } -+ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); -+ operands[4] = gen_reg_rtx (SImode); - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[1]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*masktrue_const_negative_pow2" -+ [(set (pc) -+ (if_then_else (match_operator 3 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (lshiftrt:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] -+{ -+ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); -+ operands[4] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*masktrue_const_shifted_mask" -+ [(set (pc) -+ (if_then_else (match_operator 4 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "shifted_mask_operand" "i")) -+ (match_operand:SI 2 "const_int_operand" "i")]) -+ (label_ref (match_operand 3 "" "")) -+ (pc)))] -+ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 -+ && 
xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 6) -+ (zero_extract:SI (match_dup 0) -+ (match_dup 5) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 4 -+ [(match_dup 6) -+ (match_dup 2)]) -+ (label_ref (match_dup 3)) -+ (pc)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[1]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[1] = GEN_INT (mask_pos); -+ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); -+ operands[5] = GEN_INT (mask_size); -+ operands[6] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") -+ (const_int 5) -+ (const_int 6)))]) - - - ;; Zero-overhead looping support. 
-@@ -1696,18 +2158,13 @@ - (match_operand 1 "" ""))] - "" - { -- rtx addr = XEXP (operands[0], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (0, operands); - }) - - (define_insn "call_internal" - [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) - (match_operand 1 "" "i"))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (0, operands); - } -@@ -1721,19 +2178,14 @@ - (match_operand 2 "" "")))] - "" - { -- rtx addr = XEXP (operands[1], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (1, operands); - }) - - (define_insn "call_value_internal" - [(set (match_operand 0 "register_operand" "=a") - (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) - (match_operand 2 "" "i")))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (1, operands); - } -@@ -1741,6 +2193,70 @@ - (set_attr "mode" "none") - (set_attr "length" "3")]) - -+(define_expand "sibcall" -+ [(call (match_operand 0 "memory_operand" "") -+ (match_operand 1 "" ""))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (0, operands); -+}) -+ -+(define_insn "sibcall_internal" -+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) -+ (match_operand 1 "" "i"))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (0, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(call (mem:SI (match_operand:SI 0 "register_operand")) -+ (match_operand 1 ""))] -+ "reload_completed -+ && 
!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[0]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 0)) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 1))]) -+ -+(define_expand "sibcall_value" -+ [(set (match_operand 0 "register_operand" "") -+ (call (match_operand 1 "memory_operand" "") -+ (match_operand 2 "" "")))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (1, operands); -+}) -+ -+(define_insn "sibcall_value_internal" -+ [(set (match_operand 0 "register_operand" "=a") -+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) -+ (match_operand 2 "" "i")))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (1, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(set (match_operand 0 "register_operand") -+ (call (mem:SI (match_operand:SI 1 "register_operand")) -+ (match_operand 2 "")))] -+ "reload_completed -+ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[1]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 1)) -+ (set (match_dup 0) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 2)))]) -+ - (define_insn "entry" - [(set (reg:SI A1_REG) - (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] -@@ -1762,7 +2278,10 @@ - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "2")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - - ;; Miscellaneous instructions. 
-@@ -1805,7 +2324,15 @@ - [(return)] - "" - { -- xtensa_expand_epilogue (); -+ xtensa_expand_epilogue (false); -+ DONE; -+}) -+ -+(define_expand "sibcall_epilogue" -+ [(return)] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_expand_epilogue (true); - DONE; - }) - -@@ -1817,7 +2344,10 @@ - } - [(set_attr "type" "nop") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - (define_expand "nonlocal_goto" - [(match_operand:SI 0 "general_operand" "") -@@ -1881,8 +2411,9 @@ - [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] - "" - "" -- [(set_attr "length" "0") -- (set_attr "type" "nop")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - ;; Do not schedule instructions accessing memory before this point. - -@@ -1901,7 +2432,9 @@ - (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] - "" - "" -- [(set_attr "length" "0")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] -@@ -1914,7 +2447,10 @@ - } - [(set_attr "type" "trap") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't - ;; know if a frame pointer is required until the reload pass, and -@@ -2177,3 +2713,103 @@ - xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); - DONE; - }) -+ -+(define_insn_and_split "*round_up_to_even" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 1)) -+ (const_int -2)))] -+ "" -+ "#" -+ "can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (and:SI (match_dup 1) -+ (const_int 1))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 2) -+ 
(match_dup 1)))] -+{ -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*signed_ge_zero" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ge:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 0)))] -+ "" -+ "#" -+ "" -+ [(set (match_dup 0) -+ (ashiftrt:SI (match_dup 1) -+ (const_int 31))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 0) -+ (const_int 1)))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_peephole2 -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 6 "reload_operand")) -+ (set (match_operand:SI 1 "register_operand") -+ (match_operand:SI 7 "reload_operand")) -+ (set (match_operand:SF 2 "register_operand") -+ (match_operand:SF 4 "register_operand")) -+ (set (match_operand:SF 3 "register_operand") -+ (match_operand:SF 5 "register_operand"))] -+ "REGNO (operands[0]) == REGNO (operands[4]) -+ && REGNO (operands[1]) == REGNO (operands[5]) -+ && peep2_reg_dead_p (4, operands[0]) -+ && peep2_reg_dead_p (4, operands[1])" -+ [(set (match_dup 2) -+ (match_dup 6)) -+ (set (match_dup 3) -+ (match_dup 7))] -+{ -+ uint32_t check = 0; -+ int i; -+ for (i = 0; i <= 3; ++i) -+ { -+ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); -+ if (check & mask) -+ FAIL; -+ check |= mask; -+ } -+ operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); -+ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); -+}) -+ -+(define_split -+ [(clobber (match_operand:DSC 0 "register_operand"))] -+ "GP_REG_P (REGNO (operands[0]))" -+ [(const_int 0)] -+{ -+ unsigned int regno = REGNO (operands[0]); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ rtx_insn *insn; -+ rtx x; -+ if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno -+ && (insn = next_nonnote_nondebug_insn (insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) -+ FAIL; -+}) -diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt -index aef67970b..97aa44f92 100644 ---- a/gcc/config/xtensa/xtensa.opt -+++ b/gcc/config/xtensa/xtensa.opt -@@ -27,9 +27,13 @@ Target Report Mask(FORCE_NO_PIC) - Disable position-independent code (PIC) for use in OS kernel code. - - mlongcalls --Target -+Target Mask(LONGCALLS) - Use indirect CALLXn instructions for large programs. - -+mextra-l32r-costs= -+Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) -+Set extra memory access cost for L32R instruction, in clock-cycle units. -+ - mtarget-align - Target - Automatically align branch targets to reduce branch penalties. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index eabeec944..c35f51afb 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. - -mtext-section-literals -mno-text-section-literals @gol - -mauto-litpools -mno-auto-litpools @gol - -mtarget-align -mno-target-align @gol ---mlongcalls -mno-longcalls} -+-mlongcalls -mno-longcalls @gol -+-mextra-l32r-costs=@var{cycles}} - - @emph{zSeries Options} - See S/390 and zSeries Options. -@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call - instructions---look at the disassembled object code to see the actual - instructions. Note that the assembler uses an indirect call for - every cross-file call, not just those that really are out of range. 
-+ -+@item -mextra-l32r-costs=@var{n} -+@opindex mextra-l32r-costs -+Specify an extra cost of instruction RAM/ROM access for @code{L32R} -+instructions, in clock cycles. This affects, when optimizing for speed, -+whether loading a constant from literal pool using @code{L32R} or -+synthesizing the constant from a small one with a couple of arithmetic -+instructions. The default value is 0. - @end table - - @node zSeries Options -diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -new file mode 100644 -index 000000000..ba61c6f37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+extern void foo(void); -+ -+void BNONE_test(int a, int b) -+{ -+ if (a & b) -+ foo(); -+} -+ -+void BANY_test(int a, int b) -+{ -+ if (!(a & b)) -+ foo(); -+} -+ -+void BALL_test(int a, int b) -+{ -+ if (~a & b) -+ foo(); -+} -+ -+void BNALL_test(int a, int b) -+{ -+ if (!(~a & b)) -+ foo(); -+} -+ -+/* { dg-final { scan-assembler-times "bnone" 1 } } */ -+/* { dg-final { scan-assembler-times "bany" 1 } } */ -+/* { dg-final { scan-assembler-times "ball" 1 } } */ -+/* { dg-final { scan-assembler-times "bnall" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -new file mode 100644 -index 000000000..a0c885baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return 
__builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -new file mode 100644 -index 000000000..4cf95b925 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "ssai" 4 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -new file mode 100644 -index 000000000..1e010fd62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 4 } } */ 
-diff --git a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -new file mode 100644 -index 000000000..6a04aaeef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int check_zero_byte(int v) -+{ -+ return (v - 0x01010101) & ~v & 0x80808080; -+} -+ -+/* { dg-final { scan-assembler-not "movi" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -new file mode 100644 -index 000000000..ec2606ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+int test_0(void) -+{ -+ return 4095; -+} -+ -+int test_1(void) -+{ -+ return 2147483647; -+} -+ -+int test_2(void) -+{ -+ return -34816; -+} -+ -+int test_3(void) -+{ -+ return -2049; -+} -+ -+int test_4(void) -+{ -+ return 2048; -+} -+ -+int test_5(void) -+{ -+ return 34559; -+} -+ -+int test_6(void) -+{ -+ return 43680; -+} -+ -+void test_7(int *p) -+{ -+ *p = -1432354816; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -new file mode 100644 -index 000000000..f3c4a1c7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mextra-l32r-costs=3" } */ -+ -+int test_0(void) -+{ -+ return 134217216; -+} -+ -+int test_1(void) -+{ -+ return -27604992; -+} -+ -+int test_2(void) -+{ -+ return -162279; -+} -+ -+void test_3(int *p) -+{ -+ *p = 192437; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -new file mode 100644 -index 000000000..11e5d5242 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+void test(unsigned int count, double array[]) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ array[i] = 1.0; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -new file mode 100644 -index 000000000..c8f987ccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(const void *addr) -+{ -+ unsigned int n = (unsigned int)addr; -+ const unsigned int *a = (const unsigned int*)(n & ~3); -+ n = (n & 3) * 8; -+ return (a[0] >> n) | (a[1] << (32 - n)); -+} -+ -+unsigned int test_1(unsigned int a, unsigned int b) -+{ -+ return (a >> 16) + (b << 16); -+} -+ -+/* { dg-final { scan-assembler-times "src" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -new file mode 100644 -index 000000000..608f65fd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+int one_cmpl_abs(int a) -+{ -+ return a < 0 ? 
~a : a; -+} -+ -+/* { dg-final { scan-assembler-not "bgez" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -new file mode 100644 -index 000000000..7a4018796 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -foptimize-sibling-calls" } */ -+ -+extern int foo(int); -+extern void bar(int); -+ -+int test_0(int a) { -+ return foo(a); -+} -+ -+void test_1(int a) { -+ bar(a); -+} -+ -+int test_2(int (*a)(void)) { -+ bar(0); -+ return a(); -+} -+ -+/* { dg-final { scan-assembler-not "ret" } } */ -diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S -index b19deae14..ad9072c40 100644 ---- a/libgcc/config/xtensa/lib1funcs.S -+++ b/libgcc/config/xtensa/lib1funcs.S -@@ -456,6 +456,29 @@ __nsau_data: - #endif /* L_clz */ - - -+#ifdef L_clrsbsi2 -+ .align 4 -+ .global __clrsbsi2 -+ .type __clrsbsi2, @function -+__clrsbsi2: -+ leaf_entry sp, 16 -+#if XCHAL_HAVE_NSA -+ nsa a2, a2 -+#else -+ srai a3, a2, 31 -+ xor a3, a3, a2 -+ movi a2, 31 -+ beqz a3, .Lreturn -+ do_nsau a2, a3, a4, a5 -+ addi a2, a2, -1 -+.Lreturn: -+#endif -+ leaf_return -+ .size __clrsbsi2, . 
- __clrsbsi2 -+ -+#endif /* L_clrsbsi2 */ -+ -+ - #ifdef L_clzsi2 - .align 4 - .global __clzsi2 -diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa -index 9836c96ae..084618b38 100644 ---- a/libgcc/config/xtensa/t-xtensa -+++ b/libgcc/config/xtensa/t-xtensa -@@ -1,6 +1,6 @@ - LIB1ASMSRC = xtensa/lib1funcs.S - LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ -- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ -+ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _bswapsi2 _bswapdi2 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ --- -2.20.1 - diff --git a/patches/gcc10.2/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch b/patches/gcc10.2/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch new file mode 100644 index 0000000..5aebddc --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch @@ -0,0 +1,48 @@ +From 76ee6b24125c885150e5b493b26b594801998b74 Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Tue, 18 Jan 2022 14:51:40 +0100 +Subject: [PATCH 02/31] xtensa: fix -Wformat-diag warnings. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (print_operand): Fix warnings. + (print_operand_address): Likewise. + (xtensa_multibss_section_type_flags): Likewise. 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 37c6ac1fd..b1dbe8520 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2379,7 +2379,7 @@ void + print_operand (FILE *file, rtx x, int letter) + { + if (!x) +- error ("PRINT_OPERAND null pointer"); ++ error ("%<PRINT_OPERAND%> null pointer"); + + switch (letter) + { +@@ -2584,7 +2584,7 @@ void + print_operand_address (FILE *file, rtx addr) + { + if (!addr) +- error ("PRINT_OPERAND_ADDRESS, null pointer"); ++ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); + + switch (GET_CODE (addr)) + { +@@ -3697,7 +3697,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " +- ".bss section"); ++ "%<.bss%> section"); + } + + return flags; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch b/patches/gcc10.2/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch new file mode 100644 index 0000000..46260ef --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch @@ -0,0 +1,74 @@ +From b5b9fd01c4db135893c44e82a9f33c2411e993d0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:06 +0900 +Subject: [PATCH 03/31] xtensa: Rename deprecated extv/extzv insn patterns to + extvsi/extzvsi + +These patterns were deprecated since GCC 4.8. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi, + extzvsi_internal): Rename from extv, extv_internal, extzv and + extzv_internal, respectively. 
+--- + gcc/config/xtensa/xtensa.md | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 123916957..251c313d5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -634,7 +634,7 @@ + + ;; Field extract instructions. + +-(define_expand "extv" ++(define_expand "extvsi" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -649,12 +649,12 @@ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + +- emit_insn (gen_extv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extv_internal" ++(define_insn "extvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") +@@ -669,7 +669,7 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "extzv" ++(define_expand "extzvsi" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -678,12 +678,12 @@ + { + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; +- emit_insn (gen_extzv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extzvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extzv_internal" ++(define_insn "extzvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch 
b/patches/gcc10.2/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch new file mode 100644 index 0000000..607367c --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch @@ -0,0 +1,41 @@ +From 12fa0b13b6f0c52e5c4d75f39822771a7f780f94 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:19 +0900 +Subject: [PATCH 04/31] xtensa: Reflect the 32-bit Integer Divide Option + +On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider), +this patch reduces the size of each: + + __moddi3() @ libgcc.a : 969 -> 301 (saves 668) + __divmoddi4() : 1111 -> 426 (saves 685) + __udivmoddi4() : 1043 -> 319 (saves 724) + +in bytes, respectively. + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro + definition. +--- + gcc/config/xtensa/xtensa.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index fa86a245e..5b102de51 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. If not see + #define HAVE_AS_TLS 0 + #endif + ++/* Define this if the target has no hardware divide instructions. */ ++#if !TARGET_DIV32 ++#define TARGET_HAS_NO_HW_DIVIDE ++#endif ++ + + /* Target CPU builtins. 
*/ + #define TARGET_CPU_CPP_BUILTINS() \ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch b/patches/gcc10.2/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch new file mode 100644 index 0000000..8d257cd --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch @@ -0,0 +1,78 @@ +From 49383c9381a937b360adeb14f5e7bd4472f7c386 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:26:30 +0900 +Subject: [PATCH 05/31] xtensa: Simplify EXTUI instruction maskimm validations + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (extui_fldsz_operand): Simplify. + * config/xtensa/xtensa.c (xtensa_mask_immediate, print_operand): + Ditto. +--- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa.c | 24 +++--------------------- + 2 files changed, 4 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index eb52b05aa..3f84859b6 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -55,7 +55,7 @@ + + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") +- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) + + (define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b1dbe8520..4043f40ce 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -446,19 +446,7 @@ xtensa_b4constu (HOST_WIDE_INT v) + bool + xtensa_mask_immediate (HOST_WIDE_INT v) + { +-#define MAX_MASK_SIZE 16 +- int mask_size; +- +- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) +- { +- if ((v & 1) == 0) +- return false; +- v = v >> 1; +- if (v == 0) +- return true; +- } +- +- return false; ++ return IN_RANGE (exact_log2 
(v + 1), 1, 16); + } + + +@@ -2424,17 +2412,11 @@ print_operand (FILE *file, rtx x, int letter) + case 'K': + if (GET_CODE (x) == CONST_INT) + { +- int num_bits = 0; + unsigned val = INTVAL (x); +- while (val & 1) +- { +- num_bits += 1; +- val = val >> 1; +- } +- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) ++ if (!xtensa_mask_immediate (val)) + fatal_insn ("invalid mask", x); + +- fprintf (file, "%d", num_bits); ++ fprintf (file, "%d", floor_log2 (val + 1)); + } + else + output_operand_lossage ("invalid %%K value"); +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch b/patches/gcc10.2/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch new file mode 100644 index 0000000..419ebfe --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch @@ -0,0 +1,174 @@ +From fa7073ff572c248896057a5a7841a3e1d98380ad Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:27:36 +0900 +Subject: [PATCH 06/31] xtensa: Make use of IN_RANGE macro where appropriate + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/constraints.md (M, O): Use the macro. + * config/xtensa/predicates.md (addsubx_operand, extui_fldsz_operand, + sext_fldsz_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_simm8, xtensa_simm8x256, + xtensa_simm12b, xtensa_uimm8, xtensa_uimm8x2, xtensa_uimm8x4, + xtensa_mask_immediate, smalloffset_mem_p, printx, xtensa_call_save_reg, + xtensa_expand_prologue): Ditto. + * config/xtensa/xtensa.h (FUNCTION_ARG_REGNO_P): Ditto. 
+--- + gcc/config/xtensa/constraints.md | 4 ++-- + gcc/config/xtensa/predicates.md | 5 ++--- + gcc/config/xtensa/xtensa.c | 20 ++++++++++---------- + gcc/config/xtensa/xtensa.h | 2 +- + 4 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 2062c8816..9a8caab4f 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -92,7 +92,7 @@ + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") +- (match_test "ival >= -32 && ival <= 95"))) ++ (match_test "IN_RANGE (ival, -32, 95)"))) + + (define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use +@@ -103,7 +103,7 @@ + (define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") +- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) ++ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) + + (define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 3f84859b6..91b9343a2 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -25,8 +25,7 @@ + + (define_predicate "addsubx_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 1 +- && INTVAL (op) <= 3"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + + (define_predicate "arith_operand" + (ior (and (match_code "const_int") +@@ -64,7 +63,7 @@ + + (define_predicate "sext_fldsz_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) + + (define_predicate "lsbitnum_operand" + (and (match_code "const_int") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 4043f40ce..02dc5799a 100644 +--- a/gcc/config/xtensa/xtensa.c 
++++ b/gcc/config/xtensa/xtensa.c +@@ -341,42 +341,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; + bool + xtensa_simm8 (HOST_WIDE_INT v) + { +- return v >= -128 && v <= 127; ++ return IN_RANGE (v, -128, 127); + } + + + bool + xtensa_simm8x256 (HOST_WIDE_INT v) + { +- return (v & 255) == 0 && (v >= -32768 && v <= 32512); ++ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); + } + + + bool + xtensa_simm12b (HOST_WIDE_INT v) + { +- return v >= -2048 && v <= 2047; ++ return IN_RANGE (v, -2048, 2047); + } + + + static bool + xtensa_uimm8 (HOST_WIDE_INT v) + { +- return v >= 0 && v <= 255; ++ return IN_RANGE (v, 0, 255); + } + + + static bool + xtensa_uimm8x2 (HOST_WIDE_INT v) + { +- return (v & 1) == 0 && (v >= 0 && v <= 510); ++ return (v & 1) == 0 && IN_RANGE (v, 0, 510); + } + + + static bool + xtensa_uimm8x4 (HOST_WIDE_INT v) + { +- return (v & 3) == 0 && (v >= 0 && v <= 1020); ++ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); + } + + +@@ -527,7 +527,7 @@ smalloffset_mem_p (rtx op) + return FALSE; + + val = INTVAL (offset); +- return (val & 3) == 0 && (val >= 0 && val <= 60); ++ return (val & 3) == 0 && IN_RANGE (val, 0, 60); + } + } + return FALSE; +@@ -2352,7 +2352,7 @@ static void + printx (FILE *file, signed int val) + { + /* Print a hexadecimal value in a nice way. */ +- if ((val > -0xa) && (val < 0xa)) ++ if (IN_RANGE (val, -9, 9)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); +@@ -2732,7 +2732,7 @@ xtensa_call_save_reg(int regno) + return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || + df_regs_ever_live_p (regno); + +- if (crtl->calls_eh_return && regno >= 2 && regno < 4) ++ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; + + return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); +@@ -2852,7 +2852,7 @@ xtensa_expand_prologue (void) + int callee_save_size = cfun->machine->callee_save_size; + + /* -128 is a limit of single addi instruction. 
*/ +- if (total_size > 0 && total_size <= 128) ++ if (IN_RANGE (total_size, 1, 128)) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-total_size))); +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 5b102de51..3e9cbc943 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -493,7 +493,7 @@ enum reg_class + used for this purpose since all function arguments are pushed on + the stack. */ + #define FUNCTION_ARG_REGNO_P(N) \ +- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) ++ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) + + /* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch b/patches/gcc10.2/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch new file mode 100644 index 0000000..dae4a21 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch @@ -0,0 +1,54 @@ +From 5cda5b41a7646d220f7351226b5da78955b0fc7f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:29:22 +0900 +Subject: [PATCH 07/31] xtensa: Fix instruction counting regarding block move + expansion + +This patch makes counting the number of instructions of the remainder +(modulo 4) part more accurate. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_move): + Make instruction counting more accurate, and simplify emitting insns. 
+--- + gcc/config/xtensa/xtensa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 02dc5799a..0fe8b73ad 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1303,7 +1303,7 @@ xtensa_expand_block_move (rtx *operands) + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; +- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ ++ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); + if (num_pieces > move_ratio) + return 0; + +@@ -1340,7 +1340,7 @@ xtensa_expand_block_move (rtx *operands) + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); +- emit_insn (gen_rtx_SET (temp[next], x)); ++ emit_move_insn (temp[next], x); + + offset_ld += next_amount; + bytes -= next_amount; +@@ -1350,9 +1350,9 @@ xtensa_expand_block_move (rtx *operands) + if (active[phase]) + { + active[phase] = false; +- ++ + x = adjust_address (dst_mem, mode[phase], offset_st); +- emit_insn (gen_rtx_SET (x, temp[phase])); ++ emit_move_insn (x, temp[phase]); + + offset_st += amount[phase]; + } +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch new file mode 100644 index 0000000..a7212ce --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch @@ -0,0 +1,303 @@ +From 02572a935a2cbabc96387289300fb78d61dde555 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 24 May 2022 00:52:44 +0900 +Subject: [PATCH 08/31] xtensa: Add setmemsi insn pattern + +This patch introduces setmemsi insn pattern of two kinds, unrolled loop and +small loop, for fixed small length and constant initialization value. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New prototypes. 
+ * config/xtensa/xtensa.c (xtensa_sizeof_MOVI, + xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New functions. + * config/xtensa/xtensa.md (setmemsi): New expansion pattern. + * config/xtensa/xtensa.opt (mlongcalls): Add target mask. +--- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 211 ++++++++++++++++++++++++++++++ + gcc/config/xtensa/xtensa.md | 16 +++ + gcc/config/xtensa/xtensa.opt | 2 +- + 4 files changed, 230 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 18d803581..80b1da2bb 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); + extern int xtensa_expand_conditional_move (rtx *, int); + extern int xtensa_expand_scc (rtx *, machine_mode); + extern int xtensa_expand_block_move (rtx *); ++extern int xtensa_expand_block_set_unrolled_loop (rtx *); ++extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0fe8b73ad..a6d76a953 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1363,6 +1363,217 @@ xtensa_expand_block_move (rtx *operands) + } + + ++/* Try to expand a block set operation to a sequence of RTL move ++ instructions. If not optimizing, or if the block size is not a ++ constant, or if the block is too large, or if the value to ++ initialize the block with is not a constant, the expansion ++ fails and GCC falls back to calling memset(). 
++ ++ operands[0] is the destination ++ operands[1] is the length ++ operands[2] is the initialization value ++ operands[3] is the alignment */ ++ ++static int ++xtensa_sizeof_MOVI (HOST_WIDE_INT imm) ++{ ++ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3; ++} ++ ++int ++xtensa_expand_block_set_unrolled_loop (rtx *operands) ++{ ++ rtx dst_mem = operands[0]; ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, reg; ++ int offset; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. */ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: a series of aligned memory stores. ++ Consist of S8I, S16I or S32I(.N). */ ++ expand_len += (bytes / align) * (TARGET_DENSITY ++ && align == 4 ? 2 : 3); ++ /* Insn expansion: the remainder, sub-aligned memory stores. ++ A combination of S8I and S16I as needed. */ ++ expand_len += ((bytes % align + 1) / 2) * 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (dst_mem, 0); ++ if (!REG_P (x)) ++ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); ++ switch (align) ++ { ++ case 1: ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (SImode, GEN_INT (value)); ++ ++ offset = 0; ++ do ++ { ++ int unit_size = MIN (bytes, align); ++ machine_mode unit_mode = (unit_size >= 4 ? SImode : ++ (unit_size >= 2 ? HImode : ++ QImode)); ++ unit_size = GET_MODE_SIZE (unit_mode); ++ ++ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), ++ unit_mode == SImode ? reg ++ : convert_to_mode (unit_mode, reg, true)); ++ ++ offset += unit_size; ++ bytes -= unit_size; ++ } ++ while (bytes > 0); ++ ++ return 1; ++} ++ ++int ++xtensa_expand_block_set_small_loop (rtx *operands) ++{ ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, dst, end, reg; ++ machine_mode unit_mode; ++ rtx_code_label *label; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Totally-aligned block only. */ ++ if (bytes % align != 0) ++ return 0; ++ ++ /* If 4-byte aligned, small loop substitution is almost optimal, thus ++ limited to only offset to the end address for ADDI/ADDMI instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; ++ ++ /* If no 4-byte aligned, loop count should be treated as the constraint. */ ++ if (align != 4 ++ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) ++ return 0; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. 
*/ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (operands[0], 0); ++ if (!REG_P (x)) ++ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); ++ dst = gen_reg_rtx (SImode); ++ emit_move_insn (dst, x); ++ end = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ switch (align) ++ { ++ case 1: ++ unit_mode = QImode; ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ unit_mode = HImode; ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ unit_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (unit_mode, GEN_INT (value)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); ++ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); ++ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); ++ ++ return 1; ++} ++ ++ + void + xtensa_expand_nonlocal_goto (rtx *operands) + { +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 251c313d5..9eb689efa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1085,6 +1085,22 @@ + DONE; + }) + ++;; Block sets ++ ++(define_expand "setmemsi" ++ [(match_operand:BLK 0 "memory_operand") ++ (match_operand:SI 1 "") ++ (match_operand:SI 2 "") ++ (match_operand:SI 3 "const_int_operand")] ++ "!optimize_debug && optimize" ++{ ++ if (xtensa_expand_block_set_unrolled_loop (operands)) ++ DONE; ++ if (xtensa_expand_block_set_small_loop (operands)) ++ DONE; ++ FAIL; ++}) ++ + + ;; Shift instructions. + +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index aef67970b..e1d992f5d 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -27,7 +27,7 @@ Target Report Mask(FORCE_NO_PIC) + Disable position-independent code (PIC) for use in OS kernel code. 
+ + mlongcalls +-Target ++Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + + mtarget-align +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch b/patches/gcc10.2/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch new file mode 100644 index 0000000..a5fb6f1 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch @@ -0,0 +1,254 @@ +From be1ca3aa6e9754ed16d1b7a60657912af02844da Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:33:59 +0900 +Subject: [PATCH 09/31] xtensa: Improve bswap[sd]i2 insn patterns + +This patch makes bswap[sd]i2 better register allocation, and reconstructs +bswapsi2 in order to take advantage of GIMPLE manual byte-swapping +recognition. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2): New expansion pattern. + (bswapsi2_internal): Revise the template and condition, and add + detection code for preceding the same insn in order to omit a + "SSAI 8" instruction of the latter. + (bswapdi2): Suppress built-in insn expansion with the corresponding + library call when optimizing for size. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/bswap-O1.c: New. + * gcc.target/xtensa/bswap-O2.c: Ditto. + * gcc.target/xtensa/bswap-Os.c: Ditto. 
+--- + gcc/config/xtensa/xtensa.md | 77 +++++++++++++++++----- + gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 +++++++++++ + 4 files changed, 172 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eb689efa..cea280061 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,23 +471,68 @@ + + ;; Byte swap. + +-(define_insn "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=&a") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "15")]) ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1" ++{ ++ /* GIMPLE manual byte-swapping recognition is now activated. ++ For both built-in and manual bswaps, emit corresponding library call ++ if optimizing for size, or a series of dedicated machine instructions ++ if otherwise. 
*/ ++ if (optimize_size) ++ emit_library_call_value (optab_libfunc (bswap_optab, SImode), ++ operands[0], LCT_NORMAL, SImode, ++ operands[1], SImode); ++ else ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++}) + +-(define_insn "bswapdi2" +- [(set (match_operand:DI 0 "register_operand" "=&a") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "length" "27")]) ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=a,&a") ++ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) ++ (clobber (match_scratch:SI 2 "=&a,X"))] ++ "!optimize_debug && optimize > 1 && !optimize_size" ++{ ++ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); ++ const char *init = "ssai\t8\;"; ++ static char result[64]; ++ if (prev_insn && NONJUMP_INSN_P (prev_insn)) ++ { ++ rtx x = PATTERN (prev_insn); ++ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 ++ && GET_CODE (XVECEXP (x, 0, 0)) == SET ++ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) ++ { ++ x = XEXP (XVECEXP (x, 0, 0), 1); ++ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) ++ init = ""; ++ } ++ } ++ sprintf (result, ++ (which_alternative == 0) ++ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" ++ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", ++ init); ++ return result; ++} ++ [(set_attr "type" "arith,arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "15,15")]) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (bswap:DI (match_operand:DI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1 && optimize_size" ++{ ++ /* Replace with a single DImode library call. 
++ Without this, two SImode library calls are emitted. */ ++ emit_library_call_value (optab_libfunc (bswap_optab, DImode), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], DImode); ++ DONE; ++}) + + + ;; Negation and one's complement. +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +new file mode 100644 +index 000000000..a0c885baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +new file mode 100644 +index 000000000..4cf95b925 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} 
++ ++/* { dg-final { scan-assembler-times "ssai" 4 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +new file mode 100644 +index 000000000..1e010fd62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 4 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0015-fix-PR-target-105879.patch b/patches/gcc10.2/gcc-xtensa-0015-fix-PR-target-105879.patch new file mode 100644 index 0000000..2c21f47 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0015-fix-PR-target-105879.patch @@ -0,0 +1,48 @@ +From 1848b547a6ac69a002d068239a5bc9463f3fae25 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 7 Jun 2022 21:01:01 -0700 +Subject: [PATCH 10/31] gcc: xtensa: fix PR target/105879 + +split_double operates with the 'word that comes first in memory in the +target' terminology, while gen_lowpart operates with the 'value +representing some low-order bits of X' terminology. They are not +equivalent and must be dealt with differently on little- and big-endian +targets. + +gcc/ + PR target/105879 + * config/xtensa/xtensa.md (movdi): Rename 'first' and 'second' + to 'lowpart' and 'highpart' so that they match 'gen_lowpart' and + 'gen_highpart' bitwise semantics and fix order of highpart and + lowpart depending on target endianness. 
+--- + gcc/config/xtensa/xtensa.md | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index cea280061..30d8ef96c 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -802,11 +802,14 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx first, second; +- +- split_double (operands[1], &first, &second); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); ++ rtx lowpart, highpart; ++ ++ if (TARGET_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); ++ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); + DONE; + } + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch new file mode 100644 index 0000000..3a31e62 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch @@ -0,0 +1,39 @@ +From f47a902c9a94d2e9df879de4613dae62c8e9cc4f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:44:32 +0900 +Subject: [PATCH 11/31] xtensa: Implement bswaphi2 insn pattern + +This patch adds bswaphi2 insn pattern that is one instruction less than the +default expansion. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswaphi2): New insn pattern. +--- + gcc/config/xtensa/xtensa.md | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 30d8ef96c..c1f44777d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,6 +471,16 @@ + + ;; Byte swap. 
+ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=a") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++ (clobber (match_scratch:HI 2 "=&a"))] ++ "" ++ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "HI") ++ (set_attr "length" "9")]) ++ + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch b/patches/gcc10.2/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch new file mode 100644 index 0000000..017a30f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch @@ -0,0 +1,86 @@ +From 22b5756399ef63a4102334724b12a4c186075227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:46:16 +0900 +Subject: [PATCH 12/31] xtensa: Make one_cmplsi2 optimizer-friendly + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. But a few optimizers assume that bitwise negation can be +done by a single insn. + +As a result, '((x < 0) ? ~x : x)' cannot be optimized to '(x ^ (x >> 31))' +ever before, for example. + +This patch relaxes such limitation, by putting the insn expansion off till +the split pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (one_cmplsi2): + Rearrange as an insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/one_cmpl_abs.c: New. 
+--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++------ + .../gcc.target/xtensa/one_cmpl_abs.c | 9 +++++++ + 2 files changed, 27 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c1f44777d..2f6d48d03 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -556,16 +556,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "") +- (not:SI (match_operand:SI 1 "register_operand" "")))] ++(define_insn_and_split "one_cmplsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (const_int -1)) ++ (set (match_dup 0) ++ (xor:SI (match_dup 1) ++ (match_dup 2)))] + { +- rtx temp = gen_reg_rtx (SImode); +- emit_insn (gen_movsi (temp, constm1_rtx)); +- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); +- DONE; +-}) ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +new file mode 100644 +index 000000000..608f65fd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++int one_cmpl_abs(int a) ++{ ++ return a < 0 ? 
~a : a; ++} ++ ++/* { dg-final { scan-assembler-not "bgez" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch b/patches/gcc10.2/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch new file mode 100644 index 0000000..d1167a1 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch @@ -0,0 +1,71 @@ +From cc259b2801c8d04c39169214041305fdd5b87acd Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:55:44 +0900 +Subject: [PATCH 13/31] xtensa: Optimize '(~x & y)' to '((x & y) ^ y)' + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*andsi3_bitcmpl): + New insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/check_zero_byte.c: New. +--- + gcc/config/xtensa/xtensa.md | 20 +++++++++++++++++++ + .../gcc.target/xtensa/check_zero_byte.c | 9 +++++++++ + 2 files changed, 29 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2f6d48d03..28ed1d34e 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -601,6 +601,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + ++(define_insn_and_split "*andsi3_bitcmpl" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 3) ++ (and:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (xor:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +diff --git 
a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +new file mode 100644 +index 000000000..6a04aaeef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++int check_zero_byte(int v) ++{ ++ return (v - 0x01010101) & ~v & 0x80808080; ++} ++ ++/* { dg-final { scan-assembler-not "movi" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch new file mode 100644 index 0000000..ebaa985 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch @@ -0,0 +1,98 @@ +From ebd48d915076589f04b5c1ed50f9f5ddfae088e8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:57:35 +0900 +Subject: [PATCH 14/31] xtensa: Add clrsbsi2 insn pattern + +> (clrsb:m x) +> Represents the number of redundant leading sign bits in x, represented +> as an integer of mode m, starting at the most significant bit position. + +This explanation is just what the NSA instruction (not ever emitted before) +calculates in Xtensa ISA. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (clrsbsi2): New insn pattern. + +libgcc/ChangeLog: + + * config/xtensa/lib1funcs.S (__clrsbsi2): New function. + * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _clrsbsi2. +--- + gcc/config/xtensa/xtensa.md | 12 +++++++++++- + libgcc/config/xtensa/lib1funcs.S | 23 +++++++++++++++++++++++ + libgcc/config/xtensa/t-xtensa | 2 +- + 3 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 28ed1d34e..6c76fb942 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -429,7 +429,17 @@ + (set_attr "length" "3")]) + + +-;; Count leading/trailing zeros and find first bit. 
++;; Count redundant leading sign bits and leading/trailing zeros, ++;; and find first bit. ++ ++(define_insn "clrsbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_NSA" ++ "nsa\t%0, %1" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "3")]) + + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") +diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S +index b19deae14..ad9072c40 100644 +--- a/libgcc/config/xtensa/lib1funcs.S ++++ b/libgcc/config/xtensa/lib1funcs.S +@@ -456,6 +456,29 @@ __nsau_data: + #endif /* L_clz */ + + ++#ifdef L_clrsbsi2 ++ .align 4 ++ .global __clrsbsi2 ++ .type __clrsbsi2, @function ++__clrsbsi2: ++ leaf_entry sp, 16 ++#if XCHAL_HAVE_NSA ++ nsa a2, a2 ++#else ++ srai a3, a2, 31 ++ xor a3, a3, a2 ++ movi a2, 31 ++ beqz a3, .Lreturn ++ do_nsau a2, a3, a4, a5 ++ addi a2, a2, -1 ++.Lreturn: ++#endif ++ leaf_return ++ .size __clrsbsi2, . 
- __clrsbsi2 ++ ++#endif /* L_clrsbsi2 */ ++ ++ + #ifdef L_clzsi2 + .align 4 + .global __clzsi2 +diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa +index 9836c96ae..084618b38 100644 +--- a/libgcc/config/xtensa/t-xtensa ++++ b/libgcc/config/xtensa/t-xtensa +@@ -1,6 +1,6 @@ + LIB1ASMSRC = xtensa/lib1funcs.S + LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ +- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ ++ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _bswapsi2 _bswapdi2 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch b/patches/gcc10.2/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch new file mode 100644 index 0000000..8de8a89 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch @@ -0,0 +1,110 @@ +From 1ba9369255749ccf9ec82565a192b1a523b0e374 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:17:40 +0900 +Subject: [PATCH 15/31] xtensa: Tweak some widen multiplications + +umulsidi3 is faster than umuldi3 even if library call, and is also +prerequisite for fast constant division by multiplication. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (mulsidi3, umulsidi3): + Split into individual signedness, in order to use libcall + "__umulsidi3" but not the other. + (<u>mulhisi3): Merge into one by using code iterator. + (<u>mulsidi3, mulhisi3, umulhisi3): Remove. +--- + gcc/config/xtensa/xtensa.md | 56 +++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6c76fb942..3314b3fd6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -224,20 +224,42 @@ + + ;; Multiplication.
+ +-(define_expand "<u>mulsidi3" ++(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") +- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) +- (any_extend:DI (match_operand:SI 2 "register_operand"))))] ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); +- emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), +- operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; + }) + ++(define_expand "umulsidi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "" ++{ ++ if (TARGET_MUL32_HIGH) ++ { ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); ++ } ++ else ++ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], SImode, ++ operands[2], SImode); ++ DONE; ++}) ++ + (define_insn "<u>mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI +@@ -261,30 +283,16 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "mulhisi3" +- [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (sign_extend:SI +- (match_operand:HI 1 "register_operand" "%r,r")) +- (sign_extend:SI +- (match_operand:HI 2 "register_operand" "r,r"))))] +- "TARGET_MUL16 || TARGET_MAC16" +- "@ +- mul16s\t%0, %1, %2 +- mul.aa.ll\t%1, %2" +- [(set_attr "type"
"mul16,mac16") +- (set_attr "mode" "SI") +- (set_attr "length" "3,3")]) +- +-(define_insn "umulhisi3" ++(define_insn "<u>mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (zero_extend:SI ++ (mult:SI (any_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) +- (zero_extend:SI ++ (any_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ +- mul16u\t%0, %1, %2 +- umul.aa.ll\t%1, %2" ++ mul16<su>\t%0, %1, %2 ++ <u>mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch b/patches/gcc10.2/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch new file mode 100644 index 0000000..491da47 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch @@ -0,0 +1,125 @@ +From bc108c84544d5a0e6289628e8749a92c9695f006 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:18:24 +0900 +Subject: [PATCH 16/31] xtensa: Consider the Loop Option when setmemsi is + expanded to small loop + +Now apply to almost any size of aligned block under such circumstances.
+ +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_set_small_loop): + Pass through the block length / loop count conditions if + zero-overhead looping is configured and active, +--- + gcc/config/xtensa/xtensa.c | 71 +++++++++++++++++++++++++++----------- + 1 file changed, 50 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a6d76a953..e2f97b79c 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1473,7 +1473,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) + int + xtensa_expand_block_set_small_loop (rtx *operands) + { +- HOST_WIDE_INT bytes, value, align; ++ HOST_WIDE_INT bytes, value, align, count; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; +@@ -1493,17 +1493,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; ++ count = bytes / align; + +- /* If 4-byte aligned, small loop substitution is almost optimal, thus +- limited to only offset to the end address for ADDI/ADDMI instruction. */ +- if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) +- return 0; ++ /* If the Loop Option (zero-overhead looping) is configured and active, ++ almost no restrictions about the length of the block. */ ++ if (! (TARGET_LOOPS && optimize)) ++ { ++ /* If 4-byte aligned, small loop substitution is almost optimal, ++ thus limited to only offset to the end address for ADDI/ADDMI ++ instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; + +- /* If no 4-byte aligned, loop count should be treated as the constraint. */ +- if (align != 4 +- && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) +- return 0; ++ /* If no 4-byte aligned, loop count should be treated as the ++ constraint. */ ++ if (align != 4 ++ && count > ((optimize > 1 && !optimize_size) ? 
8 : 15)) ++ return 0; ++ } + + /* Insn expansion: holding the init value. + Either MOV(.N) or L32R w/litpool. */ +@@ -1513,16 +1521,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; +- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ +- expand_len += bytes > 127 ? 3 +- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; +- +- /* Insn expansion: the loop body and branch instruction. +- For store, one of S8I, S16I or S32I(.N). +- For advance, ADDI(.N). +- For branch, BNE. */ +- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) +- + (TARGET_DENSITY ? 2 : 3) + 3; ++ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ ++ { ++ /* Insn translation: Either MOV(.N) or L32R w/litpool for the ++ loop count. */ ++ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) ++ : 3 + 4; ++ /* Insn translation: LOOP, the zero-overhead looping setup ++ instruction. */ ++ expand_len += 3; ++ /* Insn expansion: the loop body instructions. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3); ++ } ++ else /* NO zero-overhead looping */ ++ { ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ } + + /* Function call: preparing two arguments. 
*/ + funccall_len = xtensa_sizeof_MOVI (value); +@@ -1545,7 +1570,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); +- emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ if (TARGET_LOOPS && optimize) ++ x = force_reg (SImode, operands[1] /* the length */); ++ else ++ x = operands[1]; ++ emit_insn (gen_addsi3 (end, dst, x)); + switch (align) + { + case 1: +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch b/patches/gcc10.2/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch new file mode 100644 index 0000000..5792a6f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch @@ -0,0 +1,325 @@ +From de854e2348b8159bc389471e68023986c8878c92 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:19:32 +0900 +Subject: [PATCH 17/31] xtensa: Improve instruction cost estimation and + suggestion + +This patch implements a new target-specific relative RTL insn cost function +because of suboptimal cost estimation by default, and fixes several "length" +insn attributes (related to the cost estimation). + +And also introduces a new machine-dependent option "-mextra-l32r-costs=" +that tells implementation-specific InstRAM/ROM access penalty for L32R +instruction to the compiler (in clock-cycle units, 0 by default). + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): Correct wrong case + for ABS and NEG, add missing case for BSWAP and CLRSB, and + double the costs for integer divisions using libfuncs if + optimizing for speed, in order to take advantage of fast constant + division by multiplication. + (TARGET_INSN_COST): New macro definition. + (xtensa_is_insn_L32R_p, xtensa_insn_cost): New functions for + calculating relative costs of a RTL insns, for both of speed and + size. 
+ * config/xtensa/xtensa.md (return, nop, trap): Correct values of + the attribute "length" that depends on TARGET_DENSITY. + (define_asm_attributes, blockage, frame_blockage): Add missing + attributes. + * config/xtensa/xtensa.opt (-mextra-l32r-costs=): New machine- + dependent option, however, preparatory work for now. +--- + gcc/config/xtensa/xtensa.c | 116 ++++++++++++++++++++++++++++++++--- + gcc/config/xtensa/xtensa.md | 29 ++++++--- + gcc/config/xtensa/xtensa.opt | 4 ++ + 3 files changed, 134 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e2f97b79c..94ff901c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "hw-doloop.h" + #include "rtl-iter.h" ++#include "insn-attr.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + static section *xtensa_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); ++static int xtensa_insn_cost (rtx_insn *, bool); + static int xtensa_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); +@@ -208,6 +210,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS xtensa_rtx_costs ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST xtensa_insn_cost + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +@@ -3972,7 +3976,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + static bool + xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, +- int *total, bool 
speed ATTRIBUTE_UNUSED) ++ int *total, bool speed) + { + int code = GET_CODE (x); + +@@ -4060,9 +4064,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case CLZ: ++ case CLRSB: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + ++ case BSWAP: ++ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); ++ return true; ++ + case NOT: + *total = COSTS_N_INSNS (mode == DImode ? 3 : 2); + return true; +@@ -4086,13 +4095,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case ABS: ++ case NEG: + { + if (mode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (mode == DFmode) + *total = COSTS_N_INSNS (50); +- else ++ else if (mode == DImode) + *total = COSTS_N_INSNS (4); ++ else ++ *total = COSTS_N_INSNS (1); + return true; + } + +@@ -4108,10 +4120,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + } + +- case NEG: +- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); +- return true; +- + case MULT: + { + if (mode == SFmode) +@@ -4151,11 +4159,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case UMOD: + { + if (mode == DImode) +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + return true; + } + +@@ -4188,6 +4196,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + ++static bool ++xtensa_is_insn_L32R_p(const rtx_insn *insn) ++{ ++ rtx x = PATTERN (insn); ++ ++ if (GET_CODE (x) == SET) ++ { ++ x = XEXP (x, 1); ++ if (GET_CODE (x) == MEM) ++ { ++ x = XEXP (x, 0); ++ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); ++ } ++ } ++ ++ return false; ++} ++ ++/* Compute a relative costs of RTL insns. This is necessary in order to ++ achieve better RTL insn splitting/combination result. 
*/ ++ ++static int ++xtensa_insn_cost (rtx_insn *insn, bool speed) ++{ ++ if (!(recog_memoized (insn) < 0)) ++ { ++ int len = get_attr_length (insn), n = (len + 2) / 3; ++ ++ if (len == 0) ++ return COSTS_N_INSNS (0); ++ ++ if (speed) /* For speed cost. */ ++ { ++ /* "L32R" may be particular slow (implementation-dependent). */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); ++ ++ /* Cost based on the pipeline model. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_STORE: ++ case TYPE_MOVE: ++ case TYPE_ARITH: ++ case TYPE_MULTI: ++ case TYPE_NOP: ++ case TYPE_FSTORE: ++ return COSTS_N_INSNS (n); ++ ++ case TYPE_LOAD: ++ return COSTS_N_INSNS (n - 1 + 2); ++ ++ case TYPE_JUMP: ++ case TYPE_CALL: ++ return COSTS_N_INSNS (n - 1 + 3); ++ ++ case TYPE_FCONV: ++ case TYPE_FLOAD: ++ case TYPE_MUL16: ++ case TYPE_MUL32: ++ case TYPE_RSR: ++ return COSTS_N_INSNS (n * 2); ++ ++ case TYPE_FMADD: ++ return COSTS_N_INSNS (n * 4); ++ ++ case TYPE_DIV32: ++ return COSTS_N_INSNS (n * 16); ++ ++ default: ++ break; ++ } ++ } ++ else /* For size cost. */ ++ { ++ /* Cost based on the instruction length. */ ++ if (get_attr_type (insn) != TYPE_UNKNOWN) ++ { ++ /* "L32R" itself plus constant in litpool. */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (2) + 1; ++ ++ /* Consider ".n" short instructions. */ ++ return COSTS_N_INSNS (n) - (n * 3 - len); ++ } ++ } ++ } ++ ++ /* Fall back. */ ++ return pattern_cost (PATTERN (insn), speed); ++} ++ + /* Worker function for TARGET_RETURN_IN_MEMORY. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3314b3fd6..da6b71d1d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -98,7 +98,10 @@ + + ;; Describe a user's asm statement. 
+ (define_asm_attributes +- [(set_attr "type" "multi")]) ++ [(set_attr "type" "multi") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ;; Should be the maximum possible length ++ ;; of a single machine instruction. + + + ;; Pipeline model. +@@ -1884,7 +1887,10 @@ + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "2")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + + ;; Miscellaneous instructions. +@@ -1939,7 +1945,10 @@ + } + [(set_attr "type" "nop") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + (define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") +@@ -2003,8 +2012,9 @@ + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" +- [(set_attr "length" "0") +- (set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + ;; Do not schedule instructions accessing memory before this point. 
+ +@@ -2023,7 +2033,9 @@ + (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" +- [(set_attr "length" "0")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + (define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] +@@ -2036,7 +2048,10 @@ + } + [(set_attr "type" "trap") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't + ;; know if a frame pointer is required until the reload pass, and +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index e1d992f5d..97aa44f92 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -30,6 +30,10 @@ mlongcalls + Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + ++mextra-l32r-costs= ++Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) ++Set extra memory access cost for L32R instruction, in clock-cycle units. ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch b/patches/gcc10.2/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch new file mode 100644 index 0000000..0e14673 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch @@ -0,0 +1,400 @@ +From ed2c4b57807470b386e9abdf145282e197d9da65 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 11 Jun 2022 00:26:17 +0900 +Subject: [PATCH 18/31] xtensa: Improve constant synthesis for both integer and + floating-point + +This patch revises the previous implementation of constant synthesis. 
+ +First, changed to use define_split machine description pattern and to run +after reload pass, in order not to interfere some optimizations such as +the loop invariant motion. + +Second, not only integer but floating-point is subject to processing. + +Third, several new synthesis patterns - when the constant cannot fit into +a "MOVI Ax, simm12" instruction, but: + +I. can be represented as a power of two minus one (eg. 32767, 65535 or + 0x7fffffffUL) + => "MOVI(.N) Ax, -1" + "SRLI Ax, Ax, 1 ... 31" (or "EXTUI") +II. is between -34816 and 34559 + => "MOVI(.N) Ax, -2048 ... 2047" + "ADDMI Ax, Ax, -32768 ... 32512" +III. (existing case) can fit into a signed 12-bit if the trailing zero bits + are stripped + => "MOVI(.N) Ax, -2048 ... 2047" + "SLLI Ax, Ax, 1 ... 31" + +The above sequences consist of 5 or 6 bytes and have latency of 2 clock cycles, +in contrast with "L32R Ax, <litpool>" (3 bytes and one clock latency, but may +suffer additional one clock pipeline stall and implementation-specific +InstRAM/ROM access penalty) plus 4 bytes of constant value. + +In addition, 3-instructions synthesis patterns (8 or 9 bytes, 3 clock latency) +are also provided when optimizing for speed and L32R instruction has +considerable access penalty: + +IV. 2-instructions synthesis (any of I ... III) followed by + "SLLI Ax, Ax, 1 ... 31" +V. 2-instructions synthesis followed by either "ADDX[248] Ax, Ax, Ax" + or "SUBX8 Ax, Ax, Ax" (multiplying by 3, 5, 7 or 9) + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_constantsynth): + New prototype. + * config/xtensa/xtensa.c (xtensa_emit_constantsynth, + xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI, + xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): + New backend functions that process the abovementioned logic. + (xtensa_emit_move_sequence): Revert the previous changes. + * config/xtensa/xtensa.md: New split patterns for integer + and floating-point, as the frontend part.
+ +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_2insns.c: New. + * gcc.target/xtensa/constsynth_3insns.c: Ditto. + * gcc.target/xtensa/constsynth_double.c: Ditto. +--- + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 133 +++++++++++++++--- + gcc/config/xtensa/xtensa.md | 50 +++++++ + .../gcc.target/xtensa/constsynth_2insns.c | 44 ++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 24 ++++ + .../gcc.target/xtensa/constsynth_double.c | 11 ++ + 6 files changed, 247 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 80b1da2bb..d65bc2954 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -44,6 +44,7 @@ extern int xtensa_expand_block_move (rtx *); + extern int xtensa_expand_block_set_unrolled_loop (rtx *); + extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); ++extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); + extern void xtensa_expand_nonlocal_goto (rtx *); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 94ff901c5..ba36d7244 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1027,6 +1027,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + } + + ++/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) ++ into dst with synthesizing a such constant value from a sequence of ++ load-immediate / arithmetic ones, instead of a L32R instruction ++ (plus a constant in litpool). 
*/ ++ ++static void ++xtensa_emit_constantsynth (rtx dst, enum rtx_code code, ++ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT imm2) ++{ ++ gcc_assert (REG_P (dst)); ++ emit_move_insn (dst, GEN_INT (imm0)); ++ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, ++ dst, GEN_INT (imm1))); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, imm2)); ++} ++ ++static int ++xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT op_imm) ++{ ++ int shift = exact_log2 (srcval + 1); ++ ++ if (IN_RANGE (shift, 1, 31)) ++ { ++ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ { ++ HOST_WIDE_INT imm0, imm1; ++ ++ if (srcval < -32768) ++ imm1 = -32768; ++ else if (srcval > 32512) ++ imm1 = 32512; ++ else ++ imm1 = srcval & ~255; ++ imm0 = srcval - imm1; ++ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) ++ imm0 -= 256, imm1 += 256; ++ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); ++ return 1; ++ } ++ ++ shift = ctz_hwi (srcval); ++ if (xtensa_simm12b (srcval >> shift)) ++ { ++ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static rtx ++xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) ++{ ++ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); ++} ++ ++static rtx ++xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) ++{ ++ return imm == 7 ++ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), ++ reg) ++ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, ++ GEN_INT (floor_log2 (imm - 1))), ++ reg); ++} ++ ++int ++xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) ++{ ++ /* No need for synthesizing for what fits into MOVI instruction. */ ++ if (xtensa_simm12b (srcval)) ++ return 0; ++ ++ /* 2-insns substitution. 
*/ ++ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) ++ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) ++ return 1; ++ ++ /* 3-insns substitution. */ ++ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) ++ { ++ int shift, divisor; ++ ++ /* 2-insns substitution followed by SLLI. */ ++ shift = ctz_hwi (srcval); ++ if (IN_RANGE (shift, 1, 31) && ++ xtensa_constantsynth_2insn (dst, srcval >> shift, ++ xtensa_constantsynth_rtx_SLLI, ++ shift)) ++ return 1; ++ ++ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ ++ if (TARGET_ADDX) ++ for (divisor = 3; divisor <= 9; divisor += 2) ++ if (srcval % divisor == 0 && ++ xtensa_constantsynth_2insn (dst, srcval / divisor, ++ xtensa_constantsynth_rtx_ADDSUBX, ++ divisor)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++ + /* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move +@@ -1064,22 +1181,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + + if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) + { +- /* Try to emit MOVI + SLLI sequence, that is smaller +- than L32R + literal. 
*/ +- if (optimize_size && mode == SImode && CONST_INT_P (src) +- && register_operand (dst, mode)) +- { +- HOST_WIDE_INT srcval = INTVAL (src); +- int shift = ctz_hwi (srcval); +- +- if (xtensa_simm12b (srcval >> shift)) +- { +- emit_move_insn (dst, GEN_INT (srcval >> shift)); +- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); +- return 1; +- } +- } +- + src = force_const_mem (SImode, src); + operands[1] = src; + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index da6b71d1d..ddc3087fa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -940,6 +940,19 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "constantpool_operand"))] ++ "! optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ if (! CONST_INT_P (x)) ++ FAIL; ++ if (! xtensa_constantsynth (operands[0], INTVAL (x))) ++ emit_move_insn (operands[0], x); ++}) ++ + ;; 16-bit Integer moves + + (define_expand "movhi" +@@ -1144,6 +1157,43 @@ + (set_attr "mode" "SF") + (set_attr "length" "3")]) + ++(define_split ++ [(set (match_operand:SF 0 "register_operand") ++ (match_operand:SF 1 "constantpool_operand"))] ++ "! 
optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ int i = 0; ++ rtx x = XEXP (operands[1], 0); ++ long l[2]; ++ if (GET_CODE (x) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (x)) ++ x = get_pool_constant (x); ++ else if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ gcc_assert (GET_CODE (x) == PLUS ++ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) ++ && CONST_INT_P (XEXP (x, 1))); ++ i = INTVAL (XEXP (x, 1)); ++ gcc_assert (i == 0 || i == 4); ++ i /= 4; ++ x = get_pool_constant (XEXP (x, 0)); ++ } ++ else ++ gcc_unreachable (); ++ if (GET_MODE (x) == SFmode) ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); ++ else if (GET_MODE (x) == DFmode) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ else ++ FAIL; ++ x = gen_rtx_REG (SImode, REGNO (operands[0])); ++ if (! xtensa_constantsynth (x, l[i])) ++ emit_move_insn (x, GEN_INT (l[i])); ++}) ++ + ;; 64-bit floating point moves + + (define_expand "movdf" +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +new file mode 100644 +index 000000000..43c85a250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++int test_0(void) ++{ ++ return 4095; ++} ++ ++int test_1(void) ++{ ++ return 2147483647; ++} ++ ++int test_2(void) ++{ ++ return -34816; ++} ++ ++int test_3(void) ++{ ++ return -2049; ++} ++ ++int test_4(void) ++{ ++ return 2048; ++} ++ ++int test_5(void) ++{ ++ return 34559; ++} ++ ++int test_6(void) ++{ ++ return 43680; ++} ++ ++void test_7(int *p) ++{ ++ *p = -1432354816; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +new file mode 100644 +index 000000000..f3c4a1c7c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mextra-l32r-costs=3" } */ ++ ++int test_0(void) ++{ ++ return 134217216; ++} ++ ++int test_1(void) ++{ ++ return -27604992; ++} ++ ++int test_2(void) ++{ ++ return -162279; ++} ++ ++void test_3(int *p) ++{ ++ *p = 192437; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +new file mode 100644 +index 000000000..890ca5047 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void test(unsigned int count, double array[]) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ array[i] = 1.0; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0024-Improve-shift-operations-more.patch b/patches/gcc10.2/gcc-xtensa-0024-Improve-shift-operations-more.patch new file mode 100644 index 0000000..9c44b89 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0024-Improve-shift-operations-more.patch @@ -0,0 +1,383 @@ +From fd3771fcc13b8712c91cec70f4533760f72b54e1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:38:31 +0900 +Subject: [PATCH 19/31] xtensa: Improve shift operations more + +This patch introduces funnel shifter utilization, and rearranges existing +"per-byte shift" insn patterns. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (logical_shift_operator, + xtensa_shift_per_byte_operator): New predicates. + * config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction): + New prototype. + * config/xtensa/xtensa.c (xtensa_shlrd_which_direction): + New helper function for funnel shift patterns. + * config/xtensa/xtensa.md (ior_op): New code iterator. + (*ashlsi3_1): Replace with new split pattern. 
+ (*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x. + (*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1): + New insn-and-split patterns that redirect to *xtensa_shift_per_byte, + in order to omit unnecessary bitwise AND operation. + (*shlrd_reg_, *shlrd_const_, *shlrd_per_byte_, + *shlrd_per_byte__omit_AND): + New insn patterns for funnel shifts. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/funnel_shifter.c: New. +--- + gcc/config/xtensa/predicates.md | 6 + + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 14 ++ + gcc/config/xtensa/xtensa.md | 213 ++++++++++++++---- + .../gcc.target/xtensa/funnel_shifter.c | 17 ++ + 5 files changed, 213 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 91b9343a2..e7836f0ec 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -164,9 +164,15 @@ + (define_predicate "boolean_operator" + (match_code "eq,ne")) + ++(define_predicate "logical_shift_operator" ++ (match_code "ashift,lshiftrt")) ++ + (define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + ++(define_predicate "xtensa_shift_per_byte_operator" ++ (match_code "ashift,ashiftrt,lshiftrt")) ++ + (define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index d65bc2954..32743bc67 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); ++extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + + #ifdef TREE_CODE + extern void 
init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba36d7244..473cfaf9d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2394,6 +2394,20 @@ xtensa_tls_referenced_p (rtx x) + } + + ++/* Helper function for "*shlrd_..." patterns. */ ++ ++enum rtx_code ++xtensa_shlrd_which_direction (rtx op0, rtx op1) ++{ ++ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) ++ return ASHIFT; /* shld */ ++ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) ++ return LSHIFTRT; /* shrd */ ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ddc3087fa..58bba89af 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -83,6 +83,9 @@ + ;; the same template. + (define_mode_iterator HQI [HI QI]) + ++;; This code iterator is for *shlrd and its variants. ++(define_code_iterator ior_op [ior plus]) ++ + + ;; Attributes. 
+ +@@ -1272,16 +1275,6 @@ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + }) + +-(define_insn "*ashlsi3_1" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (const_int 1)))] +- "TARGET_DENSITY" +- "add.n\t%0, %1, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "2")]) +- + (define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1294,16 +1287,14 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashlsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8b\t%2\;sll\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "register_operand") ++ (const_int 1)))] ++ "TARGET_DENSITY" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 1) ++ (match_dup 1)))]) + + (define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") +@@ -1317,17 +1308,6 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashrsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8l\t%2\;sra\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) +- + (define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1337,9 +1317,9 @@ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) +- return "srli\t%0, %1, %R2"; ++ return "srli\t%0, %1, %R2"; + else +- 
return "extui\t%0, %1, %R2, %L2"; ++ return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; + } +@@ -1347,13 +1327,170 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*lshrsi3_3x" ++(define_insn "*shift_per_byte" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 3 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]))] ++ "!optimize_debug && optimize" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; ++ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; ++ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_0" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i"))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_1" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i")))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 
5) ++ (neg:SI (match_dup 2))) ++ (set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 5) ++ (const_int 3))]))] ++{ ++ operands[5] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "9")]) ++ ++(define_insn "*shlrd_reg_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (match_dup 2))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_const_" + [(set (match_operand:SI 0 "register_operand" "=a") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i")])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && IN_RANGE (INTVAL (operands[3]), 1, 31) ++ && IN_RANGE (INTVAL (operands[4]), 1, 31) ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" ++{ ++ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) ++ { ++ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; ++ case LSHIFTRT: return 
"ssai\t%R3\;src\t%0, %2, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_per_byte_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shlrd_per_byte__omit_AND" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 4 "const_int_operand" "i"))]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_dup 2) ++ (const_int 3)) ++ (match_dup 4)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ior_op:SI (match_op_dup 5 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]) ++ (match_op_dup 6 ++ [(match_dup 3) ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] + "" +- "ssa8l\t%2\;srl\t%0, %1" + [(set_attr "type" "arith") + 
(set_attr "mode" "SI") + (set_attr "length" "6")]) +diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +new file mode 100644 +index 000000000..c8f987ccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(const void *addr) ++{ ++ unsigned int n = (unsigned int)addr; ++ const unsigned int *a = (const unsigned int*)(n & ~3); ++ n = (n & 3) * 8; ++ return (a[0] >> n) | (a[1] << (32 - n)); ++} ++ ++unsigned int test_1(unsigned int a, unsigned int b) ++{ ++ return (a >> 16) + (b << 16); ++} ++ ++/* { dg-final { scan-assembler-times "src" 2 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch new file mode 100644 index 0000000..cdb96ff --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch @@ -0,0 +1,427 @@ +From 0690bcdd42d0aa6671f9ec3ccbbe70faa04ffb6b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 31 Jan 2022 09:56:21 +0900 +Subject: [PATCH 20/31] xtensa: Simplify conditional branch/move insn patterns + +No need to describe the "false side" conditional insn patterns anymore. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_emit_branch): + Remove the first argument. + (xtensa_emit_bit_branch): Remove it because now called only from the + output statement of *bittrue insn pattern. + * config/xtensa/xtensa.c (gen_int_relational): Remove the last + argument 'p_invert', and make so that the condition is reversed by + itself as needed. + (xtensa_expand_conditional_branch): Share the common path, and remove + condition inversion code. + (xtensa_emit_branch, xtensa_emit_movcc): Simplify by removing the + "false side" pattern. 
+ (xtensa_emit_bit_branch): Remove it because of the abovementioned + reason, and move the function body to *bittrue insn pattern. + * config/xtensa/xtensa.md (*bittrue): Transplant the output + statement from removed xtensa_emit_bit_branch(). + (*bfalse, *ubfalse, *bitfalse, *maskfalse): Remove the "false side" + insn patterns. +--- + gcc/config/xtensa/xtensa-protos.h | 3 +- + gcc/config/xtensa/xtensa.c | 111 ++++++++++------------------ + gcc/config/xtensa/xtensa.md | 117 ++++++++---------------------- + 3 files changed, 70 insertions(+), 161 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 32743bc67..e4b2d2f06 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -51,8 +51,7 @@ extern void xtensa_expand_nonlocal_goto (rtx *); + extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); + extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); +-extern char *xtensa_emit_branch (bool, bool, rtx *); +-extern char *xtensa_emit_bit_branch (bool, bool, rtx *); ++extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 473cfaf9d..8deae3d51 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -118,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = + + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); +-static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); ++static rtx gen_int_relational (enum rtx_code, rtx, rtx); + static rtx gen_float_relational (enum rtx_code, rtx, rtx); + static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); + static rtx 
fixup_subreg_mem (rtx); +@@ -670,8 +670,7 @@ map_test_to_internal_test (enum rtx_code test_code) + static rtx + gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ +- rtx cmp1, /* second operand to compare */ +- int *p_invert /* whether branch needs to reverse test */) ++ rtx cmp1 /* second operand to compare */) + { + struct cmp_info + { +@@ -703,6 +702,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + enum internal_test test; + machine_mode mode; + struct cmp_info *p_info; ++ int invert; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); +@@ -739,9 +739,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- *p_invert = ((GET_CODE (cmp1) == CONST_INT) +- ? p_info->invert_const +- : p_info->invert_reg); ++ invert = ((GET_CODE (cmp1) == CONST_INT) ++ ? p_info->invert_const ++ : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. */ +@@ -758,7 +758,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + cmp1 = temp; + } + +- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); ++ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) ++ : p_info->test_code, ++ VOIDmode, cmp0, cmp1); + } + + +@@ -817,45 +819,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; +- rtx cmp; +- int invert; +- rtx label1, label2; ++ rtx cmp, label; + + switch (mode) + { ++ case E_SFmode: ++ if (TARGET_HARD_FLOAT) ++ { ++ cmp = gen_float_relational (test_code, cmp0, cmp1); ++ break; ++ } ++ /* FALLTHRU */ ++ + case E_DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case E_SImode: +- invert = FALSE; +- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); +- break; +- +- case E_SFmode: +- if (!TARGET_HARD_FLOAT) +- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, +- cmp0, cmp1)); +- invert = FALSE; +- cmp = gen_float_relational (test_code, cmp0, cmp1); ++ cmp = gen_int_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ +- +- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); +- label2 = pc_rtx; +- +- if (invert) +- { +- label2 = label1; +- label1 = pc_rtx; +- } +- ++ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, +- label1, +- label2))); ++ label, ++ pc_rtx))); + } + + +@@ -2058,21 +2048,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + + + char * +-xtensa_emit_branch (bool inverted, bool immed, rtx *operands) ++xtensa_emit_branch (bool immed, rtx *operands) + { + static char result[64]; +- enum rtx_code code; ++ enum rtx_code code = GET_CODE (operands[3]); + const char *op; + +- code = GET_CODE (operands[3]); + switch (code) + { +- case EQ: op = inverted ? "ne" : "eq"; break; +- case NE: op = inverted ? "eq" : "ne"; break; +- case LT: op = inverted ? "ge" : "lt"; break; +- case GE: op = inverted ? "lt" : "ge"; break; +- case LTU: op = inverted ? 
"geu" : "ltu"; break; +- case GEU: op = inverted ? "ltu" : "geu"; break; ++ case EQ: op = "eq"; break; ++ case NE: op = "ne"; break; ++ case LT: op = "lt"; break; ++ case GE: op = "ge"; break; ++ case LTU: op = "ltu"; break; ++ case GEU: op = "geu"; break; + default: gcc_unreachable (); + } + +@@ -2091,32 +2080,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) + } + + +-char * +-xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +-{ +- static char result[64]; +- const char *op; +- +- switch (GET_CODE (operands[3])) +- { +- case EQ: op = inverted ? "bs" : "bc"; break; +- case NE: op = inverted ? "bc" : "bs"; break; +- default: gcc_unreachable (); +- } +- +- if (immed) +- { +- unsigned bitnum = INTVAL (operands[1]) & 0x1f; +- operands[1] = GEN_INT (bitnum); +- sprintf (result, "b%si\t%%0, %%d1, %%2", op); +- } +- else +- sprintf (result, "b%s\t%%0, %%1, %%2", op); +- +- return result; +-} +- +- + char * + xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { +@@ -2125,12 +2088,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + const char *op; + + code = GET_CODE (operands[4]); ++ if (inverted) ++ code = reverse_condition (code); + if (isbool) + { + switch (code) + { +- case EQ: op = inverted ? "t" : "f"; break; +- case NE: op = inverted ? "f" : "t"; break; ++ case EQ: op = "f"; break; ++ case NE: op = "t"; break; + default: gcc_unreachable (); + } + } +@@ -2138,10 +2103,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { + switch (code) + { +- case EQ: op = inverted ? "nez" : "eqz"; break; +- case NE: op = inverted ? "eqz" : "nez"; break; +- case LT: op = inverted ? "gez" : "ltz"; break; +- case GE: op = inverted ? 
"ltz" : "gez"; break; ++ case EQ: op = "eqz"; break; ++ case NE: op = "nez"; break; ++ case LT: op = "ltz"; break; ++ case GE: op = "gez"; break; + default: gcc_unreachable (); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 58bba89af..40000859d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1551,28 +1551,13 @@ + (define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*bfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1581,28 +1566,13 @@ + (define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*ubfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "ubranch_operator" +- 
[(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1613,75 +1583,50 @@ + (define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) +- (pc)))] +- "" +-{ +- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*bitfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) ++ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") ++ (const_int 1) ++ (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*masktrue" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { ++ static char result[64]; ++ char op; + switch (GET_CODE (operands[3])) + { +- case EQ: return "bnone\t%0, %1, %2"; +- case NE: return "bany\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: op = 'c'; break; ++ case NE: op = 's'; break; ++ default: 
gcc_unreachable (); + } ++ if (which_alternative == 0) ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); ++ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); ++ } ++ else ++ sprintf (result, "bb%c\t%%0, %%1, %%2", op); ++ return result; + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_insn "*maskfalse" ++(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] + "" + { + switch (GET_CODE (operands[3])) + { +- case EQ: return "bany\t%0, %1, %2"; +- case NE: return "bnone\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: return "bnone\t%0, %1, %2"; ++ case NE: return "bany\t%0, %1, %2"; ++ default: gcc_unreachable (); + } + } + [(set_attr "type" "jump") +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch b/patches/gcc10.2/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch new file mode 100644 index 0000000..e1d2790 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch @@ -0,0 +1,101 @@ +From a7cf439409089eab17341a1a24fb9be2b967ca7c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 27 May 2021 19:04:12 +0900 +Subject: [PATCH 21/31] xtensa: Make use of BALL/BNALL instructions + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation, but a few similar fused instructions exist: + + "BALL Ax, Ay, label" // if ((~Ax & Ay) == 0) goto label; + "BNALL Ax, Ay, label" // if ((~Ax & Ay) != 0) goto label; + +These instructions have never been emitted before, but there seems to be no reason 
not +to make use of them. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_bitcmpl): New insn pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/BALL-BNALL.c: New. +--- + gcc/config/xtensa/xtensa.md | 21 +++++++++++++ + gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 ++++++++++++++++++++ + 2 files changed, 54 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 40000859d..b34b2afb6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1633,6 +1633,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn "*masktrue_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case EQ: return "ball\t%0, %1, %2"; ++ case NE: return "bnall\t%0, %1, %2"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ + + ;; Zero-overhead looping support. 
+ +diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +new file mode 100644 +index 000000000..ba61c6f37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++extern void foo(void); ++ ++void BNONE_test(int a, int b) ++{ ++ if (a & b) ++ foo(); ++} ++ ++void BANY_test(int a, int b) ++{ ++ if (!(a & b)) ++ foo(); ++} ++ ++void BALL_test(int a, int b) ++{ ++ if (~a & b) ++ foo(); ++} ++ ++void BNALL_test(int a, int b) ++{ ++ if (!(~a & b)) ++ foo(); ++} ++ ++/* { dg-final { scan-assembler-times "bnone" 1 } } */ ++/* { dg-final { scan-assembler-times "bany" 1 } } */ ++/* { dg-final { scan-assembler-times "ball" 1 } } */ ++/* { dg-final { scan-assembler-times "bnall" 1 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch b/patches/gcc10.2/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch new file mode 100644 index 0000000..b13350f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch @@ -0,0 +1,252 @@ +From 43c7f8333028ff03d8a4681ab62de2febcc43f5c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:28:43 +0900 +Subject: [PATCH 22/31] xtensa: Optimize bitwise AND operation with some + specific forms of constants + +This patch offers several insn-and-split patterns for bitwise AND with +register and constant that can be represented as: + +i. 1's least significant N bits and the others 0's (17 <= N <= 31) +ii. 1's most significant N bits and the others 0's (12 <= N <= 31) +iii. M 1's sequence of bits and trailing N 0's bits, that cannot fit into a + "MOVI Ax, simm12" instruction (1 <= M <= 16, 1 <= N <= 30) + +And also offers shortcuts for conditional branch if each of the abovementioned +operations is (not) equal to zero. 
+ +gcc/ChangeLog: + + * config/xtensa/predicates.md (shifted_mask_operand): + New predicate. + * config/xtensa/xtensa.md (*andsi3_const_pow2_minus_one): + New insn-and-split pattern. + (*andsi3_const_negative_pow2, *andsi3_const_shifted_mask, + *masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): Ditto. +--- + gcc/config/xtensa/predicates.md | 10 ++ + gcc/config/xtensa/xtensa.md | 179 ++++++++++++++++++++++++++++++++ + 2 files changed, 189 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index e7836f0ec..367fc17f3 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -52,6 +52,16 @@ + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + ++(define_predicate "shifted_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT mask = INTVAL (op); ++ int shift = ctz_hwi (mask); ++ ++ return IN_RANGE (shift, 1, 31) ++ && xtensa_mask_immediate ((uint32_t)mask >> shift); ++}) ++ + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b34b2afb6..355fb7742 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -645,6 +645,83 @@ + (set_attr "mode" "SI") + (set_attr "length" "6")]) + ++(define_insn_and_split "*andsi3_const_pow2_minus_one" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ashift:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" 
"SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[2]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*andsi3_const_negative_pow2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (lshiftrt:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*andsi3_const_shifted_mask" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "shifted_mask_operand" "i")))] ++ "! xtensa_simm12b (INTVAL (operands[2]))" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (zero_extract:SI (match_dup 1) ++ (match_dup 3) ++ (match_dup 4))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[2]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[2] = GEN_INT (shift); ++ operands[3] = GEN_INT (mask_size); ++ operands[4] = GEN_INT (mask_pos); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && ctz_hwi (INTVAL (operands[2])) == 1") ++ (const_int 5) ++ (const_int 6)))]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +@@ -1654,6 +1731,108 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split 
"*masktrue_const_pow2_minus_one" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (ashift:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[1]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*masktrue_const_negative_pow2" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*masktrue_const_shifted_mask" ++ [(set (pc) ++ (if_then_else (match_operator 4 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "shifted_mask_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 
"" "")) ++ (pc)))] ++ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 ++ && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 6) ++ (zero_extract:SI (match_dup 0) ++ (match_dup 5) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 6) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) ++ (pc)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[1]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[1] = GEN_INT (mask_pos); ++ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); ++ operands[5] = GEN_INT (mask_size); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") ++ (const_int 5) ++ (const_int 6)))]) ++ + + ;; Zero-overhead looping support. + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch b/patches/gcc10.2/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch new file mode 100644 index 0000000..ebe9eb0 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch @@ -0,0 +1,44 @@ +From 7856e5d6344828b2a72aeef671a169dbd1a85a55 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:34:48 +0900 +Subject: [PATCH 23/31] xtensa: Document new -mextra-l32r-costs= + Xtensa-specific option + +gcc/ChangeLog: + * doc/invoke.texi: Document -mextra-l32r-costs= option. 
+--- + gcc/doc/invoke.texi | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index eabeec944..c35f51afb 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. + -mtext-section-literals -mno-text-section-literals @gol + -mauto-litpools -mno-auto-litpools @gol + -mtarget-align -mno-target-align @gol +--mlongcalls -mno-longcalls} ++-mlongcalls -mno-longcalls @gol ++-mextra-l32r-costs=@var{cycles}} + + @emph{zSeries Options} + See S/390 and zSeries Options. +@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call + instructions---look at the disassembled object code to see the actual + instructions. Note that the assembler uses an indirect call for + every cross-file call, not just those that really are out of range. ++ ++@item -mextra-l32r-costs=@var{n} ++@opindex mextra-l32r-costs ++Specify an extra cost of instruction RAM/ROM access for @code{L32R} ++instructions, in clock cycles. This affects, when optimizing for speed, ++whether loading a constant from literal pool using @code{L32R} or ++synthesizing the constant from a small one with a couple of arithmetic ++instructions. The default value is 0. 
+ @end table + + @node zSeries Options +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch b/patches/gcc10.2/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch new file mode 100644 index 0000000..f5c0f78 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch @@ -0,0 +1,354 @@ +From c985f67f0b9a35ca5f22647c326c6b43a2b237fa Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 15 Jun 2022 21:21:21 +0900 +Subject: [PATCH 24/31] xtensa: Add support for sibling call optimization + +This patch introduces support for sibling call optimization, when the Windowed +Register Option is NOT configured. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call, + xtensa_emit_sibcall): New prototypes. + (xtensa_expand_epilogue): Add new argument that specifies whether + or not sibling call. + * config/xtensa/xtensa.c (TARGET_FUNCTION_OK_FOR_SIBCALL): + New macro definition. + (xtensa_prepare_expand_call): New function in order to share + the common code. + (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall): + New functions. + (xtensa_expand_epilogue): Add new argument sibcall_p and use it + for sibling call handling. + * config/xtensa/xtensa.md (call, call_value): + Use xtensa_prepare_expand_call. + (call_internal, call_value_internal): + Add the condition in order to be disabled if sibling call. + (sibcall, sibcall_value, sibcall_epilogue): New expansions. + (sibcall_internal, sibcall_value_internal): New insn patterns, + and split ones in order to take care of the indirect sibcalls. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: New. 
+--- + gcc/config/xtensa/xtensa-protos.h | 4 +- + gcc/config/xtensa/xtensa.c | 57 ++++++++++++- + gcc/config/xtensa/xtensa.md | 93 ++++++++++++++++++---- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++ + 4 files changed, 155 insertions(+), 19 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index e4b2d2f06..75ed3bfb0 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); + extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); ++extern void xtensa_prepare_expand_call (int, rtx *); + extern char *xtensa_emit_call (int, rtx *); ++extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + +@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int); + extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); +-extern void xtensa_expand_epilogue (void); ++extern void xtensa_expand_epilogue (bool); + extern void order_regs_for_local_alloc (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8deae3d51..a714b980a 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -187,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); + static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); 
++static bool xtensa_function_ok_for_sibcall (tree, tree); + + + +@@ -337,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2117,6 +2121,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + } + + ++void ++xtensa_prepare_expand_call (int callop, rtx *operands) ++{ ++ rtx addr = XEXP (operands[callop], 0); ++ ++ if (flag_pic && SYMBOL_REF_P (addr) ++ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) ++ addr = gen_sym_PLT (addr); ++ ++ if (!call_insn_operand (addr, VOIDmode)) ++ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++} ++ ++ + char * + xtensa_emit_call (int callop, rtx *operands) + { +@@ -2135,6 +2153,24 @@ xtensa_emit_call (int callop, rtx *operands) + } + + ++char * ++xtensa_emit_sibcall (int callop, rtx *operands) ++{ ++ static char result[64]; ++ rtx tgt = operands[callop]; ++ ++ if (GET_CODE (tgt) == CONST_INT) ++ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", ++ INTVAL (tgt)); ++ else if (register_operand (tgt, VOIDmode)) ++ sprintf (result, "jx\t%%%d", callop); ++ else ++ sprintf (result, "j.l\t%%%d, a9", callop); ++ ++ return result; ++} ++ ++ + bool + xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) + { +@@ -3305,7 +3341,7 @@ xtensa_expand_prologue (void) + } + + void +-xtensa_expand_epilogue (void) ++xtensa_expand_epilogue (bool sibcall_p) + { + if (!TARGET_WINDOWED_ABI) + { +@@ -3339,10 +3375,13 @@ xtensa_expand_epilogue (void) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), ++ emit_move_insn 
(reg = gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); ++ if (regno == A0_REG && sibcall_p) ++ emit_use (reg); + } + } + +@@ -3377,7 +3416,8 @@ xtensa_expand_epilogue (void) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- emit_jump_insn (gen_return ()); ++ if (!sibcall_p) ++ emit_jump_insn (gen_return ()); + } + + bool +@@ -4893,4 +4933,15 @@ xtensa_asan_shadow_offset (void) + return HOST_WIDE_INT_UC (0x10000000); + } + ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ ++static bool ++xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Do not allow sibcalls when windowed registers ABI is in effect. */ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 355fb7742..2a11d1c86 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,6 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++ (A10_REG 10) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2153,18 +2154,13 @@ + (match_operand 1 "" ""))] + "" + { +- rtx addr = XEXP (operands[0], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (0, operands); + }) + + (define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (0, operands); + } +@@ -2178,19 +2174,14 @@ + (match_operand 2 "" "")))] + "" + { +- rtx addr = XEXP (operands[1], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[1], 0) = 
copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (1, operands); + }) + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); + } +@@ -2198,6 +2189,70 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "sibcall" ++ [(call (match_operand 0 "memory_operand" "") ++ (match_operand 1 "" ""))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (0, operands); ++}) ++ ++(define_insn "sibcall_internal" ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ (match_operand 1 "" "i"))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (0, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(call (mem:SI (match_operand:SI 0 "register_operand")) ++ (match_operand 1 ""))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 0)) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 1))]) ++ ++(define_expand "sibcall_value" ++ [(set (match_operand 0 "register_operand" "") ++ (call (match_operand 1 "memory_operand" "") ++ (match_operand 2 "" "")))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (1, operands); ++}) ++ ++(define_insn "sibcall_value_internal" ++ [(set (match_operand 0 "register_operand" "=a") ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (1, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (call (mem:SI (match_operand:SI 1 "register_operand")) 
++ (match_operand 2 "")))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 1)) ++ (set (match_dup 0) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 2)))]) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +@@ -2265,7 +2320,15 @@ + [(return)] + "" + { +- xtensa_expand_epilogue (); ++ xtensa_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_expand_epilogue (true); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +new file mode 100644 +index 000000000..d2b3fccf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */ ++ ++extern int foo(int); ++extern void bar(int); ++ ++int test_0(int a) { ++ return foo(a); ++} ++ ++void test_1(int a) { ++ bar(a); ++} ++ ++int test_2(int (*a)(void)) { ++ bar(0); ++ return a(); ++} ++ ++/* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch b/patches/gcc10.2/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch new file mode 100644 index 0000000..ad60202 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch @@ -0,0 +1,81 @@ +From 16878066a57f917814a8d6fe45f7f7d2eebdbbc0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:37:54 +0900 +Subject: [PATCH 25/31] xtensa: Add some dedicated patterns that correspond to + GIMPLE canonicalizations + +This patch offers better RTL representations against straightforward +derivations from some tree optimizers' canonicalized forms. 
+ +- rounding up to even, such as '(x + (x & 1))', is canonicalized to + '((x + 1) & -2)', but the former is one instruction less than the latter + in Xtensa ISA. +- signed greater or equal to zero as logical value '((signed)x >= 0)', + is canonicalized to '((unsigned)(x ^ -1) >> 31)', but the equivalent + '(((signed)x >> 31) + 1)' is one instruction less. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*round_up_to_even): + New insn-and-split pattern. + (*signed_ge_zero): Ditto. +--- + gcc/config/xtensa/xtensa.md | 45 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a11d1c86..3e8e2e76f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2709,3 +2709,48 @@ + xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); + DONE; + }) ++ ++(define_insn_and_split "*round_up_to_even" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 1)) ++ (const_int -2)))] ++ "" ++ "#" ++ "can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (and:SI (match_dup 1) ++ (const_int 1))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*signed_ge_zero" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ge:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 0)))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) ++ (ashiftrt:SI (match_dup 1) ++ (const_int 31))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int 1)))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) +-- +2.20.1 + 
diff --git a/patches/gcc10.2/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch b/patches/gcc10.2/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch new file mode 100644 index 0000000..28bb494 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch @@ -0,0 +1,90 @@ +From a0f2dfa2e952111dbd85d2b2f1caaf570facce8a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:39:49 +0900 +Subject: [PATCH 26/31] xtensa: Eliminate unwanted reg-reg moves during DFmode + input reloads + +When spilled DFmode registers are reloaded, they are first loaded into a pair +of SImode regs and then copied from those regs. Such unwanted reg-reg moves +do not seem to be eliminated at the "cprop_hardreg" stage, although output +reloads show no such problem. + +Luckily, it is easy to resolve such inefficiencies with the use of a peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (reload_operand): + New predicate. + * config/xtensa/xtensa.md: New peephole2 pattern. 
+--- + gcc/config/xtensa/predicates.md | 13 +++++++++++++ + gcc/config/xtensa/xtensa.md | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 367fc17f3..c1cddb733 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -165,6 +165,19 @@ + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + ++(define_predicate "reload_operand" ++ (match_code "mem") ++{ ++ const_rtx addr = XEXP (op, 0); ++ if (REG_P (addr)) ++ return REGNO (addr) == A1_REG; ++ if (GET_CODE (addr) == PLUS) ++ return REG_P (XEXP (addr, 0)) ++ && REGNO (XEXP (addr, 0)) == A1_REG ++ && CONST_INT_P (XEXP (addr, 1)); ++ return false; ++}) ++ + (define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3e8e2e76f..2598c09c9 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2754,3 +2754,34 @@ + (if_then_else (match_test "TARGET_DENSITY") + (const_int 5) + (const_int 6)))]) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 6 "reload_operand")) ++ (set (match_operand:SI 1 "register_operand") ++ (match_operand:SI 7 "reload_operand")) ++ (set (match_operand:SF 2 "register_operand") ++ (match_operand:SF 4 "register_operand")) ++ (set (match_operand:SF 3 "register_operand") ++ (match_operand:SF 5 "register_operand"))] ++ "REGNO (operands[0]) == REGNO (operands[4]) ++ && REGNO (operands[1]) == REGNO (operands[5]) ++ && peep2_reg_dead_p (4, operands[0]) ++ && peep2_reg_dead_p (4, operands[1])" ++ [(set (match_dup 2) ++ (match_dup 6)) ++ (set (match_dup 3) ++ (match_dup 7))] ++{ ++ uint32_t check = 0; ++ int i; ++ for (i = 0; i <= 3; ++i) ++ { ++ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); ++ if (check & mask) ++ FAIL; ++ check |= mask; ++ } ++ operands[6] = gen_rtx_MEM 
(SFmode, XEXP (operands[6], 0)); ++ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch b/patches/gcc10.2/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch new file mode 100644 index 0000000..7c4a869 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch @@ -0,0 +1,99 @@ +From d6c2b11e9ce88f3b1a7ddcf9a2712b070ad4dbfb Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:53:04 +0900 +Subject: [PATCH 27/31] xtensa: Eliminate [DS]Cmode hard register clobber that + is immediately followed by whole overwrite the register + +RTL expansion of an assignment to a [DS]Cmode hard register includes an +obstructive register clobber. + +The simplest example: + + double _Complex test(double _Complex c) { + return c; + } + +will be converted to: + + (set (reg:DF 42 [ c ]) (reg:DF 2 a2)) + (set (reg:DF 43 [ c+8 ]) (reg:DF 4 a4)) + (clobber (reg:DC 2 a2)) + (set (reg:DF 2 a2) (reg:DF 42 [ c ])) + (set (reg:DF 4 a4) (reg:DF 43 [ c+8 ])) + (use (reg:DC 2 a2)) + (return) + +and then finally: + + test: + mov a8, a2 + mov a9, a3 + mov a6, a4 + mov a7, a5 + mov a2, a8 + mov a3, a9 + mov a4, a6 + mov a5, a7 + ret + +As you can see, the result is ridiculous. + +This patch eliminates such clobbers so that the optimizer can prune away the +wasted move instructions: + + test: + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (DSC): New split pattern and mode iterator. +--- + gcc/config/xtensa/xtensa.md | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2598c09c9..124548dfe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -87,6 +87,10 @@ + ;; This code iterator is for *shlrd and its variants. 
+ (define_code_iterator ior_op [ior plus]) + ++;; This mode iterator allows the DC and SC patterns to be defined from ++;; the same template. ++(define_mode_iterator DSC [DC SC]) ++ + + ;; Attributes. + +@@ -2785,3 +2789,27 @@ + operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); + operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); + }) ++ ++(define_split ++ [(clobber (match_operand:DSC 0 "register_operand"))] ++ "GP_REG_P (REGNO (operands[0]))" ++ [(const_int 0)] ++{ ++ unsigned int regno = REGNO (operands[0]); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx_insn *insn; ++ rtx x; ++ if (! ((insn = next_nonnote_nondebug_insn (curr_insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno ++ && (insn = next_nonnote_nondebug_insn (insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) ++ FAIL; ++}) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch b/patches/gcc10.2/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch new file mode 100644 index 0000000..6007b49 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch @@ -0,0 +1,111 @@ +From e37c151ca3beacb7f4f116a94c9c80223b0c6fbf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 17 Jun 2022 22:47:49 +0900 +Subject: [PATCH 28/31] xtensa: Defer storing integer constants into litpool + until reload + +Storing integer constants into litpool in the early stage of compilation +hinders some integer optimizations. In fact, such integer constants are +not subject to the constant folding process. 
+ +For example: + + extern unsigned short value; + extern void foo(void); + void test(void) { + if (value == 30001) + foo(); + } + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + extui a2, a2, 0, 16 // runtime zero-extension despite constant + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +This patch defers the placement of integer constants into litpool until +the start of reload: + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/constraints.md (Y): + Change to include integer constants until reload begins. + * config/xtensa/predicates.md (move_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Change to allow storing integer constants into litpool only after + reload begins. +--- + gcc/config/xtensa/constraints.md | 6 ++++-- + gcc/config/xtensa/predicates.md | 5 +++-- + gcc/config/xtensa/xtensa.c | 3 ++- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 9a8caab4f..13b3daafc 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -113,8 +113,10 @@ + + (define_constraint "Y" + "A constant that can be used in relaxed MOVI instructions." +- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") +- (match_test "TARGET_AUTO_LITPOOLS"))) ++ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") ++ (match_test "TARGET_AUTO_LITPOOLS")) ++ (and (match_code "const_int") ++ (match_test "can_create_pseudo_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. 
Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index c1cddb733..633cc6264 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -147,8 +147,9 @@ + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") +- (match_test "GET_MODE_CLASS (mode) == MODE_INT +- && xtensa_simm12b (INTVAL (op))")) ++ (match_test "(GET_MODE_CLASS (mode) == MODE_INT ++ && xtensa_simm12b (INTVAL (op))) ++ || can_create_pseudo_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a714b980a..1d64e2c76 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1173,7 +1173,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + return 1; + } + +- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) ++ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 ++ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + { + src = force_const_mem (SImode, src); + operands[1] = src; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch b/patches/gcc10.2/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch new file mode 100644 index 0000000..5ecac42 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch @@ -0,0 +1,129 @@ +From dfaefed18297218392071039325baabac59d5c43 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 19 Jun 2022 22:32:45 +0900 +Subject: [PATCH 29/31] xtensa: Apply a few minor fixes + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Use can_create_pseudo_p(), instead of using individual + reload_in_progress and reload_completed. 
+ (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), + the existing predicate function. + (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): + Use the standard RTX code predicate macros such as MEM_P, + SYMBOL_REF_P and/or CONST_INT_P. + * config/xtensa/xtensa.md: Avoid using numeric literals to determine + if callee-saved register, at the split patterns for indirect sibcall + fixups. +--- + gcc/config/xtensa/xtensa.c | 16 ++++++++-------- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 2 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 1d64e2c76..595c5f96f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -743,7 +743,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- invert = ((GET_CODE (cmp1) == CONST_INT) ++ invert = (CONST_INT_P (cmp1) + ? p_info->invert_const + : p_info->invert_reg); + +@@ -1200,7 +1200,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + } + } + +- if (!(reload_in_progress | reload_completed) ++ if (can_create_pseudo_p () + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + +@@ -1603,7 +1603,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) + return 0; + + /* If no 4-byte aligned, loop count should be treated as the +@@ -2160,7 +2160,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) + static char result[64]; + rtx tgt = operands[callop]; + +- if (GET_CODE (tgt) == CONST_INT) ++ if (CONST_INT_P (tgt)) + sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", + INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) +@@ -4318,17 +4318,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + static bool +-xtensa_is_insn_L32R_p(const rtx_insn *insn) ++xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + + if (GET_CODE (x) == SET) + { +- x = XEXP (x, 1); +- if (GET_CODE (x) == MEM) ++ x = SET_SRC (x); ++ if (MEM_P (x)) + { + x = XEXP (x, 0); +- return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) + && CONSTANT_POOL_ADDRESS_P (x); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 124548dfe..6f51a5357 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1251,14 +1251,14 @@ + int i = 0; + rtx x = XEXP (operands[1], 0); + long l[2]; +- if (GET_CODE (x) == SYMBOL_REF ++ if (SYMBOL_REF_P (x) + && CONSTANT_POOL_ADDRESS_P (x)) + x = get_pool_constant (x); + else if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS +- && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_P (XEXP (x, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))); + i = INTVAL (XEXP (x, 1)); +@@ -2217,7 +2217,7 @@ + (match_operand 1 ""))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" + [(set (reg:SI A10_REG) + (match_dup 0)) + (call (mem:SI (reg:SI A10_REG)) +@@ -2250,7 +2250,7 @@ + (match_operand 2 "")))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" + [(set (reg:SI A10_REG) + (match_dup 1)) + (set (match_dup 0) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch b/patches/gcc10.2/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch new file mode 100644 index 0000000..d65c44d --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch @@ -0,0 +1,56 @@ +From 48c657f23a61a41a46842b25bce4f287a56223a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 20 Jun 2022 01:56:16 +0900 +Subject: [PATCH 30/31] xtensa: Fix RTL insn cost estimation about relaxed MOVI + instructions + +These instructions will all be converted to L32R ones with litpool entries +by the assembler. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_is_insn_L32R_p): + Consider relaxed MOVI instructions as L32R. 
+--- + gcc/config/xtensa/xtensa.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 595c5f96f..b92ec9caa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4322,17 +4322,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + +- if (GET_CODE (x) == SET) ++ if (GET_CODE (x) != SET) ++ return false; ++ ++ x = XEXP (x, 1); ++ if (MEM_P (x)) + { +- x = SET_SRC (x); +- if (MEM_P (x)) +- { +- x = XEXP (x, 0); +- return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +- && CONSTANT_POOL_ADDRESS_P (x); +- } ++ x = XEXP (x, 0); ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); + } + ++ /* relaxed MOVI instructions, that will be converted to L32R by the ++ assembler. */ ++ if (CONST_INT_P (x) ++ && ! xtensa_simm12b (INTVAL (x))) ++ return true; ++ + return false; + } + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0036-Fix-buffer-overflow.patch b/patches/gcc10.2/gcc-xtensa-0036-Fix-buffer-overflow.patch new file mode 100644 index 0000000..35f9f10 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0036-Fix-buffer-overflow.patch @@ -0,0 +1,33 @@ +From 75c341c7de5c6f325d6ded7bd91d77793fe358d5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 22 Jun 2022 04:04:45 +0900 +Subject: [PATCH 31/31] xtensa: Fix buffer overflow + +Fortify buffer overflow message reported. +(see https://github.com/earlephilhower/esp-quick-toolchain/issues/36) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2_internal): + Enlarge the buffer that is obviously smaller than the template + string given to sprintf(). 
+--- + gcc/config/xtensa/xtensa.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6f51a5357..81b016859 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -536,7 +536,7 @@ + { + rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); + const char *init = "ssai\t8\;"; +- static char result[64]; ++ static char result[128]; + if (prev_insn && NONJUMP_INSN_P (prev_insn)) + { + rtx x = PATTERN (prev_insn); +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch b/patches/gcc10.2/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch new file mode 100644 index 0000000..0ea6d48 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch @@ -0,0 +1,95 @@ +From 9308911796a46bd689bbcc1cedef1b63ae9b871e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 26 Jun 2022 14:07:56 +0900 +Subject: [PATCH] xtensa: Optimize integer constant addition that is + between -32896 and 32639 + +Such constants are often subject to the constant synthesis: + + int test(int a) { + return a - 31999; + } + + test: + movi a3, 1 + addmi a3, a3, -0x7d00 + add a2, a2, a3 + ret + +This patch optimizes such case as follows: + + test: + addi a2, a2, 1 + addmi a2, a2, -0x7d00 + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Suppress unnecessary emitting nop insn in the split patterns for + integer/FP constant synthesis, and add new peephole2 pattern that + folds such synthesized additions. +--- + gcc/config/xtensa/xtensa.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 81b016859..b697e16db 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1036,6 +1036,7 @@ + FAIL; + if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) + emit_move_insn (operands[0], x); ++ DONE; + }) + + ;; 16-bit Integer moves +@@ -1277,6 +1278,7 @@ + x = gen_rtx_REG (SImode, REGNO (operands[0])); + if (! xtensa_constantsynth (x, l[i])) + emit_move_insn (x, GEN_INT (l[i])); ++ DONE; + }) + + ;; 64-bit floating point moves +@@ -2813,3 +2815,36 @@ + && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) + FAIL; + }) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand")) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (match_operand:SI 2 "const_int_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (plus:SI (match_operand:SI 4 "register_operand") ++ (match_dup 0)))] ++ "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]), ++ (-128 - 32768), (127 + 32512)) ++ && REGNO (operands[0]) != REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[4]) ++ && peep2_reg_dead_p (3, operands[0])" ++ [(set (match_dup 3) ++ (plus:SI (match_dup 4) ++ (match_dup 1))) ++ (set (match_dup 3) ++ (plus:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]); ++ int imm0, imm1; ++ value += 128; ++ if (value > 32512) ++ imm1 = 32512; ++ else ++ imm1 = value & ~255; ++ imm0 = value - imm1 - 128; ++ operands[1] = GEN_INT (imm0); ++ operands[2] = GEN_INT (imm1); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch b/patches/gcc10.2/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch new file mode 100644 index 0000000..8fc23d8 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch @@ -0,0 +1,92 @@ +From 7bed998154345cb072cd425b5d61734d3e0bac5d Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 1 Jul 2022 13:39:34 +0900 +Subject: [PATCH] xtensa: Minor fix for FP constant synthesis + +This patch fixes an non-fatal issue about negative 
constant values derived +from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'. + +And also replaces the dedicated code in FP constant synthesis split +pattern with the appropriate existing function call. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + In FP constant synthesis split pattern, subcontract to + avoid_constant_pool_reference() as in the case of integer, + because it can handle well too. And cast to int32_t before + calling xtensa_constantsynth() in order to ignore upper 32-bit. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_double.c: + Modify in order to catch the issue. +--- + gcc/config/xtensa/xtensa.md | 35 +++++-------------- + .../gcc.target/xtensa/constsynth_double.c | 2 +- + 2 files changed, 9 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b697e16db..6ef84b4f2 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1249,35 +1249,16 @@ + "! optimize_debug && reload_completed" + [(const_int 0)] + { +- int i = 0; +- rtx x = XEXP (operands[1], 0); +- long l[2]; +- if (SYMBOL_REF_P (x) +- && CONSTANT_POOL_ADDRESS_P (x)) +- x = get_pool_constant (x); +- else if (GET_CODE (x) == CONST) +- { +- x = XEXP (x, 0); +- gcc_assert (GET_CODE (x) == PLUS +- && SYMBOL_REF_P (XEXP (x, 0)) +- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) +- && CONST_INT_P (XEXP (x, 1))); +- i = INTVAL (XEXP (x, 1)); +- gcc_assert (i == 0 || i == 4); +- i /= 4; +- x = get_pool_constant (XEXP (x, 0)); +- } +- else +- gcc_unreachable (); +- if (GET_MODE (x) == SFmode) +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); +- else if (GET_MODE (x) == DFmode) +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); +- else ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ long l; ++ HOST_WIDE_INT value; ++ if (! 
CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode) + FAIL; ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + x = gen_rtx_REG (SImode, REGNO (operands[0])); +- if (! xtensa_constantsynth (x, l[i])) +- emit_move_insn (x, GEN_INT (l[i])); ++ value = (int32_t)l; ++ if (! xtensa_constantsynth (x, value)) ++ emit_move_insn (x, GEN_INT (value)); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +index 890ca5047..5fba6a986 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -5,7 +5,7 @@ void test(unsigned int count, double array[]) + { + unsigned int i; + for (i = 0; i < count; ++i) +- array[i] = 1.0; ++ array[i] = 8.988474246316506e+307; + } + + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch b/patches/gcc10.2/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch new file mode 100644 index 0000000..fcb3c72 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch @@ -0,0 +1,132 @@ +From afcf727f9c4174b104b594cbd14cba9c57de71d1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 15 Jul 2022 08:46:55 +0900 +Subject: [PATCH] xtensa: constantsynth: Make try to find shorter + instruction + +This patch allows the constant synthesis to choose shorter instruction +if possible. + + /* example */ + int test(void) { + return 128 << 8; + } + + ;; before + test: + movi a2, 0x100 + addmi a2, a2, 0x7f00 + ret.n + + ;; after + test: + movi.n a2, 1 + slli a2, a2, 15 + ret.n + +When the Code Density Option is configured, the latter is one byte smaller +than the former. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_constantsynth): Remove. 
+ (xtensa_constantsynth_2insn): Change to try all three synthetic + methods and to use the one that fits the immediate value of + the seed into a Narrow Move Immediate instruction "MOVI.N" + when the Code Density Option is configured. +--- + gcc/config/xtensa/xtensa.c | 58 +++++++++++++++++++------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b92ec9caa..a5330e52b 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1026,35 +1026,35 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + load-immediate / arithmetic ones, instead of a L32R instruction + (plus a constant in litpool). */ + +-static void +-xtensa_emit_constantsynth (rtx dst, enum rtx_code code, +- HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, +- rtx (*gen_op)(rtx, HOST_WIDE_INT), +- HOST_WIDE_INT imm2) +-{ +- gcc_assert (REG_P (dst)); +- emit_move_insn (dst, GEN_INT (imm0)); +- emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, +- dst, GEN_INT (imm1))); +- if (gen_op) +- emit_move_insn (dst, gen_op (dst, imm2)); +-} +- + static int + xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + rtx (*gen_op)(rtx, HOST_WIDE_INT), + HOST_WIDE_INT op_imm) + { +- int shift = exact_log2 (srcval + 1); ++ HOST_WIDE_INT imm = INT_MAX; ++ rtx x = NULL_RTX; ++ int shift; + ++ gcc_assert (REG_P (dst)); ++ ++ shift = exact_log2 (srcval + 1); + if (IN_RANGE (shift, 1, 31)) + { +- xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, +- gen_op, op_imm); +- return 1; ++ imm = -1; ++ x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift)); + } + +- if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ ++ shift = ctz_hwi (srcval); ++ if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95))) ++ && xtensa_simm12b (srcval >> shift)) ++ { ++ imm = srcval >> shift; ++ x = gen_ashlsi3 (dst, dst, GEN_INT (shift)); ++ } ++ ++ if ((!x || (TARGET_DENSITY && ! 
IN_RANGE (imm, -32, 95))) ++ && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) + { + HOST_WIDE_INT imm0, imm1; + +@@ -1067,19 +1067,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + imm0 = srcval - imm1; + if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) + imm0 -= 256, imm1 += 256; +- xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); +- return 1; ++ imm = imm0; ++ x = gen_addsi3 (dst, dst, GEN_INT (imm1)); + } + +- shift = ctz_hwi (srcval); +- if (xtensa_simm12b (srcval >> shift)) +- { +- xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, +- gen_op, op_imm); +- return 1; +- } ++ if (!x) ++ return 0; + +- return 0; ++ emit_move_insn (dst, GEN_INT (imm)); ++ emit_insn (x); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, op_imm)); ++ ++ return 1; + } + + static rtx +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch b/patches/gcc10.2/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch new file mode 100644 index 0000000..acf6d99 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch @@ -0,0 +1,177 @@ +From 5776497b68fcce6bf31835cf0a4d693e336bb2ca Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 14 Jul 2022 20:47:46 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND with imm1" followed by + "branch if (not) equal to imm2" + +This patch enhances the effectiveness of the previously posted one: +"xtensa: Optimize bitwise AND operation with some specific forms of constants". 
+ + /* example */ + extern void foo(int); + void test(int a) { + if ((a & (-1U << 8)) == (128 << 8)) /* 0 or one of "b4const" */ + foo(a); + } + + ;; before + .global test + test: + movi a3, -0x100 + movi.n a4, 1 + and a3, a2, a3 + slli a4, a4, 15 + bne a3, a4, .L3 + j.l foo, a9 + .L1: + ret.n + + ;; after + .global test + test: + srli a3, a2, 8 + bnei a3, 128, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): If the immediate for bitwise AND is + represented as '-(1 << N)', decrease the lower bound of N from 12 + to 1. And the other immediate for conditional branch is now no + longer limited to zero, but also one of some positive integers. + Finally, remove the checks of some conditions, because the comparison + expressions that don't satisfy such checks are determined as + compile-time constants and thus will be optimized away before + RTL expansion. +--- + gcc/config/xtensa/xtensa.md | 73 ++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6ef84b4f2..ca8b3913d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1721,63 +1721,78 @@ + + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && 
xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL (operands[1]) + 1)))" + "#" + "&& can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (ashift:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) << shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set (attr "length") +- (if_then_else (match_test "TARGET_DENSITY +- && INTVAL (operands[1]) == 0x7FFFFFFF") +- (const_int 5) +- (const_int 6)))]) ++ (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 0x7FFFFFFF) ++ && INTVAL (operands[2]) == 0") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[1]) == 0x7FFFFFFF ++ || INTVAL (operands[2]) == 0)") ++ (const_int 5) ++ (const_int 6))))]) + + (define_insn_and_split "*masktrue_const_negative_pow2" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && xtensa_b4const_or_zero (INTVAL (operands[2]) >> floor_log2 (-INTVAL (operands[1])))" + "#" + "&& 
can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (lshiftrt:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = floor_log2 (-INTVAL (operands[1])); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "6")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[2]) == 0") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn_and_split "*masktrue_const_shifted_mask" + [(set (pc) +@@ -1787,8 +1802,8 @@ + (match_operand:SI 2 "const_int_operand" "i")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +- "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 +- && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "/* (INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 // can be omitted ++ && */ xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-Improve-initialization-of-objects-when-the-initializ.patch b/patches/gcc10.3/gcc-Improve-initialization-of-objects-when-the-initializ.patch new file mode 100644 index 0000000..00fdb45 --- /dev/null +++ b/patches/gcc10.3/gcc-Improve-initialization-of-objects-when-the-initializ.patch @@ -0,0 +1,39 @@ +From a2cde0c6443c440c2a2b72b5eea060229a0cff57 Mon Sep 17 00:00:00 2001 +From: Jeff Law +Date: Sat, 9 Jul 2022 11:11:00 -0400 +Subject: [PATCH] [RFA] Improve initialization of 
objects when the initializer + +gcc/ + + * expr.c (store_expr): Identify trailing NULs in a STRING_CST + initializer and use clear_storage rather than copying the + NULs to the destination array. +--- + gcc/expr.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f33..6ff393462 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -5723,6 +5723,17 @@ store_expr (tree exp, rtx target, int call_param_p, + } + + str_copy_len = TREE_STRING_LENGTH (str); ++ ++ /* Trailing NUL bytes in EXP will be handled by the call to ++ clear_storage, which is more efficient than copying them from ++ the STRING_CST, so trim those from STR_COPY_LEN. */ ++ while (str_copy_len) ++ { ++ if (TREE_STRING_POINTER (str)[str_copy_len - 1]) ++ break; ++ str_copy_len--; ++ } ++ + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) + { + str_copy_len += STORE_MAX_PIECES - 1; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch b/patches/gcc10.3/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch new file mode 100644 index 0000000..4c5418f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch @@ -0,0 +1,44 @@ +From 2065a3fccb11e28ebcc42aa46c52a40b0fae9bea Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Sun, 21 Nov 2021 20:18:31 -0600 +Subject: [PATCH 01/31] xtensa: Fix non-robust split condition in + define_insn_and_split + +This patch is to fix some non-robust split conditions in some +define_insn_and_splits, to make each of them applied on top of +the corresponding condition for define_insn part, otherwise the +splitting could perform unexpectedly. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi_internal, movdf_internal): Fix split + condition. 
+--- + gcc/config/xtensa/xtensa.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a8e59ee9..123916957 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -782,7 +782,7 @@ + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +@@ -1058,7 +1058,7 @@ + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc10.3/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc10.3/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! 
optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 - diff --git a/patches/gcc10.3/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch b/patches/gcc10.3/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch deleted file mode 100644 index 9f8e00b..0000000 --- a/patches/gcc10.3/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch +++ /dev/null @@ -1,3186 +0,0 @@ -From 989fc2c516206d7cf70177a416815f91998e2131 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Fri, 27 May 2022 21:34:37 +0900 -Subject: [PATCH 1/3] xtensa: Backport patches from upstream/master - -2b5b8610e985e23a0c2e0272339ab074a750e240 "xtensa: Fix non-robust split condition in define_insn_and_split" -7e5baa7e6f4caced6bdaef6d866d19e7656d8a16 "xtensa: fix -Wformat-diag warnings." -d543bac1631700f0da30d5ca555296f4938a82c6 "xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi" -112447f8564c0307c5da99a4094a3a99f204239f "xtensa: Reflect the 32-bit Integer Divide Option" -b753405a5f0d45eea97f4cc7df2c2089401b08bf "xtensa: Simplify EXTUI instruction maskimm validations" -9b251fe2e39a49c0d3ecd34cf8c5d55544efd159 "xtensa: Make use of IN_RANGE macro where appropriate" -3397563ad6c8fc5d9675faf507e52dd2ed284202 "xtensa: Fix instruction counting regarding block move expansion" -6454b4a8f5d90dd355c3c7e31a592a439223b645 "xtensa: Add setmemsi insn pattern" -9aad2b22436d5346fa224e5c14439dcef36cf3dd "xtensa: Improve bswap[sd]i2 insn patterns" -e94c6dbfb57a862dd8a8685eabc4886ad1aaea25 "xtensa: fix PR target/105879" -2fcc69d8ce4eddf6dea878a5383254d366e1bb14 "xtensa: Implement bswaphi2 insn pattern" -9777d446e2148ef9a6e9f35db3f4eab99ee8812c "xtensa: Make one_cmplsi2 optimizer-friendly" -e44e7face13f38f9b228e2619786ba0add9ef77b "xtensa: Optimize '(~x & y)' to '((x & y) ^ y)'" -29dc90a580bf45f503ed89eb1dc63b5676db776b "xtensa: Add clrsbsi2 insn 
pattern" -9489a1ab05ad1bda7126da5513f08282da3e531d "xtensa: Tweak some widen multiplications" -fddf0e1057fe24eff0d894fbc2959b4086464a96 "xtensa: Consider the Loop Option when setmemsi is expanded to small loop" -ccd02e734e0f1742629403b46e5b1c650b00fd65 "xtensa: Improve instruction cost estimation and suggestion" -cd02f15f1aecc45b2c2feae16840503549508619 "xtensa: Improve constant synthesis for both integer and floating-point" -1c68ec1f8ab531fba56cccf549ffe592bf622821 "xtensa: Improve shift operations more" -e1b193c1cce3a975a9ed60dd0f30182fe0255d7c "xtensa: Simplify conditional branch/move insn patterns" -70ce04ca353bb0cda8321b91a77c2477e26d339b "xtensa: Make use of BALL/BNALL instructions" -077438933cf94f00cc5edf974338c11ba4bf7a39 "xtensa: Optimize bitwise AND operation with some specific forms of constants" -96518f714e3fab53a966a05b8d48011e27c1a718 "xtensa: Document new -mextra-l32r-costs= Xtensa-specific option" -43b0c56fda4bc990e8ee8d6a0b376de7b663bb06 "xtensa: Add support for sibling call optimization" -c95e307e3a978166cd5d6817ec9d8293825ff3fb "xtensa: Add some dedicated patterns that correspond to GIMPLE canonicalizations" -cfad4856fa46abc878934a9433d0bfc2482ccf00 "xtensa: Eliminate unwanted reg-reg moves during DFmode input reloads" -ce3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9 "xtensa: Eliminate [DS]Cmode hard register clobber that is immediately followed by whole overwrite the register" -479b6f449ee999501ad6eff0b7db8d0cd5b2d28d "xtensa: Defer storing integer constants into litpool until reload" ---- - gcc/config/xtensa/constraints.md | 10 +- - gcc/config/xtensa/predicates.md | 41 +- - gcc/config/xtensa/xtensa-protos.h | 11 +- - gcc/config/xtensa/xtensa.c | 733 +++++++++--- - gcc/config/xtensa/xtensa.h | 7 +- - gcc/config/xtensa/xtensa.md | 1024 +++++++++++++---- - gcc/config/xtensa/xtensa.opt | 6 +- - gcc/doc/invoke.texi | 11 +- - gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 + - gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 + - 
gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 + - gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 + - .../gcc.target/xtensa/check_zero_byte.c | 9 + - .../gcc.target/xtensa/constsynth_2insns.c | 44 + - .../gcc.target/xtensa/constsynth_3insns.c | 24 + - .../gcc.target/xtensa/constsynth_double.c | 11 + - .../gcc.target/xtensa/funnel_shifter.c | 17 + - .../gcc.target/xtensa/one_cmpl_abs.c | 9 + - gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 + - libgcc/config/xtensa/lib1funcs.S | 23 + - libgcc/config/xtensa/t-xtensa | 2 +- - 21 files changed, 1796 insertions(+), 350 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c - -diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md -index 2062c8816..13b3daafc 100644 ---- a/gcc/config/xtensa/constraints.md -+++ b/gcc/config/xtensa/constraints.md -@@ -92,7 +92,7 @@ - "An integer constant in the range @minus{}32-95 for use with MOVI.N - instructions." - (and (match_code "const_int") -- (match_test "ival >= -32 && ival <= 95"))) -+ (match_test "IN_RANGE (ival, -32, 95)"))) - - (define_constraint "N" - "An unsigned 8-bit integer constant shifted left by 8 bits for use -@@ -103,7 +103,7 @@ - (define_constraint "O" - "An integer constant that can be used in ADDI.N instructions." 
- (and (match_code "const_int") -- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) -+ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) - - (define_constraint "P" - "An integer constant that can be used as a mask value in an EXTUI -@@ -113,8 +113,10 @@ - - (define_constraint "Y" - "A constant that can be used in relaxed MOVI instructions." -- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -- (match_test "TARGET_AUTO_LITPOOLS"))) -+ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -+ (match_test "TARGET_AUTO_LITPOOLS")) -+ (and (match_code "const_int") -+ (match_test "can_create_pseudo_p ()")))) - - ;; Memory constraints. Do not use define_memory_constraint here. Doing so - ;; causes reload to force some constants into the constant pool, but since -diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md -index eb52b05aa..633cc6264 100644 ---- a/gcc/config/xtensa/predicates.md -+++ b/gcc/config/xtensa/predicates.md -@@ -25,8 +25,7 @@ - - (define_predicate "addsubx_operand" - (and (match_code "const_int") -- (match_test "INTVAL (op) >= 1 -- && INTVAL (op) <= 3"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) - - (define_predicate "arith_operand" - (ior (and (match_code "const_int") -@@ -53,9 +52,19 @@ - (match_test "xtensa_mask_immediate (INTVAL (op))")) - (match_operand 0 "register_operand"))) - -+(define_predicate "shifted_mask_operand" -+ (match_code "const_int") -+{ -+ HOST_WIDE_INT mask = INTVAL (op); -+ int shift = ctz_hwi (mask); -+ -+ return IN_RANGE (shift, 1, 31) -+ && xtensa_mask_immediate ((uint32_t)mask >> shift); -+}) -+ - (define_predicate "extui_fldsz_operand" - (and (match_code "const_int") -- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) - - (define_predicate "sext_operand" - (if_then_else (match_test "TARGET_SEXT") -@@ -64,7 +73,7 @@ - - (define_predicate "sext_fldsz_operand" - (and (match_code 
"const_int") -- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) -+ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) - - (define_predicate "lsbitnum_operand" - (and (match_code "const_int") -@@ -138,8 +147,9 @@ - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) - (ior (and (match_code "const_int") -- (match_test "GET_MODE_CLASS (mode) == MODE_INT -- && xtensa_simm12b (INTVAL (op))")) -+ (match_test "(GET_MODE_CLASS (mode) == MODE_INT -+ && xtensa_simm12b (INTVAL (op))) -+ || can_create_pseudo_p ()")) - (and (match_code "const_int,const_double,const,symbol_ref,label_ref") - (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) - && CONSTANT_P (op) -@@ -156,6 +166,19 @@ - (and (match_code "const_int") - (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) - -+(define_predicate "reload_operand" -+ (match_code "mem") -+{ -+ const_rtx addr = XEXP (op, 0); -+ if (REG_P (addr)) -+ return REGNO (addr) == A1_REG; -+ if (GET_CODE (addr) == PLUS) -+ return REG_P (XEXP (addr, 0)) -+ && REGNO (XEXP (addr, 0)) == A1_REG -+ && CONST_INT_P (XEXP (addr, 1)); -+ return false; -+}) -+ - (define_predicate "branch_operator" - (match_code "eq,ne,lt,ge")) - -@@ -165,9 +188,15 @@ - (define_predicate "boolean_operator" - (match_code "eq,ne")) - -+(define_predicate "logical_shift_operator" -+ (match_code "ashift,lshiftrt")) -+ - (define_predicate "xtensa_cstoresi_operator" - (match_code "eq,ne,gt,ge,lt,le")) - -+(define_predicate "xtensa_shift_per_byte_operator" -+ (match_code "ashift,ashiftrt,lshiftrt")) -+ - (define_predicate "tls_symbol_operand" - (and (match_code "symbol_ref") - (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) -diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h -index 18d803581..75ed3bfb0 100644 ---- a/gcc/config/xtensa/xtensa-protos.h -+++ b/gcc/config/xtensa/xtensa-protos.h -@@ -41,18 +41,23 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); - extern int 
xtensa_expand_conditional_move (rtx *, int); - extern int xtensa_expand_scc (rtx *, machine_mode); - extern int xtensa_expand_block_move (rtx *); -+extern int xtensa_expand_block_set_unrolled_loop (rtx *); -+extern int xtensa_expand_block_set_small_loop (rtx *); - extern void xtensa_split_operand_pair (rtx *, machine_mode); -+extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); - extern int xtensa_emit_move_sequence (rtx *, machine_mode); - extern rtx xtensa_copy_incoming_a7 (rtx); - extern void xtensa_expand_nonlocal_goto (rtx *); - extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); - extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); - extern void xtensa_emit_loop_end (rtx_insn *, rtx *); --extern char *xtensa_emit_branch (bool, bool, rtx *); --extern char *xtensa_emit_bit_branch (bool, bool, rtx *); -+extern char *xtensa_emit_branch (bool, rtx *); - extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); -+extern void xtensa_prepare_expand_call (int, rtx *); - extern char *xtensa_emit_call (int, rtx *); -+extern char *xtensa_emit_sibcall (int, rtx *); - extern bool xtensa_tls_referenced_p (rtx); -+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); - - #ifdef TREE_CODE - extern void init_cumulative_args (CUMULATIVE_ARGS *, int); -@@ -70,7 +75,7 @@ extern int xtensa_dbx_register_number (int); - extern long compute_frame_size (poly_int64); - extern bool xtensa_use_return_instruction_p (void); - extern void xtensa_expand_prologue (void); --extern void xtensa_expand_epilogue (void); -+extern void xtensa_expand_epilogue (bool); - extern void order_regs_for_local_alloc (void); - extern enum reg_class xtensa_regno_to_class (int regno); - extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 6cd9d5528..5b1aa9b23 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -55,6 +55,7 @@ along with GCC; see the 
file COPYING3. If not see - #include "dumpfile.h" - #include "hw-doloop.h" - #include "rtl-iter.h" -+#include "insn-attr.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -117,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = - - static void xtensa_option_override (void); - static enum internal_test map_test_to_internal_test (enum rtx_code); --static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); -+static rtx gen_int_relational (enum rtx_code, rtx, rtx); - static rtx gen_float_relational (enum rtx_code, rtx, rtx); - static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); - static rtx fixup_subreg_mem (rtx); -@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, - static section *xtensa_select_rtx_section (machine_mode, rtx, - unsigned HOST_WIDE_INT); - static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); -+static int xtensa_insn_cost (rtx_insn *, bool); - static int xtensa_register_move_cost (machine_mode, reg_class_t, - reg_class_t); - static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); -@@ -185,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); - static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); - static HOST_WIDE_INT xtensa_starting_frame_offset (void); - static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); -+static bool xtensa_function_ok_for_sibcall (tree, tree); - - - -@@ -208,6 +211,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS xtensa_rtx_costs -+#undef TARGET_INSN_COST -+#define TARGET_INSN_COST xtensa_insn_cost - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 - -@@ -333,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #undef 
TARGET_HAVE_SPECULATION_SAFE_VALUE - #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed - -+#undef TARGET_FUNCTION_OK_FOR_SIBCALL -+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - -@@ -341,42 +349,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; - bool - xtensa_simm8 (HOST_WIDE_INT v) - { -- return v >= -128 && v <= 127; -+ return IN_RANGE (v, -128, 127); - } - - - bool - xtensa_simm8x256 (HOST_WIDE_INT v) - { -- return (v & 255) == 0 && (v >= -32768 && v <= 32512); -+ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); - } - - - bool - xtensa_simm12b (HOST_WIDE_INT v) - { -- return v >= -2048 && v <= 2047; -+ return IN_RANGE (v, -2048, 2047); - } - - - static bool - xtensa_uimm8 (HOST_WIDE_INT v) - { -- return v >= 0 && v <= 255; -+ return IN_RANGE (v, 0, 255); - } - - - static bool - xtensa_uimm8x2 (HOST_WIDE_INT v) - { -- return (v & 1) == 0 && (v >= 0 && v <= 510); -+ return (v & 1) == 0 && IN_RANGE (v, 0, 510); - } - - - static bool - xtensa_uimm8x4 (HOST_WIDE_INT v) - { -- return (v & 3) == 0 && (v >= 0 && v <= 1020); -+ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); - } - - -@@ -446,19 +454,7 @@ xtensa_b4constu (HOST_WIDE_INT v) - bool - xtensa_mask_immediate (HOST_WIDE_INT v) - { --#define MAX_MASK_SIZE 16 -- int mask_size; -- -- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) -- { -- if ((v & 1) == 0) -- return false; -- v = v >> 1; -- if (v == 0) -- return true; -- } -- -- return false; -+ return IN_RANGE (exact_log2 (v + 1), 1, 16); - } - - -@@ -539,7 +535,7 @@ smalloffset_mem_p (rtx op) - return FALSE; - - val = INTVAL (offset); -- return (val & 3) == 0 && (val >= 0 && val <= 60); -+ return (val & 3) == 0 && IN_RANGE (val, 0, 60); - } - } - return FALSE; -@@ -678,8 +674,7 @@ map_test_to_internal_test (enum rtx_code test_code) - static rtx - gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - rtx cmp0, /* 
first operand to compare */ -- rtx cmp1, /* second operand to compare */ -- int *p_invert /* whether branch needs to reverse test */) -+ rtx cmp1 /* second operand to compare */) - { - struct cmp_info - { -@@ -711,6 +706,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - enum internal_test test; - machine_mode mode; - struct cmp_info *p_info; -+ int invert; - - test = map_test_to_internal_test (test_code); - gcc_assert (test != ITEST_MAX); -@@ -747,9 +743,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - } - - /* See if we need to invert the result. */ -- *p_invert = ((GET_CODE (cmp1) == CONST_INT) -- ? p_info->invert_const -- : p_info->invert_reg); -+ invert = ((GET_CODE (cmp1) == CONST_INT) -+ ? p_info->invert_const -+ : p_info->invert_reg); - - /* Comparison to constants, may involve adding 1 to change a LT into LE. - Comparison between two registers, may involve switching operands. */ -@@ -766,7 +762,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - cmp1 = temp; - } - -- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); -+ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) -+ : p_info->test_code, -+ VOIDmode, cmp0, cmp1); - } - - -@@ -825,45 +823,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) - enum rtx_code test_code = GET_CODE (operands[0]); - rtx cmp0 = operands[1]; - rtx cmp1 = operands[2]; -- rtx cmp; -- int invert; -- rtx label1, label2; -+ rtx cmp, label; - - switch (mode) - { -+ case E_SFmode: -+ if (TARGET_HARD_FLOAT) -+ { -+ cmp = gen_float_relational (test_code, cmp0, cmp1); -+ break; -+ } -+ /* FALLTHRU */ -+ - case E_DFmode: - default: - fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); - - case E_SImode: -- invert = FALSE; -- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); -- break; -- -- case E_SFmode: -- if (!TARGET_HARD_FLOAT) -- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, -- cmp0, cmp1)); -- invert = FALSE; -- cmp = gen_float_relational (test_code, cmp0, cmp1); -+ cmp = gen_int_relational (test_code, cmp0, cmp1); - break; - } - - /* Generate the branch. */ -- -- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); -- label2 = pc_rtx; -- -- if (invert) -- { -- label2 = label1; -- label1 = pc_rtx; -- } -- -+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); - emit_jump_insn (gen_rtx_SET (pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, -- label1, -- label2))); -+ label, -+ pc_rtx))); - } - - -@@ -1035,6 +1021,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) - } - - -+/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) -+ into dst with synthesizing a such constant value from a sequence of -+ load-immediate / arithmetic ones, instead of a L32R instruction -+ (plus a constant in litpool). 
*/ -+ -+static void -+xtensa_emit_constantsynth (rtx dst, enum rtx_code code, -+ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT imm2) -+{ -+ gcc_assert (REG_P (dst)); -+ emit_move_insn (dst, GEN_INT (imm0)); -+ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, -+ dst, GEN_INT (imm1))); -+ if (gen_op) -+ emit_move_insn (dst, gen_op (dst, imm2)); -+} -+ -+static int -+xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT op_imm) -+{ -+ int shift = exact_log2 (srcval + 1); -+ -+ if (IN_RANGE (shift, 1, 31)) -+ { -+ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) -+ { -+ HOST_WIDE_INT imm0, imm1; -+ -+ if (srcval < -32768) -+ imm1 = -32768; -+ else if (srcval > 32512) -+ imm1 = 32512; -+ else -+ imm1 = srcval & ~255; -+ imm0 = srcval - imm1; -+ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) -+ imm0 -= 256, imm1 += 256; -+ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); -+ return 1; -+ } -+ -+ shift = ctz_hwi (srcval); -+ if (xtensa_simm12b (srcval >> shift)) -+ { -+ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static rtx -+xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) -+{ -+ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); -+} -+ -+static rtx -+xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) -+{ -+ return imm == 7 -+ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), -+ reg) -+ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, -+ GEN_INT (floor_log2 (imm - 1))), -+ reg); -+} -+ -+int -+xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) -+{ -+ /* No need for synthesizing for what fits into MOVI instruction. */ -+ if (xtensa_simm12b (srcval)) -+ return 0; -+ -+ /* 2-insns substitution. 
*/ -+ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) -+ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) -+ return 1; -+ -+ /* 3-insns substitution. */ -+ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) -+ { -+ int shift, divisor; -+ -+ /* 2-insns substitution followed by SLLI. */ -+ shift = ctz_hwi (srcval); -+ if (IN_RANGE (shift, 1, 31) && -+ xtensa_constantsynth_2insn (dst, srcval >> shift, -+ xtensa_constantsynth_rtx_SLLI, -+ shift)) -+ return 1; -+ -+ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ -+ if (TARGET_ADDX) -+ for (divisor = 3; divisor <= 9; divisor += 2) -+ if (srcval % divisor == 0 && -+ xtensa_constantsynth_2insn (dst, srcval / divisor, -+ xtensa_constantsynth_rtx_ADDSUBX, -+ divisor)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+ - /* Emit insns to move operands[1] into operands[0]. - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move -@@ -1070,24 +1173,9 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - return 1; - } - -- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) -+ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 -+ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) - { -- /* Try to emit MOVI + SLLI sequence, that is smaller -- than L32R + literal. */ -- if (optimize >= 1 && ! 
optimize_debug && mode == SImode -- && CONST_INT_P (src) && register_operand (dst, mode)) -- { -- HOST_WIDE_INT srcval = INTVAL (src); -- int shift = ctz_hwi (srcval); -- -- if (xtensa_simm12b (srcval >> shift)) -- { -- emit_move_insn (dst, GEN_INT (srcval >> shift)); -- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); -- return 1; -- } -- } -- - src = force_const_mem (SImode, src); - operands[1] = src; - } -@@ -1315,7 +1403,7 @@ xtensa_expand_block_move (rtx *operands) - move_ratio = 4; - if (optimize > 2) - move_ratio = LARGEST_MOVE_RATIO; -- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ -+ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); - if (num_pieces > move_ratio) - return 0; - -@@ -1352,7 +1440,7 @@ xtensa_expand_block_move (rtx *operands) - temp[next] = gen_reg_rtx (mode[next]); - - x = adjust_address (src_mem, mode[next], offset_ld); -- emit_insn (gen_rtx_SET (temp[next], x)); -+ emit_move_insn (temp[next], x); - - offset_ld += next_amount; - bytes -= next_amount; -@@ -1362,9 +1450,9 @@ xtensa_expand_block_move (rtx *operands) - if (active[phase]) - { - active[phase] = false; -- -+ - x = adjust_address (dst_mem, mode[phase], offset_st); -- emit_insn (gen_rtx_SET (x, temp[phase])); -+ emit_move_insn (x, temp[phase]); - - offset_st += amount[phase]; - } -@@ -1375,6 +1463,246 @@ xtensa_expand_block_move (rtx *operands) - } - - -+/* Try to expand a block set operation to a sequence of RTL move -+ instructions. If not optimizing, or if the block size is not a -+ constant, or if the block is too large, or if the value to -+ initialize the block with is not a constant, the expansion -+ fails and GCC falls back to calling memset(). -+ -+ operands[0] is the destination -+ operands[1] is the length -+ operands[2] is the initialization value -+ operands[3] is the alignment */ -+ -+static int -+xtensa_sizeof_MOVI (HOST_WIDE_INT imm) -+{ -+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; -+} -+ -+int -+xtensa_expand_block_set_unrolled_loop (rtx *operands) -+{ -+ rtx dst_mem = operands[0]; -+ HOST_WIDE_INT bytes, value, align; -+ int expand_len, funccall_len; -+ rtx x, reg; -+ int offset; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ /* Insn expansion: a series of aligned memory stores. -+ Consist of S8I, S16I or S32I(.N). */ -+ expand_len += (bytes / align) * (TARGET_DENSITY -+ && align == 4 ? 2 : 3); -+ /* Insn expansion: the remainder, sub-aligned memory stores. -+ A combination of S8I and S16I as needed. */ -+ expand_len += ((bytes % align + 1) / 2) * 3; -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (dst_mem, 0); -+ if (!REG_P (x)) -+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); -+ switch (align) -+ { -+ case 1: -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (SImode, GEN_INT (value)); -+ -+ offset = 0; -+ do -+ { -+ int unit_size = MIN (bytes, align); -+ machine_mode unit_mode = (unit_size >= 4 ? SImode : -+ (unit_size >= 2 ? HImode : -+ QImode)); -+ unit_size = GET_MODE_SIZE (unit_mode); -+ -+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), -+ unit_mode == SImode ? reg -+ : convert_to_mode (unit_mode, reg, true)); -+ -+ offset += unit_size; -+ bytes -= unit_size; -+ } -+ while (bytes > 0); -+ -+ return 1; -+} -+ -+int -+xtensa_expand_block_set_small_loop (rtx *operands) -+{ -+ HOST_WIDE_INT bytes, value, align, count; -+ int expand_len, funccall_len; -+ rtx x, dst, end, reg; -+ machine_mode unit_mode; -+ rtx_code_label *label; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Totally-aligned block only. */ -+ if (bytes % align != 0) -+ return 0; -+ count = bytes / align; -+ -+ /* If the Loop Option (zero-overhead looping) is configured and active, -+ almost no restrictions about the length of the block. */ -+ if (! (TARGET_LOOPS && optimize)) -+ { -+ /* If 4-byte aligned, small loop substitution is almost optimal, -+ thus limited to only offset to the end address for ADDI/ADDMI -+ instruction. */ -+ if (align == 4 -+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) -+ return 0; -+ -+ /* If no 4-byte aligned, loop count should be treated as the -+ constraint. 
*/ -+ if (align != 4 -+ && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) -+ return 0; -+ } -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ -+ { -+ /* Insn translation: Either MOV(.N) or L32R w/litpool for the -+ loop count. */ -+ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) -+ : 3 + 4; -+ /* Insn translation: LOOP, the zero-overhead looping setup -+ instruction. */ -+ expand_len += 3; -+ /* Insn expansion: the loop body instructions. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3); -+ } -+ else /* NO zero-overhead looping */ -+ { -+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ -+ expand_len += bytes > 127 ? 3 -+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; -+ /* Insn expansion: the loop body and branch instruction. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). -+ For branch, BNE. */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3) + 3; -+ } -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (operands[0], 0); -+ if (!REG_P (x)) -+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); -+ dst = gen_reg_rtx (SImode); -+ emit_move_insn (dst, x); -+ end = gen_reg_rtx (SImode); -+ if (TARGET_LOOPS && optimize) -+ x = force_reg (SImode, operands[1] /* the length */); -+ else -+ x = operands[1]; -+ emit_insn (gen_addsi3 (end, dst, x)); -+ switch (align) -+ { -+ case 1: -+ unit_mode = QImode; -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ unit_mode = HImode; -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ unit_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (unit_mode, GEN_INT (value)); -+ -+ label = gen_label_rtx (); -+ emit_label (label); -+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); -+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); -+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); -+ -+ return 1; -+} -+ -+ - void - xtensa_expand_nonlocal_goto (rtx *operands) - { -@@ -1725,21 +2053,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) - - - char * --xtensa_emit_branch (bool inverted, bool immed, rtx *operands) -+xtensa_emit_branch (bool immed, rtx *operands) - { - static char result[64]; -- enum rtx_code code; -+ enum rtx_code code = GET_CODE (operands[3]); - const char *op; - -- code = GET_CODE (operands[3]); - switch (code) - { -- case EQ: op = inverted ? "ne" : "eq"; break; -- case NE: op = inverted ? "eq" : "ne"; break; -- case LT: op = inverted ? "ge" : "lt"; break; -- case GE: op = inverted ? "lt" : "ge"; break; -- case LTU: op = inverted ? "geu" : "ltu"; break; -- case GEU: op = inverted ? 
"ltu" : "geu"; break; -+ case EQ: op = "eq"; break; -+ case NE: op = "ne"; break; -+ case LT: op = "lt"; break; -+ case GE: op = "ge"; break; -+ case LTU: op = "ltu"; break; -+ case GEU: op = "geu"; break; - default: gcc_unreachable (); - } - -@@ -1758,32 +2085,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) - } - - --char * --xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) --{ -- static char result[64]; -- const char *op; -- -- switch (GET_CODE (operands[3])) -- { -- case EQ: op = inverted ? "bs" : "bc"; break; -- case NE: op = inverted ? "bc" : "bs"; break; -- default: gcc_unreachable (); -- } -- -- if (immed) -- { -- unsigned bitnum = INTVAL (operands[1]) & 0x1f; -- operands[1] = GEN_INT (bitnum); -- sprintf (result, "b%si\t%%0, %%d1, %%2", op); -- } -- else -- sprintf (result, "b%s\t%%0, %%1, %%2", op); -- -- return result; --} -- -- - char * - xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { -@@ -1792,12 +2093,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - const char *op; - - code = GET_CODE (operands[4]); -+ if (inverted) -+ code = reverse_condition (code); - if (isbool) - { - switch (code) - { -- case EQ: op = inverted ? "t" : "f"; break; -- case NE: op = inverted ? "f" : "t"; break; -+ case EQ: op = "f"; break; -+ case NE: op = "t"; break; - default: gcc_unreachable (); - } - } -@@ -1805,10 +2108,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { - switch (code) - { -- case EQ: op = inverted ? "nez" : "eqz"; break; -- case NE: op = inverted ? "eqz" : "nez"; break; -- case LT: op = inverted ? "gez" : "ltz"; break; -- case GE: op = inverted ? 
"ltz" : "gez"; break; -+ case EQ: op = "eqz"; break; -+ case NE: op = "nez"; break; -+ case LT: op = "ltz"; break; -+ case GE: op = "gez"; break; - default: gcc_unreachable (); - } - } -@@ -1819,6 +2122,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - } - - -+void -+xtensa_prepare_expand_call (int callop, rtx *operands) -+{ -+ rtx addr = XEXP (operands[callop], 0); -+ -+ if (flag_pic && SYMBOL_REF_P (addr) -+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -+ addr = gen_sym_PLT (addr); -+ -+ if (!call_insn_operand (addr, VOIDmode)) -+ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); -+} -+ -+ - char * - xtensa_emit_call (int callop, rtx *operands) - { -@@ -1837,6 +2154,24 @@ xtensa_emit_call (int callop, rtx *operands) - } - - -+char * -+xtensa_emit_sibcall (int callop, rtx *operands) -+{ -+ static char result[64]; -+ rtx tgt = operands[callop]; -+ -+ if (GET_CODE (tgt) == CONST_INT) -+ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", -+ INTVAL (tgt)); -+ else if (register_operand (tgt, VOIDmode)) -+ sprintf (result, "jx\t%%%d", callop); -+ else -+ sprintf (result, "j.l\t%%%d, a9", callop); -+ -+ return result; -+} -+ -+ - bool - xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) - { -@@ -2061,6 +2396,20 @@ xtensa_tls_referenced_p (rtx x) - } - - -+/* Helper function for "*shlrd_..." patterns. */ -+ -+enum rtx_code -+xtensa_shlrd_which_direction (rtx op0, rtx op1) -+{ -+ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) -+ return ASHIFT; /* shld */ -+ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) -+ return LSHIFTRT; /* shrd */ -+ -+ return UNKNOWN; -+} -+ -+ - /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ - - static bool -@@ -2364,7 +2713,7 @@ static void - printx (FILE *file, signed int val) - { - /* Print a hexadecimal value in a nice way. 
*/ -- if ((val > -0xa) && (val < 0xa)) -+ if (IN_RANGE (val, -9, 9)) - fprintf (file, "%d", val); - else if (val < 0) - fprintf (file, "-0x%x", -val); -@@ -2379,7 +2728,7 @@ void - print_operand (FILE *file, rtx x, int letter) - { - if (!x) -- error ("PRINT_OPERAND null pointer"); -+ error ("%<PRINT_OPERAND%> null pointer"); - - switch (letter) - { -@@ -2424,17 +2773,11 @@ print_operand (FILE *file, rtx x, int letter) - case 'K': - if (GET_CODE (x) == CONST_INT) - { -- int num_bits = 0; - unsigned val = INTVAL (x); -- while (val & 1) -- { -- num_bits += 1; -- val = val >> 1; -- } -- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) -+ if (!xtensa_mask_immediate (val)) - fatal_insn ("invalid mask", x); - -- fprintf (file, "%d", num_bits); -+ fprintf (file, "%d", floor_log2 (val + 1)); - } - else - output_operand_lossage ("invalid %%K value"); -@@ -2584,7 +2927,7 @@ void - print_operand_address (FILE *file, rtx addr) - { - if (!addr) -- error ("PRINT_OPERAND_ADDRESS, null pointer"); -+ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); - - switch (GET_CODE (addr)) - { -@@ -2750,7 +3093,7 @@ xtensa_call_save_reg(int regno) - return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || - df_regs_ever_live_p (regno); - -- if (crtl->calls_eh_return && regno >= 2 && regno < 4) -+ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) - return true; - - return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); -@@ -2870,7 +3213,7 @@ xtensa_expand_prologue (void) - int callee_save_size = cfun->machine->callee_save_size; - - /* -128 is a limit of single addi instruction. 
*/ -- if (total_size > 0 && total_size <= 128) -+ if (IN_RANGE (total_size, 1, 128)) - { - insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-total_size))); -@@ -2999,7 +3342,7 @@ xtensa_expand_prologue (void) - } - - void --xtensa_expand_epilogue (void) -+xtensa_expand_epilogue (bool sibcall_p) - { - if (!TARGET_WINDOWED_ABI) - { -@@ -3033,10 +3376,13 @@ xtensa_expand_epilogue (void) - if (xtensa_call_save_reg(regno)) - { - rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ rtx reg; - - offset -= UNITS_PER_WORD; -- emit_move_insn (gen_rtx_REG (SImode, regno), -+ emit_move_insn (reg = gen_rtx_REG (SImode, regno), - gen_frame_mem (SImode, x)); -+ if (regno == A0_REG && sibcall_p) -+ emit_use (reg); - } - } - -@@ -3071,7 +3417,8 @@ xtensa_expand_epilogue (void) - EH_RETURN_STACKADJ_RTX)); - } - cfun->machine->epilogue_done = true; -- emit_jump_insn (gen_return ()); -+ if (!sibcall_p) -+ emit_jump_insn (gen_return ()); - } - - bool -@@ -3697,7 +4044,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) - flags |= SECTION_BSS; /* @nobits */ - else - warning (0, "only uninitialized variables can be placed in a " -- ".bss section"); -+ "%<.bss%> section"); - } - - return flags; -@@ -3750,7 +4097,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - static bool - xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - int opno ATTRIBUTE_UNUSED, -- int *total, bool speed ATTRIBUTE_UNUSED) -+ int *total, bool speed) - { - int code = GET_CODE (x); - -@@ -3838,9 +4185,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case CLZ: -+ case CLRSB: - *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); - return true; - -+ case BSWAP: -+ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); -+ return true; -+ - case NOT: - *total = COSTS_N_INSNS (mode == DImode ? 
3 : 2); - return true; -@@ -3864,13 +4216,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case ABS: -+ case NEG: - { - if (mode == SFmode) - *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); - else if (mode == DFmode) - *total = COSTS_N_INSNS (50); -- else -+ else if (mode == DImode) - *total = COSTS_N_INSNS (4); -+ else -+ *total = COSTS_N_INSNS (1); - return true; - } - -@@ -3886,10 +4241,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - } - -- case NEG: -- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); -- return true; -- - case MULT: - { - if (mode == SFmode) -@@ -3929,11 +4280,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - case UMOD: - { - if (mode == DImode) -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - else if (TARGET_DIV32) - *total = COSTS_N_INSNS (32); - else -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - return true; - } - -@@ -3966,6 +4317,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - } - } - -+static bool -+xtensa_is_insn_L32R_p(const rtx_insn *insn) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = XEXP (x, 1); -+ if (GET_CODE (x) == MEM) -+ { -+ x = XEXP (x, 0); -+ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) -+ && CONSTANT_POOL_ADDRESS_P (x); -+ } -+ } -+ -+ return false; -+} -+ -+/* Compute a relative costs of RTL insns. This is necessary in order to -+ achieve better RTL insn splitting/combination result. */ -+ -+static int -+xtensa_insn_cost (rtx_insn *insn, bool speed) -+{ -+ if (!(recog_memoized (insn) < 0)) -+ { -+ int len = get_attr_length (insn), n = (len + 2) / 3; -+ -+ if (len == 0) -+ return COSTS_N_INSNS (0); -+ -+ if (speed) /* For speed cost. */ -+ { -+ /* "L32R" may be particular slow (implementation-dependent). 
*/ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); -+ -+ /* Cost based on the pipeline model. */ -+ switch (get_attr_type (insn)) -+ { -+ case TYPE_STORE: -+ case TYPE_MOVE: -+ case TYPE_ARITH: -+ case TYPE_MULTI: -+ case TYPE_NOP: -+ case TYPE_FSTORE: -+ return COSTS_N_INSNS (n); -+ -+ case TYPE_LOAD: -+ return COSTS_N_INSNS (n - 1 + 2); -+ -+ case TYPE_JUMP: -+ case TYPE_CALL: -+ return COSTS_N_INSNS (n - 1 + 3); -+ -+ case TYPE_FCONV: -+ case TYPE_FLOAD: -+ case TYPE_MUL16: -+ case TYPE_MUL32: -+ case TYPE_RSR: -+ return COSTS_N_INSNS (n * 2); -+ -+ case TYPE_FMADD: -+ return COSTS_N_INSNS (n * 4); -+ -+ case TYPE_DIV32: -+ return COSTS_N_INSNS (n * 16); -+ -+ default: -+ break; -+ } -+ } -+ else /* For size cost. */ -+ { -+ /* Cost based on the instruction length. */ -+ if (get_attr_type (insn) != TYPE_UNKNOWN) -+ { -+ /* "L32R" itself plus constant in litpool. */ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (2) + 1; -+ -+ /* Consider ".n" short instructions. */ -+ return COSTS_N_INSNS (n) - (n * 3 - len); -+ } -+ } -+ } -+ -+ /* Fall back. */ -+ return pattern_cost (PATTERN (insn), speed); -+} -+ - /* Worker function for TARGET_RETURN_IN_MEMORY. */ - - static bool -@@ -4491,4 +4934,16 @@ xtensa_asan_shadow_offset (void) - return HOST_WIDE_INT_UC (0x10000000); - } - -+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ -+static bool -+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Do not allow sibcalls if the Windowed Register Option is -+ configured. */ -+ if (TARGET_WINDOWED_ABI) -+ return false; -+ -+ return true; -+} -+ - #include "gt-xtensa.h" -diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h -index fa86a245e..3e9cbc943 100644 ---- a/gcc/config/xtensa/xtensa.h -+++ b/gcc/config/xtensa/xtensa.h -@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. 
If not see - #define HAVE_AS_TLS 0 - #endif - -+/* Define this if the target has no hardware divide instructions. */ -+#if !TARGET_DIV32 -+#define TARGET_HAS_NO_HW_DIVIDE -+#endif -+ - - /* Target CPU builtins. */ - #define TARGET_CPU_CPP_BUILTINS() \ -@@ -488,7 +493,7 @@ enum reg_class - used for this purpose since all function arguments are pushed on - the stack. */ - #define FUNCTION_ARG_REGNO_P(N) \ -- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) -+ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) - - /* Record the number of argument words seen so far, along with a flag to - indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 2a8e59ee9..124548dfe 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -25,6 +25,7 @@ - (A7_REG 7) - (A8_REG 8) - (A9_REG 9) -+ (A10_REG 10) - - (UNSPEC_NOP 2) - (UNSPEC_PLT 3) -@@ -83,6 +84,13 @@ - ;; the same template. - (define_mode_iterator HQI [HI QI]) - -+;; This code iterator is for *shlrd and its variants. -+(define_code_iterator ior_op [ior plus]) -+ -+;; This mode iterator allows the DC and SC patterns to be defined from -+;; the same template. -+(define_mode_iterator DSC [DC SC]) -+ - - ;; Attributes. - -@@ -98,7 +106,10 @@ - - ;; Describe a user's asm statement. - (define_asm_attributes -- [(set_attr "type" "multi")]) -+ [(set_attr "type" "multi") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) ;; Should be the maximum possible length -+ ;; of a single machine instruction. - - - ;; Pipeline model. -@@ -224,20 +235,42 @@ - - ;; Multiplication. 
- --(define_expand "<u>mulsidi3" -+(define_expand "mulsidi3" - [(set (match_operand:DI 0 "register_operand") -- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) -- (any_extend:DI (match_operand:SI 2 "register_operand"))))] -+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] - "TARGET_MUL32_HIGH" - { - rtx temp = gen_reg_rtx (SImode); - emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -- emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), -- operands[1], operands[2])); -+ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); - emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); - DONE; - }) - -+(define_expand "umulsidi3" -+ [(set (match_operand:DI 0 "register_operand") -+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] -+ "" -+{ -+ if (TARGET_MUL32_HIGH) -+ { -+ rtx temp = gen_reg_rtx (SImode); -+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -+ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); -+ } -+ else -+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], SImode, -+ operands[2], SImode); -+ DONE; -+}) -+ - (define_insn "<u>mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=a") - (truncate:SI -@@ -261,30 +294,16 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_insn "mulhisi3" -- [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (sign_extend:SI -- (match_operand:HI 1 "register_operand" "%r,r")) -- (sign_extend:SI -- (match_operand:HI 2 "register_operand" "r,r"))))] -- "TARGET_MUL16 || TARGET_MAC16" -- "@ -- mul16s\t%0, %1, %2 -- mul.aa.ll\t%1, %2" -- [(set_attr "type" 
"mul16,mac16") -- (set_attr "mode" "SI") -- (set_attr "length" "3,3")]) -- --(define_insn "umulhisi3" -+(define_insn "<u>mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (zero_extend:SI -+ (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) -- (zero_extend:SI -+ (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] - "TARGET_MUL16 || TARGET_MAC16" - "@ -- mul16u\t%0, %1, %2 -- umul.aa.ll\t%1, %2" -+ mul16<su>\t%0, %1, %2 -+ <u>mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) -@@ -429,7 +448,17 @@ - (set_attr "length" "3")]) - - --;; Count leading/trailing zeros and find first bit. -+;; Count redundant leading sign bits and leading/trailing zeros, -+;; and find first bit. -+ -+(define_insn "clrsbsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] -+ "TARGET_NSA" -+ "nsa\t%0, %1" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "3")]) - - (define_insn "clzsi2" - [(set (match_operand:SI 0 "register_operand" "=a") -@@ -471,23 +500,78 @@ - - ;; Byte swap. 
- --(define_insn "bswapsi2" -- [(set (match_operand:SI 0 "register_operand" "=&a") -- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "15")]) -+(define_insn "bswaphi2" -+ [(set (match_operand:HI 0 "register_operand" "=a") -+ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) -+ (clobber (match_scratch:HI 2 "=&a"))] -+ "" -+ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "HI") -+ (set_attr "length" "9")]) - --(define_insn "bswapdi2" -- [(set (match_operand:DI 0 "register_operand" "=&a") -- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" -- [(set_attr "type" "arith") -- (set_attr "mode" "DI") -- (set_attr "length" "27")]) -+(define_expand "bswapsi2" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (bswap:SI (match_operand:SI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1" -+{ -+ /* GIMPLE manual byte-swapping recognition is now activated. -+ For both built-in and manual bswaps, emit corresponding library call -+ if optimizing for size, or a series of dedicated machine instructions -+ if otherwise. 
*/ -+ if (optimize_size) -+ emit_library_call_value (optab_libfunc (bswap_optab, SImode), -+ operands[0], LCT_NORMAL, SImode, -+ operands[1], SImode); -+ else -+ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "bswapsi2_internal" -+ [(set (match_operand:SI 0 "register_operand" "=a,&a") -+ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) -+ (clobber (match_scratch:SI 2 "=&a,X"))] -+ "!optimize_debug && optimize > 1 && !optimize_size" -+{ -+ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); -+ const char *init = "ssai\t8\;"; -+ static char result[128]; -+ if (prev_insn && NONJUMP_INSN_P (prev_insn)) -+ { -+ rtx x = PATTERN (prev_insn); -+ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 -+ && GET_CODE (XVECEXP (x, 0, 0)) == SET -+ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) -+ { -+ x = XEXP (XVECEXP (x, 0, 0), 1); -+ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) -+ init = ""; -+ } -+ } -+ sprintf (result, -+ (which_alternative == 0) -+ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" -+ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", -+ init); -+ return result; -+} -+ [(set_attr "type" "arith,arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "15,15")]) -+ -+(define_expand "bswapdi2" -+ [(set (match_operand:DI 0 "register_operand" "") -+ (bswap:DI (match_operand:DI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1 && optimize_size" -+{ -+ /* Replace with a single DImode library call. -+ Without this, two SImode library calls are emitted. */ -+ emit_library_call_value (optab_libfunc (bswap_optab, DImode), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], DImode); -+ DONE; -+}) - - - ;; Negation and one's complement. 
-@@ -501,16 +585,26 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "one_cmplsi2" -- [(set (match_operand:SI 0 "register_operand" "") -- (not:SI (match_operand:SI 1 "register_operand" "")))] -+(define_insn_and_split "one_cmplsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (not:SI (match_operand:SI 1 "register_operand" "r")))] - "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (const_int -1)) -+ (set (match_dup 0) -+ (xor:SI (match_dup 1) -+ (match_dup 2)))] - { -- rtx temp = gen_reg_rtx (SImode); -- emit_insn (gen_movsi (temp, constm1_rtx)); -- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); -- DONE; --}) -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) - - (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") -@@ -536,6 +630,103 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -+(define_insn_and_split "*andsi3_bitcmpl" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r")))] -+ "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 3) -+ (and:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (xor:SI (match_dup 3) -+ (match_dup 2)))] -+{ -+ operands[3] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_pow2_minus_one" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ashift:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (lshiftrt:SI (match_dup 0) -+ 
(match_dup 2)))] -+{ -+ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[2]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*andsi3_const_negative_pow2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (lshiftrt:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_shifted_mask" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "shifted_mask_operand" "i")))] -+ "! 
xtensa_simm12b (INTVAL (operands[2]))" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (zero_extract:SI (match_dup 1) -+ (match_dup 3) -+ (match_dup 4))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[2]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[2] = GEN_INT (shift); -+ operands[3] = GEN_INT (mask_size); -+ operands[4] = GEN_INT (mask_pos); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && ctz_hwi (INTVAL (operands[2])) == 1") -+ (const_int 5) -+ (const_int 6)))]) -+ - (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=a") - (ior:SI (match_operand:SI 1 "register_operand" "%r") -@@ -634,7 +825,7 @@ - - ;; Field extract instructions. - --(define_expand "extv" -+(define_expand "extvsi" - [(set (match_operand:SI 0 "register_operand" "") - (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -649,12 +840,12 @@ - if (!lsbitnum_operand (operands[3], SImode)) - FAIL; - -- emit_insn (gen_extv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extv_internal" -+(define_insn "extvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (sign_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "sext_fldsz_operand" "i") -@@ -669,7 +860,7 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "extzv" -+(define_expand "extzvsi" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -678,12 +869,12 @@ - 
{ - if (!extui_fldsz_operand (operands[2], SImode)) - FAIL; -- emit_insn (gen_extzv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extzvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extzv_internal" -+(define_insn "extzvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (zero_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "extui_fldsz_operand" "i") -@@ -757,11 +948,14 @@ - because of offering further optimization opportunities. */ - if (register_operand (operands[0], DImode)) - { -- rtx first, second; -- -- split_double (operands[1], &first, &second); -- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); -- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); -+ rtx lowpart, highpart; -+ -+ if (TARGET_BIG_ENDIAN) -+ split_double (operands[1], &highpart, &lowpart); -+ else -+ split_double (operands[1], &lowpart, &highpart); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); -+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); - DONE; - } - -@@ -782,7 +976,7 @@ - "register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -831,6 +1025,19 @@ - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) - -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ rtx x = avoid_constant_pool_reference (operands[1]); -+ if (! CONST_INT_P (x)) -+ FAIL; -+ if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) -+ emit_move_insn (operands[0], x); -+}) -+ - ;; 16-bit Integer moves - - (define_expand "movhi" -@@ -1035,6 +1242,43 @@ - (set_attr "mode" "SF") - (set_attr "length" "3")]) - -+(define_split -+ [(set (match_operand:SF 0 "register_operand") -+ (match_operand:SF 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ int i = 0; -+ rtx x = XEXP (operands[1], 0); -+ long l[2]; -+ if (GET_CODE (x) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (x)) -+ x = get_pool_constant (x); -+ else if (GET_CODE (x) == CONST) -+ { -+ x = XEXP (x, 0); -+ gcc_assert (GET_CODE (x) == PLUS -+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) -+ && CONST_INT_P (XEXP (x, 1))); -+ i = INTVAL (XEXP (x, 1)); -+ gcc_assert (i == 0 || i == 4); -+ i /= 4; -+ x = get_pool_constant (XEXP (x, 0)); -+ } -+ else -+ gcc_unreachable (); -+ if (GET_MODE (x) == SFmode) -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); -+ else if (GET_MODE (x) == DFmode) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ else -+ FAIL; -+ x = gen_rtx_REG (SImode, REGNO (operands[0])); -+ if (! 
xtensa_constantsynth (x, l[i])) -+ emit_move_insn (x, GEN_INT (l[i])); -+}) -+ - ;; 64-bit floating point moves - - (define_expand "movdf" -@@ -1058,7 +1302,7 @@ - "register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -1085,6 +1329,22 @@ - DONE; - }) - -+;; Block sets -+ -+(define_expand "setmemsi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:SI 1 "") -+ (match_operand:SI 2 "") -+ (match_operand:SI 3 "const_int_operand")] -+ "!optimize_debug && optimize" -+{ -+ if (xtensa_expand_block_set_unrolled_loop (operands)) -+ DONE; -+ if (xtensa_expand_block_set_small_loop (operands)) -+ DONE; -+ FAIL; -+}) -+ - - ;; Shift instructions. - -@@ -1097,16 +1357,6 @@ - operands[1] = xtensa_copy_incoming_a7 (operands[1]); - }) - --(define_insn "*ashlsi3_1" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (const_int 1)))] -- "TARGET_DENSITY" -- "add.n\t%0, %1, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "2")]) -- - (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1119,16 +1369,14 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashlsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8b\t%2\;sll\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (ashift:SI (match_operand:SI 1 "register_operand") -+ (const_int 1)))] -+ "TARGET_DENSITY" -+ [(set (match_dup 0) -+ (plus:SI (match_dup 1) -+ (match_dup 1)))]) - - 
(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") -@@ -1142,17 +1390,6 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashrsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8l\t%2\;sra\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -- - (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1162,9 +1399,9 @@ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) -- return "srli\t%0, %1, %R2"; -+ return "srli\t%0, %1, %R2"; - else -- return "extui\t%0, %1, %R2, %L2"; -+ return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; - } -@@ -1172,13 +1409,170 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*lshrsi3_3x" -+(define_insn "*shift_per_byte" - [(set (match_operand:SI 0 "register_operand" "=a") -- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -+ (match_operator:SI 3 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]))] -+ "!optimize_debug && optimize" -+{ -+ switch (GET_CODE (operands[3])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; -+ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; -+ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_0" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 
"register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i"))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_1" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i")))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 5) -+ (neg:SI (match_dup 2))) -+ (set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 5) -+ (const_int 3))]))] -+{ -+ operands[5] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "9")]) -+ -+(define_insn "*shlrd_reg_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (match_dup 2))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr 
"length" "6")]) -+ -+(define_insn "*shlrd_const_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 3 "const_int_operand" "i")]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 2 "register_operand" "r") -+ (match_operand:SI 4 "const_int_operand" "i")])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && IN_RANGE (INTVAL (operands[3]), 1, 31) -+ && IN_RANGE (INTVAL (operands[4]), 1, 31) -+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" -+{ -+ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) -+ { -+ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; -+ case LSHIFTRT: return "ssai\t%R3\;src\t%0, %2, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn "*shlrd_per_byte_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shlrd_per_byte__omit_AND" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ 
[(match_operand:SI 1 "register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 4 "const_int_operand" "i"))]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_dup 2) -+ (const_int 3)) -+ (match_dup 4)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ior_op:SI (match_op_dup 5 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]) -+ (match_op_dup 6 -+ [(match_dup 3) -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] - "" -- "ssa8l\t%2\;srl\t%0, %1" - [(set_attr "type" "arith") - (set_attr "mode" "SI") - (set_attr "length" "6")]) -@@ -1239,28 +1633,13 @@ - (define_insn "*btrue" - [(set (pc) - (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "branch_operand" "K,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*bfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1269,28 +1648,13 @@ - (define_insn "*ubtrue" - [(set (pc) - (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 
"register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "ubranch_operand" "L,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*ubfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1301,80 +1665,178 @@ - (define_insn "*bittrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") -+ (const_int 1) -+ (match_operand:SI 1 "arith_operand" "J,r")) - (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); -+ static char result[64]; -+ char op; -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: op = 'c'; break; -+ case NE: op = 's'; break; -+ default: gcc_unreachable (); -+ } -+ if (which_alternative == 0) -+ { -+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); -+ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); -+ } -+ else -+ sprintf (result, "bb%c\t%%0, %%1, %%2", op); -+ return result; - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*bitfalse" -+(define_insn "*masktrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- 
[(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "register_operand" "r")) - (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] - "" - { -- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: return "bnone\t%0, %1, %2"; -+ case NE: return "bany\t%0, %1, %2"; -+ default: gcc_unreachable (); -+ } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*masktrue" -+(define_insn "*masktrue_bitcmpl" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -+ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) -+ (match_operand:SI 1 "register_operand" "r")) -+ (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - switch (GET_CODE (operands[3])) - { -- case EQ: return "bnone\t%0, %1, %2"; -- case NE: return "bany\t%0, %1, %2"; -- default: gcc_unreachable (); -+ case EQ: return "ball\t%0, %1, %2"; -+ case NE: return "bnall\t%0, %1, %2"; -+ default: gcc_unreachable (); - } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*maskfalse" -+(define_insn_and_split "*masktrue_const_pow2_minus_one" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE 
(exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (ashift:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] - { -- switch (GET_CODE (operands[3])) -- { -- case EQ: return "bany\t%0, %1, %2"; -- case NE: return "bnone\t%0, %1, %2"; -- default: gcc_unreachable (); -- } -+ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); -+ operands[4] = gen_reg_rtx (SImode); - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[1]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*masktrue_const_negative_pow2" -+ [(set (pc) -+ (if_then_else (match_operator 3 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (lshiftrt:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] -+{ -+ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); -+ operands[4] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*masktrue_const_shifted_mask" -+ [(set (pc) -+ (if_then_else (match_operator 4 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "shifted_mask_operand" "i")) -+ (match_operand:SI 2 "const_int_operand" "i")]) -+ (label_ref (match_operand 3 "" "")) -+ (pc)))] -+ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 -+ && 
xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 6) -+ (zero_extract:SI (match_dup 0) -+ (match_dup 5) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 4 -+ [(match_dup 6) -+ (match_dup 2)]) -+ (label_ref (match_dup 3)) -+ (pc)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[1]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[1] = GEN_INT (mask_pos); -+ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); -+ operands[5] = GEN_INT (mask_size); -+ operands[6] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") -+ (const_int 5) -+ (const_int 6)))]) - - - ;; Zero-overhead looping support. 
-@@ -1696,18 +2158,13 @@ - (match_operand 1 "" ""))] - "" - { -- rtx addr = XEXP (operands[0], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (0, operands); - }) - - (define_insn "call_internal" - [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) - (match_operand 1 "" "i"))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (0, operands); - } -@@ -1721,19 +2178,14 @@ - (match_operand 2 "" "")))] - "" - { -- rtx addr = XEXP (operands[1], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (1, operands); - }) - - (define_insn "call_value_internal" - [(set (match_operand 0 "register_operand" "=a") - (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) - (match_operand 2 "" "i")))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (1, operands); - } -@@ -1741,6 +2193,70 @@ - (set_attr "mode" "none") - (set_attr "length" "3")]) - -+(define_expand "sibcall" -+ [(call (match_operand 0 "memory_operand" "") -+ (match_operand 1 "" ""))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (0, operands); -+}) -+ -+(define_insn "sibcall_internal" -+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) -+ (match_operand 1 "" "i"))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (0, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(call (mem:SI (match_operand:SI 0 "register_operand")) -+ (match_operand 1 ""))] -+ "reload_completed -+ && 
!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[0]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 0)) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 1))]) -+ -+(define_expand "sibcall_value" -+ [(set (match_operand 0 "register_operand" "") -+ (call (match_operand 1 "memory_operand" "") -+ (match_operand 2 "" "")))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (1, operands); -+}) -+ -+(define_insn "sibcall_value_internal" -+ [(set (match_operand 0 "register_operand" "=a") -+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) -+ (match_operand 2 "" "i")))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (1, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(set (match_operand 0 "register_operand") -+ (call (mem:SI (match_operand:SI 1 "register_operand")) -+ (match_operand 2 "")))] -+ "reload_completed -+ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[1]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 1)) -+ (set (match_dup 0) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 2)))]) -+ - (define_insn "entry" - [(set (reg:SI A1_REG) - (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] -@@ -1762,7 +2278,10 @@ - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "2")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - - ;; Miscellaneous instructions. 
-@@ -1805,7 +2324,15 @@ - [(return)] - "" - { -- xtensa_expand_epilogue (); -+ xtensa_expand_epilogue (false); -+ DONE; -+}) -+ -+(define_expand "sibcall_epilogue" -+ [(return)] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_expand_epilogue (true); - DONE; - }) - -@@ -1817,7 +2344,10 @@ - } - [(set_attr "type" "nop") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - (define_expand "nonlocal_goto" - [(match_operand:SI 0 "general_operand" "") -@@ -1881,8 +2411,9 @@ - [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] - "" - "" -- [(set_attr "length" "0") -- (set_attr "type" "nop")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - ;; Do not schedule instructions accessing memory before this point. - -@@ -1901,7 +2432,9 @@ - (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] - "" - "" -- [(set_attr "length" "0")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] -@@ -1914,7 +2447,10 @@ - } - [(set_attr "type" "trap") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't - ;; know if a frame pointer is required until the reload pass, and -@@ -2177,3 +2713,103 @@ - xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); - DONE; - }) -+ -+(define_insn_and_split "*round_up_to_even" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 1)) -+ (const_int -2)))] -+ "" -+ "#" -+ "can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (and:SI (match_dup 1) -+ (const_int 1))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 2) -+ 
(match_dup 1)))] -+{ -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*signed_ge_zero" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ge:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 0)))] -+ "" -+ "#" -+ "" -+ [(set (match_dup 0) -+ (ashiftrt:SI (match_dup 1) -+ (const_int 31))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 0) -+ (const_int 1)))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_peephole2 -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 6 "reload_operand")) -+ (set (match_operand:SI 1 "register_operand") -+ (match_operand:SI 7 "reload_operand")) -+ (set (match_operand:SF 2 "register_operand") -+ (match_operand:SF 4 "register_operand")) -+ (set (match_operand:SF 3 "register_operand") -+ (match_operand:SF 5 "register_operand"))] -+ "REGNO (operands[0]) == REGNO (operands[4]) -+ && REGNO (operands[1]) == REGNO (operands[5]) -+ && peep2_reg_dead_p (4, operands[0]) -+ && peep2_reg_dead_p (4, operands[1])" -+ [(set (match_dup 2) -+ (match_dup 6)) -+ (set (match_dup 3) -+ (match_dup 7))] -+{ -+ uint32_t check = 0; -+ int i; -+ for (i = 0; i <= 3; ++i) -+ { -+ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); -+ if (check & mask) -+ FAIL; -+ check |= mask; -+ } -+ operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); -+ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); -+}) -+ -+(define_split -+ [(clobber (match_operand:DSC 0 "register_operand"))] -+ "GP_REG_P (REGNO (operands[0]))" -+ [(const_int 0)] -+{ -+ unsigned int regno = REGNO (operands[0]); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ rtx_insn *insn; -+ rtx x; -+ if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno -+ && (insn = next_nonnote_nondebug_insn (insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) -+ FAIL; -+}) -diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt -index aef67970b..97aa44f92 100644 ---- a/gcc/config/xtensa/xtensa.opt -+++ b/gcc/config/xtensa/xtensa.opt -@@ -27,9 +27,13 @@ Target Report Mask(FORCE_NO_PIC) - Disable position-independent code (PIC) for use in OS kernel code. - - mlongcalls --Target -+Target Mask(LONGCALLS) - Use indirect CALLXn instructions for large programs. - -+mextra-l32r-costs= -+Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) -+Set extra memory access cost for L32R instruction, in clock-cycle units. -+ - mtarget-align - Target - Automatically align branch targets to reduce branch penalties. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index eabeec944..c35f51afb 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. - -mtext-section-literals -mno-text-section-literals @gol - -mauto-litpools -mno-auto-litpools @gol - -mtarget-align -mno-target-align @gol ---mlongcalls -mno-longcalls} -+-mlongcalls -mno-longcalls @gol -+-mextra-l32r-costs=@var{cycles}} - - @emph{zSeries Options} - See S/390 and zSeries Options. -@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call - instructions---look at the disassembled object code to see the actual - instructions. Note that the assembler uses an indirect call for - every cross-file call, not just those that really are out of range. 
-+ -+@item -mextra-l32r-costs=@var{n} -+@opindex mextra-l32r-costs -+Specify an extra cost of instruction RAM/ROM access for @code{L32R} -+instructions, in clock cycles. This affects, when optimizing for speed, -+whether loading a constant from literal pool using @code{L32R} or -+synthesizing the constant from a small one with a couple of arithmetic -+instructions. The default value is 0. - @end table - - @node zSeries Options -diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -new file mode 100644 -index 000000000..ba61c6f37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+extern void foo(void); -+ -+void BNONE_test(int a, int b) -+{ -+ if (a & b) -+ foo(); -+} -+ -+void BANY_test(int a, int b) -+{ -+ if (!(a & b)) -+ foo(); -+} -+ -+void BALL_test(int a, int b) -+{ -+ if (~a & b) -+ foo(); -+} -+ -+void BNALL_test(int a, int b) -+{ -+ if (!(~a & b)) -+ foo(); -+} -+ -+/* { dg-final { scan-assembler-times "bnone" 1 } } */ -+/* { dg-final { scan-assembler-times "bany" 1 } } */ -+/* { dg-final { scan-assembler-times "ball" 1 } } */ -+/* { dg-final { scan-assembler-times "bnall" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -new file mode 100644 -index 000000000..a0c885baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return 
__builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -new file mode 100644 -index 000000000..4cf95b925 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "ssai" 4 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -new file mode 100644 -index 000000000..1e010fd62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 4 } } */ 
-diff --git a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -new file mode 100644 -index 000000000..6a04aaeef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int check_zero_byte(int v) -+{ -+ return (v - 0x01010101) & ~v & 0x80808080; -+} -+ -+/* { dg-final { scan-assembler-not "movi" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -new file mode 100644 -index 000000000..ec2606ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+int test_0(void) -+{ -+ return 4095; -+} -+ -+int test_1(void) -+{ -+ return 2147483647; -+} -+ -+int test_2(void) -+{ -+ return -34816; -+} -+ -+int test_3(void) -+{ -+ return -2049; -+} -+ -+int test_4(void) -+{ -+ return 2048; -+} -+ -+int test_5(void) -+{ -+ return 34559; -+} -+ -+int test_6(void) -+{ -+ return 43680; -+} -+ -+void test_7(int *p) -+{ -+ *p = -1432354816; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -new file mode 100644 -index 000000000..f3c4a1c7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mextra-l32r-costs=3" } */ -+ -+int test_0(void) -+{ -+ return 134217216; -+} -+ -+int test_1(void) -+{ -+ return -27604992; -+} -+ -+int test_2(void) -+{ -+ return -162279; -+} -+ -+void test_3(int *p) -+{ -+ *p = 192437; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -new file mode 100644 -index 000000000..11e5d5242 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+void test(unsigned int count, double array[]) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ array[i] = 1.0; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -new file mode 100644 -index 000000000..c8f987ccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(const void *addr) -+{ -+ unsigned int n = (unsigned int)addr; -+ const unsigned int *a = (const unsigned int*)(n & ~3); -+ n = (n & 3) * 8; -+ return (a[0] >> n) | (a[1] << (32 - n)); -+} -+ -+unsigned int test_1(unsigned int a, unsigned int b) -+{ -+ return (a >> 16) + (b << 16); -+} -+ -+/* { dg-final { scan-assembler-times "src" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -new file mode 100644 -index 000000000..608f65fd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+int one_cmpl_abs(int a) -+{ -+ return a < 0 ? 
~a : a; -+} -+ -+/* { dg-final { scan-assembler-not "bgez" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -new file mode 100644 -index 000000000..7a4018796 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -foptimize-sibling-calls" } */ -+ -+extern int foo(int); -+extern void bar(int); -+ -+int test_0(int a) { -+ return foo(a); -+} -+ -+void test_1(int a) { -+ bar(a); -+} -+ -+int test_2(int (*a)(void)) { -+ bar(0); -+ return a(); -+} -+ -+/* { dg-final { scan-assembler-not "ret" } } */ -diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S -index b19deae14..ad9072c40 100644 ---- a/libgcc/config/xtensa/lib1funcs.S -+++ b/libgcc/config/xtensa/lib1funcs.S -@@ -456,6 +456,29 @@ __nsau_data: - #endif /* L_clz */ - - -+#ifdef L_clrsbsi2 -+ .align 4 -+ .global __clrsbsi2 -+ .type __clrsbsi2, @function -+__clrsbsi2: -+ leaf_entry sp, 16 -+#if XCHAL_HAVE_NSA -+ nsa a2, a2 -+#else -+ srai a3, a2, 31 -+ xor a3, a3, a2 -+ movi a2, 31 -+ beqz a3, .Lreturn -+ do_nsau a2, a3, a4, a5 -+ addi a2, a2, -1 -+.Lreturn: -+#endif -+ leaf_return -+ .size __clrsbsi2, . 
- __clrsbsi2 -+ -+#endif /* L_clrsbsi2 */ -+ -+ - #ifdef L_clzsi2 - .align 4 - .global __clzsi2 -diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa -index 9836c96ae..084618b38 100644 ---- a/libgcc/config/xtensa/t-xtensa -+++ b/libgcc/config/xtensa/t-xtensa -@@ -1,6 +1,6 @@ - LIB1ASMSRC = xtensa/lib1funcs.S - LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ -- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ -+ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _bswapsi2 _bswapdi2 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ --- -2.20.1 - diff --git a/patches/gcc10.3/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch b/patches/gcc10.3/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch new file mode 100644 index 0000000..5aebddc --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch @@ -0,0 +1,48 @@ +From 76ee6b24125c885150e5b493b26b594801998b74 Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Tue, 18 Jan 2022 14:51:40 +0100 +Subject: [PATCH 02/31] xtensa: fix -Wformat-diag warnings. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (print_operand): Fix warnings. + (print_operand_address): Likewise. + (xtensa_multibss_section_type_flags): Likewise. 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 37c6ac1fd..b1dbe8520 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2379,7 +2379,7 @@ void + print_operand (FILE *file, rtx x, int letter) + { + if (!x) +- error ("PRINT_OPERAND null pointer"); ++ error ("%<PRINT_OPERAND%> null pointer"); + + switch (letter) + { +@@ -2584,7 +2584,7 @@ void + print_operand_address (FILE *file, rtx addr) + { + if (!addr) +- error ("PRINT_OPERAND_ADDRESS, null pointer"); ++ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); + + switch (GET_CODE (addr)) + { +@@ -3697,7 +3697,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " +- ".bss section"); ++ "%<.bss%> section"); + } + + return flags; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch b/patches/gcc10.3/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch new file mode 100644 index 0000000..46260ef --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch @@ -0,0 +1,74 @@ +From b5b9fd01c4db135893c44e82a9f33c2411e993d0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:06 +0900 +Subject: [PATCH 03/31] xtensa: Rename deprecated extv/extzv insn patterns to + extvsi/extzvsi + +These patterns were deprecated since GCC 4.8. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi, + extzvsi_internal): Rename from extv, extv_internal, extzv and + extzv_internal, respectively.
+--- + gcc/config/xtensa/xtensa.md | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 123916957..251c313d5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -634,7 +634,7 @@ + + ;; Field extract instructions. + +-(define_expand "extv" ++(define_expand "extvsi" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -649,12 +649,12 @@ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + +- emit_insn (gen_extv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extv_internal" ++(define_insn "extvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") +@@ -669,7 +669,7 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "extzv" ++(define_expand "extzvsi" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -678,12 +678,12 @@ + { + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; +- emit_insn (gen_extzv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extzvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extzv_internal" ++(define_insn "extzvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch 
b/patches/gcc10.3/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch new file mode 100644 index 0000000..607367c --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch @@ -0,0 +1,41 @@ +From 12fa0b13b6f0c52e5c4d75f39822771a7f780f94 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:19 +0900 +Subject: [PATCH 04/31] xtensa: Reflect the 32-bit Integer Divide Option + +On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider), +this patch reduces the size of each: + + __moddi3() @ libgcc.a : 969 -> 301 (saves 668) + __divmoddi4() : 1111 -> 426 (saves 685) + __udivmoddi4() : 1043 -> 319 (saves 724) + +in bytes, respectively. + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro + definition. +--- + gcc/config/xtensa/xtensa.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index fa86a245e..5b102de51 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. If not see + #define HAVE_AS_TLS 0 + #endif + ++/* Define this if the target has no hardware divide instructions. */ ++#if !TARGET_DIV32 ++#define TARGET_HAS_NO_HW_DIVIDE ++#endif ++ + + /* Target CPU builtins. 
*/ + #define TARGET_CPU_CPP_BUILTINS() \ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch b/patches/gcc10.3/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch new file mode 100644 index 0000000..8d257cd --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch @@ -0,0 +1,78 @@ +From 49383c9381a937b360adeb14f5e7bd4472f7c386 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:26:30 +0900 +Subject: [PATCH 05/31] xtensa: Simplify EXTUI instruction maskimm validations + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (extui_fldsz_operand): Simplify. + * config/xtensa/xtensa.c (xtensa_mask_immediate, print_operand): + Ditto. +--- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa.c | 24 +++--------------------- + 2 files changed, 4 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index eb52b05aa..3f84859b6 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -55,7 +55,7 @@ + + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") +- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) + + (define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b1dbe8520..4043f40ce 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -446,19 +446,7 @@ xtensa_b4constu (HOST_WIDE_INT v) + bool + xtensa_mask_immediate (HOST_WIDE_INT v) + { +-#define MAX_MASK_SIZE 16 +- int mask_size; +- +- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) +- { +- if ((v & 1) == 0) +- return false; +- v = v >> 1; +- if (v == 0) +- return true; +- } +- +- return false; ++ return IN_RANGE (exact_log2 
(v + 1), 1, 16); + } + + +@@ -2424,17 +2412,11 @@ print_operand (FILE *file, rtx x, int letter) + case 'K': + if (GET_CODE (x) == CONST_INT) + { +- int num_bits = 0; + unsigned val = INTVAL (x); +- while (val & 1) +- { +- num_bits += 1; +- val = val >> 1; +- } +- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) ++ if (!xtensa_mask_immediate (val)) + fatal_insn ("invalid mask", x); + +- fprintf (file, "%d", num_bits); ++ fprintf (file, "%d", floor_log2 (val + 1)); + } + else + output_operand_lossage ("invalid %%K value"); +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch b/patches/gcc10.3/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch new file mode 100644 index 0000000..419ebfe --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch @@ -0,0 +1,174 @@ +From fa7073ff572c248896057a5a7841a3e1d98380ad Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:27:36 +0900 +Subject: [PATCH 06/31] xtensa: Make use of IN_RANGE macro where appropriate + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/constraints.md (M, O): Use the macro. + * config/xtensa/predicates.md (addsubx_operand, extui_fldsz_operand, + sext_fldsz_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_simm8, xtensa_simm8x256, + xtensa_simm12b, xtensa_uimm8, xtensa_uimm8x2, xtensa_uimm8x4, + xtensa_mask_immediate, smalloffset_mem_p, printx, xtensa_call_save_reg, + xtensa_expand_prologue): Ditto. + * config/xtensa/xtensa.h (FUNCTION_ARG_REGNO_P): Ditto. 
+--- + gcc/config/xtensa/constraints.md | 4 ++-- + gcc/config/xtensa/predicates.md | 5 ++--- + gcc/config/xtensa/xtensa.c | 20 ++++++++++---------- + gcc/config/xtensa/xtensa.h | 2 +- + 4 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 2062c8816..9a8caab4f 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -92,7 +92,7 @@ + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") +- (match_test "ival >= -32 && ival <= 95"))) ++ (match_test "IN_RANGE (ival, -32, 95)"))) + + (define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use +@@ -103,7 +103,7 @@ + (define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") +- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) ++ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) + + (define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 3f84859b6..91b9343a2 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -25,8 +25,7 @@ + + (define_predicate "addsubx_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 1 +- && INTVAL (op) <= 3"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + + (define_predicate "arith_operand" + (ior (and (match_code "const_int") +@@ -64,7 +63,7 @@ + + (define_predicate "sext_fldsz_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) + + (define_predicate "lsbitnum_operand" + (and (match_code "const_int") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 4043f40ce..02dc5799a 100644 +--- a/gcc/config/xtensa/xtensa.c 
++++ b/gcc/config/xtensa/xtensa.c +@@ -341,42 +341,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; + bool + xtensa_simm8 (HOST_WIDE_INT v) + { +- return v >= -128 && v <= 127; ++ return IN_RANGE (v, -128, 127); + } + + + bool + xtensa_simm8x256 (HOST_WIDE_INT v) + { +- return (v & 255) == 0 && (v >= -32768 && v <= 32512); ++ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); + } + + + bool + xtensa_simm12b (HOST_WIDE_INT v) + { +- return v >= -2048 && v <= 2047; ++ return IN_RANGE (v, -2048, 2047); + } + + + static bool + xtensa_uimm8 (HOST_WIDE_INT v) + { +- return v >= 0 && v <= 255; ++ return IN_RANGE (v, 0, 255); + } + + + static bool + xtensa_uimm8x2 (HOST_WIDE_INT v) + { +- return (v & 1) == 0 && (v >= 0 && v <= 510); ++ return (v & 1) == 0 && IN_RANGE (v, 0, 510); + } + + + static bool + xtensa_uimm8x4 (HOST_WIDE_INT v) + { +- return (v & 3) == 0 && (v >= 0 && v <= 1020); ++ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); + } + + +@@ -527,7 +527,7 @@ smalloffset_mem_p (rtx op) + return FALSE; + + val = INTVAL (offset); +- return (val & 3) == 0 && (val >= 0 && val <= 60); ++ return (val & 3) == 0 && IN_RANGE (val, 0, 60); + } + } + return FALSE; +@@ -2352,7 +2352,7 @@ static void + printx (FILE *file, signed int val) + { + /* Print a hexadecimal value in a nice way. */ +- if ((val > -0xa) && (val < 0xa)) ++ if (IN_RANGE (val, -9, 9)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); +@@ -2732,7 +2732,7 @@ xtensa_call_save_reg(int regno) + return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || + df_regs_ever_live_p (regno); + +- if (crtl->calls_eh_return && regno >= 2 && regno < 4) ++ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; + + return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); +@@ -2852,7 +2852,7 @@ xtensa_expand_prologue (void) + int callee_save_size = cfun->machine->callee_save_size; + + /* -128 is a limit of single addi instruction. 
*/ +- if (total_size > 0 && total_size <= 128) ++ if (IN_RANGE (total_size, 1, 128)) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-total_size))); +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 5b102de51..3e9cbc943 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -493,7 +493,7 @@ enum reg_class + used for this purpose since all function arguments are pushed on + the stack. */ + #define FUNCTION_ARG_REGNO_P(N) \ +- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) ++ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) + + /* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch b/patches/gcc10.3/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch new file mode 100644 index 0000000..dae4a21 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch @@ -0,0 +1,54 @@ +From 5cda5b41a7646d220f7351226b5da78955b0fc7f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:29:22 +0900 +Subject: [PATCH 07/31] xtensa: Fix instruction counting regarding block move + expansion + +This patch makes counting the number of instructions of the remainder +(modulo 4) part more accurate. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_move): + Make instruction counting more accurate, and simplify emitting insns. 
+--- + gcc/config/xtensa/xtensa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 02dc5799a..0fe8b73ad 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1303,7 +1303,7 @@ xtensa_expand_block_move (rtx *operands) + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; +- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ ++ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); + if (num_pieces > move_ratio) + return 0; + +@@ -1340,7 +1340,7 @@ xtensa_expand_block_move (rtx *operands) + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); +- emit_insn (gen_rtx_SET (temp[next], x)); ++ emit_move_insn (temp[next], x); + + offset_ld += next_amount; + bytes -= next_amount; +@@ -1350,9 +1350,9 @@ xtensa_expand_block_move (rtx *operands) + if (active[phase]) + { + active[phase] = false; +- ++ + x = adjust_address (dst_mem, mode[phase], offset_st); +- emit_insn (gen_rtx_SET (x, temp[phase])); ++ emit_move_insn (x, temp[phase]); + + offset_st += amount[phase]; + } +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch new file mode 100644 index 0000000..a7212ce --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch @@ -0,0 +1,303 @@ +From 02572a935a2cbabc96387289300fb78d61dde555 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 24 May 2022 00:52:44 +0900 +Subject: [PATCH 08/31] xtensa: Add setmemsi insn pattern + +This patch introduces setmemsi insn pattern of two kinds, unrolled loop and +small loop, for fixed small length and constant initialization value. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New prototypes. 
+ * config/xtensa/xtensa.c (xtensa_sizeof_MOVI, + xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New functions. + * config/xtensa/xtensa.md (setmemsi): New expansion pattern. + * config/xtensa/xtensa.opt (mlongcalls): Add target mask. +--- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 211 ++++++++++++++++++++++++++++++ + gcc/config/xtensa/xtensa.md | 16 +++ + gcc/config/xtensa/xtensa.opt | 2 +- + 4 files changed, 230 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 18d803581..80b1da2bb 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); + extern int xtensa_expand_conditional_move (rtx *, int); + extern int xtensa_expand_scc (rtx *, machine_mode); + extern int xtensa_expand_block_move (rtx *); ++extern int xtensa_expand_block_set_unrolled_loop (rtx *); ++extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0fe8b73ad..a6d76a953 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1363,6 +1363,217 @@ xtensa_expand_block_move (rtx *operands) + } + + ++/* Try to expand a block set operation to a sequence of RTL move ++ instructions. If not optimizing, or if the block size is not a ++ constant, or if the block is too large, or if the value to ++ initialize the block with is not a constant, the expansion ++ fails and GCC falls back to calling memset(). 
++ ++ operands[0] is the destination ++ operands[1] is the length ++ operands[2] is the initialization value ++ operands[3] is the alignment */ ++ ++static int ++xtensa_sizeof_MOVI (HOST_WIDE_INT imm) ++{ ++ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3; ++} ++ ++int ++xtensa_expand_block_set_unrolled_loop (rtx *operands) ++{ ++ rtx dst_mem = operands[0]; ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, reg; ++ int offset; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. */ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: a series of aligned memory stores. ++ Consist of S8I, S16I or S32I(.N). */ ++ expand_len += (bytes / align) * (TARGET_DENSITY ++ && align == 4 ? 2 : 3); ++ /* Insn expansion: the remainder, sub-aligned memory stores. ++ A combination of S8I and S16I as needed. */ ++ expand_len += ((bytes % align + 1) / 2) * 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (dst_mem, 0); ++ if (!REG_P (x)) ++ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); ++ switch (align) ++ { ++ case 1: ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (SImode, GEN_INT (value)); ++ ++ offset = 0; ++ do ++ { ++ int unit_size = MIN (bytes, align); ++ machine_mode unit_mode = (unit_size >= 4 ? SImode : ++ (unit_size >= 2 ? HImode : ++ QImode)); ++ unit_size = GET_MODE_SIZE (unit_mode); ++ ++ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), ++ unit_mode == SImode ? reg ++ : convert_to_mode (unit_mode, reg, true)); ++ ++ offset += unit_size; ++ bytes -= unit_size; ++ } ++ while (bytes > 0); ++ ++ return 1; ++} ++ ++int ++xtensa_expand_block_set_small_loop (rtx *operands) ++{ ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, dst, end, reg; ++ machine_mode unit_mode; ++ rtx_code_label *label; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Totally-aligned block only. */ ++ if (bytes % align != 0) ++ return 0; ++ ++ /* If 4-byte aligned, small loop substitution is almost optimal, thus ++ limited to only offset to the end address for ADDI/ADDMI instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; ++ ++ /* If no 4-byte aligned, loop count should be treated as the constraint. */ ++ if (align != 4 ++ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) ++ return 0; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. 
*/ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (operands[0], 0); ++ if (!REG_P (x)) ++ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); ++ dst = gen_reg_rtx (SImode); ++ emit_move_insn (dst, x); ++ end = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ switch (align) ++ { ++ case 1: ++ unit_mode = QImode; ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ unit_mode = HImode; ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ unit_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (unit_mode, GEN_INT (value)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); ++ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); ++ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); ++ ++ return 1; ++} ++ ++ + void + xtensa_expand_nonlocal_goto (rtx *operands) + { +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 251c313d5..9eb689efa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1085,6 +1085,22 @@ + DONE; + }) + ++;; Block sets ++ ++(define_expand "setmemsi" ++ [(match_operand:BLK 0 "memory_operand") ++ (match_operand:SI 1 "") ++ (match_operand:SI 2 "") ++ (match_operand:SI 3 "const_int_operand")] ++ "!optimize_debug && optimize" ++{ ++ if (xtensa_expand_block_set_unrolled_loop (operands)) ++ DONE; ++ if (xtensa_expand_block_set_small_loop (operands)) ++ DONE; ++ FAIL; ++}) ++ + + ;; Shift instructions. + +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index aef67970b..e1d992f5d 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -27,7 +27,7 @@ Target Report Mask(FORCE_NO_PIC) + Disable position-independent code (PIC) for use in OS kernel code. 
+ + mlongcalls +-Target ++Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + + mtarget-align +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch b/patches/gcc10.3/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch new file mode 100644 index 0000000..a5fb6f1 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch @@ -0,0 +1,254 @@ +From be1ca3aa6e9754ed16d1b7a60657912af02844da Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:33:59 +0900 +Subject: [PATCH 09/31] xtensa: Improve bswap[sd]i2 insn patterns + +This patch makes bswap[sd]i2 better register allocation, and reconstructs +bswapsi2 in order to take advantage of GIMPLE manual byte-swapping +recognition. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2): New expansion pattern. + (bswapsi2_internal): Revise the template and condition, and add + detection code for preceding the same insn in order to omit a + "SSAI 8" instruction of the latter. + (bswapdi2): Suppress built-in insn expansion with the corresponding + library call when optimizing for size. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/bswap-O1.c: New. + * gcc.target/xtensa/bswap-O2.c: Ditto. + * gcc.target/xtensa/bswap-Os.c: Ditto. 
+--- + gcc/config/xtensa/xtensa.md | 77 +++++++++++++++++----- + gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 +++++++++++ + 4 files changed, 172 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eb689efa..cea280061 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,23 +471,68 @@ + + ;; Byte swap. + +-(define_insn "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=&a") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "15")]) ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1" ++{ ++ /* GIMPLE manual byte-swapping recognition is now activated. ++ For both built-in and manual bswaps, emit corresponding library call ++ if optimizing for size, or a series of dedicated machine instructions ++ if otherwise. 
*/ ++ if (optimize_size) ++ emit_library_call_value (optab_libfunc (bswap_optab, SImode), ++ operands[0], LCT_NORMAL, SImode, ++ operands[1], SImode); ++ else ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++}) + +-(define_insn "bswapdi2" +- [(set (match_operand:DI 0 "register_operand" "=&a") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "length" "27")]) ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=a,&a") ++ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) ++ (clobber (match_scratch:SI 2 "=&a,X"))] ++ "!optimize_debug && optimize > 1 && !optimize_size" ++{ ++ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); ++ const char *init = "ssai\t8\;"; ++ static char result[128]; /* Template expands to 67 chars + NUL; 64 overflowed. */ ++ if (prev_insn && NONJUMP_INSN_P (prev_insn)) ++ { ++ rtx x = PATTERN (prev_insn); ++ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 ++ && GET_CODE (XVECEXP (x, 0, 0)) == SET ++ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) ++ { ++ x = XEXP (XVECEXP (x, 0, 0), 1); ++ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) ++ init = ""; ++ } ++ } ++ sprintf (result, ++ (which_alternative == 0) ++ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" ++ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", ++ init); ++ return result; ++} ++ [(set_attr "type" "arith,arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "15,15")]) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (bswap:DI (match_operand:DI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1 && optimize_size" ++{ ++ /* Replace with a single DImode library call. 
++ Without this, two SImode library calls are emitted. */ ++ emit_library_call_value (optab_libfunc (bswap_optab, DImode), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], DImode); ++ DONE; ++}) + + + ;; Negation and one's complement. +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +new file mode 100644 +index 000000000..a0c885baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +new file mode 100644 +index 000000000..4cf95b925 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} 
++ ++/* { dg-final { scan-assembler-times "ssai" 4 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +new file mode 100644 +index 000000000..1e010fd62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 4 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0015-fix-PR-target-105879.patch b/patches/gcc10.3/gcc-xtensa-0015-fix-PR-target-105879.patch new file mode 100644 index 0000000..2c21f47 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0015-fix-PR-target-105879.patch @@ -0,0 +1,48 @@ +From 1848b547a6ac69a002d068239a5bc9463f3fae25 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 7 Jun 2022 21:01:01 -0700 +Subject: [PATCH 10/31] gcc: xtensa: fix PR target/105879 + +split_double operates with the 'word that comes first in memory in the +target' terminology, while gen_lowpart operates with the 'value +representing some low-order bits of X' terminology. They are not +equivalent and must be dealt with differently on little- and big-endian +targets. + +gcc/ + PR target/105879 + * config/xtensa/xtensa.md (movdi): Rename 'first' and 'second' + to 'lowpart' and 'highpart' so that they match 'gen_lowpart' and + 'gen_highpart' bitwise semantics and fix order of highpart and + lowpart depending on target endianness. 
+--- + gcc/config/xtensa/xtensa.md | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index cea280061..30d8ef96c 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -802,11 +802,14 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx first, second; +- +- split_double (operands[1], &first, &second); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); ++ rtx lowpart, highpart; ++ ++ if (TARGET_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); ++ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); + DONE; + } + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch new file mode 100644 index 0000000..3a31e62 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch @@ -0,0 +1,39 @@ +From f47a902c9a94d2e9df879de4613dae62c8e9cc4f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:44:32 +0900 +Subject: [PATCH 11/31] xtensa: Implement bswaphi2 insn pattern + +This patch adds bswaphi2 insn pattern that is one instruction less than the +default expansion. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswaphi2): New insn pattern. +--- + gcc/config/xtensa/xtensa.md | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 30d8ef96c..c1f44777d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,6 +471,16 @@ + + ;; Byte swap. 
+ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=a") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++ (clobber (match_scratch:HI 2 "=&a"))] ++ "" ++ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "HI") ++ (set_attr "length" "9")]) ++ + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch b/patches/gcc10.3/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch new file mode 100644 index 0000000..017a30f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch @@ -0,0 +1,86 @@ +From 22b5756399ef63a4102334724b12a4c186075227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:46:16 +0900 +Subject: [PATCH 12/31] xtensa: Make one_cmplsi2 optimizer-friendly + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. But a few optimizers assume that bitwise negation can be +done by a single insn. + +As a result, '((x < 0) ? ~x : x)' cannot be optimized to '(x ^ (x >> 31))' +ever before, for example. + +This patch relaxes such limitation, by putting the insn expansion off till +the split pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (one_cmplsi2): + Rearrange as an insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/one_cmpl_abs.c: New. 
+--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++------ + .../gcc.target/xtensa/one_cmpl_abs.c | 9 +++++++ + 2 files changed, 27 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c1f44777d..2f6d48d03 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -556,16 +556,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "") +- (not:SI (match_operand:SI 1 "register_operand" "")))] ++(define_insn_and_split "one_cmplsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (const_int -1)) ++ (set (match_dup 0) ++ (xor:SI (match_dup 1) ++ (match_dup 2)))] + { +- rtx temp = gen_reg_rtx (SImode); +- emit_insn (gen_movsi (temp, constm1_rtx)); +- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); +- DONE; +-}) ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +new file mode 100644 +index 000000000..608f65fd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++int one_cmpl_abs(int a) ++{ ++ return a < 0 ? 
~a : a; ++} ++ ++/* { dg-final { scan-assembler-not "bgez" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch b/patches/gcc10.3/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch new file mode 100644 index 0000000..d1167a1 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch @@ -0,0 +1,71 @@ +From cc259b2801c8d04c39169214041305fdd5b87acd Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:55:44 +0900 +Subject: [PATCH 13/31] xtensa: Optimize '(~x & y)' to '((x & y) ^ y)' + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*andsi3_bitcmpl): + New insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/check_zero_byte.c: New. +--- + gcc/config/xtensa/xtensa.md | 20 +++++++++++++++++++ + .../gcc.target/xtensa/check_zero_byte.c | 9 +++++++++ + 2 files changed, 29 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2f6d48d03..28ed1d34e 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -601,6 +601,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + ++(define_insn_and_split "*andsi3_bitcmpl" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 3) ++ (and:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (xor:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +diff --git 
a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +new file mode 100644 +index 000000000..6a04aaeef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++int check_zero_byte(int v) ++{ ++ return (v - 0x01010101) & ~v & 0x80808080; ++} ++ ++/* { dg-final { scan-assembler-not "movi" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch new file mode 100644 index 0000000..ebaa985 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch @@ -0,0 +1,98 @@ +From ebd48d915076589f04b5c1ed50f9f5ddfae088e8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:57:35 +0900 +Subject: [PATCH 14/31] xtensa: Add clrsbsi2 insn pattern + +> (clrsb:m x) +> Represents the number of redundant leading sign bits in x, represented +> as an integer of mode m, starting at the most significant bit position. + +This explanation is just what the NSA instruction (not ever emitted before) +calculates in Xtensa ISA. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (clrsbsi2): New insn pattern. + +libgcc/ChangeLog: + + * config/xtensa/lib1funcs.S (__clrsbsi2): New function. + * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _clrsbsi2. +--- + gcc/config/xtensa/xtensa.md | 12 +++++++++++- + libgcc/config/xtensa/lib1funcs.S | 23 +++++++++++++++++++++++ + libgcc/config/xtensa/t-xtensa | 2 +- + 3 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 28ed1d34e..6c76fb942 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -429,7 +429,17 @@ + (set_attr "length" "3")]) + + +-;; Count leading/trailing zeros and find first bit. 
++;; Count redundant leading sign bits and leading/trailing zeros, ++;; and find first bit. ++ ++(define_insn "clrsbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_NSA" ++ "nsa\t%0, %1" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "3")]) + + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") +diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S +index b19deae14..ad9072c40 100644 +--- a/libgcc/config/xtensa/lib1funcs.S ++++ b/libgcc/config/xtensa/lib1funcs.S +@@ -456,6 +456,29 @@ __nsau_data: + #endif /* L_clz */ + + ++#ifdef L_clrsbsi2 ++ .align 4 ++ .global __clrsbsi2 ++ .type __clrsbsi2, @function ++__clrsbsi2: ++ leaf_entry sp, 16 ++#if XCHAL_HAVE_NSA ++ nsa a2, a2 ++#else ++ srai a3, a2, 31 ++ xor a3, a3, a2 ++ movi a2, 31 ++ beqz a3, .Lreturn ++ do_nsau a2, a3, a4, a5 ++ addi a2, a2, -1 ++.Lreturn: ++#endif ++ leaf_return ++ .size __clrsbsi2, . 
- __clrsbsi2 ++ ++#endif /* L_clrsbsi2 */ ++ ++ + #ifdef L_clzsi2 + .align 4 + .global __clzsi2 +diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa +index 9836c96ae..084618b38 100644 +--- a/libgcc/config/xtensa/t-xtensa ++++ b/libgcc/config/xtensa/t-xtensa +@@ -1,6 +1,6 @@ + LIB1ASMSRC = xtensa/lib1funcs.S + LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ +- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ ++ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _bswapsi2 _bswapdi2 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch b/patches/gcc10.3/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch new file mode 100644 index 0000000..8de8a89 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch @@ -0,0 +1,110 @@ +From 1ba9369255749ccf9ec82565a192b1a523b0e374 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:17:40 +0900 +Subject: [PATCH 15/31] xtensa: Tweak some widen multiplications + +umulsidi3 is faster than umuldi3 even if library call, and is also +prerequisite for fast constant division by multiplication. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (mulsidi3, umulsidi3): + Split into individual signedness, in order to use libcall + "__umulsidi3" but not the other. + (mulhisi3): Merge into one by using code iterator. + (mulsidi3, mulhisi3, umulhisi3): Remove. +--- + gcc/config/xtensa/xtensa.md | 56 +++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6c76fb942..3314b3fd6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -224,20 +224,42 @@ + + ;; Multiplication. 
+ +-(define_expand "mulsidi3" ++(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") +- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) +- (any_extend:DI (match_operand:SI 2 "register_operand"))))] ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); +- emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), +- operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; + }) + ++(define_expand "umulsidi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "" ++{ ++ if (TARGET_MUL32_HIGH) ++ { ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); ++ } ++ else ++ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], SImode, ++ operands[2], SImode); ++ DONE; ++}) ++ + (define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI +@@ -261,30 +283,16 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "mulhisi3" +- [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (sign_extend:SI +- (match_operand:HI 1 "register_operand" "%r,r")) +- (sign_extend:SI +- (match_operand:HI 2 "register_operand" "r,r"))))] +- "TARGET_MUL16 || TARGET_MAC16" +- "@ +- mul16s\t%0, %1, %2 +- mul.aa.ll\t%1, %2" +- [(set_attr "type" 
"mul16,mac16") +- (set_attr "mode" "SI") +- (set_attr "length" "3,3")]) +- +-(define_insn "umulhisi3" ++(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (zero_extend:SI ++ (mult:SI (any_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) +- (zero_extend:SI ++ (any_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ +- mul16u\t%0, %1, %2 +- umul.aa.ll\t%1, %2" ++ mul16\t%0, %1, %2 ++ mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch b/patches/gcc10.3/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch new file mode 100644 index 0000000..491da47 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch @@ -0,0 +1,125 @@ +From bc108c84544d5a0e6289628e8749a92c9695f006 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:18:24 +0900 +Subject: [PATCH 16/31] xtensa: Consider the Loop Option when setmemsi is + expanded to small loop + +Now apply to almost any size of aligned block under such circumstances. 
+ +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_set_small_loop): + Pass through the block length / loop count conditions if + zero-overhead looping is configured and active, +--- + gcc/config/xtensa/xtensa.c | 71 +++++++++++++++++++++++++++----------- + 1 file changed, 50 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a6d76a953..e2f97b79c 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1473,7 +1473,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) + int + xtensa_expand_block_set_small_loop (rtx *operands) + { +- HOST_WIDE_INT bytes, value, align; ++ HOST_WIDE_INT bytes, value, align, count; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; +@@ -1493,17 +1493,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; ++ count = bytes / align; + +- /* If 4-byte aligned, small loop substitution is almost optimal, thus +- limited to only offset to the end address for ADDI/ADDMI instruction. */ +- if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) +- return 0; ++ /* If the Loop Option (zero-overhead looping) is configured and active, ++ almost no restrictions about the length of the block. */ ++ if (! (TARGET_LOOPS && optimize)) ++ { ++ /* If 4-byte aligned, small loop substitution is almost optimal, ++ thus limited to only offset to the end address for ADDI/ADDMI ++ instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; + +- /* If no 4-byte aligned, loop count should be treated as the constraint. */ +- if (align != 4 +- && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) +- return 0; ++ /* If no 4-byte aligned, loop count should be treated as the ++ constraint. */ ++ if (align != 4 ++ && count > ((optimize > 1 && !optimize_size) ? 
8 : 15)) ++ return 0; ++ } + + /* Insn expansion: holding the init value. + Either MOV(.N) or L32R w/litpool. */ +@@ -1513,16 +1521,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; +- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ +- expand_len += bytes > 127 ? 3 +- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; +- +- /* Insn expansion: the loop body and branch instruction. +- For store, one of S8I, S16I or S32I(.N). +- For advance, ADDI(.N). +- For branch, BNE. */ +- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) +- + (TARGET_DENSITY ? 2 : 3) + 3; ++ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ ++ { ++ /* Insn translation: Either MOV(.N) or L32R w/litpool for the ++ loop count. */ ++ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) ++ : 3 + 4; ++ /* Insn translation: LOOP, the zero-overhead looping setup ++ instruction. */ ++ expand_len += 3; ++ /* Insn expansion: the loop body instructions. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3); ++ } ++ else /* NO zero-overhead looping */ ++ { ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ } + + /* Function call: preparing two arguments. 
*/ + funccall_len = xtensa_sizeof_MOVI (value); +@@ -1545,7 +1570,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); +- emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ if (TARGET_LOOPS && optimize) ++ x = force_reg (SImode, operands[1] /* the length */); ++ else ++ x = operands[1]; ++ emit_insn (gen_addsi3 (end, dst, x)); + switch (align) + { + case 1: +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch b/patches/gcc10.3/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch new file mode 100644 index 0000000..5792a6f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch @@ -0,0 +1,325 @@ +From de854e2348b8159bc389471e68023986c8878c92 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:19:32 +0900 +Subject: [PATCH 17/31] xtensa: Improve instruction cost estimation and + suggestion + +This patch implements a new target-specific relative RTL insn cost function +because of suboptimal cost estimation by default, and fixes several "length" +insn attributes (related to the cost estimation). + +And also introduces a new machine-dependent option "-mextra-l32r-costs=" +that tells implementation-specific InstRAM/ROM access penalty for L32R +instruction to the compiler (in clock-cycle units, 0 by default). + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): Correct wrong case + for ABS and NEG, add missing case for BSWAP and CLRSB, and + double the costs for integer divisions using libfuncs if + optimizing for speed, in order to take advantage of fast constant + division by multiplication. + (TARGET_INSN_COST): New macro definition. + (xtensa_is_insn_L32R_p, xtensa_insn_cost): New functions for + calculating relative costs of a RTL insns, for both of speed and + size. 
+ * config/xtensa/xtensa.md (return, nop, trap): Correct values of + the attribute "length" that depends on TARGET_DENSITY. + (define_asm_attributes, blockage, frame_blockage): Add missing + attributes. + * config/xtensa/xtensa.opt (-mextra-l32r-costs=): New machine- + dependent option, however, preparatory work for now. +--- + gcc/config/xtensa/xtensa.c | 116 ++++++++++++++++++++++++++++++++--- + gcc/config/xtensa/xtensa.md | 29 ++++++--- + gcc/config/xtensa/xtensa.opt | 4 ++ + 3 files changed, 134 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e2f97b79c..94ff901c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "hw-doloop.h" + #include "rtl-iter.h" ++#include "insn-attr.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + static section *xtensa_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); ++static int xtensa_insn_cost (rtx_insn *, bool); + static int xtensa_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); +@@ -208,6 +210,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS xtensa_rtx_costs ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST xtensa_insn_cost + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +@@ -3972,7 +3976,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + static bool + xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, +- int *total, bool 
speed ATTRIBUTE_UNUSED) ++ int *total, bool speed) + { + int code = GET_CODE (x); + +@@ -4060,9 +4064,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case CLZ: ++ case CLRSB: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + ++ case BSWAP: ++ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); ++ return true; ++ + case NOT: + *total = COSTS_N_INSNS (mode == DImode ? 3 : 2); + return true; +@@ -4086,13 +4095,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case ABS: ++ case NEG: + { + if (mode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (mode == DFmode) + *total = COSTS_N_INSNS (50); +- else ++ else if (mode == DImode) + *total = COSTS_N_INSNS (4); ++ else ++ *total = COSTS_N_INSNS (1); + return true; + } + +@@ -4108,10 +4120,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + } + +- case NEG: +- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); +- return true; +- + case MULT: + { + if (mode == SFmode) +@@ -4151,11 +4159,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case UMOD: + { + if (mode == DImode) +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + return true; + } + +@@ -4188,6 +4196,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + ++static bool ++xtensa_is_insn_L32R_p(const rtx_insn *insn) ++{ ++ rtx x = PATTERN (insn); ++ ++ if (GET_CODE (x) == SET) ++ { ++ x = XEXP (x, 1); ++ if (GET_CODE (x) == MEM) ++ { ++ x = XEXP (x, 0); ++ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); ++ } ++ } ++ ++ return false; ++} ++ ++/* Compute a relative costs of RTL insns. This is necessary in order to ++ achieve better RTL insn splitting/combination result. 
*/ ++ ++static int ++xtensa_insn_cost (rtx_insn *insn, bool speed) ++{ ++ if (!(recog_memoized (insn) < 0)) ++ { ++ int len = get_attr_length (insn), n = (len + 2) / 3; ++ ++ if (len == 0) ++ return COSTS_N_INSNS (0); ++ ++ if (speed) /* For speed cost. */ ++ { ++ /* "L32R" may be particular slow (implementation-dependent). */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); ++ ++ /* Cost based on the pipeline model. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_STORE: ++ case TYPE_MOVE: ++ case TYPE_ARITH: ++ case TYPE_MULTI: ++ case TYPE_NOP: ++ case TYPE_FSTORE: ++ return COSTS_N_INSNS (n); ++ ++ case TYPE_LOAD: ++ return COSTS_N_INSNS (n - 1 + 2); ++ ++ case TYPE_JUMP: ++ case TYPE_CALL: ++ return COSTS_N_INSNS (n - 1 + 3); ++ ++ case TYPE_FCONV: ++ case TYPE_FLOAD: ++ case TYPE_MUL16: ++ case TYPE_MUL32: ++ case TYPE_RSR: ++ return COSTS_N_INSNS (n * 2); ++ ++ case TYPE_FMADD: ++ return COSTS_N_INSNS (n * 4); ++ ++ case TYPE_DIV32: ++ return COSTS_N_INSNS (n * 16); ++ ++ default: ++ break; ++ } ++ } ++ else /* For size cost. */ ++ { ++ /* Cost based on the instruction length. */ ++ if (get_attr_type (insn) != TYPE_UNKNOWN) ++ { ++ /* "L32R" itself plus constant in litpool. */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (2) + 1; ++ ++ /* Consider ".n" short instructions. */ ++ return COSTS_N_INSNS (n) - (n * 3 - len); ++ } ++ } ++ } ++ ++ /* Fall back. */ ++ return pattern_cost (PATTERN (insn), speed); ++} ++ + /* Worker function for TARGET_RETURN_IN_MEMORY. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3314b3fd6..da6b71d1d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -98,7 +98,10 @@ + + ;; Describe a user's asm statement. 
+ (define_asm_attributes +- [(set_attr "type" "multi")]) ++ [(set_attr "type" "multi") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ;; Should be the maximum possible length ++ ;; of a single machine instruction. + + + ;; Pipeline model. +@@ -1884,7 +1887,10 @@ + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "2")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + + ;; Miscellaneous instructions. +@@ -1939,7 +1945,10 @@ + } + [(set_attr "type" "nop") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + (define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") +@@ -2003,8 +2012,9 @@ + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" +- [(set_attr "length" "0") +- (set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + ;; Do not schedule instructions accessing memory before this point. 
+ +@@ -2023,7 +2033,9 @@ + (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" +- [(set_attr "length" "0")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + (define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] +@@ -2036,7 +2048,10 @@ + } + [(set_attr "type" "trap") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't + ;; know if a frame pointer is required until the reload pass, and +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index e1d992f5d..97aa44f92 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -30,6 +30,10 @@ mlongcalls + Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + ++mextra-l32r-costs= ++Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) ++Set extra memory access cost for L32R instruction, in clock-cycle units. ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch b/patches/gcc10.3/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch new file mode 100644 index 0000000..0e14673 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch @@ -0,0 +1,400 @@ +From ed2c4b57807470b386e9abdf145282e197d9da65 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 11 Jun 2022 00:26:17 +0900 +Subject: [PATCH 18/31] xtensa: Improve constant synthesis for both integer and + floating-point + +This patch revises the previous implementation of constant synthesis. 
+ +First, changed to use define_split machine description pattern and to run +after reload pass, in order not to interfere some optimizations such as +the loop invariant motion. + +Second, not only integer but floating-point is subject to processing. + +Third, several new synthesis patterns - when the constant cannot fit into +a "MOVI Ax, simm12" instruction, but: + +I. can be represented as a power of two minus one (eg. 32767, 65535 or + 0x7fffffffUL) + => "MOVI(.N) Ax, -1" + "SRLI Ax, Ax, 1 ... 31" (or "EXTUI") +II. is between -34816 and 34559 + => "MOVI(.N) Ax, -2048 ... 2047" + "ADDMI Ax, Ax, -32768 ... 32512" +III. (existing case) can fit into a signed 12-bit if the trailing zero bits + are stripped + => "MOVI(.N) Ax, -2048 ... 2047" + "SLLI Ax, Ax, 1 ... 31" + +The above sequences consist of 5 or 6 bytes and have latency of 2 clock cycles, +in contrast with "L32R Ax, " (3 bytes and one clock latency, but may +suffer additional one clock pipeline stall and implementation-specific +InstRAM/ROM access penalty) plus 4 bytes of constant value. + +In addition, 3-instructions synthesis patterns (8 or 9 bytes, 3 clock latency) +are also provided when optimizing for speed and L32R instruction has +considerable access penalty: + +IV. 2-instructions synthesis (any of I ... III) followed by + "SLLI Ax, Ax, 1 ... 31" +V. 2-instructions synthesis followed by either "ADDX[248] Ax, Ax, Ax" + or "SUBX8 Ax, Ax, Ax" (multiplying by 3, 5, 7 or 9) + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_constantsynth): + New prototype. + * config/xtensa/xtensa.c (xtensa_emit_constantsynth, + xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI, + xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): + New backend functions that process the abovementioned logic. + (xtensa_emit_move_sequence): Revert the previous changes. + * config/xtensa/xtensa.md: New split patterns for integer + and floating-point, as the frontend part. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_2insns.c: New. + * gcc.target/xtensa/constsynth_3insns.c: Ditto. + * gcc.target/xtensa/constsynth_double.c: Ditto. +--- + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 133 +++++++++++++++--- + gcc/config/xtensa/xtensa.md | 50 +++++++ + .../gcc.target/xtensa/constsynth_2insns.c | 44 ++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 24 ++++ + .../gcc.target/xtensa/constsynth_double.c | 11 ++ + 6 files changed, 247 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 80b1da2bb..d65bc2954 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -44,6 +44,7 @@ extern int xtensa_expand_block_move (rtx *); + extern int xtensa_expand_block_set_unrolled_loop (rtx *); + extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); ++extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); + extern void xtensa_expand_nonlocal_goto (rtx *); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 94ff901c5..ba36d7244 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1027,6 +1027,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + } + + ++/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) ++ into dst with synthesizing a such constant value from a sequence of ++ load-immediate / arithmetic ones, instead of a L32R instruction ++ (plus a constant in litpool). 
*/ ++ ++static void ++xtensa_emit_constantsynth (rtx dst, enum rtx_code code, ++ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT imm2) ++{ ++ gcc_assert (REG_P (dst)); ++ emit_move_insn (dst, GEN_INT (imm0)); ++ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, ++ dst, GEN_INT (imm1))); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, imm2)); ++} ++ ++static int ++xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT op_imm) ++{ ++ int shift = exact_log2 (srcval + 1); ++ ++ if (IN_RANGE (shift, 1, 31)) ++ { ++ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ { ++ HOST_WIDE_INT imm0, imm1; ++ ++ if (srcval < -32768) ++ imm1 = -32768; ++ else if (srcval > 32512) ++ imm1 = 32512; ++ else ++ imm1 = srcval & ~255; ++ imm0 = srcval - imm1; ++ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) ++ imm0 -= 256, imm1 += 256; ++ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); ++ return 1; ++ } ++ ++ shift = ctz_hwi (srcval); ++ if (xtensa_simm12b (srcval >> shift)) ++ { ++ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static rtx ++xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) ++{ ++ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); ++} ++ ++static rtx ++xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) ++{ ++ return imm == 7 ++ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), ++ reg) ++ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, ++ GEN_INT (floor_log2 (imm - 1))), ++ reg); ++} ++ ++int ++xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) ++{ ++ /* No need for synthesizing for what fits into MOVI instruction. */ ++ if (xtensa_simm12b (srcval)) ++ return 0; ++ ++ /* 2-insns substitution. 
*/ ++ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) ++ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) ++ return 1; ++ ++ /* 3-insns substitution. */ ++ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) ++ { ++ int shift, divisor; ++ ++ /* 2-insns substitution followed by SLLI. */ ++ shift = ctz_hwi (srcval); ++ if (IN_RANGE (shift, 1, 31) && ++ xtensa_constantsynth_2insn (dst, srcval >> shift, ++ xtensa_constantsynth_rtx_SLLI, ++ shift)) ++ return 1; ++ ++ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ ++ if (TARGET_ADDX) ++ for (divisor = 3; divisor <= 9; divisor += 2) ++ if (srcval % divisor == 0 && ++ xtensa_constantsynth_2insn (dst, srcval / divisor, ++ xtensa_constantsynth_rtx_ADDSUBX, ++ divisor)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++ + /* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move +@@ -1064,22 +1181,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + + if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) + { +- /* Try to emit MOVI + SLLI sequence, that is smaller +- than L32R + literal. 
*/ +- if (optimize_size && mode == SImode && CONST_INT_P (src) +- && register_operand (dst, mode)) +- { +- HOST_WIDE_INT srcval = INTVAL (src); +- int shift = ctz_hwi (srcval); +- +- if (xtensa_simm12b (srcval >> shift)) +- { +- emit_move_insn (dst, GEN_INT (srcval >> shift)); +- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); +- return 1; +- } +- } +- + src = force_const_mem (SImode, src); + operands[1] = src; + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index da6b71d1d..ddc3087fa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -940,6 +940,19 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "constantpool_operand"))] ++ "! optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ if (! CONST_INT_P (x)) ++ FAIL; ++ if (! xtensa_constantsynth (operands[0], INTVAL (x))) ++ emit_move_insn (operands[0], x); ++}) ++ + ;; 16-bit Integer moves + + (define_expand "movhi" +@@ -1144,6 +1157,43 @@ + (set_attr "mode" "SF") + (set_attr "length" "3")]) + ++(define_split ++ [(set (match_operand:SF 0 "register_operand") ++ (match_operand:SF 1 "constantpool_operand"))] ++ "! 
optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ int i = 0; ++ rtx x = XEXP (operands[1], 0); ++ long l[2]; ++ if (GET_CODE (x) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (x)) ++ x = get_pool_constant (x); ++ else if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ gcc_assert (GET_CODE (x) == PLUS ++ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) ++ && CONST_INT_P (XEXP (x, 1))); ++ i = INTVAL (XEXP (x, 1)); ++ gcc_assert (i == 0 || i == 4); ++ i /= 4; ++ x = get_pool_constant (XEXP (x, 0)); ++ } ++ else ++ gcc_unreachable (); ++ if (GET_MODE (x) == SFmode) ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); ++ else if (GET_MODE (x) == DFmode) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ else ++ FAIL; ++ x = gen_rtx_REG (SImode, REGNO (operands[0])); ++ if (! xtensa_constantsynth (x, l[i])) ++ emit_move_insn (x, GEN_INT (l[i])); ++}) ++ + ;; 64-bit floating point moves + + (define_expand "movdf" +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +new file mode 100644 +index 000000000..43c85a250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++int test_0(void) ++{ ++ return 4095; ++} ++ ++int test_1(void) ++{ ++ return 2147483647; ++} ++ ++int test_2(void) ++{ ++ return -34816; ++} ++ ++int test_3(void) ++{ ++ return -2049; ++} ++ ++int test_4(void) ++{ ++ return 2048; ++} ++ ++int test_5(void) ++{ ++ return 34559; ++} ++ ++int test_6(void) ++{ ++ return 43680; ++} ++ ++void test_7(int *p) ++{ ++ *p = -1432354816; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +new file mode 100644 +index 000000000..f3c4a1c7c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mextra-l32r-costs=3" } */ ++ ++int test_0(void) ++{ ++ return 134217216; ++} ++ ++int test_1(void) ++{ ++ return -27604992; ++} ++ ++int test_2(void) ++{ ++ return -162279; ++} ++ ++void test_3(int *p) ++{ ++ *p = 192437; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +new file mode 100644 +index 000000000..890ca5047 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void test(unsigned int count, double array[]) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ array[i] = 1.0; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0024-Improve-shift-operations-more.patch b/patches/gcc10.3/gcc-xtensa-0024-Improve-shift-operations-more.patch new file mode 100644 index 0000000..9c44b89 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0024-Improve-shift-operations-more.patch @@ -0,0 +1,383 @@ +From fd3771fcc13b8712c91cec70f4533760f72b54e1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:38:31 +0900 +Subject: [PATCH 19/31] xtensa: Improve shift operations more + +This patch introduces funnel shifter utilization, and rearranges existing +"per-byte shift" insn patterns. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (logical_shift_operator, + xtensa_shift_per_byte_operator): New predicates. + * config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction): + New prototype. + * config/xtensa/xtensa.c (xtensa_shlrd_which_direction): + New helper function for funnel shift patterns. + * config/xtensa/xtensa.md (ior_op): New code iterator. + (*ashlsi3_1): Replace with new split pattern. 
+ (*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x. + (*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1): + New insn-and-split patterns that redirect to *xtensa_shift_per_byte, + in order to omit unnecessary bitwise AND operation. + (*shlrd_reg_, *shlrd_const_, *shlrd_per_byte_, + *shlrd_per_byte__omit_AND): + New insn patterns for funnel shifts. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/funnel_shifter.c: New. +--- + gcc/config/xtensa/predicates.md | 6 + + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 14 ++ + gcc/config/xtensa/xtensa.md | 213 ++++++++++++++---- + .../gcc.target/xtensa/funnel_shifter.c | 17 ++ + 5 files changed, 213 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 91b9343a2..e7836f0ec 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -164,9 +164,15 @@ + (define_predicate "boolean_operator" + (match_code "eq,ne")) + ++(define_predicate "logical_shift_operator" ++ (match_code "ashift,lshiftrt")) ++ + (define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + ++(define_predicate "xtensa_shift_per_byte_operator" ++ (match_code "ashift,ashiftrt,lshiftrt")) ++ + (define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index d65bc2954..32743bc67 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); ++extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + + #ifdef TREE_CODE + extern void 
init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba36d7244..473cfaf9d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2394,6 +2394,20 @@ xtensa_tls_referenced_p (rtx x) + } + + ++/* Helper function for "*shlrd_..." patterns. */ ++ ++enum rtx_code ++xtensa_shlrd_which_direction (rtx op0, rtx op1) ++{ ++ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) ++ return ASHIFT; /* shld */ ++ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) ++ return LSHIFTRT; /* shrd */ ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ddc3087fa..58bba89af 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -83,6 +83,9 @@ + ;; the same template. + (define_mode_iterator HQI [HI QI]) + ++;; This code iterator is for *shlrd and its variants. ++(define_code_iterator ior_op [ior plus]) ++ + + ;; Attributes. 
+ +@@ -1272,16 +1275,6 @@ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + }) + +-(define_insn "*ashlsi3_1" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (const_int 1)))] +- "TARGET_DENSITY" +- "add.n\t%0, %1, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "2")]) +- + (define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1294,16 +1287,14 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashlsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8b\t%2\;sll\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "register_operand") ++ (const_int 1)))] ++ "TARGET_DENSITY" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 1) ++ (match_dup 1)))]) + + (define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") +@@ -1317,17 +1308,6 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashrsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8l\t%2\;sra\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) +- + (define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1337,9 +1317,9 @@ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) +- return "srli\t%0, %1, %R2"; ++ return "srli\t%0, %1, %R2"; + else +- 
return "extui\t%0, %1, %R2, %L2"; ++ return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; + } +@@ -1347,13 +1327,170 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*lshrsi3_3x" ++(define_insn "*shift_per_byte" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 3 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]))] ++ "!optimize_debug && optimize" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; ++ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; ++ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_0" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i"))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_1" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i")))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 
5) ++ (neg:SI (match_dup 2))) ++ (set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 5) ++ (const_int 3))]))] ++{ ++ operands[5] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "9")]) ++ ++(define_insn "*shlrd_reg_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (match_dup 2))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_const_" + [(set (match_operand:SI 0 "register_operand" "=a") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i")])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && IN_RANGE (INTVAL (operands[3]), 1, 31) ++ && IN_RANGE (INTVAL (operands[4]), 1, 31) ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" ++{ ++ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) ++ { ++ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; ++ case LSHIFTRT: return 
"ssai\t%R3\;src\t%0, %2, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_per_byte_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shlrd_per_byte__omit_AND" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 4 "const_int_operand" "i"))]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_dup 2) ++ (const_int 3)) ++ (match_dup 4)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ior_op:SI (match_op_dup 5 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]) ++ (match_op_dup 6 ++ [(match_dup 3) ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] + "" +- "ssa8l\t%2\;srl\t%0, %1" + [(set_attr "type" "arith") + 
(set_attr "mode" "SI") + (set_attr "length" "6")]) +diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +new file mode 100644 +index 000000000..c8f987ccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(const void *addr) ++{ ++ unsigned int n = (unsigned int)addr; ++ const unsigned int *a = (const unsigned int*)(n & ~3); ++ n = (n & 3) * 8; ++ return (a[0] >> n) | (a[1] << (32 - n)); ++} ++ ++unsigned int test_1(unsigned int a, unsigned int b) ++{ ++ return (a >> 16) + (b << 16); ++} ++ ++/* { dg-final { scan-assembler-times "src" 2 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch new file mode 100644 index 0000000..cdb96ff --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch @@ -0,0 +1,427 @@ +From 0690bcdd42d0aa6671f9ec3ccbbe70faa04ffb6b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 31 Jan 2022 09:56:21 +0900 +Subject: [PATCH 20/31] xtensa: Simplify conditional branch/move insn patterns + +No need to describe the "false side" conditional insn patterns anymore. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_emit_branch): + Remove the first argument. + (xtensa_emit_bit_branch): Remove it because now called only from the + output statement of *bittrue insn pattern. + * config/xtensa/xtensa.c (gen_int_relational): Remove the last + argument 'p_invert', and make so that the condition is reversed by + itself as needed. + (xtensa_expand_conditional_branch): Share the common path, and remove + condition inversion code. + (xtensa_emit_branch, xtensa_emit_movcc): Simplify by removing the + "false side" pattern. 
+ (xtensa_emit_bit_branch): Remove it because of the abovementioned + reason, and move the function body to *bittrue insn pattern. + * config/xtensa/xtensa.md (*bittrue): Transplant the output + statement from removed xtensa_emit_bit_branch(). + (*bfalse, *ubfalse, *bitfalse, *maskfalse): Remove the "false side" + insn patterns. +--- + gcc/config/xtensa/xtensa-protos.h | 3 +- + gcc/config/xtensa/xtensa.c | 111 ++++++++++------------------ + gcc/config/xtensa/xtensa.md | 117 ++++++++---------------------- + 3 files changed, 70 insertions(+), 161 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 32743bc67..e4b2d2f06 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -51,8 +51,7 @@ extern void xtensa_expand_nonlocal_goto (rtx *); + extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); + extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); +-extern char *xtensa_emit_branch (bool, bool, rtx *); +-extern char *xtensa_emit_bit_branch (bool, bool, rtx *); ++extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 473cfaf9d..8deae3d51 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -118,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = + + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); +-static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); ++static rtx gen_int_relational (enum rtx_code, rtx, rtx); + static rtx gen_float_relational (enum rtx_code, rtx, rtx); + static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); + static rtx 
fixup_subreg_mem (rtx); +@@ -670,8 +670,7 @@ map_test_to_internal_test (enum rtx_code test_code) + static rtx + gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ +- rtx cmp1, /* second operand to compare */ +- int *p_invert /* whether branch needs to reverse test */) ++ rtx cmp1 /* second operand to compare */) + { + struct cmp_info + { +@@ -703,6 +702,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + enum internal_test test; + machine_mode mode; + struct cmp_info *p_info; ++ int invert; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); +@@ -739,9 +739,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- *p_invert = ((GET_CODE (cmp1) == CONST_INT) +- ? p_info->invert_const +- : p_info->invert_reg); ++ invert = ((GET_CODE (cmp1) == CONST_INT) ++ ? p_info->invert_const ++ : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. */ +@@ -758,7 +758,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + cmp1 = temp; + } + +- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); ++ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) ++ : p_info->test_code, ++ VOIDmode, cmp0, cmp1); + } + + +@@ -817,45 +819,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; +- rtx cmp; +- int invert; +- rtx label1, label2; ++ rtx cmp, label; + + switch (mode) + { ++ case E_SFmode: ++ if (TARGET_HARD_FLOAT) ++ { ++ cmp = gen_float_relational (test_code, cmp0, cmp1); ++ break; ++ } ++ /* FALLTHRU */ ++ + case E_DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case E_SImode: +- invert = FALSE; +- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); +- break; +- +- case E_SFmode: +- if (!TARGET_HARD_FLOAT) +- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, +- cmp0, cmp1)); +- invert = FALSE; +- cmp = gen_float_relational (test_code, cmp0, cmp1); ++ cmp = gen_int_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ +- +- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); +- label2 = pc_rtx; +- +- if (invert) +- { +- label2 = label1; +- label1 = pc_rtx; +- } +- ++ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, +- label1, +- label2))); ++ label, ++ pc_rtx))); + } + + +@@ -2058,21 +2048,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + + + char * +-xtensa_emit_branch (bool inverted, bool immed, rtx *operands) ++xtensa_emit_branch (bool immed, rtx *operands) + { + static char result[64]; +- enum rtx_code code; ++ enum rtx_code code = GET_CODE (operands[3]); + const char *op; + +- code = GET_CODE (operands[3]); + switch (code) + { +- case EQ: op = inverted ? "ne" : "eq"; break; +- case NE: op = inverted ? "eq" : "ne"; break; +- case LT: op = inverted ? "ge" : "lt"; break; +- case GE: op = inverted ? "lt" : "ge"; break; +- case LTU: op = inverted ? 
"geu" : "ltu"; break; +- case GEU: op = inverted ? "ltu" : "geu"; break; ++ case EQ: op = "eq"; break; ++ case NE: op = "ne"; break; ++ case LT: op = "lt"; break; ++ case GE: op = "ge"; break; ++ case LTU: op = "ltu"; break; ++ case GEU: op = "geu"; break; + default: gcc_unreachable (); + } + +@@ -2091,32 +2080,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) + } + + +-char * +-xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +-{ +- static char result[64]; +- const char *op; +- +- switch (GET_CODE (operands[3])) +- { +- case EQ: op = inverted ? "bs" : "bc"; break; +- case NE: op = inverted ? "bc" : "bs"; break; +- default: gcc_unreachable (); +- } +- +- if (immed) +- { +- unsigned bitnum = INTVAL (operands[1]) & 0x1f; +- operands[1] = GEN_INT (bitnum); +- sprintf (result, "b%si\t%%0, %%d1, %%2", op); +- } +- else +- sprintf (result, "b%s\t%%0, %%1, %%2", op); +- +- return result; +-} +- +- + char * + xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { +@@ -2125,12 +2088,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + const char *op; + + code = GET_CODE (operands[4]); ++ if (inverted) ++ code = reverse_condition (code); + if (isbool) + { + switch (code) + { +- case EQ: op = inverted ? "t" : "f"; break; +- case NE: op = inverted ? "f" : "t"; break; ++ case EQ: op = "f"; break; ++ case NE: op = "t"; break; + default: gcc_unreachable (); + } + } +@@ -2138,10 +2103,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { + switch (code) + { +- case EQ: op = inverted ? "nez" : "eqz"; break; +- case NE: op = inverted ? "eqz" : "nez"; break; +- case LT: op = inverted ? "gez" : "ltz"; break; +- case GE: op = inverted ? 
"ltz" : "gez"; break; ++ case EQ: op = "eqz"; break; ++ case NE: op = "nez"; break; ++ case LT: op = "ltz"; break; ++ case GE: op = "gez"; break; + default: gcc_unreachable (); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 58bba89af..40000859d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1551,28 +1551,13 @@ + (define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*bfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1581,28 +1566,13 @@ + (define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*ubfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "ubranch_operator" +- 
[(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1613,75 +1583,50 @@ + (define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) +- (pc)))] +- "" +-{ +- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*bitfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) ++ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") ++ (const_int 1) ++ (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*masktrue" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { ++ static char result[64]; ++ char op; + switch (GET_CODE (operands[3])) + { +- case EQ: return "bnone\t%0, %1, %2"; +- case NE: return "bany\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: op = 'c'; break; ++ case NE: op = 's'; break; ++ default: 
gcc_unreachable (); + } ++ if (which_alternative == 0) ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); ++ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); ++ } ++ else ++ sprintf (result, "bb%c\t%%0, %%1, %%2", op); ++ return result; + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_insn "*maskfalse" ++(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] + "" + { + switch (GET_CODE (operands[3])) + { +- case EQ: return "bany\t%0, %1, %2"; +- case NE: return "bnone\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: return "bnone\t%0, %1, %2"; ++ case NE: return "bany\t%0, %1, %2"; ++ default: gcc_unreachable (); + } + } + [(set_attr "type" "jump") +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch b/patches/gcc10.3/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch new file mode 100644 index 0000000..e1d2790 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch @@ -0,0 +1,101 @@ +From a7cf439409089eab17341a1a24fb9be2b967ca7c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 27 May 2021 19:04:12 +0900 +Subject: [PATCH 21/31] xtensa: Make use of BALL/BNALL instructions + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation, but a few similar fused instructions are exist: + + "BALL Ax, Ay, label" // if ((~Ax & Ay) == 0) goto label; + "BNALL Ax, Ay, label" // if ((~Ax & Ay) != 0) goto label; + +These instructions have never been emitted before, but it seems no reason 
not +to make use of them. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_bitcmpl): New insn pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/BALL-BNALL.c: New. +--- + gcc/config/xtensa/xtensa.md | 21 +++++++++++++ + gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 ++++++++++++++++++++ + 2 files changed, 54 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 40000859d..b34b2afb6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1633,6 +1633,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn "*masktrue_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case EQ: return "ball\t%0, %1, %2"; ++ case NE: return "bnall\t%0, %1, %2"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ + + ;; Zero-overhead looping support. 
+ +diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +new file mode 100644 +index 000000000..ba61c6f37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++extern void foo(void); ++ ++void BNONE_test(int a, int b) ++{ ++ if (a & b) ++ foo(); ++} ++ ++void BANY_test(int a, int b) ++{ ++ if (!(a & b)) ++ foo(); ++} ++ ++void BALL_test(int a, int b) ++{ ++ if (~a & b) ++ foo(); ++} ++ ++void BNALL_test(int a, int b) ++{ ++ if (!(~a & b)) ++ foo(); ++} ++ ++/* { dg-final { scan-assembler-times "bnone" 1 } } */ ++/* { dg-final { scan-assembler-times "bany" 1 } } */ ++/* { dg-final { scan-assembler-times "ball" 1 } } */ ++/* { dg-final { scan-assembler-times "bnall" 1 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch b/patches/gcc10.3/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch new file mode 100644 index 0000000..b13350f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch @@ -0,0 +1,252 @@ +From 43c7f8333028ff03d8a4681ab62de2febcc43f5c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:28:43 +0900 +Subject: [PATCH 22/31] xtensa: Optimize bitwise AND operation with some + specific forms of constants + +This patch offers several insn-and-split patterns for bitwise AND with +register and constant that can be represented as: + +i. 1's least significant N bits and the others 0's (17 <= N <= 31) +ii. 1's most significant N bits and the others 0's (12 <= N <= 31) +iii. M 1's sequence of bits and trailing N 0's bits, that cannot fit into a + "MOVI Ax, simm12" instruction (1 <= M <= 16, 1 <= N <= 30) + +And also offers shortcuts for conditional branch if each of the abovementioned +operations is (not) equal to zero. 
+ +gcc/ChangeLog: + + * config/xtensa/predicates.md (shifted_mask_operand): + New predicate. + * config/xtensa/xtensa.md (*andsi3_const_pow2_minus_one): + New insn-and-split pattern. + (*andsi3_const_negative_pow2, *andsi3_const_shifted_mask, + *masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): Ditto. +--- + gcc/config/xtensa/predicates.md | 10 ++ + gcc/config/xtensa/xtensa.md | 179 ++++++++++++++++++++++++++++++++ + 2 files changed, 189 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index e7836f0ec..367fc17f3 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -52,6 +52,16 @@ + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + ++(define_predicate "shifted_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT mask = INTVAL (op); ++ int shift = ctz_hwi (mask); ++ ++ return IN_RANGE (shift, 1, 31) ++ && xtensa_mask_immediate ((uint32_t)mask >> shift); ++}) ++ + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b34b2afb6..355fb7742 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -645,6 +645,83 @@ + (set_attr "mode" "SI") + (set_attr "length" "6")]) + ++(define_insn_and_split "*andsi3_const_pow2_minus_one" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ashift:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" 
"SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[2]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*andsi3_const_negative_pow2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (lshiftrt:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*andsi3_const_shifted_mask" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "shifted_mask_operand" "i")))] ++ "! xtensa_simm12b (INTVAL (operands[2]))" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (zero_extract:SI (match_dup 1) ++ (match_dup 3) ++ (match_dup 4))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[2]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[2] = GEN_INT (shift); ++ operands[3] = GEN_INT (mask_size); ++ operands[4] = GEN_INT (mask_pos); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && ctz_hwi (INTVAL (operands[2])) == 1") ++ (const_int 5) ++ (const_int 6)))]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +@@ -1654,6 +1731,108 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split 
"*masktrue_const_pow2_minus_one" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (ashift:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[1]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*masktrue_const_negative_pow2" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*masktrue_const_shifted_mask" ++ [(set (pc) ++ (if_then_else (match_operator 4 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "shifted_mask_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 
"" "")) ++ (pc)))] ++ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 ++ && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 6) ++ (zero_extract:SI (match_dup 0) ++ (match_dup 5) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 6) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) ++ (pc)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[1]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[1] = GEN_INT (mask_pos); ++ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); ++ operands[5] = GEN_INT (mask_size); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") ++ (const_int 5) ++ (const_int 6)))]) ++ + + ;; Zero-overhead looping support. + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch b/patches/gcc10.3/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch new file mode 100644 index 0000000..ebe9eb0 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch @@ -0,0 +1,44 @@ +From 7856e5d6344828b2a72aeef671a169dbd1a85a55 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:34:48 +0900 +Subject: [PATCH 23/31] xtensa: Document new -mextra-l32r-costs= + Xtensa-specific option + +gcc/ChangeLog: + * doc/invoke.texi: Document -mextra-l32r-costs= option. 
+--- + gcc/doc/invoke.texi | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index eabeec944..c35f51afb 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. + -mtext-section-literals -mno-text-section-literals @gol + -mauto-litpools -mno-auto-litpools @gol + -mtarget-align -mno-target-align @gol +--mlongcalls -mno-longcalls} ++-mlongcalls -mno-longcalls @gol ++-mextra-l32r-costs=@var{cycles}} + + @emph{zSeries Options} + See S/390 and zSeries Options. +@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call + instructions---look at the disassembled object code to see the actual + instructions. Note that the assembler uses an indirect call for + every cross-file call, not just those that really are out of range. ++ ++@item -mextra-l32r-costs=@var{cycles} ++@opindex mextra-l32r-costs ++Specify an extra cost of instruction RAM/ROM access for @code{L32R} ++instructions, in clock cycles. When optimizing for speed, this affects ++whether a constant is loaded from the literal pool using @code{L32R} or ++synthesized from a smaller constant with a couple of arithmetic ++instructions. The default value is 0.
+ @end table + + @node zSeries Options +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch b/patches/gcc10.3/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch new file mode 100644 index 0000000..f5c0f78 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch @@ -0,0 +1,354 @@ +From c985f67f0b9a35ca5f22647c326c6b43a2b237fa Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 15 Jun 2022 21:21:21 +0900 +Subject: [PATCH 24/31] xtensa: Add support for sibling call optimization + +This patch introduces support for sibling call optimization, when the Windowed +Register Option is NOT configured. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call, + xtensa_emit_sibcall): New prototypes. + (xtensa_expand_epilogue): Add new argument that specifies whether + or not sibling call. + * config/xtensa/xtensa.c (TARGET_FUNCTION_OK_FOR_SIBCALL): + New macro definition. + (xtensa_prepare_expand_call): New function in order to share + the common code. + (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall): + New functions. + (xtensa_expand_epilogue): Add new argument sibcall_p and use it + for sibling call handling. + * config/xtensa/xtensa.md (call, call_value): + Use xtensa_prepare_expand_call. + (call_internal, call_value_internal): + Add the condition in order to be disabled if sibling call. + (sibcall, sibcall_value, sibcall_epilogue): New expansions. + (sibcall_internal, sibcall_value_internal): New insn patterns, + and split ones in order to take care of the indirect sibcalls. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: New. 
+--- + gcc/config/xtensa/xtensa-protos.h | 4 +- + gcc/config/xtensa/xtensa.c | 57 ++++++++++++- + gcc/config/xtensa/xtensa.md | 93 ++++++++++++++++++---- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++ + 4 files changed, 155 insertions(+), 19 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index e4b2d2f06..75ed3bfb0 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); + extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); ++extern void xtensa_prepare_expand_call (int, rtx *); + extern char *xtensa_emit_call (int, rtx *); ++extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + +@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int); + extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); +-extern void xtensa_expand_epilogue (void); ++extern void xtensa_expand_epilogue (bool); + extern void order_regs_for_local_alloc (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8deae3d51..a714b980a 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -187,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); + static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); 
++static bool xtensa_function_ok_for_sibcall (tree, tree); + + + +@@ -337,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2117,6 +2121,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + } + + ++void ++xtensa_prepare_expand_call (int callop, rtx *operands) ++{ ++ rtx addr = XEXP (operands[callop], 0); ++ ++ if (flag_pic && SYMBOL_REF_P (addr) ++ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) ++ addr = gen_sym_PLT (addr); ++ ++ if (!call_insn_operand (addr, VOIDmode)) ++ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++} ++ ++ + char * + xtensa_emit_call (int callop, rtx *operands) + { +@@ -2135,6 +2153,24 @@ xtensa_emit_call (int callop, rtx *operands) + } + + ++char * ++xtensa_emit_sibcall (int callop, rtx *operands) ++{ ++ static char result[64]; ++ rtx tgt = operands[callop]; ++ ++ if (GET_CODE (tgt) == CONST_INT) ++ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", ++ INTVAL (tgt)); ++ else if (register_operand (tgt, VOIDmode)) ++ sprintf (result, "jx\t%%%d", callop); ++ else ++ sprintf (result, "j.l\t%%%d, a9", callop); ++ ++ return result; ++} ++ ++ + bool + xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) + { +@@ -3305,7 +3341,7 @@ xtensa_expand_prologue (void) + } + + void +-xtensa_expand_epilogue (void) ++xtensa_expand_epilogue (bool sibcall_p) + { + if (!TARGET_WINDOWED_ABI) + { +@@ -3339,10 +3375,13 @@ xtensa_expand_epilogue (void) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), ++ emit_move_insn 
(reg = gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); ++ if (regno == A0_REG && sibcall_p) ++ emit_use (reg); + } + } + +@@ -3377,7 +3416,8 @@ xtensa_expand_epilogue (void) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- emit_jump_insn (gen_return ()); ++ if (!sibcall_p) ++ emit_jump_insn (gen_return ()); + } + + bool +@@ -4893,4 +4933,15 @@ xtensa_asan_shadow_offset (void) + return HOST_WIDE_INT_UC (0x10000000); + } + ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ ++static bool ++xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Do not allow sibcalls when windowed registers ABI is in effect. */ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 355fb7742..2a11d1c86 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,6 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++ (A10_REG 10) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2153,18 +2154,13 @@ + (match_operand 1 "" ""))] + "" + { +- rtx addr = XEXP (operands[0], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (0, operands); + }) + + (define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (0, operands); + } +@@ -2178,19 +2174,14 @@ + (match_operand 2 "" "")))] + "" + { +- rtx addr = XEXP (operands[1], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[1], 0) = 
copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (1, operands); + }) + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); + } +@@ -2198,6 +2189,70 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "sibcall" ++ [(call (match_operand 0 "memory_operand" "") ++ (match_operand 1 "" ""))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (0, operands); ++}) ++ ++(define_insn "sibcall_internal" ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ (match_operand 1 "" "i"))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (0, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(call (mem:SI (match_operand:SI 0 "register_operand")) ++ (match_operand 1 ""))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 0)) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 1))]) ++ ++(define_expand "sibcall_value" ++ [(set (match_operand 0 "register_operand" "") ++ (call (match_operand 1 "memory_operand" "") ++ (match_operand 2 "" "")))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (1, operands); ++}) ++ ++(define_insn "sibcall_value_internal" ++ [(set (match_operand 0 "register_operand" "=a") ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (1, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (call (mem:SI (match_operand:SI 1 "register_operand")) 
++ (match_operand 2 "")))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 1)) ++ (set (match_dup 0) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 2)))]) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +@@ -2265,7 +2320,15 @@ + [(return)] + "" + { +- xtensa_expand_epilogue (); ++ xtensa_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_expand_epilogue (true); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +new file mode 100644 +index 000000000..d2b3fccf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */ ++ ++extern int foo(int); ++extern void bar(int); ++ ++int test_0(int a) { ++ return foo(a); ++} ++ ++void test_1(int a) { ++ bar(a); ++} ++ ++int test_2(int (*a)(void)) { ++ bar(0); ++ return a(); ++} ++ ++/* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch b/patches/gcc10.3/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch new file mode 100644 index 0000000..ad60202 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch @@ -0,0 +1,81 @@ +From 16878066a57f917814a8d6fe45f7f7d2eebdbbc0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:37:54 +0900 +Subject: [PATCH 25/31] xtensa: Add some dedicated patterns that correspond to + GIMPLE canonicalizations + +This patch offers better RTL representations against straightforward +derivations from some tree optimizers' canonicalized forms. 
+ +- rounding up to even, such as '(x + (x & 1))', is canonicalized to + '((x + 1) & -2)', but the former is one instruction less than the latter + in Xtensa ISA. +- signed greater or equal to zero as logical value '((signed)x >= 0)', + is canonicalized to '((unsigned)(x ^ -1) >> 31)', but the equivalent + '(((signed)x >> 31) + 1)' is one instruction less. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*round_up_to_even): + New insn-and-split pattern. + (*signed_ge_zero): Ditto. +--- + gcc/config/xtensa/xtensa.md | 45 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a11d1c86..3e8e2e76f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2709,3 +2709,48 @@ + xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); + DONE; + }) ++ ++(define_insn_and_split "*round_up_to_even" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 1)) ++ (const_int -2)))] ++ "" ++ "#" ++ "can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (and:SI (match_dup 1) ++ (const_int 1))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*signed_ge_zero" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ge:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 0)))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) ++ (ashiftrt:SI (match_dup 1) ++ (const_int 31))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int 1)))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) +-- +2.20.1 + 
diff --git a/patches/gcc10.3/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch b/patches/gcc10.3/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch new file mode 100644 index 0000000..28bb494 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch @@ -0,0 +1,90 @@ +From a0f2dfa2e952111dbd85d2b2f1caaf570facce8a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:39:49 +0900 +Subject: [PATCH 26/31] xtensa: Eliminate unwanted reg-reg moves during DFmode + input reloads + +When spilled DFmode registers are reloaded in, once loaded into a pair of +SImode regs and then copied from that regs. Such unwanted reg-reg moves +seems not to be eliminated at the "cprop_hardreg" stage, despite no problem +in output reloads. + +Luckily it is easy to resolve such inefficiencies, with the use of peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (reload_operand): + New predicate. + * config/xtensa/xtensa.md: New peephole2 pattern. 
+--- + gcc/config/xtensa/predicates.md | 13 +++++++++++++ + gcc/config/xtensa/xtensa.md | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 367fc17f3..c1cddb733 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -165,6 +165,19 @@ + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + ++(define_predicate "reload_operand" ++ (match_code "mem") ++{ ++ const_rtx addr = XEXP (op, 0); ++ if (REG_P (addr)) ++ return REGNO (addr) == A1_REG; ++ if (GET_CODE (addr) == PLUS) ++ return REG_P (XEXP (addr, 0)) ++ && REGNO (XEXP (addr, 0)) == A1_REG ++ && CONST_INT_P (XEXP (addr, 1)); ++ return false; ++}) ++ + (define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3e8e2e76f..2598c09c9 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2754,3 +2754,34 @@ + (if_then_else (match_test "TARGET_DENSITY") + (const_int 5) + (const_int 6)))]) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 6 "reload_operand")) ++ (set (match_operand:SI 1 "register_operand") ++ (match_operand:SI 7 "reload_operand")) ++ (set (match_operand:SF 2 "register_operand") ++ (match_operand:SF 4 "register_operand")) ++ (set (match_operand:SF 3 "register_operand") ++ (match_operand:SF 5 "register_operand"))] ++ "REGNO (operands[0]) == REGNO (operands[4]) ++ && REGNO (operands[1]) == REGNO (operands[5]) ++ && peep2_reg_dead_p (4, operands[0]) ++ && peep2_reg_dead_p (4, operands[1])" ++ [(set (match_dup 2) ++ (match_dup 6)) ++ (set (match_dup 3) ++ (match_dup 7))] ++{ ++ unsigned HOST_WIDE_INT check = 0; /* 64 bits: hard regnos can exceed 31. */ ++ int i; ++ for (i = 0; i <= 3; ++i) ++ { ++ unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_1U << REGNO (operands[i]); ++ if (check & mask) ++ FAIL; ++ check |= mask; ++ } ++ operands[6] = gen_rtx_MEM
(SFmode, XEXP (operands[6], 0)); ++ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch b/patches/gcc10.3/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch new file mode 100644 index 0000000..7c4a869 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch @@ -0,0 +1,99 @@ +From d6c2b11e9ce88f3b1a7ddcf9a2712b070ad4dbfb Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:53:04 +0900 +Subject: [PATCH 27/31] xtensa: Eliminate [DS]Cmode hard register clobber that + is immediately followed by whole overwrite the register + +RTL expansion of substitution to [DS]Cmode hard register includes obstructive +register clobber. + +A simplest example: + + double _Complex test(double _Complex c) { + return c; + } + +will be converted to: + + (set (reg:DF 42 [ c ]) (reg:DF 2 a2)) + (set (reg:DF 43 [ c+8 ]) (reg:DF 4 a4)) + (clobber (reg:DC 2 a2)) + (set (reg:DF 2 a2) (reg:DF 42 [ c ])) + (set (reg:DF 4 a4) (reg:DF 43 [ c+8 ])) + (use (reg:DC 2 a2)) + (return) + +and then finally: + + test: + mov a8, a2 + mov a9, a3 + mov a6, a4 + mov a7, a5 + mov a2, a8 + mov a3, a9 + mov a4, a6 + mov a5, a7 + ret + +As you see, it is so ridiculous. + +This patch eliminates such clobber in order to prune away the wasted move +instructions by the optimizer: + + test: + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (DSC): New split pattern and mode iterator. +--- + gcc/config/xtensa/xtensa.md | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2598c09c9..124548dfe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -87,6 +87,10 @@ + ;; This code iterator is for *shlrd and its variants.
+ (define_code_iterator ior_op [ior plus]) + ++;; This mode iterator allows the DC and SC patterns to be defined from ++;; the same template. ++(define_mode_iterator DSC [DC SC]) ++ + + ;; Attributes. + +@@ -2785,3 +2789,27 @@ + operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); + operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); + }) ++ ++(define_split ++ [(clobber (match_operand:DSC 0 "register_operand"))] ++ "GP_REG_P (REGNO (operands[0]))" ++ [(const_int 0)] ++{ ++ unsigned int regno = REGNO (operands[0]); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx_insn *insn; ++ rtx x; ++ if (! ((insn = next_nonnote_nondebug_insn (curr_insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno ++ && (insn = next_nonnote_nondebug_insn (insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) ++ FAIL; ++}) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch b/patches/gcc10.3/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch new file mode 100644 index 0000000..6007b49 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch @@ -0,0 +1,111 @@ +From e37c151ca3beacb7f4f116a94c9c80223b0c6fbf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 17 Jun 2022 22:47:49 +0900 +Subject: [PATCH 28/31] xtensa: Defer storing integer constants into litpool + until reload + +Storing integer constants into litpool in the early stage of compilation +hinders some integer optimizations. In fact, such integer constants are +not subject to the constant folding process. 
+ +For example: + + extern unsigned short value; + extern void foo(void); + void test(void) { + if (value == 30001) + foo(); + } + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + extui a2, a2, 0, 16 // runtime zero-extension despite constant + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +This patch defers the placement of integer constants into litpool until +the start of reload: + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/constraints.md (Y): + Change to include integer constants until reload begins. + * config/xtensa/predicates.md (move_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Change to allow storing integer constants into litpool only after + reload begins. +--- + gcc/config/xtensa/constraints.md | 6 ++++-- + gcc/config/xtensa/predicates.md | 5 +++-- + gcc/config/xtensa/xtensa.c | 3 ++- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 9a8caab4f..13b3daafc 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -113,8 +113,10 @@ + + (define_constraint "Y" + "A constant that can be used in relaxed MOVI instructions." +- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") +- (match_test "TARGET_AUTO_LITPOOLS"))) ++ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") ++ (match_test "TARGET_AUTO_LITPOOLS")) ++ (and (match_code "const_int") ++ (match_test "can_create_pseudo_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. 
Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index c1cddb733..633cc6264 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -147,8 +147,9 @@ + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") +- (match_test "GET_MODE_CLASS (mode) == MODE_INT +- && xtensa_simm12b (INTVAL (op))")) ++ (match_test "(GET_MODE_CLASS (mode) == MODE_INT ++ && xtensa_simm12b (INTVAL (op))) ++ || can_create_pseudo_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a714b980a..1d64e2c76 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1173,7 +1173,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + return 1; + } + +- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) ++ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 ++ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + { + src = force_const_mem (SImode, src); + operands[1] = src; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch b/patches/gcc10.3/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch new file mode 100644 index 0000000..5ecac42 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch @@ -0,0 +1,129 @@ +From dfaefed18297218392071039325baabac59d5c43 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 19 Jun 2022 22:32:45 +0900 +Subject: [PATCH 29/31] xtensa: Apply a few minor fixes + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Use can_create_pseudo_p(), instead of using individual + reload_in_progress and reload_completed. 
+ (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), + the existing predicate function. + (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): + Use the standard RTX code predicate macros such as MEM_P, + SYMBOL_REF_P and/or CONST_INT_P. + * config/xtensa/xtensa.md: Avoid using numeric literals to determine + if callee-saved register, at the split patterns for indirect sibcall + fixups. +--- + gcc/config/xtensa/xtensa.c | 16 ++++++++-------- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 2 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 1d64e2c76..595c5f96f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -743,7 +743,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- invert = ((GET_CODE (cmp1) == CONST_INT) ++ invert = (CONST_INT_P (cmp1) + ? p_info->invert_const + : p_info->invert_reg); + +@@ -1200,7 +1200,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + } + } + +- if (!(reload_in_progress | reload_completed) ++ if (can_create_pseudo_p () + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + +@@ -1603,7 +1603,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) + return 0; + + /* If no 4-byte aligned, loop count should be treated as the +@@ -2160,7 +2160,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) + static char result[64]; + rtx tgt = operands[callop]; + +- if (GET_CODE (tgt) == CONST_INT) ++ if (CONST_INT_P (tgt)) + sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", + INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) +@@ -4318,17 +4318,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + static bool +-xtensa_is_insn_L32R_p(const rtx_insn *insn) ++xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + + if (GET_CODE (x) == SET) + { +- x = XEXP (x, 1); +- if (GET_CODE (x) == MEM) ++ x = SET_SRC (x); ++ if (MEM_P (x)) + { + x = XEXP (x, 0); +- return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) + && CONSTANT_POOL_ADDRESS_P (x); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 124548dfe..6f51a5357 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1251,14 +1251,14 @@ + int i = 0; + rtx x = XEXP (operands[1], 0); + long l[2]; +- if (GET_CODE (x) == SYMBOL_REF ++ if (SYMBOL_REF_P (x) + && CONSTANT_POOL_ADDRESS_P (x)) + x = get_pool_constant (x); + else if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS +- && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_P (XEXP (x, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))); + i = INTVAL (XEXP (x, 1)); +@@ -2217,7 +2217,7 @@ + (match_operand 1 ""))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" + [(set (reg:SI A10_REG) + (match_dup 0)) + (call (mem:SI (reg:SI A10_REG)) +@@ -2250,7 +2250,7 @@ + (match_operand 2 "")))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" + [(set (reg:SI A10_REG) + (match_dup 1)) + (set (match_dup 0) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch b/patches/gcc10.3/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch new file mode 100644 index 0000000..d65c44d --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch @@ -0,0 +1,56 @@ +From 48c657f23a61a41a46842b25bce4f287a56223a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 20 Jun 2022 01:56:16 +0900 +Subject: [PATCH 30/31] xtensa: Fix RTL insn cost estimation about relaxed MOVI + instructions + +These instructions will all be converted to L32R ones with litpool entries +by the assembler. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_is_insn_L32R_p): + Consider relaxed MOVI instructions as L32R. 
+--- + gcc/config/xtensa/xtensa.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 595c5f96f..b92ec9caa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4322,17 +4322,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + +- if (GET_CODE (x) == SET) ++ if (GET_CODE (x) != SET) ++ return false; ++ ++ x = XEXP (x, 1); ++ if (MEM_P (x)) + { +- x = SET_SRC (x); +- if (MEM_P (x)) +- { +- x = XEXP (x, 0); +- return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +- && CONSTANT_POOL_ADDRESS_P (x); +- } ++ x = XEXP (x, 0); ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); + } + ++ /* relaxed MOVI instructions, that will be converted to L32R by the ++ assembler. */ ++ if (CONST_INT_P (x) ++ && ! xtensa_simm12b (INTVAL (x))) ++ return true; ++ + return false; + } + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0036-Fix-buffer-overflow.patch b/patches/gcc10.3/gcc-xtensa-0036-Fix-buffer-overflow.patch new file mode 100644 index 0000000..35f9f10 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0036-Fix-buffer-overflow.patch @@ -0,0 +1,33 @@ +From 75c341c7de5c6f325d6ded7bd91d77793fe358d5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 22 Jun 2022 04:04:45 +0900 +Subject: [PATCH 31/31] xtensa: Fix buffer overflow + +Fortify buffer overflow message reported. +(see https://github.com/earlephilhower/esp-quick-toolchain/issues/36) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2_internal): + Enlarge the buffer that is obviously smaller than the template + string given to sprintf(). 
+--- + gcc/config/xtensa/xtensa.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6f51a5357..81b016859 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -536,7 +536,7 @@ + { + rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); + const char *init = "ssai\t8\;"; +- static char result[64]; ++ static char result[128]; + if (prev_insn && NONJUMP_INSN_P (prev_insn)) + { + rtx x = PATTERN (prev_insn); +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch b/patches/gcc10.3/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch new file mode 100644 index 0000000..0ea6d48 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch @@ -0,0 +1,95 @@ +From 9308911796a46bd689bbcc1cedef1b63ae9b871e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 26 Jun 2022 14:07:56 +0900 +Subject: [PATCH] xtensa: Optimize integer constant addition that is + between -32896 and 32639 + +Such constants are often subject to the constant synthesis: + + int test(int a) { + return a - 31999; + } + + test: + movi a3, 1 + addmi a3, a3, -0x7d00 + add a2, a2, a3 + ret + +This patch optimizes such case as follows: + + test: + addi a2, a2, 1 + addmi a2, a2, -0x7d00 + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Suppress unnecessary emitting nop insn in the split patterns for + integer/FP constant synthesis, and add new peephole2 pattern that + folds such synthesized additions. +--- + gcc/config/xtensa/xtensa.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 81b016859..b697e16db 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1036,6 +1036,7 @@ + FAIL; + if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) + emit_move_insn (operands[0], x); ++ DONE; + }) + + ;; 16-bit Integer moves +@@ -1277,6 +1278,7 @@ + x = gen_rtx_REG (SImode, REGNO (operands[0])); + if (! xtensa_constantsynth (x, l[i])) + emit_move_insn (x, GEN_INT (l[i])); ++ DONE; + }) + + ;; 64-bit floating point moves +@@ -2813,3 +2815,36 @@ + && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) + FAIL; + }) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand")) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (match_operand:SI 2 "const_int_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (plus:SI (match_operand:SI 4 "register_operand") ++ (match_dup 0)))] ++ "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]), ++ (-128 - 32768), (127 + 32512)) ++ && REGNO (operands[0]) != REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[4]) ++ && peep2_reg_dead_p (3, operands[0])" ++ [(set (match_dup 3) ++ (plus:SI (match_dup 4) ++ (match_dup 1))) ++ (set (match_dup 3) ++ (plus:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]); ++ int imm0, imm1; ++ value += 128; ++ if (value > 32512) ++ imm1 = 32512; ++ else ++ imm1 = value & ~255; ++ imm0 = value - imm1 - 128; ++ operands[1] = GEN_INT (imm0); ++ operands[2] = GEN_INT (imm1); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch b/patches/gcc10.3/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch new file mode 100644 index 0000000..8fc23d8 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch @@ -0,0 +1,92 @@ +From 7bed998154345cb072cd425b5d61734d3e0bac5d Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 1 Jul 2022 13:39:34 +0900 +Subject: [PATCH] xtensa: Minor fix for FP constant synthesis + +This patch fixes a non-fatal issue about negative 
constant values derived +from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'. + +And also replaces the dedicated code in FP constant synthesis split +pattern with the appropriate existing function call. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + In FP constant synthesis split pattern, subcontract to + avoid_constant_pool_reference() as in the case of integer, + because it can handle well too. And cast to int32_t before + calling xtensa_constantsynth() in order to ignore upper 32-bit. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_double.c: + Modify in order to catch the issue. +--- + gcc/config/xtensa/xtensa.md | 35 +++++-------------- + .../gcc.target/xtensa/constsynth_double.c | 2 +- + 2 files changed, 9 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b697e16db..6ef84b4f2 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1249,35 +1249,16 @@ + "! optimize_debug && reload_completed" + [(const_int 0)] + { +- int i = 0; +- rtx x = XEXP (operands[1], 0); +- long l[2]; +- if (SYMBOL_REF_P (x) +- && CONSTANT_POOL_ADDRESS_P (x)) +- x = get_pool_constant (x); +- else if (GET_CODE (x) == CONST) +- { +- x = XEXP (x, 0); +- gcc_assert (GET_CODE (x) == PLUS +- && SYMBOL_REF_P (XEXP (x, 0)) +- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) +- && CONST_INT_P (XEXP (x, 1))); +- i = INTVAL (XEXP (x, 1)); +- gcc_assert (i == 0 || i == 4); +- i /= 4; +- x = get_pool_constant (XEXP (x, 0)); +- } +- else +- gcc_unreachable (); +- if (GET_MODE (x) == SFmode) +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); +- else if (GET_MODE (x) == DFmode) +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); +- else ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ long l; ++ HOST_WIDE_INT value; ++ if (! 
CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode) + FAIL; ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + x = gen_rtx_REG (SImode, REGNO (operands[0])); +- if (! xtensa_constantsynth (x, l[i])) +- emit_move_insn (x, GEN_INT (l[i])); ++ value = (int32_t)l; ++ if (! xtensa_constantsynth (x, value)) ++ emit_move_insn (x, GEN_INT (value)); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +index 890ca5047..5fba6a986 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -5,7 +5,7 @@ void test(unsigned int count, double array[]) + { + unsigned int i; + for (i = 0; i < count; ++i) +- array[i] = 1.0; ++ array[i] = 8.988474246316506e+307; + } + + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch b/patches/gcc10.3/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch new file mode 100644 index 0000000..fcb3c72 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0041-constantsynth-Make-try-to-find-shorter-instru.patch @@ -0,0 +1,132 @@ +From afcf727f9c4174b104b594cbd14cba9c57de71d1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 15 Jul 2022 08:46:55 +0900 +Subject: [PATCH] xtensa: constantsynth: Make try to find shorter + instruction + +This patch allows the constant synthesis to choose shorter instruction +if possible. + + /* example */ + int test(void) { + return 128 << 8; + } + + ;; before + test: + movi a2, 0x100 + addmi a2, a2, 0x7f00 + ret.n + + ;; after + test: + movi.n a2, 1 + slli a2, a2, 15 + ret.n + +When the Code Density Option is configured, the latter is one byte smaller +than the former. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_constantsynth): Remove. 
+ (xtensa_constantsynth_2insn): Change to try all three synthetic + methods and to use the one that fits the immediate value of + the seed into a Narrow Move Immediate instruction "MOVI.N" + when the Code Density Option is configured. +--- + gcc/config/xtensa/xtensa.c | 58 +++++++++++++++++++------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b92ec9caa..a5330e52b 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1026,35 +1026,35 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + load-immediate / arithmetic ones, instead of a L32R instruction + (plus a constant in litpool). */ + +-static void +-xtensa_emit_constantsynth (rtx dst, enum rtx_code code, +- HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, +- rtx (*gen_op)(rtx, HOST_WIDE_INT), +- HOST_WIDE_INT imm2) +-{ +- gcc_assert (REG_P (dst)); +- emit_move_insn (dst, GEN_INT (imm0)); +- emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, +- dst, GEN_INT (imm1))); +- if (gen_op) +- emit_move_insn (dst, gen_op (dst, imm2)); +-} +- + static int + xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + rtx (*gen_op)(rtx, HOST_WIDE_INT), + HOST_WIDE_INT op_imm) + { +- int shift = exact_log2 (srcval + 1); ++ HOST_WIDE_INT imm = INT_MAX; ++ rtx x = NULL_RTX; ++ int shift; + ++ gcc_assert (REG_P (dst)); ++ ++ shift = exact_log2 (srcval + 1); + if (IN_RANGE (shift, 1, 31)) + { +- xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, +- gen_op, op_imm); +- return 1; ++ imm = -1; ++ x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift)); + } + +- if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ ++ shift = ctz_hwi (srcval); ++ if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95))) ++ && xtensa_simm12b (srcval >> shift)) ++ { ++ imm = srcval >> shift; ++ x = gen_ashlsi3 (dst, dst, GEN_INT (shift)); ++ } ++ ++ if ((!x || (TARGET_DENSITY && ! 
IN_RANGE (imm, -32, 95))) ++ && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) + { + HOST_WIDE_INT imm0, imm1; + +@@ -1067,19 +1067,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + imm0 = srcval - imm1; + if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) + imm0 -= 256, imm1 += 256; +- xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); +- return 1; ++ imm = imm0; ++ x = gen_addsi3 (dst, dst, GEN_INT (imm1)); + } + +- shift = ctz_hwi (srcval); +- if (xtensa_simm12b (srcval >> shift)) +- { +- xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, +- gen_op, op_imm); +- return 1; +- } ++ if (!x) ++ return 0; + +- return 0; ++ emit_move_insn (dst, GEN_INT (imm)); ++ emit_insn (x); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, op_imm)); ++ ++ return 1; + } + + static rtx +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch b/patches/gcc10.3/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch new file mode 100644 index 0000000..acf6d99 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0042-Optimize-bitwise-AND-with-imm1-followed-by-br.patch @@ -0,0 +1,177 @@ +From 5776497b68fcce6bf31835cf0a4d693e336bb2ca Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 14 Jul 2022 20:47:46 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND with imm1" followed by + "branch if (not) equal to imm2" + +This patch enhances the effectiveness of the previously posted one: +"xtensa: Optimize bitwise AND operation with some specific forms of constants". 
+ + /* example */ + extern void foo(int); + void test(int a) { + if ((a & (-1U << 8)) == (128 << 8)) /* 0 or one of "b4const" */ + foo(a); + } + + ;; before + .global test + test: + movi a3, -0x100 + movi.n a4, 1 + and a3, a2, a3 + slli a4, a4, 15 + bne a3, a4, .L3 + j.l foo, a9 + .L1: + ret.n + + ;; after + .global test + test: + srli a3, a2, 8 + bnei a3, 128, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): If the immediate for bitwise AND is + represented as '-(1 << N)', decrease the lower bound of N from 12 + to 1. And the other immediate for conditional branch is now no + longer limited to zero, but also one of some positive integers. + Finally, remove the checks of some conditions, because the comparison + expressions that don't satisfy such checks are determined as + compile-time constants and thus will be optimized away before + RTL expansion. +--- + gcc/config/xtensa/xtensa.md | 73 ++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6ef84b4f2..ca8b3913d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1721,63 +1721,78 @@ + + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && 
xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL (operands[1]) + 1)))" + "#" + "&& can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (ashift:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) << shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set (attr "length") +- (if_then_else (match_test "TARGET_DENSITY +- && INTVAL (operands[1]) == 0x7FFFFFFF") +- (const_int 5) +- (const_int 6)))]) ++ (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 0x7FFFFFFF) ++ && INTVAL (operands[2]) == 0") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[1]) == 0x7FFFFFFF ++ || INTVAL (operands[2]) == 0)") ++ (const_int 5) ++ (const_int 6))))]) + + (define_insn_and_split "*masktrue_const_negative_pow2" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && xtensa_b4const_or_zero (INTVAL (operands[2]) >> floor_log2 (-INTVAL (operands[1])))" + "#" + "&& 
can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (lshiftrt:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = floor_log2 (-INTVAL (operands[1])); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "6")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[2]) == 0") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn_and_split "*masktrue_const_shifted_mask" + [(set (pc) +@@ -1787,8 +1802,8 @@ + (match_operand:SI 2 "const_int_operand" "i")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +- "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 +- && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "/* (INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 // can be omitted ++ && */ xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) +-- +2.20.1 + diff --git a/patches/gcc11.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc11.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc11.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: 
xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 -