diff --git a/patches/gcc10.1/gcc-Improve-initialization-of-objects-when-the-initializ.patch b/patches/gcc10.1/gcc-Improve-initialization-of-objects-when-the-initializ.patch new file mode 100644 index 0000000..00fdb45 --- /dev/null +++ b/patches/gcc10.1/gcc-Improve-initialization-of-objects-when-the-initializ.patch @@ -0,0 +1,39 @@ +From a2cde0c6443c440c2a2b72b5eea060229a0cff57 Mon Sep 17 00:00:00 2001 +From: Jeff Law +Date: Sat, 9 Jul 2022 11:11:00 -0400 +Subject: [PATCH] [RFA] Improve initialization of objects when the initializer + +gcc/ + + * expr.c (store_expr): Identify trailing NULs in a STRING_CST + initializer and use clear_storage rather than copying the + NULs to the destination array. +--- + gcc/expr.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f33..6ff393462 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -5723,6 +5723,17 @@ store_expr (tree exp, rtx target, int call_param_p, + } + + str_copy_len = TREE_STRING_LENGTH (str); ++ ++ /* Trailing NUL bytes in EXP will be handled by the call to ++ clear_storage, which is more efficient than copying them from ++ the STRING_CST, so trim those from STR_COPY_LEN. 
*/ ++ while (str_copy_len) ++ { ++ if (TREE_STRING_POINTER (str)[str_copy_len - 1]) ++ break; ++ str_copy_len--; ++ } ++ + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) + { + str_copy_len += STORE_MAX_PIECES - 1; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch b/patches/gcc10.1/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch new file mode 100644 index 0000000..4c5418f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch @@ -0,0 +1,44 @@ +From 2065a3fccb11e28ebcc42aa46c52a40b0fae9bea Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Sun, 21 Nov 2021 20:18:31 -0600 +Subject: [PATCH 01/31] xtensa: Fix non-robust split condition in + define_insn_and_split + +This patch is to fix some non-robust split conditions in some +define_insn_and_splits, to make each of them applied on top of +the corresponding condition for define_insn part, otherwise the +splitting could perform unexpectedly. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi_internal, movdf_internal): Fix split + condition. 
+--- + gcc/config/xtensa/xtensa.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a8e59ee9..123916957 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -782,7 +782,7 @@ + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +@@ -1058,7 +1058,7 @@ + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc10.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc10.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! 
optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 - diff --git a/patches/gcc10.1/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch b/patches/gcc10.1/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch deleted file mode 100644 index eb06969..0000000 --- a/patches/gcc10.1/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch +++ /dev/null @@ -1,3186 +0,0 @@ -From 989fc2c516206d7cf70177a416815f91998e2131 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Fri, 27 May 2022 21:34:37 +0900 -Subject: [PATCH 1/3] xtensa: Backport patches from upstream/master - -2b5b8610e985e23a0c2e0272339ab074a750e240 "xtensa: Fix non-robust split condition in define_insn_and_split" -7e5baa7e6f4caced6bdaef6d866d19e7656d8a16 "xtensa: fix -Wformat-diag warnings." -d543bac1631700f0da30d5ca555296f4938a82c6 "xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi" -112447f8564c0307c5da99a4094a3a99f204239f "xtensa: Reflect the 32-bit Integer Divide Option" -b753405a5f0d45eea97f4cc7df2c2089401b08bf "xtensa: Simplify EXTUI instruction maskimm validations" -9b251fe2e39a49c0d3ecd34cf8c5d55544efd159 "xtensa: Make use of IN_RANGE macro where appropriate" -3397563ad6c8fc5d9675faf507e52dd2ed284202 "xtensa: Fix instruction counting regarding block move expansion" -6454b4a8f5d90dd355c3c7e31a592a439223b645 "xtensa: Add setmemsi insn pattern" -9aad2b22436d5346fa224e5c14439dcef36cf3dd "xtensa: Improve bswap[sd]i2 insn patterns" -e94c6dbfb57a862dd8a8685eabc4886ad1aaea25 "xtensa: fix PR target/105879" -2fcc69d8ce4eddf6dea878a5383254d366e1bb14 "xtensa: Implement bswaphi2 insn pattern" -9777d446e2148ef9a6e9f35db3f4eab99ee8812c "xtensa: Make one_cmplsi2 optimizer-friendly" -e44e7face13f38f9b228e2619786ba0add9ef77b "xtensa: Optimize '(~x & y)' to '((x & y) ^ y)'" -29dc90a580bf45f503ed89eb1dc63b5676db776b "xtensa: Add clrsbsi2 insn 
pattern" -9489a1ab05ad1bda7126da5513f08282da3e531d "xtensa: Tweak some widen multiplications" -fddf0e1057fe24eff0d894fbc2959b4086464a96 "xtensa: Consider the Loop Option when setmemsi is expanded to small loop" -ccd02e734e0f1742629403b46e5b1c650b00fd65 "xtensa: Improve instruction cost estimation and suggestion" -cd02f15f1aecc45b2c2feae16840503549508619 "xtensa: Improve constant synthesis for both integer and floating-point" -1c68ec1f8ab531fba56cccf549ffe592bf622821 "xtensa: Improve shift operations more" -e1b193c1cce3a975a9ed60dd0f30182fe0255d7c "xtensa: Simplify conditional branch/move insn patterns" -70ce04ca353bb0cda8321b91a77c2477e26d339b "xtensa: Make use of BALL/BNALL instructions" -077438933cf94f00cc5edf974338c11ba4bf7a39 "xtensa: Optimize bitwise AND operation with some specific forms of constants" -96518f714e3fab53a966a05b8d48011e27c1a718 "xtensa: Document new -mextra-l32r-costs= Xtensa-specific option" -43b0c56fda4bc990e8ee8d6a0b376de7b663bb06 "xtensa: Add support for sibling call optimization" -c95e307e3a978166cd5d6817ec9d8293825ff3fb "xtensa: Add some dedicated patterns that correspond to GIMPLE canonicalizations" -cfad4856fa46abc878934a9433d0bfc2482ccf00 "xtensa: Eliminate unwanted reg-reg moves during DFmode input reloads" -ce3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9 "xtensa: Eliminate [DS]Cmode hard register clobber that is immediately followed by whole overwrite the register" -479b6f449ee999501ad6eff0b7db8d0cd5b2d28d "xtensa: Defer storing integer constants into litpool until reload" ---- - gcc/config/xtensa/constraints.md | 10 +- - gcc/config/xtensa/predicates.md | 41 +- - gcc/config/xtensa/xtensa-protos.h | 11 +- - gcc/config/xtensa/xtensa.c | 733 +++++++++--- - gcc/config/xtensa/xtensa.h | 7 +- - gcc/config/xtensa/xtensa.md | 1024 +++++++++++++---- - gcc/config/xtensa/xtensa.opt | 6 +- - gcc/doc/invoke.texi | 11 +- - gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 + - gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 + - 
gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 + - gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 + - .../gcc.target/xtensa/check_zero_byte.c | 9 + - .../gcc.target/xtensa/constsynth_2insns.c | 44 + - .../gcc.target/xtensa/constsynth_3insns.c | 24 + - .../gcc.target/xtensa/constsynth_double.c | 11 + - .../gcc.target/xtensa/funnel_shifter.c | 17 + - .../gcc.target/xtensa/one_cmpl_abs.c | 9 + - gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 + - libgcc/config/xtensa/lib1funcs.S | 23 + - libgcc/config/xtensa/t-xtensa | 2 +- - 21 files changed, 1796 insertions(+), 350 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c - -diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md -index 2062c8816..13b3daafc 100644 ---- a/gcc/config/xtensa/constraints.md -+++ b/gcc/config/xtensa/constraints.md -@@ -92,7 +92,7 @@ - "An integer constant in the range @minus{}32-95 for use with MOVI.N - instructions." - (and (match_code "const_int") -- (match_test "ival >= -32 && ival <= 95"))) -+ (match_test "IN_RANGE (ival, -32, 95)"))) - - (define_constraint "N" - "An unsigned 8-bit integer constant shifted left by 8 bits for use -@@ -103,7 +103,7 @@ - (define_constraint "O" - "An integer constant that can be used in ADDI.N instructions." 
- (and (match_code "const_int") -- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) -+ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) - - (define_constraint "P" - "An integer constant that can be used as a mask value in an EXTUI -@@ -113,8 +113,10 @@ - - (define_constraint "Y" - "A constant that can be used in relaxed MOVI instructions." -- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -- (match_test "TARGET_AUTO_LITPOOLS"))) -+ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -+ (match_test "TARGET_AUTO_LITPOOLS")) -+ (and (match_code "const_int") -+ (match_test "can_create_pseudo_p ()")))) - - ;; Memory constraints. Do not use define_memory_constraint here. Doing so - ;; causes reload to force some constants into the constant pool, but since -diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md -index eb52b05aa..633cc6264 100644 ---- a/gcc/config/xtensa/predicates.md -+++ b/gcc/config/xtensa/predicates.md -@@ -25,8 +25,7 @@ - - (define_predicate "addsubx_operand" - (and (match_code "const_int") -- (match_test "INTVAL (op) >= 1 -- && INTVAL (op) <= 3"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) - - (define_predicate "arith_operand" - (ior (and (match_code "const_int") -@@ -53,9 +52,19 @@ - (match_test "xtensa_mask_immediate (INTVAL (op))")) - (match_operand 0 "register_operand"))) - -+(define_predicate "shifted_mask_operand" -+ (match_code "const_int") -+{ -+ HOST_WIDE_INT mask = INTVAL (op); -+ int shift = ctz_hwi (mask); -+ -+ return IN_RANGE (shift, 1, 31) -+ && xtensa_mask_immediate ((uint32_t)mask >> shift); -+}) -+ - (define_predicate "extui_fldsz_operand" - (and (match_code "const_int") -- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) - - (define_predicate "sext_operand" - (if_then_else (match_test "TARGET_SEXT") -@@ -64,7 +73,7 @@ - - (define_predicate "sext_fldsz_operand" - (and (match_code 
"const_int") -- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) -+ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) - - (define_predicate "lsbitnum_operand" - (and (match_code "const_int") -@@ -138,8 +147,9 @@ - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) - (ior (and (match_code "const_int") -- (match_test "GET_MODE_CLASS (mode) == MODE_INT -- && xtensa_simm12b (INTVAL (op))")) -+ (match_test "(GET_MODE_CLASS (mode) == MODE_INT -+ && xtensa_simm12b (INTVAL (op))) -+ || can_create_pseudo_p ()")) - (and (match_code "const_int,const_double,const,symbol_ref,label_ref") - (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) - && CONSTANT_P (op) -@@ -156,6 +166,19 @@ - (and (match_code "const_int") - (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) - -+(define_predicate "reload_operand" -+ (match_code "mem") -+{ -+ const_rtx addr = XEXP (op, 0); -+ if (REG_P (addr)) -+ return REGNO (addr) == A1_REG; -+ if (GET_CODE (addr) == PLUS) -+ return REG_P (XEXP (addr, 0)) -+ && REGNO (XEXP (addr, 0)) == A1_REG -+ && CONST_INT_P (XEXP (addr, 1)); -+ return false; -+}) -+ - (define_predicate "branch_operator" - (match_code "eq,ne,lt,ge")) - -@@ -165,9 +188,15 @@ - (define_predicate "boolean_operator" - (match_code "eq,ne")) - -+(define_predicate "logical_shift_operator" -+ (match_code "ashift,lshiftrt")) -+ - (define_predicate "xtensa_cstoresi_operator" - (match_code "eq,ne,gt,ge,lt,le")) - -+(define_predicate "xtensa_shift_per_byte_operator" -+ (match_code "ashift,ashiftrt,lshiftrt")) -+ - (define_predicate "tls_symbol_operand" - (and (match_code "symbol_ref") - (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) -diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h -index 18d803581..75ed3bfb0 100644 ---- a/gcc/config/xtensa/xtensa-protos.h -+++ b/gcc/config/xtensa/xtensa-protos.h -@@ -41,18 +41,23 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); - extern int 
xtensa_expand_conditional_move (rtx *, int); - extern int xtensa_expand_scc (rtx *, machine_mode); - extern int xtensa_expand_block_move (rtx *); -+extern int xtensa_expand_block_set_unrolled_loop (rtx *); -+extern int xtensa_expand_block_set_small_loop (rtx *); - extern void xtensa_split_operand_pair (rtx *, machine_mode); -+extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); - extern int xtensa_emit_move_sequence (rtx *, machine_mode); - extern rtx xtensa_copy_incoming_a7 (rtx); - extern void xtensa_expand_nonlocal_goto (rtx *); - extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); - extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); - extern void xtensa_emit_loop_end (rtx_insn *, rtx *); --extern char *xtensa_emit_branch (bool, bool, rtx *); --extern char *xtensa_emit_bit_branch (bool, bool, rtx *); -+extern char *xtensa_emit_branch (bool, rtx *); - extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); -+extern void xtensa_prepare_expand_call (int, rtx *); - extern char *xtensa_emit_call (int, rtx *); -+extern char *xtensa_emit_sibcall (int, rtx *); - extern bool xtensa_tls_referenced_p (rtx); -+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); - - #ifdef TREE_CODE - extern void init_cumulative_args (CUMULATIVE_ARGS *, int); -@@ -70,7 +75,7 @@ extern int xtensa_dbx_register_number (int); - extern long compute_frame_size (poly_int64); - extern bool xtensa_use_return_instruction_p (void); - extern void xtensa_expand_prologue (void); --extern void xtensa_expand_epilogue (void); -+extern void xtensa_expand_epilogue (bool); - extern void order_regs_for_local_alloc (void); - extern enum reg_class xtensa_regno_to_class (int regno); - extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 6cd9d5528..5b1aa9b23 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -55,6 +55,7 @@ along with GCC; see the 
file COPYING3. If not see - #include "dumpfile.h" - #include "hw-doloop.h" - #include "rtl-iter.h" -+#include "insn-attr.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -117,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = - - static void xtensa_option_override (void); - static enum internal_test map_test_to_internal_test (enum rtx_code); --static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); -+static rtx gen_int_relational (enum rtx_code, rtx, rtx); - static rtx gen_float_relational (enum rtx_code, rtx, rtx); - static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); - static rtx fixup_subreg_mem (rtx); -@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, - static section *xtensa_select_rtx_section (machine_mode, rtx, - unsigned HOST_WIDE_INT); - static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); -+static int xtensa_insn_cost (rtx_insn *, bool); - static int xtensa_register_move_cost (machine_mode, reg_class_t, - reg_class_t); - static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); -@@ -185,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); - static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); - static HOST_WIDE_INT xtensa_starting_frame_offset (void); - static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); -+static bool xtensa_function_ok_for_sibcall (tree, tree); - - - -@@ -208,6 +211,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS xtensa_rtx_costs -+#undef TARGET_INSN_COST -+#define TARGET_INSN_COST xtensa_insn_cost - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 - -@@ -333,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #undef 
TARGET_HAVE_SPECULATION_SAFE_VALUE - #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed - -+#undef TARGET_FUNCTION_OK_FOR_SIBCALL -+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - -@@ -341,42 +349,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; - bool - xtensa_simm8 (HOST_WIDE_INT v) - { -- return v >= -128 && v <= 127; -+ return IN_RANGE (v, -128, 127); - } - - - bool - xtensa_simm8x256 (HOST_WIDE_INT v) - { -- return (v & 255) == 0 && (v >= -32768 && v <= 32512); -+ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); - } - - - bool - xtensa_simm12b (HOST_WIDE_INT v) - { -- return v >= -2048 && v <= 2047; -+ return IN_RANGE (v, -2048, 2047); - } - - - static bool - xtensa_uimm8 (HOST_WIDE_INT v) - { -- return v >= 0 && v <= 255; -+ return IN_RANGE (v, 0, 255); - } - - - static bool - xtensa_uimm8x2 (HOST_WIDE_INT v) - { -- return (v & 1) == 0 && (v >= 0 && v <= 510); -+ return (v & 1) == 0 && IN_RANGE (v, 0, 510); - } - - - static bool - xtensa_uimm8x4 (HOST_WIDE_INT v) - { -- return (v & 3) == 0 && (v >= 0 && v <= 1020); -+ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); - } - - -@@ -446,19 +454,7 @@ xtensa_b4constu (HOST_WIDE_INT v) - bool - xtensa_mask_immediate (HOST_WIDE_INT v) - { --#define MAX_MASK_SIZE 16 -- int mask_size; -- -- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) -- { -- if ((v & 1) == 0) -- return false; -- v = v >> 1; -- if (v == 0) -- return true; -- } -- -- return false; -+ return IN_RANGE (exact_log2 (v + 1), 1, 16); - } - - -@@ -539,7 +535,7 @@ smalloffset_mem_p (rtx op) - return FALSE; - - val = INTVAL (offset); -- return (val & 3) == 0 && (val >= 0 && val <= 60); -+ return (val & 3) == 0 && IN_RANGE (val, 0, 60); - } - } - return FALSE; -@@ -678,8 +674,7 @@ map_test_to_internal_test (enum rtx_code test_code) - static rtx - gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - rtx cmp0, /* 
first operand to compare */ -- rtx cmp1, /* second operand to compare */ -- int *p_invert /* whether branch needs to reverse test */) -+ rtx cmp1 /* second operand to compare */) - { - struct cmp_info - { -@@ -711,6 +706,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - enum internal_test test; - machine_mode mode; - struct cmp_info *p_info; -+ int invert; - - test = map_test_to_internal_test (test_code); - gcc_assert (test != ITEST_MAX); -@@ -747,9 +743,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - } - - /* See if we need to invert the result. */ -- *p_invert = ((GET_CODE (cmp1) == CONST_INT) -- ? p_info->invert_const -- : p_info->invert_reg); -+ invert = ((GET_CODE (cmp1) == CONST_INT) -+ ? p_info->invert_const -+ : p_info->invert_reg); - - /* Comparison to constants, may involve adding 1 to change a LT into LE. - Comparison between two registers, may involve switching operands. */ -@@ -766,7 +762,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - cmp1 = temp; - } - -- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); -+ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) -+ : p_info->test_code, -+ VOIDmode, cmp0, cmp1); - } - - -@@ -825,45 +823,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) - enum rtx_code test_code = GET_CODE (operands[0]); - rtx cmp0 = operands[1]; - rtx cmp1 = operands[2]; -- rtx cmp; -- int invert; -- rtx label1, label2; -+ rtx cmp, label; - - switch (mode) - { -+ case E_SFmode: -+ if (TARGET_HARD_FLOAT) -+ { -+ cmp = gen_float_relational (test_code, cmp0, cmp1); -+ break; -+ } -+ /* FALLTHRU */ -+ - case E_DFmode: - default: - fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); - - case E_SImode: -- invert = FALSE; -- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); -- break; -- -- case E_SFmode: -- if (!TARGET_HARD_FLOAT) -- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, -- cmp0, cmp1)); -- invert = FALSE; -- cmp = gen_float_relational (test_code, cmp0, cmp1); -+ cmp = gen_int_relational (test_code, cmp0, cmp1); - break; - } - - /* Generate the branch. */ -- -- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); -- label2 = pc_rtx; -- -- if (invert) -- { -- label2 = label1; -- label1 = pc_rtx; -- } -- -+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); - emit_jump_insn (gen_rtx_SET (pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, -- label1, -- label2))); -+ label, -+ pc_rtx))); - } - - -@@ -1035,6 +1021,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) - } - - -+/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) -+ into dst with synthesizing a such constant value from a sequence of -+ load-immediate / arithmetic ones, instead of a L32R instruction -+ (plus a constant in litpool). 
*/ -+ -+static void -+xtensa_emit_constantsynth (rtx dst, enum rtx_code code, -+ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT imm2) -+{ -+ gcc_assert (REG_P (dst)); -+ emit_move_insn (dst, GEN_INT (imm0)); -+ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, -+ dst, GEN_INT (imm1))); -+ if (gen_op) -+ emit_move_insn (dst, gen_op (dst, imm2)); -+} -+ -+static int -+xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT op_imm) -+{ -+ int shift = exact_log2 (srcval + 1); -+ -+ if (IN_RANGE (shift, 1, 31)) -+ { -+ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) -+ { -+ HOST_WIDE_INT imm0, imm1; -+ -+ if (srcval < -32768) -+ imm1 = -32768; -+ else if (srcval > 32512) -+ imm1 = 32512; -+ else -+ imm1 = srcval & ~255; -+ imm0 = srcval - imm1; -+ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) -+ imm0 -= 256, imm1 += 256; -+ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); -+ return 1; -+ } -+ -+ shift = ctz_hwi (srcval); -+ if (xtensa_simm12b (srcval >> shift)) -+ { -+ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static rtx -+xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) -+{ -+ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); -+} -+ -+static rtx -+xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) -+{ -+ return imm == 7 -+ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), -+ reg) -+ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, -+ GEN_INT (floor_log2 (imm - 1))), -+ reg); -+} -+ -+int -+xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) -+{ -+ /* No need for synthesizing for what fits into MOVI instruction. */ -+ if (xtensa_simm12b (srcval)) -+ return 0; -+ -+ /* 2-insns substitution. 
*/ -+ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) -+ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) -+ return 1; -+ -+ /* 3-insns substitution. */ -+ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) -+ { -+ int shift, divisor; -+ -+ /* 2-insns substitution followed by SLLI. */ -+ shift = ctz_hwi (srcval); -+ if (IN_RANGE (shift, 1, 31) && -+ xtensa_constantsynth_2insn (dst, srcval >> shift, -+ xtensa_constantsynth_rtx_SLLI, -+ shift)) -+ return 1; -+ -+ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ -+ if (TARGET_ADDX) -+ for (divisor = 3; divisor <= 9; divisor += 2) -+ if (srcval % divisor == 0 && -+ xtensa_constantsynth_2insn (dst, srcval / divisor, -+ xtensa_constantsynth_rtx_ADDSUBX, -+ divisor)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+ - /* Emit insns to move operands[1] into operands[0]. - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move -@@ -1070,24 +1173,9 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - return 1; - } - -- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) -+ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 -+ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) - { -- /* Try to emit MOVI + SLLI sequence, that is smaller -- than L32R + literal. */ -- if (optimize >= 1 && ! 
optimize_debug && mode == SImode -- && CONST_INT_P (src) && register_operand (dst, mode)) -- { -- HOST_WIDE_INT srcval = INTVAL (src); -- int shift = ctz_hwi (srcval); -- -- if (xtensa_simm12b (srcval >> shift)) -- { -- emit_move_insn (dst, GEN_INT (srcval >> shift)); -- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); -- return 1; -- } -- } -- - src = force_const_mem (SImode, src); - operands[1] = src; - } -@@ -1315,7 +1403,7 @@ xtensa_expand_block_move (rtx *operands) - move_ratio = 4; - if (optimize > 2) - move_ratio = LARGEST_MOVE_RATIO; -- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ -+ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); - if (num_pieces > move_ratio) - return 0; - -@@ -1352,7 +1440,7 @@ xtensa_expand_block_move (rtx *operands) - temp[next] = gen_reg_rtx (mode[next]); - - x = adjust_address (src_mem, mode[next], offset_ld); -- emit_insn (gen_rtx_SET (temp[next], x)); -+ emit_move_insn (temp[next], x); - - offset_ld += next_amount; - bytes -= next_amount; -@@ -1362,9 +1450,9 @@ xtensa_expand_block_move (rtx *operands) - if (active[phase]) - { - active[phase] = false; -- -+ - x = adjust_address (dst_mem, mode[phase], offset_st); -- emit_insn (gen_rtx_SET (x, temp[phase])); -+ emit_move_insn (x, temp[phase]); - - offset_st += amount[phase]; - } -@@ -1375,6 +1463,246 @@ xtensa_expand_block_move (rtx *operands) - } - - -+/* Try to expand a block set operation to a sequence of RTL move -+ instructions. If not optimizing, or if the block size is not a -+ constant, or if the block is too large, or if the value to -+ initialize the block with is not a constant, the expansion -+ fails and GCC falls back to calling memset(). -+ -+ operands[0] is the destination -+ operands[1] is the length -+ operands[2] is the initialization value -+ operands[3] is the alignment */ -+ -+static int -+xtensa_sizeof_MOVI (HOST_WIDE_INT imm) -+{ -+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; -+} -+ -+int -+xtensa_expand_block_set_unrolled_loop (rtx *operands) -+{ -+ rtx dst_mem = operands[0]; -+ HOST_WIDE_INT bytes, value, align; -+ int expand_len, funccall_len; -+ rtx x, reg; -+ int offset; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ /* Insn expansion: a series of aligned memory stores. -+ Consist of S8I, S16I or S32I(.N). */ -+ expand_len += (bytes / align) * (TARGET_DENSITY -+ && align == 4 ? 2 : 3); -+ /* Insn expansion: the remainder, sub-aligned memory stores. -+ A combination of S8I and S16I as needed. */ -+ expand_len += ((bytes % align + 1) / 2) * 3; -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (dst_mem, 0); -+ if (!REG_P (x)) -+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); -+ switch (align) -+ { -+ case 1: -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (SImode, GEN_INT (value)); -+ -+ offset = 0; -+ do -+ { -+ int unit_size = MIN (bytes, align); -+ machine_mode unit_mode = (unit_size >= 4 ? SImode : -+ (unit_size >= 2 ? HImode : -+ QImode)); -+ unit_size = GET_MODE_SIZE (unit_mode); -+ -+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), -+ unit_mode == SImode ? reg -+ : convert_to_mode (unit_mode, reg, true)); -+ -+ offset += unit_size; -+ bytes -= unit_size; -+ } -+ while (bytes > 0); -+ -+ return 1; -+} -+ -+int -+xtensa_expand_block_set_small_loop (rtx *operands) -+{ -+ HOST_WIDE_INT bytes, value, align, count; -+ int expand_len, funccall_len; -+ rtx x, dst, end, reg; -+ machine_mode unit_mode; -+ rtx_code_label *label; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Totally-aligned block only. */ -+ if (bytes % align != 0) -+ return 0; -+ count = bytes / align; -+ -+ /* If the Loop Option (zero-overhead looping) is configured and active, -+ almost no restrictions about the length of the block. */ -+ if (! (TARGET_LOOPS && optimize)) -+ { -+ /* If 4-byte aligned, small loop substitution is almost optimal, -+ thus limited to only offset to the end address for ADDI/ADDMI -+ instruction. */ -+ if (align == 4 -+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) -+ return 0; -+ -+ /* If no 4-byte aligned, loop count should be treated as the -+ constraint. 
*/ -+ if (align != 4 -+ && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) -+ return 0; -+ } -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ -+ { -+ /* Insn translation: Either MOV(.N) or L32R w/litpool for the -+ loop count. */ -+ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) -+ : 3 + 4; -+ /* Insn translation: LOOP, the zero-overhead looping setup -+ instruction. */ -+ expand_len += 3; -+ /* Insn expansion: the loop body instructions. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3); -+ } -+ else /* NO zero-overhead looping */ -+ { -+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ -+ expand_len += bytes > 127 ? 3 -+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; -+ /* Insn expansion: the loop body and branch instruction. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). -+ For branch, BNE. */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3) + 3; -+ } -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (operands[0], 0); -+ if (!REG_P (x)) -+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); -+ dst = gen_reg_rtx (SImode); -+ emit_move_insn (dst, x); -+ end = gen_reg_rtx (SImode); -+ if (TARGET_LOOPS && optimize) -+ x = force_reg (SImode, operands[1] /* the length */); -+ else -+ x = operands[1]; -+ emit_insn (gen_addsi3 (end, dst, x)); -+ switch (align) -+ { -+ case 1: -+ unit_mode = QImode; -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ unit_mode = HImode; -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ unit_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (unit_mode, GEN_INT (value)); -+ -+ label = gen_label_rtx (); -+ emit_label (label); -+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); -+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); -+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); -+ -+ return 1; -+} -+ -+ - void - xtensa_expand_nonlocal_goto (rtx *operands) - { -@@ -1725,21 +2053,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) - - - char * --xtensa_emit_branch (bool inverted, bool immed, rtx *operands) -+xtensa_emit_branch (bool immed, rtx *operands) - { - static char result[64]; -- enum rtx_code code; -+ enum rtx_code code = GET_CODE (operands[3]); - const char *op; - -- code = GET_CODE (operands[3]); - switch (code) - { -- case EQ: op = inverted ? "ne" : "eq"; break; -- case NE: op = inverted ? "eq" : "ne"; break; -- case LT: op = inverted ? "ge" : "lt"; break; -- case GE: op = inverted ? "lt" : "ge"; break; -- case LTU: op = inverted ? "geu" : "ltu"; break; -- case GEU: op = inverted ? 
"ltu" : "geu"; break; -+ case EQ: op = "eq"; break; -+ case NE: op = "ne"; break; -+ case LT: op = "lt"; break; -+ case GE: op = "ge"; break; -+ case LTU: op = "ltu"; break; -+ case GEU: op = "geu"; break; - default: gcc_unreachable (); - } - -@@ -1758,32 +2085,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) - } - - --char * --xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) --{ -- static char result[64]; -- const char *op; -- -- switch (GET_CODE (operands[3])) -- { -- case EQ: op = inverted ? "bs" : "bc"; break; -- case NE: op = inverted ? "bc" : "bs"; break; -- default: gcc_unreachable (); -- } -- -- if (immed) -- { -- unsigned bitnum = INTVAL (operands[1]) & 0x1f; -- operands[1] = GEN_INT (bitnum); -- sprintf (result, "b%si\t%%0, %%d1, %%2", op); -- } -- else -- sprintf (result, "b%s\t%%0, %%1, %%2", op); -- -- return result; --} -- -- - char * - xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { -@@ -1792,12 +2093,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - const char *op; - - code = GET_CODE (operands[4]); -+ if (inverted) -+ code = reverse_condition (code); - if (isbool) - { - switch (code) - { -- case EQ: op = inverted ? "t" : "f"; break; -- case NE: op = inverted ? "f" : "t"; break; -+ case EQ: op = "f"; break; -+ case NE: op = "t"; break; - default: gcc_unreachable (); - } - } -@@ -1805,10 +2108,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { - switch (code) - { -- case EQ: op = inverted ? "nez" : "eqz"; break; -- case NE: op = inverted ? "eqz" : "nez"; break; -- case LT: op = inverted ? "gez" : "ltz"; break; -- case GE: op = inverted ? 
"ltz" : "gez"; break; -+ case EQ: op = "eqz"; break; -+ case NE: op = "nez"; break; -+ case LT: op = "ltz"; break; -+ case GE: op = "gez"; break; - default: gcc_unreachable (); - } - } -@@ -1819,6 +2122,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - } - - -+void -+xtensa_prepare_expand_call (int callop, rtx *operands) -+{ -+ rtx addr = XEXP (operands[callop], 0); -+ -+ if (flag_pic && SYMBOL_REF_P (addr) -+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -+ addr = gen_sym_PLT (addr); -+ -+ if (!call_insn_operand (addr, VOIDmode)) -+ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); -+} -+ -+ - char * - xtensa_emit_call (int callop, rtx *operands) - { -@@ -1837,6 +2154,24 @@ xtensa_emit_call (int callop, rtx *operands) - } - - -+char * -+xtensa_emit_sibcall (int callop, rtx *operands) -+{ -+ static char result[64]; -+ rtx tgt = operands[callop]; -+ -+ if (GET_CODE (tgt) == CONST_INT) -+ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", -+ INTVAL (tgt)); -+ else if (register_operand (tgt, VOIDmode)) -+ sprintf (result, "jx\t%%%d", callop); -+ else -+ sprintf (result, "j.l\t%%%d, a9", callop); -+ -+ return result; -+} -+ -+ - bool - xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) - { -@@ -2061,6 +2396,20 @@ xtensa_tls_referenced_p (rtx x) - } - - -+/* Helper function for "*shlrd_..." patterns. */ -+ -+enum rtx_code -+xtensa_shlrd_which_direction (rtx op0, rtx op1) -+{ -+ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) -+ return ASHIFT; /* shld */ -+ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) -+ return LSHIFTRT; /* shrd */ -+ -+ return UNKNOWN; -+} -+ -+ - /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ - - static bool -@@ -2364,7 +2713,7 @@ static void - printx (FILE *file, signed int val) - { - /* Print a hexadecimal value in a nice way. 
*/ -- if ((val > -0xa) && (val < 0xa)) -+ if (IN_RANGE (val, -9, 9)) - fprintf (file, "%d", val); - else if (val < 0) - fprintf (file, "-0x%x", -val); -@@ -2379,7 +2728,7 @@ void - print_operand (FILE *file, rtx x, int letter) - { - if (!x) -- error ("PRINT_OPERAND null pointer"); -+ error ("% null pointer"); - - switch (letter) - { -@@ -2424,17 +2773,11 @@ print_operand (FILE *file, rtx x, int letter) - case 'K': - if (GET_CODE (x) == CONST_INT) - { -- int num_bits = 0; - unsigned val = INTVAL (x); -- while (val & 1) -- { -- num_bits += 1; -- val = val >> 1; -- } -- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) -+ if (!xtensa_mask_immediate (val)) - fatal_insn ("invalid mask", x); - -- fprintf (file, "%d", num_bits); -+ fprintf (file, "%d", floor_log2 (val + 1)); - } - else - output_operand_lossage ("invalid %%K value"); -@@ -2584,7 +2927,7 @@ void - print_operand_address (FILE *file, rtx addr) - { - if (!addr) -- error ("PRINT_OPERAND_ADDRESS, null pointer"); -+ error ("%, null pointer"); - - switch (GET_CODE (addr)) - { -@@ -2750,7 +3093,7 @@ xtensa_call_save_reg(int regno) - return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || - df_regs_ever_live_p (regno); - -- if (crtl->calls_eh_return && regno >= 2 && regno < 4) -+ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) - return true; - - return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); -@@ -2870,7 +3213,7 @@ xtensa_expand_prologue (void) - int callee_save_size = cfun->machine->callee_save_size; - - /* -128 is a limit of single addi instruction. 
*/ -- if (total_size > 0 && total_size <= 128) -+ if (IN_RANGE (total_size, 1, 128)) - { - insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-total_size))); -@@ -2999,7 +3342,7 @@ xtensa_expand_prologue (void) - } - - void --xtensa_expand_epilogue (void) -+xtensa_expand_epilogue (bool sibcall_p) - { - if (!TARGET_WINDOWED_ABI) - { -@@ -3033,10 +3376,13 @@ xtensa_expand_epilogue (void) - if (xtensa_call_save_reg(regno)) - { - rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ rtx reg; - - offset -= UNITS_PER_WORD; -- emit_move_insn (gen_rtx_REG (SImode, regno), -+ emit_move_insn (reg = gen_rtx_REG (SImode, regno), - gen_frame_mem (SImode, x)); -+ if (regno == A0_REG && sibcall_p) -+ emit_use (reg); - } - } - -@@ -3071,7 +3417,8 @@ xtensa_expand_epilogue (void) - EH_RETURN_STACKADJ_RTX)); - } - cfun->machine->epilogue_done = true; -- emit_jump_insn (gen_return ()); -+ if (!sibcall_p) -+ emit_jump_insn (gen_return ()); - } - - bool -@@ -3697,7 +4044,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) - flags |= SECTION_BSS; /* @nobits */ - else - warning (0, "only uninitialized variables can be placed in a " -- ".bss section"); -+ "%<.bss%> section"); - } - - return flags; -@@ -3750,7 +4097,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - static bool - xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - int opno ATTRIBUTE_UNUSED, -- int *total, bool speed ATTRIBUTE_UNUSED) -+ int *total, bool speed) - { - int code = GET_CODE (x); - -@@ -3838,9 +4185,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case CLZ: -+ case CLRSB: - *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); - return true; - -+ case BSWAP: -+ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); -+ return true; -+ - case NOT: - *total = COSTS_N_INSNS (mode == DImode ? 
3 : 2); - return true; -@@ -3864,13 +4216,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case ABS: -+ case NEG: - { - if (mode == SFmode) - *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); - else if (mode == DFmode) - *total = COSTS_N_INSNS (50); -- else -+ else if (mode == DImode) - *total = COSTS_N_INSNS (4); -+ else -+ *total = COSTS_N_INSNS (1); - return true; - } - -@@ -3886,10 +4241,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - } - -- case NEG: -- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); -- return true; -- - case MULT: - { - if (mode == SFmode) -@@ -3929,11 +4280,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - case UMOD: - { - if (mode == DImode) -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - else if (TARGET_DIV32) - *total = COSTS_N_INSNS (32); - else -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - return true; - } - -@@ -3966,6 +4317,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - } - } - -+static bool -+xtensa_is_insn_L32R_p(const rtx_insn *insn) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = XEXP (x, 1); -+ if (GET_CODE (x) == MEM) -+ { -+ x = XEXP (x, 0); -+ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) -+ && CONSTANT_POOL_ADDRESS_P (x); -+ } -+ } -+ -+ return false; -+} -+ -+/* Compute a relative costs of RTL insns. This is necessary in order to -+ achieve better RTL insn splitting/combination result. */ -+ -+static int -+xtensa_insn_cost (rtx_insn *insn, bool speed) -+{ -+ if (!(recog_memoized (insn) < 0)) -+ { -+ int len = get_attr_length (insn), n = (len + 2) / 3; -+ -+ if (len == 0) -+ return COSTS_N_INSNS (0); -+ -+ if (speed) /* For speed cost. */ -+ { -+ /* "L32R" may be particular slow (implementation-dependent). 
*/ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); -+ -+ /* Cost based on the pipeline model. */ -+ switch (get_attr_type (insn)) -+ { -+ case TYPE_STORE: -+ case TYPE_MOVE: -+ case TYPE_ARITH: -+ case TYPE_MULTI: -+ case TYPE_NOP: -+ case TYPE_FSTORE: -+ return COSTS_N_INSNS (n); -+ -+ case TYPE_LOAD: -+ return COSTS_N_INSNS (n - 1 + 2); -+ -+ case TYPE_JUMP: -+ case TYPE_CALL: -+ return COSTS_N_INSNS (n - 1 + 3); -+ -+ case TYPE_FCONV: -+ case TYPE_FLOAD: -+ case TYPE_MUL16: -+ case TYPE_MUL32: -+ case TYPE_RSR: -+ return COSTS_N_INSNS (n * 2); -+ -+ case TYPE_FMADD: -+ return COSTS_N_INSNS (n * 4); -+ -+ case TYPE_DIV32: -+ return COSTS_N_INSNS (n * 16); -+ -+ default: -+ break; -+ } -+ } -+ else /* For size cost. */ -+ { -+ /* Cost based on the instruction length. */ -+ if (get_attr_type (insn) != TYPE_UNKNOWN) -+ { -+ /* "L32R" itself plus constant in litpool. */ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (2) + 1; -+ -+ /* Consider ".n" short instructions. */ -+ return COSTS_N_INSNS (n) - (n * 3 - len); -+ } -+ } -+ } -+ -+ /* Fall back. */ -+ return pattern_cost (PATTERN (insn), speed); -+} -+ - /* Worker function for TARGET_RETURN_IN_MEMORY. */ - - static bool -@@ -4491,4 +4934,16 @@ xtensa_asan_shadow_offset (void) - return HOST_WIDE_INT_UC (0x10000000); - } - -+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ -+static bool -+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Do not allow sibcalls if the Windowed Register Option is -+ configured. */ -+ if (TARGET_WINDOWED_ABI) -+ return false; -+ -+ return true; -+} -+ - #include "gt-xtensa.h" -diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h -index fa86a245e..3e9cbc943 100644 ---- a/gcc/config/xtensa/xtensa.h -+++ b/gcc/config/xtensa/xtensa.h -@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. 
If not see - #define HAVE_AS_TLS 0 - #endif - -+/* Define this if the target has no hardware divide instructions. */ -+#if !TARGET_DIV32 -+#define TARGET_HAS_NO_HW_DIVIDE -+#endif -+ - - /* Target CPU builtins. */ - #define TARGET_CPU_CPP_BUILTINS() \ -@@ -488,7 +493,7 @@ enum reg_class - used for this purpose since all function arguments are pushed on - the stack. */ - #define FUNCTION_ARG_REGNO_P(N) \ -- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) -+ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) - - /* Record the number of argument words seen so far, along with a flag to - indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 2a8e59ee9..124548dfe 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -25,6 +25,7 @@ - (A7_REG 7) - (A8_REG 8) - (A9_REG 9) -+ (A10_REG 10) - - (UNSPEC_NOP 2) - (UNSPEC_PLT 3) -@@ -83,6 +84,13 @@ - ;; the same template. - (define_mode_iterator HQI [HI QI]) - -+;; This code iterator is for *shlrd and its variants. -+(define_code_iterator ior_op [ior plus]) -+ -+;; This mode iterator allows the DC and SC patterns to be defined from -+;; the same template. -+(define_mode_iterator DSC [DC SC]) -+ - - ;; Attributes. - -@@ -98,7 +106,10 @@ - - ;; Describe a user's asm statement. - (define_asm_attributes -- [(set_attr "type" "multi")]) -+ [(set_attr "type" "multi") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) ;; Should be the maximum possible length -+ ;; of a single machine instruction. - - - ;; Pipeline model. -@@ -224,20 +235,42 @@ - - ;; Multiplication. 
- --(define_expand "mulsidi3" -+(define_expand "mulsidi3" - [(set (match_operand:DI 0 "register_operand") -- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) -- (any_extend:DI (match_operand:SI 2 "register_operand"))))] -+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] - "TARGET_MUL32_HIGH" - { - rtx temp = gen_reg_rtx (SImode); - emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -- emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -- operands[1], operands[2])); -+ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); - emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); - DONE; - }) - -+(define_expand "umulsidi3" -+ [(set (match_operand:DI 0 "register_operand") -+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] -+ "" -+{ -+ if (TARGET_MUL32_HIGH) -+ { -+ rtx temp = gen_reg_rtx (SImode); -+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -+ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); -+ } -+ else -+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], SImode, -+ operands[2], SImode); -+ DONE; -+}) -+ - (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=a") - (truncate:SI -@@ -261,30 +294,16 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_insn "mulhisi3" -- [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (sign_extend:SI -- (match_operand:HI 1 "register_operand" "%r,r")) -- (sign_extend:SI -- (match_operand:HI 2 "register_operand" "r,r"))))] -- "TARGET_MUL16 || TARGET_MAC16" -- "@ -- mul16s\t%0, %1, %2 -- mul.aa.ll\t%1, %2" -- [(set_attr "type" 
"mul16,mac16") -- (set_attr "mode" "SI") -- (set_attr "length" "3,3")]) -- --(define_insn "umulhisi3" -+(define_insn "mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (zero_extend:SI -+ (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) -- (zero_extend:SI -+ (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] - "TARGET_MUL16 || TARGET_MAC16" - "@ -- mul16u\t%0, %1, %2 -- umul.aa.ll\t%1, %2" -+ mul16\t%0, %1, %2 -+ mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) -@@ -429,7 +448,17 @@ - (set_attr "length" "3")]) - - --;; Count leading/trailing zeros and find first bit. -+;; Count redundant leading sign bits and leading/trailing zeros, -+;; and find first bit. -+ -+(define_insn "clrsbsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] -+ "TARGET_NSA" -+ "nsa\t%0, %1" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "3")]) - - (define_insn "clzsi2" - [(set (match_operand:SI 0 "register_operand" "=a") -@@ -471,23 +500,78 @@ - - ;; Byte swap. 
- --(define_insn "bswapsi2" -- [(set (match_operand:SI 0 "register_operand" "=&a") -- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "15")]) -+(define_insn "bswaphi2" -+ [(set (match_operand:HI 0 "register_operand" "=a") -+ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) -+ (clobber (match_scratch:HI 2 "=&a"))] -+ "" -+ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "HI") -+ (set_attr "length" "9")]) - --(define_insn "bswapdi2" -- [(set (match_operand:DI 0 "register_operand" "=&a") -- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" -- [(set_attr "type" "arith") -- (set_attr "mode" "DI") -- (set_attr "length" "27")]) -+(define_expand "bswapsi2" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (bswap:SI (match_operand:SI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1" -+{ -+ /* GIMPLE manual byte-swapping recognition is now activated. -+ For both built-in and manual bswaps, emit corresponding library call -+ if optimizing for size, or a series of dedicated machine instructions -+ if otherwise. 
*/ -+ if (optimize_size) -+ emit_library_call_value (optab_libfunc (bswap_optab, SImode), -+ operands[0], LCT_NORMAL, SImode, -+ operands[1], SImode); -+ else -+ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "bswapsi2_internal" -+ [(set (match_operand:SI 0 "register_operand" "=a,&a") -+ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) -+ (clobber (match_scratch:SI 2 "=&a,X"))] -+ "!optimize_debug && optimize > 1 && !optimize_size" -+{ -+ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); -+ const char *init = "ssai\t8\;"; -+ static char result[64]; -+ if (prev_insn && NONJUMP_INSN_P (prev_insn)) -+ { -+ rtx x = PATTERN (prev_insn); -+ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 -+ && GET_CODE (XVECEXP (x, 0, 0)) == SET -+ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) -+ { -+ x = XEXP (XVECEXP (x, 0, 0), 1); -+ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) -+ init = ""; -+ } -+ } -+ sprintf (result, -+ (which_alternative == 0) -+ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" -+ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", -+ init); -+ return result; -+} -+ [(set_attr "type" "arith,arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "15,15")]) -+ -+(define_expand "bswapdi2" -+ [(set (match_operand:DI 0 "register_operand" "") -+ (bswap:DI (match_operand:DI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1 && optimize_size" -+{ -+ /* Replace with a single DImode library call. -+ Without this, two SImode library calls are emitted. */ -+ emit_library_call_value (optab_libfunc (bswap_optab, DImode), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], DImode); -+ DONE; -+}) - - - ;; Negation and one's complement. 
-@@ -501,16 +585,26 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "one_cmplsi2" -- [(set (match_operand:SI 0 "register_operand" "") -- (not:SI (match_operand:SI 1 "register_operand" "")))] -+(define_insn_and_split "one_cmplsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (not:SI (match_operand:SI 1 "register_operand" "r")))] - "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (const_int -1)) -+ (set (match_dup 0) -+ (xor:SI (match_dup 1) -+ (match_dup 2)))] - { -- rtx temp = gen_reg_rtx (SImode); -- emit_insn (gen_movsi (temp, constm1_rtx)); -- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); -- DONE; --}) -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) - - (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") -@@ -536,6 +630,103 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -+(define_insn_and_split "*andsi3_bitcmpl" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r")))] -+ "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 3) -+ (and:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (xor:SI (match_dup 3) -+ (match_dup 2)))] -+{ -+ operands[3] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_pow2_minus_one" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ashift:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (lshiftrt:SI (match_dup 0) -+ 
(match_dup 2)))] -+{ -+ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[2]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*andsi3_const_negative_pow2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (lshiftrt:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_shifted_mask" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "shifted_mask_operand" "i")))] -+ "! 
xtensa_simm12b (INTVAL (operands[2]))" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (zero_extract:SI (match_dup 1) -+ (match_dup 3) -+ (match_dup 4))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[2]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[2] = GEN_INT (shift); -+ operands[3] = GEN_INT (mask_size); -+ operands[4] = GEN_INT (mask_pos); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && ctz_hwi (INTVAL (operands[2])) == 1") -+ (const_int 5) -+ (const_int 6)))]) -+ - (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=a") - (ior:SI (match_operand:SI 1 "register_operand" "%r") -@@ -634,7 +825,7 @@ - - ;; Field extract instructions. - --(define_expand "extv" -+(define_expand "extvsi" - [(set (match_operand:SI 0 "register_operand" "") - (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -649,12 +840,12 @@ - if (!lsbitnum_operand (operands[3], SImode)) - FAIL; - -- emit_insn (gen_extv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extv_internal" -+(define_insn "extvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (sign_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "sext_fldsz_operand" "i") -@@ -669,7 +860,7 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "extzv" -+(define_expand "extzvsi" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -678,12 +869,12 @@ - 
{ - if (!extui_fldsz_operand (operands[2], SImode)) - FAIL; -- emit_insn (gen_extzv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extzvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extzv_internal" -+(define_insn "extzvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (zero_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "extui_fldsz_operand" "i") -@@ -757,11 +948,14 @@ - because of offering further optimization opportunities. */ - if (register_operand (operands[0], DImode)) - { -- rtx first, second; -- -- split_double (operands[1], &first, &second); -- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); -- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); -+ rtx lowpart, highpart; -+ -+ if (TARGET_BIG_ENDIAN) -+ split_double (operands[1], &highpart, &lowpart); -+ else -+ split_double (operands[1], &lowpart, &highpart); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); -+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); - DONE; - } - -@@ -782,7 +976,7 @@ - "register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -831,6 +1025,19 @@ - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) - -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ rtx x = avoid_constant_pool_reference (operands[1]); -+ if (! CONST_INT_P (x)) -+ FAIL; -+ if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) -+ emit_move_insn (operands[0], x); -+}) -+ - ;; 16-bit Integer moves - - (define_expand "movhi" -@@ -1035,6 +1242,43 @@ - (set_attr "mode" "SF") - (set_attr "length" "3")]) - -+(define_split -+ [(set (match_operand:SF 0 "register_operand") -+ (match_operand:SF 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ int i = 0; -+ rtx x = XEXP (operands[1], 0); -+ long l[2]; -+ if (GET_CODE (x) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (x)) -+ x = get_pool_constant (x); -+ else if (GET_CODE (x) == CONST) -+ { -+ x = XEXP (x, 0); -+ gcc_assert (GET_CODE (x) == PLUS -+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) -+ && CONST_INT_P (XEXP (x, 1))); -+ i = INTVAL (XEXP (x, 1)); -+ gcc_assert (i == 0 || i == 4); -+ i /= 4; -+ x = get_pool_constant (XEXP (x, 0)); -+ } -+ else -+ gcc_unreachable (); -+ if (GET_MODE (x) == SFmode) -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); -+ else if (GET_MODE (x) == DFmode) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ else -+ FAIL; -+ x = gen_rtx_REG (SImode, REGNO (operands[0])); -+ if (! 
xtensa_constantsynth (x, l[i])) -+ emit_move_insn (x, GEN_INT (l[i])); -+}) -+ - ;; 64-bit floating point moves - - (define_expand "movdf" -@@ -1058,7 +1302,7 @@ - "register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -1085,6 +1329,22 @@ - DONE; - }) - -+;; Block sets -+ -+(define_expand "setmemsi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:SI 1 "") -+ (match_operand:SI 2 "") -+ (match_operand:SI 3 "const_int_operand")] -+ "!optimize_debug && optimize" -+{ -+ if (xtensa_expand_block_set_unrolled_loop (operands)) -+ DONE; -+ if (xtensa_expand_block_set_small_loop (operands)) -+ DONE; -+ FAIL; -+}) -+ - - ;; Shift instructions. - -@@ -1097,16 +1357,6 @@ - operands[1] = xtensa_copy_incoming_a7 (operands[1]); - }) - --(define_insn "*ashlsi3_1" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (const_int 1)))] -- "TARGET_DENSITY" -- "add.n\t%0, %1, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "2")]) -- - (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1119,16 +1369,14 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashlsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8b\t%2\;sll\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (ashift:SI (match_operand:SI 1 "register_operand") -+ (const_int 1)))] -+ "TARGET_DENSITY" -+ [(set (match_dup 0) -+ (plus:SI (match_dup 1) -+ (match_dup 1)))]) - - 
(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") -@@ -1142,17 +1390,6 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashrsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8l\t%2\;sra\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -- - (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1162,9 +1399,9 @@ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) -- return "srli\t%0, %1, %R2"; -+ return "srli\t%0, %1, %R2"; - else -- return "extui\t%0, %1, %R2, %L2"; -+ return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; - } -@@ -1172,13 +1409,170 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*lshrsi3_3x" -+(define_insn "*shift_per_byte" - [(set (match_operand:SI 0 "register_operand" "=a") -- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -+ (match_operator:SI 3 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]))] -+ "!optimize_debug && optimize" -+{ -+ switch (GET_CODE (operands[3])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; -+ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; -+ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_0" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 
"register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i"))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_1" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i")))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 5) -+ (neg:SI (match_dup 2))) -+ (set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 5) -+ (const_int 3))]))] -+{ -+ operands[5] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "9")]) -+ -+(define_insn "*shlrd_reg_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (match_dup 2))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr 
"length" "6")]) -+ -+(define_insn "*shlrd_const_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 3 "const_int_operand" "i")]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 2 "register_operand" "r") -+ (match_operand:SI 4 "const_int_operand" "i")])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && IN_RANGE (INTVAL (operands[3]), 1, 31) -+ && IN_RANGE (INTVAL (operands[4]), 1, 31) -+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" -+{ -+ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) -+ { -+ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; -+ case LSHIFTRT: return "ssai\t%R3\;src\t%0, %2, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn "*shlrd_per_byte_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shlrd_per_byte__omit_AND" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ 
[(match_operand:SI 1 "register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 4 "const_int_operand" "i"))]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_dup 2) -+ (const_int 3)) -+ (match_dup 4)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ior_op:SI (match_op_dup 5 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]) -+ (match_op_dup 6 -+ [(match_dup 3) -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] - "" -- "ssa8l\t%2\;srl\t%0, %1" - [(set_attr "type" "arith") - (set_attr "mode" "SI") - (set_attr "length" "6")]) -@@ -1239,28 +1633,13 @@ - (define_insn "*btrue" - [(set (pc) - (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "branch_operand" "K,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*bfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1269,28 +1648,13 @@ - (define_insn "*ubtrue" - [(set (pc) - (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 
"register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "ubranch_operand" "L,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*ubfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1301,80 +1665,178 @@ - (define_insn "*bittrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") -+ (const_int 1) -+ (match_operand:SI 1 "arith_operand" "J,r")) - (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); -+ static char result[64]; -+ char op; -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: op = 'c'; break; -+ case NE: op = 's'; break; -+ default: gcc_unreachable (); -+ } -+ if (which_alternative == 0) -+ { -+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); -+ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); -+ } -+ else -+ sprintf (result, "bb%c\t%%0, %%1, %%2", op); -+ return result; - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*bitfalse" -+(define_insn "*masktrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- 
[(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "register_operand" "r")) - (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] - "" - { -- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: return "bnone\t%0, %1, %2"; -+ case NE: return "bany\t%0, %1, %2"; -+ default: gcc_unreachable (); -+ } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*masktrue" -+(define_insn "*masktrue_bitcmpl" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -+ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) -+ (match_operand:SI 1 "register_operand" "r")) -+ (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - switch (GET_CODE (operands[3])) - { -- case EQ: return "bnone\t%0, %1, %2"; -- case NE: return "bany\t%0, %1, %2"; -- default: gcc_unreachable (); -+ case EQ: return "ball\t%0, %1, %2"; -+ case NE: return "bnall\t%0, %1, %2"; -+ default: gcc_unreachable (); - } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*maskfalse" -+(define_insn_and_split "*masktrue_const_pow2_minus_one" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE 
(exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (ashift:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] - { -- switch (GET_CODE (operands[3])) -- { -- case EQ: return "bany\t%0, %1, %2"; -- case NE: return "bnone\t%0, %1, %2"; -- default: gcc_unreachable (); -- } -+ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); -+ operands[4] = gen_reg_rtx (SImode); - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[1]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*masktrue_const_negative_pow2" -+ [(set (pc) -+ (if_then_else (match_operator 3 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (lshiftrt:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] -+{ -+ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); -+ operands[4] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*masktrue_const_shifted_mask" -+ [(set (pc) -+ (if_then_else (match_operator 4 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "shifted_mask_operand" "i")) -+ (match_operand:SI 2 "const_int_operand" "i")]) -+ (label_ref (match_operand 3 "" "")) -+ (pc)))] -+ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 -+ && 
xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 6) -+ (zero_extract:SI (match_dup 0) -+ (match_dup 5) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 4 -+ [(match_dup 6) -+ (match_dup 2)]) -+ (label_ref (match_dup 3)) -+ (pc)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[1]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[1] = GEN_INT (mask_pos); -+ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); -+ operands[5] = GEN_INT (mask_size); -+ operands[6] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") -+ (const_int 5) -+ (const_int 6)))]) - - - ;; Zero-overhead looping support. 
-@@ -1696,18 +2158,13 @@ - (match_operand 1 "" ""))] - "" - { -- rtx addr = XEXP (operands[0], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (0, operands); - }) - - (define_insn "call_internal" - [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) - (match_operand 1 "" "i"))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (0, operands); - } -@@ -1721,19 +2178,14 @@ - (match_operand 2 "" "")))] - "" - { -- rtx addr = XEXP (operands[1], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (1, operands); - }) - - (define_insn "call_value_internal" - [(set (match_operand 0 "register_operand" "=a") - (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) - (match_operand 2 "" "i")))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (1, operands); - } -@@ -1741,6 +2193,70 @@ - (set_attr "mode" "none") - (set_attr "length" "3")]) - -+(define_expand "sibcall" -+ [(call (match_operand 0 "memory_operand" "") -+ (match_operand 1 "" ""))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (0, operands); -+}) -+ -+(define_insn "sibcall_internal" -+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) -+ (match_operand 1 "" "i"))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (0, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(call (mem:SI (match_operand:SI 0 "register_operand")) -+ (match_operand 1 ""))] -+ "reload_completed -+ && 
!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[0]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 0)) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 1))]) -+ -+(define_expand "sibcall_value" -+ [(set (match_operand 0 "register_operand" "") -+ (call (match_operand 1 "memory_operand" "") -+ (match_operand 2 "" "")))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (1, operands); -+}) -+ -+(define_insn "sibcall_value_internal" -+ [(set (match_operand 0 "register_operand" "=a") -+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) -+ (match_operand 2 "" "i")))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (1, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(set (match_operand 0 "register_operand") -+ (call (mem:SI (match_operand:SI 1 "register_operand")) -+ (match_operand 2 "")))] -+ "reload_completed -+ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[1]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 1)) -+ (set (match_dup 0) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 2)))]) -+ - (define_insn "entry" - [(set (reg:SI A1_REG) - (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] -@@ -1762,7 +2278,10 @@ - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "2")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - - ;; Miscellaneous instructions. 
-@@ -1805,7 +2324,15 @@ - [(return)] - "" - { -- xtensa_expand_epilogue (); -+ xtensa_expand_epilogue (false); -+ DONE; -+}) -+ -+(define_expand "sibcall_epilogue" -+ [(return)] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_expand_epilogue (true); - DONE; - }) - -@@ -1817,7 +2344,10 @@ - } - [(set_attr "type" "nop") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - (define_expand "nonlocal_goto" - [(match_operand:SI 0 "general_operand" "") -@@ -1881,8 +2411,9 @@ - [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] - "" - "" -- [(set_attr "length" "0") -- (set_attr "type" "nop")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - ;; Do not schedule instructions accessing memory before this point. - -@@ -1901,7 +2432,9 @@ - (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] - "" - "" -- [(set_attr "length" "0")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] -@@ -1914,7 +2447,10 @@ - } - [(set_attr "type" "trap") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't - ;; know if a frame pointer is required until the reload pass, and -@@ -2177,3 +2713,103 @@ - xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); - DONE; - }) -+ -+(define_insn_and_split "*round_up_to_even" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 1)) -+ (const_int -2)))] -+ "" -+ "#" -+ "can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (and:SI (match_dup 1) -+ (const_int 1))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 2) -+ 
(match_dup 1)))] -+{ -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*signed_ge_zero" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ge:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 0)))] -+ "" -+ "#" -+ "" -+ [(set (match_dup 0) -+ (ashiftrt:SI (match_dup 1) -+ (const_int 31))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 0) -+ (const_int 1)))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_peephole2 -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 6 "reload_operand")) -+ (set (match_operand:SI 1 "register_operand") -+ (match_operand:SI 7 "reload_operand")) -+ (set (match_operand:SF 2 "register_operand") -+ (match_operand:SF 4 "register_operand")) -+ (set (match_operand:SF 3 "register_operand") -+ (match_operand:SF 5 "register_operand"))] -+ "REGNO (operands[0]) == REGNO (operands[4]) -+ && REGNO (operands[1]) == REGNO (operands[5]) -+ && peep2_reg_dead_p (4, operands[0]) -+ && peep2_reg_dead_p (4, operands[1])" -+ [(set (match_dup 2) -+ (match_dup 6)) -+ (set (match_dup 3) -+ (match_dup 7))] -+{ -+ uint32_t check = 0; -+ int i; -+ for (i = 0; i <= 3; ++i) -+ { -+ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); -+ if (check & mask) -+ FAIL; -+ check |= mask; -+ } -+ operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); -+ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); -+}) -+ -+(define_split -+ [(clobber (match_operand:DSC 0 "register_operand"))] -+ "GP_REG_P (REGNO (operands[0]))" -+ [(const_int 0)] -+{ -+ unsigned int regno = REGNO (operands[0]); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ rtx_insn *insn; -+ rtx x; -+ if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno -+ && (insn = next_nonnote_nondebug_insn (insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) -+ FAIL; -+}) -diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt -index aef67970b..97aa44f92 100644 ---- a/gcc/config/xtensa/xtensa.opt -+++ b/gcc/config/xtensa/xtensa.opt -@@ -27,9 +27,13 @@ Target Report Mask(FORCE_NO_PIC) - Disable position-independent code (PIC) for use in OS kernel code. - - mlongcalls --Target -+Target Mask(LONGCALLS) - Use indirect CALLXn instructions for large programs. - -+mextra-l32r-costs= -+Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) -+Set extra memory access cost for L32R instruction, in clock-cycle units. -+ - mtarget-align - Target - Automatically align branch targets to reduce branch penalties. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index eabeec944..c35f51afb 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. - -mtext-section-literals -mno-text-section-literals @gol - -mauto-litpools -mno-auto-litpools @gol - -mtarget-align -mno-target-align @gol ---mlongcalls -mno-longcalls} -+-mlongcalls -mno-longcalls @gol -+-mextra-l32r-costs=@var{cycles}} - - @emph{zSeries Options} - See S/390 and zSeries Options. -@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call - instructions---look at the disassembled object code to see the actual - instructions. Note that the assembler uses an indirect call for - every cross-file call, not just those that really are out of range. 
-+ -+@item -mextra-l32r-costs=@var{n} -+@opindex mextra-l32r-costs -+Specify an extra cost of instruction RAM/ROM access for @code{L32R} -+instructions, in clock cycles. This affects, when optimizing for speed, -+whether loading a constant from literal pool using @code{L32R} or -+synthesizing the constant from a small one with a couple of arithmetic -+instructions. The default value is 0. - @end table - - @node zSeries Options -diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -new file mode 100644 -index 000000000..ba61c6f37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+extern void foo(void); -+ -+void BNONE_test(int a, int b) -+{ -+ if (a & b) -+ foo(); -+} -+ -+void BANY_test(int a, int b) -+{ -+ if (!(a & b)) -+ foo(); -+} -+ -+void BALL_test(int a, int b) -+{ -+ if (~a & b) -+ foo(); -+} -+ -+void BNALL_test(int a, int b) -+{ -+ if (!(~a & b)) -+ foo(); -+} -+ -+/* { dg-final { scan-assembler-times "bnone" 1 } } */ -+/* { dg-final { scan-assembler-times "bany" 1 } } */ -+/* { dg-final { scan-assembler-times "ball" 1 } } */ -+/* { dg-final { scan-assembler-times "bnall" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -new file mode 100644 -index 000000000..a0c885baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return 
__builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -new file mode 100644 -index 000000000..4cf95b925 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "ssai" 4 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -new file mode 100644 -index 000000000..1e010fd62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 4 } } */ 
-diff --git a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -new file mode 100644 -index 000000000..6a04aaeef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int check_zero_byte(int v) -+{ -+ return (v - 0x01010101) & ~v & 0x80808080; -+} -+ -+/* { dg-final { scan-assembler-not "movi" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -new file mode 100644 -index 000000000..ec2606ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+int test_0(void) -+{ -+ return 4095; -+} -+ -+int test_1(void) -+{ -+ return 2147483647; -+} -+ -+int test_2(void) -+{ -+ return -34816; -+} -+ -+int test_3(void) -+{ -+ return -2049; -+} -+ -+int test_4(void) -+{ -+ return 2048; -+} -+ -+int test_5(void) -+{ -+ return 34559; -+} -+ -+int test_6(void) -+{ -+ return 43680; -+} -+ -+void test_7(int *p) -+{ -+ *p = -1432354816; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -new file mode 100644 -index 000000000..f3c4a1c7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mextra-l32r-costs=3" } */ -+ -+int test_0(void) -+{ -+ return 134217216; -+} -+ -+int test_1(void) -+{ -+ return -27604992; -+} -+ -+int test_2(void) -+{ -+ return -162279; -+} -+ -+void test_3(int *p) -+{ -+ *p = 192437; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -new file mode 100644 -index 000000000..11e5d5242 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+void test(unsigned int count, double array[]) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ array[i] = 1.0; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -new file mode 100644 -index 000000000..c8f987ccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(const void *addr) -+{ -+ unsigned int n = (unsigned int)addr; -+ const unsigned int *a = (const unsigned int*)(n & ~3); -+ n = (n & 3) * 8; -+ return (a[0] >> n) | (a[1] << (32 - n)); -+} -+ -+unsigned int test_1(unsigned int a, unsigned int b) -+{ -+ return (a >> 16) + (b << 16); -+} -+ -+/* { dg-final { scan-assembler-times "src" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -new file mode 100644 -index 000000000..608f65fd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+int one_cmpl_abs(int a) -+{ -+ return a < 0 ? 
~a : a; -+} -+ -+/* { dg-final { scan-assembler-not "bgez" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -new file mode 100644 -index 000000000..7a4018796 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -foptimize-sibling-calls" } */ -+ -+extern int foo(int); -+extern void bar(int); -+ -+int test_0(int a) { -+ return foo(a); -+} -+ -+void test_1(int a) { -+ bar(a); -+} -+ -+int test_2(int (*a)(void)) { -+ bar(0); -+ return a(); -+} -+ -+/* { dg-final { scan-assembler-not "ret" } } */ -diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S -index b19deae14..ad9072c40 100644 ---- a/libgcc/config/xtensa/lib1funcs.S -+++ b/libgcc/config/xtensa/lib1funcs.S -@@ -456,6 +456,29 @@ __nsau_data: - #endif /* L_clz */ - - -+#ifdef L_clrsbsi2 -+ .align 4 -+ .global __clrsbsi2 -+ .type __clrsbsi2, @function -+__clrsbsi2: -+ leaf_entry sp, 16 -+#if XCHAL_HAVE_NSA -+ nsa a2, a2 -+#else -+ srai a3, a2, 31 -+ xor a3, a3, a2 -+ movi a2, 31 -+ beqz a3, .Lreturn -+ do_nsau a2, a3, a4, a5 -+ addi a2, a2, -1 -+.Lreturn: -+#endif -+ leaf_return -+ .size __clrsbsi2, . 
- __clrsbsi2 -+ -+#endif /* L_clrsbsi2 */ -+ -+ - #ifdef L_clzsi2 - .align 4 - .global __clzsi2 -diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa -index 9836c96ae..084618b38 100644 ---- a/libgcc/config/xtensa/t-xtensa -+++ b/libgcc/config/xtensa/t-xtensa -@@ -1,6 +1,6 @@ - LIB1ASMSRC = xtensa/lib1funcs.S - LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ -- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ -+ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _bswapsi2 _bswapdi2 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ --- -2.20.1 - diff --git a/patches/gcc10.1/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch b/patches/gcc10.1/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch new file mode 100644 index 0000000..5aebddc --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch @@ -0,0 +1,48 @@ +From 76ee6b24125c885150e5b493b26b594801998b74 Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Tue, 18 Jan 2022 14:51:40 +0100 +Subject: [PATCH 02/31] xtensa: fix -Wformat-diag warnings. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (print_operand): Fix warnings. + (print_operand_address): Likewise. + (xtensa_multibss_section_type_flags): Likewise. 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 37c6ac1fd..b1dbe8520 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2379,7 +2379,7 @@ void + print_operand (FILE *file, rtx x, int letter) + { + if (!x) +- error ("PRINT_OPERAND null pointer"); ++ error ("%<PRINT_OPERAND%> null pointer"); + + switch (letter) + { +@@ -2584,7 +2584,7 @@ void + print_operand_address (FILE *file, rtx addr) + { + if (!addr) +- error ("PRINT_OPERAND_ADDRESS, null pointer"); ++ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); + + switch (GET_CODE (addr)) + { +@@ -3697,7 +3697,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " +- ".bss section"); ++ "%<.bss%> section"); + } + + return flags; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch b/patches/gcc10.1/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch new file mode 100644 index 0000000..46260ef --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch @@ -0,0 +1,74 @@ +From b5b9fd01c4db135893c44e82a9f33c2411e993d0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:06 +0900 +Subject: [PATCH 03/31] xtensa: Rename deprecated extv/extzv insn patterns to + extvsi/extzvsi + +These patterns were deprecated since GCC 4.8. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi, + extzvsi_internal): Rename from extv, extv_internal, extzv and + extzv_internal, respectively. 
+--- + gcc/config/xtensa/xtensa.md | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 123916957..251c313d5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -634,7 +634,7 @@ + + ;; Field extract instructions. + +-(define_expand "extv" ++(define_expand "extvsi" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -649,12 +649,12 @@ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + +- emit_insn (gen_extv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extv_internal" ++(define_insn "extvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") +@@ -669,7 +669,7 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "extzv" ++(define_expand "extzvsi" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -678,12 +678,12 @@ + { + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; +- emit_insn (gen_extzv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extzvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extzv_internal" ++(define_insn "extzvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch 
b/patches/gcc10.1/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch new file mode 100644 index 0000000..607367c --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch @@ -0,0 +1,41 @@ +From 12fa0b13b6f0c52e5c4d75f39822771a7f780f94 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:19 +0900 +Subject: [PATCH 04/31] xtensa: Reflect the 32-bit Integer Divide Option + +On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider), +this patch reduces the size of each: + + __moddi3() @ libgcc.a : 969 -> 301 (saves 668) + __divmoddi4() : 1111 -> 426 (saves 685) + __udivmoddi4() : 1043 -> 319 (saves 724) + +in bytes, respectively. + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro + definition. +--- + gcc/config/xtensa/xtensa.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index fa86a245e..5b102de51 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. If not see + #define HAVE_AS_TLS 0 + #endif + ++/* Define this if the target has no hardware divide instructions. */ ++#if !TARGET_DIV32 ++#define TARGET_HAS_NO_HW_DIVIDE ++#endif ++ + + /* Target CPU builtins. 
*/ + #define TARGET_CPU_CPP_BUILTINS() \ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch b/patches/gcc10.1/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch new file mode 100644 index 0000000..8d257cd --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch @@ -0,0 +1,78 @@ +From 49383c9381a937b360adeb14f5e7bd4472f7c386 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:26:30 +0900 +Subject: [PATCH 05/31] xtensa: Simplify EXTUI instruction maskimm validations + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (extui_fldsz_operand): Simplify. + * config/xtensa/xtensa.c (xtensa_mask_immediate, print_operand): + Ditto. +--- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa.c | 24 +++--------------------- + 2 files changed, 4 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index eb52b05aa..3f84859b6 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -55,7 +55,7 @@ + + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") +- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) + + (define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b1dbe8520..4043f40ce 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -446,19 +446,7 @@ xtensa_b4constu (HOST_WIDE_INT v) + bool + xtensa_mask_immediate (HOST_WIDE_INT v) + { +-#define MAX_MASK_SIZE 16 +- int mask_size; +- +- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) +- { +- if ((v & 1) == 0) +- return false; +- v = v >> 1; +- if (v == 0) +- return true; +- } +- +- return false; ++ return IN_RANGE (exact_log2 
(v + 1), 1, 16); + } + + +@@ -2424,17 +2412,11 @@ print_operand (FILE *file, rtx x, int letter) + case 'K': + if (GET_CODE (x) == CONST_INT) + { +- int num_bits = 0; + unsigned val = INTVAL (x); +- while (val & 1) +- { +- num_bits += 1; +- val = val >> 1; +- } +- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) ++ if (!xtensa_mask_immediate (val)) + fatal_insn ("invalid mask", x); + +- fprintf (file, "%d", num_bits); ++ fprintf (file, "%d", floor_log2 (val + 1)); + } + else + output_operand_lossage ("invalid %%K value"); +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch b/patches/gcc10.1/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch new file mode 100644 index 0000000..419ebfe --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch @@ -0,0 +1,174 @@ +From fa7073ff572c248896057a5a7841a3e1d98380ad Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:27:36 +0900 +Subject: [PATCH 06/31] xtensa: Make use of IN_RANGE macro where appropriate + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/constraints.md (M, O): Use the macro. + * config/xtensa/predicates.md (addsubx_operand, extui_fldsz_operand, + sext_fldsz_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_simm8, xtensa_simm8x256, + xtensa_simm12b, xtensa_uimm8, xtensa_uimm8x2, xtensa_uimm8x4, + xtensa_mask_immediate, smalloffset_mem_p, printx, xtensa_call_save_reg, + xtensa_expand_prologue): Ditto. + * config/xtensa/xtensa.h (FUNCTION_ARG_REGNO_P): Ditto. 
+--- + gcc/config/xtensa/constraints.md | 4 ++-- + gcc/config/xtensa/predicates.md | 5 ++--- + gcc/config/xtensa/xtensa.c | 20 ++++++++++---------- + gcc/config/xtensa/xtensa.h | 2 +- + 4 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 2062c8816..9a8caab4f 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -92,7 +92,7 @@ + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") +- (match_test "ival >= -32 && ival <= 95"))) ++ (match_test "IN_RANGE (ival, -32, 95)"))) + + (define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use +@@ -103,7 +103,7 @@ + (define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") +- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) ++ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) + + (define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 3f84859b6..91b9343a2 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -25,8 +25,7 @@ + + (define_predicate "addsubx_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 1 +- && INTVAL (op) <= 3"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + + (define_predicate "arith_operand" + (ior (and (match_code "const_int") +@@ -64,7 +63,7 @@ + + (define_predicate "sext_fldsz_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) + + (define_predicate "lsbitnum_operand" + (and (match_code "const_int") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 4043f40ce..02dc5799a 100644 +--- a/gcc/config/xtensa/xtensa.c 
++++ b/gcc/config/xtensa/xtensa.c +@@ -341,42 +341,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; + bool + xtensa_simm8 (HOST_WIDE_INT v) + { +- return v >= -128 && v <= 127; ++ return IN_RANGE (v, -128, 127); + } + + + bool + xtensa_simm8x256 (HOST_WIDE_INT v) + { +- return (v & 255) == 0 && (v >= -32768 && v <= 32512); ++ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); + } + + + bool + xtensa_simm12b (HOST_WIDE_INT v) + { +- return v >= -2048 && v <= 2047; ++ return IN_RANGE (v, -2048, 2047); + } + + + static bool + xtensa_uimm8 (HOST_WIDE_INT v) + { +- return v >= 0 && v <= 255; ++ return IN_RANGE (v, 0, 255); + } + + + static bool + xtensa_uimm8x2 (HOST_WIDE_INT v) + { +- return (v & 1) == 0 && (v >= 0 && v <= 510); ++ return (v & 1) == 0 && IN_RANGE (v, 0, 510); + } + + + static bool + xtensa_uimm8x4 (HOST_WIDE_INT v) + { +- return (v & 3) == 0 && (v >= 0 && v <= 1020); ++ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); + } + + +@@ -527,7 +527,7 @@ smalloffset_mem_p (rtx op) + return FALSE; + + val = INTVAL (offset); +- return (val & 3) == 0 && (val >= 0 && val <= 60); ++ return (val & 3) == 0 && IN_RANGE (val, 0, 60); + } + } + return FALSE; +@@ -2352,7 +2352,7 @@ static void + printx (FILE *file, signed int val) + { + /* Print a hexadecimal value in a nice way. */ +- if ((val > -0xa) && (val < 0xa)) ++ if (IN_RANGE (val, -9, 9)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); +@@ -2732,7 +2732,7 @@ xtensa_call_save_reg(int regno) + return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || + df_regs_ever_live_p (regno); + +- if (crtl->calls_eh_return && regno >= 2 && regno < 4) ++ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; + + return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); +@@ -2852,7 +2852,7 @@ xtensa_expand_prologue (void) + int callee_save_size = cfun->machine->callee_save_size; + + /* -128 is a limit of single addi instruction. 
*/ +- if (total_size > 0 && total_size <= 128) ++ if (IN_RANGE (total_size, 1, 128)) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-total_size))); +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 5b102de51..3e9cbc943 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -493,7 +493,7 @@ enum reg_class + used for this purpose since all function arguments are pushed on + the stack. */ + #define FUNCTION_ARG_REGNO_P(N) \ +- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) ++ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) + + /* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch b/patches/gcc10.1/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch new file mode 100644 index 0000000..dae4a21 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch @@ -0,0 +1,54 @@ +From 5cda5b41a7646d220f7351226b5da78955b0fc7f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:29:22 +0900 +Subject: [PATCH 07/31] xtensa: Fix instruction counting regarding block move + expansion + +This patch makes counting the number of instructions of the remainder +(modulo 4) part more accurate. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_move): + Make instruction counting more accurate, and simplify emitting insns. 
+--- + gcc/config/xtensa/xtensa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 02dc5799a..0fe8b73ad 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1303,7 +1303,7 @@ xtensa_expand_block_move (rtx *operands) + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; +- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ ++ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); + if (num_pieces > move_ratio) + return 0; + +@@ -1340,7 +1340,7 @@ xtensa_expand_block_move (rtx *operands) + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); +- emit_insn (gen_rtx_SET (temp[next], x)); ++ emit_move_insn (temp[next], x); + + offset_ld += next_amount; + bytes -= next_amount; +@@ -1350,9 +1350,9 @@ xtensa_expand_block_move (rtx *operands) + if (active[phase]) + { + active[phase] = false; +- ++ + x = adjust_address (dst_mem, mode[phase], offset_st); +- emit_insn (gen_rtx_SET (x, temp[phase])); ++ emit_move_insn (x, temp[phase]); + + offset_st += amount[phase]; + } +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch new file mode 100644 index 0000000..a7212ce --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch @@ -0,0 +1,303 @@ +From 02572a935a2cbabc96387289300fb78d61dde555 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 24 May 2022 00:52:44 +0900 +Subject: [PATCH 08/31] xtensa: Add setmemsi insn pattern + +This patch introduces setmemsi insn pattern of two kinds, unrolled loop and +small loop, for fixed small length and constant initialization value. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New prototypes. 
+ * config/xtensa/xtensa.c (xtensa_sizeof_MOVI, + xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New functions. + * config/xtensa/xtensa.md (setmemsi): New expansion pattern. + * config/xtensa/xtensa.opt (mlongcalls): Add target mask. +--- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 211 ++++++++++++++++++++++++++++++ + gcc/config/xtensa/xtensa.md | 16 +++ + gcc/config/xtensa/xtensa.opt | 2 +- + 4 files changed, 230 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 18d803581..80b1da2bb 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); + extern int xtensa_expand_conditional_move (rtx *, int); + extern int xtensa_expand_scc (rtx *, machine_mode); + extern int xtensa_expand_block_move (rtx *); ++extern int xtensa_expand_block_set_unrolled_loop (rtx *); ++extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0fe8b73ad..a6d76a953 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1363,6 +1363,217 @@ xtensa_expand_block_move (rtx *operands) + } + + ++/* Try to expand a block set operation to a sequence of RTL move ++ instructions. If not optimizing, or if the block size is not a ++ constant, or if the block is too large, or if the value to ++ initialize the block with is not a constant, the expansion ++ fails and GCC falls back to calling memset(). 
++ ++ operands[0] is the destination ++ operands[1] is the length ++ operands[2] is the initialization value ++ operands[3] is the alignment */ ++ ++static int ++xtensa_sizeof_MOVI (HOST_WIDE_INT imm) ++{ ++ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3; ++} ++ ++int ++xtensa_expand_block_set_unrolled_loop (rtx *operands) ++{ ++ rtx dst_mem = operands[0]; ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, reg; ++ int offset; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. */ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: a series of aligned memory stores. ++ Consist of S8I, S16I or S32I(.N). */ ++ expand_len += (bytes / align) * (TARGET_DENSITY ++ && align == 4 ? 2 : 3); ++ /* Insn expansion: the remainder, sub-aligned memory stores. ++ A combination of S8I and S16I as needed. */ ++ expand_len += ((bytes % align + 1) / 2) * 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (dst_mem, 0); ++ if (!REG_P (x)) ++ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); ++ switch (align) ++ { ++ case 1: ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (SImode, GEN_INT (value)); ++ ++ offset = 0; ++ do ++ { ++ int unit_size = MIN (bytes, align); ++ machine_mode unit_mode = (unit_size >= 4 ? SImode : ++ (unit_size >= 2 ? HImode : ++ QImode)); ++ unit_size = GET_MODE_SIZE (unit_mode); ++ ++ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), ++ unit_mode == SImode ? reg ++ : convert_to_mode (unit_mode, reg, true)); ++ ++ offset += unit_size; ++ bytes -= unit_size; ++ } ++ while (bytes > 0); ++ ++ return 1; ++} ++ ++int ++xtensa_expand_block_set_small_loop (rtx *operands) ++{ ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, dst, end, reg; ++ machine_mode unit_mode; ++ rtx_code_label *label; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Totally-aligned block only. */ ++ if (bytes % align != 0) ++ return 0; ++ ++ /* If 4-byte aligned, small loop substitution is almost optimal, thus ++ limited to only offset to the end address for ADDI/ADDMI instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; ++ ++ /* If no 4-byte aligned, loop count should be treated as the constraint. */ ++ if (align != 4 ++ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) ++ return 0; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. 
*/ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (operands[0], 0); ++ if (!REG_P (x)) ++ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); ++ dst = gen_reg_rtx (SImode); ++ emit_move_insn (dst, x); ++ end = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ switch (align) ++ { ++ case 1: ++ unit_mode = QImode; ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ unit_mode = HImode; ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ unit_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (unit_mode, GEN_INT (value)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); ++ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); ++ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); ++ ++ return 1; ++} ++ ++ + void + xtensa_expand_nonlocal_goto (rtx *operands) + { +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 251c313d5..9eb689efa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1085,6 +1085,22 @@ + DONE; + }) + ++;; Block sets ++ ++(define_expand "setmemsi" ++ [(match_operand:BLK 0 "memory_operand") ++ (match_operand:SI 1 "") ++ (match_operand:SI 2 "") ++ (match_operand:SI 3 "const_int_operand")] ++ "!optimize_debug && optimize" ++{ ++ if (xtensa_expand_block_set_unrolled_loop (operands)) ++ DONE; ++ if (xtensa_expand_block_set_small_loop (operands)) ++ DONE; ++ FAIL; ++}) ++ + + ;; Shift instructions. + +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index aef67970b..e1d992f5d 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -27,7 +27,7 @@ Target Report Mask(FORCE_NO_PIC) + Disable position-independent code (PIC) for use in OS kernel code. 
+ + mlongcalls +-Target ++Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + + mtarget-align +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch b/patches/gcc10.1/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch new file mode 100644 index 0000000..a5fb6f1 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch @@ -0,0 +1,254 @@ +From be1ca3aa6e9754ed16d1b7a60657912af02844da Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:33:59 +0900 +Subject: [PATCH 09/31] xtensa: Improve bswap[sd]i2 insn patterns + +This patch makes bswap[sd]i2 better register allocation, and reconstructs +bswapsi2 in order to take advantage of GIMPLE manual byte-swapping +recognition. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2): New expansion pattern. + (bswapsi2_internal): Revise the template and condition, and add + detection code for preceding the same insn in order to omit a + "SSAI 8" instruction of the latter. + (bswapdi2): Suppress built-in insn expansion with the corresponding + library call when optimizing for size. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/bswap-O1.c: New. + * gcc.target/xtensa/bswap-O2.c: Ditto. + * gcc.target/xtensa/bswap-Os.c: Ditto. 
+--- + gcc/config/xtensa/xtensa.md | 77 +++++++++++++++++----- + gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 +++++++++++ + 4 files changed, 172 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eb689efa..cea280061 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,23 +471,68 @@ + + ;; Byte swap. + +-(define_insn "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=&a") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "15")]) ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1" ++{ ++ /* GIMPLE manual byte-swapping recognition is now activated. ++ For both built-in and manual bswaps, emit corresponding library call ++ if optimizing for size, or a series of dedicated machine instructions ++ if otherwise. 
*/ ++ if (optimize_size) ++ emit_library_call_value (optab_libfunc (bswap_optab, SImode), ++ operands[0], LCT_NORMAL, SImode, ++ operands[1], SImode); ++ else ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++}) + +-(define_insn "bswapdi2" +- [(set (match_operand:DI 0 "register_operand" "=&a") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "length" "27")]) ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=a,&a") ++ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) ++ (clobber (match_scratch:SI 2 "=&a,X"))] ++ "!optimize_debug && optimize > 1 && !optimize_size" ++{ ++ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); ++ const char *init = "ssai\t8\;"; ++ static char result[128]; ++ if (prev_insn && NONJUMP_INSN_P (prev_insn)) ++ { ++ rtx x = PATTERN (prev_insn); ++ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 ++ && GET_CODE (XVECEXP (x, 0, 0)) == SET ++ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) ++ { ++ x = XEXP (XVECEXP (x, 0, 0), 1); ++ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) ++ init = ""; ++ } ++ } ++ sprintf (result, ++ (which_alternative == 0) ++ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" ++ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", ++ init); ++ return result; ++} ++ [(set_attr "type" "arith,arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "15,15")]) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (bswap:DI (match_operand:DI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1 && optimize_size" ++{ ++ /* Replace with a single DImode library call. 
++ Without this, two SImode library calls are emitted. */ ++ emit_library_call_value (optab_libfunc (bswap_optab, DImode), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], DImode); ++ DONE; ++}) + + + ;; Negation and one's complement. +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +new file mode 100644 +index 000000000..a0c885baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +new file mode 100644 +index 000000000..4cf95b925 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} 
++ ++/* { dg-final { scan-assembler-times "ssai" 4 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +new file mode 100644 +index 000000000..1e010fd62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 4 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0015-fix-PR-target-105879.patch b/patches/gcc10.1/gcc-xtensa-0015-fix-PR-target-105879.patch new file mode 100644 index 0000000..2c21f47 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0015-fix-PR-target-105879.patch @@ -0,0 +1,48 @@ +From 1848b547a6ac69a002d068239a5bc9463f3fae25 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 7 Jun 2022 21:01:01 -0700 +Subject: [PATCH 10/31] gcc: xtensa: fix PR target/105879 + +split_double operates with the 'word that comes first in memory in the +target' terminology, while gen_lowpart operates with the 'value +representing some low-order bits of X' terminology. They are not +equivalent and must be dealt with differently on little- and big-endian +targets. + +gcc/ + PR target/105879 + * config/xtensa/xtensa.md (movdi): Rename 'first' and 'second' + to 'lowpart' and 'highpart' so that they match 'gen_lowpart' and + 'gen_highpart' bitwise semantics and fix order of highpart and + lowpart depending on target endianness. 
+--- + gcc/config/xtensa/xtensa.md | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index cea280061..30d8ef96c 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -802,11 +802,14 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx first, second; +- +- split_double (operands[1], &first, &second); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); ++ rtx lowpart, highpart; ++ ++ if (TARGET_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); ++ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); + DONE; + } + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch new file mode 100644 index 0000000..3a31e62 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch @@ -0,0 +1,39 @@ +From f47a902c9a94d2e9df879de4613dae62c8e9cc4f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:44:32 +0900 +Subject: [PATCH 11/31] xtensa: Implement bswaphi2 insn pattern + +This patch adds bswaphi2 insn pattern that is one instruction less than the +default expansion. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswaphi2): New insn pattern. +--- + gcc/config/xtensa/xtensa.md | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 30d8ef96c..c1f44777d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,6 +471,16 @@ + + ;; Byte swap. 
+ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=a") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++ (clobber (match_scratch:HI 2 "=&a"))] ++ "" ++ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "HI") ++ (set_attr "length" "9")]) ++ + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch b/patches/gcc10.1/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch new file mode 100644 index 0000000..017a30f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch @@ -0,0 +1,86 @@ +From 22b5756399ef63a4102334724b12a4c186075227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:46:16 +0900 +Subject: [PATCH 12/31] xtensa: Make one_cmplsi2 optimizer-friendly + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. But a few optimizers assume that bitwise negation can be +done by a single insn. + +As a result, '((x < 0) ? ~x : x)' cannot be optimized to '(x ^ (x >> 31))' +ever before, for example. + +This patch relaxes such limitation, by putting the insn expansion off till +the split pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (one_cmplsi2): + Rearrange as an insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/one_cmpl_abs.c: New. 
+--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++------ + .../gcc.target/xtensa/one_cmpl_abs.c | 9 +++++++ + 2 files changed, 27 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c1f44777d..2f6d48d03 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -556,16 +556,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "") +- (not:SI (match_operand:SI 1 "register_operand" "")))] ++(define_insn_and_split "one_cmplsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (const_int -1)) ++ (set (match_dup 0) ++ (xor:SI (match_dup 1) ++ (match_dup 2)))] + { +- rtx temp = gen_reg_rtx (SImode); +- emit_insn (gen_movsi (temp, constm1_rtx)); +- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); +- DONE; +-}) ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +new file mode 100644 +index 000000000..608f65fd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++int one_cmpl_abs(int a) ++{ ++ return a < 0 ? 
~a : a; ++} ++ ++/* { dg-final { scan-assembler-not "bgez" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch b/patches/gcc10.1/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch new file mode 100644 index 0000000..d1167a1 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch @@ -0,0 +1,71 @@ +From cc259b2801c8d04c39169214041305fdd5b87acd Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:55:44 +0900 +Subject: [PATCH 13/31] xtensa: Optimize '(~x & y)' to '((x & y) ^ y)' + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*andsi3_bitcmpl): + New insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/check_zero_byte.c: New. +--- + gcc/config/xtensa/xtensa.md | 20 +++++++++++++++++++ + .../gcc.target/xtensa/check_zero_byte.c | 9 +++++++++ + 2 files changed, 29 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2f6d48d03..28ed1d34e 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -601,6 +601,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + ++(define_insn_and_split "*andsi3_bitcmpl" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 3) ++ (and:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (xor:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +diff --git 
a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +new file mode 100644 +index 000000000..6a04aaeef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++int check_zero_byte(int v) ++{ ++ return (v - 0x01010101) & ~v & 0x80808080; ++} ++ ++/* { dg-final { scan-assembler-not "movi" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch new file mode 100644 index 0000000..ebaa985 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch @@ -0,0 +1,98 @@ +From ebd48d915076589f04b5c1ed50f9f5ddfae088e8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:57:35 +0900 +Subject: [PATCH 14/31] xtensa: Add clrsbsi2 insn pattern + +> (clrsb:m x) +> Represents the number of redundant leading sign bits in x, represented +> as an integer of mode m, starting at the most significant bit position. + +This explanation is just what the NSA instruction (not ever emitted before) +calculates in Xtensa ISA. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (clrsbsi2): New insn pattern. + +libgcc/ChangeLog: + + * config/xtensa/lib1funcs.S (__clrsbsi2): New function. + * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _clrsbsi2. +--- + gcc/config/xtensa/xtensa.md | 12 +++++++++++- + libgcc/config/xtensa/lib1funcs.S | 23 +++++++++++++++++++++++ + libgcc/config/xtensa/t-xtensa | 2 +- + 3 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 28ed1d34e..6c76fb942 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -429,7 +429,17 @@ + (set_attr "length" "3")]) + + +-;; Count leading/trailing zeros and find first bit. 
++;; Count redundant leading sign bits and leading/trailing zeros, ++;; and find first bit. ++ ++(define_insn "clrsbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_NSA" ++ "nsa\t%0, %1" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "3")]) + + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") +diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S +index b19deae14..ad9072c40 100644 +--- a/libgcc/config/xtensa/lib1funcs.S ++++ b/libgcc/config/xtensa/lib1funcs.S +@@ -456,6 +456,29 @@ __nsau_data: + #endif /* L_clz */ + + ++#ifdef L_clrsbsi2 ++ .align 4 ++ .global __clrsbsi2 ++ .type __clrsbsi2, @function ++__clrsbsi2: ++ leaf_entry sp, 16 ++#if XCHAL_HAVE_NSA ++ nsa a2, a2 ++#else ++ srai a3, a2, 31 ++ xor a3, a3, a2 ++ movi a2, 31 ++ beqz a3, .Lreturn ++ do_nsau a2, a3, a4, a5 ++ addi a2, a2, -1 ++.Lreturn: ++#endif ++ leaf_return ++ .size __clrsbsi2, . 
- __clrsbsi2 ++ ++#endif /* L_clrsbsi2 */ ++ ++ + #ifdef L_clzsi2 + .align 4 + .global __clzsi2 +diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa +index 9836c96ae..084618b38 100644 +--- a/libgcc/config/xtensa/t-xtensa ++++ b/libgcc/config/xtensa/t-xtensa +@@ -1,6 +1,6 @@ + LIB1ASMSRC = xtensa/lib1funcs.S + LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ +- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ ++ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _bswapsi2 _bswapdi2 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch b/patches/gcc10.1/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch new file mode 100644 index 0000000..8de8a89 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch @@ -0,0 +1,110 @@ +From 1ba9369255749ccf9ec82565a192b1a523b0e374 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:17:40 +0900 +Subject: [PATCH 15/31] xtensa: Tweak some widen multiplications + +umulsidi3 is faster than umuldi3 even if library call, and is also +prerequisite for fast constant division by multiplication. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (mulsidi3, umulsidi3): + Split into individual signedness, in order to use libcall + "__umulsidi3" but not the other. + (mulhisi3): Merge into one by using code iterator. + (mulsidi3, mulhisi3, umulhisi3): Remove. +--- + gcc/config/xtensa/xtensa.md | 56 +++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6c76fb942..3314b3fd6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -224,20 +224,42 @@ + + ;; Multiplication. 
+ +-(define_expand "<u>mulsidi3" ++(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") +- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) +- (any_extend:DI (match_operand:SI 2 "register_operand"))))] ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); +- emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), +- operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; + }) + ++(define_expand "umulsidi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "" ++{ ++ if (TARGET_MUL32_HIGH) ++ { ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); ++ } ++ else ++ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], SImode, ++ operands[2], SImode); ++ DONE; ++}) ++ + (define_insn "<u>mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI +@@ -261,30 +283,16 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "mulhisi3" +- [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (sign_extend:SI +- (match_operand:HI 1 "register_operand" "%r,r")) +- (sign_extend:SI +- (match_operand:HI 2 "register_operand" "r,r"))))] +- "TARGET_MUL16 || TARGET_MAC16" +- "@ +- mul16s\t%0, %1, %2 +- mul.aa.ll\t%1, %2" +- [(set_attr "type" 
"mul16,mac16") +- (set_attr "mode" "SI") +- (set_attr "length" "3,3")]) +- +-(define_insn "umulhisi3" ++(define_insn "<u>mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (zero_extend:SI ++ (mult:SI (any_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) +- (zero_extend:SI ++ (any_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ +- mul16u\t%0, %1, %2 +- umul.aa.ll\t%1, %2" ++ mul16<su>\t%0, %1, %2 ++ <u>mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch b/patches/gcc10.1/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch new file mode 100644 index 0000000..491da47 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch @@ -0,0 +1,125 @@ +From bc108c84544d5a0e6289628e8749a92c9695f006 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:18:24 +0900 +Subject: [PATCH 16/31] xtensa: Consider the Loop Option when setmemsi is + expanded to small loop + +Now apply to almost any size of aligned block under such circumstances. 
+ +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_set_small_loop): + Pass through the block length / loop count conditions if + zero-overhead looping is configured and active, +--- + gcc/config/xtensa/xtensa.c | 71 +++++++++++++++++++++++++++----------- + 1 file changed, 50 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a6d76a953..e2f97b79c 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1473,7 +1473,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) + int + xtensa_expand_block_set_small_loop (rtx *operands) + { +- HOST_WIDE_INT bytes, value, align; ++ HOST_WIDE_INT bytes, value, align, count; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; +@@ -1493,17 +1493,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; ++ count = bytes / align; + +- /* If 4-byte aligned, small loop substitution is almost optimal, thus +- limited to only offset to the end address for ADDI/ADDMI instruction. */ +- if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) +- return 0; ++ /* If the Loop Option (zero-overhead looping) is configured and active, ++ almost no restrictions about the length of the block. */ ++ if (! (TARGET_LOOPS && optimize)) ++ { ++ /* If 4-byte aligned, small loop substitution is almost optimal, ++ thus limited to only offset to the end address for ADDI/ADDMI ++ instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; + +- /* If no 4-byte aligned, loop count should be treated as the constraint. */ +- if (align != 4 +- && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) +- return 0; ++ /* If no 4-byte aligned, loop count should be treated as the ++ constraint. */ ++ if (align != 4 ++ && count > ((optimize > 1 && !optimize_size) ? 
8 : 15)) ++ return 0; ++ } + + /* Insn expansion: holding the init value. + Either MOV(.N) or L32R w/litpool. */ +@@ -1513,16 +1521,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; +- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ +- expand_len += bytes > 127 ? 3 +- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; +- +- /* Insn expansion: the loop body and branch instruction. +- For store, one of S8I, S16I or S32I(.N). +- For advance, ADDI(.N). +- For branch, BNE. */ +- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) +- + (TARGET_DENSITY ? 2 : 3) + 3; ++ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ ++ { ++ /* Insn translation: Either MOV(.N) or L32R w/litpool for the ++ loop count. */ ++ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) ++ : 3 + 4; ++ /* Insn translation: LOOP, the zero-overhead looping setup ++ instruction. */ ++ expand_len += 3; ++ /* Insn expansion: the loop body instructions. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3); ++ } ++ else /* NO zero-overhead looping */ ++ { ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ } + + /* Function call: preparing two arguments. 
*/ + funccall_len = xtensa_sizeof_MOVI (value); +@@ -1545,7 +1570,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); +- emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ if (TARGET_LOOPS && optimize) ++ x = force_reg (SImode, operands[1] /* the length */); ++ else ++ x = operands[1]; ++ emit_insn (gen_addsi3 (end, dst, x)); + switch (align) + { + case 1: +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch b/patches/gcc10.1/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch new file mode 100644 index 0000000..5792a6f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch @@ -0,0 +1,325 @@ +From de854e2348b8159bc389471e68023986c8878c92 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:19:32 +0900 +Subject: [PATCH 17/31] xtensa: Improve instruction cost estimation and + suggestion + +This patch implements a new target-specific relative RTL insn cost function +because of suboptimal cost estimation by default, and fixes several "length" +insn attributes (related to the cost estimation). + +And also introduces a new machine-dependent option "-mextra-l32r-costs=" +that tells implementation-specific InstRAM/ROM access penalty for L32R +instruction to the compiler (in clock-cycle units, 0 by default). + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): Correct wrong case + for ABS and NEG, add missing case for BSWAP and CLRSB, and + double the costs for integer divisions using libfuncs if + optimizing for speed, in order to take advantage of fast constant + division by multiplication. + (TARGET_INSN_COST): New macro definition. + (xtensa_is_insn_L32R_p, xtensa_insn_cost): New functions for + calculating relative costs of a RTL insns, for both of speed and + size. 
+ * config/xtensa/xtensa.md (return, nop, trap): Correct values of + the attribute "length" that depends on TARGET_DENSITY. + (define_asm_attributes, blockage, frame_blockage): Add missing + attributes. + * config/xtensa/xtensa.opt (-mextra-l32r-costs=): New machine- + dependent option, however, preparatory work for now. +--- + gcc/config/xtensa/xtensa.c | 116 ++++++++++++++++++++++++++++++++--- + gcc/config/xtensa/xtensa.md | 29 ++++++--- + gcc/config/xtensa/xtensa.opt | 4 ++ + 3 files changed, 134 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e2f97b79c..94ff901c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "hw-doloop.h" + #include "rtl-iter.h" ++#include "insn-attr.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + static section *xtensa_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); ++static int xtensa_insn_cost (rtx_insn *, bool); + static int xtensa_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); +@@ -208,6 +210,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS xtensa_rtx_costs ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST xtensa_insn_cost + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +@@ -3972,7 +3976,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + static bool + xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, +- int *total, bool 
speed ATTRIBUTE_UNUSED) ++ int *total, bool speed) + { + int code = GET_CODE (x); + +@@ -4060,9 +4064,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case CLZ: ++ case CLRSB: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + ++ case BSWAP: ++ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); ++ return true; ++ + case NOT: + *total = COSTS_N_INSNS (mode == DImode ? 3 : 2); + return true; +@@ -4086,13 +4095,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case ABS: ++ case NEG: + { + if (mode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (mode == DFmode) + *total = COSTS_N_INSNS (50); +- else ++ else if (mode == DImode) + *total = COSTS_N_INSNS (4); ++ else ++ *total = COSTS_N_INSNS (1); + return true; + } + +@@ -4108,10 +4120,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + } + +- case NEG: +- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); +- return true; +- + case MULT: + { + if (mode == SFmode) +@@ -4151,11 +4159,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case UMOD: + { + if (mode == DImode) +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + return true; + } + +@@ -4188,6 +4196,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + ++static bool ++xtensa_is_insn_L32R_p(const rtx_insn *insn) ++{ ++ rtx x = PATTERN (insn); ++ ++ if (GET_CODE (x) == SET) ++ { ++ x = XEXP (x, 1); ++ if (GET_CODE (x) == MEM) ++ { ++ x = XEXP (x, 0); ++ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); ++ } ++ } ++ ++ return false; ++} ++ ++/* Compute a relative costs of RTL insns. This is necessary in order to ++ achieve better RTL insn splitting/combination result. 
*/ ++ ++static int ++xtensa_insn_cost (rtx_insn *insn, bool speed) ++{ ++ if (!(recog_memoized (insn) < 0)) ++ { ++ int len = get_attr_length (insn), n = (len + 2) / 3; ++ ++ if (len == 0) ++ return COSTS_N_INSNS (0); ++ ++ if (speed) /* For speed cost. */ ++ { ++ /* "L32R" may be particular slow (implementation-dependent). */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); ++ ++ /* Cost based on the pipeline model. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_STORE: ++ case TYPE_MOVE: ++ case TYPE_ARITH: ++ case TYPE_MULTI: ++ case TYPE_NOP: ++ case TYPE_FSTORE: ++ return COSTS_N_INSNS (n); ++ ++ case TYPE_LOAD: ++ return COSTS_N_INSNS (n - 1 + 2); ++ ++ case TYPE_JUMP: ++ case TYPE_CALL: ++ return COSTS_N_INSNS (n - 1 + 3); ++ ++ case TYPE_FCONV: ++ case TYPE_FLOAD: ++ case TYPE_MUL16: ++ case TYPE_MUL32: ++ case TYPE_RSR: ++ return COSTS_N_INSNS (n * 2); ++ ++ case TYPE_FMADD: ++ return COSTS_N_INSNS (n * 4); ++ ++ case TYPE_DIV32: ++ return COSTS_N_INSNS (n * 16); ++ ++ default: ++ break; ++ } ++ } ++ else /* For size cost. */ ++ { ++ /* Cost based on the instruction length. */ ++ if (get_attr_type (insn) != TYPE_UNKNOWN) ++ { ++ /* "L32R" itself plus constant in litpool. */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (2) + 1; ++ ++ /* Consider ".n" short instructions. */ ++ return COSTS_N_INSNS (n) - (n * 3 - len); ++ } ++ } ++ } ++ ++ /* Fall back. */ ++ return pattern_cost (PATTERN (insn), speed); ++} ++ + /* Worker function for TARGET_RETURN_IN_MEMORY. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3314b3fd6..da6b71d1d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -98,7 +98,10 @@ + + ;; Describe a user's asm statement. 
+ (define_asm_attributes +- [(set_attr "type" "multi")]) ++ [(set_attr "type" "multi") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ;; Should be the maximum possible length ++ ;; of a single machine instruction. + + + ;; Pipeline model. +@@ -1884,7 +1887,10 @@ + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "2")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + + ;; Miscellaneous instructions. +@@ -1939,7 +1945,10 @@ + } + [(set_attr "type" "nop") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + (define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") +@@ -2003,8 +2012,9 @@ + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" +- [(set_attr "length" "0") +- (set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + ;; Do not schedule instructions accessing memory before this point. 
+ +@@ -2023,7 +2033,9 @@ + (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" +- [(set_attr "length" "0")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + (define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] +@@ -2036,7 +2048,10 @@ + } + [(set_attr "type" "trap") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't + ;; know if a frame pointer is required until the reload pass, and +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index e1d992f5d..97aa44f92 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -30,6 +30,10 @@ mlongcalls + Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + ++mextra-l32r-costs= ++Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) ++Set extra memory access cost for L32R instruction, in clock-cycle units. ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch b/patches/gcc10.1/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch new file mode 100644 index 0000000..0e14673 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch @@ -0,0 +1,400 @@ +From ed2c4b57807470b386e9abdf145282e197d9da65 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 11 Jun 2022 00:26:17 +0900 +Subject: [PATCH 18/31] xtensa: Improve constant synthesis for both integer and + floating-point + +This patch revises the previous implementation of constant synthesis. 
+ +First, changed to use define_split machine description pattern and to run +after reload pass, in order not to interfere with some optimizations such as +the loop invariant motion. + +Second, not only integer but floating-point is subject to processing. + +Third, several new synthesis patterns - when the constant cannot fit into +a "MOVI Ax, simm12" instruction, but: + +I. can be represented as a power of two minus one (eg. 32767, 65535 or + 0x7fffffffUL) + => "MOVI(.N) Ax, -1" + "SRLI Ax, Ax, 1 ... 31" (or "EXTUI") +II. is between -34816 and 34559 + => "MOVI(.N) Ax, -2048 ... 2047" + "ADDMI Ax, Ax, -32768 ... 32512" +III. (existing case) can fit into a signed 12-bit if the trailing zero bits + are stripped + => "MOVI(.N) Ax, -2048 ... 2047" + "SLLI Ax, Ax, 1 ... 31" + +The above sequences consist of 5 or 6 bytes and have latency of 2 clock cycles, +in contrast with "L32R Ax, <litpool>" (3 bytes and one clock latency, but may +suffer additional one clock pipeline stall and implementation-specific +InstRAM/ROM access penalty) plus 4 bytes of constant value. + +In addition, 3-instructions synthesis patterns (8 or 9 bytes, 3 clock latency) +are also provided when optimizing for speed and L32R instruction has +considerable access penalty: + +IV. 2-instructions synthesis (any of I ... III) followed by + "SLLI Ax, Ax, 1 ... 31" +V. 2-instructions synthesis followed by either "ADDX[248] Ax, Ax, Ax" + or "SUBX8 Ax, Ax, Ax" (multiplying by 3, 5, 7 or 9) + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_constantsynth): + New prototype. + * config/xtensa/xtensa.c (xtensa_emit_constantsynth, + xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI, + xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): + New backend functions that process the abovementioned logic. + (xtensa_emit_move_sequence): Revert the previous changes. + * config/xtensa/xtensa.md: New split patterns for integer + and floating-point, as the frontend part. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_2insns.c: New. + * gcc.target/xtensa/constsynth_3insns.c: Ditto. + * gcc.target/xtensa/constsynth_double.c: Ditto. +--- + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 133 +++++++++++++++--- + gcc/config/xtensa/xtensa.md | 50 +++++++ + .../gcc.target/xtensa/constsynth_2insns.c | 44 ++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 24 ++++ + .../gcc.target/xtensa/constsynth_double.c | 11 ++ + 6 files changed, 247 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 80b1da2bb..d65bc2954 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -44,6 +44,7 @@ extern int xtensa_expand_block_move (rtx *); + extern int xtensa_expand_block_set_unrolled_loop (rtx *); + extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); ++extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); + extern void xtensa_expand_nonlocal_goto (rtx *); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 94ff901c5..ba36d7244 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1027,6 +1027,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + } + + ++/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) ++ into dst with synthesizing a such constant value from a sequence of ++ load-immediate / arithmetic ones, instead of a L32R instruction ++ (plus a constant in litpool). 
*/ ++ ++static void ++xtensa_emit_constantsynth (rtx dst, enum rtx_code code, ++ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT imm2) ++{ ++ gcc_assert (REG_P (dst)); ++ emit_move_insn (dst, GEN_INT (imm0)); ++ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, ++ dst, GEN_INT (imm1))); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, imm2)); ++} ++ ++static int ++xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT op_imm) ++{ ++ int shift = exact_log2 (srcval + 1); ++ ++ if (IN_RANGE (shift, 1, 31)) ++ { ++ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ { ++ HOST_WIDE_INT imm0, imm1; ++ ++ if (srcval < -32768) ++ imm1 = -32768; ++ else if (srcval > 32512) ++ imm1 = 32512; ++ else ++ imm1 = srcval & ~255; ++ imm0 = srcval - imm1; ++ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) ++ imm0 -= 256, imm1 += 256; ++ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); ++ return 1; ++ } ++ ++ shift = ctz_hwi (srcval); ++ if (xtensa_simm12b (srcval >> shift)) ++ { ++ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static rtx ++xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) ++{ ++ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); ++} ++ ++static rtx ++xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) ++{ ++ return imm == 7 ++ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), ++ reg) ++ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, ++ GEN_INT (floor_log2 (imm - 1))), ++ reg); ++} ++ ++int ++xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) ++{ ++ /* No need for synthesizing for what fits into MOVI instruction. */ ++ if (xtensa_simm12b (srcval)) ++ return 0; ++ ++ /* 2-insns substitution. 
*/ ++ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) ++ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) ++ return 1; ++ ++ /* 3-insns substitution. */ ++ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) ++ { ++ int shift, divisor; ++ ++ /* 2-insns substitution followed by SLLI. */ ++ shift = ctz_hwi (srcval); ++ if (IN_RANGE (shift, 1, 31) && ++ xtensa_constantsynth_2insn (dst, srcval >> shift, ++ xtensa_constantsynth_rtx_SLLI, ++ shift)) ++ return 1; ++ ++ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ ++ if (TARGET_ADDX) ++ for (divisor = 3; divisor <= 9; divisor += 2) ++ if (srcval % divisor == 0 && ++ xtensa_constantsynth_2insn (dst, srcval / divisor, ++ xtensa_constantsynth_rtx_ADDSUBX, ++ divisor)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++ + /* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move +@@ -1064,22 +1181,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + + if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) + { +- /* Try to emit MOVI + SLLI sequence, that is smaller +- than L32R + literal. 
*/ +- if (optimize_size && mode == SImode && CONST_INT_P (src) +- && register_operand (dst, mode)) +- { +- HOST_WIDE_INT srcval = INTVAL (src); +- int shift = ctz_hwi (srcval); +- +- if (xtensa_simm12b (srcval >> shift)) +- { +- emit_move_insn (dst, GEN_INT (srcval >> shift)); +- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); +- return 1; +- } +- } +- + src = force_const_mem (SImode, src); + operands[1] = src; + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index da6b71d1d..ddc3087fa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -940,6 +940,19 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "constantpool_operand"))] ++ "! optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ if (! CONST_INT_P (x)) ++ FAIL; ++ if (! xtensa_constantsynth (operands[0], INTVAL (x))) ++ emit_move_insn (operands[0], x); ++}) ++ + ;; 16-bit Integer moves + + (define_expand "movhi" +@@ -1144,6 +1157,43 @@ + (set_attr "mode" "SF") + (set_attr "length" "3")]) + ++(define_split ++ [(set (match_operand:SF 0 "register_operand") ++ (match_operand:SF 1 "constantpool_operand"))] ++ "! 
optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ int i = 0; ++ rtx x = XEXP (operands[1], 0); ++ long l[2]; ++ if (GET_CODE (x) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (x)) ++ x = get_pool_constant (x); ++ else if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ gcc_assert (GET_CODE (x) == PLUS ++ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) ++ && CONST_INT_P (XEXP (x, 1))); ++ i = INTVAL (XEXP (x, 1)); ++ gcc_assert (i == 0 || i == 4); ++ i /= 4; ++ x = get_pool_constant (XEXP (x, 0)); ++ } ++ else ++ gcc_unreachable (); ++ if (GET_MODE (x) == SFmode) ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); ++ else if (GET_MODE (x) == DFmode) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ else ++ FAIL; ++ x = gen_rtx_REG (SImode, REGNO (operands[0])); ++ if (! xtensa_constantsynth (x, l[i])) ++ emit_move_insn (x, GEN_INT (l[i])); ++}) ++ + ;; 64-bit floating point moves + + (define_expand "movdf" +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +new file mode 100644 +index 000000000..43c85a250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++int test_0(void) ++{ ++ return 4095; ++} ++ ++int test_1(void) ++{ ++ return 2147483647; ++} ++ ++int test_2(void) ++{ ++ return -34816; ++} ++ ++int test_3(void) ++{ ++ return -2049; ++} ++ ++int test_4(void) ++{ ++ return 2048; ++} ++ ++int test_5(void) ++{ ++ return 34559; ++} ++ ++int test_6(void) ++{ ++ return 43680; ++} ++ ++void test_7(int *p) ++{ ++ *p = -1432354816; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +new file mode 100644 +index 000000000..f3c4a1c7c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mextra-l32r-costs=3" } */ ++ ++int test_0(void) ++{ ++ return 134217216; ++} ++ ++int test_1(void) ++{ ++ return -27604992; ++} ++ ++int test_2(void) ++{ ++ return -162279; ++} ++ ++void test_3(int *p) ++{ ++ *p = 192437; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +new file mode 100644 +index 000000000..890ca5047 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void test(unsigned int count, double array[]) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ array[i] = 1.0; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0024-Improve-shift-operations-more.patch b/patches/gcc10.1/gcc-xtensa-0024-Improve-shift-operations-more.patch new file mode 100644 index 0000000..9c44b89 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0024-Improve-shift-operations-more.patch @@ -0,0 +1,383 @@ +From fd3771fcc13b8712c91cec70f4533760f72b54e1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:38:31 +0900 +Subject: [PATCH 19/31] xtensa: Improve shift operations more + +This patch introduces funnel shifter utilization, and rearranges existing +"per-byte shift" insn patterns. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (logical_shift_operator, + xtensa_shift_per_byte_operator): New predicates. + * config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction): + New prototype. + * config/xtensa/xtensa.c (xtensa_shlrd_which_direction): + New helper function for funnel shift patterns. + * config/xtensa/xtensa.md (ior_op): New code iterator. + (*ashlsi3_1): Replace with new split pattern. 
+ (*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x. + (*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1): + New insn-and-split patterns that redirect to *xtensa_shift_per_byte, + in order to omit unnecessary bitwise AND operation. + (*shlrd_reg_, *shlrd_const_, *shlrd_per_byte_, + *shlrd_per_byte__omit_AND): + New insn patterns for funnel shifts. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/funnel_shifter.c: New. +--- + gcc/config/xtensa/predicates.md | 6 + + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 14 ++ + gcc/config/xtensa/xtensa.md | 213 ++++++++++++++---- + .../gcc.target/xtensa/funnel_shifter.c | 17 ++ + 5 files changed, 213 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 91b9343a2..e7836f0ec 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -164,9 +164,15 @@ + (define_predicate "boolean_operator" + (match_code "eq,ne")) + ++(define_predicate "logical_shift_operator" ++ (match_code "ashift,lshiftrt")) ++ + (define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + ++(define_predicate "xtensa_shift_per_byte_operator" ++ (match_code "ashift,ashiftrt,lshiftrt")) ++ + (define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index d65bc2954..32743bc67 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); ++extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + + #ifdef TREE_CODE + extern void 
init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba36d7244..473cfaf9d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2394,6 +2394,20 @@ xtensa_tls_referenced_p (rtx x) + } + + ++/* Helper function for "*shlrd_..." patterns. */ ++ ++enum rtx_code ++xtensa_shlrd_which_direction (rtx op0, rtx op1) ++{ ++ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) ++ return ASHIFT; /* shld */ ++ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) ++ return LSHIFTRT; /* shrd */ ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ddc3087fa..58bba89af 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -83,6 +83,9 @@ + ;; the same template. + (define_mode_iterator HQI [HI QI]) + ++;; This code iterator is for *shlrd and its variants. ++(define_code_iterator ior_op [ior plus]) ++ + + ;; Attributes. 
+ +@@ -1272,16 +1275,6 @@ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + }) + +-(define_insn "*ashlsi3_1" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (const_int 1)))] +- "TARGET_DENSITY" +- "add.n\t%0, %1, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "2")]) +- + (define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1294,16 +1287,14 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashlsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8b\t%2\;sll\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "register_operand") ++ (const_int 1)))] ++ "TARGET_DENSITY" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 1) ++ (match_dup 1)))]) + + (define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") +@@ -1317,17 +1308,6 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashrsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8l\t%2\;sra\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) +- + (define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1337,9 +1317,9 @@ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) +- return "srli\t%0, %1, %R2"; ++ return "srli\t%0, %1, %R2"; + else +- 
return "extui\t%0, %1, %R2, %L2"; ++ return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; + } +@@ -1347,13 +1327,170 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*lshrsi3_3x" ++(define_insn "*shift_per_byte" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 3 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]))] ++ "!optimize_debug && optimize" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; ++ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; ++ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_0" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i"))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_1" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i")))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 
5) ++ (neg:SI (match_dup 2))) ++ (set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 5) ++ (const_int 3))]))] ++{ ++ operands[5] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "9")]) ++ ++(define_insn "*shlrd_reg_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (match_dup 2))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_const_" + [(set (match_operand:SI 0 "register_operand" "=a") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i")])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && IN_RANGE (INTVAL (operands[3]), 1, 31) ++ && IN_RANGE (INTVAL (operands[4]), 1, 31) ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" ++{ ++ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) ++ { ++ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; ++ case LSHIFTRT: return 
"ssai\t%R3\;src\t%0, %2, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_per_byte_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shlrd_per_byte__omit_AND" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 4 "const_int_operand" "i"))]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_dup 2) ++ (const_int 3)) ++ (match_dup 4)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ior_op:SI (match_op_dup 5 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]) ++ (match_op_dup 6 ++ [(match_dup 3) ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] + "" +- "ssa8l\t%2\;srl\t%0, %1" + [(set_attr "type" "arith") + 
(set_attr "mode" "SI") + (set_attr "length" "6")]) +diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +new file mode 100644 +index 000000000..c8f987ccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(const void *addr) ++{ ++ unsigned int n = (unsigned int)addr; ++ const unsigned int *a = (const unsigned int*)(n & ~3); ++ n = (n & 3) * 8; ++ return (a[0] >> n) | (a[1] << (32 - n)); ++} ++ ++unsigned int test_1(unsigned int a, unsigned int b) ++{ ++ return (a >> 16) + (b << 16); ++} ++ ++/* { dg-final { scan-assembler-times "src" 2 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch new file mode 100644 index 0000000..cdb96ff --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch @@ -0,0 +1,427 @@ +From 0690bcdd42d0aa6671f9ec3ccbbe70faa04ffb6b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 31 Jan 2022 09:56:21 +0900 +Subject: [PATCH 20/31] xtensa: Simplify conditional branch/move insn patterns + +No need to describe the "false side" conditional insn patterns anymore. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_emit_branch): + Remove the first argument. + (xtensa_emit_bit_branch): Remove it because now called only from the + output statement of *bittrue insn pattern. + * config/xtensa/xtensa.c (gen_int_relational): Remove the last + argument 'p_invert', and make so that the condition is reversed by + itself as needed. + (xtensa_expand_conditional_branch): Share the common path, and remove + condition inversion code. + (xtensa_emit_branch, xtensa_emit_movcc): Simplify by removing the + "false side" pattern. 
+ (xtensa_emit_bit_branch): Remove it because of the abovementioned + reason, and move the function body to *bittrue insn pattern. + * config/xtensa/xtensa.md (*bittrue): Transplant the output + statement from removed xtensa_emit_bit_branch(). + (*bfalse, *ubfalse, *bitfalse, *maskfalse): Remove the "false side" + insn patterns. +--- + gcc/config/xtensa/xtensa-protos.h | 3 +- + gcc/config/xtensa/xtensa.c | 111 ++++++++++------------------ + gcc/config/xtensa/xtensa.md | 117 ++++++++---------------------- + 3 files changed, 70 insertions(+), 161 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 32743bc67..e4b2d2f06 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -51,8 +51,7 @@ extern void xtensa_expand_nonlocal_goto (rtx *); + extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); + extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); +-extern char *xtensa_emit_branch (bool, bool, rtx *); +-extern char *xtensa_emit_bit_branch (bool, bool, rtx *); ++extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 473cfaf9d..8deae3d51 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -118,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = + + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); +-static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); ++static rtx gen_int_relational (enum rtx_code, rtx, rtx); + static rtx gen_float_relational (enum rtx_code, rtx, rtx); + static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); + static rtx 
fixup_subreg_mem (rtx); +@@ -670,8 +670,7 @@ map_test_to_internal_test (enum rtx_code test_code) + static rtx + gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ +- rtx cmp1, /* second operand to compare */ +- int *p_invert /* whether branch needs to reverse test */) ++ rtx cmp1 /* second operand to compare */) + { + struct cmp_info + { +@@ -703,6 +702,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + enum internal_test test; + machine_mode mode; + struct cmp_info *p_info; ++ int invert; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); +@@ -739,9 +739,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- *p_invert = ((GET_CODE (cmp1) == CONST_INT) +- ? p_info->invert_const +- : p_info->invert_reg); ++ invert = ((GET_CODE (cmp1) == CONST_INT) ++ ? p_info->invert_const ++ : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. */ +@@ -758,7 +758,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + cmp1 = temp; + } + +- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); ++ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) ++ : p_info->test_code, ++ VOIDmode, cmp0, cmp1); + } + + +@@ -817,45 +819,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; +- rtx cmp; +- int invert; +- rtx label1, label2; ++ rtx cmp, label; + + switch (mode) + { ++ case E_SFmode: ++ if (TARGET_HARD_FLOAT) ++ { ++ cmp = gen_float_relational (test_code, cmp0, cmp1); ++ break; ++ } ++ /* FALLTHRU */ ++ + case E_DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case E_SImode: +- invert = FALSE; +- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); +- break; +- +- case E_SFmode: +- if (!TARGET_HARD_FLOAT) +- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, +- cmp0, cmp1)); +- invert = FALSE; +- cmp = gen_float_relational (test_code, cmp0, cmp1); ++ cmp = gen_int_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ +- +- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); +- label2 = pc_rtx; +- +- if (invert) +- { +- label2 = label1; +- label1 = pc_rtx; +- } +- ++ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, +- label1, +- label2))); ++ label, ++ pc_rtx))); + } + + +@@ -2058,21 +2048,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + + + char * +-xtensa_emit_branch (bool inverted, bool immed, rtx *operands) ++xtensa_emit_branch (bool immed, rtx *operands) + { + static char result[64]; +- enum rtx_code code; ++ enum rtx_code code = GET_CODE (operands[3]); + const char *op; + +- code = GET_CODE (operands[3]); + switch (code) + { +- case EQ: op = inverted ? "ne" : "eq"; break; +- case NE: op = inverted ? "eq" : "ne"; break; +- case LT: op = inverted ? "ge" : "lt"; break; +- case GE: op = inverted ? "lt" : "ge"; break; +- case LTU: op = inverted ? 
"geu" : "ltu"; break; +- case GEU: op = inverted ? "ltu" : "geu"; break; ++ case EQ: op = "eq"; break; ++ case NE: op = "ne"; break; ++ case LT: op = "lt"; break; ++ case GE: op = "ge"; break; ++ case LTU: op = "ltu"; break; ++ case GEU: op = "geu"; break; + default: gcc_unreachable (); + } + +@@ -2091,32 +2080,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) + } + + +-char * +-xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +-{ +- static char result[64]; +- const char *op; +- +- switch (GET_CODE (operands[3])) +- { +- case EQ: op = inverted ? "bs" : "bc"; break; +- case NE: op = inverted ? "bc" : "bs"; break; +- default: gcc_unreachable (); +- } +- +- if (immed) +- { +- unsigned bitnum = INTVAL (operands[1]) & 0x1f; +- operands[1] = GEN_INT (bitnum); +- sprintf (result, "b%si\t%%0, %%d1, %%2", op); +- } +- else +- sprintf (result, "b%s\t%%0, %%1, %%2", op); +- +- return result; +-} +- +- + char * + xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { +@@ -2125,12 +2088,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + const char *op; + + code = GET_CODE (operands[4]); ++ if (inverted) ++ code = reverse_condition (code); + if (isbool) + { + switch (code) + { +- case EQ: op = inverted ? "t" : "f"; break; +- case NE: op = inverted ? "f" : "t"; break; ++ case EQ: op = "f"; break; ++ case NE: op = "t"; break; + default: gcc_unreachable (); + } + } +@@ -2138,10 +2103,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { + switch (code) + { +- case EQ: op = inverted ? "nez" : "eqz"; break; +- case NE: op = inverted ? "eqz" : "nez"; break; +- case LT: op = inverted ? "gez" : "ltz"; break; +- case GE: op = inverted ? 
"ltz" : "gez"; break; ++ case EQ: op = "eqz"; break; ++ case NE: op = "nez"; break; ++ case LT: op = "ltz"; break; ++ case GE: op = "gez"; break; + default: gcc_unreachable (); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 58bba89af..40000859d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1551,28 +1551,13 @@ + (define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*bfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1581,28 +1566,13 @@ + (define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*ubfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "ubranch_operator" +- 
[(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1613,75 +1583,50 @@ + (define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) +- (pc)))] +- "" +-{ +- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*bitfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) ++ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") ++ (const_int 1) ++ (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*masktrue" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { ++ static char result[64]; ++ char op; + switch (GET_CODE (operands[3])) + { +- case EQ: return "bnone\t%0, %1, %2"; +- case NE: return "bany\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: op = 'c'; break; ++ case NE: op = 's'; break; ++ default: 
gcc_unreachable (); + } ++ if (which_alternative == 0) ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); ++ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); ++ } ++ else ++ sprintf (result, "bb%c\t%%0, %%1, %%2", op); ++ return result; + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_insn "*maskfalse" ++(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] + "" + { + switch (GET_CODE (operands[3])) + { +- case EQ: return "bany\t%0, %1, %2"; +- case NE: return "bnone\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: return "bnone\t%0, %1, %2"; ++ case NE: return "bany\t%0, %1, %2"; ++ default: gcc_unreachable (); + } + } + [(set_attr "type" "jump") +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch b/patches/gcc10.1/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch new file mode 100644 index 0000000..e1d2790 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch @@ -0,0 +1,101 @@ +From a7cf439409089eab17341a1a24fb9be2b967ca7c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 27 May 2021 19:04:12 +0900 +Subject: [PATCH 21/31] xtensa: Make use of BALL/BNALL instructions + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation, but a few similar fused instructions are exist: + + "BALL Ax, Ay, label" // if ((~Ax & Ay) == 0) goto label; + "BNALL Ax, Ay, label" // if ((~Ax & Ay) != 0) goto label; + +These instructions have never been emitted before, but it seems no reason 
not +to make use of them. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_bitcmpl): New insn pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/BALL-BNALL.c: New. +--- + gcc/config/xtensa/xtensa.md | 21 +++++++++++++ + gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 ++++++++++++++++++++ + 2 files changed, 54 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 40000859d..b34b2afb6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1633,6 +1633,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn "*masktrue_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case EQ: return "ball\t%0, %1, %2"; ++ case NE: return "bnall\t%0, %1, %2"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ + + ;; Zero-overhead looping support. 
+ +diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +new file mode 100644 +index 000000000..ba61c6f37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++extern void foo(void); ++ ++void BNONE_test(int a, int b) ++{ ++ if (a & b) ++ foo(); ++} ++ ++void BANY_test(int a, int b) ++{ ++ if (!(a & b)) ++ foo(); ++} ++ ++void BALL_test(int a, int b) ++{ ++ if (~a & b) ++ foo(); ++} ++ ++void BNALL_test(int a, int b) ++{ ++ if (!(~a & b)) ++ foo(); ++} ++ ++/* { dg-final { scan-assembler-times "bnone" 1 } } */ ++/* { dg-final { scan-assembler-times "bany" 1 } } */ ++/* { dg-final { scan-assembler-times "ball" 1 } } */ ++/* { dg-final { scan-assembler-times "bnall" 1 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch b/patches/gcc10.1/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch new file mode 100644 index 0000000..b13350f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch @@ -0,0 +1,252 @@ +From 43c7f8333028ff03d8a4681ab62de2febcc43f5c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:28:43 +0900 +Subject: [PATCH 22/31] xtensa: Optimize bitwise AND operation with some + specific forms of constants + +This patch offers several insn-and-split patterns for bitwise AND with +register and constant that can be represented as: + +i. 1's least significant N bits and the others 0's (17 <= N <= 31) +ii. 1's most significant N bits and the others 0's (12 <= N <= 31) +iii. M 1's sequence of bits and trailing N 0's bits, that cannot fit into a + "MOVI Ax, simm12" instruction (1 <= M <= 16, 1 <= N <= 30) + +And also offers shortcuts for conditional branch if each of the abovementioned +operations is (not) equal to zero. 
+ +gcc/ChangeLog: + + * config/xtensa/predicates.md (shifted_mask_operand): + New predicate. + * config/xtensa/xtensa.md (*andsi3_const_pow2_minus_one): + New insn-and-split pattern. + (*andsi3_const_negative_pow2, *andsi3_const_shifted_mask, + *masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): Ditto. +--- + gcc/config/xtensa/predicates.md | 10 ++ + gcc/config/xtensa/xtensa.md | 179 ++++++++++++++++++++++++++++++++ + 2 files changed, 189 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index e7836f0ec..367fc17f3 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -52,6 +52,16 @@ + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + ++(define_predicate "shifted_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT mask = INTVAL (op); ++ int shift = ctz_hwi (mask); ++ ++ return IN_RANGE (shift, 1, 31) ++ && xtensa_mask_immediate ((uint32_t)mask >> shift); ++}) ++ + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b34b2afb6..355fb7742 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -645,6 +645,83 @@ + (set_attr "mode" "SI") + (set_attr "length" "6")]) + ++(define_insn_and_split "*andsi3_const_pow2_minus_one" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ashift:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" 
"SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[2]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*andsi3_const_negative_pow2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (lshiftrt:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*andsi3_const_shifted_mask" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "shifted_mask_operand" "i")))] ++ "! xtensa_simm12b (INTVAL (operands[2]))" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (zero_extract:SI (match_dup 1) ++ (match_dup 3) ++ (match_dup 4))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[2]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[2] = GEN_INT (shift); ++ operands[3] = GEN_INT (mask_size); ++ operands[4] = GEN_INT (mask_pos); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && ctz_hwi (INTVAL (operands[2])) == 1") ++ (const_int 5) ++ (const_int 6)))]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +@@ -1654,6 +1731,108 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split 
"*masktrue_const_pow2_minus_one" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (ashift:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[1]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*masktrue_const_negative_pow2" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*masktrue_const_shifted_mask" ++ [(set (pc) ++ (if_then_else (match_operator 4 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "shifted_mask_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 
"" "")) ++ (pc)))] ++ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 ++ && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 6) ++ (zero_extract:SI (match_dup 0) ++ (match_dup 5) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 6) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) ++ (pc)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[1]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[1] = GEN_INT (mask_pos); ++ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); ++ operands[5] = GEN_INT (mask_size); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") ++ (const_int 5) ++ (const_int 6)))]) ++ + + ;; Zero-overhead looping support. + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch b/patches/gcc10.1/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch new file mode 100644 index 0000000..ebe9eb0 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch @@ -0,0 +1,44 @@ +From 7856e5d6344828b2a72aeef671a169dbd1a85a55 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:34:48 +0900 +Subject: [PATCH 23/31] xtensa: Document new -mextra-l32r-costs= + Xtensa-specific option + +gcc/ChangeLog: + * doc/invoke.texi: Document -mextra-l32r-costs= option. 
+--- + gcc/doc/invoke.texi | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index eabeec944..c35f51afb 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. + -mtext-section-literals -mno-text-section-literals @gol + -mauto-litpools -mno-auto-litpools @gol + -mtarget-align -mno-target-align @gol +--mlongcalls -mno-longcalls} ++-mlongcalls -mno-longcalls @gol ++-mextra-l32r-costs=@var{cycles}} + + @emph{zSeries Options} + See S/390 and zSeries Options. +@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call + instructions---look at the disassembled object code to see the actual + instructions. Note that the assembler uses an indirect call for + every cross-file call, not just those that really are out of range. ++ ++@item -mextra-l32r-costs=@var{n} ++@opindex mextra-l32r-costs ++Specify an extra cost of instruction RAM/ROM access for @code{L32R} ++instructions, in clock cycles. This affects, when optimizing for speed, ++whether loading a constant from literal pool using @code{L32R} or ++synthesizing the constant from a small one with a couple of arithmetic ++instructions. The default value is 0. 
+ @end table + + @node zSeries Options +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch b/patches/gcc10.1/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch new file mode 100644 index 0000000..f5c0f78 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch @@ -0,0 +1,354 @@ +From c985f67f0b9a35ca5f22647c326c6b43a2b237fa Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 15 Jun 2022 21:21:21 +0900 +Subject: [PATCH 24/31] xtensa: Add support for sibling call optimization + +This patch introduces support for sibling call optimization, when the Windowed +Register Option is NOT configured. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call, + xtensa_emit_sibcall): New prototypes. + (xtensa_expand_epilogue): Add new argument that specifies whether + or not sibling call. + * config/xtensa/xtensa.c (TARGET_FUNCTION_OK_FOR_SIBCALL): + New macro definition. + (xtensa_prepare_expand_call): New function in order to share + the common code. + (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall): + New functions. + (xtensa_expand_epilogue): Add new argument sibcall_p and use it + for sibling call handling. + * config/xtensa/xtensa.md (call, call_value): + Use xtensa_prepare_expand_call. + (call_internal, call_value_internal): + Add the condition in order to be disabled if sibling call. + (sibcall, sibcall_value, sibcall_epilogue): New expansions. + (sibcall_internal, sibcall_value_internal): New insn patterns, + and split ones in order to take care of the indirect sibcalls. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: New. 
+--- + gcc/config/xtensa/xtensa-protos.h | 4 +- + gcc/config/xtensa/xtensa.c | 57 ++++++++++++- + gcc/config/xtensa/xtensa.md | 93 ++++++++++++++++++---- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++ + 4 files changed, 155 insertions(+), 19 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index e4b2d2f06..75ed3bfb0 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); + extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); ++extern void xtensa_prepare_expand_call (int, rtx *); + extern char *xtensa_emit_call (int, rtx *); ++extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + +@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int); + extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); +-extern void xtensa_expand_epilogue (void); ++extern void xtensa_expand_epilogue (bool); + extern void order_regs_for_local_alloc (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8deae3d51..a714b980a 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -187,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); + static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); 
++static bool xtensa_function_ok_for_sibcall (tree, tree); + + + +@@ -337,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2117,6 +2121,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + } + + ++void ++xtensa_prepare_expand_call (int callop, rtx *operands) ++{ ++ rtx addr = XEXP (operands[callop], 0); ++ ++ if (flag_pic && SYMBOL_REF_P (addr) ++ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) ++ addr = gen_sym_PLT (addr); ++ ++ if (!call_insn_operand (addr, VOIDmode)) ++ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++} ++ ++ + char * + xtensa_emit_call (int callop, rtx *operands) + { +@@ -2135,6 +2153,24 @@ xtensa_emit_call (int callop, rtx *operands) + } + + ++char * ++xtensa_emit_sibcall (int callop, rtx *operands) ++{ ++ static char result[64]; ++ rtx tgt = operands[callop]; ++ ++ if (GET_CODE (tgt) == CONST_INT) ++ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", ++ INTVAL (tgt)); ++ else if (register_operand (tgt, VOIDmode)) ++ sprintf (result, "jx\t%%%d", callop); ++ else ++ sprintf (result, "j.l\t%%%d, a9", callop); ++ ++ return result; ++} ++ ++ + bool + xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) + { +@@ -3305,7 +3341,7 @@ xtensa_expand_prologue (void) + } + + void +-xtensa_expand_epilogue (void) ++xtensa_expand_epilogue (bool sibcall_p) + { + if (!TARGET_WINDOWED_ABI) + { +@@ -3339,10 +3375,13 @@ xtensa_expand_epilogue (void) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), ++ emit_move_insn 
(reg = gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); ++ if (regno == A0_REG && sibcall_p) ++ emit_use (reg); + } + } + +@@ -3377,7 +3416,8 @@ xtensa_expand_epilogue (void) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- emit_jump_insn (gen_return ()); ++ if (!sibcall_p) ++ emit_jump_insn (gen_return ()); + } + + bool +@@ -4893,4 +4933,15 @@ xtensa_asan_shadow_offset (void) + return HOST_WIDE_INT_UC (0x10000000); + } + ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ ++static bool ++xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Do not allow sibcalls when windowed registers ABI is in effect. */ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 355fb7742..2a11d1c86 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,6 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++ (A10_REG 10) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2153,18 +2154,13 @@ + (match_operand 1 "" ""))] + "" + { +- rtx addr = XEXP (operands[0], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (0, operands); + }) + + (define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (0, operands); + } +@@ -2178,19 +2174,14 @@ + (match_operand 2 "" "")))] + "" + { +- rtx addr = XEXP (operands[1], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[1], 0) = 
copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (1, operands); + }) + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); + } +@@ -2198,6 +2189,70 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "sibcall" ++ [(call (match_operand 0 "memory_operand" "") ++ (match_operand 1 "" ""))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (0, operands); ++}) ++ ++(define_insn "sibcall_internal" ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ (match_operand 1 "" "i"))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (0, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(call (mem:SI (match_operand:SI 0 "register_operand")) ++ (match_operand 1 ""))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 0)) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 1))]) ++ ++(define_expand "sibcall_value" ++ [(set (match_operand 0 "register_operand" "") ++ (call (match_operand 1 "memory_operand" "") ++ (match_operand 2 "" "")))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (1, operands); ++}) ++ ++(define_insn "sibcall_value_internal" ++ [(set (match_operand 0 "register_operand" "=a") ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (1, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (call (mem:SI (match_operand:SI 1 "register_operand")) 
++ (match_operand 2 "")))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 1)) ++ (set (match_dup 0) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 2)))]) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +@@ -2265,7 +2320,15 @@ + [(return)] + "" + { +- xtensa_expand_epilogue (); ++ xtensa_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_expand_epilogue (true); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +new file mode 100644 +index 000000000..d2b3fccf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */ ++ ++extern int foo(int); ++extern void bar(int); ++ ++int test_0(int a) { ++ return foo(a); ++} ++ ++void test_1(int a) { ++ bar(a); ++} ++ ++int test_2(int (*a)(void)) { ++ bar(0); ++ return a(); ++} ++ ++/* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch b/patches/gcc10.1/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch new file mode 100644 index 0000000..ad60202 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch @@ -0,0 +1,81 @@ +From 16878066a57f917814a8d6fe45f7f7d2eebdbbc0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:37:54 +0900 +Subject: [PATCH 25/31] xtensa: Add some dedicated patterns that correspond to + GIMPLE canonicalizations + +This patch offers better RTL representations against straightforward +derivations from some tree optimizers' canonicalized forms. 
+ +- rounding up to even, such as '(x + (x & 1))', is canonicalized to + '((x + 1) & -2)', but the former is one instruction less than the latter + in Xtensa ISA. +- signed greater or equal to zero as logical value '((signed)x >= 0)', + is canonicalized to '((unsigned)(x ^ -1) >> 31)', but the equivalent + '(((signed)x >> 31) + 1)' is one instruction less. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*round_up_to_even): + New insn-and-split pattern. + (*signed_ge_zero): Ditto. +--- + gcc/config/xtensa/xtensa.md | 45 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a11d1c86..3e8e2e76f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2709,3 +2709,48 @@ + xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); + DONE; + }) ++ ++(define_insn_and_split "*round_up_to_even" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 1)) ++ (const_int -2)))] ++ "" ++ "#" ++ "can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (and:SI (match_dup 1) ++ (const_int 1))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*signed_ge_zero" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ge:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 0)))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) ++ (ashiftrt:SI (match_dup 1) ++ (const_int 31))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int 1)))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) +-- +2.20.1 + 
diff --git a/patches/gcc10.1/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch b/patches/gcc10.1/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch new file mode 100644 index 0000000..28bb494 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch @@ -0,0 +1,90 @@ +From a0f2dfa2e952111dbd85d2b2f1caaf570facce8a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:39:49 +0900 +Subject: [PATCH 26/31] xtensa: Eliminate unwanted reg-reg moves during DFmode + input reloads + +When spilled DFmode registers are reloaded, they are first loaded into a +pair of SImode regs and then copied from those regs. Such unwanted reg-reg +moves do not seem to be eliminated at the "cprop_hardreg" stage, although +output reloads have no such problem. + +Luckily, such inefficiencies are easy to resolve with a peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (reload_operand): + New predicate. + * config/xtensa/xtensa.md: New peephole2 pattern.
+--- + gcc/config/xtensa/predicates.md | 13 +++++++++++++ + gcc/config/xtensa/xtensa.md | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 367fc17f3..c1cddb733 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -165,6 +165,19 @@ + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + ++(define_predicate "reload_operand" ++ (match_code "mem") ++{ ++ const_rtx addr = XEXP (op, 0); ++ if (REG_P (addr)) ++ return REGNO (addr) == A1_REG; ++ if (GET_CODE (addr) == PLUS) ++ return REG_P (XEXP (addr, 0)) ++ && REGNO (XEXP (addr, 0)) == A1_REG ++ && CONST_INT_P (XEXP (addr, 1)); ++ return false; ++}) ++ + (define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3e8e2e76f..2598c09c9 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2754,3 +2754,34 @@ + (if_then_else (match_test "TARGET_DENSITY") + (const_int 5) + (const_int 6)))]) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 6 "reload_operand")) ++ (set (match_operand:SI 1 "register_operand") ++ (match_operand:SI 7 "reload_operand")) ++ (set (match_operand:SF 2 "register_operand") ++ (match_operand:SF 4 "register_operand")) ++ (set (match_operand:SF 3 "register_operand") ++ (match_operand:SF 5 "register_operand"))] ++ "REGNO (operands[0]) == REGNO (operands[4]) ++ && REGNO (operands[1]) == REGNO (operands[5]) ++ && peep2_reg_dead_p (4, operands[0]) ++ && peep2_reg_dead_p (4, operands[1])" ++ [(set (match_dup 2) ++ (match_dup 6)) ++ (set (match_dup 3) ++ (match_dup 7))] ++{ ++ uint32_t check = 0; ++ int i; ++ for (i = 0; i <= 3; ++i) ++ { ++ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); ++ if (check & mask) ++ FAIL; ++ check |= mask; ++ } ++ operands[6] = gen_rtx_MEM 
(SFmode, XEXP (operands[6], 0)); ++ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch b/patches/gcc10.1/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch new file mode 100644 index 0000000..7c4a869 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch @@ -0,0 +1,99 @@ +From d6c2b11e9ce88f3b1a7ddcf9a2712b070ad4dbfb Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:53:04 +0900 +Subject: [PATCH 27/31] xtensa: Eliminate [DS]Cmode hard register clobber that + is immediately followed by whole overwrite the register + +RTL expansion of substitution to [DS]Cmode hard register includes obstructive +register clobber. + +A simplest example: + + double _Complex test(double _Complex c) { + return c; + } + +will be converted to: + + (set (reg:DF 42 [ c ]) (reg:DF 2 a2)) + (set (reg:DF 43 [ c+8 ]) (reg:DF 4 a4)) + (clobber (reg:DC 2 a2)) + (set (reg:DF 2 a2) (reg:DF 42 [ c ])) + (set (reg:DF 4 a4) (reg:DF 43 [ c+8 ])) + (use (reg:DC 2 a2)) + (return) + +and then finally: + + test: + mov a8, a2 + mov a9, a3 + mov a6, a4 + mov a7, a5 + mov a2, a8 + mov a3, a9 + mov a4, a6 + mov a5, a7 + ret + +As you see, it is so ridiculous. + +This patch eliminates such clobber in order to prune away the wasted move +instructions by the optimizer: + + test: + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (DSC): New split pattern and mode iterator. +--- + gcc/config/xtensa/xtensa.md | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2598c09c9..124548dfe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -87,6 +87,10 @@ + ;; This code iterator is for *shlrd and its variants. 
+ (define_code_iterator ior_op [ior plus]) + ++;; This mode iterator allows the DC and SC patterns to be defined from ++;; the same template. ++(define_mode_iterator DSC [DC SC]) ++ + + ;; Attributes. + +@@ -2785,3 +2789,27 @@ + operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); + operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); + }) ++ ++(define_split ++ [(clobber (match_operand:DSC 0 "register_operand"))] ++ "GP_REG_P (REGNO (operands[0]))" ++ [(const_int 0)] ++{ ++ unsigned int regno = REGNO (operands[0]); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx_insn *insn; ++ rtx x; ++ if (! ((insn = next_nonnote_nondebug_insn (curr_insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno ++ && (insn = next_nonnote_nondebug_insn (insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) ++ FAIL; ++}) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch b/patches/gcc10.1/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch new file mode 100644 index 0000000..6007b49 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch @@ -0,0 +1,111 @@ +From e37c151ca3beacb7f4f116a94c9c80223b0c6fbf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 17 Jun 2022 22:47:49 +0900 +Subject: [PATCH 28/31] xtensa: Defer storing integer constants into litpool + until reload + +Storing integer constants into litpool in the early stage of compilation +hinders some integer optimizations. In fact, such integer constants are +not subject to the constant folding process. 
+ +For example: + + extern unsigned short value; + extern void foo(void); + void test(void) { + if (value == 30001) + foo(); + } + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + extui a2, a2, 0, 16 // runtime zero-extension despite constant + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +This patch defers the placement of integer constants into litpool until +the start of reload: + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/constraints.md (Y): + Change to include integer constants until reload begins. + * config/xtensa/predicates.md (move_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Change to allow storing integer constants into litpool only after + reload begins. +--- + gcc/config/xtensa/constraints.md | 6 ++++-- + gcc/config/xtensa/predicates.md | 5 +++-- + gcc/config/xtensa/xtensa.c | 3 ++- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 9a8caab4f..13b3daafc 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -113,8 +113,10 @@ + + (define_constraint "Y" + "A constant that can be used in relaxed MOVI instructions." +- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") +- (match_test "TARGET_AUTO_LITPOOLS"))) ++ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") ++ (match_test "TARGET_AUTO_LITPOOLS")) ++ (and (match_code "const_int") ++ (match_test "can_create_pseudo_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. 
Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index c1cddb733..633cc6264 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -147,8 +147,9 @@ + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") +- (match_test "GET_MODE_CLASS (mode) == MODE_INT +- && xtensa_simm12b (INTVAL (op))")) ++ (match_test "(GET_MODE_CLASS (mode) == MODE_INT ++ && xtensa_simm12b (INTVAL (op))) ++ || can_create_pseudo_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a714b980a..1d64e2c76 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1173,7 +1173,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + return 1; + } + +- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) ++ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 ++ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + { + src = force_const_mem (SImode, src); + operands[1] = src; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch b/patches/gcc10.1/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch new file mode 100644 index 0000000..5ecac42 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch @@ -0,0 +1,129 @@ +From dfaefed18297218392071039325baabac59d5c43 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 19 Jun 2022 22:32:45 +0900 +Subject: [PATCH 29/31] xtensa: Apply a few minor fixes + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Use can_create_pseudo_p(), instead of using individual + reload_in_progress and reload_completed. 
+ (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), + the existing predicate function. + (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): + Use the standard RTX code predicate macros such as MEM_P, + SYMBOL_REF_P and/or CONST_INT_P. + * config/xtensa/xtensa.md: Avoid using numeric literals to determine + if callee-saved register, at the split patterns for indirect sibcall + fixups. +--- + gcc/config/xtensa/xtensa.c | 16 ++++++++-------- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 2 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 1d64e2c76..595c5f96f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -743,7 +743,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- invert = ((GET_CODE (cmp1) == CONST_INT) ++ invert = (CONST_INT_P (cmp1) + ? p_info->invert_const + : p_info->invert_reg); + +@@ -1200,7 +1200,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + } + } + +- if (!(reload_in_progress | reload_completed) ++ if (can_create_pseudo_p () + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + +@@ -1603,7 +1603,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) + return 0; + + /* If no 4-byte aligned, loop count should be treated as the +@@ -2160,7 +2160,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) + static char result[64]; + rtx tgt = operands[callop]; + +- if (GET_CODE (tgt) == CONST_INT) ++ if (CONST_INT_P (tgt)) + sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", + INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) +@@ -4318,17 +4318,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + static bool +-xtensa_is_insn_L32R_p(const rtx_insn *insn) ++xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + + if (GET_CODE (x) == SET) + { +- x = XEXP (x, 1); +- if (GET_CODE (x) == MEM) ++ x = SET_SRC (x); ++ if (MEM_P (x)) + { + x = XEXP (x, 0); +- return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) + && CONSTANT_POOL_ADDRESS_P (x); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 124548dfe..6f51a5357 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1251,14 +1251,14 @@ + int i = 0; + rtx x = XEXP (operands[1], 0); + long l[2]; +- if (GET_CODE (x) == SYMBOL_REF ++ if (SYMBOL_REF_P (x) + && CONSTANT_POOL_ADDRESS_P (x)) + x = get_pool_constant (x); + else if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS +- && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_P (XEXP (x, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))); + i = INTVAL (XEXP (x, 1)); +@@ -2217,7 +2217,7 @@ + (match_operand 1 ""))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" + [(set (reg:SI A10_REG) + (match_dup 0)) + (call (mem:SI (reg:SI A10_REG)) +@@ -2250,7 +2250,7 @@ + (match_operand 2 "")))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" + [(set (reg:SI A10_REG) + (match_dup 1)) + (set (match_dup 0) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch b/patches/gcc10.1/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch new file mode 100644 index 0000000..d65c44d --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch @@ -0,0 +1,56 @@ +From 48c657f23a61a41a46842b25bce4f287a56223a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 20 Jun 2022 01:56:16 +0900 +Subject: [PATCH 30/31] xtensa: Fix RTL insn cost estimation about relaxed MOVI + instructions + +These instructions will all be converted to L32R ones with litpool entries +by the assembler. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_is_insn_L32R_p): + Consider relaxed MOVI instructions as L32R. 
+--- + gcc/config/xtensa/xtensa.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 595c5f96f..b92ec9caa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4322,17 +4322,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + +- if (GET_CODE (x) == SET) ++ if (GET_CODE (x) != SET) ++ return false; ++ ++ x = XEXP (x, 1); ++ if (MEM_P (x)) + { +- x = SET_SRC (x); +- if (MEM_P (x)) +- { +- x = XEXP (x, 0); +- return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +- && CONSTANT_POOL_ADDRESS_P (x); +- } ++ x = XEXP (x, 0); ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); + } + ++ /* relaxed MOVI instructions, that will be converted to L32R by the ++ assembler. */ ++ if (CONST_INT_P (x) ++ && ! xtensa_simm12b (INTVAL (x))) ++ return true; ++ + return false; + } + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0036-Fix-buffer-overflow.patch b/patches/gcc10.1/gcc-xtensa-0036-Fix-buffer-overflow.patch new file mode 100644 index 0000000..35f9f10 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0036-Fix-buffer-overflow.patch @@ -0,0 +1,33 @@ +From 75c341c7de5c6f325d6ded7bd91d77793fe358d5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 22 Jun 2022 04:04:45 +0900 +Subject: [PATCH 31/31] xtensa: Fix buffer overflow + +Fortify buffer overflow message reported. +(see https://github.com/earlephilhower/esp-quick-toolchain/issues/36) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2_internal): + Enlarge the buffer that is obviously smaller than the template + string given to sprintf(). 
+--- + gcc/config/xtensa/xtensa.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6f51a5357..81b016859 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -536,7 +536,7 @@ + { + rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); + const char *init = "ssai\t8\;"; +- static char result[64]; ++ static char result[128]; + if (prev_insn && NONJUMP_INSN_P (prev_insn)) + { + rtx x = PATTERN (prev_insn); +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch b/patches/gcc10.1/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch new file mode 100644 index 0000000..0ea6d48 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch @@ -0,0 +1,95 @@ +From 9308911796a46bd689bbcc1cedef1b63ae9b871e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 26 Jun 2022 14:07:56 +0900 +Subject: [PATCH] xtensa: Optimize integer constant addition that is + between -32896 and 32639 + +Such constants are often subject to the constant synthesis: + + int test(int a) { + return a - 31999; + } + + test: + movi a3, 1 + addmi a3, a3, -0x7d00 + add a2, a2, a3 + ret + +This patch optimizes such case as follows: + + test: + addi a2, a2, 1 + addmi a2, a2, -0x7d00 + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Suppress unnecessary emitting nop insn in the split patterns for + integer/FP constant synthesis, and add new peephole2 pattern that + folds such synthesized additions. +--- + gcc/config/xtensa/xtensa.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 81b016859..b697e16db 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1036,6 +1036,7 @@ + FAIL; + if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) + emit_move_insn (operands[0], x); ++ DONE; + }) + + ;; 16-bit Integer moves +@@ -1277,6 +1278,7 @@ + x = gen_rtx_REG (SImode, REGNO (operands[0])); + if (! xtensa_constantsynth (x, l[i])) + emit_move_insn (x, GEN_INT (l[i])); ++ DONE; + }) + + ;; 64-bit floating point moves +@@ -2813,3 +2815,36 @@ + && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) + FAIL; + }) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand")) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (match_operand:SI 2 "const_int_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (plus:SI (match_operand:SI 4 "register_operand") ++ (match_dup 0)))] ++ "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]), ++ (-128 - 32768), (127 + 32512)) ++ && REGNO (operands[0]) != REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[4]) ++ && peep2_reg_dead_p (3, operands[0])" ++ [(set (match_dup 3) ++ (plus:SI (match_dup 4) ++ (match_dup 1))) ++ (set (match_dup 3) ++ (plus:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]); ++ int imm0, imm1; ++ value += 128; ++ if (value > 32512) ++ imm1 = 32512; ++ else ++ imm1 = value & ~255; ++ imm0 = value - imm1 - 128; ++ operands[1] = GEN_INT (imm0); ++ operands[2] = GEN_INT (imm1); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch b/patches/gcc10.1/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch new file mode 100644 index 0000000..8fc23d8 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch @@ -0,0 +1,92 @@ +From 7bed998154345cb072cd425b5d61734d3e0bac5d Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 1 Jul 2022 13:39:34 +0900 +Subject: [PATCH] xtensa: Minor fix for FP constant synthesis + +This patch fixes an non-fatal issue about negative 
constant values derived +from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'. + +And also replaces the dedicated code in FP constant synthesis split +pattern with the appropriate existing function call. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + In FP constant synthesis split pattern, subcontract to + avoid_constant_pool_reference() as in the case of integer, + because it can handle well too. And cast to int32_t before + calling xtensa_constantsynth() in order to ignore upper 32-bit. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_double.c: + Modify in order to catch the issue. +--- + gcc/config/xtensa/xtensa.md | 35 +++++-------------- + .../gcc.target/xtensa/constsynth_double.c | 2 +- + 2 files changed, 9 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b697e16db..6ef84b4f2 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1249,35 +1249,16 @@ + "! optimize_debug && reload_completed" + [(const_int 0)] + { +- int i = 0; +- rtx x = XEXP (operands[1], 0); +- long l[2]; +- if (SYMBOL_REF_P (x) +- && CONSTANT_POOL_ADDRESS_P (x)) +- x = get_pool_constant (x); +- else if (GET_CODE (x) == CONST) +- { +- x = XEXP (x, 0); +- gcc_assert (GET_CODE (x) == PLUS +- && SYMBOL_REF_P (XEXP (x, 0)) +- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) +- && CONST_INT_P (XEXP (x, 1))); +- i = INTVAL (XEXP (x, 1)); +- gcc_assert (i == 0 || i == 4); +- i /= 4; +- x = get_pool_constant (XEXP (x, 0)); +- } +- else +- gcc_unreachable (); +- if (GET_MODE (x) == SFmode) +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); +- else if (GET_MODE (x) == DFmode) +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); +- else ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ long l; ++ HOST_WIDE_INT value; ++ if (! 
CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode) + FAIL; ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + x = gen_rtx_REG (SImode, REGNO (operands[0])); +- if (! xtensa_constantsynth (x, l[i])) +- emit_move_insn (x, GEN_INT (l[i])); ++ value = (int32_t)l; ++ if (! xtensa_constantsynth (x, value)) ++ emit_move_insn (x, GEN_INT (value)); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +index 890ca5047..5fba6a986 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -5,7 +5,7 @@ void test(unsigned int count, double array[]) + { + unsigned int i; + for (i = 0; i < count; ++i) +- array[i] = 1.0; ++ array[i] = 8.988474246316506e+307; + } + + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch b/patches/gcc10.1/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch new file mode 100644 index 0000000..fcb3c72 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch @@ -0,0 +1,132 @@ +From afcf727f9c4174b104b594cbd14cba9c57de71d1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 15 Jul 2022 08:46:55 +0900 +Subject: [PATCH] xtensa: constantsynth: Make try to find shorter + instruction + +This patch allows the constant synthesis to choose shorter instruction +if possible. + + /* example */ + int test(void) { + return 128 << 8; + } + + ;; before + test: + movi a2, 0x100 + addmi a2, a2, 0x7f00 + ret.n + + ;; after + test: + movi.n a2, 1 + slli a2, a2, 15 + ret.n + +When the Code Density Option is configured, the latter is one byte smaller +than the former. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_constantsynth): Remove. 
+ (xtensa_constantsynth_2insn): Change to try all three synthetic + methods and to use the one that fits the immediate value of + the seed into a Narrow Move Immediate instruction "MOVI.N" + when the Code Density Option is configured. +--- + gcc/config/xtensa/xtensa.c | 58 +++++++++++++++++++------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b92ec9caa..a5330e52b 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1026,35 +1026,35 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + load-immediate / arithmetic ones, instead of a L32R instruction + (plus a constant in litpool). */ + +-static void +-xtensa_emit_constantsynth (rtx dst, enum rtx_code code, +- HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, +- rtx (*gen_op)(rtx, HOST_WIDE_INT), +- HOST_WIDE_INT imm2) +-{ +- gcc_assert (REG_P (dst)); +- emit_move_insn (dst, GEN_INT (imm0)); +- emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, +- dst, GEN_INT (imm1))); +- if (gen_op) +- emit_move_insn (dst, gen_op (dst, imm2)); +-} +- + static int + xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + rtx (*gen_op)(rtx, HOST_WIDE_INT), + HOST_WIDE_INT op_imm) + { +- int shift = exact_log2 (srcval + 1); ++ HOST_WIDE_INT imm = INT_MAX; ++ rtx x = NULL_RTX; ++ int shift; + ++ gcc_assert (REG_P (dst)); ++ ++ shift = exact_log2 (srcval + 1); + if (IN_RANGE (shift, 1, 31)) + { +- xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, +- gen_op, op_imm); +- return 1; ++ imm = -1; ++ x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift)); + } + +- if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ ++ shift = ctz_hwi (srcval); ++ if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95))) ++ && xtensa_simm12b (srcval >> shift)) ++ { ++ imm = srcval >> shift; ++ x = gen_ashlsi3 (dst, dst, GEN_INT (shift)); ++ } ++ ++ if ((!x || (TARGET_DENSITY && ! 
IN_RANGE (imm, -32, 95))) ++ && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) + { + HOST_WIDE_INT imm0, imm1; + +@@ -1067,19 +1067,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + imm0 = srcval - imm1; + if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) + imm0 -= 256, imm1 += 256; +- xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); +- return 1; ++ imm = imm0; ++ x = gen_addsi3 (dst, dst, GEN_INT (imm1)); + } + +- shift = ctz_hwi (srcval); +- if (xtensa_simm12b (srcval >> shift)) +- { +- xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, +- gen_op, op_imm); +- return 1; +- } ++ if (!x) ++ return 0; + +- return 0; ++ emit_move_insn (dst, GEN_INT (imm)); ++ emit_insn (x); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, op_imm)); ++ ++ return 1; + } + + static rtx +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch b/patches/gcc10.1/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch new file mode 100644 index 0000000..acf6d99 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch @@ -0,0 +1,177 @@ +From 5776497b68fcce6bf31835cf0a4d693e336bb2ca Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 14 Jul 2022 20:47:46 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND with imm1" followed by + "branch if (not) equal to imm2" + +This patch enhances the effectiveness of the previously posted one: +"xtensa: Optimize bitwise AND operation with some specific forms of constants". 
+ + /* example */ + extern void foo(int); + void test(int a) { + if ((a & (-1U << 8)) == (128 << 8)) /* 0 or one of "b4const" */ + foo(a); + } + + ;; before + .global test + test: + movi a3, -0x100 + movi.n a4, 1 + and a3, a2, a3 + slli a4, a4, 15 + bne a3, a4, .L3 + j.l foo, a9 + .L1: + ret.n + + ;; after + .global test + test: + srli a3, a2, 8 + bnei a3, 128, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): If the immediate for bitwise AND is + represented as '-(1 << N)', decrease the lower bound of N from 12 + to 1. And the other immediate for conditional branch is now no + longer limited to zero, but also one of some positive integers. + Finally, remove the checks of some conditions, because the comparison + expressions that don't satisfy such checks are determined as + compile-time constants and thus will be optimized away before + RTL expansion. +--- + gcc/config/xtensa/xtensa.md | 73 ++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6ef84b4f2..ca8b3913d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1721,63 +1721,78 @@ + + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && 
xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL (operands[1]) + 1)))" + "#" + "&& can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (ashift:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) << shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set (attr "length") +- (if_then_else (match_test "TARGET_DENSITY +- && INTVAL (operands[1]) == 0x7FFFFFFF") +- (const_int 5) +- (const_int 6)))]) ++ (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 0x7FFFFFFF) ++ && INTVAL (operands[2]) == 0") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[1]) == 0x7FFFFFFF ++ || INTVAL (operands[2]) == 0)") ++ (const_int 5) ++ (const_int 6))))]) + + (define_insn_and_split "*masktrue_const_negative_pow2" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && xtensa_b4const_or_zero (INTVAL (operands[2]) >> floor_log2 (-INTVAL (operands[1])))" + "#" + "&& 
can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (lshiftrt:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = floor_log2 (-INTVAL (operands[1])); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "6")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[2]) == 0") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn_and_split "*masktrue_const_shifted_mask" + [(set (pc) +@@ -1787,8 +1802,8 @@ + (match_operand:SI 2 "const_int_operand" "i")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +- "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 +- && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "/* (INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 // can be omitted ++ && */ xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch b/patches/gcc10.1/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch new file mode 100644 index 0000000..ec12c18 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch @@ -0,0 +1,167 @@ +From 7435ec0392c1f36bf3740c3a9748e7149c0c153e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 16 Jul 2022 14:44:02 +0900 +Subject: 
[PATCH] xtensa: Correct the relative RTX cost that corresponds to the + Move Immediate "MOVI" instruction + +This patch corrects the overestimation of the relative cost of +'(set (reg) (const_int N))' where N fits into the instruction itself. + +In fact, such overestimation confuses the RTL loop invariant motion pass. +As a result, it brings almost no negative impact from the speed point of +view, but additional reg-reg move instructions and register allocation +pressure about the size. + + /* example, optimized for size */ + extern int foo(void); + extern int array[16]; + void test_0(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = 1024; + } + void test_1(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = array[i] ? 1024 : 0; + } + void test_2(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = foo() ? 0 : 1024; + } + + ;; before + .literal_position + .literal .LC0, array + test_0: + l32r a3, .LC0 + movi.n a2, 0 + movi a4, 0x400 // OK + .L2: + s32i.n a4, a3, 0 + addi.n a2, a2, 1 + addi.n a3, a3, 4 + bnei a2, 16, .L2 + ret.n + .literal_position + .literal .LC1, array + test_1: + l32r a2, .LC1 + movi.n a3, 0 + movi a5, 0x400 // NG + .L6: + l32i.n a4, a2, 0 + beqz.n a4, .L5 + mov.n a4, a5 // should be "movi a4, 0x400" + .L5: + s32i.n a4, a2, 0 + addi.n a3, a3, 1 + addi.n a2, a2, 4 + bnei a3, 16, .L6 + ret.n + .literal_position + .literal .LC2, array + test_2: + addi sp, sp, -32 + s32i.n a12, sp, 24 + l32r a12, .LC2 + s32i.n a13, sp, 20 + s32i.n a14, sp, 16 + s32i.n a15, sp, 12 + s32i.n a0, sp, 28 + addi a13, a12, 64 + movi.n a15, 0 // NG + movi a14, 0x400 // and wastes callee-saved registers (only 4) + .L11: + call0 foo + mov.n a3, a14 // should be "movi a3, 0x400" + movnez a3, a15, a2 + s32i.n a3, a12, 0 + addi.n a12, a12, 4 + bne a12, a13, .L11 + l32i.n a0, sp, 28 + l32i.n a12, sp, 24 + l32i.n a13, sp, 20 + l32i.n a14, sp, 16 + l32i.n a15, sp, 
12 + addi sp, sp, 32 + ret.n + + ;; after + .literal_position + .literal .LC0, array + test_0: + l32r a3, .LC0 + movi.n a2, 0 + movi a4, 0x400 // OK + .L2: + s32i.n a4, a3, 0 + addi.n a2, a2, 1 + addi.n a3, a3, 4 + bnei a2, 16, .L2 + ret.n + .literal_position + .literal .LC1, array + test_1: + l32r a2, .LC1 + movi.n a3, 0 + .L6: + l32i.n a4, a2, 0 + beqz.n a4, .L5 + movi a4, 0x400 // OK + .L5: + s32i.n a4, a2, 0 + addi.n a3, a3, 1 + addi.n a2, a2, 4 + bnei a3, 16, .L6 + ret.n + .literal_position + .literal .LC2, array + test_2: + addi sp, sp, -16 + s32i.n a12, sp, 8 + l32r a12, .LC2 + s32i.n a13, sp, 4 + s32i.n a0, sp, 12 + addi a13, a12, 64 + .L11: + call0 foo + movi.n a3, 0 // OK + movi a4, 0x400 // and less register allocation pressure + moveqz a3, a4, a2 + s32i.n a3, a12, 0 + addi.n a12, a12, 4 + bne a12, a13, .L11 + l32i.n a0, sp, 12 + l32i.n a12, sp, 8 + l32i.n a13, sp, 4 + addi sp, sp, 16 + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): + Change the relative cost of '(set (reg) (const_int N))' where + N fits into signed 12-bit from 4 to 0 if optimizing for size. + And use the appropriate macro instead of the bare number 4. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a5330e52b..bd3489bfe 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4109,7 +4109,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case SET: + if (xtensa_simm12b (INTVAL (x))) + { +- *total = 4; ++ *total = speed ? 
COSTS_N_INSNS (1) : 0; + return true; + } + break; +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch b/patches/gcc10.1/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch new file mode 100644 index 0000000..f020eaa --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch @@ -0,0 +1,179 @@ +From f4d76407c8c33229f9b1d7b81e713ed10a5d408b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 21 Jul 2022 16:10:47 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND NOT with imm" followed by + "branch if (not) equal to zero" + +The RTL combiner will transform "if ((x & C) == C) goto label;" +into "if ((~x & C) == 0) goto label;" and will try to match it with +the insn patterns. + + /* example */ + void test_0(int a) { + if ((char)a == 255) + foo(); + } + void test_1(int a) { + if ((unsigned short)a == 0xFFFF) + foo(); + } + void test_2(int a) { + if ((a & 0x00003F80) != 0x00003F80) + foo(); + } + + ;; before + test_0: + extui a2, a2, 0, 8 + movi a3, 0xff + bne a2, a3, .L1 + j.l foo, a9 + .L1: + ret.n + test_1: + movi.n a3, -1 + extui a2, a2, 0, 16 + extui a3, a3, 16, 16 + bne a2, a3, .L3 + j.l foo, a9 + .L3: + ret.n + test_2: + movi a3, 0x80 + extui a2, a2, 7, 7 + addmi a3, a3, 0x3f00 + slli a2, a2, 7 + beq a2, a3, .L5 + j.l foo, a9 + .L5: + ret.n + + ;; after + test_0: + movi a3, 0xff + bnall a2, a3, .L1 + j.l foo, a9 + .L1: + ret.n + test_1: + movi.n a3, -1 + extui a3, a3, 16, 16 + bnall a2, a3, .L3 + j.l foo, a9 + .L3: + ret.n + test_2: + movi a3, 0x80 + addmi a3, a3, 0x3f00 + ball a2, a3, .L5 + j.l foo, a9 + .L5: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_const_bitcmpl): + Add a new insn_and_split pattern, and a few split patterns for + special cases. 
+--- + gcc/config/xtensa/xtensa.md | 84 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 84 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ca8b3913d..ed1e072fe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1719,6 +1719,90 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split "*masktrue_const_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "exact_log2 (INTVAL (operands[1])) < 0" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (match_dup 1)) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(and:SI (not:SI (match_dup 0)) ++ (match_dup 4)) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && IN_RANGE (INTVAL (operands[1]), -32, 95)") ++ (const_int 5) ++ (if_then_else (match_test "xtensa_simm12b (INTVAL (operands[1]))") ++ (const_int 6) ++ (const_int 10))))]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:HQI (not:SI (match_operand:SI 0 "register_operand")) 0) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] ++ "!BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (match_dup 3)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))] ++{ ++ operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (<MODE>mode)) - 1); ++}) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:HI (not:SI (match_operand:SI 0 "register_operand")) 2) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] 
++ "BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (const_int 65535)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:QI (not:SI (match_operand:SI 0 "register_operand")) 3) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] ++ "BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (const_int 255)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))]) ++ + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) + (if_then_else (match_operator 4 "boolean_operator" +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch b/patches/gcc10.1/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch new file mode 100644 index 0000000..2f20939 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch @@ -0,0 +1,30 @@ +From 6c4824b9ee3272c7621639f873b6a4b38b5e117e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 28 Jul 2022 11:59:00 +0900 +Subject: [PATCH] xtensa: Add RTX costs for if_then_else + +It takes one machine instruction for both conditional branch and move. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): + Add new case for IF_THEN_ELSE. 
+--- + gcc/config/xtensa/xtensa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index bd3489bfe..b6f41a478 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4309,6 +4309,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case ZERO_EXTRACT: + case ZERO_EXTEND: ++ case IF_THEN_ELSE: + *total = COSTS_N_INSNS (1); + return true; + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch b/patches/gcc10.1/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch new file mode 100644 index 0000000..1840f9f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch @@ -0,0 +1,60 @@ +From 25b8acf68c6d262f75a84bbc8238e5c326c1b1bf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 30 Jul 2022 03:25:04 +0900 +Subject: [PATCH] xtensa: Fix conflicting hard regno between indirect + sibcall fixups and EH_RETURN_STACKADJ_RTX + +The hard register A10 was already allocated for EH_RETURN_STACKADJ_RTX. +(although exception handling and sibling call may not apply at the same time, + but for safety) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: Change hard register number used in + the split patterns for indirect sibling call fixups from 10 to 11, + the last free one for the CALL0 ABI. +--- + gcc/config/xtensa/xtensa.md | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ed1e072fe..9eeb73915 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,7 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) +- (A10_REG 10) ++ (A11_REG 11) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2300,9 +2300,9 @@ + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) + && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" +- [(set (reg:SI A10_REG) ++ [(set (reg:SI A11_REG) + (match_dup 0)) +- (call (mem:SI (reg:SI A10_REG)) ++ (call (mem:SI (reg:SI A11_REG)) + (match_dup 1))]) + + (define_expand "sibcall_value" +@@ -2333,10 +2333,10 @@ + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) + && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" +- [(set (reg:SI A10_REG) ++ [(set (reg:SI A11_REG) + (match_dup 1)) + (set (match_dup 0) +- (call (mem:SI (reg:SI A10_REG)) ++ (call (mem:SI (reg:SI A11_REG)) + (match_dup 2)))]) + + (define_insn "entry" +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch b/patches/gcc10.1/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch new file mode 100644 index 0000000..e381a8d --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch @@ -0,0 +1,38 @@ +From 624bf9fd927ada2d6d6dc34f5e0de704e7ee268f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 4 Aug 2022 19:56:27 +0900 +Subject: [PATCH] xtensa: Turn on -fsplit-wide-types-early by default + +Since GCC10, the "subreg2" optimization pass was no longer tied to enabling +"subreg1" unless -fsplit-wide-types-early was turned on (PR88233). However +on the Xtensa port, the lack of "subreg2" can degrade the quality of the +output code, especially for those that produce many D[FC]mode pseudos. + +This patch turns on -fsplit-wide-types-early by default in order to restore +the previous behavior. + +gcc/ChangeLog: + + * common/config/xtensa/xtensa-common.c + (xtensa_option_optimization_table): Add OPT_fsplit_wide_types_early + for OPT_LEVELS_ALL in order to restore pre-GCC10 behavior. 
+--- + gcc/common/config/xtensa/xtensa-common.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/gcc/common/config/xtensa/xtensa-common.c b/gcc/common/config/xtensa/xtensa-common.c +index dd751a14d..697a9eb22 100644 +--- a/gcc/common/config/xtensa/xtensa-common.c ++++ b/gcc/common/config/xtensa/xtensa-common.c +@@ -34,6 +34,8 @@ static const struct default_options xtensa_option_optimization_table[] = + assembler, so GCC cannot do a good job of reordering blocks. + Do not enable reordering unless it is explicitly requested. */ + { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 }, ++ /* Split multi-word types early (pre-GCC10 behavior). */ ++ { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch b/patches/gcc10.1/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch new file mode 100644 index 0000000..d94e38e --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch @@ -0,0 +1,171 @@ +From 745e9839f8d18724f31015a1dcbde2c2c513d3c5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 17 Aug 2022 14:54:16 +0900 +Subject: [PATCH] xtensa: Optimize stack pointer updates in function + pro/epilogue under certain conditions + +This patch enforces the use of "addmi" machine instruction instead of +addition/subtraction with two source registers for adjusting the stack +pointer, if the adjustment fits into a signed 16-bit and is also a multiple +of 256. 
+ + /* example */ + void test(void) { + char buffer[4096]; + __asm__(""::"m"(buffer)); + } + + ;; before + test: + movi.n a9, 1 + slli a9, a9, 12 + sub sp, sp, a9 + movi.n a9, 1 + slli a9, a9, 12 + add.n sp, sp, a9 + addi sp, sp, 0 + ret.n + + ;; after + test: + addmi sp, sp, -0x1000 + addmi sp, sp, 0x1000 + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): + Use an "addmi" machine instruction for updating the stack pointer + rather than addition/subtraction via hard register A9, if the amount + of change satisfies the literal value conditions of that instruction + when the CALL0 ABI is used. + (xtensa_expand_epilogue): Ditto. + And also inhibit the stack pointer addition of constant zero. +--- + gcc/config/xtensa/xtensa.c | 79 ++++++++++++++++++++++++++------------ + 1 file changed, 54 insertions(+), 25 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b6f41a478..a93b15f4d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3186,7 +3186,6 @@ xtensa_expand_prologue (void) + rtx_insn *insn = NULL; + rtx note_rtx; + +- + total_size = compute_frame_size (get_frame_size ()); + + if (flag_stack_usage_info) +@@ -3242,10 +3241,17 @@ xtensa_expand_prologue (void) + } + else + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); ++ if (xtensa_simm8x256 (-total_size)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-total_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (total_size)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, tmp_reg)); ++ } + RTX_FRAME_RELATED_P (insn) = 1; + note_rtx = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -3273,11 +3279,19 @@ xtensa_expand_prologue (void) + if 
(total_size > 1024 + || (!callee_save_size && total_size > 128)) + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size - +- callee_save_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); ++ if (xtensa_simm8x256 (callee_save_size - total_size)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (callee_save_size - ++ total_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (total_size - ++ callee_save_size)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, tmp_reg)); ++ } + RTX_FRAME_RELATED_P (insn) = 1; + note_rtx = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -3351,12 +3365,21 @@ xtensa_expand_epilogue (bool sibcall_p) + + if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- tmp_reg)); ++ if (xtensa_simm8x256 (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size)) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? ++ hard_frame_pointer_rtx : stack_pointer_rtx, ++ GEN_INT (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size)); ++ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? 
++ hard_frame_pointer_rtx : stack_pointer_rtx, ++ tmp_reg)); ++ } + offset = cfun->machine->callee_save_size - UNITS_PER_WORD; + } + else +@@ -3396,18 +3419,24 @@ xtensa_expand_epilogue (bool sibcall_p) + offset = cfun->machine->current_frame_size; + else + offset = cfun->machine->callee_save_size; +- +- emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (offset))); ++ if (offset) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (offset))); + } + else + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, +- GEN_INT (cfun->machine->current_frame_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- tmp_reg)); ++ if (xtensa_simm8x256 (cfun->machine->current_frame_size)) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (cfun->machine->current_frame_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, ++ GEN_INT (cfun->machine->current_frame_size)); ++ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ tmp_reg)); ++ } + } + } + +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch b/patches/gcc10.1/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch new file mode 100644 index 0000000..a6e870f --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch @@ -0,0 +1,166 @@ +From d8f7137070d92c297e1deecd6dabdb471ddaa9ab Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 18 Aug 2022 01:11:32 +0900 +Subject: [PATCH] xtensa: Improve indirect sibling call handling + +No longer needs the dedicated hard register (A11) for the address of the +call and the split patterns for fixups, due to the introduction of appropriate +register class and constraint. 
+ +(Note: "ISC_REGS" contains a hard register A8 used as a "static chain" + pointer for nested functions, but no problem; Pointer to nested function + actually points to "trampoline", and trampoline itself doesn't receive + "static chain" pointer to its parent's stack frame from the caller.) + +gcc/ChangeLog: + + * config/xtensa/xtensa.h + (enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): + Add new register class "ISC_REGS". + * config/xtensa/constraints.md (c): Add new register constraint. + * config/xtensa/xtensa.md (define_constants): Remove "A11_REG". + (sibcall_internal, sibcall_value_internal): + Change to use the new register constraint, and remove two split + patterns for fixups that are no longer needed. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: Add a new test function to ensure + that registers for arguments (occupy from A2 to A7) and for indirect + sibcall (should be assigned to A8) neither conflict nor spill out. +--- + gcc/config/xtensa/constraints.md | 5 ++++ + gcc/config/xtensa/xtensa.h | 3 +++ + gcc/config/xtensa/xtensa.md | 29 ++-------------------- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 5 ++++ + 4 files changed, 15 insertions(+), 27 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 13b3daafc..f590dcf3a 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -27,6 +27,11 @@ + "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa + Boolean Option is configured.") + ++(define_register_constraint "c" "TARGET_WINDOWED_ABI ? NO_REGS : ISC_REGS" ++ "@internal ++ General-purpose AR registers for indirect sibling calls, @code{a2}- ++ @code{a8}.") ++ + (define_register_constraint "d" "TARGET_DENSITY ? 
AR_REGS: NO_REGS" + "@internal + All AR registers, including sp, but only if the Xtensa Code Density +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 3e9cbc943..ee2238606 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -372,6 +372,7 @@ enum reg_class + FP_REGS, /* floating point registers */ + ACC_REG, /* MAC16 accumulator */ + SP_REG, /* sp register (aka a1) */ ++ ISC_REGS, /* registers for indirect sibling calls */ + RL_REGS, /* preferred reload regs (not sp or fp) */ + GR_REGS, /* integer registers except sp */ + AR_REGS, /* all integer registers */ +@@ -393,6 +394,7 @@ enum reg_class + "FP_REGS", \ + "ACC_REG", \ + "SP_REG", \ ++ "ISC_REGS", \ + "RL_REGS", \ + "GR_REGS", \ + "AR_REGS", \ +@@ -409,6 +411,7 @@ enum reg_class + { 0xfff80000, 0x00000007 }, /* floating-point registers */ \ + { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \ + { 0x00000002, 0x00000000 }, /* stack pointer register */ \ ++ { 0x000001fc, 0x00000000 }, /* registers for indirect sibling calls */ \ + { 0x0000fffd, 0x00000000 }, /* preferred reload registers */ \ + { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \ + { 0x0003ffff, 0x00000000 }, /* integer registers */ \ +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eeb73915..0c05c16b1 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,7 +25,6 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) +- (A11_REG 11) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2284,7 +2283,7 @@ + }) + + (define_insn "sibcall_internal" +- [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nic")) + (match_operand 1 "" "i"))] + "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" + { +@@ -2294,17 +2293,6 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_split +- [(call (mem:SI (match_operand:SI 0 "register_operand")) +- (match_operand 1 ""))] +- 
"reload_completed +- && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && ! call_used_or_fixed_reg_p (REGNO (operands[0]))" +- [(set (reg:SI A11_REG) +- (match_dup 0)) +- (call (mem:SI (reg:SI A11_REG)) +- (match_dup 1))]) +- + (define_expand "sibcall_value" + [(set (match_operand 0 "register_operand" "") + (call (match_operand 1 "memory_operand" "") +@@ -2316,7 +2304,7 @@ + + (define_insn "sibcall_value_internal" + [(set (match_operand 0 "register_operand" "=a") +- (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nic")) + (match_operand 2 "" "i")))] + "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" + { +@@ -2326,19 +2314,6 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_split +- [(set (match_operand 0 "register_operand") +- (call (mem:SI (match_operand:SI 1 "register_operand")) +- (match_operand 2 "")))] +- "reload_completed +- && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" +- [(set (reg:SI A11_REG) +- (match_dup 1)) +- (set (match_dup 0) +- (call (mem:SI (reg:SI A11_REG)) +- (match_dup 2)))]) +- + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +index d2b3fccf1..dff6750e2 100644 +--- a/gcc/testsuite/gcc.target/xtensa/sibcalls.c ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -17,4 +17,9 @@ int test_2(int (*a)(void)) { + return a(); + } + ++_Complex double test_3(_Complex double a, _Complex double (*b)(_Complex double, double)) { ++ bar(-1); ++ return b(a, 3.141592653589795); ++} ++ + /* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.1/gcc-xtensa-0048-add-static-PIE-support.patch b/patches/gcc10.1/gcc-xtensa-0048-add-static-PIE-support.patch new file mode 100644 index 0000000..f16832b --- /dev/null +++ 
b/patches/gcc10.1/gcc-xtensa-0048-add-static-PIE-support.patch @@ -0,0 +1,31 @@ +From 5773838c193d36476109de77d230391f6738bb62 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 12 Aug 2022 21:02:15 -0700 +Subject: [PATCH] xtensa: gcc: add static PIE support + +gcc/ + * config/xtensa/linux.h (LINK_SPEC): Add static-pie. +--- + gcc/config/xtensa/linux.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h +index 62a33a9bc..49796c97f 100644 +--- a/gcc/config/xtensa/linux.h ++++ b/gcc/config/xtensa/linux.h +@@ -52,9 +52,10 @@ along with GCC; see the file COPYING3. If not see + #define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ +- %{!static: \ ++ %{!static:%{!static-pie: \ + %{rdynamic:-export-dynamic} \ +- -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ ++ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ ++ %{static-pie:-static -pie --no-dynamic-linker -z text} \ + %{static:-static}}" + + #undef LOCAL_LABEL_PREFIX +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch b/patches/gcc10.1/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch new file mode 100644 index 0000000..0c11972 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch @@ -0,0 +1,300 @@ +From 55fbffc224d951aca1eab3cbfb74c540e7ef2f3f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 28 Aug 2022 22:42:25 +0900 +Subject: [PATCH] xtensa: Eliminate unused stack frame allocation/freeing + +In the example below, 'x' is once placed on the stack frame and then read +into registers as the argument value of bar(): + + /* example */ + struct foo { + int a, b; + }; + extern struct foo bar(struct foo); + struct foo test(void) { + struct foo x = { 0, 1 }; + return bar(x); + } + +Thanks to the dead store elimination, the initialization of 'x' turns into +merely loading the immediates to 
registers, but corresponding stack frame +growth is not rolled back. As a result: + + ;; prereq: the CALL0 ABI + ;; before + test: + addi sp, sp, -16 // unused stack frame allocation/freeing + movi.n a2, 0 + movi.n a3, 1 + addi sp, sp, 16 // because no instructions that refer to + j.l bar, a9 // the stack pointer between the two + +This patch eliminates such unused stack frame allocation/freeing: + + ;; after + test: + movi.n a2, 0 + movi.n a3, 1 + j.l bar, a9 + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (machine_function): New boolean member as + a flag that controls whether to emit the insns for stack pointer + adjustment inside of the pro/epilogue. + (xtensa_emit_adjust_stack_ptr): New function to share the common + codes and to emit insns if not inhibited. + (xtensa_expand_epilogue): Change to use the function mentioned + above when using the CALL0 ABI. + (xtensa_expand_prologue): Ditto. + And also change to set the inhibit flag used by + xtensa_emit_adjust_stack_ptr() to true if the stack pointer is only + used for its own adjustment. 
+--- + gcc/config/xtensa/xtensa.c | 164 ++++++++++++++++++------------------- + 1 file changed, 80 insertions(+), 84 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a93b15f4d..97291fc0f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -102,6 +102,7 @@ struct GTY(()) machine_function + int callee_save_size; + bool frame_laid_out; + bool epilogue_done; ++ bool inhibit_logues_a1_adjusts; + }; + + /* Vector, indexed by hard register number, which contains 1 for a +@@ -3084,7 +3085,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode mode, int labelno) + } + + static bool +-xtensa_call_save_reg(int regno) ++xtensa_call_save_reg (int regno) + { + if (TARGET_WINDOWED_ABI) + return false; +@@ -3120,7 +3121,7 @@ compute_frame_size (poly_int64 size) + cfun->machine->callee_save_size = 0; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) + { +- if (xtensa_call_save_reg(regno)) ++ if (xtensa_call_save_reg (regno)) + cfun->machine->callee_save_size += UNITS_PER_WORD; + } + +@@ -3175,6 +3176,49 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED) + return offset; + } + ++#define ADJUST_SP_NONE 0x0 ++#define ADJUST_SP_NEED_NOTE 0x1 ++#define ADJUST_SP_FRAME_PTR 0x2 ++static void ++xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) ++{ ++ rtx_insn *insn; ++ rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? 
hard_frame_pointer_rtx ++ : stack_pointer_rtx; ++ ++ if (cfun->machine->inhibit_logues_a1_adjusts) ++ return; ++ ++ if (xtensa_simm8 (offset) ++ || xtensa_simm8x256 (offset)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ ++ if (offset < 0) ++ { ++ emit_move_insn (tmp_reg, GEN_INT (-offset)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg)); ++ } ++ else ++ { ++ emit_move_insn (tmp_reg, GEN_INT (offset)); ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg)); ++ } ++ } ++ ++ if (flags & ADJUST_SP_NEED_NOTE) ++ { ++ rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ offset)); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } ++} ++ + /* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ + #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) +@@ -3210,17 +3254,30 @@ xtensa_expand_prologue (void) + int regno; + HOST_WIDE_INT offset = 0; + int callee_save_size = cfun->machine->callee_save_size; ++ df_ref ref; ++ bool stack_pointer_needed = frame_pointer_needed ++ || crtl->calls_eh_return; ++ ++ /* Check if the function body really needs the stack pointer. */ ++ if (!stack_pointer_needed) ++ for (ref = DF_REG_USE_CHAIN (A1_REG); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) == DF_REF_REGULAR ++ && NONJUMP_INSN_P (DF_REF_INSN (ref))) ++ stack_pointer_needed = true; ++ /* Check if callee-saved registers really need saving to the stack. */ ++ if (!stack_pointer_needed) ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (xtensa_call_save_reg (regno)) ++ stack_pointer_needed = true; ++ ++ cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed; + + /* -128 is a limit of single addi instruction. 
*/ + if (IN_RANGE (total_size, 1, 128)) + { +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-total_size))); +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-total_size, ++ ADJUST_SP_NEED_NOTE); + offset = total_size - UNITS_PER_WORD; + } + else if (callee_save_size) +@@ -3230,33 +3287,14 @@ xtensa_expand_prologue (void) + * move it to its final location. */ + if (total_size > 1024) + { +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-callee_save_size))); +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -callee_save_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-callee_save_size, ++ ADJUST_SP_NEED_NOTE); + offset = callee_save_size - UNITS_PER_WORD; + } + else + { +- if (xtensa_simm8x256 (-total_size)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-total_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); +- } +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-total_size, ++ ADJUST_SP_NEED_NOTE); + offset = total_size - UNITS_PER_WORD; + } + } +@@ -3278,27 +3316,8 @@ xtensa_expand_prologue (void) + } + if (total_size > 1024 + || (!callee_save_size && total_size > 128)) +- { +- if (xtensa_simm8x256 (callee_save_size - total_size)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT 
(callee_save_size - +- total_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size - +- callee_save_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); +- } +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- callee_save_size - +- total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); +- } ++ xtensa_emit_adjust_stack_ptr (callee_save_size - total_size, ++ ADJUST_SP_NEED_NOTE); + } + + if (frame_pointer_needed) +@@ -3365,21 +3384,11 @@ xtensa_expand_epilogue (bool sibcall_p) + + if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) + { +- if (xtensa_simm8x256 (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)) +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- tmp_reg)); +- } ++ xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size, ++ frame_pointer_needed ++ ? 
ADJUST_SP_FRAME_PTR ++ : ADJUST_SP_NONE); + offset = cfun->machine->callee_save_size - UNITS_PER_WORD; + } + else +@@ -3420,24 +3429,11 @@ xtensa_expand_epilogue (bool sibcall_p) + else + offset = cfun->machine->callee_save_size; + if (offset) +- emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (offset))); ++ xtensa_emit_adjust_stack_ptr (offset, ADJUST_SP_NONE); + } + else +- { +- if (xtensa_simm8x256 (cfun->machine->current_frame_size)) +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (cfun->machine->current_frame_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, +- GEN_INT (cfun->machine->current_frame_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- tmp_reg)); +- } +- } ++ xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size, ++ ADJUST_SP_NONE); + } + + if (crtl->calls_eh_return) +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch b/patches/gcc10.1/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch new file mode 100644 index 0000000..c39608c --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch @@ -0,0 +1,111 @@ +From 78eac52fe49e1463bec7a838dd172b970412927b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 30 Aug 2022 21:28:51 +0900 +Subject: [PATCH] xtensa: Make complex hard register clobber elimination + more robust and accurate + +This patch eliminates all clobbers for complex hard registers that will +be overwritten entirely afterwards (supersedence of +3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9). + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: Rewrite the split pattern that performs + the abovementioned process so that insns that overwrite clobbered + register no longer need to be contiguous. + (DSC): Remove as no longer needed. 
+--- + gcc/config/xtensa/xtensa.md | 67 +++++++++++++++++++++++++------------ + 1 file changed, 45 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 0c05c16b1..ec4a69e30 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -86,10 +86,6 @@ + ;; This code iterator is for *shlrd and its variants. + (define_code_iterator ior_op [ior plus]) + +-;; This mode iterator allows the DC and SC patterns to be defined from +-;; the same template. +-(define_mode_iterator DSC [DC SC]) +- + + ;; Attributes. + +@@ -2848,27 +2844,54 @@ + }) + + (define_split +- [(clobber (match_operand:DSC 0 "register_operand"))] +- "GP_REG_P (REGNO (operands[0]))" ++ [(clobber (match_operand 0 "register_operand"))] ++ "HARD_REGISTER_P (operands[0]) ++ && COMPLEX_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] + { +- unsigned int regno = REGNO (operands[0]); +- machine_mode inner_mode = GET_MODE_INNER (mode); ++ auto_sbitmap bmp (FIRST_PSEUDO_REGISTER); + rtx_insn *insn; +- rtx x; +- if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) +- && NONJUMP_INSN_P (insn) +- && GET_CODE (x = PATTERN (insn)) == SET +- && REG_P (x = XEXP (x, 0)) +- && GET_MODE (x) == inner_mode +- && REGNO (x) == regno +- && (insn = next_nonnote_nondebug_insn (insn)) +- && NONJUMP_INSN_P (insn) +- && GET_CODE (x = PATTERN (insn)) == SET +- && REG_P (x = XEXP (x, 0)) +- && GET_MODE (x) == inner_mode +- && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) +- FAIL; ++ rtx reg = gen_rtx_REG (SImode, 0); ++ bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0])); ++ for (insn = next_nonnote_nondebug_insn_bb (curr_insn); ++ insn; insn = next_nonnote_nondebug_insn_bb (insn)) ++ { ++ sbitmap_iterator iter; ++ unsigned int regno; ++ if (NONJUMP_INSN_P (insn)) ++ { ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ { ++ set_regno_raw (reg, regno, REG_NREGS (reg)); ++ if (reg_overlap_mentioned_p (reg, PATTERN (insn))) ++ break; ++ } ++ if (GET_CODE (PATTERN (insn)) == SET) ++ { ++ rtx x = SET_DEST (PATTERN (insn)); ++ if (REG_P (x) && HARD_REGISTER_P (x)) ++ bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x)); ++ else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x))) ++ { ++ struct subreg_info info; ++ subreg_get_info (regno = REGNO (SUBREG_REG (x)), ++ GET_MODE (SUBREG_REG (x)), ++ SUBREG_BYTE (x), GET_MODE (x), &info); ++ if (!info.representable_p) ++ break; ++ bitmap_clear_range (bmp, regno + info.offset, info.nregs); ++ } ++ } ++ if (bitmap_empty_p (bmp)) ++ goto FALLTHRU; ++ } ++ else if (CALL_P (insn)) ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ if (call_used_or_fixed_reg_p (regno)) ++ break; ++ } ++ FAIL; ++FALLTHRU:; + }) + + (define_peephole2 +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch b/patches/gcc10.1/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch new file mode 100644 index 0000000..0f6d156 --- /dev/null +++ 
b/patches/gcc10.1/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch @@ -0,0 +1,91 @@ +From dc825d410b7a3025d3b902f83bb3e360ac42f477 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 3 Sep 2022 12:27:51 +0900 +Subject: [PATCH] xtensa: constantsynth: Add new 3-insns synthesis pattern + +This patch adds a new 3-instructions constant synthesis pattern: + +- A value that can fit into a signed 12-bit after a number of either bitwise + left or right rotations: + => "MOVI(.N) Ax, simm12" + "SSAI (1 ... 11) or (21 ... 31)" + + "SRC Ax, Ax, Ax" + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_constantsynth): + Add new pattern for the abovementioned case. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_3insns.c (test_4): + Add new test function. +--- + gcc/config/xtensa/xtensa.c | 31 +++++++++++++++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 11 +++++++ + 2 files changed, 42 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 97291fc0f..baee55ce3 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1133,6 +1133,37 @@ xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) + xtensa_constantsynth_rtx_ADDSUBX, + divisor)) + return 1; ++ ++ /* loading simm12 followed by left/right bitwise rotation: ++ MOVI + SSAI + SRC. 
*/ ++ if ((srcval & 0x001FF800) == 0 ++ || (srcval & 0x001FF800) == 0x001FF800) ++ { ++ int32_t v; ++ ++ for (shift = 1; shift < 12; ++shift) ++ { ++ v = (int32_t)(((uint32_t)srcval >> shift) ++ | ((uint32_t)srcval << (32 - shift))); ++ if (xtensa_simm12b(v)) ++ { ++ emit_move_insn (dst, GEN_INT (v)); ++ emit_insn (gen_rotlsi3 (dst, dst, GEN_INT (shift))); ++ return 1; ++ } ++ } ++ for (shift = 1; shift < 12; ++shift) ++ { ++ v = (int32_t)(((uint32_t)srcval << shift) ++ | ((uint32_t)srcval >> (32 - shift))); ++ if (xtensa_simm12b(v)) ++ { ++ emit_move_insn (dst, GEN_INT (v)); ++ emit_insn (gen_rotrsi3 (dst, dst, GEN_INT (shift))); ++ return 1; ++ } ++ } ++ } + } + + return 0; +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +index f3c4a1c7c..831288c7d 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -21,4 +21,15 @@ void test_3(int *p) + *p = 192437; + } + ++struct foo ++{ ++ unsigned int b : 10; ++ unsigned int g : 11; ++ unsigned int r : 11; ++}; ++void test_4(struct foo *p, unsigned int v) ++{ ++ p->g = v; ++} ++ + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0052-fix-builtin_apply-return-value.patch b/patches/gcc10.1/gcc-xtensa-0052-fix-builtin_apply-return-value.patch new file mode 100644 index 0000000..375f437 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0052-fix-builtin_apply-return-value.patch @@ -0,0 +1,81 @@ +From 4df06b8339667e15107034842185300cca85c51c Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Sat, 10 Sep 2022 17:31:07 -0700 +Subject: [PATCH] xtensa: gcc: fix builtin_apply return value + +xtensa may use up to 4 registers to return a value from a function, but +recognition of only one register in the xtensa_function_value_regno_p +and missing untyped_call pattern result in that only one register is +saved by the __builtin_apply and 
returned by the __builtin_apply_return. + +gcc/ + * config/xtensa/xtensa.c (xtensa_function_value_regno_p): + Recognize all 4 return registers. + * config/xtensa/xtensa.h (GP_RETURN_REG_COUNT): New definition. + * config/xtensa/xtensa.md (untyped_call): New pattern. +--- + gcc/config/xtensa/xtensa.c | 2 +- + gcc/config/xtensa/xtensa.h | 1 + + gcc/config/xtensa/xtensa.md | 21 +++++++++++++++++++++ + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index baee55ce3..ad4940913 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4508,7 +4508,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + static bool + xtensa_function_value_regno_p (const unsigned int regno) + { +- return (regno == GP_RETURN); ++ return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT); + } + + /* The static chain is passed in memory. Provide rtx giving 'mem' +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index ee2238606..3a986fa1c 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -477,6 +477,7 @@ enum reg_class + point, and values of coprocessor and user-defined modes. */ + #define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) + #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) ++#define GP_RETURN_REG_COUNT 4 + + /* Symbolic macros for the first/last argument registers. 
*/ + #define GP_ARG_FIRST (GP_REG_FIRST + 2) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ec4a69e30..c18640b25 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2310,6 +2310,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "untyped_call" ++ [(parallel [(call (match_operand 0 "") ++ (const_int 0)) ++ (match_operand 1 "") ++ (match_operand 2 "")])] ++ "" ++{ ++ int i; ++ ++ emit_call_insn (gen_call (operands[0], const0_rtx)); ++ ++ for (i = 0; i < XVECLEN (operands[2], 0); i++) ++ { ++ rtx set = XVECEXP (operands[2], 0, i); ++ emit_move_insn (SET_DEST (set), SET_SRC (set)); ++ } ++ ++ emit_insn (gen_blockage ()); ++ DONE; ++}) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch b/patches/gcc10.1/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch new file mode 100644 index 0000000..4384596 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch @@ -0,0 +1,164 @@ +From 3778aeadea08b3f630b89d711f634f967e8d24b3 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Thu, 14 Jul 2022 02:39:59 -0700 +Subject: [PATCH] xtensa: gcc: implement MI thunk generation for call0 ABI + +gcc/ + * config/xtensa/xtensa.c (xtensa_can_output_mi_thunk) + (xtensa_output_mi_thunk): New functions. + (TARGET_ASM_CAN_OUTPUT_MI_THUNK) + (TARGET_ASM_OUTPUT_MI_THUNK): New macro definitions. + (xtensa_prepare_expand_call): Use fixed register a8 as temporary + when called with reload_completed set to 1. 
+--- + gcc/config/xtensa/xtensa.c | 115 ++++++++++++++++++++++++++++++++++++- + 1 file changed, 114 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ad4940913..0ccc63fdf 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -189,6 +189,14 @@ static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + static bool xtensa_function_ok_for_sibcall (tree, tree); ++static bool xtensa_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, ++ const_tree function ATTRIBUTE_UNUSED); ++static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, ++ tree function); + + + +@@ -342,6 +350,12 @@ static bool xtensa_function_ok_for_sibcall (tree, tree); + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall + ++#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK xtensa_can_output_mi_thunk ++ ++#undef TARGET_ASM_OUTPUT_MI_THUNK ++#define TARGET_ASM_OUTPUT_MI_THUNK xtensa_output_mi_thunk ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2164,7 +2178,16 @@ xtensa_prepare_expand_call (int callop, rtx *operands) + addr = gen_sym_PLT (addr); + + if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++ { ++ /* This may be called while generating MI thunk when we pretend ++ that reload is over. Use a8 as a temporary register in that case. */ ++ rtx reg = can_create_pseudo_p () ++ ? 
copy_to_mode_reg (Pmode, addr) ++ : copy_to_suggested_reg (addr, ++ gen_rtx_REG (Pmode, A8_REG), ++ Pmode); ++ XEXP (operands[callop], 0) = reg; ++ } + } + + +@@ -5008,4 +5031,94 @@ xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_U + return true; + } + ++static bool ++xtensa_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, ++ const_tree function ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ ++/* Output code to add DELTA to the first argument, and then jump ++ to FUNCTION. Used for C++ multiple inheritance. */ ++static void ++xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, ++ tree function) ++{ ++ rtx this_rtx; ++ rtx funexp; ++ rtx_insn *insn; ++ int this_reg_no; ++ rtx temp0 = gen_rtx_REG (Pmode, A9_REG); ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); ++ ++ reload_completed = 1; ++ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_reg_no = 3; ++ else ++ this_reg_no = 2; ++ ++ this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no); ++ ++ if (delta) ++ { ++ if (xtensa_simm8 (delta)) ++ emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta))); ++ else ++ { ++ emit_move_insn (temp0, GEN_INT (delta)); ++ emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0)); ++ } ++ } ++ ++ if (vcall_offset) ++ { ++ rtx temp1 = gen_rtx_REG (Pmode, A0_REG + 10); ++ rtx addr = temp1; ++ ++ emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx)); ++ if (xtensa_uimm8x4 (vcall_offset)) ++ addr = plus_constant (Pmode, temp0, vcall_offset); ++ else if (xtensa_simm8 (vcall_offset)) ++ emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset))); ++ else ++ { ++ emit_move_insn (temp1, GEN_INT (vcall_offset)); ++ emit_insn (gen_addsi3 (temp1, temp0, temp1)); ++ } ++ emit_move_insn (temp1, 
gen_rtx_MEM (Pmode, addr)); ++ emit_insn (gen_add2_insn (this_rtx, temp1)); ++ } ++ ++ /* Generate a tail call to the target function. */ ++ if (!TREE_USED (function)) ++ { ++ assemble_external (function); ++ TREE_USED (function) = 1; ++ } ++ ++ funexp = XEXP (DECL_RTL (function), 0); ++ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); ++ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ ++ insn = get_insns (); ++ shorten_branches (insn); ++ assemble_start_function (thunk, fnname); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); ++ assemble_end_function (thunk, fnname); ++ ++ /* Stop pretending to be a post-reload pass. */ ++ reload_completed = 0; ++} ++ + #include "gt-xtensa.h" +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0054-enable-section-anchors-support.patch b/patches/gcc10.1/gcc-xtensa-0054-enable-section-anchors-support.patch new file mode 100644 index 0000000..cca2ff4 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0054-enable-section-anchors-support.patch @@ -0,0 +1,29 @@ +From 6e38872f54f49c0b3b3f72668dcdbfa66007ceb6 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 16 Sep 2022 20:56:39 -0700 +Subject: [PATCH] xtensa: gcc: enable section anchors support + +gcc/ + * config/xtensa/xtensa.c (TARGET_MAX_ANCHOR_OFFSET): New + definition. 
+--- + gcc/config/xtensa/xtensa.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0ccc63fdf..ba4dd47c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -356,6 +356,9 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK xtensa_output_mi_thunk + ++#undef TARGET_MAX_ANCHOR_OFFSET ++#define TARGET_MAX_ANCHOR_OFFSET 1020 ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch b/patches/gcc10.1/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch new file mode 100644 index 0000000..11f1d50 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch @@ -0,0 +1,301 @@ +From 2fa3f80877ab2b7a06403097c09fbc4bc892d6e3 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 14 Oct 2022 19:43:23 +0900 +Subject: [PATCH] xtensa: Prepare the transition from Reload to LRA + +This patch provides the first step in the transition from Reload to LRA +in Xtensa. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_split1_finished_p, xtensa_split_DI_reg_imm): New prototypes. + * config/xtensa/xtensa.c + (xtensa_split1_finished_p, xtensa_split_DI_reg_imm, xtensa_lra_p): + New functions. + (TARGET_LRA_P): Replace the dummy hook with xtensa_lra_p. + (xt_true_regnum): Rework. + * config/xtensa/xtensa.h (CALL_REALLY_USED_REGISTERS): + Switch from CALL_USED_REGISTERS, and revise the comment. + * config/xtensa/constraints.md (Y): + Use !xtensa_split1_finished_p() instead of can_create_pseudo_p(). + * config/xtensa/predicates.md (move_operand): Ditto. 
+ * config/xtensa/xtensa.md: Add two new split patterns: + - splits DImode immediate load into two SImode ones + - puts out-of-constraint SImode constants into the constant pool + * config/xtensa/xtensa.opt (-mlra): New target-specific option + for testing purpose. +--- + gcc/config/xtensa/constraints.md | 2 +- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 69 ++++++++++++++++++++++++++----- + gcc/config/xtensa/xtensa.h | 8 ++-- + gcc/config/xtensa/xtensa.md | 36 ++++++++++++---- + gcc/config/xtensa/xtensa.opt | 4 ++ + 7 files changed, 99 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index f590dcf3a..a2cb57000 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -121,7 +121,7 @@ + (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "TARGET_AUTO_LITPOOLS")) + (and (match_code "const_int") +- (match_test "can_create_pseudo_p ()")))) ++ (match_test "! xtensa_split1_finished_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 633cc6264..09d9a5770 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -149,7 +149,7 @@ + (ior (and (match_code "const_int") + (match_test "(GET_MODE_CLASS (mode) == MODE_INT + && xtensa_simm12b (INTVAL (op))) +- || can_create_pseudo_p ()")) ++ || ! 
xtensa_split1_finished_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 75ed3bfb0..63b147a90 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -58,6 +58,8 @@ extern char *xtensa_emit_call (int, rtx *); + extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); ++extern bool xtensa_split1_finished_p (void); ++extern void xtensa_split_DI_reg_imm (rtx *); + + #ifdef TREE_CODE + extern void init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba4dd47c5..658d19924 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see + #include "hw-doloop.h" + #include "rtl-iter.h" + #include "insn-attr.h" ++#include "tree-pass.h" + + /* This file should be included last. 
*/ + #include "target-def.h" +@@ -197,6 +198,7 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function); ++static bool xtensa_lra_p (void); + + + +@@ -291,7 +293,7 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem + + #undef TARGET_LRA_P +-#define TARGET_LRA_P hook_bool_void_false ++#define TARGET_LRA_P xtensa_lra_p + + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p +@@ -482,21 +484,30 @@ xtensa_mask_immediate (HOST_WIDE_INT v) + int + xt_true_regnum (rtx x) + { +- if (GET_CODE (x) == REG) ++ if (REG_P (x)) + { +- if (reg_renumber +- && REGNO (x) >= FIRST_PSEUDO_REGISTER +- && reg_renumber[REGNO (x)] >= 0) ++ if (! HARD_REGISTER_P (x) ++ && reg_renumber ++ && (lra_in_progress || reg_renumber[REGNO (x)] >= 0)) + return reg_renumber[REGNO (x)]; + return REGNO (x); + } +- if (GET_CODE (x) == SUBREG) ++ if (SUBREG_P (x)) + { + int base = xt_true_regnum (SUBREG_REG (x)); +- if (base >= 0 && base < FIRST_PSEUDO_REGISTER) +- return base + subreg_regno_offset (REGNO (SUBREG_REG (x)), +- GET_MODE (SUBREG_REG (x)), +- SUBREG_BYTE (x), GET_MODE (x)); ++ ++ if (base >= 0 ++ && HARD_REGISTER_NUM_P (base)) ++ { ++ struct subreg_info info; ++ ++ subreg_get_info (lra_in_progress ++ ? (unsigned) base : REGNO (SUBREG_REG (x)), ++ GET_MODE (SUBREG_REG (x)), ++ SUBREG_BYTE (x), GET_MODE (x), &info); ++ if (info.representable_p) ++ return base + info.offset; ++ } + } + return -1; + } +@@ -2468,6 +2479,36 @@ xtensa_shlrd_which_direction (rtx op0, rtx op1) + } + + ++/* Return true after "split1" pass has been finished. 
*/ ++ ++bool ++xtensa_split1_finished_p (void) ++{ ++ return cfun && (cfun->curr_properties & PROP_rtl_split_insns); ++} ++ ++ ++/* Split a DImode pair of reg (operand[0]) and const_int (operand[1]) into ++ two SImode pairs, the low-part (operands[0] and [1]) and the high-part ++ (operands[2] and [3]). */ ++ ++void ++xtensa_split_DI_reg_imm (rtx *operands) ++{ ++ rtx lowpart, highpart; ++ ++ if (WORDS_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ ++ operands[3] = highpart; ++ operands[2] = gen_highpart (SImode, operands[0]); ++ operands[1] = lowpart; ++ operands[0] = gen_lowpart (SImode, operands[0]); ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -5124,4 +5165,12 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + reload_completed = 0; + } + ++/* Implement TARGET_LRA_P. */ ++ ++static bool ++xtensa_lra_p (void) ++{ ++ return TARGET_LRA; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 3a986fa1c..4b08ee5c1 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -228,7 +228,7 @@ along with GCC; see the file COPYING3. If not see + } + + /* 1 for registers not available across function calls. +- These must include the FIXED_REGISTERS and also any ++ These need not include the FIXED_REGISTERS but must any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. +@@ -241,10 +241,10 @@ along with GCC; see the file COPYING3. If not see + + Proper values are computed in TARGET_CONDITIONAL_REGISTER_USAGE. 
*/ + +-#define CALL_USED_REGISTERS \ ++#define CALL_REALLY_USED_REGISTERS \ + { \ +- 1, 1, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, \ +- 1, 1, 1, \ ++ 1, 0, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, \ ++ 0, 0, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, \ + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c18640b25..7c248ed2d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -943,14 +943,9 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx lowpart, highpart; +- +- if (TARGET_BIG_ENDIAN) +- split_double (operands[1], &highpart, &lowpart); +- else +- split_double (operands[1], &lowpart, &highpart); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); ++ xtensa_split_DI_reg_imm (operands); ++ emit_move_insn (operands[0], operands[1]); ++ emit_move_insn (operands[2], operands[3]); + DONE; + } + +@@ -984,6 +979,19 @@ + } + }) + ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "const_int_operand"))] ++ "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS ++ && ! xtensa_split1_finished_p ()" ++ [(set (match_dup 0) ++ (match_dup 1)) ++ (set (match_dup 2) ++ (match_dup 3))] ++{ ++ xtensa_split_DI_reg_imm (operands); ++}) ++ + ;; 32-bit Integer moves + + (define_expand "movsi" +@@ -1020,6 +1028,18 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand"))] ++ "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS ++ && ! xtensa_split1_finished_p () ++ && ! 
xtensa_simm12b (INTVAL (operands[1]))" ++ [(set (match_dup 0) ++ (match_dup 1))] ++{ ++ operands[1] = force_const_mem (SImode, operands[1]); ++}) ++ + (define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "constantpool_operand"))] +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index 97aa44f92..106af4e30 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -34,6 +34,10 @@ mextra-l32r-costs= + Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) + Set extra memory access cost for L32R instruction, in clock-cycle units. + ++mlra ++Target Mask(LRA) ++Use LRA instead of reload (transitional). ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch b/patches/gcc10.1/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch new file mode 100644 index 0000000..9f0ab74 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch @@ -0,0 +1,66 @@ +From d262f47d2f1c0dec78b59b0b3eec26d17b60bf83 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 21 Oct 2022 11:58:35 +0900 +Subject: [PATCH] xtensa: Make register A0 allocable for the CALL0 ABI + +This patch offers an additional allocable register by RA for the CALL0 +ABI. + +> Register a0 holds the return address upon entry to a function, but +> unlike the windowed register ABI, it is not reserved for this purpose +> and may hold other values after the return address has been saved. + - Xtensa ISA Reference Manual, + 8.1.2 "CALL0 Register Usage and Stack Layout" [p.589] + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_conditional_register_usage): + Remove register A0 from FIXED_REGS if the CALL0 ABI. 
+ (xtensa_expand_epilogue): Change to emit '(use (reg:SI A0_REG))' + unconditionally after restoring callee-saved registers for + sibling-call functions, in order to prevent misleading that + register A0 is free to use. +--- + gcc/config/xtensa/xtensa.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 658d19924..767264641 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3506,15 +3506,14 @@ xtensa_expand_epilogue (bool sibcall_p) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (reg = gen_rtx_REG (SImode, regno), ++ emit_move_insn (gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); +- if (regno == A0_REG && sibcall_p) +- emit_use (reg); + } + } ++ if (sibcall_p) ++ emit_use (gen_rtx_REG (SImode, A0_REG)); + + if (cfun->machine->current_frame_size > 0) + { +@@ -5005,6 +5004,13 @@ xtensa_conditional_register_usage (void) + /* Remove hard FP register from the preferred reload registers set. */ + CLEAR_HARD_REG_BIT (reg_class_contents[(int)RL_REGS], + HARD_FRAME_POINTER_REGNUM); ++ ++ /* Register A0 holds the return address upon entry to a function ++ for the CALL0 ABI, but unlike the windowed register ABI, it is ++ not reserved for this purpose and may hold other values after ++ the return address has been saved. 
*/ ++ if (!TARGET_WINDOWED_ABI) ++ fixed_regs[A0_REG] = 0; + } + + /* Map hard register number to register class */ +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch b/patches/gcc10.1/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch new file mode 100644 index 0000000..ec28936 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch @@ -0,0 +1,74 @@ +From 3092ce3d24acb6ca10d8c980fb49685832566ae4 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 26 Oct 2022 15:27:51 +0900 +Subject: [PATCH] xtensa: Fix out-of-bounds array access in the movdi pattern + +The following new warnings were introduced in the commit +4f3f0296acbb ("xtensa: Prepare the transition from Reload to LRA"): + +gcc/config/xtensa/xtensa.md:945:26: error: array subscript 3 is above + array bounds of 'rtx_def* [2]' [-Werror=array-bounds] + 945 | emit_move_insn (operands[2], operands[3]); +gcc/config/xtensa/xtensa.md:945:26: error: array subscript 2 is above + array bounds of 'rtx_def* [2]' [-Werror=array-bounds] + 945 | emit_move_insn (operands[2], operands[3]); + +From gcc/insn-emit.cc (generated by building): + +> /* ../../gcc/config/xtensa/xtensa.md:932 */ +> rtx +> gen_movdi (rtx operand0, +> rtx operand1) +> { +> rtx_insn *_val = 0; +> start_sequence (); +> { +> rtx operands[2]; // only 2 elements +> operands[0] = operand0; +> operands[1] = operand1; +> #define FAIL return (end_sequence (), _val) +> #define DONE return (_val = get_insns (), end_sequence (), _val) +> #line 936 "../../gcc/config/xtensa/xtensa.md" +> { +> if (CONSTANT_P (operands[1])) +> { +> /* Split in halves if 64-bit Const-to-Reg moves +> because of offering further optimization opportunities. */ +> if (register_operand (operands[0], DImode)) +> { +> xtensa_split_DI_reg_imm (operands); // out-of-bounds! 
+> emit_move_insn (operands[0], operands[1]); +> emit_move_insn (operands[2], operands[3]); // out-of-bounds! +> DONE; +> } + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi): + Copy operands[0...1] to ops[0...3] and then use the latter before + calling xtensa_split_DI_reg_imm() and emitting insns. +--- + gcc/config/xtensa/xtensa.md | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 7c248ed2d..31e5f1b28 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -943,9 +943,10 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- xtensa_split_DI_reg_imm (operands); +- emit_move_insn (operands[0], operands[1]); +- emit_move_insn (operands[2], operands[3]); ++ rtx ops[4] = { operands[0], operands[1] }; ++ xtensa_split_DI_reg_imm (ops); ++ emit_move_insn (ops[0], ops[1]); ++ emit_move_insn (ops[2], ops[3]); + DONE; + } + +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch b/patches/gcc10.1/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch new file mode 100644 index 0000000..6ae21e1 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch @@ -0,0 +1,576 @@ +From b326051c7a6d15e15b4410ef658d2e67c0a604af Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Tabify, and trim trailing spaces + +Cosmetic and no functional changes. + +gcc/ChangeLog: + + * config/xtensa/elf.h: Tabify, and trim trailing spaces. + * config/xtensa/linux.h: Likewise. + * config/xtensa/uclinux.h: Likewise. + * config/xtensa/xtensa.c: Likewise. + * config/xtensa/xtensa.h: Likewise. + * config/xtensa/xtensa.md: Likewise. 
+--- + gcc/config/xtensa/elf.h | 32 ++++++------ + gcc/config/xtensa/linux.h | 1 - + gcc/config/xtensa/uclinux.h | 1 - + gcc/config/xtensa/xtensa.c | 85 ++++++++++++++++---------------- + gcc/config/xtensa/xtensa.h | 6 +-- + gcc/config/xtensa/xtensa.md | 98 ++++++++++++++++++------------------- + 6 files changed, 110 insertions(+), 113 deletions(-) + +diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h +index 6fd589fed..e0d1d7275 100644 +--- a/gcc/config/xtensa/elf.h ++++ b/gcc/config/xtensa/elf.h +@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see + "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s" + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC "crtend%O%s crtn%O%s" ++#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + + #undef LINK_SPEC + #define LINK_SPEC \ +@@ -82,19 +82,17 @@ along with GCC; see the file COPYING3. If not see + /* Search for headers in $tooldir/arch/include and for libraries and + startfiles in $tooldir/arch/lib. */ + #define GCC_DRIVER_HOST_INITIALIZATION \ +-do \ +-{ \ +- char *tooldir, *archdir; \ +- tooldir = concat (tooldir_base_prefix, spec_machine, \ +- dir_separator_str, NULL); \ +- if (!IS_ABSOLUTE_PATH (tooldir)) \ +- tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ +- spec_version, dir_separator_str, tooldir, NULL); \ +- archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ +- add_prefix (&startfile_prefixes, \ +- concat (archdir, "lib", dir_separator_str, NULL), \ +- "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ +- add_prefix (&include_prefixes, archdir, \ +- "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ +- } \ +-while (0) ++ do { \ ++ char *tooldir, *archdir; \ ++ tooldir = concat (tooldir_base_prefix, spec_machine, \ ++ dir_separator_str, NULL); \ ++ if (!IS_ABSOLUTE_PATH (tooldir)) \ ++ tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ ++ spec_version, dir_separator_str, tooldir, NULL); \ ++ archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ ++ 
add_prefix (&startfile_prefixes, \ ++ concat (archdir, "lib", dir_separator_str, NULL), \ ++ "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ ++ add_prefix (&include_prefixes, archdir, \ ++ "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ ++ } while (0) +diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h +index 49796c97f..51ea065bd 100644 +--- a/gcc/config/xtensa/linux.h ++++ b/gcc/config/xtensa/linux.h +@@ -65,4 +65,3 @@ along with GCC; see the file COPYING3. If not see + #define XTENSA_ALWAYS_PIC 1 + + #undef DBX_REGISTER_NUMBER +- +diff --git a/gcc/config/xtensa/uclinux.h b/gcc/config/xtensa/uclinux.h +index 64ba26f39..51b6f2f95 100644 +--- a/gcc/config/xtensa/uclinux.h ++++ b/gcc/config/xtensa/uclinux.h +@@ -66,4 +66,3 @@ along with GCC; see the file COPYING3. If not see + #define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + + #undef DBX_REGISTER_NUMBER +- +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 767264641..d3dafa4aa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -176,7 +176,7 @@ static bool constantpool_address_p (const_rtx addr); + static bool xtensa_legitimate_constant_p (machine_mode, rtx); + static void xtensa_reorg (void); + static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int &, +- unsigned int, bool); ++ unsigned int, bool); + static const char *xtensa_invalid_within_doloop (const rtx_insn *); + + static bool xtensa_member_type_forces_blk (const_tree, +@@ -2105,7 +2105,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + done = 1; + } + break; +- } ++ } + } + + output_asm_insn ("%1_LEND:", operands); +@@ -2305,7 +2305,7 @@ xtensa_tls_module_base (void) + xtensa_tls_module_base_symbol = + gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol) +- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; ++ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return xtensa_tls_module_base_symbol; +@@ -3444,7 
+3444,7 @@ xtensa_expand_prologue (void) + } + } + else +- { ++ { + insn = emit_insn (gen_movsi (hard_frame_pointer_rtx, + stack_pointer_rtx)); + if (!TARGET_WINDOWED_ABI) +@@ -3567,11 +3567,12 @@ xtensa_set_return_address (rtx address, rtx scratch) + gen_rtx_REG (SImode, A0_REG)); + rtx insn; + +- if (total_size > 1024) { +- emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); +- emit_insn (gen_addsi3 (scratch, frame, scratch)); +- a0_addr = scratch; +- } ++ if (total_size > 1024) ++ { ++ emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); ++ emit_insn (gen_addsi3 (scratch, frame, scratch)); ++ a0_addr = scratch; ++ } + + insn = emit_move_insn (gen_frame_mem (SImode, a0_addr), address); + RTX_FRAME_RELATED_P (insn) = 1; +@@ -3853,8 +3854,8 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + /* Check if the argument is in registers: + + if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4 +- && !must_pass_in_stack (type)) +- __array = (AP).__va_reg; */ ++ && !must_pass_in_stack (type)) ++ __array = (AP).__va_reg; */ + + array = create_tmp_var (ptr_type_node); + +@@ -4550,8 +4551,8 @@ xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) + /* Worker function for TARGET_FUNCTION_VALUE. */ + + rtx +-xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, +- bool outgoing) ++xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, ++ bool outgoing) + { + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) + && TYPE_PRECISION (valtype) < BITS_PER_WORD) +@@ -4754,7 +4755,7 @@ xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + + static bool + xtensa_can_use_doloop_p (const widest_int &, const widest_int &, +- unsigned int loop_depth, bool entered_at_top) ++ unsigned int loop_depth, bool entered_at_top) + { + /* Considering limitations in the hardware, only use doloop + for innermost loops which must be entered from the top. 
*/ +@@ -4793,32 +4794,32 @@ hwloop_optimize (hwloop_info loop) + if (loop->depth > 1) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d is not innermost\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d is not innermost\n", ++ loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d has more than one entry\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d has more than one entry\n", ++ loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d is not entered from head\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d is not entered from head\n", ++ loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d has invalid insn\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d has invalid insn\n", ++ loop->loop_no); + return false; + } + +@@ -4826,8 +4827,8 @@ hwloop_optimize (hwloop_info loop) + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d uses iterator\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d uses iterator\n", ++ loop->loop_no); + return false; + } + +@@ -4839,8 +4840,8 @@ hwloop_optimize (hwloop_info loop) + if (!insn) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d start_label not before loop_end\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d start_label not before loop_end\n", ++ loop->loop_no); + return false; + } + +@@ -4864,8 +4865,8 @@ hwloop_optimize (hwloop_info loop) + start_sequence (); + + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, +- loop->start_label, +- loop->iter_reg)); ++ loop->start_label, ++ loop->iter_reg)); + + seq = get_insns (); + +@@ -4881,21 +4882,21 @@ hwloop_optimize (hwloop_info loop) + seq = emit_label_before (gen_label_rtx (), seq); + new_bb = create_basic_block (seq, 
insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) +- { +- if (!(e->flags & EDGE_FALLTHRU)) +- redirect_edge_and_branch_force (e, new_bb); +- else +- redirect_edge_succ (e, new_bb); +- } ++ { ++ if (!(e->flags & EDGE_FALLTHRU)) ++ redirect_edge_and_branch_force (e, new_bb); ++ else ++ redirect_edge_succ (e, new_bb); ++ } + + make_edge (new_bb, loop->head, 0); + } + else + { + while (DEBUG_INSN_P (entry_after) +- || (NOTE_P (entry_after) ++ || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) +- entry_after = PREV_INSN (entry_after); ++ entry_after = PREV_INSN (entry_after); + + emit_insn_after (seq, entry_after); + } +@@ -4916,15 +4917,15 @@ hwloop_fail (hwloop_info loop) + rtx_insn *insn = loop->loop_end; + + emit_insn_before (gen_addsi3 (loop->iter_reg, +- loop->iter_reg, +- constm1_rtx), +- loop->loop_end); ++ loop->iter_reg, ++ constm1_rtx), ++ loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, +- loop->iter_reg, const0_rtx, +- loop->start_label), +- loop->loop_end); ++ loop->iter_reg, const0_rtx, ++ loop->start_label), ++ loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 4b08ee5c1..b5fec1cb3 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -63,7 +63,7 @@ along with GCC; see the file COPYING3. 
If not see + #define TARGET_S32C1I XCHAL_HAVE_S32C1I + #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS + #define TARGET_THREADPTR XCHAL_HAVE_THREADPTR +-#define TARGET_LOOPS XCHAL_HAVE_LOOPS ++#define TARGET_LOOPS XCHAL_HAVE_LOOPS + #define TARGET_WINDOWED_ABI (XSHAL_ABI == XTHAL_ABI_WINDOWED) + #define TARGET_DEBUG XCHAL_HAVE_DEBUG + #define TARGET_L32R XCHAL_HAVE_L32R +@@ -297,7 +297,7 @@ extern int leaf_function; + + /* Coprocessor registers */ + #define BR_REG_FIRST 18 +-#define BR_REG_LAST 18 ++#define BR_REG_LAST 18 + #define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1) + + /* 16 floating-point registers */ +@@ -743,7 +743,7 @@ typedef struct xtensa_args + + + /* Define output to appear before the constant pool. */ +-#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ ++#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ + do { \ + if ((SIZE) > 0 || !TARGET_WINDOWED_ABI) \ + { \ +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 31e5f1b28..08fb6f312 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -70,13 +70,13 @@ + + ;; This code iterator is for floating-point comparisons. + (define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered]) +-(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") ++(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") + (uneq "ueq") (unlt "ult") (unle "ule") + (unordered "un")]) + + ;; This iterator and attribute allow to combine most atomic operations. 
+ (define_code_iterator ATOMIC [and ior xor plus minus mult]) +-(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") ++(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") + (plus "add") (minus "sub") (mult "nand")]) + + ;; This mode iterator allows the HI and QI patterns to be defined from +@@ -195,7 +195,7 @@ + + (define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a") +- (minus:SI (match_operand:SI 1 "register_operand" "r") ++ (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub\t%0, %1, %2" +@@ -434,7 +434,7 @@ + + (define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=a") +- (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") ++ (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MINMAX" + "\t%0, %1, %2" +@@ -507,7 +507,7 @@ + + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") +- (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "!optimize_debug && optimize > 1" + { + /* GIMPLE manual byte-swapping recognition is now activated. 
+@@ -1025,7 +1025,7 @@ + %v0s32i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" +- [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") ++ [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + +@@ -1175,7 +1175,7 @@ + "((register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)) + && !(FP_REG_P (xt_true_regnum (operands[0])) +- && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" ++ && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" + "@ + mov.s\t%0, %1 + %v1lsi\t%0, %1 +@@ -1360,7 +1360,7 @@ + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] +- "" ++ "" + "@ + slli\t%0, %1, %R2 + ssl\t%2\;sll\t%0, %1" +@@ -1946,13 +1946,13 @@ + + (define_insn "zero_cost_loop_start" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "register_operand" "0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "register_operand" "0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] + "TARGET_LOOPS && optimize" + "loop\t%0, %l1_LEND" +@@ -1962,13 +1962,13 @@ + + (define_insn "zero_cost_loop_end" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a,m") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus 
(match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r"))] + "TARGET_LOOPS && optimize" +@@ -1979,13 +1979,13 @@ + + (define_insn "loop_end" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "register_operand" "0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "register_operand" "0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] + "TARGET_LOOPS && optimize" + { +@@ -1998,13 +1998,13 @@ + + (define_split + [(set (pc) +- (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 2 "nonimmediate_operand" "") +- (plus:SI (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch 3))] + "TARGET_LOOPS && optimize && reload_completed" +@@ -2020,7 +2020,7 @@ + emit_move_insn (operands[0], operands[3]); + test = gen_rtx_NE (VOIDmode, operands[3], const0_rtx); + emit_jump_insn (gen_cbranchsi4 (test, operands[3], +- const0_rtx, operands[1])); ++ const0_rtx, operands[1])); + } + else + { +@@ -2034,15 +2034,15 @@ + ; operand 1 is the label to jump to at the top of the loop + (define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else +- (ne (match_operand:SI 0 "" "") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) +- (set (match_dup 0) +- (plus:SI (match_dup 0) +- (const_int -1))) +- (unspec [(const_int 0)] UNSPEC_LSETUP_END) +- (clobber (match_dup 2))])] ; match_scratch ++ (ne (match_operand:SI 0 "" "") ++ 
(const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int -1))) ++ (unspec [(const_int 0)] UNSPEC_LSETUP_END) ++ (clobber (match_dup 2))])] ; match_scratch + "TARGET_LOOPS && optimize" + { + /* The loop optimizer doesn't check the predicates... */ +@@ -2281,8 +2281,8 @@ + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") +- (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) +- (match_operand 2 "" "i")))] ++ (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] + "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); +@@ -2387,9 +2387,9 @@ + + (define_expand "allocate_stack" + [(set (match_operand 0 "nonimmed_operand") +- (minus (reg A1_REG) (match_operand 1 "add_operand"))) ++ (minus (reg A1_REG) (match_operand 1 "add_operand"))) + (set (reg A1_REG) +- (minus (reg A1_REG) (match_dup 1)))] ++ (minus (reg A1_REG) (match_dup 1)))] + "TARGET_WINDOWED_ABI" + { + if (CONST_INT_P (operands[1])) +@@ -2514,7 +2514,7 @@ + + (define_expand "frame_blockage" + [(set (match_dup 0) +- (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] ++ (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); +@@ -2524,7 +2524,7 @@ + + (define_insn "*frame_blockage" + [(set (match_operand:BLK 0 "" "") +- (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] ++ (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" + [(set_attr "type" "nop") +@@ -2801,7 +2801,7 @@ + (define_expand "sync_new_" + [(set (match_operand:HQI 0 "register_operand") + (ATOMIC:HQI (match_operand:HQI 1 "memory_operand") +- (match_operand:HQI 2 "register_operand"))) ++ (match_operand:HQI 2 "register_operand"))) + (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))] + "TARGET_S32C1I" + { +-- +2.30.2 + diff --git 
a/patches/gcc10.1/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch b/patches/gcc10.1/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch new file mode 100644 index 0000000..8809c85 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch @@ -0,0 +1,42 @@ +From b1f4a90f366a3a5775f30507e2b7800ad366dcdc Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Clean up xtensa_expand_prologue + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): Modify to + exit the inspection loops as soon as the necessity of stack + pointer is found. +--- + gcc/config/xtensa/xtensa.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index d3dafa4aa..d4713cd8d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3362,12 +3362,18 @@ xtensa_expand_prologue (void) + ref; ref = DF_REF_NEXT_REG (ref)) + if (DF_REF_CLASS (ref) == DF_REF_REGULAR + && NONJUMP_INSN_P (DF_REF_INSN (ref))) +- stack_pointer_needed = true; ++ { ++ stack_pointer_needed = true; ++ break; ++ } + /* Check if callee-saved registers really need saving to the stack. 
*/ + if (!stack_pointer_needed) + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) + if (xtensa_call_save_reg (regno)) +- stack_pointer_needed = true; ++ { ++ stack_pointer_needed = true; ++ break; ++ } + + cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed; + +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch b/patches/gcc10.1/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch new file mode 100644 index 0000000..789c1a3 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch @@ -0,0 +1,71 @@ +From e3b1e99a383cbceb2c910a3a88392f37e58daeb2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Change GP_RETURN{,_REG_COUNT} to + GP_RETURN_{FIRST,LAST} + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (GP_RETURN, GP_RETURN_REG_COUNT): + Change to GP_RETURN_FIRST and GP_RETURN_LAST, respectively. + * config/xtensa/xtensa.c (xtensa_function_value, + xtensa_libcall_value, xtensa_function_value_regno_p): Ditto. +--- + gcc/config/xtensa/xtensa.c | 10 +++++----- + gcc/config/xtensa/xtensa.h | 4 ++-- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index d4713cd8d..054a44ea3 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4561,9 +4561,9 @@ xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + bool outgoing) + { + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) +- && TYPE_PRECISION (valtype) < BITS_PER_WORD) +- ? SImode : TYPE_MODE (valtype), +- outgoing ? GP_OUTGOING_RETURN : GP_RETURN); ++ && TYPE_PRECISION (valtype) < BITS_PER_WORD) ++ ? SImode : TYPE_MODE (valtype), ++ outgoing ? GP_OUTGOING_RETURN : GP_RETURN_FIRST); + } + + /* Worker function for TARGET_LIBCALL_VALUE. 
*/ +@@ -4573,7 +4573,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + { + return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) +- ? SImode : mode, GP_RETURN); ++ ? SImode : mode, GP_RETURN_FIRST); + } + + /* Worker function TARGET_FUNCTION_VALUE_REGNO_P. */ +@@ -4581,7 +4581,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + static bool + xtensa_function_value_regno_p (const unsigned int regno) + { +- return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT); ++ return IN_RANGE (regno, GP_RETURN_FIRST, GP_RETURN_LAST); + } + + /* The static chain is passed in memory. Provide rtx giving 'mem' +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index b5fec1cb3..e3f808c42 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -475,9 +475,9 @@ enum reg_class + + /* Symbolic macros for the registers used to return integer, floating + point, and values of coprocessor and user-defined modes. */ +-#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) ++#define GP_RETURN_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE) ++#define GP_RETURN_LAST (GP_RETURN_FIRST + 3) + #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) +-#define GP_RETURN_REG_COUNT 4 + + /* Symbolic macros for the first/last argument registers. 
*/ + #define GP_ARG_FIRST (GP_REG_FIRST + 2) +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch b/patches/gcc10.1/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch new file mode 100644 index 0000000..5b71081 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch @@ -0,0 +1,38 @@ +From 46b7c587fe47fa73811d7cd9b453ee32f7ba8ad8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Generate density instructions in set_frame_ptr + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (set_frame_ptr): Fix to reflect + TARGET_DENSITY. +--- + gcc/config/xtensa/xtensa.md | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 08fb6f312..06fda8aa5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2567,12 +2567,15 @@ + "" + { + if (frame_pointer_needed) +- return "mov\ta7, sp"; ++ return (TARGET_DENSITY ? "mov.n\ta7, sp" : "mov\ta7, sp"); + return ""; + } + [(set_attr "type" "move") + (set_attr "mode" "SI") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Post-reload splitter to remove fp assignment when it's not needed. 
+ (define_split +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch b/patches/gcc10.1/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch new file mode 100644 index 0000000..57976f3 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch @@ -0,0 +1,77 @@ +From 101c49b504fb567227291a381ada09273d8ec4a7 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 23 Dec 2022 12:17:09 -0800 +Subject: [PATCH] gcc: xtensa: use define_c_enums instead of + define_constants + +This improves RTL dumps readability. No functional changes. + +gcc/ + * config/xtensa/xtensa.md (unspec): Extract UNSPEC_* constants + into this enum. + (unspecv): Extract UNSPECV_* constants into this enum. +--- + gcc/config/xtensa/xtensa.md | 46 ++++++++++++++++++++----------------- + 1 file changed, 25 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 06fda8aa5..a2cfb3df7 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,28 +25,32 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++]) ++ ++(define_c_enum "unspec" [ ++ UNSPEC_NOP ++ UNSPEC_PLT ++ UNSPEC_RET_ADDR ++ UNSPEC_TPOFF ++ UNSPEC_DTPOFF ++ UNSPEC_TLS_FUNC ++ UNSPEC_TLS_ARG ++ UNSPEC_TLS_CALL ++ UNSPEC_TP ++ UNSPEC_MEMW ++ UNSPEC_LSETUP_START ++ UNSPEC_LSETUP_END ++ UNSPEC_FRAME_BLOCKAGE ++]) + +- (UNSPEC_NOP 2) +- (UNSPEC_PLT 3) +- (UNSPEC_RET_ADDR 4) +- (UNSPEC_TPOFF 5) +- (UNSPEC_DTPOFF 6) +- (UNSPEC_TLS_FUNC 7) +- (UNSPEC_TLS_ARG 8) +- (UNSPEC_TLS_CALL 9) +- (UNSPEC_TP 10) +- (UNSPEC_MEMW 11) +- (UNSPEC_LSETUP_START 12) +- (UNSPEC_LSETUP_END 13) +- (UNSPEC_FRAME_BLOCKAGE 14) +- +- (UNSPECV_SET_FP 1) +- (UNSPECV_ENTRY 2) +- (UNSPECV_S32RI 4) +- (UNSPECV_S32C1I 5) +- (UNSPECV_EH_RETURN 6) +- (UNSPECV_SET_TP 7) +- (UNSPECV_BLOCKAGE 8) ++(define_c_enum "unspecv" [ ++ UNSPECV_SET_FP ++ UNSPECV_ENTRY ++ UNSPECV_S32RI ++ UNSPECV_S32C1I ++ 
UNSPECV_EH_RETURN ++ UNSPECV_SET_TP ++ UNSPECV_BLOCKAGE + ]) + + ;; This code iterator allows signed and unsigned widening multiplications +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0063-Check-DF-availability-before-use.patch b/patches/gcc10.1/gcc-xtensa-0063-Check-DF-availability-before-use.patch new file mode 100644 index 0000000..afa8d82 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0063-Check-DF-availability-before-use.patch @@ -0,0 +1,31 @@ +From 4b938a83c19e3e7dc71b407e2f78f2ccbc57b742 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 29 Dec 2022 21:14:33 +0900 +Subject: [PATCH] xtensa: Check DF availability before use + +Perhaps no problem, but for safety. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): Fix to check + DF availability before use of DF_* macros. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 054a44ea3..8f748efa4 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3357,7 +3357,7 @@ xtensa_expand_prologue (void) + || crtl->calls_eh_return; + + /* Check if the function body really needs the stack pointer. 
*/ +- if (!stack_pointer_needed) ++ if (!stack_pointer_needed && df) + for (ref = DF_REG_USE_CHAIN (A1_REG); + ref; ref = DF_REF_NEXT_REG (ref)) + if (DF_REF_CLASS (ref) == DF_REF_REGULAR +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch b/patches/gcc10.1/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch new file mode 100644 index 0000000..93f3399 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch @@ -0,0 +1,28 @@ +From 4433ebb185be476704937b3a8b3fe5e568c22712 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Wed, 28 Dec 2022 11:27:21 -0800 +Subject: [PATCH] gcc: xtensa: use GP_RETURN_* instead of magic constant + +gcc/ + * config/xtensa/xtensa.c (xtensa_return_in_memory): Use + GP_RETURN_* instead of magic constant. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8f748efa4..ffd36217d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4551,7 +4551,7 @@ static bool + xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) + { + return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) +- > 4 * UNITS_PER_WORD); ++ > (unsigned) (GP_RETURN_LAST - GP_RETURN_FIRST + 1) * UNITS_PER_WORD); + } + + /* Worker function for TARGET_FUNCTION_VALUE. 
*/ +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch b/patches/gcc10.1/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch new file mode 100644 index 0000000..2964a2b --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch @@ -0,0 +1,245 @@ +From 385f165d92e68a58206005e9652e8e880a2ed2d7 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 28 Dec 2022 22:50:52 +0900 +Subject: [PATCH] xtensa: Optimize stack frame adjustment more + +This patch introduces a convenient helper function for integer immediate +addition with scratch register as needed, that splits and emits either +up to two ADDI/ADDMI machine instructions or an addition by register +following an integer immediate load (which may later be transformed by +constantsynth). + +By using the helper function, it makes stack frame adjustment logic +simplified and instruction count less in some cases. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c + (xtensa_split_imm_two_addends, xtensa_emit_add_imm): + New helper functions. + (xtensa_set_return_address, xtensa_output_mi_thunk): + Change to use the helper function. + (xtensa_emit_adjust_stack_ptr): Ditto. + And also change to try reusing the content of scratch register + A9 if the register is not modified in the function body. 
+--- + gcc/config/xtensa/xtensa.c | 151 ++++++++++++++++++++++++++----------- + 1 file changed, 106 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ffd36217d..b05ae9045 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -104,6 +104,7 @@ struct GTY(()) machine_function + bool frame_laid_out; + bool epilogue_done; + bool inhibit_logues_a1_adjusts; ++ rtx last_logues_a9_content; + }; + + /* Vector, indexed by hard register number, which contains 1 for a +@@ -2509,6 +2510,86 @@ xtensa_split_DI_reg_imm (rtx *operands) + } + + ++/* Try to split an integer value into what are suitable for two consecutive ++ immediate addition instructions, ADDI or ADDMI. */ ++ ++static bool ++xtensa_split_imm_two_addends (HOST_WIDE_INT imm, HOST_WIDE_INT v[2]) ++{ ++ HOST_WIDE_INT v0, v1; ++ ++ if (imm < -32768) ++ v0 = -32768, v1 = imm + 32768; ++ else if (imm > 32512) ++ v0 = 32512, v1 = imm - 32512; ++ else if (TARGET_DENSITY && xtensa_simm12b (imm)) ++ /* A pair of MOVI(.N) and ADD.N is one or two bytes less than two ++ immediate additions if TARGET_DENSITY. */ ++ return false; ++ else ++ v0 = (imm + 128) & ~255L, v1 = imm - v0; ++ ++ if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1)) ++ { ++ v[0] = v0, v[1] = v1; ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++/* Helper function for integer immediate addition with scratch register ++ as needed, that splits and emits either up to two ADDI/ADDMI machine ++ instructions or an addition by register following an integer immediate ++ load (which may later be transformed by constantsynth). ++ ++ If 'scratch' is NULL_RTX but still needed, a new pseudo-register will ++ be allocated. Thus, after the reload/LRA pass, the specified scratch ++ register must be a hard one. 
*/ ++ ++static bool ++xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch, ++ bool need_note) ++{ ++ bool retval = false; ++ HOST_WIDE_INT v[2]; ++ rtx_insn *insn; ++ ++ if (imm == 0) ++ return false; ++ ++ if (xtensa_simm8 (imm) || xtensa_simm8x256 (imm)) ++ insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm))); ++ else if (xtensa_split_imm_two_addends (imm, v)) ++ { ++ if (!scratch) ++ scratch = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (scratch, src, GEN_INT (v[0]))); ++ insn = emit_insn (gen_addsi3 (dst, scratch, GEN_INT (v[1]))); ++ } ++ else ++ { ++ if (scratch) ++ emit_move_insn (scratch, GEN_INT (imm)); ++ else ++ scratch = force_reg (SImode, GEN_INT (imm)); ++ retval = true; ++ insn = emit_insn (gen_addsi3 (dst, src, scratch)); ++ } ++ ++ if (need_note) ++ { ++ rtx note_rtx = gen_rtx_SET (dst, plus_constant (Pmode, src, imm)); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } ++ ++ return retval; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -3280,41 +3361,33 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED) + static void + xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) + { ++ rtx src, scratch; + rtx_insn *insn; +- rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx +- : stack_pointer_rtx; + + if (cfun->machine->inhibit_logues_a1_adjusts) + return; + +- if (xtensa_simm8 (offset) +- || xtensa_simm8x256 (offset)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ src = (flags & ADJUST_SP_FRAME_PTR) ++ ? 
hard_frame_pointer_rtx : stack_pointer_rtx; ++ scratch = gen_rtx_REG (Pmode, A9_REG); + +- if (offset < 0) +- { +- emit_move_insn (tmp_reg, GEN_INT (-offset)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg)); +- } +- else +- { +- emit_move_insn (tmp_reg, GEN_INT (offset)); +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg)); +- } +- } +- +- if (flags & ADJUST_SP_NEED_NOTE) ++ if (df && DF_REG_DEF_COUNT (A9_REG) == 0 ++ && cfun->machine->last_logues_a9_content ++ && -INTVAL (cfun->machine->last_logues_a9_content) == offset) + { +- rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- offset)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, src, scratch)); ++ if (flags & ADJUST_SP_NEED_NOTE) ++ { ++ rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, src, offset)); + +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } + } ++ else if (xtensa_emit_add_imm (stack_pointer_rtx, src, offset, scratch, ++ (flags & ADJUST_SP_NEED_NOTE))) ++ cfun->machine->last_logues_a9_content = GEN_INT (offset); + } + + /* minimum frame = reg save area (4 words) plus static chain (1 word) +@@ -3342,8 +3415,9 @@ xtensa_expand_prologue (void) + /* Use a8 as a temporary since a0-a7 may be live. 
*/ + rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG); + emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE))); +- emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE)); +- emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg)); ++ xtensa_emit_add_imm (tmp_reg, stack_pointer_rtx, ++ MIN_FRAME_SIZE - total_size, ++ tmp_reg, false); + insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg)); + } + } +@@ -3575,8 +3649,8 @@ xtensa_set_return_address (rtx address, rtx scratch) + + if (total_size > 1024) + { +- emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); +- emit_insn (gen_addsi3 (scratch, frame, scratch)); ++ xtensa_emit_add_imm (scratch, frame, total_size - UNITS_PER_WORD, ++ scratch, false); + a0_addr = scratch; + } + +@@ -5125,15 +5199,7 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no); + + if (delta) +- { +- if (xtensa_simm8 (delta)) +- emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta))); +- else +- { +- emit_move_insn (temp0, GEN_INT (delta)); +- emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0)); +- } +- } ++ xtensa_emit_add_imm (this_rtx, this_rtx, delta, temp0, false); + + if (vcall_offset) + { +@@ -5143,13 +5209,8 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx)); + if (xtensa_uimm8x4 (vcall_offset)) + addr = plus_constant (Pmode, temp0, vcall_offset); +- else if (xtensa_simm8 (vcall_offset)) +- emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset))); + else +- { +- emit_move_insn (temp1, GEN_INT (vcall_offset)); +- emit_insn (gen_addsi3 (temp1, temp0, temp1)); +- } ++ xtensa_emit_add_imm (temp1, temp0, vcall_offset, temp1, false); + emit_move_insn (temp1, gen_rtx_MEM (Pmode, addr)); + emit_insn (gen_add2_insn (this_rtx, temp1)); + } +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch 
b/patches/gcc10.1/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch new file mode 100644 index 0000000..193de88 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch @@ -0,0 +1,84 @@ +From fef84d1ba0cb5956687f776b22f51d9fa5e7d176 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 Jan 2023 14:08:06 +0900 +Subject: [PATCH] xtensa: Optimize bitwise splicing operation + +This patch optimizes the operation of cutting and splicing two register +values at a specified bit position, in other words, combining (bitwise +ORing) bits 0 through (C-1) of one register with bits C through 31 +of the other, where C is the specified immediate integer 17 through 31. + +This typically applies to signed copy of floating point numbers and to +__builtin_return_address() under the windowed register ABI, and saves one +instruction compared to the four shifts and a bitwise OR emitted by the +default RTL combination pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*splice_bits): + New insn_and_split pattern. 
+--- + gcc/config/xtensa/xtensa.md | 47 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index a2cfb3df7..ba1c044c4 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -746,6 +746,53 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + ++(define_insn_and_split "*splice_bits" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")) ++ (and:SI (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i"))))] ++ ++ "!optimize_debug && optimize ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == -1 ++ && (exact_log2 (INTVAL (operands[3]) + 1) > 16 ++ || exact_log2 (INTVAL (operands[4]) + 1) > 16)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 5) ++ (ashift:SI (match_dup 1) ++ (match_dup 4))) ++ (set (match_dup 6) ++ (lshiftrt:SI (match_dup 2) ++ (match_dup 3))) ++ (set (match_dup 0) ++ (ior:SI (lshiftrt:SI (match_dup 5) ++ (match_dup 4)) ++ (ashift:SI (match_dup 6) ++ (match_dup 3))))] ++{ ++ int shift; ++ if (INTVAL (operands[3]) < 0) ++ { ++ rtx x; ++ x = operands[1], operands[1] = operands[2], operands[2] = x; ++ x = operands[3], operands[3] = operands[4], operands[4] = x; ++ } ++ shift = floor_log2 (INTVAL (operands[3]) + 1); ++ operands[3] = GEN_INT (shift); ++ operands[4] = GEN_INT (32 - shift); ++ operands[5] = gen_reg_rtx (SImode); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[3]) == 0x7FFFFFFF ++ || INTVAL (operands[4]) == 0x7FFFFFFF)") ++ (const_int 11) ++ (const_int 12)))]) ++ + + ;; Zero-extend instructions. 
+ +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch b/patches/gcc10.1/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch new file mode 100644 index 0000000..a146e25 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch @@ -0,0 +1,85 @@ +From 32f3873104faa4323d7db85262145b7895824e4a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 10 Jan 2023 01:44:09 +0900 +Subject: [PATCH] xtensa: Make instruction cost estimation for size more + accurate + +Until now, we applied COSTS_N_INSNS() (multiplying by 4) after dividing +the instruction length by 3, so we couldn't express the difference less +than modulo 3 in insn cost for size (e.g. 11 Bytes and 12 bytes cost the +same). + +This patch fixes that. + +;; 2 bytes +addi.n a2, a2, -1 ; cost 3 + +;; 3 bytes +addmi a2, a2, 1024 ; cost 4 + +;; 4 bytes +movi.n a3, 80 ; cost 5 +bnez.n a2, a3, .L4 + +;; 5 bytes +srli a2, a3, 1 ; cost 7 +add.n a2, a2, a2 + +;; 6 bytes +ssai 8 ; cost 8 +src a4, a2, a3 + +:: 3 + 4 bytes +l32r a2, .L5 ; cost 9 + +;; 11 bytes ; cost 15 +;; 12 bytes ; cost 16 + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_insn_cost): + Let insn cost for size be obtained by applying COSTS_N_INSNS() + to instruction length and then dividing by 3. +--- + gcc/config/xtensa/xtensa.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b05ae9045..e0adf069e 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4554,13 +4554,15 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) + { + if (!(recog_memoized (insn) < 0)) + { +- int len = get_attr_length (insn), n = (len + 2) / 3; ++ int len = get_attr_length (insn); + + if (len == 0) + return COSTS_N_INSNS (0); + + if (speed) /* For speed cost. 
*/ + { ++ int n = (len + 2) / 3; ++ + /* "L32R" may be particular slow (implementation-dependent). */ + if (xtensa_is_insn_L32R_p (insn)) + return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); +@@ -4607,10 +4609,11 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) + { + /* "L32R" itself plus constant in litpool. */ + if (xtensa_is_insn_L32R_p (insn)) +- return COSTS_N_INSNS (2) + 1; ++ len = 3 + 4; + +- /* Consider ".n" short instructions. */ +- return COSTS_N_INSNS (n) - (n * 3 - len); ++ /* Consider fractional instruction length (for example, ".n" ++ short instructions or "L32R" litpool constants. */ ++ return (COSTS_N_INSNS (len) + 1) / 3; + } + } + } +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch b/patches/gcc10.1/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch new file mode 100644 index 0000000..40caadd --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch @@ -0,0 +1,55 @@ +From 5fe437012eb770e8fc2d2d9f859110e5cc707fc5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 11 Jan 2023 19:26:03 +0900 +Subject: [PATCH] xtensa: Tune "*btrue" insn pattern + +This branch instruction has short encoding if EQ/NE comparison against +immediate zero when the Code Density Option is enabled, but its "length" +attribute was only for normal encoding. This patch fixes it. + +This patch also prevents undesireable replacement the comparison immediate +zero of the instruction (short encoding, as mentioned above) with a +register that has value of zero (normal encoding) by the postreload pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*btrue): + Correct value of the attribute "length" that depends on + TARGET_DENSITY and operands, and add '?' character to the register + constraint of the compared operand. 
+--- + gcc/config/xtensa/xtensa.md | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ba1c044c4..4b0b74368 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1684,7 +1684,7 @@ + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ (match_operand:SI 1 "branch_operand" "K,?r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +@@ -1693,7 +1693,14 @@ + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +- (set_attr "length" "3,3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && CONST_INT_P (operands[1]) ++ && INTVAL (operands[1]) == 0 ++ && (GET_CODE (operands[3]) == EQ ++ || GET_CODE (operands[3]) == NE)") ++ (const_int 2) ++ (const_int 3)))]) + + (define_insn "*ubtrue" + [(set (pc) +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch b/patches/gcc10.1/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch new file mode 100644 index 0000000..017244c --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch @@ -0,0 +1,44 @@ +From c7111ec38b7a1825f759804b021afc7b7b5b7491 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 12 Jan 2023 10:01:01 +0900 +Subject: [PATCH] xtensa: Optimize ctzsi2 and ffssi2 a bit + +This patch saves one byte when the Code Density Option is enabled, + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (ctzsi2, ffssi2): + Rearrange the emitting codes. 
+--- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4b0b74368..4f1e8fd13 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -477,8 +477,8 @@ + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); +- emit_insn (gen_negsi2 (temp, temp)); +- emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31))); ++ emit_move_insn (operands[0], GEN_INT (31)); ++ emit_insn (gen_subsi3 (operands[0], operands[0], temp)); + DONE; + }) + +@@ -491,8 +491,8 @@ + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); +- emit_insn (gen_negsi2 (temp, temp)); +- emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32))); ++ emit_move_insn (operands[0], GEN_INT (32)); ++ emit_insn (gen_subsi3 (operands[0], operands[0], temp)); + DONE; + }) + +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch b/patches/gcc10.1/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch new file mode 100644 index 0000000..e182227 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch @@ -0,0 +1,218 @@ +From 70feb8960c923e914f1e0bf8e7eae96300c708a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 14 Jan 2023 04:31:46 +0900 +Subject: [PATCH] xtensa: Remove old broken tweak for leaf function + +In the before-IRA era, ORDER_REGS_FOR_LOCAL_ALLOC was called for each +function in Xtensa, and there was register allocation table reordering +for leaf functions to compensate for the poor performance of local-alloc. 
+ +Today the adjustment hook is still called via its alternative +ADJUST_REG_ALLOC_ORDER, but it is only called once at the start of the IRA, +and leaf_function_p() erroneously returns true and also gives no argument +count. + +That misleads register allocation into assuming that all functions are +always leaves with no arguments, which leads to inefficiencies in allocation +results. + +Fortunately, IRA is smarter than local-alloc and does not need such assistance. + +This patch does away with the antiquated tweak by removing the wreckage that no +longer works. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (order_regs_for_local_alloc): + Rename to xtensa_adjust_reg_alloc_order. + * config/xtensa/xtensa.c (xtensa_adjust_reg_alloc_order): + Ditto. And also remove code to reorder register numbers for + leaf functions, rename the tables, and adjust the allocation + order for the call0 ABI to use register A0 more. + (xtensa_leaf_regs): Remove. + * config/xtensa/xtensa.h (REG_ALLOC_ORDER): Cosmetics. + (order_regs_for_local_alloc): Rename as the above. + (LEAF_REGISTERS, LEAF_REG_REMAP, leaf_function): Remove. 
+--- + gcc/config/xtensa/xtensa-protos.h | 2 +- + gcc/config/xtensa/xtensa.c | 77 +++++++------------------------ + gcc/config/xtensa/xtensa.h | 51 ++++++-------------- + 3 files changed, 31 insertions(+), 99 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 63b147a90..39d5a5825 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -78,7 +78,7 @@ extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); + extern void xtensa_expand_epilogue (bool); +-extern void order_regs_for_local_alloc (void); ++extern void xtensa_adjust_reg_alloc_order (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e0adf069e..db7ac3599 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -107,18 +107,6 @@ struct GTY(()) machine_function + rtx last_logues_a9_content; + }; + +-/* Vector, indexed by hard register number, which contains 1 for a +- register that is allowable in a candidate for leaf function +- treatment. */ +- +-const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = +-{ +- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +- 1, 1, 1, +- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +- 1 +-}; +- + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); + static rtx gen_int_relational (enum rtx_code, rtx, rtx); +@@ -4175,58 +4163,25 @@ xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + return NO_REGS; + } + ++/* Called once at the start of IRA, by ADJUST_REG_ALLOC_ORDER. 
*/ + + void +-order_regs_for_local_alloc (void) ++xtensa_adjust_reg_alloc_order (void) + { +- if (!leaf_function_p ()) +- { +- static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = +- REG_ALLOC_ORDER; +- static const int reg_nonleaf_alloc_order_call0[FIRST_PSEUDO_REGISTER] = +- { +- 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 12, 13, 14, 15, +- 18, +- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, +- 0, 1, 16, 17, +- 35, +- }; +- +- memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ? +- reg_nonleaf_alloc_order : reg_nonleaf_alloc_order_call0, +- FIRST_PSEUDO_REGISTER * sizeof (int)); +- } +- else +- { +- int i, num_arg_regs; +- int nxt = 0; +- +- /* Use the AR registers in increasing order (skipping a0 and a1) +- but save the incoming argument registers for a last resort. */ +- num_arg_regs = crtl->args.info.arg_words; +- if (num_arg_regs > MAX_ARGS_IN_REGISTERS) +- num_arg_regs = MAX_ARGS_IN_REGISTERS; +- for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++) +- reg_alloc_order[nxt++] = i + num_arg_regs; +- for (i = 0; i < num_arg_regs; i++) +- reg_alloc_order[nxt++] = GP_ARG_FIRST + i; +- +- /* List the coprocessor registers in order. */ +- for (i = 0; i < BR_REG_NUM; i++) +- reg_alloc_order[nxt++] = BR_REG_FIRST + i; +- +- /* List the FP registers in order for now. */ +- for (i = 0; i < 16; i++) +- reg_alloc_order[nxt++] = FP_REG_FIRST + i; +- +- /* GCC requires that we list *all* the registers.... 
*/ +- reg_alloc_order[nxt++] = 0; /* a0 = return address */ +- reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */ +- reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */ +- reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */ +- +- reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */ +- } ++ static const int reg_windowed_alloc_order[FIRST_PSEUDO_REGISTER] = ++ REG_ALLOC_ORDER; ++ static const int reg_call0_alloc_order[FIRST_PSEUDO_REGISTER] = ++ { ++ 9, 10, 11, 7, 6, 5, 4, 3, 2, 8, 0, 12, 13, 14, 15, ++ 18, ++ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, ++ 1, 16, 17, ++ 35, ++ }; ++ ++ memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ? ++ reg_windowed_alloc_order : reg_call0_alloc_order, ++ FIRST_PSEUDO_REGISTER * sizeof (int)); + } + + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index e3f808c42..ef7f9e5d5 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -249,44 +249,21 @@ along with GCC; see the file COPYING3. If not see + 1, \ + } + +-/* For non-leaf procedures on Xtensa processors, the allocation order +- is as specified below by REG_ALLOC_ORDER. For leaf procedures, we +- want to use the lowest numbered registers first to minimize +- register window overflows. However, local-alloc is not smart +- enough to consider conflicts with incoming arguments. If an +- incoming argument in a2 is live throughout the function and +- local-alloc decides to use a2, then the incoming argument must +- either be spilled or copied to another register. To get around +- this, we define ADJUST_REG_ALLOC_ORDER to redefine +- reg_alloc_order for leaf functions such that lowest numbered +- registers are used first with the exception that the incoming +- argument registers are not used until after other register choices +- have been exhausted. 
*/ +- +-#define REG_ALLOC_ORDER \ +-{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ +- 18, \ +- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ +- 0, 1, 16, 17, \ +- 35, \ +-} +- +-#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () +- +-/* For Xtensa, the only point of this is to prevent GCC from otherwise +- giving preference to call-used registers. To minimize window +- overflows for the AR registers, we want to give preference to the +- lower-numbered AR registers. For other register files, which are +- not windowed, we still prefer call-used registers, if there are any. */ +-extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER]; +-#define LEAF_REGISTERS xtensa_leaf_regs ++/* For the windowed register ABI on Xtensa processors, the allocation ++ order is as specified below by REG_ALLOC_ORDER. ++ For the call0 ABI, on the other hand, ADJUST_REG_ALLOC_ORDER hook ++ will be called once at the start of IRA, replacing it with the ++ appropriate one. */ + +-/* For Xtensa, no remapping is necessary, but this macro must be +- defined if LEAF_REGISTERS is defined. */ +-#define LEAF_REG_REMAP(REGNO) (REGNO) +- +-/* This must be declared if LEAF_REGISTERS is set. */ +-extern int leaf_function; ++#define REG_ALLOC_ORDER \ ++{ \ ++ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ ++ 18, \ ++ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ ++ 0, 1, 16, 17, \ ++ 35, \ ++} ++#define ADJUST_REG_ALLOC_ORDER xtensa_adjust_reg_alloc_order () + + /* Internal macros to classify a register number. 
*/ + +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch b/patches/gcc10.1/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch new file mode 100644 index 0000000..323b830 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch @@ -0,0 +1,59 @@ +From 97538d16c11c17764aab63695ce3b5275fd50d56 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 18 Jan 2023 09:53:38 +0900 +Subject: [PATCH] xtensa: Optimize inversion of the MSB + +Such operation can be done either bitwise-XOR or addition with -2147483648, +but the latter is one byte less if TARGET_DENSITY. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (xorsi3_internal): + Rename from the original of "xorsi3". + (xorsi3): New expansion pattern that emits addition rather than + bitwise-XOR when the second source is a constant of -2147483648 + if TARGET_DENSITY. +--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4f1e8fd13..c6a299cc1 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -736,7 +736,31 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "xorsi3" ++(define_expand "xorsi3" ++ [(set (match_operand:SI 0 "register_operand") ++ (xor:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "nonmemory_operand")))] ++ "" ++{ ++ if (register_operand (operands[2], SImode)) ++ emit_insn (gen_xorsi3_internal (operands[0], operands[1], ++ operands[2])); ++ else ++ { ++ rtx (*gen_op)(rtx, rtx, rtx); ++ if (TARGET_DENSITY ++ && CONST_INT_P (operands[2]) ++ && INTVAL (operands[2]) == -2147483648L) ++ gen_op = gen_addsi3; ++ else ++ gen_op = gen_xorsi3_internal; ++ emit_insn (gen_op (operands[0], operands[1], ++ force_reg (SImode, operands[2]))); ++ } ++ DONE; ++}) ++ ++(define_insn "xorsi3_internal" + [(set (match_operand:SI 0 
"register_operand" "=a") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch b/patches/gcc10.1/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch new file mode 100644 index 0000000..157876b --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch @@ -0,0 +1,112 @@ +From 91b14e1f0de9a690b6c3b411d1c2706e05063977 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 20 Jan 2023 08:30:01 +0900 +Subject: [PATCH] xtensa: Revise complex hard register clobber elimination + +In the previously posted patch +"xtensa: Make complex hard register clobber elimination more robust and accurate", +the check code for insns that refer to the [DS]Cmode hard register before +it is overwritten after it is clobbered is incomplete. Fortunately such +insns are seldom emitted, so it didn't matter. + +This patch fixes that for the sake of completeness. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Fix exit from loops detecting references before overwriting in the + split pattern. 
+--- + gcc/config/xtensa/xtensa.md | 72 +++++++++++++++++++------------------ + 1 file changed, 37 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c6a299cc1..4d976ece5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2978,45 +2978,47 @@ + { + auto_sbitmap bmp (FIRST_PSEUDO_REGISTER); + rtx_insn *insn; +- rtx reg = gen_rtx_REG (SImode, 0); ++ rtx reg = gen_rtx_REG (SImode, 0), dest; ++ unsigned int regno; ++ sbitmap_iterator iter; + bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0])); + for (insn = next_nonnote_nondebug_insn_bb (curr_insn); + insn; insn = next_nonnote_nondebug_insn_bb (insn)) +- { +- sbitmap_iterator iter; +- unsigned int regno; +- if (NONJUMP_INSN_P (insn)) +- { +- EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) +- { +- set_regno_raw (reg, regno, REG_NREGS (reg)); +- if (reg_overlap_mentioned_p (reg, PATTERN (insn))) +- break; +- } +- if (GET_CODE (PATTERN (insn)) == SET) +- { +- rtx x = SET_DEST (PATTERN (insn)); +- if (REG_P (x) && HARD_REGISTER_P (x)) +- bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x)); +- else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x))) +- { +- struct subreg_info info; +- subreg_get_info (regno = REGNO (SUBREG_REG (x)), +- GET_MODE (SUBREG_REG (x)), +- SUBREG_BYTE (x), GET_MODE (x), &info); +- if (!info.representable_p) +- break; +- bitmap_clear_range (bmp, regno + info.offset, info.nregs); +- } +- } +- if (bitmap_empty_p (bmp)) +- goto FALLTHRU; +- } +- else if (CALL_P (insn)) ++ if (NONJUMP_INSN_P (insn)) ++ { + EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) +- if (call_used_or_fixed_reg_p (regno)) +- break; +- } ++ { ++ set_regno_raw (reg, regno, REG_NREGS (reg)); ++ if (reg_referenced_p (reg, PATTERN (insn))) ++ goto ABORT; ++ } ++ if (GET_CODE (PATTERN (insn)) == SET ++ || GET_CODE (PATTERN (insn)) == CLOBBER) ++ { ++ dest = SET_DEST (PATTERN (insn)); ++ if (REG_P (dest) && HARD_REGISTER_P (dest)) ++ 
bitmap_clear_range (bmp, REGNO (dest), REG_NREGS (dest)); ++ else if (SUBREG_P (dest) ++ && HARD_REGISTER_P (SUBREG_REG (dest))) ++ { ++ struct subreg_info info; ++ subreg_get_info (regno = REGNO (SUBREG_REG (dest)), ++ GET_MODE (SUBREG_REG (dest)), ++ SUBREG_BYTE (dest), GET_MODE (dest), ++ &info); ++ if (!info.representable_p) ++ break; ++ bitmap_clear_range (bmp, regno + info.offset, info.nregs); ++ } ++ } ++ if (bitmap_empty_p (bmp)) ++ goto FALLTHRU; ++ } ++ else if (CALL_P (insn)) ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ if (call_used_or_fixed_reg_p (regno)) ++ goto ABORT; ++ABORT: + FAIL; + FALLTHRU:; + }) +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch b/patches/gcc10.1/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch new file mode 100644 index 0000000..1a7f055 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch @@ -0,0 +1,39 @@ +From 7ef080074a2c422e20a8e4dae50f6f002c6c2928 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 30 Jan 2023 18:37:55 +0900 +Subject: [PATCH] xtensa: Enforce return address saving when -Og is + specified + +Leaf function often omits saving its return address to the stack slot, +and this feature often makes debugging very confusing, especially for +stack dump analysis. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_call_save_reg): Change to return + true if register A0 (return address register) when -Og is specified. 
+--- + gcc/config/xtensa/xtensa.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index db7ac3599..5c6ee7a8f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3258,8 +3258,11 @@ xtensa_call_save_reg (int regno) + return false; + + if (regno == A0_REG) +- return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || +- df_regs_ever_live_p (regno); ++ /* Ensure the return address to be saved to the stack slot in order ++ to assist stack dump analysis when -Og is specified. */ ++ return optimize_debug ++ || crtl->profile || !crtl->is_leaf || crtl->calls_eh_return ++ || df_regs_ever_live_p (regno); + + if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0074-fix-PR-target-108876.patch b/patches/gcc10.1/gcc-xtensa-0074-fix-PR-target-108876.patch new file mode 100644 index 0000000..9609f04 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0074-fix-PR-target-108876.patch @@ -0,0 +1,116 @@ +From 16cfee0871e5a6411b17adc2dc422b9760d17893 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Wed, 22 Feb 2023 22:08:21 -0800 +Subject: [PATCH 1/2] xtensa: fix PR target/108876 + +In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call +insn included (use (reg:SI A0_REG)) to fix the problem, which added +a USE chain unconditionally to the data flow of register A0 during +the sibling call. + +As a result, df_regs_ever_live_p (A0_REG) returns true, so even if +register A0 is not used outside of the sibling call insn, saves and +restores to stack slots are emitted in pro/epilogue, and finally +code size increases. 
+(This is why I never included (use A0) in sibling calls) + + /* example */ + extern int foo(int); + int test(int a) { + return foo(a * 3 + 1); + } + +;; before + test: + addi sp, sp, -16 ;; unneeded stack frame allocation (induced) + s32i.n a0, sp, 12 ;; unneeded saving of register A0 + l32i.n a0, sp, 12 ;; unneeded restoration of register A0 + addx2 a2, a2, a2 + addi.n a2, a2, 1 + addi sp, sp, 16 ;; unneeded stack frame freeing (induced) + j.l foo, a9 ;; sibling call (truly needs register A0) + +The essential cause is that we emit (use A0) *before* the insns that +do the stack pointer adjustment during epilogue expansion, so the +liveness of register A0 ends early and register A0 is reused afterwards. + +This patch fixes the problem and avoids such regression by emitting +(use A0) at the end of the sibling call epilogue expansion. + +;; after +test: + addx2 a2, a2, a2 + addi.n a2, a2, 1 + j.l foo, a9 + +>From RTL-pass "315r.rnreg" by +"gfortran -O3 -funroll-loops -mabi=call0 -S -da gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90": + + ;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, decl_uid=987, cgraph_uid=3, symbol_order=4) + ... 
+ (insn 3807 3806 3808 121 (set (reg:SI 15 a15) + (mem/c:SI (plus:SI (reg/f:SI 1 sp) + (const_int 268 [0x10c])) [31 S4 A32])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 {movsi_internal} + (nil)) + (insn 3808 3807 3809 121 (set (reg:SI 7 a7) + (const_int 288 [0x120])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 {movsi_internal} + (nil)) + (insn 3809 3808 3810 121 (set (reg/f:SI 1 sp) + (plus:SI (reg/f:SI 1 sp) + (reg:SI 7 a7))) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 {addsi3} + (expr_list:REG_DEAD (reg:SI 9 a9) + (nil))) + (insn 3810 3809 721 121 (use (reg:SI 0 a0)) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1 + (expr_list:REG_DEAD (reg:SI 0 a0) + (nil))) + (call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") [flags 0x41] ) [0 __builtin_free S4 A32]) + (const_int 0 [0])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 discrim 1 106 {sibcall_internal} + (expr_list:REG_DEAD (reg:SI 2 a2) + (expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41] ) + (expr_list:REG_EH_REGION (const_int 0 [0]) + (nil)))) + (expr_list:SI (use (reg:SI 2 a2)) + (nil))) + +(IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account; +it just seems to allocate registers in fixed_regs index order, +which may have hurt register A0 that became allocatable in the recent +patch) + +gcc/ChangeLog: + PR target/108876 + + * config/xtensa/xtensa.c (xtensa_expand_epilogue): + Emit (use (reg:SI A0_REG)) at the end in the sibling call + (i.e. the same place as (return) in the normal call). 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 5c6ee7a8f..3426494f5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3583,8 +3583,6 @@ xtensa_expand_epilogue (bool sibcall_p) + gen_frame_mem (SImode, x)); + } + } +- if (sibcall_p) +- emit_use (gen_rtx_REG (SImode, A0_REG)); + + if (cfun->machine->current_frame_size > 0) + { +@@ -3610,7 +3608,9 @@ xtensa_expand_epilogue (bool sibcall_p) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- if (!sibcall_p) ++ if (sibcall_p) ++ emit_use (gen_rtx_REG (SImode, A0_REG)); ++ else + emit_jump_insn (gen_return ()); + } + +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch b/patches/gcc10.1/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch new file mode 100644 index 0000000..0099b1c --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch @@ -0,0 +1,54 @@ +From bed35098a6d3d0032716f23e5c631e7aa183f227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 23 Feb 2023 12:42:32 +0900 +Subject: [PATCH 2/2] xtensa: Fix missing mode warnings in machine description + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (zero_cost_loop_start, zero_cost_loop_end, loop_end): + Add missing "SI:" to PLUS RTXes. 
+--- + gcc/config/xtensa/xtensa.md | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4d976ece5..9c017dd19 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2033,8 +2033,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] + "TARGET_LOOPS && optimize" + "loop\t%0, %l1_LEND" +@@ -2049,8 +2049,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a,m") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r"))] + "TARGET_LOOPS && optimize" +@@ -2066,8 +2066,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] + "TARGET_LOOPS && optimize" + { +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch b/patches/gcc10.1/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch new file mode 100644 index 0000000..99109d5 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch @@ -0,0 +1,303 @@ +From 19e3ee5197e1de1ec6228cb54ff4ad8f27af5138 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 27 Jan 2023 12:17:33 +0900 +Subject: [PATCH] xtensa: Eliminate the use of callee-saved register that + saves and restores only once + +In the case of the CALL0 ABI, values that must be retained before and +after function calls are placed in the callee-saved registers (A12 +through A15) and 
referenced later. However, it is often the case that +the save and the reference each occur only once, each as a simple register- +register move (with two exceptions; i. the register saved to/restored +from is the stack pointer, ii. the function needs an additional stack +pointer adjustment to grow the stack). + +e.g. in the following example, if there are no other occurrences of +register A14: + +;; before + ; prologue { + ... + s32i.n a14, sp, 16 + ... ;; no frame pointer needed + ;; no additional stack growth + ; } prologue + ... + mov.n a14, a6 ;; A6 is not SP + ... + call0 foo + ... + mov.n a8, a14 ;; A8 is not SP + ... + ; epilogue { + ... + l32i.n a14, sp, 16 + ... + ; } epilogue + +It can be rewritten like this: + +;; after + ; prologue { + ... + (no save needed) + ... + ; } prologue + ... + s32i.n a6, sp, 16 ;; replaced with A14's slot + ... + call0 foo + ... + l32i.n a8, sp, 16 ;; through SP + ... + ; epilogue { + ... + (no restoration needed) + ... + ; } epilogue + +This patch adds the abovementioned logic to the function prologue/epilogue +RTL expander code. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (machine_function): Add new member + 'eliminated_callee_saved_bmp'. + (xtensa_can_eliminate_callee_saved_reg_p): New function to + determine whether the register can be eliminated or not. + (xtensa_expand_prologue): Invoke the above function and + eliminate the use of the callee-saved register by using its stack + slot through the stack pointer (or the frame pointer if needed) + directly. + (xtensa_expand_epilogue): Modify to not emit register restoration + insn from its stack slot if the register is already eliminated. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/elim_callee_saved.c: New. 
+--- + gcc/config/xtensa/xtensa.c | 132 ++++++++++++++---- + .../gcc.target/xtensa/elim_callee_saved.c | 38 +++++ + 2 files changed, 145 insertions(+), 25 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 3426494f5..6aea625d9 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -105,6 +105,7 @@ struct GTY(()) machine_function + bool epilogue_done; + bool inhibit_logues_a1_adjusts; + rtx last_logues_a9_content; ++ HOST_WIDE_INT eliminated_callee_saved_bmp; + }; + + static void xtensa_option_override (void); +@@ -3381,6 +3382,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) + cfun->machine->last_logues_a9_content = GEN_INT (offset); + } + ++static bool ++xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno, ++ rtx_insn **p_insnS, ++ rtx_insn **p_insnR) ++{ ++ df_ref ref; ++ rtx_insn *insn, *insnS = NULL, *insnR = NULL; ++ rtx pattern; ++ ++ if (!optimize || !df || call_used_or_fixed_reg_p (regno)) ++ return false; ++ ++ for (ref = DF_REG_DEF_CHAIN (regno); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) != DF_REF_REGULAR ++ || DEBUG_INSN_P (insn = DF_REF_INSN (ref))) ++ continue; ++ else if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (SET_DEST (pattern)) ++ && REGNO (SET_DEST (pattern)) == regno ++ && REG_NREGS (SET_DEST (pattern)) == 1 ++ && REG_P (SET_SRC (pattern)) ++ && REGNO (SET_SRC (pattern)) != A1_REG) ++ { ++ if (insnS) ++ return false; ++ insnS = insn; ++ continue; ++ } ++ else ++ return false; ++ ++ for (ref = DF_REG_USE_CHAIN (regno); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) != DF_REF_REGULAR ++ || DEBUG_INSN_P (insn = DF_REF_INSN (ref))) ++ continue; ++ else if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (SET_SRC (pattern)) ++ && REGNO (SET_SRC (pattern)) == regno ++ && REG_NREGS (SET_SRC (pattern)) == 1 ++ && REG_P (SET_DEST 
(pattern)) ++ && REGNO (SET_DEST (pattern)) != A1_REG) ++ { ++ if (insnR) ++ return false; ++ insnR = insn; ++ continue; ++ } ++ else ++ return false; ++ ++ if (!insnS || !insnR) ++ return false; ++ ++ *p_insnS = insnS, *p_insnR = insnR; ++ ++ return true; ++} ++ + /* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ + #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) +@@ -3420,6 +3481,7 @@ xtensa_expand_prologue (void) + df_ref ref; + bool stack_pointer_needed = frame_pointer_needed + || crtl->calls_eh_return; ++ bool large_stack_needed; + + /* Check if the function body really needs the stack pointer. */ + if (!stack_pointer_needed && df) +@@ -3468,23 +3530,41 @@ xtensa_expand_prologue (void) + } + } + ++ large_stack_needed = total_size > 1024 ++ || (!callee_save_size && total_size > 128); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) +- { +- if (xtensa_call_save_reg(regno)) +- { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- rtx mem = gen_frame_mem (SImode, x); +- rtx reg = gen_rtx_REG (SImode, regno); ++ if (xtensa_call_save_reg(regno)) ++ { ++ rtx x = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, GEN_INT (offset)); ++ rtx mem = gen_frame_mem (SImode, x); ++ rtx_insn *insnS, *insnR; ++ ++ if (!large_stack_needed ++ && xtensa_can_eliminate_callee_saved_reg_p (regno, ++ &insnS, &insnR)) ++ { ++ if (frame_pointer_needed) ++ mem = replace_rtx (mem, stack_pointer_rtx, ++ hard_frame_pointer_rtx); ++ SET_DEST (PATTERN (insnS)) = mem; ++ df_insn_rescan (insnS); ++ SET_SRC (PATTERN (insnR)) = copy_rtx (mem); ++ df_insn_rescan (insnR); ++ cfun->machine->eliminated_callee_saved_bmp |= 1 << regno; ++ } ++ else ++ { ++ rtx reg = gen_rtx_REG (SImode, regno); + +- offset -= UNITS_PER_WORD; +- insn = emit_move_insn (mem, reg); +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (mem, reg)); +- } +- } +- if (total_size > 
1024 +- || (!callee_save_size && total_size > 128)) ++ insn = emit_move_insn (mem, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (mem, reg)); ++ } ++ offset -= UNITS_PER_WORD; ++ } ++ if (large_stack_needed) + xtensa_emit_adjust_stack_ptr (callee_save_size - total_size, + ADJUST_SP_NEED_NOTE); + } +@@ -3573,16 +3653,18 @@ xtensa_expand_epilogue (bool sibcall_p) + emit_insn (gen_blockage ()); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) +- { +- if (xtensa_call_save_reg(regno)) +- { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- +- offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), +- gen_frame_mem (SImode, x)); +- } +- } ++ if (xtensa_call_save_reg(regno)) ++ { ++ if (! (cfun->machine->eliminated_callee_saved_bmp ++ & (1 << regno))) ++ { ++ rtx x = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, GEN_INT (offset)); ++ emit_move_insn (gen_rtx_REG (SImode, regno), ++ gen_frame_mem (SImode, x)); ++ } ++ offset -= UNITS_PER_WORD; ++ } + + if (cfun->machine->current_frame_size > 0) + { +diff --git a/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c +new file mode 100644 +index 000000000..cd3d6b9f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0" } */ ++ ++extern void foo(void); ++ ++/* eliminated one register (the reservoir of variable 'a') by its stack slot through the stack pointer. */ ++int test0(int a) { ++ int array[252]; /* the maximum bound of non-large stack. */ ++ foo(); ++ asm volatile("" : : "m"(array)); ++ return a; ++} ++ ++/* cannot eliminate if large stack is needed, because the offset from TOS cannot fit into single L32I/S32I instruction. */ ++int test1(int a) { ++ int array[10000]; /* requires large stack. 
*/ ++ foo(); ++ asm volatile("" : : "m"(array)); ++ return a; ++} ++ ++/* register A15 is the reservoir of the stack pointer and cannot be eliminated if the frame pointer is needed. ++ other registers still can be, but through the frame pointer rather than the stack pointer. */ ++int test2(int a) { ++ int* p = __builtin_alloca(16); ++ foo(); ++ asm volatile("" : : "r"(p)); ++ return a; ++} ++ ++/* in -O0 the composite hard registers may still remain unsplit at pro_and_epilogue and must be excluded. */ ++extern double bar(void); ++int __attribute__((optimize(0))) test3(int a) { ++ return bar() + a; ++} ++ ++/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 21 } } */ ++/* { dg-final { scan-assembler-times "a15, 8" 2 } } */ +-- +2.30.2 + diff --git a/patches/gcc10.1/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch b/patches/gcc10.1/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch new file mode 100644 index 0000000..f42c958 --- /dev/null +++ b/patches/gcc10.1/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch @@ -0,0 +1,159 @@ +From 33aef933318545ff759442b391d0a53aae43251e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 18 Feb 2023 13:43:34 +0900 +Subject: [PATCH] xtensa: Eliminate unnecessary general-purpose reg-reg + moves + +Register-register move instructions that can be easily seen as +unnecessary by the human eye may remain in the compiled result. 
+For example: + +/* example */ +double test(double a, double b) { + return __builtin_copysign(a, b); +} + +test: + add.n a3, a3, a3 + extui a5, a5, 31, 1 + ssai 1 + ;; Be in the same BB + src a7, a5, a3 ;; Replacing the destination doesn't + ;; violate any constraints of the + ;; operands + ;; No CALL insns in this span + ;; Both A3 and A7 are irrelevant to + ;; insns in this span + mov.n a3, a7 ;; An unnecessary reg-reg move + ;; A7 is not used after this + ret.n + +The last two instructions above, excluding the return instruction, +could be done like this: + + src a3, a5, a3 + +This symptom often occurs when handling DI/DFmode values with SImode +instructions. This patch solves the above problem using peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: New peephole2 pattern that eliminates + the occurrence of general-purpose register used only once and for + transferring intermediate value. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/elim_GP_regmove_0.c: New test. + * gcc.target/xtensa/elim_GP_regmove_1.c: New test. 
+--- + gcc/config/xtensa/xtensa.md | 46 +++++++++++++++++++ + .../gcc.target/xtensa/elim_GP_regmove_0.c | 23 ++++++++++ + .../gcc.target/xtensa/elim_GP_regmove_1.c | 10 ++++ + 3 files changed, 79 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9c017dd19..628b27b32 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -3055,3 +3055,49 @@ FALLTHRU:; + operands[1] = GEN_INT (imm0); + operands[2] = GEN_INT (imm1); + }) ++ ++(define_peephole2 ++ [(set (match_operand 0 "register_operand") ++ (match_operand 1 "register_operand"))] ++ "REG_NREGS (operands[0]) == 1 && GP_REG_P (REGNO (operands[0])) ++ && REG_NREGS (operands[1]) == 1 && GP_REG_P (REGNO (operands[1])) ++ && peep2_reg_dead_p (1, operands[1])" ++ [(const_int 0)] ++{ ++ basic_block bb = BLOCK_FOR_INSN (curr_insn); ++ rtx_insn *head = BB_HEAD (bb), *insn; ++ rtx dest = operands[0], src = operands[1], pattern, t_dest, dest_orig; ++ for (insn = PREV_INSN (curr_insn); ++ insn && insn != head; ++ insn = PREV_INSN (insn)) ++ if (CALL_P (insn)) ++ break; ++ else if (INSN_P (insn)) ++ { ++ if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (t_dest = SET_DEST (pattern)) ++ && REG_NREGS (t_dest) == 1 ++ && REGNO (t_dest) == REGNO (src)) ++ { ++ dest_orig = SET_DEST (pattern); ++ SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest), ++ REGNO (dest)); ++ extract_insn (insn); ++ if (!constrain_operands (true, get_enabled_alternatives (insn))) ++ { ++ SET_DEST (pattern) = dest_orig; ++ goto ABORT; ++ } ++ df_insn_rescan (insn); ++ goto FALLTHRU; ++ } ++ if (reg_overlap_mentioned_p (dest, pattern) ++ || reg_overlap_mentioned_p (src, pattern) ++ || set_of (dest, insn) ++ || set_of (src, insn)) ++ break; ++ } ++ABORT: ++ FAIL; ++FALLTHRU:; ++}) +diff --git 
a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c +new file mode 100644 +index 000000000..5c195c357 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fpeephole2" } */ ++ ++/* can be processed */ ++double test0(double a, double b) { ++ return __builtin_copysign(a, b); ++} ++ ++/* cannot be processed: it would violate the '0' constraint of the 2nd source operand. */ ++int test1(int a, int b) { ++ int c; ++ asm volatile ("" : "=a"(c) : "r"(a), "0"(b)); ++ return c; ++} ++ ++/* cannot be processed: it would violate the '&' constraint of the destination operand. */ ++int test2(int a) { ++ int b; ++ asm volatile ("" : "=&a"(b) : "r"(a)); ++ return b; ++} ++ ++/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c +new file mode 100644 +index 000000000..a13ef8188 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fpeephole2 -mabi=windowed" } */ ++ ++/* cannot be processed: it would violate the 'a' constraint of the destination operand of the stack adjustment instruction. 
*/ ++void test(void) { ++ int buffer[8192]; ++ asm volatile ("" : : "m"(buffer)); ++} ++ ++/* { dg-final { scan-assembler-times "movsp" 1 } } */ +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-Improve-initialization-of-objects-when-the-initializ.patch b/patches/gcc10.2/gcc-Improve-initialization-of-objects-when-the-initializ.patch new file mode 100644 index 0000000..00fdb45 --- /dev/null +++ b/patches/gcc10.2/gcc-Improve-initialization-of-objects-when-the-initializ.patch @@ -0,0 +1,39 @@ +From a2cde0c6443c440c2a2b72b5eea060229a0cff57 Mon Sep 17 00:00:00 2001 +From: Jeff Law +Date: Sat, 9 Jul 2022 11:11:00 -0400 +Subject: [PATCH] [RFA] Improve initialization of objects when the initializer + +gcc/ + + * expr.c (store_expr): Identify trailing NULs in a STRING_CST + initializer and use clear_storage rather than copying the + NULs to the destination array. +--- + gcc/expr.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f33..6ff393462 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -5723,6 +5723,17 @@ store_expr (tree exp, rtx target, int call_param_p, + } + + str_copy_len = TREE_STRING_LENGTH (str); ++ ++ /* Trailing NUL bytes in EXP will be handled by the call to ++ clear_storage, which is more efficient than copying them from ++ the STRING_CST, so trim those from STR_COPY_LEN. 
*/ ++ while (str_copy_len) ++ { ++ if (TREE_STRING_POINTER (str)[str_copy_len - 1]) ++ break; ++ str_copy_len--; ++ } ++ + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) + { + str_copy_len += STORE_MAX_PIECES - 1; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch b/patches/gcc10.2/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch new file mode 100644 index 0000000..4c5418f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch @@ -0,0 +1,44 @@ +From 2065a3fccb11e28ebcc42aa46c52a40b0fae9bea Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Sun, 21 Nov 2021 20:18:31 -0600 +Subject: [PATCH 01/31] xtensa: Fix non-robust split condition in + define_insn_and_split + +This patch is to fix some non-robust split conditions in some +define_insn_and_splits, to make each of them applied on top of +the corresponding condition for define_insn part, otherwise the +splitting could perform unexpectedly. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi_internal, movdf_internal): Fix split + condition. 
+--- + gcc/config/xtensa/xtensa.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a8e59ee9..123916957 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -782,7 +782,7 @@ + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +@@ -1058,7 +1058,7 @@ + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc10.2/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc10.2/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! 
optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 - diff --git a/patches/gcc10.2/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch b/patches/gcc10.2/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch deleted file mode 100644 index eb06969..0000000 --- a/patches/gcc10.2/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch +++ /dev/null @@ -1,3186 +0,0 @@ -From 989fc2c516206d7cf70177a416815f91998e2131 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Fri, 27 May 2022 21:34:37 +0900 -Subject: [PATCH 1/3] xtensa: Backport patches from upstream/master - -2b5b8610e985e23a0c2e0272339ab074a750e240 "xtensa: Fix non-robust split condition in define_insn_and_split" -7e5baa7e6f4caced6bdaef6d866d19e7656d8a16 "xtensa: fix -Wformat-diag warnings." -d543bac1631700f0da30d5ca555296f4938a82c6 "xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi" -112447f8564c0307c5da99a4094a3a99f204239f "xtensa: Reflect the 32-bit Integer Divide Option" -b753405a5f0d45eea97f4cc7df2c2089401b08bf "xtensa: Simplify EXTUI instruction maskimm validations" -9b251fe2e39a49c0d3ecd34cf8c5d55544efd159 "xtensa: Make use of IN_RANGE macro where appropriate" -3397563ad6c8fc5d9675faf507e52dd2ed284202 "xtensa: Fix instruction counting regarding block move expansion" -6454b4a8f5d90dd355c3c7e31a592a439223b645 "xtensa: Add setmemsi insn pattern" -9aad2b22436d5346fa224e5c14439dcef36cf3dd "xtensa: Improve bswap[sd]i2 insn patterns" -e94c6dbfb57a862dd8a8685eabc4886ad1aaea25 "xtensa: fix PR target/105879" -2fcc69d8ce4eddf6dea878a5383254d366e1bb14 "xtensa: Implement bswaphi2 insn pattern" -9777d446e2148ef9a6e9f35db3f4eab99ee8812c "xtensa: Make one_cmplsi2 optimizer-friendly" -e44e7face13f38f9b228e2619786ba0add9ef77b "xtensa: Optimize '(~x & y)' to '((x & y) ^ y)'" -29dc90a580bf45f503ed89eb1dc63b5676db776b "xtensa: Add clrsbsi2 insn 
pattern" -9489a1ab05ad1bda7126da5513f08282da3e531d "xtensa: Tweak some widen multiplications" -fddf0e1057fe24eff0d894fbc2959b4086464a96 "xtensa: Consider the Loop Option when setmemsi is expanded to small loop" -ccd02e734e0f1742629403b46e5b1c650b00fd65 "xtensa: Improve instruction cost estimation and suggestion" -cd02f15f1aecc45b2c2feae16840503549508619 "xtensa: Improve constant synthesis for both integer and floating-point" -1c68ec1f8ab531fba56cccf549ffe592bf622821 "xtensa: Improve shift operations more" -e1b193c1cce3a975a9ed60dd0f30182fe0255d7c "xtensa: Simplify conditional branch/move insn patterns" -70ce04ca353bb0cda8321b91a77c2477e26d339b "xtensa: Make use of BALL/BNALL instructions" -077438933cf94f00cc5edf974338c11ba4bf7a39 "xtensa: Optimize bitwise AND operation with some specific forms of constants" -96518f714e3fab53a966a05b8d48011e27c1a718 "xtensa: Document new -mextra-l32r-costs= Xtensa-specific option" -43b0c56fda4bc990e8ee8d6a0b376de7b663bb06 "xtensa: Add support for sibling call optimization" -c95e307e3a978166cd5d6817ec9d8293825ff3fb "xtensa: Add some dedicated patterns that correspond to GIMPLE canonicalizations" -cfad4856fa46abc878934a9433d0bfc2482ccf00 "xtensa: Eliminate unwanted reg-reg moves during DFmode input reloads" -ce3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9 "xtensa: Eliminate [DS]Cmode hard register clobber that is immediately followed by whole overwrite the register" -479b6f449ee999501ad6eff0b7db8d0cd5b2d28d "xtensa: Defer storing integer constants into litpool until reload" ---- - gcc/config/xtensa/constraints.md | 10 +- - gcc/config/xtensa/predicates.md | 41 +- - gcc/config/xtensa/xtensa-protos.h | 11 +- - gcc/config/xtensa/xtensa.c | 733 +++++++++--- - gcc/config/xtensa/xtensa.h | 7 +- - gcc/config/xtensa/xtensa.md | 1024 +++++++++++++---- - gcc/config/xtensa/xtensa.opt | 6 +- - gcc/doc/invoke.texi | 11 +- - gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 + - gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 + - 
gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 + - gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 + - .../gcc.target/xtensa/check_zero_byte.c | 9 + - .../gcc.target/xtensa/constsynth_2insns.c | 44 + - .../gcc.target/xtensa/constsynth_3insns.c | 24 + - .../gcc.target/xtensa/constsynth_double.c | 11 + - .../gcc.target/xtensa/funnel_shifter.c | 17 + - .../gcc.target/xtensa/one_cmpl_abs.c | 9 + - gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 + - libgcc/config/xtensa/lib1funcs.S | 23 + - libgcc/config/xtensa/t-xtensa | 2 +- - 21 files changed, 1796 insertions(+), 350 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c - -diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md -index 2062c8816..13b3daafc 100644 ---- a/gcc/config/xtensa/constraints.md -+++ b/gcc/config/xtensa/constraints.md -@@ -92,7 +92,7 @@ - "An integer constant in the range @minus{}32-95 for use with MOVI.N - instructions." - (and (match_code "const_int") -- (match_test "ival >= -32 && ival <= 95"))) -+ (match_test "IN_RANGE (ival, -32, 95)"))) - - (define_constraint "N" - "An unsigned 8-bit integer constant shifted left by 8 bits for use -@@ -103,7 +103,7 @@ - (define_constraint "O" - "An integer constant that can be used in ADDI.N instructions." 
- (and (match_code "const_int") -- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) -+ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) - - (define_constraint "P" - "An integer constant that can be used as a mask value in an EXTUI -@@ -113,8 +113,10 @@ - - (define_constraint "Y" - "A constant that can be used in relaxed MOVI instructions." -- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -- (match_test "TARGET_AUTO_LITPOOLS"))) -+ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -+ (match_test "TARGET_AUTO_LITPOOLS")) -+ (and (match_code "const_int") -+ (match_test "can_create_pseudo_p ()")))) - - ;; Memory constraints. Do not use define_memory_constraint here. Doing so - ;; causes reload to force some constants into the constant pool, but since -diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md -index eb52b05aa..633cc6264 100644 ---- a/gcc/config/xtensa/predicates.md -+++ b/gcc/config/xtensa/predicates.md -@@ -25,8 +25,7 @@ - - (define_predicate "addsubx_operand" - (and (match_code "const_int") -- (match_test "INTVAL (op) >= 1 -- && INTVAL (op) <= 3"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) - - (define_predicate "arith_operand" - (ior (and (match_code "const_int") -@@ -53,9 +52,19 @@ - (match_test "xtensa_mask_immediate (INTVAL (op))")) - (match_operand 0 "register_operand"))) - -+(define_predicate "shifted_mask_operand" -+ (match_code "const_int") -+{ -+ HOST_WIDE_INT mask = INTVAL (op); -+ int shift = ctz_hwi (mask); -+ -+ return IN_RANGE (shift, 1, 31) -+ && xtensa_mask_immediate ((uint32_t)mask >> shift); -+}) -+ - (define_predicate "extui_fldsz_operand" - (and (match_code "const_int") -- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) - - (define_predicate "sext_operand" - (if_then_else (match_test "TARGET_SEXT") -@@ -64,7 +73,7 @@ - - (define_predicate "sext_fldsz_operand" - (and (match_code 
"const_int") -- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) -+ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) - - (define_predicate "lsbitnum_operand" - (and (match_code "const_int") -@@ -138,8 +147,9 @@ - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) - (ior (and (match_code "const_int") -- (match_test "GET_MODE_CLASS (mode) == MODE_INT -- && xtensa_simm12b (INTVAL (op))")) -+ (match_test "(GET_MODE_CLASS (mode) == MODE_INT -+ && xtensa_simm12b (INTVAL (op))) -+ || can_create_pseudo_p ()")) - (and (match_code "const_int,const_double,const,symbol_ref,label_ref") - (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) - && CONSTANT_P (op) -@@ -156,6 +166,19 @@ - (and (match_code "const_int") - (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) - -+(define_predicate "reload_operand" -+ (match_code "mem") -+{ -+ const_rtx addr = XEXP (op, 0); -+ if (REG_P (addr)) -+ return REGNO (addr) == A1_REG; -+ if (GET_CODE (addr) == PLUS) -+ return REG_P (XEXP (addr, 0)) -+ && REGNO (XEXP (addr, 0)) == A1_REG -+ && CONST_INT_P (XEXP (addr, 1)); -+ return false; -+}) -+ - (define_predicate "branch_operator" - (match_code "eq,ne,lt,ge")) - -@@ -165,9 +188,15 @@ - (define_predicate "boolean_operator" - (match_code "eq,ne")) - -+(define_predicate "logical_shift_operator" -+ (match_code "ashift,lshiftrt")) -+ - (define_predicate "xtensa_cstoresi_operator" - (match_code "eq,ne,gt,ge,lt,le")) - -+(define_predicate "xtensa_shift_per_byte_operator" -+ (match_code "ashift,ashiftrt,lshiftrt")) -+ - (define_predicate "tls_symbol_operand" - (and (match_code "symbol_ref") - (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) -diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h -index 18d803581..75ed3bfb0 100644 ---- a/gcc/config/xtensa/xtensa-protos.h -+++ b/gcc/config/xtensa/xtensa-protos.h -@@ -41,18 +41,23 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); - extern int 
xtensa_expand_conditional_move (rtx *, int); - extern int xtensa_expand_scc (rtx *, machine_mode); - extern int xtensa_expand_block_move (rtx *); -+extern int xtensa_expand_block_set_unrolled_loop (rtx *); -+extern int xtensa_expand_block_set_small_loop (rtx *); - extern void xtensa_split_operand_pair (rtx *, machine_mode); -+extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); - extern int xtensa_emit_move_sequence (rtx *, machine_mode); - extern rtx xtensa_copy_incoming_a7 (rtx); - extern void xtensa_expand_nonlocal_goto (rtx *); - extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); - extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); - extern void xtensa_emit_loop_end (rtx_insn *, rtx *); --extern char *xtensa_emit_branch (bool, bool, rtx *); --extern char *xtensa_emit_bit_branch (bool, bool, rtx *); -+extern char *xtensa_emit_branch (bool, rtx *); - extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); -+extern void xtensa_prepare_expand_call (int, rtx *); - extern char *xtensa_emit_call (int, rtx *); -+extern char *xtensa_emit_sibcall (int, rtx *); - extern bool xtensa_tls_referenced_p (rtx); -+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); - - #ifdef TREE_CODE - extern void init_cumulative_args (CUMULATIVE_ARGS *, int); -@@ -70,7 +75,7 @@ extern int xtensa_dbx_register_number (int); - extern long compute_frame_size (poly_int64); - extern bool xtensa_use_return_instruction_p (void); - extern void xtensa_expand_prologue (void); --extern void xtensa_expand_epilogue (void); -+extern void xtensa_expand_epilogue (bool); - extern void order_regs_for_local_alloc (void); - extern enum reg_class xtensa_regno_to_class (int regno); - extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 6cd9d5528..5b1aa9b23 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -55,6 +55,7 @@ along with GCC; see the 
file COPYING3. If not see - #include "dumpfile.h" - #include "hw-doloop.h" - #include "rtl-iter.h" -+#include "insn-attr.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -117,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = - - static void xtensa_option_override (void); - static enum internal_test map_test_to_internal_test (enum rtx_code); --static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); -+static rtx gen_int_relational (enum rtx_code, rtx, rtx); - static rtx gen_float_relational (enum rtx_code, rtx, rtx); - static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); - static rtx fixup_subreg_mem (rtx); -@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, - static section *xtensa_select_rtx_section (machine_mode, rtx, - unsigned HOST_WIDE_INT); - static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); -+static int xtensa_insn_cost (rtx_insn *, bool); - static int xtensa_register_move_cost (machine_mode, reg_class_t, - reg_class_t); - static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); -@@ -185,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); - static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); - static HOST_WIDE_INT xtensa_starting_frame_offset (void); - static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); -+static bool xtensa_function_ok_for_sibcall (tree, tree); - - - -@@ -208,6 +211,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS xtensa_rtx_costs -+#undef TARGET_INSN_COST -+#define TARGET_INSN_COST xtensa_insn_cost - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 - -@@ -333,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #undef 
TARGET_HAVE_SPECULATION_SAFE_VALUE - #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed - -+#undef TARGET_FUNCTION_OK_FOR_SIBCALL -+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - -@@ -341,42 +349,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; - bool - xtensa_simm8 (HOST_WIDE_INT v) - { -- return v >= -128 && v <= 127; -+ return IN_RANGE (v, -128, 127); - } - - - bool - xtensa_simm8x256 (HOST_WIDE_INT v) - { -- return (v & 255) == 0 && (v >= -32768 && v <= 32512); -+ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); - } - - - bool - xtensa_simm12b (HOST_WIDE_INT v) - { -- return v >= -2048 && v <= 2047; -+ return IN_RANGE (v, -2048, 2047); - } - - - static bool - xtensa_uimm8 (HOST_WIDE_INT v) - { -- return v >= 0 && v <= 255; -+ return IN_RANGE (v, 0, 255); - } - - - static bool - xtensa_uimm8x2 (HOST_WIDE_INT v) - { -- return (v & 1) == 0 && (v >= 0 && v <= 510); -+ return (v & 1) == 0 && IN_RANGE (v, 0, 510); - } - - - static bool - xtensa_uimm8x4 (HOST_WIDE_INT v) - { -- return (v & 3) == 0 && (v >= 0 && v <= 1020); -+ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); - } - - -@@ -446,19 +454,7 @@ xtensa_b4constu (HOST_WIDE_INT v) - bool - xtensa_mask_immediate (HOST_WIDE_INT v) - { --#define MAX_MASK_SIZE 16 -- int mask_size; -- -- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) -- { -- if ((v & 1) == 0) -- return false; -- v = v >> 1; -- if (v == 0) -- return true; -- } -- -- return false; -+ return IN_RANGE (exact_log2 (v + 1), 1, 16); - } - - -@@ -539,7 +535,7 @@ smalloffset_mem_p (rtx op) - return FALSE; - - val = INTVAL (offset); -- return (val & 3) == 0 && (val >= 0 && val <= 60); -+ return (val & 3) == 0 && IN_RANGE (val, 0, 60); - } - } - return FALSE; -@@ -678,8 +674,7 @@ map_test_to_internal_test (enum rtx_code test_code) - static rtx - gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - rtx cmp0, /* 
first operand to compare */ -- rtx cmp1, /* second operand to compare */ -- int *p_invert /* whether branch needs to reverse test */) -+ rtx cmp1 /* second operand to compare */) - { - struct cmp_info - { -@@ -711,6 +706,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - enum internal_test test; - machine_mode mode; - struct cmp_info *p_info; -+ int invert; - - test = map_test_to_internal_test (test_code); - gcc_assert (test != ITEST_MAX); -@@ -747,9 +743,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - } - - /* See if we need to invert the result. */ -- *p_invert = ((GET_CODE (cmp1) == CONST_INT) -- ? p_info->invert_const -- : p_info->invert_reg); -+ invert = ((GET_CODE (cmp1) == CONST_INT) -+ ? p_info->invert_const -+ : p_info->invert_reg); - - /* Comparison to constants, may involve adding 1 to change a LT into LE. - Comparison between two registers, may involve switching operands. */ -@@ -766,7 +762,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - cmp1 = temp; - } - -- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); -+ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) -+ : p_info->test_code, -+ VOIDmode, cmp0, cmp1); - } - - -@@ -825,45 +823,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) - enum rtx_code test_code = GET_CODE (operands[0]); - rtx cmp0 = operands[1]; - rtx cmp1 = operands[2]; -- rtx cmp; -- int invert; -- rtx label1, label2; -+ rtx cmp, label; - - switch (mode) - { -+ case E_SFmode: -+ if (TARGET_HARD_FLOAT) -+ { -+ cmp = gen_float_relational (test_code, cmp0, cmp1); -+ break; -+ } -+ /* FALLTHRU */ -+ - case E_DFmode: - default: - fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); - - case E_SImode: -- invert = FALSE; -- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); -- break; -- -- case E_SFmode: -- if (!TARGET_HARD_FLOAT) -- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, -- cmp0, cmp1)); -- invert = FALSE; -- cmp = gen_float_relational (test_code, cmp0, cmp1); -+ cmp = gen_int_relational (test_code, cmp0, cmp1); - break; - } - - /* Generate the branch. */ -- -- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); -- label2 = pc_rtx; -- -- if (invert) -- { -- label2 = label1; -- label1 = pc_rtx; -- } -- -+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); - emit_jump_insn (gen_rtx_SET (pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, -- label1, -- label2))); -+ label, -+ pc_rtx))); - } - - -@@ -1035,6 +1021,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) - } - - -+/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) -+ into dst with synthesizing a such constant value from a sequence of -+ load-immediate / arithmetic ones, instead of a L32R instruction -+ (plus a constant in litpool). 
*/ -+ -+static void -+xtensa_emit_constantsynth (rtx dst, enum rtx_code code, -+ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT imm2) -+{ -+ gcc_assert (REG_P (dst)); -+ emit_move_insn (dst, GEN_INT (imm0)); -+ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, -+ dst, GEN_INT (imm1))); -+ if (gen_op) -+ emit_move_insn (dst, gen_op (dst, imm2)); -+} -+ -+static int -+xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT op_imm) -+{ -+ int shift = exact_log2 (srcval + 1); -+ -+ if (IN_RANGE (shift, 1, 31)) -+ { -+ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) -+ { -+ HOST_WIDE_INT imm0, imm1; -+ -+ if (srcval < -32768) -+ imm1 = -32768; -+ else if (srcval > 32512) -+ imm1 = 32512; -+ else -+ imm1 = srcval & ~255; -+ imm0 = srcval - imm1; -+ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) -+ imm0 -= 256, imm1 += 256; -+ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); -+ return 1; -+ } -+ -+ shift = ctz_hwi (srcval); -+ if (xtensa_simm12b (srcval >> shift)) -+ { -+ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static rtx -+xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) -+{ -+ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); -+} -+ -+static rtx -+xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) -+{ -+ return imm == 7 -+ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), -+ reg) -+ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, -+ GEN_INT (floor_log2 (imm - 1))), -+ reg); -+} -+ -+int -+xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) -+{ -+ /* No need for synthesizing for what fits into MOVI instruction. */ -+ if (xtensa_simm12b (srcval)) -+ return 0; -+ -+ /* 2-insns substitution. 
*/ -+ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) -+ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) -+ return 1; -+ -+ /* 3-insns substitution. */ -+ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) -+ { -+ int shift, divisor; -+ -+ /* 2-insns substitution followed by SLLI. */ -+ shift = ctz_hwi (srcval); -+ if (IN_RANGE (shift, 1, 31) && -+ xtensa_constantsynth_2insn (dst, srcval >> shift, -+ xtensa_constantsynth_rtx_SLLI, -+ shift)) -+ return 1; -+ -+ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ -+ if (TARGET_ADDX) -+ for (divisor = 3; divisor <= 9; divisor += 2) -+ if (srcval % divisor == 0 && -+ xtensa_constantsynth_2insn (dst, srcval / divisor, -+ xtensa_constantsynth_rtx_ADDSUBX, -+ divisor)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+ - /* Emit insns to move operands[1] into operands[0]. - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move -@@ -1070,24 +1173,9 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - return 1; - } - -- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) -+ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 -+ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) - { -- /* Try to emit MOVI + SLLI sequence, that is smaller -- than L32R + literal. */ -- if (optimize >= 1 && ! 
optimize_debug && mode == SImode -- && CONST_INT_P (src) && register_operand (dst, mode)) -- { -- HOST_WIDE_INT srcval = INTVAL (src); -- int shift = ctz_hwi (srcval); -- -- if (xtensa_simm12b (srcval >> shift)) -- { -- emit_move_insn (dst, GEN_INT (srcval >> shift)); -- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); -- return 1; -- } -- } -- - src = force_const_mem (SImode, src); - operands[1] = src; - } -@@ -1315,7 +1403,7 @@ xtensa_expand_block_move (rtx *operands) - move_ratio = 4; - if (optimize > 2) - move_ratio = LARGEST_MOVE_RATIO; -- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ -+ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); - if (num_pieces > move_ratio) - return 0; - -@@ -1352,7 +1440,7 @@ xtensa_expand_block_move (rtx *operands) - temp[next] = gen_reg_rtx (mode[next]); - - x = adjust_address (src_mem, mode[next], offset_ld); -- emit_insn (gen_rtx_SET (temp[next], x)); -+ emit_move_insn (temp[next], x); - - offset_ld += next_amount; - bytes -= next_amount; -@@ -1362,9 +1450,9 @@ xtensa_expand_block_move (rtx *operands) - if (active[phase]) - { - active[phase] = false; -- -+ - x = adjust_address (dst_mem, mode[phase], offset_st); -- emit_insn (gen_rtx_SET (x, temp[phase])); -+ emit_move_insn (x, temp[phase]); - - offset_st += amount[phase]; - } -@@ -1375,6 +1463,246 @@ xtensa_expand_block_move (rtx *operands) - } - - -+/* Try to expand a block set operation to a sequence of RTL move -+ instructions. If not optimizing, or if the block size is not a -+ constant, or if the block is too large, or if the value to -+ initialize the block with is not a constant, the expansion -+ fails and GCC falls back to calling memset(). -+ -+ operands[0] is the destination -+ operands[1] is the length -+ operands[2] is the initialization value -+ operands[3] is the alignment */ -+ -+static int -+xtensa_sizeof_MOVI (HOST_WIDE_INT imm) -+{ -+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; -+} -+ -+int -+xtensa_expand_block_set_unrolled_loop (rtx *operands) -+{ -+ rtx dst_mem = operands[0]; -+ HOST_WIDE_INT bytes, value, align; -+ int expand_len, funccall_len; -+ rtx x, reg; -+ int offset; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ /* Insn expansion: a series of aligned memory stores. -+ Consist of S8I, S16I or S32I(.N). */ -+ expand_len += (bytes / align) * (TARGET_DENSITY -+ && align == 4 ? 2 : 3); -+ /* Insn expansion: the remainder, sub-aligned memory stores. -+ A combination of S8I and S16I as needed. */ -+ expand_len += ((bytes % align + 1) / 2) * 3; -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (dst_mem, 0); -+ if (!REG_P (x)) -+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); -+ switch (align) -+ { -+ case 1: -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (SImode, GEN_INT (value)); -+ -+ offset = 0; -+ do -+ { -+ int unit_size = MIN (bytes, align); -+ machine_mode unit_mode = (unit_size >= 4 ? SImode : -+ (unit_size >= 2 ? HImode : -+ QImode)); -+ unit_size = GET_MODE_SIZE (unit_mode); -+ -+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), -+ unit_mode == SImode ? reg -+ : convert_to_mode (unit_mode, reg, true)); -+ -+ offset += unit_size; -+ bytes -= unit_size; -+ } -+ while (bytes > 0); -+ -+ return 1; -+} -+ -+int -+xtensa_expand_block_set_small_loop (rtx *operands) -+{ -+ HOST_WIDE_INT bytes, value, align, count; -+ int expand_len, funccall_len; -+ rtx x, dst, end, reg; -+ machine_mode unit_mode; -+ rtx_code_label *label; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Totally-aligned block only. */ -+ if (bytes % align != 0) -+ return 0; -+ count = bytes / align; -+ -+ /* If the Loop Option (zero-overhead looping) is configured and active, -+ almost no restrictions about the length of the block. */ -+ if (! (TARGET_LOOPS && optimize)) -+ { -+ /* If 4-byte aligned, small loop substitution is almost optimal, -+ thus limited to only offset to the end address for ADDI/ADDMI -+ instruction. */ -+ if (align == 4 -+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) -+ return 0; -+ -+ /* If no 4-byte aligned, loop count should be treated as the -+ constraint. 
*/ -+ if (align != 4 -+ && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) -+ return 0; -+ } -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ -+ { -+ /* Insn translation: Either MOV(.N) or L32R w/litpool for the -+ loop count. */ -+ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) -+ : 3 + 4; -+ /* Insn translation: LOOP, the zero-overhead looping setup -+ instruction. */ -+ expand_len += 3; -+ /* Insn expansion: the loop body instructions. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3); -+ } -+ else /* NO zero-overhead looping */ -+ { -+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ -+ expand_len += bytes > 127 ? 3 -+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; -+ /* Insn expansion: the loop body and branch instruction. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). -+ For branch, BNE. */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3) + 3; -+ } -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (operands[0], 0); -+ if (!REG_P (x)) -+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); -+ dst = gen_reg_rtx (SImode); -+ emit_move_insn (dst, x); -+ end = gen_reg_rtx (SImode); -+ if (TARGET_LOOPS && optimize) -+ x = force_reg (SImode, operands[1] /* the length */); -+ else -+ x = operands[1]; -+ emit_insn (gen_addsi3 (end, dst, x)); -+ switch (align) -+ { -+ case 1: -+ unit_mode = QImode; -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ unit_mode = HImode; -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ unit_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (unit_mode, GEN_INT (value)); -+ -+ label = gen_label_rtx (); -+ emit_label (label); -+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); -+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); -+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); -+ -+ return 1; -+} -+ -+ - void - xtensa_expand_nonlocal_goto (rtx *operands) - { -@@ -1725,21 +2053,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) - - - char * --xtensa_emit_branch (bool inverted, bool immed, rtx *operands) -+xtensa_emit_branch (bool immed, rtx *operands) - { - static char result[64]; -- enum rtx_code code; -+ enum rtx_code code = GET_CODE (operands[3]); - const char *op; - -- code = GET_CODE (operands[3]); - switch (code) - { -- case EQ: op = inverted ? "ne" : "eq"; break; -- case NE: op = inverted ? "eq" : "ne"; break; -- case LT: op = inverted ? "ge" : "lt"; break; -- case GE: op = inverted ? "lt" : "ge"; break; -- case LTU: op = inverted ? "geu" : "ltu"; break; -- case GEU: op = inverted ? 
"ltu" : "geu"; break; -+ case EQ: op = "eq"; break; -+ case NE: op = "ne"; break; -+ case LT: op = "lt"; break; -+ case GE: op = "ge"; break; -+ case LTU: op = "ltu"; break; -+ case GEU: op = "geu"; break; - default: gcc_unreachable (); - } - -@@ -1758,32 +2085,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) - } - - --char * --xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) --{ -- static char result[64]; -- const char *op; -- -- switch (GET_CODE (operands[3])) -- { -- case EQ: op = inverted ? "bs" : "bc"; break; -- case NE: op = inverted ? "bc" : "bs"; break; -- default: gcc_unreachable (); -- } -- -- if (immed) -- { -- unsigned bitnum = INTVAL (operands[1]) & 0x1f; -- operands[1] = GEN_INT (bitnum); -- sprintf (result, "b%si\t%%0, %%d1, %%2", op); -- } -- else -- sprintf (result, "b%s\t%%0, %%1, %%2", op); -- -- return result; --} -- -- - char * - xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { -@@ -1792,12 +2093,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - const char *op; - - code = GET_CODE (operands[4]); -+ if (inverted) -+ code = reverse_condition (code); - if (isbool) - { - switch (code) - { -- case EQ: op = inverted ? "t" : "f"; break; -- case NE: op = inverted ? "f" : "t"; break; -+ case EQ: op = "f"; break; -+ case NE: op = "t"; break; - default: gcc_unreachable (); - } - } -@@ -1805,10 +2108,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { - switch (code) - { -- case EQ: op = inverted ? "nez" : "eqz"; break; -- case NE: op = inverted ? "eqz" : "nez"; break; -- case LT: op = inverted ? "gez" : "ltz"; break; -- case GE: op = inverted ? 
"ltz" : "gez"; break; -+ case EQ: op = "eqz"; break; -+ case NE: op = "nez"; break; -+ case LT: op = "ltz"; break; -+ case GE: op = "gez"; break; - default: gcc_unreachable (); - } - } -@@ -1819,6 +2122,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - } - - -+void -+xtensa_prepare_expand_call (int callop, rtx *operands) -+{ -+ rtx addr = XEXP (operands[callop], 0); -+ -+ if (flag_pic && SYMBOL_REF_P (addr) -+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -+ addr = gen_sym_PLT (addr); -+ -+ if (!call_insn_operand (addr, VOIDmode)) -+ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); -+} -+ -+ - char * - xtensa_emit_call (int callop, rtx *operands) - { -@@ -1837,6 +2154,24 @@ xtensa_emit_call (int callop, rtx *operands) - } - - -+char * -+xtensa_emit_sibcall (int callop, rtx *operands) -+{ -+ static char result[64]; -+ rtx tgt = operands[callop]; -+ -+ if (GET_CODE (tgt) == CONST_INT) -+ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", -+ INTVAL (tgt)); -+ else if (register_operand (tgt, VOIDmode)) -+ sprintf (result, "jx\t%%%d", callop); -+ else -+ sprintf (result, "j.l\t%%%d, a9", callop); -+ -+ return result; -+} -+ -+ - bool - xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) - { -@@ -2061,6 +2396,20 @@ xtensa_tls_referenced_p (rtx x) - } - - -+/* Helper function for "*shlrd_..." patterns. */ -+ -+enum rtx_code -+xtensa_shlrd_which_direction (rtx op0, rtx op1) -+{ -+ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) -+ return ASHIFT; /* shld */ -+ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) -+ return LSHIFTRT; /* shrd */ -+ -+ return UNKNOWN; -+} -+ -+ - /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ - - static bool -@@ -2364,7 +2713,7 @@ static void - printx (FILE *file, signed int val) - { - /* Print a hexadecimal value in a nice way. 
*/ -- if ((val > -0xa) && (val < 0xa)) -+ if (IN_RANGE (val, -9, 9)) - fprintf (file, "%d", val); - else if (val < 0) - fprintf (file, "-0x%x", -val); -@@ -2379,7 +2728,7 @@ void - print_operand (FILE *file, rtx x, int letter) - { - if (!x) -- error ("PRINT_OPERAND null pointer"); -+ error ("% null pointer"); - - switch (letter) - { -@@ -2424,17 +2773,11 @@ print_operand (FILE *file, rtx x, int letter) - case 'K': - if (GET_CODE (x) == CONST_INT) - { -- int num_bits = 0; - unsigned val = INTVAL (x); -- while (val & 1) -- { -- num_bits += 1; -- val = val >> 1; -- } -- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) -+ if (!xtensa_mask_immediate (val)) - fatal_insn ("invalid mask", x); - -- fprintf (file, "%d", num_bits); -+ fprintf (file, "%d", floor_log2 (val + 1)); - } - else - output_operand_lossage ("invalid %%K value"); -@@ -2584,7 +2927,7 @@ void - print_operand_address (FILE *file, rtx addr) - { - if (!addr) -- error ("PRINT_OPERAND_ADDRESS, null pointer"); -+ error ("%, null pointer"); - - switch (GET_CODE (addr)) - { -@@ -2750,7 +3093,7 @@ xtensa_call_save_reg(int regno) - return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || - df_regs_ever_live_p (regno); - -- if (crtl->calls_eh_return && regno >= 2 && regno < 4) -+ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) - return true; - - return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); -@@ -2870,7 +3213,7 @@ xtensa_expand_prologue (void) - int callee_save_size = cfun->machine->callee_save_size; - - /* -128 is a limit of single addi instruction. 
*/ -- if (total_size > 0 && total_size <= 128) -+ if (IN_RANGE (total_size, 1, 128)) - { - insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-total_size))); -@@ -2999,7 +3342,7 @@ xtensa_expand_prologue (void) - } - - void --xtensa_expand_epilogue (void) -+xtensa_expand_epilogue (bool sibcall_p) - { - if (!TARGET_WINDOWED_ABI) - { -@@ -3033,10 +3376,13 @@ xtensa_expand_epilogue (void) - if (xtensa_call_save_reg(regno)) - { - rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ rtx reg; - - offset -= UNITS_PER_WORD; -- emit_move_insn (gen_rtx_REG (SImode, regno), -+ emit_move_insn (reg = gen_rtx_REG (SImode, regno), - gen_frame_mem (SImode, x)); -+ if (regno == A0_REG && sibcall_p) -+ emit_use (reg); - } - } - -@@ -3071,7 +3417,8 @@ xtensa_expand_epilogue (void) - EH_RETURN_STACKADJ_RTX)); - } - cfun->machine->epilogue_done = true; -- emit_jump_insn (gen_return ()); -+ if (!sibcall_p) -+ emit_jump_insn (gen_return ()); - } - - bool -@@ -3697,7 +4044,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) - flags |= SECTION_BSS; /* @nobits */ - else - warning (0, "only uninitialized variables can be placed in a " -- ".bss section"); -+ "%<.bss%> section"); - } - - return flags; -@@ -3750,7 +4097,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - static bool - xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - int opno ATTRIBUTE_UNUSED, -- int *total, bool speed ATTRIBUTE_UNUSED) -+ int *total, bool speed) - { - int code = GET_CODE (x); - -@@ -3838,9 +4185,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case CLZ: -+ case CLRSB: - *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); - return true; - -+ case BSWAP: -+ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); -+ return true; -+ - case NOT: - *total = COSTS_N_INSNS (mode == DImode ? 
3 : 2); - return true; -@@ -3864,13 +4216,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case ABS: -+ case NEG: - { - if (mode == SFmode) - *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); - else if (mode == DFmode) - *total = COSTS_N_INSNS (50); -- else -+ else if (mode == DImode) - *total = COSTS_N_INSNS (4); -+ else -+ *total = COSTS_N_INSNS (1); - return true; - } - -@@ -3886,10 +4241,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - } - -- case NEG: -- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); -- return true; -- - case MULT: - { - if (mode == SFmode) -@@ -3929,11 +4280,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - case UMOD: - { - if (mode == DImode) -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - else if (TARGET_DIV32) - *total = COSTS_N_INSNS (32); - else -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - return true; - } - -@@ -3966,6 +4317,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - } - } - -+static bool -+xtensa_is_insn_L32R_p(const rtx_insn *insn) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = XEXP (x, 1); -+ if (GET_CODE (x) == MEM) -+ { -+ x = XEXP (x, 0); -+ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) -+ && CONSTANT_POOL_ADDRESS_P (x); -+ } -+ } -+ -+ return false; -+} -+ -+/* Compute a relative costs of RTL insns. This is necessary in order to -+ achieve better RTL insn splitting/combination result. */ -+ -+static int -+xtensa_insn_cost (rtx_insn *insn, bool speed) -+{ -+ if (!(recog_memoized (insn) < 0)) -+ { -+ int len = get_attr_length (insn), n = (len + 2) / 3; -+ -+ if (len == 0) -+ return COSTS_N_INSNS (0); -+ -+ if (speed) /* For speed cost. */ -+ { -+ /* "L32R" may be particular slow (implementation-dependent). 
*/ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); -+ -+ /* Cost based on the pipeline model. */ -+ switch (get_attr_type (insn)) -+ { -+ case TYPE_STORE: -+ case TYPE_MOVE: -+ case TYPE_ARITH: -+ case TYPE_MULTI: -+ case TYPE_NOP: -+ case TYPE_FSTORE: -+ return COSTS_N_INSNS (n); -+ -+ case TYPE_LOAD: -+ return COSTS_N_INSNS (n - 1 + 2); -+ -+ case TYPE_JUMP: -+ case TYPE_CALL: -+ return COSTS_N_INSNS (n - 1 + 3); -+ -+ case TYPE_FCONV: -+ case TYPE_FLOAD: -+ case TYPE_MUL16: -+ case TYPE_MUL32: -+ case TYPE_RSR: -+ return COSTS_N_INSNS (n * 2); -+ -+ case TYPE_FMADD: -+ return COSTS_N_INSNS (n * 4); -+ -+ case TYPE_DIV32: -+ return COSTS_N_INSNS (n * 16); -+ -+ default: -+ break; -+ } -+ } -+ else /* For size cost. */ -+ { -+ /* Cost based on the instruction length. */ -+ if (get_attr_type (insn) != TYPE_UNKNOWN) -+ { -+ /* "L32R" itself plus constant in litpool. */ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (2) + 1; -+ -+ /* Consider ".n" short instructions. */ -+ return COSTS_N_INSNS (n) - (n * 3 - len); -+ } -+ } -+ } -+ -+ /* Fall back. */ -+ return pattern_cost (PATTERN (insn), speed); -+} -+ - /* Worker function for TARGET_RETURN_IN_MEMORY. */ - - static bool -@@ -4491,4 +4934,16 @@ xtensa_asan_shadow_offset (void) - return HOST_WIDE_INT_UC (0x10000000); - } - -+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ -+static bool -+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Do not allow sibcalls if the Windowed Register Option is -+ configured. */ -+ if (TARGET_WINDOWED_ABI) -+ return false; -+ -+ return true; -+} -+ - #include "gt-xtensa.h" -diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h -index fa86a245e..3e9cbc943 100644 ---- a/gcc/config/xtensa/xtensa.h -+++ b/gcc/config/xtensa/xtensa.h -@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. 
If not see - #define HAVE_AS_TLS 0 - #endif - -+/* Define this if the target has no hardware divide instructions. */ -+#if !TARGET_DIV32 -+#define TARGET_HAS_NO_HW_DIVIDE -+#endif -+ - - /* Target CPU builtins. */ - #define TARGET_CPU_CPP_BUILTINS() \ -@@ -488,7 +493,7 @@ enum reg_class - used for this purpose since all function arguments are pushed on - the stack. */ - #define FUNCTION_ARG_REGNO_P(N) \ -- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) -+ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) - - /* Record the number of argument words seen so far, along with a flag to - indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 2a8e59ee9..124548dfe 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -25,6 +25,7 @@ - (A7_REG 7) - (A8_REG 8) - (A9_REG 9) -+ (A10_REG 10) - - (UNSPEC_NOP 2) - (UNSPEC_PLT 3) -@@ -83,6 +84,13 @@ - ;; the same template. - (define_mode_iterator HQI [HI QI]) - -+;; This code iterator is for *shlrd and its variants. -+(define_code_iterator ior_op [ior plus]) -+ -+;; This mode iterator allows the DC and SC patterns to be defined from -+;; the same template. -+(define_mode_iterator DSC [DC SC]) -+ - - ;; Attributes. - -@@ -98,7 +106,10 @@ - - ;; Describe a user's asm statement. - (define_asm_attributes -- [(set_attr "type" "multi")]) -+ [(set_attr "type" "multi") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) ;; Should be the maximum possible length -+ ;; of a single machine instruction. - - - ;; Pipeline model. -@@ -224,20 +235,42 @@ - - ;; Multiplication. 
- --(define_expand "mulsidi3" -+(define_expand "mulsidi3" - [(set (match_operand:DI 0 "register_operand") -- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) -- (any_extend:DI (match_operand:SI 2 "register_operand"))))] -+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] - "TARGET_MUL32_HIGH" - { - rtx temp = gen_reg_rtx (SImode); - emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -- emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -- operands[1], operands[2])); -+ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); - emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); - DONE; - }) - -+(define_expand "umulsidi3" -+ [(set (match_operand:DI 0 "register_operand") -+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] -+ "" -+{ -+ if (TARGET_MUL32_HIGH) -+ { -+ rtx temp = gen_reg_rtx (SImode); -+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -+ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); -+ } -+ else -+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], SImode, -+ operands[2], SImode); -+ DONE; -+}) -+ - (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=a") - (truncate:SI -@@ -261,30 +294,16 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_insn "mulhisi3" -- [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (sign_extend:SI -- (match_operand:HI 1 "register_operand" "%r,r")) -- (sign_extend:SI -- (match_operand:HI 2 "register_operand" "r,r"))))] -- "TARGET_MUL16 || TARGET_MAC16" -- "@ -- mul16s\t%0, %1, %2 -- mul.aa.ll\t%1, %2" -- [(set_attr "type" 
"mul16,mac16") -- (set_attr "mode" "SI") -- (set_attr "length" "3,3")]) -- --(define_insn "umulhisi3" -+(define_insn "mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (zero_extend:SI -+ (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) -- (zero_extend:SI -+ (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] - "TARGET_MUL16 || TARGET_MAC16" - "@ -- mul16u\t%0, %1, %2 -- umul.aa.ll\t%1, %2" -+ mul16\t%0, %1, %2 -+ mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) -@@ -429,7 +448,17 @@ - (set_attr "length" "3")]) - - --;; Count leading/trailing zeros and find first bit. -+;; Count redundant leading sign bits and leading/trailing zeros, -+;; and find first bit. -+ -+(define_insn "clrsbsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] -+ "TARGET_NSA" -+ "nsa\t%0, %1" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "3")]) - - (define_insn "clzsi2" - [(set (match_operand:SI 0 "register_operand" "=a") -@@ -471,23 +500,78 @@ - - ;; Byte swap. 
- --(define_insn "bswapsi2" -- [(set (match_operand:SI 0 "register_operand" "=&a") -- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "15")]) -+(define_insn "bswaphi2" -+ [(set (match_operand:HI 0 "register_operand" "=a") -+ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) -+ (clobber (match_scratch:HI 2 "=&a"))] -+ "" -+ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "HI") -+ (set_attr "length" "9")]) - --(define_insn "bswapdi2" -- [(set (match_operand:DI 0 "register_operand" "=&a") -- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" -- [(set_attr "type" "arith") -- (set_attr "mode" "DI") -- (set_attr "length" "27")]) -+(define_expand "bswapsi2" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (bswap:SI (match_operand:SI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1" -+{ -+ /* GIMPLE manual byte-swapping recognition is now activated. -+ For both built-in and manual bswaps, emit corresponding library call -+ if optimizing for size, or a series of dedicated machine instructions -+ if otherwise. 
*/ -+ if (optimize_size) -+ emit_library_call_value (optab_libfunc (bswap_optab, SImode), -+ operands[0], LCT_NORMAL, SImode, -+ operands[1], SImode); -+ else -+ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "bswapsi2_internal" -+ [(set (match_operand:SI 0 "register_operand" "=a,&a") -+ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) -+ (clobber (match_scratch:SI 2 "=&a,X"))] -+ "!optimize_debug && optimize > 1 && !optimize_size" -+{ -+ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); -+ const char *init = "ssai\t8\;"; -+ static char result[64]; -+ if (prev_insn && NONJUMP_INSN_P (prev_insn)) -+ { -+ rtx x = PATTERN (prev_insn); -+ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 -+ && GET_CODE (XVECEXP (x, 0, 0)) == SET -+ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) -+ { -+ x = XEXP (XVECEXP (x, 0, 0), 1); -+ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) -+ init = ""; -+ } -+ } -+ sprintf (result, -+ (which_alternative == 0) -+ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" -+ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", -+ init); -+ return result; -+} -+ [(set_attr "type" "arith,arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "15,15")]) -+ -+(define_expand "bswapdi2" -+ [(set (match_operand:DI 0 "register_operand" "") -+ (bswap:DI (match_operand:DI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1 && optimize_size" -+{ -+ /* Replace with a single DImode library call. -+ Without this, two SImode library calls are emitted. */ -+ emit_library_call_value (optab_libfunc (bswap_optab, DImode), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], DImode); -+ DONE; -+}) - - - ;; Negation and one's complement. 
-@@ -501,16 +585,26 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "one_cmplsi2" -- [(set (match_operand:SI 0 "register_operand" "") -- (not:SI (match_operand:SI 1 "register_operand" "")))] -+(define_insn_and_split "one_cmplsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (not:SI (match_operand:SI 1 "register_operand" "r")))] - "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (const_int -1)) -+ (set (match_dup 0) -+ (xor:SI (match_dup 1) -+ (match_dup 2)))] - { -- rtx temp = gen_reg_rtx (SImode); -- emit_insn (gen_movsi (temp, constm1_rtx)); -- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); -- DONE; --}) -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) - - (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") -@@ -536,6 +630,103 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -+(define_insn_and_split "*andsi3_bitcmpl" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r")))] -+ "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 3) -+ (and:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (xor:SI (match_dup 3) -+ (match_dup 2)))] -+{ -+ operands[3] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_pow2_minus_one" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ashift:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (lshiftrt:SI (match_dup 0) -+ 
(match_dup 2)))] -+{ -+ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[2]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*andsi3_const_negative_pow2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (lshiftrt:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_shifted_mask" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "shifted_mask_operand" "i")))] -+ "! 
xtensa_simm12b (INTVAL (operands[2]))" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (zero_extract:SI (match_dup 1) -+ (match_dup 3) -+ (match_dup 4))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[2]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[2] = GEN_INT (shift); -+ operands[3] = GEN_INT (mask_size); -+ operands[4] = GEN_INT (mask_pos); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && ctz_hwi (INTVAL (operands[2])) == 1") -+ (const_int 5) -+ (const_int 6)))]) -+ - (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=a") - (ior:SI (match_operand:SI 1 "register_operand" "%r") -@@ -634,7 +825,7 @@ - - ;; Field extract instructions. - --(define_expand "extv" -+(define_expand "extvsi" - [(set (match_operand:SI 0 "register_operand" "") - (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -649,12 +840,12 @@ - if (!lsbitnum_operand (operands[3], SImode)) - FAIL; - -- emit_insn (gen_extv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extv_internal" -+(define_insn "extvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (sign_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "sext_fldsz_operand" "i") -@@ -669,7 +860,7 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "extzv" -+(define_expand "extzvsi" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -678,12 +869,12 @@ - 
{ - if (!extui_fldsz_operand (operands[2], SImode)) - FAIL; -- emit_insn (gen_extzv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extzvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extzv_internal" -+(define_insn "extzvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (zero_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "extui_fldsz_operand" "i") -@@ -757,11 +948,14 @@ - because of offering further optimization opportunities. */ - if (register_operand (operands[0], DImode)) - { -- rtx first, second; -- -- split_double (operands[1], &first, &second); -- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); -- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); -+ rtx lowpart, highpart; -+ -+ if (TARGET_BIG_ENDIAN) -+ split_double (operands[1], &highpart, &lowpart); -+ else -+ split_double (operands[1], &lowpart, &highpart); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); -+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); - DONE; - } - -@@ -782,7 +976,7 @@ - "register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -831,6 +1025,19 @@ - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) - -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ rtx x = avoid_constant_pool_reference (operands[1]); -+ if (! CONST_INT_P (x)) -+ FAIL; -+ if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) -+ emit_move_insn (operands[0], x); -+}) -+ - ;; 16-bit Integer moves - - (define_expand "movhi" -@@ -1035,6 +1242,43 @@ - (set_attr "mode" "SF") - (set_attr "length" "3")]) - -+(define_split -+ [(set (match_operand:SF 0 "register_operand") -+ (match_operand:SF 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ int i = 0; -+ rtx x = XEXP (operands[1], 0); -+ long l[2]; -+ if (GET_CODE (x) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (x)) -+ x = get_pool_constant (x); -+ else if (GET_CODE (x) == CONST) -+ { -+ x = XEXP (x, 0); -+ gcc_assert (GET_CODE (x) == PLUS -+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) -+ && CONST_INT_P (XEXP (x, 1))); -+ i = INTVAL (XEXP (x, 1)); -+ gcc_assert (i == 0 || i == 4); -+ i /= 4; -+ x = get_pool_constant (XEXP (x, 0)); -+ } -+ else -+ gcc_unreachable (); -+ if (GET_MODE (x) == SFmode) -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); -+ else if (GET_MODE (x) == DFmode) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ else -+ FAIL; -+ x = gen_rtx_REG (SImode, REGNO (operands[0])); -+ if (! 
xtensa_constantsynth (x, l[i])) -+ emit_move_insn (x, GEN_INT (l[i])); -+}) -+ - ;; 64-bit floating point moves - - (define_expand "movdf" -@@ -1058,7 +1302,7 @@ - "register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -1085,6 +1329,22 @@ - DONE; - }) - -+;; Block sets -+ -+(define_expand "setmemsi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:SI 1 "") -+ (match_operand:SI 2 "") -+ (match_operand:SI 3 "const_int_operand")] -+ "!optimize_debug && optimize" -+{ -+ if (xtensa_expand_block_set_unrolled_loop (operands)) -+ DONE; -+ if (xtensa_expand_block_set_small_loop (operands)) -+ DONE; -+ FAIL; -+}) -+ - - ;; Shift instructions. - -@@ -1097,16 +1357,6 @@ - operands[1] = xtensa_copy_incoming_a7 (operands[1]); - }) - --(define_insn "*ashlsi3_1" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (const_int 1)))] -- "TARGET_DENSITY" -- "add.n\t%0, %1, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "2")]) -- - (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1119,16 +1369,14 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashlsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8b\t%2\;sll\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (ashift:SI (match_operand:SI 1 "register_operand") -+ (const_int 1)))] -+ "TARGET_DENSITY" -+ [(set (match_dup 0) -+ (plus:SI (match_dup 1) -+ (match_dup 1)))]) - - 
(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") -@@ -1142,17 +1390,6 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashrsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8l\t%2\;sra\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -- - (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1162,9 +1399,9 @@ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) -- return "srli\t%0, %1, %R2"; -+ return "srli\t%0, %1, %R2"; - else -- return "extui\t%0, %1, %R2, %L2"; -+ return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; - } -@@ -1172,13 +1409,170 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*lshrsi3_3x" -+(define_insn "*shift_per_byte" - [(set (match_operand:SI 0 "register_operand" "=a") -- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -+ (match_operator:SI 3 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]))] -+ "!optimize_debug && optimize" -+{ -+ switch (GET_CODE (operands[3])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; -+ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; -+ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_0" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 
"register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i"))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_1" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i")))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 5) -+ (neg:SI (match_dup 2))) -+ (set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 5) -+ (const_int 3))]))] -+{ -+ operands[5] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "9")]) -+ -+(define_insn "*shlrd_reg_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (match_dup 2))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr 
"length" "6")]) -+ -+(define_insn "*shlrd_const_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 3 "const_int_operand" "i")]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 2 "register_operand" "r") -+ (match_operand:SI 4 "const_int_operand" "i")])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && IN_RANGE (INTVAL (operands[3]), 1, 31) -+ && IN_RANGE (INTVAL (operands[4]), 1, 31) -+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" -+{ -+ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) -+ { -+ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; -+ case LSHIFTRT: return "ssai\t%R3\;src\t%0, %2, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn "*shlrd_per_byte_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shlrd_per_byte__omit_AND" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ 
[(match_operand:SI 1 "register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 4 "const_int_operand" "i"))]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_dup 2) -+ (const_int 3)) -+ (match_dup 4)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ior_op:SI (match_op_dup 5 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]) -+ (match_op_dup 6 -+ [(match_dup 3) -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] - "" -- "ssa8l\t%2\;srl\t%0, %1" - [(set_attr "type" "arith") - (set_attr "mode" "SI") - (set_attr "length" "6")]) -@@ -1239,28 +1633,13 @@ - (define_insn "*btrue" - [(set (pc) - (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "branch_operand" "K,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*bfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1269,28 +1648,13 @@ - (define_insn "*ubtrue" - [(set (pc) - (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 
"register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "ubranch_operand" "L,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*ubfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1301,80 +1665,178 @@ - (define_insn "*bittrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") -+ (const_int 1) -+ (match_operand:SI 1 "arith_operand" "J,r")) - (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); -+ static char result[64]; -+ char op; -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: op = 'c'; break; -+ case NE: op = 's'; break; -+ default: gcc_unreachable (); -+ } -+ if (which_alternative == 0) -+ { -+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); -+ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); -+ } -+ else -+ sprintf (result, "bb%c\t%%0, %%1, %%2", op); -+ return result; - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*bitfalse" -+(define_insn "*masktrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- 
[(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "register_operand" "r")) - (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] - "" - { -- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: return "bnone\t%0, %1, %2"; -+ case NE: return "bany\t%0, %1, %2"; -+ default: gcc_unreachable (); -+ } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*masktrue" -+(define_insn "*masktrue_bitcmpl" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -+ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) -+ (match_operand:SI 1 "register_operand" "r")) -+ (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - switch (GET_CODE (operands[3])) - { -- case EQ: return "bnone\t%0, %1, %2"; -- case NE: return "bany\t%0, %1, %2"; -- default: gcc_unreachable (); -+ case EQ: return "ball\t%0, %1, %2"; -+ case NE: return "bnall\t%0, %1, %2"; -+ default: gcc_unreachable (); - } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*maskfalse" -+(define_insn_and_split "*masktrue_const_pow2_minus_one" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE 
(exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (ashift:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] - { -- switch (GET_CODE (operands[3])) -- { -- case EQ: return "bany\t%0, %1, %2"; -- case NE: return "bnone\t%0, %1, %2"; -- default: gcc_unreachable (); -- } -+ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); -+ operands[4] = gen_reg_rtx (SImode); - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[1]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*masktrue_const_negative_pow2" -+ [(set (pc) -+ (if_then_else (match_operator 3 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (lshiftrt:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] -+{ -+ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); -+ operands[4] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*masktrue_const_shifted_mask" -+ [(set (pc) -+ (if_then_else (match_operator 4 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "shifted_mask_operand" "i")) -+ (match_operand:SI 2 "const_int_operand" "i")]) -+ (label_ref (match_operand 3 "" "")) -+ (pc)))] -+ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 -+ && 
xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 6) -+ (zero_extract:SI (match_dup 0) -+ (match_dup 5) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 4 -+ [(match_dup 6) -+ (match_dup 2)]) -+ (label_ref (match_dup 3)) -+ (pc)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[1]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[1] = GEN_INT (mask_pos); -+ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); -+ operands[5] = GEN_INT (mask_size); -+ operands[6] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") -+ (const_int 5) -+ (const_int 6)))]) - - - ;; Zero-overhead looping support. 
-@@ -1696,18 +2158,13 @@ - (match_operand 1 "" ""))] - "" - { -- rtx addr = XEXP (operands[0], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (0, operands); - }) - - (define_insn "call_internal" - [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) - (match_operand 1 "" "i"))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (0, operands); - } -@@ -1721,19 +2178,14 @@ - (match_operand 2 "" "")))] - "" - { -- rtx addr = XEXP (operands[1], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (1, operands); - }) - - (define_insn "call_value_internal" - [(set (match_operand 0 "register_operand" "=a") - (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) - (match_operand 2 "" "i")))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (1, operands); - } -@@ -1741,6 +2193,70 @@ - (set_attr "mode" "none") - (set_attr "length" "3")]) - -+(define_expand "sibcall" -+ [(call (match_operand 0 "memory_operand" "") -+ (match_operand 1 "" ""))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (0, operands); -+}) -+ -+(define_insn "sibcall_internal" -+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) -+ (match_operand 1 "" "i"))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (0, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(call (mem:SI (match_operand:SI 0 "register_operand")) -+ (match_operand 1 ""))] -+ "reload_completed -+ && 
!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[0]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 0)) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 1))]) -+ -+(define_expand "sibcall_value" -+ [(set (match_operand 0 "register_operand" "") -+ (call (match_operand 1 "memory_operand" "") -+ (match_operand 2 "" "")))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (1, operands); -+}) -+ -+(define_insn "sibcall_value_internal" -+ [(set (match_operand 0 "register_operand" "=a") -+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) -+ (match_operand 2 "" "i")))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (1, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(set (match_operand 0 "register_operand") -+ (call (mem:SI (match_operand:SI 1 "register_operand")) -+ (match_operand 2 "")))] -+ "reload_completed -+ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[1]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 1)) -+ (set (match_dup 0) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 2)))]) -+ - (define_insn "entry" - [(set (reg:SI A1_REG) - (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] -@@ -1762,7 +2278,10 @@ - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "2")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - - ;; Miscellaneous instructions. 
-@@ -1805,7 +2324,15 @@ - [(return)] - "" - { -- xtensa_expand_epilogue (); -+ xtensa_expand_epilogue (false); -+ DONE; -+}) -+ -+(define_expand "sibcall_epilogue" -+ [(return)] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_expand_epilogue (true); - DONE; - }) - -@@ -1817,7 +2344,10 @@ - } - [(set_attr "type" "nop") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - (define_expand "nonlocal_goto" - [(match_operand:SI 0 "general_operand" "") -@@ -1881,8 +2411,9 @@ - [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] - "" - "" -- [(set_attr "length" "0") -- (set_attr "type" "nop")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - ;; Do not schedule instructions accessing memory before this point. - -@@ -1901,7 +2432,9 @@ - (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] - "" - "" -- [(set_attr "length" "0")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] -@@ -1914,7 +2447,10 @@ - } - [(set_attr "type" "trap") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't - ;; know if a frame pointer is required until the reload pass, and -@@ -2177,3 +2713,103 @@ - xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); - DONE; - }) -+ -+(define_insn_and_split "*round_up_to_even" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 1)) -+ (const_int -2)))] -+ "" -+ "#" -+ "can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (and:SI (match_dup 1) -+ (const_int 1))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 2) -+ 
(match_dup 1)))] -+{ -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*signed_ge_zero" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ge:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 0)))] -+ "" -+ "#" -+ "" -+ [(set (match_dup 0) -+ (ashiftrt:SI (match_dup 1) -+ (const_int 31))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 0) -+ (const_int 1)))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_peephole2 -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 6 "reload_operand")) -+ (set (match_operand:SI 1 "register_operand") -+ (match_operand:SI 7 "reload_operand")) -+ (set (match_operand:SF 2 "register_operand") -+ (match_operand:SF 4 "register_operand")) -+ (set (match_operand:SF 3 "register_operand") -+ (match_operand:SF 5 "register_operand"))] -+ "REGNO (operands[0]) == REGNO (operands[4]) -+ && REGNO (operands[1]) == REGNO (operands[5]) -+ && peep2_reg_dead_p (4, operands[0]) -+ && peep2_reg_dead_p (4, operands[1])" -+ [(set (match_dup 2) -+ (match_dup 6)) -+ (set (match_dup 3) -+ (match_dup 7))] -+{ -+ uint32_t check = 0; -+ int i; -+ for (i = 0; i <= 3; ++i) -+ { -+ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); -+ if (check & mask) -+ FAIL; -+ check |= mask; -+ } -+ operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); -+ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); -+}) -+ -+(define_split -+ [(clobber (match_operand:DSC 0 "register_operand"))] -+ "GP_REG_P (REGNO (operands[0]))" -+ [(const_int 0)] -+{ -+ unsigned int regno = REGNO (operands[0]); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ rtx_insn *insn; -+ rtx x; -+ if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno -+ && (insn = next_nonnote_nondebug_insn (insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) -+ FAIL; -+}) -diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt -index aef67970b..97aa44f92 100644 ---- a/gcc/config/xtensa/xtensa.opt -+++ b/gcc/config/xtensa/xtensa.opt -@@ -27,9 +27,13 @@ Target Report Mask(FORCE_NO_PIC) - Disable position-independent code (PIC) for use in OS kernel code. - - mlongcalls --Target -+Target Mask(LONGCALLS) - Use indirect CALLXn instructions for large programs. - -+mextra-l32r-costs= -+Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) -+Set extra memory access cost for L32R instruction, in clock-cycle units. -+ - mtarget-align - Target - Automatically align branch targets to reduce branch penalties. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index eabeec944..c35f51afb 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. - -mtext-section-literals -mno-text-section-literals @gol - -mauto-litpools -mno-auto-litpools @gol - -mtarget-align -mno-target-align @gol ---mlongcalls -mno-longcalls} -+-mlongcalls -mno-longcalls @gol -+-mextra-l32r-costs=@var{cycles}} - - @emph{zSeries Options} - See S/390 and zSeries Options. -@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call - instructions---look at the disassembled object code to see the actual - instructions. Note that the assembler uses an indirect call for - every cross-file call, not just those that really are out of range. 
-+ -+@item -mextra-l32r-costs=@var{n} -+@opindex mextra-l32r-costs -+Specify an extra cost of instruction RAM/ROM access for @code{L32R} -+instructions, in clock cycles. This affects, when optimizing for speed, -+whether loading a constant from literal pool using @code{L32R} or -+synthesizing the constant from a small one with a couple of arithmetic -+instructions. The default value is 0. - @end table - - @node zSeries Options -diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -new file mode 100644 -index 000000000..ba61c6f37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+extern void foo(void); -+ -+void BNONE_test(int a, int b) -+{ -+ if (a & b) -+ foo(); -+} -+ -+void BANY_test(int a, int b) -+{ -+ if (!(a & b)) -+ foo(); -+} -+ -+void BALL_test(int a, int b) -+{ -+ if (~a & b) -+ foo(); -+} -+ -+void BNALL_test(int a, int b) -+{ -+ if (!(~a & b)) -+ foo(); -+} -+ -+/* { dg-final { scan-assembler-times "bnone" 1 } } */ -+/* { dg-final { scan-assembler-times "bany" 1 } } */ -+/* { dg-final { scan-assembler-times "ball" 1 } } */ -+/* { dg-final { scan-assembler-times "bnall" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -new file mode 100644 -index 000000000..a0c885baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return 
__builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -new file mode 100644 -index 000000000..4cf95b925 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "ssai" 4 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -new file mode 100644 -index 000000000..1e010fd62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 4 } } */ 
-diff --git a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -new file mode 100644 -index 000000000..6a04aaeef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int check_zero_byte(int v) -+{ -+ return (v - 0x01010101) & ~v & 0x80808080; -+} -+ -+/* { dg-final { scan-assembler-not "movi" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -new file mode 100644 -index 000000000..ec2606ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+int test_0(void) -+{ -+ return 4095; -+} -+ -+int test_1(void) -+{ -+ return 2147483647; -+} -+ -+int test_2(void) -+{ -+ return -34816; -+} -+ -+int test_3(void) -+{ -+ return -2049; -+} -+ -+int test_4(void) -+{ -+ return 2048; -+} -+ -+int test_5(void) -+{ -+ return 34559; -+} -+ -+int test_6(void) -+{ -+ return 43680; -+} -+ -+void test_7(int *p) -+{ -+ *p = -1432354816; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -new file mode 100644 -index 000000000..f3c4a1c7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mextra-l32r-costs=3" } */ -+ -+int test_0(void) -+{ -+ return 134217216; -+} -+ -+int test_1(void) -+{ -+ return -27604992; -+} -+ -+int test_2(void) -+{ -+ return -162279; -+} -+ -+void test_3(int *p) -+{ -+ *p = 192437; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -new file mode 100644 -index 000000000..11e5d5242 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+void test(unsigned int count, double array[]) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ array[i] = 1.0; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -new file mode 100644 -index 000000000..c8f987ccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(const void *addr) -+{ -+ unsigned int n = (unsigned int)addr; -+ const unsigned int *a = (const unsigned int*)(n & ~3); -+ n = (n & 3) * 8; -+ return (a[0] >> n) | (a[1] << (32 - n)); -+} -+ -+unsigned int test_1(unsigned int a, unsigned int b) -+{ -+ return (a >> 16) + (b << 16); -+} -+ -+/* { dg-final { scan-assembler-times "src" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -new file mode 100644 -index 000000000..608f65fd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+int one_cmpl_abs(int a) -+{ -+ return a < 0 ? 
~a : a; -+} -+ -+/* { dg-final { scan-assembler-not "bgez" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -new file mode 100644 -index 000000000..7a4018796 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -foptimize-sibling-calls" } */ -+ -+extern int foo(int); -+extern void bar(int); -+ -+int test_0(int a) { -+ return foo(a); -+} -+ -+void test_1(int a) { -+ bar(a); -+} -+ -+int test_2(int (*a)(void)) { -+ bar(0); -+ return a(); -+} -+ -+/* { dg-final { scan-assembler-not "ret" } } */ -diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S -index b19deae14..ad9072c40 100644 ---- a/libgcc/config/xtensa/lib1funcs.S -+++ b/libgcc/config/xtensa/lib1funcs.S -@@ -456,6 +456,29 @@ __nsau_data: - #endif /* L_clz */ - - -+#ifdef L_clrsbsi2 -+ .align 4 -+ .global __clrsbsi2 -+ .type __clrsbsi2, @function -+__clrsbsi2: -+ leaf_entry sp, 16 -+#if XCHAL_HAVE_NSA -+ nsa a2, a2 -+#else -+ srai a3, a2, 31 -+ xor a3, a3, a2 -+ movi a2, 31 -+ beqz a3, .Lreturn -+ do_nsau a2, a3, a4, a5 -+ addi a2, a2, -1 -+.Lreturn: -+#endif -+ leaf_return -+ .size __clrsbsi2, . 
- __clrsbsi2 -+ -+#endif /* L_clrsbsi2 */ -+ -+ - #ifdef L_clzsi2 - .align 4 - .global __clzsi2 -diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa -index 9836c96ae..084618b38 100644 ---- a/libgcc/config/xtensa/t-xtensa -+++ b/libgcc/config/xtensa/t-xtensa -@@ -1,6 +1,6 @@ - LIB1ASMSRC = xtensa/lib1funcs.S - LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ -- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ -+ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _bswapsi2 _bswapdi2 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ --- -2.20.1 - diff --git a/patches/gcc10.2/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch b/patches/gcc10.2/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch new file mode 100644 index 0000000..5aebddc --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch @@ -0,0 +1,48 @@ +From 76ee6b24125c885150e5b493b26b594801998b74 Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Tue, 18 Jan 2022 14:51:40 +0100 +Subject: [PATCH 02/31] xtensa: fix -Wformat-diag warnings. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (print_operand): Fix warnings. + (print_operand_address): Likewise. + (xtensa_multibss_section_type_flags): Likewise. 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 37c6ac1fd..b1dbe8520 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2379,7 +2379,7 @@ void + print_operand (FILE *file, rtx x, int letter) + { + if (!x) +- error ("PRINT_OPERAND null pointer"); ++ error ("%<PRINT_OPERAND%> null pointer"); + + switch (letter) + { +@@ -2584,7 +2584,7 @@ void + print_operand_address (FILE *file, rtx addr) + { + if (!addr) +- error ("PRINT_OPERAND_ADDRESS, null pointer"); ++ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); + + switch (GET_CODE (addr)) + { +@@ -3697,7 +3697,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " +- ".bss section"); ++ "%<.bss%> section"); + } + + return flags; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch b/patches/gcc10.2/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch new file mode 100644 index 0000000..46260ef --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch @@ -0,0 +1,74 @@ +From b5b9fd01c4db135893c44e82a9f33c2411e993d0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:06 +0900 +Subject: [PATCH 03/31] xtensa: Rename deprecated extv/extzv insn patterns to + extvsi/extzvsi + +These patterns were deprecated since GCC 4.8. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi, + extzvsi_internal): Rename from extv, extv_internal, extzv and + extzv_internal, respectively.
+--- + gcc/config/xtensa/xtensa.md | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 123916957..251c313d5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -634,7 +634,7 @@ + + ;; Field extract instructions. + +-(define_expand "extv" ++(define_expand "extvsi" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -649,12 +649,12 @@ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + +- emit_insn (gen_extv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extv_internal" ++(define_insn "extvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") +@@ -669,7 +669,7 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "extzv" ++(define_expand "extzvsi" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -678,12 +678,12 @@ + { + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; +- emit_insn (gen_extzv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extzvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extzv_internal" ++(define_insn "extzvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch 
b/patches/gcc10.2/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch new file mode 100644 index 0000000..607367c --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch @@ -0,0 +1,41 @@ +From 12fa0b13b6f0c52e5c4d75f39822771a7f780f94 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:19 +0900 +Subject: [PATCH 04/31] xtensa: Reflect the 32-bit Integer Divide Option + +On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider), +this patch reduces the size of each: + + __moddi3() @ libgcc.a : 969 -> 301 (saves 668) + __divmoddi4() : 1111 -> 426 (saves 685) + __udivmoddi4() : 1043 -> 319 (saves 724) + +in bytes, respectively. + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro + definition. +--- + gcc/config/xtensa/xtensa.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index fa86a245e..5b102de51 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. If not see + #define HAVE_AS_TLS 0 + #endif + ++/* Define this if the target has no hardware divide instructions. */ ++#if !TARGET_DIV32 ++#define TARGET_HAS_NO_HW_DIVIDE ++#endif ++ + + /* Target CPU builtins. 
*/ + #define TARGET_CPU_CPP_BUILTINS() \ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch b/patches/gcc10.2/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch new file mode 100644 index 0000000..8d257cd --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch @@ -0,0 +1,78 @@ +From 49383c9381a937b360adeb14f5e7bd4472f7c386 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:26:30 +0900 +Subject: [PATCH 05/31] xtensa: Simplify EXTUI instruction maskimm validations + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (extui_fldsz_operand): Simplify. + * config/xtensa/xtensa.c (xtensa_mask_immediate, print_operand): + Ditto. +--- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa.c | 24 +++--------------------- + 2 files changed, 4 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index eb52b05aa..3f84859b6 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -55,7 +55,7 @@ + + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") +- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) + + (define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b1dbe8520..4043f40ce 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -446,19 +446,7 @@ xtensa_b4constu (HOST_WIDE_INT v) + bool + xtensa_mask_immediate (HOST_WIDE_INT v) + { +-#define MAX_MASK_SIZE 16 +- int mask_size; +- +- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) +- { +- if ((v & 1) == 0) +- return false; +- v = v >> 1; +- if (v == 0) +- return true; +- } +- +- return false; ++ return IN_RANGE (exact_log2 
(v + 1), 1, 16); + } + + +@@ -2424,17 +2412,11 @@ print_operand (FILE *file, rtx x, int letter) + case 'K': + if (GET_CODE (x) == CONST_INT) + { +- int num_bits = 0; + unsigned val = INTVAL (x); +- while (val & 1) +- { +- num_bits += 1; +- val = val >> 1; +- } +- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) ++ if (!xtensa_mask_immediate (val)) + fatal_insn ("invalid mask", x); + +- fprintf (file, "%d", num_bits); ++ fprintf (file, "%d", floor_log2 (val + 1)); + } + else + output_operand_lossage ("invalid %%K value"); +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch b/patches/gcc10.2/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch new file mode 100644 index 0000000..419ebfe --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch @@ -0,0 +1,174 @@ +From fa7073ff572c248896057a5a7841a3e1d98380ad Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:27:36 +0900 +Subject: [PATCH 06/31] xtensa: Make use of IN_RANGE macro where appropriate + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/constraints.md (M, O): Use the macro. + * config/xtensa/predicates.md (addsubx_operand, extui_fldsz_operand, + sext_fldsz_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_simm8, xtensa_simm8x256, + xtensa_simm12b, xtensa_uimm8, xtensa_uimm8x2, xtensa_uimm8x4, + xtensa_mask_immediate, smalloffset_mem_p, printx, xtensa_call_save_reg, + xtensa_expand_prologue): Ditto. + * config/xtensa/xtensa.h (FUNCTION_ARG_REGNO_P): Ditto. 
+--- + gcc/config/xtensa/constraints.md | 4 ++-- + gcc/config/xtensa/predicates.md | 5 ++--- + gcc/config/xtensa/xtensa.c | 20 ++++++++++---------- + gcc/config/xtensa/xtensa.h | 2 +- + 4 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 2062c8816..9a8caab4f 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -92,7 +92,7 @@ + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") +- (match_test "ival >= -32 && ival <= 95"))) ++ (match_test "IN_RANGE (ival, -32, 95)"))) + + (define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use +@@ -103,7 +103,7 @@ + (define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") +- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) ++ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) + + (define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 3f84859b6..91b9343a2 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -25,8 +25,7 @@ + + (define_predicate "addsubx_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 1 +- && INTVAL (op) <= 3"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + + (define_predicate "arith_operand" + (ior (and (match_code "const_int") +@@ -64,7 +63,7 @@ + + (define_predicate "sext_fldsz_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) + + (define_predicate "lsbitnum_operand" + (and (match_code "const_int") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 4043f40ce..02dc5799a 100644 +--- a/gcc/config/xtensa/xtensa.c 
++++ b/gcc/config/xtensa/xtensa.c +@@ -341,42 +341,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; + bool + xtensa_simm8 (HOST_WIDE_INT v) + { +- return v >= -128 && v <= 127; ++ return IN_RANGE (v, -128, 127); + } + + + bool + xtensa_simm8x256 (HOST_WIDE_INT v) + { +- return (v & 255) == 0 && (v >= -32768 && v <= 32512); ++ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); + } + + + bool + xtensa_simm12b (HOST_WIDE_INT v) + { +- return v >= -2048 && v <= 2047; ++ return IN_RANGE (v, -2048, 2047); + } + + + static bool + xtensa_uimm8 (HOST_WIDE_INT v) + { +- return v >= 0 && v <= 255; ++ return IN_RANGE (v, 0, 255); + } + + + static bool + xtensa_uimm8x2 (HOST_WIDE_INT v) + { +- return (v & 1) == 0 && (v >= 0 && v <= 510); ++ return (v & 1) == 0 && IN_RANGE (v, 0, 510); + } + + + static bool + xtensa_uimm8x4 (HOST_WIDE_INT v) + { +- return (v & 3) == 0 && (v >= 0 && v <= 1020); ++ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); + } + + +@@ -527,7 +527,7 @@ smalloffset_mem_p (rtx op) + return FALSE; + + val = INTVAL (offset); +- return (val & 3) == 0 && (val >= 0 && val <= 60); ++ return (val & 3) == 0 && IN_RANGE (val, 0, 60); + } + } + return FALSE; +@@ -2352,7 +2352,7 @@ static void + printx (FILE *file, signed int val) + { + /* Print a hexadecimal value in a nice way. */ +- if ((val > -0xa) && (val < 0xa)) ++ if (IN_RANGE (val, -9, 9)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); +@@ -2732,7 +2732,7 @@ xtensa_call_save_reg(int regno) + return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || + df_regs_ever_live_p (regno); + +- if (crtl->calls_eh_return && regno >= 2 && regno < 4) ++ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; + + return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); +@@ -2852,7 +2852,7 @@ xtensa_expand_prologue (void) + int callee_save_size = cfun->machine->callee_save_size; + + /* -128 is a limit of single addi instruction. 
*/ +- if (total_size > 0 && total_size <= 128) ++ if (IN_RANGE (total_size, 1, 128)) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-total_size))); +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 5b102de51..3e9cbc943 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -493,7 +493,7 @@ enum reg_class + used for this purpose since all function arguments are pushed on + the stack. */ + #define FUNCTION_ARG_REGNO_P(N) \ +- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) ++ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) + + /* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch b/patches/gcc10.2/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch new file mode 100644 index 0000000..dae4a21 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch @@ -0,0 +1,54 @@ +From 5cda5b41a7646d220f7351226b5da78955b0fc7f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:29:22 +0900 +Subject: [PATCH 07/31] xtensa: Fix instruction counting regarding block move + expansion + +This patch makes counting the number of instructions of the remainder +(modulo 4) part more accurate. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_move): + Make instruction counting more accurate, and simplify emitting insns. 
+--- + gcc/config/xtensa/xtensa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 02dc5799a..0fe8b73ad 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1303,7 +1303,7 @@ xtensa_expand_block_move (rtx *operands) + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; +- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ ++ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); + if (num_pieces > move_ratio) + return 0; + +@@ -1340,7 +1340,7 @@ xtensa_expand_block_move (rtx *operands) + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); +- emit_insn (gen_rtx_SET (temp[next], x)); ++ emit_move_insn (temp[next], x); + + offset_ld += next_amount; + bytes -= next_amount; +@@ -1350,9 +1350,9 @@ xtensa_expand_block_move (rtx *operands) + if (active[phase]) + { + active[phase] = false; +- ++ + x = adjust_address (dst_mem, mode[phase], offset_st); +- emit_insn (gen_rtx_SET (x, temp[phase])); ++ emit_move_insn (x, temp[phase]); + + offset_st += amount[phase]; + } +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch new file mode 100644 index 0000000..a7212ce --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch @@ -0,0 +1,303 @@ +From 02572a935a2cbabc96387289300fb78d61dde555 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 24 May 2022 00:52:44 +0900 +Subject: [PATCH 08/31] xtensa: Add setmemsi insn pattern + +This patch introduces setmemsi insn pattern of two kinds, unrolled loop and +small loop, for fixed small length and constant initialization value. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New prototypes. 
+ * config/xtensa/xtensa.c (xtensa_sizeof_MOVI, + xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New functions. + * config/xtensa/xtensa.md (setmemsi): New expansion pattern. + * config/xtensa/xtensa.opt (mlongcalls): Add target mask. +--- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 211 ++++++++++++++++++++++++++++++ + gcc/config/xtensa/xtensa.md | 16 +++ + gcc/config/xtensa/xtensa.opt | 2 +- + 4 files changed, 230 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 18d803581..80b1da2bb 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); + extern int xtensa_expand_conditional_move (rtx *, int); + extern int xtensa_expand_scc (rtx *, machine_mode); + extern int xtensa_expand_block_move (rtx *); ++extern int xtensa_expand_block_set_unrolled_loop (rtx *); ++extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0fe8b73ad..a6d76a953 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1363,6 +1363,217 @@ xtensa_expand_block_move (rtx *operands) + } + + ++/* Try to expand a block set operation to a sequence of RTL move ++ instructions. If not optimizing, or if the block size is not a ++ constant, or if the block is too large, or if the value to ++ initialize the block with is not a constant, the expansion ++ fails and GCC falls back to calling memset(). 
++ ++ operands[0] is the destination ++ operands[1] is the length ++ operands[2] is the initialization value ++ operands[3] is the alignment */ ++ ++static int ++xtensa_sizeof_MOVI (HOST_WIDE_INT imm) ++{ ++ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3; ++} ++ ++int ++xtensa_expand_block_set_unrolled_loop (rtx *operands) ++{ ++ rtx dst_mem = operands[0]; ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, reg; ++ int offset; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. */ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: a series of aligned memory stores. ++ Consist of S8I, S16I or S32I(.N). */ ++ expand_len += (bytes / align) * (TARGET_DENSITY ++ && align == 4 ? 2 : 3); ++ /* Insn expansion: the remainder, sub-aligned memory stores. ++ A combination of S8I and S16I as needed. */ ++ expand_len += ((bytes % align + 1) / 2) * 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (dst_mem, 0); ++ if (!REG_P (x)) ++ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); ++ switch (align) ++ { ++ case 1: ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (SImode, GEN_INT (value)); ++ ++ offset = 0; ++ do ++ { ++ int unit_size = MIN (bytes, align); ++ machine_mode unit_mode = (unit_size >= 4 ? SImode : ++ (unit_size >= 2 ? HImode : ++ QImode)); ++ unit_size = GET_MODE_SIZE (unit_mode); ++ ++ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), ++ unit_mode == SImode ? reg ++ : convert_to_mode (unit_mode, reg, true)); ++ ++ offset += unit_size; ++ bytes -= unit_size; ++ } ++ while (bytes > 0); ++ ++ return 1; ++} ++ ++int ++xtensa_expand_block_set_small_loop (rtx *operands) ++{ ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, dst, end, reg; ++ machine_mode unit_mode; ++ rtx_code_label *label; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Totally-aligned block only. */ ++ if (bytes % align != 0) ++ return 0; ++ ++ /* If 4-byte aligned, small loop substitution is almost optimal, thus ++ limited to only offset to the end address for ADDI/ADDMI instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; ++ ++ /* If no 4-byte aligned, loop count should be treated as the constraint. */ ++ if (align != 4 ++ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) ++ return 0; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. 
*/ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (operands[0], 0); ++ if (!REG_P (x)) ++ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); ++ dst = gen_reg_rtx (SImode); ++ emit_move_insn (dst, x); ++ end = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ switch (align) ++ { ++ case 1: ++ unit_mode = QImode; ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ unit_mode = HImode; ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ unit_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (unit_mode, GEN_INT (value)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); ++ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); ++ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); ++ ++ return 1; ++} ++ ++ + void + xtensa_expand_nonlocal_goto (rtx *operands) + { +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 251c313d5..9eb689efa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1085,6 +1085,22 @@ + DONE; + }) + ++;; Block sets ++ ++(define_expand "setmemsi" ++ [(match_operand:BLK 0 "memory_operand") ++ (match_operand:SI 1 "") ++ (match_operand:SI 2 "") ++ (match_operand:SI 3 "const_int_operand")] ++ "!optimize_debug && optimize" ++{ ++ if (xtensa_expand_block_set_unrolled_loop (operands)) ++ DONE; ++ if (xtensa_expand_block_set_small_loop (operands)) ++ DONE; ++ FAIL; ++}) ++ + + ;; Shift instructions. + +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index aef67970b..e1d992f5d 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -27,7 +27,7 @@ Target Report Mask(FORCE_NO_PIC) + Disable position-independent code (PIC) for use in OS kernel code. 
+ + mlongcalls +-Target ++Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + + mtarget-align +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch b/patches/gcc10.2/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch new file mode 100644 index 0000000..a5fb6f1 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch @@ -0,0 +1,254 @@ +From be1ca3aa6e9754ed16d1b7a60657912af02844da Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:33:59 +0900 +Subject: [PATCH 09/31] xtensa: Improve bswap[sd]i2 insn patterns + +This patch makes bswap[sd]i2 better register allocation, and reconstructs +bswapsi2 in order to take advantage of GIMPLE manual byte-swapping +recognition. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2): New expansion pattern. + (bswapsi2_internal): Revise the template and condition, and add + detection code for preceding the same insn in order to omit a + "SSAI 8" instruction of the latter. + (bswapdi2): Suppress built-in insn expansion with the corresponding + library call when optimizing for size. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/bswap-O1.c: New. + * gcc.target/xtensa/bswap-O2.c: Ditto. + * gcc.target/xtensa/bswap-Os.c: Ditto. 
+--- + gcc/config/xtensa/xtensa.md | 77 +++++++++++++++++----- + gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 +++++++++++ + 4 files changed, 172 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eb689efa..cea280061 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,23 +471,68 @@ + + ;; Byte swap. + +-(define_insn "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=&a") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "15")]) ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1" ++{ ++ /* GIMPLE manual byte-swapping recognition is now activated. ++ For both built-in and manual bswaps, emit corresponding library call ++ if optimizing for size, or a series of dedicated machine instructions ++ if otherwise. 
*/ ++ if (optimize_size) ++ emit_library_call_value (optab_libfunc (bswap_optab, SImode), ++ operands[0], LCT_NORMAL, SImode, ++ operands[1], SImode); ++ else ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++}) + +-(define_insn "bswapdi2" +- [(set (match_operand:DI 0 "register_operand" "=&a") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "length" "27")]) ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=a,&a") ++ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) ++ (clobber (match_scratch:SI 2 "=&a,X"))] ++ "!optimize_debug && optimize > 1 && !optimize_size" ++{ ++ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); ++ const char *init = "ssai\t8\;"; ++ static char result[128]; /* Was 64: too small for "ssai" + 4 insns + NUL. */ ++ if (prev_insn && NONJUMP_INSN_P (prev_insn)) ++ { ++ rtx x = PATTERN (prev_insn); ++ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 ++ && GET_CODE (XVECEXP (x, 0, 0)) == SET ++ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) ++ { ++ x = XEXP (XVECEXP (x, 0, 0), 1); ++ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) ++ init = ""; ++ } ++ } ++ sprintf (result, ++ (which_alternative == 0) ++ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" ++ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", ++ init); ++ return result; ++} ++ [(set_attr "type" "arith,arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "15,15")]) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (bswap:DI (match_operand:DI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1 && optimize_size" ++{ ++ /* Replace with a single DImode library call.
++ Without this, two SImode library calls are emitted. */ ++ emit_library_call_value (optab_libfunc (bswap_optab, DImode), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], DImode); ++ DONE; ++}) + + + ;; Negation and one's complement. +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +new file mode 100644 +index 000000000..a0c885baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +new file mode 100644 +index 000000000..4cf95b925 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} 
++ ++/* { dg-final { scan-assembler-times "ssai" 4 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +new file mode 100644 +index 000000000..1e010fd62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 4 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0015-fix-PR-target-105879.patch b/patches/gcc10.2/gcc-xtensa-0015-fix-PR-target-105879.patch new file mode 100644 index 0000000..2c21f47 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0015-fix-PR-target-105879.patch @@ -0,0 +1,48 @@ +From 1848b547a6ac69a002d068239a5bc9463f3fae25 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 7 Jun 2022 21:01:01 -0700 +Subject: [PATCH 10/31] gcc: xtensa: fix PR target/105879 + +split_double operates with the 'word that comes first in memory in the +target' terminology, while gen_lowpart operates with the 'value +representing some low-order bits of X' terminology. They are not +equivalent and must be dealt with differently on little- and big-endian +targets. + +gcc/ + PR target/105879 + * config/xtensa/xtensa.md (movdi): Rename 'first' and 'second' + to 'lowpart' and 'highpart' so that they match 'gen_lowpart' and + 'gen_highpart' bitwise semantics and fix order of highpart and + lowpart depending on target endianness. 
+--- + gcc/config/xtensa/xtensa.md | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index cea280061..30d8ef96c 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -802,11 +802,14 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx first, second; +- +- split_double (operands[1], &first, &second); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); ++ rtx lowpart, highpart; ++ ++ if (TARGET_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); ++ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); + DONE; + } + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch new file mode 100644 index 0000000..3a31e62 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch @@ -0,0 +1,39 @@ +From f47a902c9a94d2e9df879de4613dae62c8e9cc4f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:44:32 +0900 +Subject: [PATCH 11/31] xtensa: Implement bswaphi2 insn pattern + +This patch adds bswaphi2 insn pattern that is one instruction less than the +default expansion. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswaphi2): New insn pattern. +--- + gcc/config/xtensa/xtensa.md | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 30d8ef96c..c1f44777d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,6 +471,16 @@ + + ;; Byte swap. 
+ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=a") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++ (clobber (match_scratch:HI 2 "=&a"))] ++ "" ++ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "HI") ++ (set_attr "length" "9")]) ++ + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch b/patches/gcc10.2/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch new file mode 100644 index 0000000..017a30f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch @@ -0,0 +1,86 @@ +From 22b5756399ef63a4102334724b12a4c186075227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:46:16 +0900 +Subject: [PATCH 12/31] xtensa: Make one_cmplsi2 optimizer-friendly + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. But a few optimizers assume that bitwise negation can be +done by a single insn. + +As a result, '((x < 0) ? ~x : x)' cannot be optimized to '(x ^ (x >> 31))' +ever before, for example. + +This patch relaxes such limitation, by putting the insn expansion off till +the split pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (one_cmplsi2): + Rearrange as an insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/one_cmpl_abs.c: New. 
+--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++------ + .../gcc.target/xtensa/one_cmpl_abs.c | 9 +++++++ + 2 files changed, 27 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c1f44777d..2f6d48d03 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -556,16 +556,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "") +- (not:SI (match_operand:SI 1 "register_operand" "")))] ++(define_insn_and_split "one_cmplsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (const_int -1)) ++ (set (match_dup 0) ++ (xor:SI (match_dup 1) ++ (match_dup 2)))] + { +- rtx temp = gen_reg_rtx (SImode); +- emit_insn (gen_movsi (temp, constm1_rtx)); +- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); +- DONE; +-}) ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +new file mode 100644 +index 000000000..608f65fd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++int one_cmpl_abs(int a) ++{ ++ return a < 0 ? 
~a : a; ++} ++ ++/* { dg-final { scan-assembler-not "bgez" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch b/patches/gcc10.2/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch new file mode 100644 index 0000000..d1167a1 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch @@ -0,0 +1,71 @@ +From cc259b2801c8d04c39169214041305fdd5b87acd Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:55:44 +0900 +Subject: [PATCH 13/31] xtensa: Optimize '(~x & y)' to '((x & y) ^ y)' + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*andsi3_bitcmpl): + New insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/check_zero_byte.c: New. +--- + gcc/config/xtensa/xtensa.md | 20 +++++++++++++++++++ + .../gcc.target/xtensa/check_zero_byte.c | 9 +++++++++ + 2 files changed, 29 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2f6d48d03..28ed1d34e 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -601,6 +601,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + ++(define_insn_and_split "*andsi3_bitcmpl" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 3) ++ (and:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (xor:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +diff --git 
a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +new file mode 100644 +index 000000000..6a04aaeef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++int check_zero_byte(int v) ++{ ++ return (v - 0x01010101) & ~v & 0x80808080; ++} ++ ++/* { dg-final { scan-assembler-not "movi" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch new file mode 100644 index 0000000..ebaa985 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch @@ -0,0 +1,98 @@ +From ebd48d915076589f04b5c1ed50f9f5ddfae088e8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:57:35 +0900 +Subject: [PATCH 14/31] xtensa: Add clrsbsi2 insn pattern + +> (clrsb:m x) +> Represents the number of redundant leading sign bits in x, represented +> as an integer of mode m, starting at the most significant bit position. + +This explanation is just what the NSA instruction (not ever emitted before) +calculates in Xtensa ISA. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (clrsbsi2): New insn pattern. + +libgcc/ChangeLog: + + * config/xtensa/lib1funcs.S (__clrsbsi2): New function. + * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _clrsbsi2. +--- + gcc/config/xtensa/xtensa.md | 12 +++++++++++- + libgcc/config/xtensa/lib1funcs.S | 23 +++++++++++++++++++++++ + libgcc/config/xtensa/t-xtensa | 2 +- + 3 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 28ed1d34e..6c76fb942 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -429,7 +429,17 @@ + (set_attr "length" "3")]) + + +-;; Count leading/trailing zeros and find first bit. 
++;; Count redundant leading sign bits and leading/trailing zeros, ++;; and find first bit. ++ ++(define_insn "clrsbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_NSA" ++ "nsa\t%0, %1" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "3")]) + + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") +diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S +index b19deae14..ad9072c40 100644 +--- a/libgcc/config/xtensa/lib1funcs.S ++++ b/libgcc/config/xtensa/lib1funcs.S +@@ -456,6 +456,29 @@ __nsau_data: + #endif /* L_clz */ + + ++#ifdef L_clrsbsi2 ++ .align 4 ++ .global __clrsbsi2 ++ .type __clrsbsi2, @function ++__clrsbsi2: ++ leaf_entry sp, 16 ++#if XCHAL_HAVE_NSA ++ nsa a2, a2 ++#else ++ srai a3, a2, 31 ++ xor a3, a3, a2 ++ movi a2, 31 ++ beqz a3, .Lreturn ++ do_nsau a2, a3, a4, a5 ++ addi a2, a2, -1 ++.Lreturn: ++#endif ++ leaf_return ++ .size __clrsbsi2, . 
- __clrsbsi2 ++ ++#endif /* L_clrsbsi2 */ ++ ++ + #ifdef L_clzsi2 + .align 4 + .global __clzsi2 +diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa +index 9836c96ae..084618b38 100644 +--- a/libgcc/config/xtensa/t-xtensa ++++ b/libgcc/config/xtensa/t-xtensa +@@ -1,6 +1,6 @@ + LIB1ASMSRC = xtensa/lib1funcs.S + LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ +- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ ++ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _bswapsi2 _bswapdi2 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch b/patches/gcc10.2/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch new file mode 100644 index 0000000..8de8a89 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch @@ -0,0 +1,110 @@ +From 1ba9369255749ccf9ec82565a192b1a523b0e374 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:17:40 +0900 +Subject: [PATCH 15/31] xtensa: Tweak some widen multiplications + +umulsidi3 is faster than umuldi3 even if library call, and is also +prerequisite for fast constant division by multiplication. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (mulsidi3, umulsidi3): + Split into individual signedness, in order to use libcall + "__umulsidi3" but not the other. + (mulhisi3): Merge into one by using code iterator. + (mulsidi3, mulhisi3, umulhisi3): Remove. +--- + gcc/config/xtensa/xtensa.md | 56 +++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6c76fb942..3314b3fd6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -224,20 +224,42 @@ + + ;; Multiplication. 
+ +-(define_expand "<u>mulsidi3" ++(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") +- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) +- (any_extend:DI (match_operand:SI 2 "register_operand"))))] ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); +- emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), +- operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; + }) + ++(define_expand "umulsidi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "" ++{ ++ if (TARGET_MUL32_HIGH) ++ { ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); ++ } ++ else ++ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], SImode, ++ operands[2], SImode); ++ DONE; ++}) ++ + (define_insn "<u>mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI +@@ -261,30 +283,16 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "mulhisi3" +- [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (sign_extend:SI +- (match_operand:HI 1 "register_operand" "%r,r")) +- (sign_extend:SI +- (match_operand:HI 2 "register_operand" "r,r"))))] +- "TARGET_MUL16 || TARGET_MAC16" +- "@ +- mul16s\t%0, %1, %2 +- mul.aa.ll\t%1, %2" +- [(set_attr "type" 
"mul16,mac16") +- (set_attr "mode" "SI") +- (set_attr "length" "3,3")]) +- +-(define_insn "umulhisi3" ++(define_insn "<u>mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (zero_extend:SI ++ (mult:SI (any_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) +- (zero_extend:SI ++ (any_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ +- mul16u\t%0, %1, %2 +- umul.aa.ll\t%1, %2" ++ mul16<su>\t%0, %1, %2 ++ <u>mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch b/patches/gcc10.2/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch new file mode 100644 index 0000000..491da47 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch @@ -0,0 +1,125 @@ +From bc108c84544d5a0e6289628e8749a92c9695f006 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:18:24 +0900 +Subject: [PATCH 16/31] xtensa: Consider the Loop Option when setmemsi is + expanded to small loop + +Now apply to almost any size of aligned block under such circumstances. 
+ +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_set_small_loop): + Pass through the block length / loop count conditions if + zero-overhead looping is configured and active, +--- + gcc/config/xtensa/xtensa.c | 71 +++++++++++++++++++++++++++----------- + 1 file changed, 50 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a6d76a953..e2f97b79c 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1473,7 +1473,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) + int + xtensa_expand_block_set_small_loop (rtx *operands) + { +- HOST_WIDE_INT bytes, value, align; ++ HOST_WIDE_INT bytes, value, align, count; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; +@@ -1493,17 +1493,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; ++ count = bytes / align; + +- /* If 4-byte aligned, small loop substitution is almost optimal, thus +- limited to only offset to the end address for ADDI/ADDMI instruction. */ +- if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) +- return 0; ++ /* If the Loop Option (zero-overhead looping) is configured and active, ++ almost no restrictions about the length of the block. */ ++ if (! (TARGET_LOOPS && optimize)) ++ { ++ /* If 4-byte aligned, small loop substitution is almost optimal, ++ thus limited to only offset to the end address for ADDI/ADDMI ++ instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; + +- /* If no 4-byte aligned, loop count should be treated as the constraint. */ +- if (align != 4 +- && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) +- return 0; ++ /* If no 4-byte aligned, loop count should be treated as the ++ constraint. */ ++ if (align != 4 ++ && count > ((optimize > 1 && !optimize_size) ? 
8 : 15)) ++ return 0; ++ } + + /* Insn expansion: holding the init value. + Either MOV(.N) or L32R w/litpool. */ +@@ -1513,16 +1521,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; +- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ +- expand_len += bytes > 127 ? 3 +- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; +- +- /* Insn expansion: the loop body and branch instruction. +- For store, one of S8I, S16I or S32I(.N). +- For advance, ADDI(.N). +- For branch, BNE. */ +- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) +- + (TARGET_DENSITY ? 2 : 3) + 3; ++ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ ++ { ++ /* Insn translation: Either MOV(.N) or L32R w/litpool for the ++ loop count. */ ++ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) ++ : 3 + 4; ++ /* Insn translation: LOOP, the zero-overhead looping setup ++ instruction. */ ++ expand_len += 3; ++ /* Insn expansion: the loop body instructions. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3); ++ } ++ else /* NO zero-overhead looping */ ++ { ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ } + + /* Function call: preparing two arguments. 
*/ + funccall_len = xtensa_sizeof_MOVI (value); +@@ -1545,7 +1570,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); +- emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ if (TARGET_LOOPS && optimize) ++ x = force_reg (SImode, operands[1] /* the length */); ++ else ++ x = operands[1]; ++ emit_insn (gen_addsi3 (end, dst, x)); + switch (align) + { + case 1: +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch b/patches/gcc10.2/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch new file mode 100644 index 0000000..5792a6f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch @@ -0,0 +1,325 @@ +From de854e2348b8159bc389471e68023986c8878c92 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:19:32 +0900 +Subject: [PATCH 17/31] xtensa: Improve instruction cost estimation and + suggestion + +This patch implements a new target-specific relative RTL insn cost function +because of suboptimal cost estimation by default, and fixes several "length" +insn attributes (related to the cost estimation). + +And also introduces a new machine-dependent option "-mextra-l32r-costs=" +that tells implementation-specific InstRAM/ROM access penalty for L32R +instruction to the compiler (in clock-cycle units, 0 by default). + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): Correct wrong case + for ABS and NEG, add missing case for BSWAP and CLRSB, and + double the costs for integer divisions using libfuncs if + optimizing for speed, in order to take advantage of fast constant + division by multiplication. + (TARGET_INSN_COST): New macro definition. + (xtensa_is_insn_L32R_p, xtensa_insn_cost): New functions for + calculating relative costs of a RTL insns, for both of speed and + size. 
+ * config/xtensa/xtensa.md (return, nop, trap): Correct values of + the attribute "length" that depends on TARGET_DENSITY. + (define_asm_attributes, blockage, frame_blockage): Add missing + attributes. + * config/xtensa/xtensa.opt (-mextra-l32r-costs=): New machine- + dependent option, however, preparatory work for now. +--- + gcc/config/xtensa/xtensa.c | 116 ++++++++++++++++++++++++++++++++--- + gcc/config/xtensa/xtensa.md | 29 ++++++--- + gcc/config/xtensa/xtensa.opt | 4 ++ + 3 files changed, 134 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e2f97b79c..94ff901c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "hw-doloop.h" + #include "rtl-iter.h" ++#include "insn-attr.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + static section *xtensa_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); ++static int xtensa_insn_cost (rtx_insn *, bool); + static int xtensa_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); +@@ -208,6 +210,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS xtensa_rtx_costs ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST xtensa_insn_cost + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +@@ -3972,7 +3976,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + static bool + xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, +- int *total, bool 
speed ATTRIBUTE_UNUSED) ++ int *total, bool speed) + { + int code = GET_CODE (x); + +@@ -4060,9 +4064,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case CLZ: ++ case CLRSB: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + ++ case BSWAP: ++ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); ++ return true; ++ + case NOT: + *total = COSTS_N_INSNS (mode == DImode ? 3 : 2); + return true; +@@ -4086,13 +4095,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case ABS: ++ case NEG: + { + if (mode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (mode == DFmode) + *total = COSTS_N_INSNS (50); +- else ++ else if (mode == DImode) + *total = COSTS_N_INSNS (4); ++ else ++ *total = COSTS_N_INSNS (1); + return true; + } + +@@ -4108,10 +4120,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + } + +- case NEG: +- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); +- return true; +- + case MULT: + { + if (mode == SFmode) +@@ -4151,11 +4159,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case UMOD: + { + if (mode == DImode) +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + return true; + } + +@@ -4188,6 +4196,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + ++static bool ++xtensa_is_insn_L32R_p(const rtx_insn *insn) ++{ ++ rtx x = PATTERN (insn); ++ ++ if (GET_CODE (x) == SET) ++ { ++ x = XEXP (x, 1); ++ if (GET_CODE (x) == MEM) ++ { ++ x = XEXP (x, 0); ++ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); ++ } ++ } ++ ++ return false; ++} ++ ++/* Compute a relative costs of RTL insns. This is necessary in order to ++ achieve better RTL insn splitting/combination result. 
*/ ++ ++static int ++xtensa_insn_cost (rtx_insn *insn, bool speed) ++{ ++ if (!(recog_memoized (insn) < 0)) ++ { ++ int len = get_attr_length (insn), n = (len + 2) / 3; ++ ++ if (len == 0) ++ return COSTS_N_INSNS (0); ++ ++ if (speed) /* For speed cost. */ ++ { ++ /* "L32R" may be particular slow (implementation-dependent). */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); ++ ++ /* Cost based on the pipeline model. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_STORE: ++ case TYPE_MOVE: ++ case TYPE_ARITH: ++ case TYPE_MULTI: ++ case TYPE_NOP: ++ case TYPE_FSTORE: ++ return COSTS_N_INSNS (n); ++ ++ case TYPE_LOAD: ++ return COSTS_N_INSNS (n - 1 + 2); ++ ++ case TYPE_JUMP: ++ case TYPE_CALL: ++ return COSTS_N_INSNS (n - 1 + 3); ++ ++ case TYPE_FCONV: ++ case TYPE_FLOAD: ++ case TYPE_MUL16: ++ case TYPE_MUL32: ++ case TYPE_RSR: ++ return COSTS_N_INSNS (n * 2); ++ ++ case TYPE_FMADD: ++ return COSTS_N_INSNS (n * 4); ++ ++ case TYPE_DIV32: ++ return COSTS_N_INSNS (n * 16); ++ ++ default: ++ break; ++ } ++ } ++ else /* For size cost. */ ++ { ++ /* Cost based on the instruction length. */ ++ if (get_attr_type (insn) != TYPE_UNKNOWN) ++ { ++ /* "L32R" itself plus constant in litpool. */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (2) + 1; ++ ++ /* Consider ".n" short instructions. */ ++ return COSTS_N_INSNS (n) - (n * 3 - len); ++ } ++ } ++ } ++ ++ /* Fall back. */ ++ return pattern_cost (PATTERN (insn), speed); ++} ++ + /* Worker function for TARGET_RETURN_IN_MEMORY. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3314b3fd6..da6b71d1d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -98,7 +98,10 @@ + + ;; Describe a user's asm statement. 
+ (define_asm_attributes +- [(set_attr "type" "multi")]) ++ [(set_attr "type" "multi") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ;; Should be the maximum possible length ++ ;; of a single machine instruction. + + + ;; Pipeline model. +@@ -1884,7 +1887,10 @@ + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "2")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + + ;; Miscellaneous instructions. +@@ -1939,7 +1945,10 @@ + } + [(set_attr "type" "nop") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + (define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") +@@ -2003,8 +2012,9 @@ + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" +- [(set_attr "length" "0") +- (set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + ;; Do not schedule instructions accessing memory before this point. 
+ +@@ -2023,7 +2033,9 @@ + (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" +- [(set_attr "length" "0")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + (define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] +@@ -2036,7 +2048,10 @@ + } + [(set_attr "type" "trap") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't + ;; know if a frame pointer is required until the reload pass, and +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index e1d992f5d..97aa44f92 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -30,6 +30,10 @@ mlongcalls + Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + ++mextra-l32r-costs= ++Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) ++Set extra memory access cost for L32R instruction, in clock-cycle units. ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch b/patches/gcc10.2/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch new file mode 100644 index 0000000..0e14673 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch @@ -0,0 +1,400 @@ +From ed2c4b57807470b386e9abdf145282e197d9da65 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 11 Jun 2022 00:26:17 +0900 +Subject: [PATCH 18/31] xtensa: Improve constant synthesis for both integer and + floating-point + +This patch revises the previous implementation of constant synthesis. 
+ +First, changed to use define_split machine description pattern and to run +after reload pass, in order not to interfere with some optimizations such as +the loop invariant motion. + +Second, not only integer but also floating-point is subject to processing. + +Third, several new synthesis patterns - when the constant cannot fit into +a "MOVI Ax, simm12" instruction, but: + +I. can be represented as a power of two minus one (e.g. 32767, 65535 or + 0x7fffffffUL) + => "MOVI(.N) Ax, -1" + "SRLI Ax, Ax, 1 ... 31" (or "EXTUI") +II. is between -34816 and 34559 + => "MOVI(.N) Ax, -2048 ... 2047" + "ADDMI Ax, Ax, -32768 ... 32512" +III. (existing case) can fit into a signed 12-bit if the trailing zero bits + are stripped + => "MOVI(.N) Ax, -2048 ... 2047" + "SLLI Ax, Ax, 1 ... 31" + +The above sequences consist of 5 or 6 bytes and have latency of 2 clock cycles, +in contrast with "L32R Ax, <litpool>" (3 bytes and one clock latency, but may +suffer additional one clock pipeline stall and implementation-specific +InstRAM/ROM access penalty) plus 4 bytes of constant value. + +In addition, 3-instructions synthesis patterns (8 or 9 bytes, 3 clock latency) +are also provided when optimizing for speed and L32R instruction has +considerable access penalty: + +IV. 2-instructions synthesis (any of I ... III) followed by + "SLLI Ax, Ax, 1 ... 31" +V. 2-instructions synthesis followed by either "ADDX[248] Ax, Ax, Ax" + or "SUBX8 Ax, Ax, Ax" (multiplying by 3, 5, 7 or 9) + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_constantsynth): + New prototype. + * config/xtensa/xtensa.c (xtensa_emit_constantsynth, + xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI, + xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): + New backend functions that process the above-mentioned logic. + (xtensa_emit_move_sequence): Revert the previous changes. + * config/xtensa/xtensa.md: New split patterns for integer + and floating-point, as the frontend part. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_2insns.c: New. + * gcc.target/xtensa/constsynth_3insns.c: Ditto. + * gcc.target/xtensa/constsynth_double.c: Ditto. +--- + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 133 +++++++++++++++--- + gcc/config/xtensa/xtensa.md | 50 +++++++ + .../gcc.target/xtensa/constsynth_2insns.c | 44 ++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 24 ++++ + .../gcc.target/xtensa/constsynth_double.c | 11 ++ + 6 files changed, 247 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 80b1da2bb..d65bc2954 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -44,6 +44,7 @@ extern int xtensa_expand_block_move (rtx *); + extern int xtensa_expand_block_set_unrolled_loop (rtx *); + extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); ++extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); + extern void xtensa_expand_nonlocal_goto (rtx *); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 94ff901c5..ba36d7244 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1027,6 +1027,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + } + + ++/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) ++ into dst with synthesizing a such constant value from a sequence of ++ load-immediate / arithmetic ones, instead of a L32R instruction ++ (plus a constant in litpool). 
*/ ++ ++static void ++xtensa_emit_constantsynth (rtx dst, enum rtx_code code, ++ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT imm2) ++{ ++ gcc_assert (REG_P (dst)); ++ emit_move_insn (dst, GEN_INT (imm0)); ++ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, ++ dst, GEN_INT (imm1))); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, imm2)); ++} ++ ++static int ++xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT op_imm) ++{ ++ int shift = exact_log2 (srcval + 1); ++ ++ if (IN_RANGE (shift, 1, 31)) ++ { ++ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ { ++ HOST_WIDE_INT imm0, imm1; ++ ++ if (srcval < -32768) ++ imm1 = -32768; ++ else if (srcval > 32512) ++ imm1 = 32512; ++ else ++ imm1 = srcval & ~255; ++ imm0 = srcval - imm1; ++ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) ++ imm0 -= 256, imm1 += 256; ++ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); ++ return 1; ++ } ++ ++ shift = ctz_hwi (srcval); ++ if (xtensa_simm12b (srcval >> shift)) ++ { ++ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static rtx ++xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) ++{ ++ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); ++} ++ ++static rtx ++xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) ++{ ++ return imm == 7 ++ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), ++ reg) ++ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, ++ GEN_INT (floor_log2 (imm - 1))), ++ reg); ++} ++ ++int ++xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) ++{ ++ /* No need for synthesizing for what fits into MOVI instruction. */ ++ if (xtensa_simm12b (srcval)) ++ return 0; ++ ++ /* 2-insns substitution. 
*/ ++ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) ++ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) ++ return 1; ++ ++ /* 3-insns substitution. */ ++ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) ++ { ++ int shift, divisor; ++ ++ /* 2-insns substitution followed by SLLI. */ ++ shift = ctz_hwi (srcval); ++ if (IN_RANGE (shift, 1, 31) && ++ xtensa_constantsynth_2insn (dst, srcval >> shift, ++ xtensa_constantsynth_rtx_SLLI, ++ shift)) ++ return 1; ++ ++ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ ++ if (TARGET_ADDX) ++ for (divisor = 3; divisor <= 9; divisor += 2) ++ if (srcval % divisor == 0 && ++ xtensa_constantsynth_2insn (dst, srcval / divisor, ++ xtensa_constantsynth_rtx_ADDSUBX, ++ divisor)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++ + /* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move +@@ -1064,22 +1181,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + + if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) + { +- /* Try to emit MOVI + SLLI sequence, that is smaller +- than L32R + literal. 
*/ +- if (optimize_size && mode == SImode && CONST_INT_P (src) +- && register_operand (dst, mode)) +- { +- HOST_WIDE_INT srcval = INTVAL (src); +- int shift = ctz_hwi (srcval); +- +- if (xtensa_simm12b (srcval >> shift)) +- { +- emit_move_insn (dst, GEN_INT (srcval >> shift)); +- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); +- return 1; +- } +- } +- + src = force_const_mem (SImode, src); + operands[1] = src; + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index da6b71d1d..ddc3087fa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -940,6 +940,19 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "constantpool_operand"))] ++ "! optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ if (! CONST_INT_P (x)) ++ FAIL; ++ if (! xtensa_constantsynth (operands[0], INTVAL (x))) ++ emit_move_insn (operands[0], x); ++}) ++ + ;; 16-bit Integer moves + + (define_expand "movhi" +@@ -1144,6 +1157,43 @@ + (set_attr "mode" "SF") + (set_attr "length" "3")]) + ++(define_split ++ [(set (match_operand:SF 0 "register_operand") ++ (match_operand:SF 1 "constantpool_operand"))] ++ "! 
optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ int i = 0; ++ rtx x = XEXP (operands[1], 0); ++ long l[2]; ++ if (GET_CODE (x) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (x)) ++ x = get_pool_constant (x); ++ else if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ gcc_assert (GET_CODE (x) == PLUS ++ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) ++ && CONST_INT_P (XEXP (x, 1))); ++ i = INTVAL (XEXP (x, 1)); ++ gcc_assert (i == 0 || i == 4); ++ i /= 4; ++ x = get_pool_constant (XEXP (x, 0)); ++ } ++ else ++ gcc_unreachable (); ++ if (GET_MODE (x) == SFmode) ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); ++ else if (GET_MODE (x) == DFmode) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ else ++ FAIL; ++ x = gen_rtx_REG (SImode, REGNO (operands[0])); ++ if (! xtensa_constantsynth (x, l[i])) ++ emit_move_insn (x, GEN_INT (l[i])); ++}) ++ + ;; 64-bit floating point moves + + (define_expand "movdf" +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +new file mode 100644 +index 000000000..43c85a250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++int test_0(void) ++{ ++ return 4095; ++} ++ ++int test_1(void) ++{ ++ return 2147483647; ++} ++ ++int test_2(void) ++{ ++ return -34816; ++} ++ ++int test_3(void) ++{ ++ return -2049; ++} ++ ++int test_4(void) ++{ ++ return 2048; ++} ++ ++int test_5(void) ++{ ++ return 34559; ++} ++ ++int test_6(void) ++{ ++ return 43680; ++} ++ ++void test_7(int *p) ++{ ++ *p = -1432354816; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +new file mode 100644 +index 000000000..f3c4a1c7c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mextra-l32r-costs=3" } */ ++ ++int test_0(void) ++{ ++ return 134217216; ++} ++ ++int test_1(void) ++{ ++ return -27604992; ++} ++ ++int test_2(void) ++{ ++ return -162279; ++} ++ ++void test_3(int *p) ++{ ++ *p = 192437; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +new file mode 100644 +index 000000000..890ca5047 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void test(unsigned int count, double array[]) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ array[i] = 1.0; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0024-Improve-shift-operations-more.patch b/patches/gcc10.2/gcc-xtensa-0024-Improve-shift-operations-more.patch new file mode 100644 index 0000000..9c44b89 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0024-Improve-shift-operations-more.patch @@ -0,0 +1,383 @@ +From fd3771fcc13b8712c91cec70f4533760f72b54e1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:38:31 +0900 +Subject: [PATCH 19/31] xtensa: Improve shift operations more + +This patch introduces funnel shifter utilization, and rearranges existing +"per-byte shift" insn patterns. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (logical_shift_operator, + xtensa_shift_per_byte_operator): New predicates. + * config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction): + New prototype. + * config/xtensa/xtensa.c (xtensa_shlrd_which_direction): + New helper function for funnel shift patterns. + * config/xtensa/xtensa.md (ior_op): New code iterator. + (*ashlsi3_1): Replace with new split pattern. 
+ (*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x. + (*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1): + New insn-and-split patterns that redirect to *xtensa_shift_per_byte, + in order to omit unnecessary bitwise AND operation. + (*shlrd_reg_<code>, *shlrd_const_<code>, *shlrd_per_byte_<code>, + *shlrd_per_byte_<code>_omit_AND): + New insn patterns for funnel shifts. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/funnel_shifter.c: New. +--- + gcc/config/xtensa/predicates.md | 6 + + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 14 ++ + gcc/config/xtensa/xtensa.md | 213 ++++++++++++++---- + .../gcc.target/xtensa/funnel_shifter.c | 17 ++ + 5 files changed, 213 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 91b9343a2..e7836f0ec 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -164,9 +164,15 @@ + (define_predicate "boolean_operator" + (match_code "eq,ne")) + ++(define_predicate "logical_shift_operator" ++ (match_code "ashift,lshiftrt")) ++ + (define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + ++(define_predicate "xtensa_shift_per_byte_operator" ++ (match_code "ashift,ashiftrt,lshiftrt")) ++ + (define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index d65bc2954..32743bc67 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); ++extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + + #ifdef TREE_CODE + extern void 
init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba36d7244..473cfaf9d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2394,6 +2394,20 @@ xtensa_tls_referenced_p (rtx x) + } + + ++/* Helper function for "*shlrd_..." patterns. */ ++ ++enum rtx_code ++xtensa_shlrd_which_direction (rtx op0, rtx op1) ++{ ++ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) ++ return ASHIFT; /* shld */ ++ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) ++ return LSHIFTRT; /* shrd */ ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ddc3087fa..58bba89af 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -83,6 +83,9 @@ + ;; the same template. + (define_mode_iterator HQI [HI QI]) + ++;; This code iterator is for *shlrd and its variants. ++(define_code_iterator ior_op [ior plus]) ++ + + ;; Attributes. 
+ +@@ -1272,16 +1275,6 @@ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + }) + +-(define_insn "*ashlsi3_1" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (const_int 1)))] +- "TARGET_DENSITY" +- "add.n\t%0, %1, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "2")]) +- + (define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1294,16 +1287,14 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashlsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8b\t%2\;sll\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "register_operand") ++ (const_int 1)))] ++ "TARGET_DENSITY" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 1) ++ (match_dup 1)))]) + + (define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") +@@ -1317,17 +1308,6 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashrsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8l\t%2\;sra\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) +- + (define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1337,9 +1317,9 @@ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) +- return "srli\t%0, %1, %R2"; ++ return "srli\t%0, %1, %R2"; + else +- 
return "extui\t%0, %1, %R2, %L2"; ++ return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; + } +@@ -1347,13 +1327,170 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*lshrsi3_3x" ++(define_insn "*shift_per_byte" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 3 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]))] ++ "!optimize_debug && optimize" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; ++ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; ++ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_0" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i"))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_1" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i")))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 
5) ++ (neg:SI (match_dup 2))) ++ (set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 5) ++ (const_int 3))]))] ++{ ++ operands[5] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "9")]) ++ ++(define_insn "*shlrd_reg_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (match_dup 2))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_const_" + [(set (match_operand:SI 0 "register_operand" "=a") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i")])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && IN_RANGE (INTVAL (operands[3]), 1, 31) ++ && IN_RANGE (INTVAL (operands[4]), 1, 31) ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" ++{ ++ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) ++ { ++ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; ++ case LSHIFTRT: return 
"ssai\t%R3\;src\t%0, %2, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_per_byte_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shlrd_per_byte__omit_AND" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 4 "const_int_operand" "i"))]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_dup 2) ++ (const_int 3)) ++ (match_dup 4)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ior_op:SI (match_op_dup 5 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]) ++ (match_op_dup 6 ++ [(match_dup 3) ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] + "" +- "ssa8l\t%2\;srl\t%0, %1" + [(set_attr "type" "arith") + 
(set_attr "mode" "SI") + (set_attr "length" "6")]) +diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +new file mode 100644 +index 000000000..c8f987ccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(const void *addr) ++{ ++ unsigned int n = (unsigned int)addr; ++ const unsigned int *a = (const unsigned int*)(n & ~3); ++ n = (n & 3) * 8; ++ return (a[0] >> n) | (a[1] << (32 - n)); ++} ++ ++unsigned int test_1(unsigned int a, unsigned int b) ++{ ++ return (a >> 16) + (b << 16); ++} ++ ++/* { dg-final { scan-assembler-times "src" 2 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch new file mode 100644 index 0000000..cdb96ff --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch @@ -0,0 +1,427 @@ +From 0690bcdd42d0aa6671f9ec3ccbbe70faa04ffb6b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 31 Jan 2022 09:56:21 +0900 +Subject: [PATCH 20/31] xtensa: Simplify conditional branch/move insn patterns + +No need to describe the "false side" conditional insn patterns anymore. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_emit_branch): + Remove the first argument. + (xtensa_emit_bit_branch): Remove it because now called only from the + output statement of *bittrue insn pattern. + * config/xtensa/xtensa.c (gen_int_relational): Remove the last + argument 'p_invert', and make so that the condition is reversed by + itself as needed. + (xtensa_expand_conditional_branch): Share the common path, and remove + condition inversion code. + (xtensa_emit_branch, xtensa_emit_movcc): Simplify by removing the + "false side" pattern. 
+ (xtensa_emit_bit_branch): Remove it because of the abovementioned + reason, and move the function body to *bittrue insn pattern. + * config/xtensa/xtensa.md (*bittrue): Transplant the output + statement from removed xtensa_emit_bit_branch(). + (*bfalse, *ubfalse, *bitfalse, *maskfalse): Remove the "false side" + insn patterns. +--- + gcc/config/xtensa/xtensa-protos.h | 3 +- + gcc/config/xtensa/xtensa.c | 111 ++++++++++------------------ + gcc/config/xtensa/xtensa.md | 117 ++++++++---------------------- + 3 files changed, 70 insertions(+), 161 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 32743bc67..e4b2d2f06 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -51,8 +51,7 @@ extern void xtensa_expand_nonlocal_goto (rtx *); + extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); + extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); +-extern char *xtensa_emit_branch (bool, bool, rtx *); +-extern char *xtensa_emit_bit_branch (bool, bool, rtx *); ++extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 473cfaf9d..8deae3d51 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -118,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = + + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); +-static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); ++static rtx gen_int_relational (enum rtx_code, rtx, rtx); + static rtx gen_float_relational (enum rtx_code, rtx, rtx); + static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); + static rtx 
fixup_subreg_mem (rtx); +@@ -670,8 +670,7 @@ map_test_to_internal_test (enum rtx_code test_code) + static rtx + gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ +- rtx cmp1, /* second operand to compare */ +- int *p_invert /* whether branch needs to reverse test */) ++ rtx cmp1 /* second operand to compare */) + { + struct cmp_info + { +@@ -703,6 +702,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + enum internal_test test; + machine_mode mode; + struct cmp_info *p_info; ++ int invert; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); +@@ -739,9 +739,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- *p_invert = ((GET_CODE (cmp1) == CONST_INT) +- ? p_info->invert_const +- : p_info->invert_reg); ++ invert = ((GET_CODE (cmp1) == CONST_INT) ++ ? p_info->invert_const ++ : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. */ +@@ -758,7 +758,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + cmp1 = temp; + } + +- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); ++ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) ++ : p_info->test_code, ++ VOIDmode, cmp0, cmp1); + } + + +@@ -817,45 +819,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; +- rtx cmp; +- int invert; +- rtx label1, label2; ++ rtx cmp, label; + + switch (mode) + { ++ case E_SFmode: ++ if (TARGET_HARD_FLOAT) ++ { ++ cmp = gen_float_relational (test_code, cmp0, cmp1); ++ break; ++ } ++ /* FALLTHRU */ ++ + case E_DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case E_SImode: +- invert = FALSE; +- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); +- break; +- +- case E_SFmode: +- if (!TARGET_HARD_FLOAT) +- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, +- cmp0, cmp1)); +- invert = FALSE; +- cmp = gen_float_relational (test_code, cmp0, cmp1); ++ cmp = gen_int_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ +- +- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); +- label2 = pc_rtx; +- +- if (invert) +- { +- label2 = label1; +- label1 = pc_rtx; +- } +- ++ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, +- label1, +- label2))); ++ label, ++ pc_rtx))); + } + + +@@ -2058,21 +2048,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + + + char * +-xtensa_emit_branch (bool inverted, bool immed, rtx *operands) ++xtensa_emit_branch (bool immed, rtx *operands) + { + static char result[64]; +- enum rtx_code code; ++ enum rtx_code code = GET_CODE (operands[3]); + const char *op; + +- code = GET_CODE (operands[3]); + switch (code) + { +- case EQ: op = inverted ? "ne" : "eq"; break; +- case NE: op = inverted ? "eq" : "ne"; break; +- case LT: op = inverted ? "ge" : "lt"; break; +- case GE: op = inverted ? "lt" : "ge"; break; +- case LTU: op = inverted ? 
"geu" : "ltu"; break; +- case GEU: op = inverted ? "ltu" : "geu"; break; ++ case EQ: op = "eq"; break; ++ case NE: op = "ne"; break; ++ case LT: op = "lt"; break; ++ case GE: op = "ge"; break; ++ case LTU: op = "ltu"; break; ++ case GEU: op = "geu"; break; + default: gcc_unreachable (); + } + +@@ -2091,32 +2080,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) + } + + +-char * +-xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +-{ +- static char result[64]; +- const char *op; +- +- switch (GET_CODE (operands[3])) +- { +- case EQ: op = inverted ? "bs" : "bc"; break; +- case NE: op = inverted ? "bc" : "bs"; break; +- default: gcc_unreachable (); +- } +- +- if (immed) +- { +- unsigned bitnum = INTVAL (operands[1]) & 0x1f; +- operands[1] = GEN_INT (bitnum); +- sprintf (result, "b%si\t%%0, %%d1, %%2", op); +- } +- else +- sprintf (result, "b%s\t%%0, %%1, %%2", op); +- +- return result; +-} +- +- + char * + xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { +@@ -2125,12 +2088,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + const char *op; + + code = GET_CODE (operands[4]); ++ if (inverted) ++ code = reverse_condition (code); + if (isbool) + { + switch (code) + { +- case EQ: op = inverted ? "t" : "f"; break; +- case NE: op = inverted ? "f" : "t"; break; ++ case EQ: op = "f"; break; ++ case NE: op = "t"; break; + default: gcc_unreachable (); + } + } +@@ -2138,10 +2103,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { + switch (code) + { +- case EQ: op = inverted ? "nez" : "eqz"; break; +- case NE: op = inverted ? "eqz" : "nez"; break; +- case LT: op = inverted ? "gez" : "ltz"; break; +- case GE: op = inverted ? 
"ltz" : "gez"; break; ++ case EQ: op = "eqz"; break; ++ case NE: op = "nez"; break; ++ case LT: op = "ltz"; break; ++ case GE: op = "gez"; break; + default: gcc_unreachable (); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 58bba89af..40000859d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1551,28 +1551,13 @@ + (define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*bfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1581,28 +1566,13 @@ + (define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*ubfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "ubranch_operator" +- 
[(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1613,75 +1583,50 @@ + (define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) +- (pc)))] +- "" +-{ +- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*bitfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) ++ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") ++ (const_int 1) ++ (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*masktrue" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { ++ static char result[64]; ++ char op; + switch (GET_CODE (operands[3])) + { +- case EQ: return "bnone\t%0, %1, %2"; +- case NE: return "bany\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: op = 'c'; break; ++ case NE: op = 's'; break; ++ default: 
gcc_unreachable (); + } ++ if (which_alternative == 0) ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); ++ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); ++ } ++ else ++ sprintf (result, "bb%c\t%%0, %%1, %%2", op); ++ return result; + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_insn "*maskfalse" ++(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] + "" + { + switch (GET_CODE (operands[3])) + { +- case EQ: return "bany\t%0, %1, %2"; +- case NE: return "bnone\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: return "bnone\t%0, %1, %2"; ++ case NE: return "bany\t%0, %1, %2"; ++ default: gcc_unreachable (); + } + } + [(set_attr "type" "jump") +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch b/patches/gcc10.2/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch new file mode 100644 index 0000000..e1d2790 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch @@ -0,0 +1,101 @@ +From a7cf439409089eab17341a1a24fb9be2b967ca7c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 27 May 2021 19:04:12 +0900 +Subject: [PATCH 21/31] xtensa: Make use of BALL/BNALL instructions + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation, but a few similar fused instructions exist: + + "BALL Ax, Ay, label" // if ((~Ax & Ay) == 0) goto label; + "BNALL Ax, Ay, label" // if ((~Ax & Ay) != 0) goto label; + +These instructions have never been emitted before, but there seems to be no reason
not +to make use of them. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_bitcmpl): New insn pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/BALL-BNALL.c: New. +--- + gcc/config/xtensa/xtensa.md | 21 +++++++++++++ + gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 ++++++++++++++++++++ + 2 files changed, 54 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 40000859d..b34b2afb6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1633,6 +1633,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn "*masktrue_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case EQ: return "ball\t%0, %1, %2"; ++ case NE: return "bnall\t%0, %1, %2"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ + + ;; Zero-overhead looping support. 
+ +diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +new file mode 100644 +index 000000000..ba61c6f37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++extern void foo(void); ++ ++void BNONE_test(int a, int b) ++{ ++ if (a & b) ++ foo(); ++} ++ ++void BANY_test(int a, int b) ++{ ++ if (!(a & b)) ++ foo(); ++} ++ ++void BALL_test(int a, int b) ++{ ++ if (~a & b) ++ foo(); ++} ++ ++void BNALL_test(int a, int b) ++{ ++ if (!(~a & b)) ++ foo(); ++} ++ ++/* { dg-final { scan-assembler-times "bnone" 1 } } */ ++/* { dg-final { scan-assembler-times "bany" 1 } } */ ++/* { dg-final { scan-assembler-times "ball" 1 } } */ ++/* { dg-final { scan-assembler-times "bnall" 1 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch b/patches/gcc10.2/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch new file mode 100644 index 0000000..b13350f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch @@ -0,0 +1,252 @@ +From 43c7f8333028ff03d8a4681ab62de2febcc43f5c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:28:43 +0900 +Subject: [PATCH 22/31] xtensa: Optimize bitwise AND operation with some + specific forms of constants + +This patch offers several insn-and-split patterns for bitwise AND with +register and constant that can be represented as: + +i. 1's least significant N bits and the others 0's (17 <= N <= 31) +ii. 1's most significant N bits and the others 0's (12 <= N <= 31) +iii. M 1's sequence of bits and trailing N 0's bits, that cannot fit into a + "MOVI Ax, simm12" instruction (1 <= M <= 16, 1 <= N <= 30) + +And also offers shortcuts for conditional branch if each of the abovementioned +operations is (not) equal to zero. 
+ +gcc/ChangeLog: + + * config/xtensa/predicates.md (shifted_mask_operand): + New predicate. + * config/xtensa/xtensa.md (*andsi3_const_pow2_minus_one): + New insn-and-split pattern. + (*andsi3_const_negative_pow2, *andsi3_const_shifted_mask, + *masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): Ditto. +--- + gcc/config/xtensa/predicates.md | 10 ++ + gcc/config/xtensa/xtensa.md | 179 ++++++++++++++++++++++++++++++++ + 2 files changed, 189 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index e7836f0ec..367fc17f3 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -52,6 +52,16 @@ + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + ++(define_predicate "shifted_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT mask = INTVAL (op); ++ int shift = ctz_hwi (mask); ++ ++ return IN_RANGE (shift, 1, 31) ++ && xtensa_mask_immediate ((uint32_t)mask >> shift); ++}) ++ + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b34b2afb6..355fb7742 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -645,6 +645,83 @@ + (set_attr "mode" "SI") + (set_attr "length" "6")]) + ++(define_insn_and_split "*andsi3_const_pow2_minus_one" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ashift:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" 
"SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[2]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*andsi3_const_negative_pow2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (lshiftrt:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*andsi3_const_shifted_mask" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "shifted_mask_operand" "i")))] ++ "! xtensa_simm12b (INTVAL (operands[2]))" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (zero_extract:SI (match_dup 1) ++ (match_dup 3) ++ (match_dup 4))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[2]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[2] = GEN_INT (shift); ++ operands[3] = GEN_INT (mask_size); ++ operands[4] = GEN_INT (mask_pos); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && ctz_hwi (INTVAL (operands[2])) == 1") ++ (const_int 5) ++ (const_int 6)))]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +@@ -1654,6 +1731,108 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split 
"*masktrue_const_pow2_minus_one" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (ashift:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[1]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*masktrue_const_negative_pow2" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*masktrue_const_shifted_mask" ++ [(set (pc) ++ (if_then_else (match_operator 4 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "shifted_mask_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 
"" "")) ++ (pc)))] ++ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 ++ && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 6) ++ (zero_extract:SI (match_dup 0) ++ (match_dup 5) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 6) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) ++ (pc)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[1]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[1] = GEN_INT (mask_pos); ++ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); ++ operands[5] = GEN_INT (mask_size); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") ++ (const_int 5) ++ (const_int 6)))]) ++ + + ;; Zero-overhead looping support. + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch b/patches/gcc10.2/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch new file mode 100644 index 0000000..ebe9eb0 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch @@ -0,0 +1,44 @@ +From 7856e5d6344828b2a72aeef671a169dbd1a85a55 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:34:48 +0900 +Subject: [PATCH 23/31] xtensa: Document new -mextra-l32r-costs= + Xtensa-specific option + +gcc/ChangeLog: + * doc/invoke.texi: Document -mextra-l32r-costs= option. 
+--- + gcc/doc/invoke.texi | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index eabeec944..c35f51afb 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. + -mtext-section-literals -mno-text-section-literals @gol + -mauto-litpools -mno-auto-litpools @gol + -mtarget-align -mno-target-align @gol +--mlongcalls -mno-longcalls} ++-mlongcalls -mno-longcalls @gol ++-mextra-l32r-costs=@var{cycles}} + + @emph{zSeries Options} + See S/390 and zSeries Options. +@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call + instructions---look at the disassembled object code to see the actual + instructions. Note that the assembler uses an indirect call for + every cross-file call, not just those that really are out of range. ++ ++@item -mextra-l32r-costs=@var{n} ++@opindex mextra-l32r-costs ++Specify an extra cost of instruction RAM/ROM access for @code{L32R} ++instructions, in clock cycles. This affects, when optimizing for speed, ++whether loading a constant from literal pool using @code{L32R} or ++synthesizing the constant from a small one with a couple of arithmetic ++instructions. The default value is 0. 
+ @end table + + @node zSeries Options +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch b/patches/gcc10.2/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch new file mode 100644 index 0000000..f5c0f78 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch @@ -0,0 +1,354 @@ +From c985f67f0b9a35ca5f22647c326c6b43a2b237fa Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 15 Jun 2022 21:21:21 +0900 +Subject: [PATCH 24/31] xtensa: Add support for sibling call optimization + +This patch introduces support for sibling call optimization, when the Windowed +Register Option is NOT configured. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call, + xtensa_emit_sibcall): New prototypes. + (xtensa_expand_epilogue): Add new argument that specifies whether + or not sibling call. + * config/xtensa/xtensa.c (TARGET_FUNCTION_OK_FOR_SIBCALL): + New macro definition. + (xtensa_prepare_expand_call): New function in order to share + the common code. + (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall): + New functions. + (xtensa_expand_epilogue): Add new argument sibcall_p and use it + for sibling call handling. + * config/xtensa/xtensa.md (call, call_value): + Use xtensa_prepare_expand_call. + (call_internal, call_value_internal): + Add the condition in order to be disabled if sibling call. + (sibcall, sibcall_value, sibcall_epilogue): New expansions. + (sibcall_internal, sibcall_value_internal): New insn patterns, + and split ones in order to take care of the indirect sibcalls. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: New. 
+--- + gcc/config/xtensa/xtensa-protos.h | 4 +- + gcc/config/xtensa/xtensa.c | 57 ++++++++++++- + gcc/config/xtensa/xtensa.md | 93 ++++++++++++++++++---- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++ + 4 files changed, 155 insertions(+), 19 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index e4b2d2f06..75ed3bfb0 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); + extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); ++extern void xtensa_prepare_expand_call (int, rtx *); + extern char *xtensa_emit_call (int, rtx *); ++extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + +@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int); + extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); +-extern void xtensa_expand_epilogue (void); ++extern void xtensa_expand_epilogue (bool); + extern void order_regs_for_local_alloc (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8deae3d51..a714b980a 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -187,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); + static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); 
++static bool xtensa_function_ok_for_sibcall (tree, tree); + + + +@@ -337,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2117,6 +2121,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + } + + ++void ++xtensa_prepare_expand_call (int callop, rtx *operands) ++{ ++ rtx addr = XEXP (operands[callop], 0); ++ ++ if (flag_pic && SYMBOL_REF_P (addr) ++ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) ++ addr = gen_sym_PLT (addr); ++ ++ if (!call_insn_operand (addr, VOIDmode)) ++ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++} ++ ++ + char * + xtensa_emit_call (int callop, rtx *operands) + { +@@ -2135,6 +2153,24 @@ xtensa_emit_call (int callop, rtx *operands) + } + + ++char * ++xtensa_emit_sibcall (int callop, rtx *operands) ++{ ++ static char result[64]; ++ rtx tgt = operands[callop]; ++ ++ if (GET_CODE (tgt) == CONST_INT) ++ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", ++ INTVAL (tgt)); ++ else if (register_operand (tgt, VOIDmode)) ++ sprintf (result, "jx\t%%%d", callop); ++ else ++ sprintf (result, "j.l\t%%%d, a9", callop); ++ ++ return result; ++} ++ ++ + bool + xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) + { +@@ -3305,7 +3341,7 @@ xtensa_expand_prologue (void) + } + + void +-xtensa_expand_epilogue (void) ++xtensa_expand_epilogue (bool sibcall_p) + { + if (!TARGET_WINDOWED_ABI) + { +@@ -3339,10 +3375,13 @@ xtensa_expand_epilogue (void) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), ++ emit_move_insn 
(reg = gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); ++ if (regno == A0_REG && sibcall_p) ++ emit_use (reg); + } + } + +@@ -3377,7 +3416,8 @@ xtensa_expand_epilogue (void) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- emit_jump_insn (gen_return ()); ++ if (!sibcall_p) ++ emit_jump_insn (gen_return ()); + } + + bool +@@ -4893,4 +4933,15 @@ xtensa_asan_shadow_offset (void) + return HOST_WIDE_INT_UC (0x10000000); + } + ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ ++static bool ++xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Do not allow sibcalls when windowed registers ABI is in effect. */ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 355fb7742..2a11d1c86 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,6 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++ (A10_REG 10) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2153,18 +2154,13 @@ + (match_operand 1 "" ""))] + "" + { +- rtx addr = XEXP (operands[0], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (0, operands); + }) + + (define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (0, operands); + } +@@ -2178,19 +2174,14 @@ + (match_operand 2 "" "")))] + "" + { +- rtx addr = XEXP (operands[1], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[1], 0) = 
copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (1, operands); + }) + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); + } +@@ -2198,6 +2189,70 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "sibcall" ++ [(call (match_operand 0 "memory_operand" "") ++ (match_operand 1 "" ""))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (0, operands); ++}) ++ ++(define_insn "sibcall_internal" ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ (match_operand 1 "" "i"))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (0, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(call (mem:SI (match_operand:SI 0 "register_operand")) ++ (match_operand 1 ""))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 0)) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 1))]) ++ ++(define_expand "sibcall_value" ++ [(set (match_operand 0 "register_operand" "") ++ (call (match_operand 1 "memory_operand" "") ++ (match_operand 2 "" "")))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (1, operands); ++}) ++ ++(define_insn "sibcall_value_internal" ++ [(set (match_operand 0 "register_operand" "=a") ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (1, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (call (mem:SI (match_operand:SI 1 "register_operand")) 
++ (match_operand 2 "")))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 1)) ++ (set (match_dup 0) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 2)))]) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +@@ -2265,7 +2320,15 @@ + [(return)] + "" + { +- xtensa_expand_epilogue (); ++ xtensa_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_expand_epilogue (true); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +new file mode 100644 +index 000000000..d2b3fccf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */ ++ ++extern int foo(int); ++extern void bar(int); ++ ++int test_0(int a) { ++ return foo(a); ++} ++ ++void test_1(int a) { ++ bar(a); ++} ++ ++int test_2(int (*a)(void)) { ++ bar(0); ++ return a(); ++} ++ ++/* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch b/patches/gcc10.2/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch new file mode 100644 index 0000000..ad60202 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch @@ -0,0 +1,81 @@ +From 16878066a57f917814a8d6fe45f7f7d2eebdbbc0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:37:54 +0900 +Subject: [PATCH 25/31] xtensa: Add some dedicated patterns that correspond to + GIMPLE canonicalizations + +This patch offers better RTL representations against straightforward +derivations from some tree optimizers' canonicalized forms. 
+ +- rounding up to even, such as '(x + (x & 1))', is canonicalized to + '((x + 1) & -2)', but the former is one instruction less than the latter + in Xtensa ISA. +- signed greater or equal to zero as logical value '((signed)x >= 0)', + is canonicalized to '((unsigned)(x ^ -1) >> 31)', but the equivalent + '(((signed)x >> 31) + 1)' is one instruction less. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*round_up_to_even): + New insn-and-split pattern. + (*signed_ge_zero): Ditto. +--- + gcc/config/xtensa/xtensa.md | 45 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a11d1c86..3e8e2e76f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2709,3 +2709,48 @@ + xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); + DONE; + }) ++ ++(define_insn_and_split "*round_up_to_even" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 1)) ++ (const_int -2)))] ++ "" ++ "#" ++ "can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (and:SI (match_dup 1) ++ (const_int 1))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*signed_ge_zero" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ge:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 0)))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) ++ (ashiftrt:SI (match_dup 1) ++ (const_int 31))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int 1)))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) +-- +2.20.1 + 
diff --git a/patches/gcc10.2/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch b/patches/gcc10.2/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch new file mode 100644 index 0000000..28bb494 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch @@ -0,0 +1,90 @@ +From a0f2dfa2e952111dbd85d2b2f1caaf570facce8a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:39:49 +0900 +Subject: [PATCH 26/31] xtensa: Eliminate unwanted reg-reg moves during DFmode + input reloads + +When spilled DFmode registers are reloaded in, they are first loaded into a pair +of SImode regs and then copied from those regs. Such unwanted reg-reg moves +seem not to be eliminated at the "cprop_hardreg" stage, although there is no +such problem with output reloads. + +Luckily it is easy to resolve such inefficiencies with the use of a peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (reload_operand): + New predicate. + * config/xtensa/xtensa.md: New peephole2 pattern. 
+--- + gcc/config/xtensa/predicates.md | 13 +++++++++++++ + gcc/config/xtensa/xtensa.md | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 367fc17f3..c1cddb733 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -165,6 +165,19 @@ + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + ++(define_predicate "reload_operand" ++ (match_code "mem") ++{ ++ const_rtx addr = XEXP (op, 0); ++ if (REG_P (addr)) ++ return REGNO (addr) == A1_REG; ++ if (GET_CODE (addr) == PLUS) ++ return REG_P (XEXP (addr, 0)) ++ && REGNO (XEXP (addr, 0)) == A1_REG ++ && CONST_INT_P (XEXP (addr, 1)); ++ return false; ++}) ++ + (define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3e8e2e76f..2598c09c9 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2754,3 +2754,34 @@ + (if_then_else (match_test "TARGET_DENSITY") + (const_int 5) + (const_int 6)))]) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 6 "reload_operand")) ++ (set (match_operand:SI 1 "register_operand") ++ (match_operand:SI 7 "reload_operand")) ++ (set (match_operand:SF 2 "register_operand") ++ (match_operand:SF 4 "register_operand")) ++ (set (match_operand:SF 3 "register_operand") ++ (match_operand:SF 5 "register_operand"))] ++ "REGNO (operands[0]) == REGNO (operands[4]) ++ && REGNO (operands[1]) == REGNO (operands[5]) ++ && peep2_reg_dead_p (4, operands[0]) ++ && peep2_reg_dead_p (4, operands[1])" ++ [(set (match_dup 2) ++ (match_dup 6)) ++ (set (match_dup 3) ++ (match_dup 7))] ++{ ++ uint32_t check = 0; ++ int i; ++ for (i = 0; i <= 3; ++i) ++ { ++ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); ++ if (check & mask) ++ FAIL; ++ check |= mask; ++ } ++ operands[6] = gen_rtx_MEM 
(SFmode, XEXP (operands[6], 0)); ++ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch b/patches/gcc10.2/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch new file mode 100644 index 0000000..7c4a869 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch @@ -0,0 +1,99 @@ +From d6c2b11e9ce88f3b1a7ddcf9a2712b070ad4dbfb Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:53:04 +0900 +Subject: [PATCH 27/31] xtensa: Eliminate [DS]Cmode hard register clobber that + is immediately followed by whole overwrite the register + +RTL expansion of an assignment to a [DS]Cmode hard register includes an +obstructive register clobber. + +The simplest example: + + double _Complex test(double _Complex c) { + return c; + } + +will be converted to: + + (set (reg:DF 42 [ c ]) (reg:DF 2 a2)) + (set (reg:DF 43 [ c+8 ]) (reg:DF 4 a4)) + (clobber (reg:DC 2 a2)) + (set (reg:DF 2 a2) (reg:DF 42 [ c ])) + (set (reg:DF 4 a4) (reg:DF 43 [ c+8 ])) + (use (reg:DC 2 a2)) + (return) + +and then finally: + + test: + mov a8, a2 + mov a9, a3 + mov a6, a4 + mov a7, a5 + mov a2, a8 + mov a3, a9 + mov a4, a6 + mov a5, a7 + ret + +As you can see, this is ridiculously wasteful. + +This patch eliminates such a clobber so that the optimizer can prune away the +wasted move instructions: + + test: + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (DSC): New split pattern and mode iterator. +--- + gcc/config/xtensa/xtensa.md | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2598c09c9..124548dfe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -87,6 +87,10 @@ + ;; This code iterator is for *shlrd and its variants. 
+ (define_code_iterator ior_op [ior plus]) + ++;; This mode iterator allows the DC and SC patterns to be defined from ++;; the same template. ++(define_mode_iterator DSC [DC SC]) ++ + + ;; Attributes. + +@@ -2785,3 +2789,27 @@ + operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); + operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); + }) ++ ++(define_split ++ [(clobber (match_operand:DSC 0 "register_operand"))] ++ "GP_REG_P (REGNO (operands[0]))" ++ [(const_int 0)] ++{ ++ unsigned int regno = REGNO (operands[0]); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx_insn *insn; ++ rtx x; ++ if (! ((insn = next_nonnote_nondebug_insn (curr_insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno ++ && (insn = next_nonnote_nondebug_insn (insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) ++ FAIL; ++}) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch b/patches/gcc10.2/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch new file mode 100644 index 0000000..6007b49 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch @@ -0,0 +1,111 @@ +From e37c151ca3beacb7f4f116a94c9c80223b0c6fbf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 17 Jun 2022 22:47:49 +0900 +Subject: [PATCH 28/31] xtensa: Defer storing integer constants into litpool + until reload + +Storing integer constants into litpool in the early stage of compilation +hinders some integer optimizations. In fact, such integer constants are +not subject to the constant folding process. 
+ +For example: + + extern unsigned short value; + extern void foo(void); + void test(void) { + if (value == 30001) + foo(); + } + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + extui a2, a2, 0, 16 // runtime zero-extension despite constant + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +This patch defers the placement of integer constants into litpool until +the start of reload: + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/constraints.md (Y): + Change to include integer constants until reload begins. + * config/xtensa/predicates.md (move_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Change to allow storing integer constants into litpool only after + reload begins. +--- + gcc/config/xtensa/constraints.md | 6 ++++-- + gcc/config/xtensa/predicates.md | 5 +++-- + gcc/config/xtensa/xtensa.c | 3 ++- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 9a8caab4f..13b3daafc 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -113,8 +113,10 @@ + + (define_constraint "Y" + "A constant that can be used in relaxed MOVI instructions." +- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") +- (match_test "TARGET_AUTO_LITPOOLS"))) ++ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") ++ (match_test "TARGET_AUTO_LITPOOLS")) ++ (and (match_code "const_int") ++ (match_test "can_create_pseudo_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. 
Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index c1cddb733..633cc6264 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -147,8 +147,9 @@ + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") +- (match_test "GET_MODE_CLASS (mode) == MODE_INT +- && xtensa_simm12b (INTVAL (op))")) ++ (match_test "(GET_MODE_CLASS (mode) == MODE_INT ++ && xtensa_simm12b (INTVAL (op))) ++ || can_create_pseudo_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a714b980a..1d64e2c76 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1173,7 +1173,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + return 1; + } + +- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) ++ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 ++ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + { + src = force_const_mem (SImode, src); + operands[1] = src; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch b/patches/gcc10.2/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch new file mode 100644 index 0000000..5ecac42 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch @@ -0,0 +1,129 @@ +From dfaefed18297218392071039325baabac59d5c43 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 19 Jun 2022 22:32:45 +0900 +Subject: [PATCH 29/31] xtensa: Apply a few minor fixes + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Use can_create_pseudo_p(), instead of using individual + reload_in_progress and reload_completed. 
+ (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), + the existing predicate function. + (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): + Use the standard RTX code predicate macros such as MEM_P, + SYMBOL_REF_P and/or CONST_INT_P. + * config/xtensa/xtensa.md: Avoid using numeric literals to determine + if callee-saved register, at the split patterns for indirect sibcall + fixups. +--- + gcc/config/xtensa/xtensa.c | 16 ++++++++-------- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 2 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 1d64e2c76..595c5f96f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -743,7 +743,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- invert = ((GET_CODE (cmp1) == CONST_INT) ++ invert = (CONST_INT_P (cmp1) + ? p_info->invert_const + : p_info->invert_reg); + +@@ -1200,7 +1200,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + } + } + +- if (!(reload_in_progress | reload_completed) ++ if (can_create_pseudo_p () + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + +@@ -1603,7 +1603,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) + return 0; + + /* If no 4-byte aligned, loop count should be treated as the +@@ -2160,7 +2160,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) + static char result[64]; + rtx tgt = operands[callop]; + +- if (GET_CODE (tgt) == CONST_INT) ++ if (CONST_INT_P (tgt)) + sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", + INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) +@@ -4318,17 +4318,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + static bool +-xtensa_is_insn_L32R_p(const rtx_insn *insn) ++xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + + if (GET_CODE (x) == SET) + { +- x = XEXP (x, 1); +- if (GET_CODE (x) == MEM) ++ x = SET_SRC (x); ++ if (MEM_P (x)) + { + x = XEXP (x, 0); +- return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) + && CONSTANT_POOL_ADDRESS_P (x); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 124548dfe..6f51a5357 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1251,14 +1251,14 @@ + int i = 0; + rtx x = XEXP (operands[1], 0); + long l[2]; +- if (GET_CODE (x) == SYMBOL_REF ++ if (SYMBOL_REF_P (x) + && CONSTANT_POOL_ADDRESS_P (x)) + x = get_pool_constant (x); + else if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS +- && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_P (XEXP (x, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))); + i = INTVAL (XEXP (x, 1)); +@@ -2217,7 +2217,7 @@ + (match_operand 1 ""))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" + [(set (reg:SI A10_REG) + (match_dup 0)) + (call (mem:SI (reg:SI A10_REG)) +@@ -2250,7 +2250,7 @@ + (match_operand 2 "")))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" + [(set (reg:SI A10_REG) + (match_dup 1)) + (set (match_dup 0) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch b/patches/gcc10.2/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch new file mode 100644 index 0000000..d65c44d --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch @@ -0,0 +1,56 @@ +From 48c657f23a61a41a46842b25bce4f287a56223a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 20 Jun 2022 01:56:16 +0900 +Subject: [PATCH 30/31] xtensa: Fix RTL insn cost estimation about relaxed MOVI + instructions + +These instructions will all be converted to L32R ones with litpool entries +by the assembler. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_is_insn_L32R_p): + Consider relaxed MOVI instructions as L32R. 
+--- + gcc/config/xtensa/xtensa.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 595c5f96f..b92ec9caa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4322,17 +4322,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + +- if (GET_CODE (x) == SET) ++ if (GET_CODE (x) != SET) ++ return false; ++ ++ x = XEXP (x, 1); ++ if (MEM_P (x)) + { +- x = SET_SRC (x); +- if (MEM_P (x)) +- { +- x = XEXP (x, 0); +- return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +- && CONSTANT_POOL_ADDRESS_P (x); +- } ++ x = XEXP (x, 0); ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); + } + ++ /* relaxed MOVI instructions, that will be converted to L32R by the ++ assembler. */ ++ if (CONST_INT_P (x) ++ && ! xtensa_simm12b (INTVAL (x))) ++ return true; ++ + return false; + } + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0036-Fix-buffer-overflow.patch b/patches/gcc10.2/gcc-xtensa-0036-Fix-buffer-overflow.patch new file mode 100644 index 0000000..35f9f10 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0036-Fix-buffer-overflow.patch @@ -0,0 +1,33 @@ +From 75c341c7de5c6f325d6ded7bd91d77793fe358d5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 22 Jun 2022 04:04:45 +0900 +Subject: [PATCH 31/31] xtensa: Fix buffer overflow + +Fortify buffer overflow message reported. +(see https://github.com/earlephilhower/esp-quick-toolchain/issues/36) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2_internal): + Enlarge the buffer that is obviously smaller than the template + string given to sprintf(). 
+--- + gcc/config/xtensa/xtensa.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6f51a5357..81b016859 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -536,7 +536,7 @@ + { + rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); + const char *init = "ssai\t8\;"; +- static char result[64]; ++ static char result[128]; + if (prev_insn && NONJUMP_INSN_P (prev_insn)) + { + rtx x = PATTERN (prev_insn); +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch b/patches/gcc10.2/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch new file mode 100644 index 0000000..0ea6d48 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch @@ -0,0 +1,95 @@ +From 9308911796a46bd689bbcc1cedef1b63ae9b871e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 26 Jun 2022 14:07:56 +0900 +Subject: [PATCH] xtensa: Optimize integer constant addition that is + between -32896 and 32639 + +Such constants are often subject to the constant synthesis: + + int test(int a) { + return a - 31999; + } + + test: + movi a3, 1 + addmi a3, a3, -0x7d00 + add a2, a2, a3 + ret + +This patch optimizes such case as follows: + + test: + addi a2, a2, 1 + addmi a2, a2, -0x7d00 + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Suppress unnecessary emitting nop insn in the split patterns for + integer/FP constant synthesis, and add new peephole2 pattern that + folds such synthesized additions. +--- + gcc/config/xtensa/xtensa.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 81b016859..b697e16db 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1036,6 +1036,7 @@ + FAIL; + if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) + emit_move_insn (operands[0], x); ++ DONE; + }) + + ;; 16-bit Integer moves +@@ -1277,6 +1278,7 @@ + x = gen_rtx_REG (SImode, REGNO (operands[0])); + if (! xtensa_constantsynth (x, l[i])) + emit_move_insn (x, GEN_INT (l[i])); ++ DONE; + }) + + ;; 64-bit floating point moves +@@ -2813,3 +2815,36 @@ + && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) + FAIL; + }) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand")) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (match_operand:SI 2 "const_int_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (plus:SI (match_operand:SI 4 "register_operand") ++ (match_dup 0)))] ++ "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]), ++ (-128 - 32768), (127 + 32512)) ++ && REGNO (operands[0]) != REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[4]) ++ && peep2_reg_dead_p (3, operands[0])" ++ [(set (match_dup 3) ++ (plus:SI (match_dup 4) ++ (match_dup 1))) ++ (set (match_dup 3) ++ (plus:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]); ++ int imm0, imm1; ++ value += 128; ++ if (value > 32512) ++ imm1 = 32512; ++ else ++ imm1 = value & ~255; ++ imm0 = value - imm1 - 128; ++ operands[1] = GEN_INT (imm0); ++ operands[2] = GEN_INT (imm1); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch b/patches/gcc10.2/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch new file mode 100644 index 0000000..8fc23d8 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch @@ -0,0 +1,92 @@ +From 7bed998154345cb072cd425b5d61734d3e0bac5d Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 1 Jul 2022 13:39:34 +0900 +Subject: [PATCH] xtensa: Minor fix for FP constant synthesis + +This patch fixes a non-fatal issue about negative 
constant values derived +from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'. + +And also replaces the dedicated code in FP constant synthesis split +pattern with the appropriate existing function call. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + In FP constant synthesis split pattern, subcontract to + avoid_constant_pool_reference() as in the case of integer, + because it can handle well too. And cast to int32_t before + calling xtensa_constantsynth() in order to ignore upper 32-bit. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_double.c: + Modify in order to catch the issue. +--- + gcc/config/xtensa/xtensa.md | 35 +++++-------------- + .../gcc.target/xtensa/constsynth_double.c | 2 +- + 2 files changed, 9 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b697e16db..6ef84b4f2 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1249,35 +1249,16 @@ + "! optimize_debug && reload_completed" + [(const_int 0)] + { +- int i = 0; +- rtx x = XEXP (operands[1], 0); +- long l[2]; +- if (SYMBOL_REF_P (x) +- && CONSTANT_POOL_ADDRESS_P (x)) +- x = get_pool_constant (x); +- else if (GET_CODE (x) == CONST) +- { +- x = XEXP (x, 0); +- gcc_assert (GET_CODE (x) == PLUS +- && SYMBOL_REF_P (XEXP (x, 0)) +- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) +- && CONST_INT_P (XEXP (x, 1))); +- i = INTVAL (XEXP (x, 1)); +- gcc_assert (i == 0 || i == 4); +- i /= 4; +- x = get_pool_constant (XEXP (x, 0)); +- } +- else +- gcc_unreachable (); +- if (GET_MODE (x) == SFmode) +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); +- else if (GET_MODE (x) == DFmode) +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); +- else ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ long l; ++ HOST_WIDE_INT value; ++ if (! 
CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode) + FAIL; ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + x = gen_rtx_REG (SImode, REGNO (operands[0])); +- if (! xtensa_constantsynth (x, l[i])) +- emit_move_insn (x, GEN_INT (l[i])); ++ value = (int32_t)l; ++ if (! xtensa_constantsynth (x, value)) ++ emit_move_insn (x, GEN_INT (value)); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +index 890ca5047..5fba6a986 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -5,7 +5,7 @@ void test(unsigned int count, double array[]) + { + unsigned int i; + for (i = 0; i < count; ++i) +- array[i] = 1.0; ++ array[i] = 8.988474246316506e+307; + } + + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch b/patches/gcc10.2/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch new file mode 100644 index 0000000..fcb3c72 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch @@ -0,0 +1,132 @@ +From afcf727f9c4174b104b594cbd14cba9c57de71d1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 15 Jul 2022 08:46:55 +0900 +Subject: [PATCH] xtensa: constantsynth: Make try to find shorter + instruction + +This patch allows the constant synthesis to choose shorter instruction +if possible. + + /* example */ + int test(void) { + return 128 << 8; + } + + ;; before + test: + movi a2, 0x100 + addmi a2, a2, 0x7f00 + ret.n + + ;; after + test: + movi.n a2, 1 + slli a2, a2, 15 + ret.n + +When the Code Density Option is configured, the latter is one byte smaller +than the former. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_constantsynth): Remove. 
+ (xtensa_constantsynth_2insn): Change to try all three synthetic + methods and to use the one that fits the immediate value of + the seed into a Narrow Move Immediate instruction "MOVI.N" + when the Code Density Option is configured. +--- + gcc/config/xtensa/xtensa.c | 58 +++++++++++++++++++------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b92ec9caa..a5330e52b 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1026,35 +1026,35 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + load-immediate / arithmetic ones, instead of a L32R instruction + (plus a constant in litpool). */ + +-static void +-xtensa_emit_constantsynth (rtx dst, enum rtx_code code, +- HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, +- rtx (*gen_op)(rtx, HOST_WIDE_INT), +- HOST_WIDE_INT imm2) +-{ +- gcc_assert (REG_P (dst)); +- emit_move_insn (dst, GEN_INT (imm0)); +- emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, +- dst, GEN_INT (imm1))); +- if (gen_op) +- emit_move_insn (dst, gen_op (dst, imm2)); +-} +- + static int + xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + rtx (*gen_op)(rtx, HOST_WIDE_INT), + HOST_WIDE_INT op_imm) + { +- int shift = exact_log2 (srcval + 1); ++ HOST_WIDE_INT imm = INT_MAX; ++ rtx x = NULL_RTX; ++ int shift; + ++ gcc_assert (REG_P (dst)); ++ ++ shift = exact_log2 (srcval + 1); + if (IN_RANGE (shift, 1, 31)) + { +- xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, +- gen_op, op_imm); +- return 1; ++ imm = -1; ++ x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift)); + } + +- if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ ++ shift = ctz_hwi (srcval); ++ if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95))) ++ && xtensa_simm12b (srcval >> shift)) ++ { ++ imm = srcval >> shift; ++ x = gen_ashlsi3 (dst, dst, GEN_INT (shift)); ++ } ++ ++ if ((!x || (TARGET_DENSITY && ! 
IN_RANGE (imm, -32, 95))) ++ && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) + { + HOST_WIDE_INT imm0, imm1; + +@@ -1067,19 +1067,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + imm0 = srcval - imm1; + if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) + imm0 -= 256, imm1 += 256; +- xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); +- return 1; ++ imm = imm0; ++ x = gen_addsi3 (dst, dst, GEN_INT (imm1)); + } + +- shift = ctz_hwi (srcval); +- if (xtensa_simm12b (srcval >> shift)) +- { +- xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, +- gen_op, op_imm); +- return 1; +- } ++ if (!x) ++ return 0; + +- return 0; ++ emit_move_insn (dst, GEN_INT (imm)); ++ emit_insn (x); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, op_imm)); ++ ++ return 1; + } + + static rtx +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch b/patches/gcc10.2/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch new file mode 100644 index 0000000..acf6d99 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch @@ -0,0 +1,177 @@ +From 5776497b68fcce6bf31835cf0a4d693e336bb2ca Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 14 Jul 2022 20:47:46 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND with imm1" followed by + "branch if (not) equal to imm2" + +This patch enhances the effectiveness of the previously posted one: +"xtensa: Optimize bitwise AND operation with some specific forms of constants". 
+ + /* example */ + extern void foo(int); + void test(int a) { + if ((a & (-1U << 8)) == (128 << 8)) /* 0 or one of "b4const" */ + foo(a); + } + + ;; before + .global test + test: + movi a3, -0x100 + movi.n a4, 1 + and a3, a2, a3 + slli a4, a4, 15 + bne a3, a4, .L3 + j.l foo, a9 + .L1: + ret.n + + ;; after + .global test + test: + srli a3, a2, 8 + bnei a3, 128, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): If the immediate for bitwise AND is + represented as '-(1 << N)', decrease the lower bound of N from 12 + to 1. And the other immediate for conditional branch is now no + longer limited to zero, but also one of some positive integers. + Finally, remove the checks of some conditions, because the comparison + expressions that don't satisfy such checks are determined as + compile-time constants and thus will be optimized away before + RTL expansion. +--- + gcc/config/xtensa/xtensa.md | 73 ++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6ef84b4f2..ca8b3913d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1721,63 +1721,78 @@ + + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && 
xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL (operands[1]) + 1)))" + "#" + "&& can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (ashift:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) << shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set (attr "length") +- (if_then_else (match_test "TARGET_DENSITY +- && INTVAL (operands[1]) == 0x7FFFFFFF") +- (const_int 5) +- (const_int 6)))]) ++ (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 0x7FFFFFFF) ++ && INTVAL (operands[2]) == 0") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[1]) == 0x7FFFFFFF ++ || INTVAL (operands[2]) == 0)") ++ (const_int 5) ++ (const_int 6))))]) + + (define_insn_and_split "*masktrue_const_negative_pow2" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && xtensa_b4const_or_zero (INTVAL (operands[2]) >> floor_log2 (-INTVAL (operands[1])))" + "#" + "&& 
can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (lshiftrt:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = floor_log2 (-INTVAL (operands[1])); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "6")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[2]) == 0") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn_and_split "*masktrue_const_shifted_mask" + [(set (pc) +@@ -1787,8 +1802,8 @@ + (match_operand:SI 2 "const_int_operand" "i")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +- "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 +- && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "/* (INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 // can be omitted ++ && */ xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch b/patches/gcc10.2/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch new file mode 100644 index 0000000..ec12c18 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch @@ -0,0 +1,167 @@ +From 7435ec0392c1f36bf3740c3a9748e7149c0c153e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 16 Jul 2022 14:44:02 +0900 +Subject: 
[PATCH] xtensa: Correct the relative RTX cost that corresponds to the + Move Immediate "MOVI" instruction + +This patch corrects the overestimation of the relative cost of +'(set (reg) (const_int N))' where N fits into the instruction itself. + +In fact, such overestimation confuses the RTL loop invariant motion pass. +As a result, it brings almost no negative impact from the speed point of +view, but additional reg-reg move instructions and register allocation +pressure about the size. + + /* example, optimized for size */ + extern int foo(void); + extern int array[16]; + void test_0(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = 1024; + } + void test_1(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = array[i] ? 1024 : 0; + } + void test_2(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = foo() ? 0 : 1024; + } + + ;; before + .literal_position + .literal .LC0, array + test_0: + l32r a3, .LC0 + movi.n a2, 0 + movi a4, 0x400 // OK + .L2: + s32i.n a4, a3, 0 + addi.n a2, a2, 1 + addi.n a3, a3, 4 + bnei a2, 16, .L2 + ret.n + .literal_position + .literal .LC1, array + test_1: + l32r a2, .LC1 + movi.n a3, 0 + movi a5, 0x400 // NG + .L6: + l32i.n a4, a2, 0 + beqz.n a4, .L5 + mov.n a4, a5 // should be "movi a4, 0x400" + .L5: + s32i.n a4, a2, 0 + addi.n a3, a3, 1 + addi.n a2, a2, 4 + bnei a3, 16, .L6 + ret.n + .literal_position + .literal .LC2, array + test_2: + addi sp, sp, -32 + s32i.n a12, sp, 24 + l32r a12, .LC2 + s32i.n a13, sp, 20 + s32i.n a14, sp, 16 + s32i.n a15, sp, 12 + s32i.n a0, sp, 28 + addi a13, a12, 64 + movi.n a15, 0 // NG + movi a14, 0x400 // and wastes callee-saved registers (only 4) + .L11: + call0 foo + mov.n a3, a14 // should be "movi a3, 0x400" + movnez a3, a15, a2 + s32i.n a3, a12, 0 + addi.n a12, a12, 4 + bne a12, a13, .L11 + l32i.n a0, sp, 28 + l32i.n a12, sp, 24 + l32i.n a13, sp, 20 + l32i.n a14, sp, 16 + l32i.n a15, sp, 
12 + addi sp, sp, 32 + ret.n + + ;; after + .literal_position + .literal .LC0, array + test_0: + l32r a3, .LC0 + movi.n a2, 0 + movi a4, 0x400 // OK + .L2: + s32i.n a4, a3, 0 + addi.n a2, a2, 1 + addi.n a3, a3, 4 + bnei a2, 16, .L2 + ret.n + .literal_position + .literal .LC1, array + test_1: + l32r a2, .LC1 + movi.n a3, 0 + .L6: + l32i.n a4, a2, 0 + beqz.n a4, .L5 + movi a4, 0x400 // OK + .L5: + s32i.n a4, a2, 0 + addi.n a3, a3, 1 + addi.n a2, a2, 4 + bnei a3, 16, .L6 + ret.n + .literal_position + .literal .LC2, array + test_2: + addi sp, sp, -16 + s32i.n a12, sp, 8 + l32r a12, .LC2 + s32i.n a13, sp, 4 + s32i.n a0, sp, 12 + addi a13, a12, 64 + .L11: + call0 foo + movi.n a3, 0 // OK + movi a4, 0x400 // and less register allocation pressure + moveqz a3, a4, a2 + s32i.n a3, a12, 0 + addi.n a12, a12, 4 + bne a12, a13, .L11 + l32i.n a0, sp, 12 + l32i.n a12, sp, 8 + l32i.n a13, sp, 4 + addi sp, sp, 16 + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): + Change the relative cost of '(set (reg) (const_int N))' where + N fits into signed 12-bit from 4 to 0 if optimizing for size. + And use the appropriate macro instead of the bare number 4. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a5330e52b..bd3489bfe 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4109,7 +4109,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case SET: + if (xtensa_simm12b (INTVAL (x))) + { +- *total = 4; ++ *total = speed ? 
COSTS_N_INSNS (1) : 0; + return true; + } + break; +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch b/patches/gcc10.2/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch new file mode 100644 index 0000000..f020eaa --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch @@ -0,0 +1,179 @@ +From f4d76407c8c33229f9b1d7b81e713ed10a5d408b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 21 Jul 2022 16:10:47 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND NOT with imm" followed by + "branch if (not) equal to zero" + +The RTL combiner will transform "if ((x & C) == C) goto label;" +into "if ((~x & C) == 0) goto label;" and will try to match it with +the insn patterns. + + /* example */ + void test_0(int a) { + if ((char)a == 255) + foo(); + } + void test_1(int a) { + if ((unsigned short)a == 0xFFFF) + foo(); + } + void test_2(int a) { + if ((a & 0x00003F80) != 0x00003F80) + foo(); + } + + ;; before + test_0: + extui a2, a2, 0, 8 + movi a3, 0xff + bne a2, a3, .L1 + j.l foo, a9 + .L1: + ret.n + test_1: + movi.n a3, -1 + extui a2, a2, 0, 16 + extui a3, a3, 16, 16 + bne a2, a3, .L3 + j.l foo, a9 + .L3: + ret.n + test_2: + movi a3, 0x80 + extui a2, a2, 7, 7 + addmi a3, a3, 0x3f00 + slli a2, a2, 7 + beq a2, a3, .L5 + j.l foo, a9 + .L5: + ret.n + + ;; after + test_0: + movi a3, 0xff + bnall a2, a3, .L1 + j.l foo, a9 + .L1: + ret.n + test_1: + movi.n a3, -1 + extui a3, a3, 16, 16 + bnall a2, a3, .L3 + j.l foo, a9 + .L3: + ret.n + test_2: + movi a3, 0x80 + addmi a3, a3, 0x3f00 + ball a2, a3, .L5 + j.l foo, a9 + .L5: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_const_bitcmpl): + Add a new insn_and_split pattern, and a few split patterns for + special cases. 
+--- + gcc/config/xtensa/xtensa.md | 84 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 84 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ca8b3913d..ed1e072fe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1719,6 +1719,90 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split "*masktrue_const_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "exact_log2 (INTVAL (operands[1])) < 0" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (match_dup 1)) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(and:SI (not:SI (match_dup 0)) ++ (match_dup 4)) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && IN_RANGE (INTVAL (operands[1]), -32, 95)") ++ (const_int 5) ++ (if_then_else (match_test "xtensa_simm12b (INTVAL (operands[1]))") ++ (const_int 6) ++ (const_int 10))))]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:HQI (not:SI (match_operand:SI 0 "register_operand")) 0) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] ++ "!BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (match_dup 3)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))] ++{ ++ operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (mode)) - 1); ++}) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:HI (not:SI (match_operand:SI 0 "register_operand")) 2) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] 
++ "BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (const_int 65535)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:QI (not:SI (match_operand:SI 0 "register_operand")) 3) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] ++ "BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (const_int 255)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))]) ++ + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) + (if_then_else (match_operator 4 "boolean_operator" +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch b/patches/gcc10.2/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch new file mode 100644 index 0000000..2f20939 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch @@ -0,0 +1,30 @@ +From 6c4824b9ee3272c7621639f873b6a4b38b5e117e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 28 Jul 2022 11:59:00 +0900 +Subject: [PATCH] xtensa: Add RTX costs for if_then_else + +It takes one machine instruction for both conditional branch and move. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): + Add new case for IF_THEN_ELSE. 
+--- + gcc/config/xtensa/xtensa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index bd3489bfe..b6f41a478 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4309,6 +4309,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case ZERO_EXTRACT: + case ZERO_EXTEND: ++ case IF_THEN_ELSE: + *total = COSTS_N_INSNS (1); + return true; + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch b/patches/gcc10.2/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch new file mode 100644 index 0000000..1840f9f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch @@ -0,0 +1,60 @@ +From 25b8acf68c6d262f75a84bbc8238e5c326c1b1bf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 30 Jul 2022 03:25:04 +0900 +Subject: [PATCH] xtensa: Fix conflicting hard regno between indirect + sibcall fixups and EH_RETURN_STACKADJ_RTX + +The hard register A10 was already allocated for EH_RETURN_STACKADJ_RTX. +(although exception handling and sibling call may not apply at the same time, + but for safety) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: Change hard register number used in + the split patterns for indirect sibling call fixups from 10 to 11, + the last free one for the CALL0 ABI. +--- + gcc/config/xtensa/xtensa.md | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ed1e072fe..9eeb73915 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,7 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) +- (A10_REG 10) ++ (A11_REG 11) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2300,9 +2300,9 @@ + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) + && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" +- [(set (reg:SI A10_REG) ++ [(set (reg:SI A11_REG) + (match_dup 0)) +- (call (mem:SI (reg:SI A10_REG)) ++ (call (mem:SI (reg:SI A11_REG)) + (match_dup 1))]) + + (define_expand "sibcall_value" +@@ -2333,10 +2333,10 @@ + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) + && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" +- [(set (reg:SI A10_REG) ++ [(set (reg:SI A11_REG) + (match_dup 1)) + (set (match_dup 0) +- (call (mem:SI (reg:SI A10_REG)) ++ (call (mem:SI (reg:SI A11_REG)) + (match_dup 2)))]) + + (define_insn "entry" +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch b/patches/gcc10.2/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch new file mode 100644 index 0000000..e381a8d --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch @@ -0,0 +1,38 @@ +From 624bf9fd927ada2d6d6dc34f5e0de704e7ee268f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 4 Aug 2022 19:56:27 +0900 +Subject: [PATCH] xtensa: Turn on -fsplit-wide-types-early by default + +Since GCC10, the "subreg2" optimization pass was no longer tied to enabling +"subreg1" unless -fsplit-wide-types-early was turned on (PR88233). However +on the Xtensa port, the lack of "subreg2" can degrade the quality of the +output code, especially for those that produce many D[FC]mode pseudos. + +This patch turns on -fsplit-wide-types-early by default in order to restore +the previous behavior. + +gcc/ChangeLog: + + * common/config/xtensa/xtensa-common.c + (xtensa_option_optimization_table): Add OPT_fsplit_wide_types_early + for OPT_LEVELS_ALL in order to restore pre-GCC10 behavior. 
+--- + gcc/common/config/xtensa/xtensa-common.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/gcc/common/config/xtensa/xtensa-common.c b/gcc/common/config/xtensa/xtensa-common.c +index dd751a14d..697a9eb22 100644 +--- a/gcc/common/config/xtensa/xtensa-common.c ++++ b/gcc/common/config/xtensa/xtensa-common.c +@@ -34,6 +34,8 @@ static const struct default_options xtensa_option_optimization_table[] = + assembler, so GCC cannot do a good job of reordering blocks. + Do not enable reordering unless it is explicitly requested. */ + { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 }, ++ /* Split multi-word types early (pre-GCC10 behavior). */ ++ { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch b/patches/gcc10.2/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch new file mode 100644 index 0000000..d94e38e --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch @@ -0,0 +1,171 @@ +From 745e9839f8d18724f31015a1dcbde2c2c513d3c5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 17 Aug 2022 14:54:16 +0900 +Subject: [PATCH] xtensa: Optimize stack pointer updates in function + pro/epilogue under certain conditions + +This patch enforces the use of "addmi" machine instruction instead of +addition/subtraction with two source registers for adjusting the stack +pointer, if the adjustment fits into a signed 16-bit and is also a multiple +of 256. 
+ + /* example */ + void test(void) { + char buffer[4096]; + __asm__(""::"m"(buffer)); + } + + ;; before + test: + movi.n a9, 1 + slli a9, a9, 12 + sub sp, sp, a9 + movi.n a9, 1 + slli a9, a9, 12 + add.n sp, sp, a9 + addi sp, sp, 0 + ret.n + + ;; after + test: + addmi sp, sp, -0x1000 + addmi sp, sp, 0x1000 + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): + Use an "addmi" machine instruction for updating the stack pointer + rather than addition/subtraction via hard register A9, if the amount + of change satisfies the literal value conditions of that instruction + when the CALL0 ABI is used. + (xtensa_expand_epilogue): Ditto. + And also inhibit the stack pointer addition of constant zero. +--- + gcc/config/xtensa/xtensa.c | 79 ++++++++++++++++++++++++++------------ + 1 file changed, 54 insertions(+), 25 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b6f41a478..a93b15f4d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3186,7 +3186,6 @@ xtensa_expand_prologue (void) + rtx_insn *insn = NULL; + rtx note_rtx; + +- + total_size = compute_frame_size (get_frame_size ()); + + if (flag_stack_usage_info) +@@ -3242,10 +3241,17 @@ xtensa_expand_prologue (void) + } + else + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); ++ if (xtensa_simm8x256 (-total_size)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-total_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (total_size)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, tmp_reg)); ++ } + RTX_FRAME_RELATED_P (insn) = 1; + note_rtx = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -3273,11 +3279,19 @@ xtensa_expand_prologue (void) + if 
(total_size > 1024 + || (!callee_save_size && total_size > 128)) + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size - +- callee_save_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); ++ if (xtensa_simm8x256 (callee_save_size - total_size)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (callee_save_size - ++ total_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (total_size - ++ callee_save_size)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, tmp_reg)); ++ } + RTX_FRAME_RELATED_P (insn) = 1; + note_rtx = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -3351,12 +3365,21 @@ xtensa_expand_epilogue (bool sibcall_p) + + if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- tmp_reg)); ++ if (xtensa_simm8x256 (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size)) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? ++ hard_frame_pointer_rtx : stack_pointer_rtx, ++ GEN_INT (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size)); ++ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? 
++ hard_frame_pointer_rtx : stack_pointer_rtx, ++ tmp_reg)); ++ } + offset = cfun->machine->callee_save_size - UNITS_PER_WORD; + } + else +@@ -3396,18 +3419,24 @@ xtensa_expand_epilogue (bool sibcall_p) + offset = cfun->machine->current_frame_size; + else + offset = cfun->machine->callee_save_size; +- +- emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (offset))); ++ if (offset) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (offset))); + } + else + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, +- GEN_INT (cfun->machine->current_frame_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- tmp_reg)); ++ if (xtensa_simm8x256 (cfun->machine->current_frame_size)) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (cfun->machine->current_frame_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, ++ GEN_INT (cfun->machine->current_frame_size)); ++ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ tmp_reg)); ++ } + } + } + +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch b/patches/gcc10.2/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch new file mode 100644 index 0000000..a6e870f --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch @@ -0,0 +1,166 @@ +From d8f7137070d92c297e1deecd6dabdb471ddaa9ab Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 18 Aug 2022 01:11:32 +0900 +Subject: [PATCH] xtensa: Improve indirect sibling call handling + +No longer needs the dedicated hard register (A11) for the address of the +call and the split patterns for fixups, due to the introduction of appropriate +register class and constraint. 
+ +(Note: "ISC_REGS" contains a hard register A8 used as a "static chain" + pointer for nested functions, but no problem; Pointer to nested function + actually points to "trampoline", and trampoline itself doesn't receive + "static chain" pointer to its parent's stack frame from the caller.) + +gcc/ChangeLog: + + * config/xtensa/xtensa.h + (enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): + Add new register class "ISC_REGS". + * config/xtensa/constraints.md (c): Add new register constraint. + * config/xtensa/xtensa.md (define_constants): Remove "A11_REG". + (sibcall_internal, sibcall_value_internal): + Change to use the new register constraint, and remove two split + patterns for fixups that are no longer needed. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: Add a new test function to ensure + that registers for arguments (occupy from A2 to A7) and for indirect + sibcall (should be assigned to A8) neither conflict nor spill out. +--- + gcc/config/xtensa/constraints.md | 5 ++++ + gcc/config/xtensa/xtensa.h | 3 +++ + gcc/config/xtensa/xtensa.md | 29 ++-------------------- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 5 ++++ + 4 files changed, 15 insertions(+), 27 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 13b3daafc..f590dcf3a 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -27,6 +27,11 @@ + "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa + Boolean Option is configured.") + ++(define_register_constraint "c" "TARGET_WINDOWED_ABI ? NO_REGS : ISC_REGS" ++ "@internal ++ General-purpose AR registers for indirect sibling calls, @code{a2}- ++ @code{a8}.") ++ + (define_register_constraint "d" "TARGET_DENSITY ? 
AR_REGS: NO_REGS" + "@internal + All AR registers, including sp, but only if the Xtensa Code Density +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 3e9cbc943..ee2238606 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -372,6 +372,7 @@ enum reg_class + FP_REGS, /* floating point registers */ + ACC_REG, /* MAC16 accumulator */ + SP_REG, /* sp register (aka a1) */ ++ ISC_REGS, /* registers for indirect sibling calls */ + RL_REGS, /* preferred reload regs (not sp or fp) */ + GR_REGS, /* integer registers except sp */ + AR_REGS, /* all integer registers */ +@@ -393,6 +394,7 @@ enum reg_class + "FP_REGS", \ + "ACC_REG", \ + "SP_REG", \ ++ "ISC_REGS", \ + "RL_REGS", \ + "GR_REGS", \ + "AR_REGS", \ +@@ -409,6 +411,7 @@ enum reg_class + { 0xfff80000, 0x00000007 }, /* floating-point registers */ \ + { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \ + { 0x00000002, 0x00000000 }, /* stack pointer register */ \ ++ { 0x000001fc, 0x00000000 }, /* registers for indirect sibling calls */ \ + { 0x0000fffd, 0x00000000 }, /* preferred reload registers */ \ + { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \ + { 0x0003ffff, 0x00000000 }, /* integer registers */ \ +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eeb73915..0c05c16b1 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,7 +25,6 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) +- (A11_REG 11) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2284,7 +2283,7 @@ + }) + + (define_insn "sibcall_internal" +- [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nic")) + (match_operand 1 "" "i"))] + "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" + { +@@ -2294,17 +2293,6 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_split +- [(call (mem:SI (match_operand:SI 0 "register_operand")) +- (match_operand 1 ""))] +- 
"reload_completed +- && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && ! call_used_or_fixed_reg_p (REGNO (operands[0]))" +- [(set (reg:SI A11_REG) +- (match_dup 0)) +- (call (mem:SI (reg:SI A11_REG)) +- (match_dup 1))]) +- + (define_expand "sibcall_value" + [(set (match_operand 0 "register_operand" "") + (call (match_operand 1 "memory_operand" "") +@@ -2316,7 +2304,7 @@ + + (define_insn "sibcall_value_internal" + [(set (match_operand 0 "register_operand" "=a") +- (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nic")) + (match_operand 2 "" "i")))] + "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" + { +@@ -2326,19 +2314,6 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_split +- [(set (match_operand 0 "register_operand") +- (call (mem:SI (match_operand:SI 1 "register_operand")) +- (match_operand 2 "")))] +- "reload_completed +- && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" +- [(set (reg:SI A11_REG) +- (match_dup 1)) +- (set (match_dup 0) +- (call (mem:SI (reg:SI A11_REG)) +- (match_dup 2)))]) +- + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +index d2b3fccf1..dff6750e2 100644 +--- a/gcc/testsuite/gcc.target/xtensa/sibcalls.c ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -17,4 +17,9 @@ int test_2(int (*a)(void)) { + return a(); + } + ++_Complex double test_3(_Complex double a, _Complex double (*b)(_Complex double, double)) { ++ bar(-1); ++ return b(a, 3.141592653589795); ++} ++ + /* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.2/gcc-xtensa-0048-add-static-PIE-support.patch b/patches/gcc10.2/gcc-xtensa-0048-add-static-PIE-support.patch new file mode 100644 index 0000000..f16832b --- /dev/null +++ 
b/patches/gcc10.2/gcc-xtensa-0048-add-static-PIE-support.patch @@ -0,0 +1,31 @@ +From 5773838c193d36476109de77d230391f6738bb62 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 12 Aug 2022 21:02:15 -0700 +Subject: [PATCH] xtensa: gcc: add static PIE support + +gcc/ + * config/xtensa/linux.h (LINK_SPEC): Add static-pie. +--- + gcc/config/xtensa/linux.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h +index 62a33a9bc..49796c97f 100644 +--- a/gcc/config/xtensa/linux.h ++++ b/gcc/config/xtensa/linux.h +@@ -52,9 +52,10 @@ along with GCC; see the file COPYING3. If not see + #define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ +- %{!static: \ ++ %{!static:%{!static-pie: \ + %{rdynamic:-export-dynamic} \ +- -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ ++ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ ++ %{static-pie:-static -pie --no-dynamic-linker -z text} \ + %{static:-static}}" + + #undef LOCAL_LABEL_PREFIX +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch b/patches/gcc10.2/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch new file mode 100644 index 0000000..0c11972 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch @@ -0,0 +1,300 @@ +From 55fbffc224d951aca1eab3cbfb74c540e7ef2f3f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 28 Aug 2022 22:42:25 +0900 +Subject: [PATCH] xtensa: Eliminate unused stack frame allocation/freeing + +In the example below, 'x' is once placed on the stack frame and then read +into registers as the argument value of bar(): + + /* example */ + struct foo { + int a, b; + }; + extern struct foo bar(struct foo); + struct foo test(void) { + struct foo x = { 0, 1 }; + return bar(x); + } + +Thanks to the dead store elimination, the initialization of 'x' turns into +merely loading the immediates to 
registers, but corresponding stack frame +growth is not rolled back. As a result: + + ;; prereq: the CALL0 ABI + ;; before + test: + addi sp, sp, -16 // unused stack frame allocation/freeing + movi.n a2, 0 + movi.n a3, 1 + addi sp, sp, 16 // because no instructions that refer to + j.l bar, a9 // the stack pointer between the two + +This patch eliminates such unused stack frame allocation/freeing: + + ;; after + test: + movi.n a2, 0 + movi.n a3, 1 + j.l bar, a9 + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (machine_function): New boolean member as + a flag that controls whether to emit the insns for stack pointer + adjustment inside of the pro/epilogue. + (xtensa_emit_adjust_stack_ptr): New function to share the common + codes and to emit insns if not inhibited. + (xtensa_expand_epilogue): Change to use the function mentioned + above when using the CALL0 ABI. + (xtensa_expand_prologue): Ditto. + And also change to set the inhibit flag used by + xtensa_emit_adjust_stack_ptr() to true if the stack pointer is only + used for its own adjustment. 
+--- + gcc/config/xtensa/xtensa.c | 164 ++++++++++++++++++------------------- + 1 file changed, 80 insertions(+), 84 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a93b15f4d..97291fc0f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -102,6 +102,7 @@ struct GTY(()) machine_function + int callee_save_size; + bool frame_laid_out; + bool epilogue_done; ++ bool inhibit_logues_a1_adjusts; + }; + + /* Vector, indexed by hard register number, which contains 1 for a +@@ -3084,7 +3085,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode mode, int labelno) + } + + static bool +-xtensa_call_save_reg(int regno) ++xtensa_call_save_reg (int regno) + { + if (TARGET_WINDOWED_ABI) + return false; +@@ -3120,7 +3121,7 @@ compute_frame_size (poly_int64 size) + cfun->machine->callee_save_size = 0; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) + { +- if (xtensa_call_save_reg(regno)) ++ if (xtensa_call_save_reg (regno)) + cfun->machine->callee_save_size += UNITS_PER_WORD; + } + +@@ -3175,6 +3176,49 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED) + return offset; + } + ++#define ADJUST_SP_NONE 0x0 ++#define ADJUST_SP_NEED_NOTE 0x1 ++#define ADJUST_SP_FRAME_PTR 0x2 ++static void ++xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) ++{ ++ rtx_insn *insn; ++ rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? 
hard_frame_pointer_rtx ++ : stack_pointer_rtx; ++ ++ if (cfun->machine->inhibit_logues_a1_adjusts) ++ return; ++ ++ if (xtensa_simm8 (offset) ++ || xtensa_simm8x256 (offset)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ ++ if (offset < 0) ++ { ++ emit_move_insn (tmp_reg, GEN_INT (-offset)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg)); ++ } ++ else ++ { ++ emit_move_insn (tmp_reg, GEN_INT (offset)); ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg)); ++ } ++ } ++ ++ if (flags & ADJUST_SP_NEED_NOTE) ++ { ++ rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ offset)); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } ++} ++ + /* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ + #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) +@@ -3210,17 +3254,30 @@ xtensa_expand_prologue (void) + int regno; + HOST_WIDE_INT offset = 0; + int callee_save_size = cfun->machine->callee_save_size; ++ df_ref ref; ++ bool stack_pointer_needed = frame_pointer_needed ++ || crtl->calls_eh_return; ++ ++ /* Check if the function body really needs the stack pointer. */ ++ if (!stack_pointer_needed) ++ for (ref = DF_REG_USE_CHAIN (A1_REG); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) == DF_REF_REGULAR ++ && NONJUMP_INSN_P (DF_REF_INSN (ref))) ++ stack_pointer_needed = true; ++ /* Check if callee-saved registers really need saving to the stack. */ ++ if (!stack_pointer_needed) ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (xtensa_call_save_reg (regno)) ++ stack_pointer_needed = true; ++ ++ cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed; + + /* -128 is a limit of single addi instruction. 
*/ + if (IN_RANGE (total_size, 1, 128)) + { +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-total_size))); +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-total_size, ++ ADJUST_SP_NEED_NOTE); + offset = total_size - UNITS_PER_WORD; + } + else if (callee_save_size) +@@ -3230,33 +3287,14 @@ xtensa_expand_prologue (void) + * move it to its final location. */ + if (total_size > 1024) + { +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-callee_save_size))); +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -callee_save_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-callee_save_size, ++ ADJUST_SP_NEED_NOTE); + offset = callee_save_size - UNITS_PER_WORD; + } + else + { +- if (xtensa_simm8x256 (-total_size)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-total_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); +- } +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-total_size, ++ ADJUST_SP_NEED_NOTE); + offset = total_size - UNITS_PER_WORD; + } + } +@@ -3278,27 +3316,8 @@ xtensa_expand_prologue (void) + } + if (total_size > 1024 + || (!callee_save_size && total_size > 128)) +- { +- if (xtensa_simm8x256 (callee_save_size - total_size)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT 
(callee_save_size - +- total_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size - +- callee_save_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); +- } +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- callee_save_size - +- total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); +- } ++ xtensa_emit_adjust_stack_ptr (callee_save_size - total_size, ++ ADJUST_SP_NEED_NOTE); + } + + if (frame_pointer_needed) +@@ -3365,21 +3384,11 @@ xtensa_expand_epilogue (bool sibcall_p) + + if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) + { +- if (xtensa_simm8x256 (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)) +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- tmp_reg)); +- } ++ xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size, ++ frame_pointer_needed ++ ? 
ADJUST_SP_FRAME_PTR ++ : ADJUST_SP_NONE); + offset = cfun->machine->callee_save_size - UNITS_PER_WORD; + } + else +@@ -3420,24 +3429,11 @@ xtensa_expand_epilogue (bool sibcall_p) + else + offset = cfun->machine->callee_save_size; + if (offset) +- emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (offset))); ++ xtensa_emit_adjust_stack_ptr (offset, ADJUST_SP_NONE); + } + else +- { +- if (xtensa_simm8x256 (cfun->machine->current_frame_size)) +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (cfun->machine->current_frame_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, +- GEN_INT (cfun->machine->current_frame_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- tmp_reg)); +- } +- } ++ xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size, ++ ADJUST_SP_NONE); + } + + if (crtl->calls_eh_return) +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch b/patches/gcc10.2/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch new file mode 100644 index 0000000..c39608c --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch @@ -0,0 +1,111 @@ +From 78eac52fe49e1463bec7a838dd172b970412927b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 30 Aug 2022 21:28:51 +0900 +Subject: [PATCH] xtensa: Make complex hard register clobber elimination + more robust and accurate + +This patch eliminates all clobbers for complex hard registers that will +be overwritten entirely afterwards (supersedence of +3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9). + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: Rewrite the split pattern that performs + the abovementioned process so that insns that overwrite clobbered + register no longer need to be contiguous. + (DSC): Remove as no longer needed. 
+--- + gcc/config/xtensa/xtensa.md | 67 +++++++++++++++++++++++++------------ + 1 file changed, 45 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 0c05c16b1..ec4a69e30 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -86,10 +86,6 @@ + ;; This code iterator is for *shlrd and its variants. + (define_code_iterator ior_op [ior plus]) + +-;; This mode iterator allows the DC and SC patterns to be defined from +-;; the same template. +-(define_mode_iterator DSC [DC SC]) +- + + ;; Attributes. + +@@ -2848,27 +2844,54 @@ + }) + + (define_split +- [(clobber (match_operand:DSC 0 "register_operand"))] +- "GP_REG_P (REGNO (operands[0]))" ++ [(clobber (match_operand 0 "register_operand"))] ++ "HARD_REGISTER_P (operands[0]) ++ && COMPLEX_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] + { +- unsigned int regno = REGNO (operands[0]); +- machine_mode inner_mode = GET_MODE_INNER (mode); ++ auto_sbitmap bmp (FIRST_PSEUDO_REGISTER); + rtx_insn *insn; +- rtx x; +- if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) +- && NONJUMP_INSN_P (insn) +- && GET_CODE (x = PATTERN (insn)) == SET +- && REG_P (x = XEXP (x, 0)) +- && GET_MODE (x) == inner_mode +- && REGNO (x) == regno +- && (insn = next_nonnote_nondebug_insn (insn)) +- && NONJUMP_INSN_P (insn) +- && GET_CODE (x = PATTERN (insn)) == SET +- && REG_P (x = XEXP (x, 0)) +- && GET_MODE (x) == inner_mode +- && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) +- FAIL; ++ rtx reg = gen_rtx_REG (SImode, 0); ++ bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0])); ++ for (insn = next_nonnote_nondebug_insn_bb (curr_insn); ++ insn; insn = next_nonnote_nondebug_insn_bb (insn)) ++ { ++ sbitmap_iterator iter; ++ unsigned int regno; ++ if (NONJUMP_INSN_P (insn)) ++ { ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ { ++ set_regno_raw (reg, regno, REG_NREGS (reg)); ++ if (reg_overlap_mentioned_p (reg, PATTERN (insn))) ++ break; ++ } ++ if (GET_CODE (PATTERN (insn)) == SET) ++ { ++ rtx x = SET_DEST (PATTERN (insn)); ++ if (REG_P (x) && HARD_REGISTER_P (x)) ++ bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x)); ++ else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x))) ++ { ++ struct subreg_info info; ++ subreg_get_info (regno = REGNO (SUBREG_REG (x)), ++ GET_MODE (SUBREG_REG (x)), ++ SUBREG_BYTE (x), GET_MODE (x), &info); ++ if (!info.representable_p) ++ break; ++ bitmap_clear_range (bmp, regno + info.offset, info.nregs); ++ } ++ } ++ if (bitmap_empty_p (bmp)) ++ goto FALLTHRU; ++ } ++ else if (CALL_P (insn)) ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ if (call_used_or_fixed_reg_p (regno)) ++ break; ++ } ++ FAIL; ++FALLTHRU:; + }) + + (define_peephole2 +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch b/patches/gcc10.2/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch new file mode 100644 index 0000000..0f6d156 --- /dev/null +++ 
b/patches/gcc10.2/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch @@ -0,0 +1,91 @@ +From dc825d410b7a3025d3b902f83bb3e360ac42f477 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 3 Sep 2022 12:27:51 +0900 +Subject: [PATCH] xtensa: constantsynth: Add new 3-insns synthesis pattern + +This patch adds a new 3-instructions constant synthesis pattern: + +- A value that can fit into a signed 12-bit after a number of either bitwise + left or right rotations: + => "MOVI(.N) Ax, simm12" + "SSAI (1 ... 11) or (21 ... 31)" + + "SRC Ax, Ax, Ax" + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_constantsynth): + Add new pattern for the abovementioned case. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_3insns.c (test_4): + Add new test function. +--- + gcc/config/xtensa/xtensa.c | 31 +++++++++++++++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 11 +++++++ + 2 files changed, 42 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 97291fc0f..baee55ce3 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1133,6 +1133,37 @@ xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) + xtensa_constantsynth_rtx_ADDSUBX, + divisor)) + return 1; ++ ++ /* loading simm12 followed by left/right bitwise rotation: ++ MOVI + SSAI + SRC. 
*/ ++ if ((srcval & 0x001FF800) == 0 ++ || (srcval & 0x001FF800) == 0x001FF800) ++ { ++ int32_t v; ++ ++ for (shift = 1; shift < 12; ++shift) ++ { ++ v = (int32_t)(((uint32_t)srcval >> shift) ++ | ((uint32_t)srcval << (32 - shift))); ++ if (xtensa_simm12b(v)) ++ { ++ emit_move_insn (dst, GEN_INT (v)); ++ emit_insn (gen_rotlsi3 (dst, dst, GEN_INT (shift))); ++ return 1; ++ } ++ } ++ for (shift = 1; shift < 12; ++shift) ++ { ++ v = (int32_t)(((uint32_t)srcval << shift) ++ | ((uint32_t)srcval >> (32 - shift))); ++ if (xtensa_simm12b(v)) ++ { ++ emit_move_insn (dst, GEN_INT (v)); ++ emit_insn (gen_rotrsi3 (dst, dst, GEN_INT (shift))); ++ return 1; ++ } ++ } ++ } + } + + return 0; +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +index f3c4a1c7c..831288c7d 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -21,4 +21,15 @@ void test_3(int *p) + *p = 192437; + } + ++struct foo ++{ ++ unsigned int b : 10; ++ unsigned int g : 11; ++ unsigned int r : 11; ++}; ++void test_4(struct foo *p, unsigned int v) ++{ ++ p->g = v; ++} ++ + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0052-fix-builtin_apply-return-value.patch b/patches/gcc10.2/gcc-xtensa-0052-fix-builtin_apply-return-value.patch new file mode 100644 index 0000000..375f437 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0052-fix-builtin_apply-return-value.patch @@ -0,0 +1,81 @@ +From 4df06b8339667e15107034842185300cca85c51c Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Sat, 10 Sep 2022 17:31:07 -0700 +Subject: [PATCH] xtensa: gcc: fix builtin_apply return value + +xtensa may use up to 4 registers to return a value from a function, but +recognition of only one register in the xtensa_function_value_regno_p +and missing untyped_call pattern result in that only one register is +saved by the __builtin_apply and 
returned by the __builtin_apply_return. + +gcc/ + * config/xtensa/xtensa.c (xtensa_function_value_regno_p): + Recognize all 4 return registers. + * config/xtensa/xtensa.h (GP_RETURN_REG_COUNT): New definition. + * config/xtensa/xtensa.md (untyped_call): New pattern. +--- + gcc/config/xtensa/xtensa.c | 2 +- + gcc/config/xtensa/xtensa.h | 1 + + gcc/config/xtensa/xtensa.md | 21 +++++++++++++++++++++ + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index baee55ce3..ad4940913 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4508,7 +4508,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + static bool + xtensa_function_value_regno_p (const unsigned int regno) + { +- return (regno == GP_RETURN); ++ return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT); + } + + /* The static chain is passed in memory. Provide rtx giving 'mem' +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index ee2238606..3a986fa1c 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -477,6 +477,7 @@ enum reg_class + point, and values of coprocessor and user-defined modes. */ + #define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) + #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) ++#define GP_RETURN_REG_COUNT 4 + + /* Symbolic macros for the first/last argument registers. 
*/ + #define GP_ARG_FIRST (GP_REG_FIRST + 2) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ec4a69e30..c18640b25 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2310,6 +2310,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "untyped_call" ++ [(parallel [(call (match_operand 0 "") ++ (const_int 0)) ++ (match_operand 1 "") ++ (match_operand 2 "")])] ++ "" ++{ ++ int i; ++ ++ emit_call_insn (gen_call (operands[0], const0_rtx)); ++ ++ for (i = 0; i < XVECLEN (operands[2], 0); i++) ++ { ++ rtx set = XVECEXP (operands[2], 0, i); ++ emit_move_insn (SET_DEST (set), SET_SRC (set)); ++ } ++ ++ emit_insn (gen_blockage ()); ++ DONE; ++}) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch b/patches/gcc10.2/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch new file mode 100644 index 0000000..4384596 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch @@ -0,0 +1,164 @@ +From 3778aeadea08b3f630b89d711f634f967e8d24b3 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Thu, 14 Jul 2022 02:39:59 -0700 +Subject: [PATCH] xtensa: gcc: implement MI thunk generation for call0 ABI + +gcc/ + * config/xtensa/xtensa.c (xtensa_can_output_mi_thunk) + (xtensa_output_mi_thunk): New functions. + (TARGET_ASM_CAN_OUTPUT_MI_THUNK) + (TARGET_ASM_OUTPUT_MI_THUNK): New macro definitions. + (xtensa_prepare_expand_call): Use fixed register a8 as temporary + when called with reload_completed set to 1. 
+--- + gcc/config/xtensa/xtensa.c | 115 ++++++++++++++++++++++++++++++++++++- + 1 file changed, 114 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ad4940913..0ccc63fdf 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -189,6 +189,14 @@ static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + static bool xtensa_function_ok_for_sibcall (tree, tree); ++static bool xtensa_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, ++ const_tree function ATTRIBUTE_UNUSED); ++static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, ++ tree function); + + + +@@ -342,6 +350,12 @@ static bool xtensa_function_ok_for_sibcall (tree, tree); + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall + ++#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK xtensa_can_output_mi_thunk ++ ++#undef TARGET_ASM_OUTPUT_MI_THUNK ++#define TARGET_ASM_OUTPUT_MI_THUNK xtensa_output_mi_thunk ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2164,7 +2178,16 @@ xtensa_prepare_expand_call (int callop, rtx *operands) + addr = gen_sym_PLT (addr); + + if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++ { ++ /* This may be called while generating MI thunk when we pretend ++ that reload is over. Use a8 as a temporary register in that case. */ ++ rtx reg = can_create_pseudo_p () ++ ? 
copy_to_mode_reg (Pmode, addr) ++ : copy_to_suggested_reg (addr, ++ gen_rtx_REG (Pmode, A8_REG), ++ Pmode); ++ XEXP (operands[callop], 0) = reg; ++ } + } + + +@@ -5008,4 +5031,94 @@ xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_U + return true; + } + ++static bool ++xtensa_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, ++ const_tree function ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ ++/* Output code to add DELTA to the first argument, and then jump ++ to FUNCTION. Used for C++ multiple inheritance. */ ++static void ++xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, ++ tree function) ++{ ++ rtx this_rtx; ++ rtx funexp; ++ rtx_insn *insn; ++ int this_reg_no; ++ rtx temp0 = gen_rtx_REG (Pmode, A9_REG); ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); ++ ++ reload_completed = 1; ++ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_reg_no = 3; ++ else ++ this_reg_no = 2; ++ ++ this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no); ++ ++ if (delta) ++ { ++ if (xtensa_simm8 (delta)) ++ emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta))); ++ else ++ { ++ emit_move_insn (temp0, GEN_INT (delta)); ++ emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0)); ++ } ++ } ++ ++ if (vcall_offset) ++ { ++ rtx temp1 = gen_rtx_REG (Pmode, A0_REG + 10); ++ rtx addr = temp1; ++ ++ emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx)); ++ if (xtensa_uimm8x4 (vcall_offset)) ++ addr = plus_constant (Pmode, temp0, vcall_offset); ++ else if (xtensa_simm8 (vcall_offset)) ++ emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset))); ++ else ++ { ++ emit_move_insn (temp1, GEN_INT (vcall_offset)); ++ emit_insn (gen_addsi3 (temp1, temp0, temp1)); ++ } ++ emit_move_insn (temp1, 
gen_rtx_MEM (Pmode, addr)); ++ emit_insn (gen_add2_insn (this_rtx, temp1)); ++ } ++ ++ /* Generate a tail call to the target function. */ ++ if (!TREE_USED (function)) ++ { ++ assemble_external (function); ++ TREE_USED (function) = 1; ++ } ++ ++ funexp = XEXP (DECL_RTL (function), 0); ++ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); ++ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ ++ insn = get_insns (); ++ shorten_branches (insn); ++ assemble_start_function (thunk, fnname); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); ++ assemble_end_function (thunk, fnname); ++ ++ /* Stop pretending to be a post-reload pass. */ ++ reload_completed = 0; ++} ++ + #include "gt-xtensa.h" +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0054-enable-section-anchors-support.patch b/patches/gcc10.2/gcc-xtensa-0054-enable-section-anchors-support.patch new file mode 100644 index 0000000..cca2ff4 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0054-enable-section-anchors-support.patch @@ -0,0 +1,29 @@ +From 6e38872f54f49c0b3b3f72668dcdbfa66007ceb6 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 16 Sep 2022 20:56:39 -0700 +Subject: [PATCH] xtensa: gcc: enable section anchors support + +gcc/ + * config/xtensa/xtensa.c (TARGET_MAX_ANCHOR_OFFSET): New + definition. 
+--- + gcc/config/xtensa/xtensa.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0ccc63fdf..ba4dd47c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -356,6 +356,9 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK xtensa_output_mi_thunk + ++#undef TARGET_MAX_ANCHOR_OFFSET ++#define TARGET_MAX_ANCHOR_OFFSET 1020 ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch b/patches/gcc10.2/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch new file mode 100644 index 0000000..11f1d50 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch @@ -0,0 +1,301 @@ +From 2fa3f80877ab2b7a06403097c09fbc4bc892d6e3 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 14 Oct 2022 19:43:23 +0900 +Subject: [PATCH] xtensa: Prepare the transition from Reload to LRA + +This patch provides the first step in the transition from Reload to LRA +in Xtensa. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_split1_finished_p, xtensa_split_DI_reg_imm): New prototypes. + * config/xtensa/xtensa.c + (xtensa_split1_finished_p, xtensa_split_DI_reg_imm, xtensa_lra_p): + New functions. + (TARGET_LRA_P): Replace the dummy hook with xtensa_lra_p. + (xt_true_regnum): Rework. + * config/xtensa/xtensa.h (CALL_REALLY_USED_REGISTERS): + Switch from CALL_USED_REGISTERS, and revise the comment. + * config/xtensa/constraints.md (Y): + Use !xtensa_split1_finished_p() instead of can_create_pseudo_p(). + * config/xtensa/predicates.md (move_operand): Ditto. 
+ * config/xtensa/xtensa.md: Add two new split patterns: + - splits DImode immediate load into two SImode ones + - puts out-of-constraint SImode constants into the constant pool + * config/xtensa/xtensa.opt (-mlra): New target-specific option + for testing purpose. +--- + gcc/config/xtensa/constraints.md | 2 +- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 69 ++++++++++++++++++++++++++----- + gcc/config/xtensa/xtensa.h | 8 ++-- + gcc/config/xtensa/xtensa.md | 36 ++++++++++++---- + gcc/config/xtensa/xtensa.opt | 4 ++ + 7 files changed, 99 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index f590dcf3a..a2cb57000 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -121,7 +121,7 @@ + (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "TARGET_AUTO_LITPOOLS")) + (and (match_code "const_int") +- (match_test "can_create_pseudo_p ()")))) ++ (match_test "! xtensa_split1_finished_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 633cc6264..09d9a5770 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -149,7 +149,7 @@ + (ior (and (match_code "const_int") + (match_test "(GET_MODE_CLASS (mode) == MODE_INT + && xtensa_simm12b (INTVAL (op))) +- || can_create_pseudo_p ()")) ++ || ! 
xtensa_split1_finished_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 75ed3bfb0..63b147a90 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -58,6 +58,8 @@ extern char *xtensa_emit_call (int, rtx *); + extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); ++extern bool xtensa_split1_finished_p (void); ++extern void xtensa_split_DI_reg_imm (rtx *); + + #ifdef TREE_CODE + extern void init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba4dd47c5..658d19924 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see + #include "hw-doloop.h" + #include "rtl-iter.h" + #include "insn-attr.h" ++#include "tree-pass.h" + + /* This file should be included last. 
*/ + #include "target-def.h" +@@ -197,6 +198,7 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function); ++static bool xtensa_lra_p (void); + + + +@@ -291,7 +293,7 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem + + #undef TARGET_LRA_P +-#define TARGET_LRA_P hook_bool_void_false ++#define TARGET_LRA_P xtensa_lra_p + + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p +@@ -482,21 +484,30 @@ xtensa_mask_immediate (HOST_WIDE_INT v) + int + xt_true_regnum (rtx x) + { +- if (GET_CODE (x) == REG) ++ if (REG_P (x)) + { +- if (reg_renumber +- && REGNO (x) >= FIRST_PSEUDO_REGISTER +- && reg_renumber[REGNO (x)] >= 0) ++ if (! HARD_REGISTER_P (x) ++ && reg_renumber ++ && (lra_in_progress || reg_renumber[REGNO (x)] >= 0)) + return reg_renumber[REGNO (x)]; + return REGNO (x); + } +- if (GET_CODE (x) == SUBREG) ++ if (SUBREG_P (x)) + { + int base = xt_true_regnum (SUBREG_REG (x)); +- if (base >= 0 && base < FIRST_PSEUDO_REGISTER) +- return base + subreg_regno_offset (REGNO (SUBREG_REG (x)), +- GET_MODE (SUBREG_REG (x)), +- SUBREG_BYTE (x), GET_MODE (x)); ++ ++ if (base >= 0 ++ && HARD_REGISTER_NUM_P (base)) ++ { ++ struct subreg_info info; ++ ++ subreg_get_info (lra_in_progress ++ ? (unsigned) base : REGNO (SUBREG_REG (x)), ++ GET_MODE (SUBREG_REG (x)), ++ SUBREG_BYTE (x), GET_MODE (x), &info); ++ if (info.representable_p) ++ return base + info.offset; ++ } + } + return -1; + } +@@ -2468,6 +2479,36 @@ xtensa_shlrd_which_direction (rtx op0, rtx op1) + } + + ++/* Return true after "split1" pass has been finished. 
*/ ++ ++bool ++xtensa_split1_finished_p (void) ++{ ++ return cfun && (cfun->curr_properties & PROP_rtl_split_insns); ++} ++ ++ ++/* Split a DImode pair of reg (operand[0]) and const_int (operand[1]) into ++ two SImode pairs, the low-part (operands[0] and [1]) and the high-part ++ (operands[2] and [3]). */ ++ ++void ++xtensa_split_DI_reg_imm (rtx *operands) ++{ ++ rtx lowpart, highpart; ++ ++ if (WORDS_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ ++ operands[3] = highpart; ++ operands[2] = gen_highpart (SImode, operands[0]); ++ operands[1] = lowpart; ++ operands[0] = gen_lowpart (SImode, operands[0]); ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -5124,4 +5165,12 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + reload_completed = 0; + } + ++/* Implement TARGET_LRA_P. */ ++ ++static bool ++xtensa_lra_p (void) ++{ ++ return TARGET_LRA; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 3a986fa1c..4b08ee5c1 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -228,7 +228,7 @@ along with GCC; see the file COPYING3. If not see + } + + /* 1 for registers not available across function calls. +- These must include the FIXED_REGISTERS and also any ++ These need not include the FIXED_REGISTERS but must any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. +@@ -241,10 +241,10 @@ along with GCC; see the file COPYING3. If not see + + Proper values are computed in TARGET_CONDITIONAL_REGISTER_USAGE. 
*/ + +-#define CALL_USED_REGISTERS \ ++#define CALL_REALLY_USED_REGISTERS \ + { \ +- 1, 1, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, \ +- 1, 1, 1, \ ++ 1, 0, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, \ ++ 0, 0, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, \ + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c18640b25..7c248ed2d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -943,14 +943,9 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx lowpart, highpart; +- +- if (TARGET_BIG_ENDIAN) +- split_double (operands[1], &highpart, &lowpart); +- else +- split_double (operands[1], &lowpart, &highpart); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); ++ xtensa_split_DI_reg_imm (operands); ++ emit_move_insn (operands[0], operands[1]); ++ emit_move_insn (operands[2], operands[3]); + DONE; + } + +@@ -984,6 +979,19 @@ + } + }) + ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "const_int_operand"))] ++ "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS ++ && ! xtensa_split1_finished_p ()" ++ [(set (match_dup 0) ++ (match_dup 1)) ++ (set (match_dup 2) ++ (match_dup 3))] ++{ ++ xtensa_split_DI_reg_imm (operands); ++}) ++ + ;; 32-bit Integer moves + + (define_expand "movsi" +@@ -1020,6 +1028,18 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand"))] ++ "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS ++ && ! xtensa_split1_finished_p () ++ && ! 
xtensa_simm12b (INTVAL (operands[1]))" ++ [(set (match_dup 0) ++ (match_dup 1))] ++{ ++ operands[1] = force_const_mem (SImode, operands[1]); ++}) ++ + (define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "constantpool_operand"))] +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index 97aa44f92..106af4e30 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -34,6 +34,10 @@ mextra-l32r-costs= + Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) + Set extra memory access cost for L32R instruction, in clock-cycle units. + ++mlra ++Target Mask(LRA) ++Use LRA instead of reload (transitional). ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch b/patches/gcc10.2/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch new file mode 100644 index 0000000..9f0ab74 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch @@ -0,0 +1,66 @@ +From d262f47d2f1c0dec78b59b0b3eec26d17b60bf83 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 21 Oct 2022 11:58:35 +0900 +Subject: [PATCH] xtensa: Make register A0 allocable for the CALL0 ABI + +This patch offers an additional allocable register by RA for the CALL0 +ABI. + +> Register a0 holds the return address upon entry to a function, but +> unlike the windowed register ABI, it is not reserved for this purpose +> and may hold other values after the return address has been saved. + - Xtensa ISA Reference Manual, + 8.1.2 "CALL0 Register Usage and Stack Layout" [p.589] + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_conditional_register_usage): + Remove register A0 from FIXED_REGS if the CALL0 ABI. 
+ (xtensa_expand_epilogue): Change to emit '(use (reg:SI A0_REG))' + unconditionally after restoring callee-saved registers for + sibling-call functions, in order to prevent the false impression + that register A0 is free to use. +--- + gcc/config/xtensa/xtensa.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 658d19924..767264641 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3506,15 +3506,14 @@ xtensa_expand_epilogue (bool sibcall_p) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (reg = gen_rtx_REG (SImode, regno), ++ emit_move_insn (gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); +- if (regno == A0_REG && sibcall_p) +- emit_use (reg); + } + } ++ if (sibcall_p) ++ emit_use (gen_rtx_REG (SImode, A0_REG)); + + if (cfun->machine->current_frame_size > 0) + { +@@ -5005,6 +5004,13 @@ xtensa_conditional_register_usage (void) + /* Remove hard FP register from the preferred reload registers set. */ + CLEAR_HARD_REG_BIT (reg_class_contents[(int)RL_REGS], + HARD_FRAME_POINTER_REGNUM); ++ ++ /* Register A0 holds the return address upon entry to a function ++ for the CALL0 ABI, but unlike the windowed register ABI, it is ++ not reserved for this purpose and may hold other values after ++ the return address has been saved. 
*/ ++ if (!TARGET_WINDOWED_ABI) ++ fixed_regs[A0_REG] = 0; + } + + /* Map hard register number to register class */ +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch b/patches/gcc10.2/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch new file mode 100644 index 0000000..ec28936 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch @@ -0,0 +1,74 @@ +From 3092ce3d24acb6ca10d8c980fb49685832566ae4 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 26 Oct 2022 15:27:51 +0900 +Subject: [PATCH] xtensa: Fix out-of-bounds array access in the movdi pattern + +The following new warnings were introduced in the commit +4f3f0296acbb ("xtensa: Prepare the transition from Reload to LRA"): + +gcc/config/xtensa/xtensa.md:945:26: error: array subscript 3 is above + array bounds of 'rtx_def* [2]' [-Werror=array-bounds] + 945 | emit_move_insn (operands[2], operands[3]); +gcc/config/xtensa/xtensa.md:945:26: error: array subscript 2 is above + array bounds of 'rtx_def* [2]' [-Werror=array-bounds] + 945 | emit_move_insn (operands[2], operands[3]); + +From gcc/insn-emit.cc (generated by building): + +> /* ../../gcc/config/xtensa/xtensa.md:932 */ +> rtx +> gen_movdi (rtx operand0, +> rtx operand1) +> { +> rtx_insn *_val = 0; +> start_sequence (); +> { +> rtx operands[2]; // only 2 elements +> operands[0] = operand0; +> operands[1] = operand1; +> #define FAIL return (end_sequence (), _val) +> #define DONE return (_val = get_insns (), end_sequence (), _val) +> #line 936 "../../gcc/config/xtensa/xtensa.md" +> { +> if (CONSTANT_P (operands[1])) +> { +> /* Split in halves if 64-bit Const-to-Reg moves +> because of offering further optimization opportunities. */ +> if (register_operand (operands[0], DImode)) +> { +> xtensa_split_DI_reg_imm (operands); // out-of-bounds! 
+> emit_move_insn (operands[0], operands[1]); +> emit_move_insn (operands[2], operands[3]); // out-of-bounds! +> DONE; +> } + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi): + Copy operands[0...1] to ops[0...3] and then use the latter before + calling xtensa_split_DI_reg_imm() and emitting insns. +--- + gcc/config/xtensa/xtensa.md | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 7c248ed2d..31e5f1b28 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -943,9 +943,10 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- xtensa_split_DI_reg_imm (operands); +- emit_move_insn (operands[0], operands[1]); +- emit_move_insn (operands[2], operands[3]); ++ rtx ops[4] = { operands[0], operands[1] }; ++ xtensa_split_DI_reg_imm (ops); ++ emit_move_insn (ops[0], ops[1]); ++ emit_move_insn (ops[2], ops[3]); + DONE; + } + +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch b/patches/gcc10.2/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch new file mode 100644 index 0000000..6ae21e1 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch @@ -0,0 +1,576 @@ +From b326051c7a6d15e15b4410ef658d2e67c0a604af Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Tabify, and trim trailing spaces + +Cosmetic and no functional changes. + +gcc/ChangeLog: + + * config/xtensa/elf.h: Tabify, and trim trailing spaces. + * config/xtensa/linux.h: Likewise. + * config/xtensa/uclinux.h: Likewise. + * config/xtensa/xtensa.c: Likewise. + * config/xtensa/xtensa.h: Likewise. + * config/xtensa/xtensa.md: Likewise. 
+--- + gcc/config/xtensa/elf.h | 32 ++++++------ + gcc/config/xtensa/linux.h | 1 - + gcc/config/xtensa/uclinux.h | 1 - + gcc/config/xtensa/xtensa.c | 85 ++++++++++++++++---------------- + gcc/config/xtensa/xtensa.h | 6 +-- + gcc/config/xtensa/xtensa.md | 98 ++++++++++++++++++------------------- + 6 files changed, 110 insertions(+), 113 deletions(-) + +diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h +index 6fd589fed..e0d1d7275 100644 +--- a/gcc/config/xtensa/elf.h ++++ b/gcc/config/xtensa/elf.h +@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see + "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s" + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC "crtend%O%s crtn%O%s" ++#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + + #undef LINK_SPEC + #define LINK_SPEC \ +@@ -82,19 +82,17 @@ along with GCC; see the file COPYING3. If not see + /* Search for headers in $tooldir/arch/include and for libraries and + startfiles in $tooldir/arch/lib. */ + #define GCC_DRIVER_HOST_INITIALIZATION \ +-do \ +-{ \ +- char *tooldir, *archdir; \ +- tooldir = concat (tooldir_base_prefix, spec_machine, \ +- dir_separator_str, NULL); \ +- if (!IS_ABSOLUTE_PATH (tooldir)) \ +- tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ +- spec_version, dir_separator_str, tooldir, NULL); \ +- archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ +- add_prefix (&startfile_prefixes, \ +- concat (archdir, "lib", dir_separator_str, NULL), \ +- "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ +- add_prefix (&include_prefixes, archdir, \ +- "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ +- } \ +-while (0) ++ do { \ ++ char *tooldir, *archdir; \ ++ tooldir = concat (tooldir_base_prefix, spec_machine, \ ++ dir_separator_str, NULL); \ ++ if (!IS_ABSOLUTE_PATH (tooldir)) \ ++ tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ ++ spec_version, dir_separator_str, tooldir, NULL); \ ++ archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ ++ 
add_prefix (&startfile_prefixes, \ ++ concat (archdir, "lib", dir_separator_str, NULL), \ ++ "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ ++ add_prefix (&include_prefixes, archdir, \ ++ "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ ++ } while (0) +diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h +index 49796c97f..51ea065bd 100644 +--- a/gcc/config/xtensa/linux.h ++++ b/gcc/config/xtensa/linux.h +@@ -65,4 +65,3 @@ along with GCC; see the file COPYING3. If not see + #define XTENSA_ALWAYS_PIC 1 + + #undef DBX_REGISTER_NUMBER +- +diff --git a/gcc/config/xtensa/uclinux.h b/gcc/config/xtensa/uclinux.h +index 64ba26f39..51b6f2f95 100644 +--- a/gcc/config/xtensa/uclinux.h ++++ b/gcc/config/xtensa/uclinux.h +@@ -66,4 +66,3 @@ along with GCC; see the file COPYING3. If not see + #define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + + #undef DBX_REGISTER_NUMBER +- +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 767264641..d3dafa4aa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -176,7 +176,7 @@ static bool constantpool_address_p (const_rtx addr); + static bool xtensa_legitimate_constant_p (machine_mode, rtx); + static void xtensa_reorg (void); + static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int &, +- unsigned int, bool); ++ unsigned int, bool); + static const char *xtensa_invalid_within_doloop (const rtx_insn *); + + static bool xtensa_member_type_forces_blk (const_tree, +@@ -2105,7 +2105,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + done = 1; + } + break; +- } ++ } + } + + output_asm_insn ("%1_LEND:", operands); +@@ -2305,7 +2305,7 @@ xtensa_tls_module_base (void) + xtensa_tls_module_base_symbol = + gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol) +- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; ++ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return xtensa_tls_module_base_symbol; +@@ -3444,7 
+3444,7 @@ xtensa_expand_prologue (void) + } + } + else +- { ++ { + insn = emit_insn (gen_movsi (hard_frame_pointer_rtx, + stack_pointer_rtx)); + if (!TARGET_WINDOWED_ABI) +@@ -3567,11 +3567,12 @@ xtensa_set_return_address (rtx address, rtx scratch) + gen_rtx_REG (SImode, A0_REG)); + rtx insn; + +- if (total_size > 1024) { +- emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); +- emit_insn (gen_addsi3 (scratch, frame, scratch)); +- a0_addr = scratch; +- } ++ if (total_size > 1024) ++ { ++ emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); ++ emit_insn (gen_addsi3 (scratch, frame, scratch)); ++ a0_addr = scratch; ++ } + + insn = emit_move_insn (gen_frame_mem (SImode, a0_addr), address); + RTX_FRAME_RELATED_P (insn) = 1; +@@ -3853,8 +3854,8 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + /* Check if the argument is in registers: + + if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4 +- && !must_pass_in_stack (type)) +- __array = (AP).__va_reg; */ ++ && !must_pass_in_stack (type)) ++ __array = (AP).__va_reg; */ + + array = create_tmp_var (ptr_type_node); + +@@ -4550,8 +4551,8 @@ xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) + /* Worker function for TARGET_FUNCTION_VALUE. */ + + rtx +-xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, +- bool outgoing) ++xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, ++ bool outgoing) + { + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) + && TYPE_PRECISION (valtype) < BITS_PER_WORD) +@@ -4754,7 +4755,7 @@ xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + + static bool + xtensa_can_use_doloop_p (const widest_int &, const widest_int &, +- unsigned int loop_depth, bool entered_at_top) ++ unsigned int loop_depth, bool entered_at_top) + { + /* Considering limitations in the hardware, only use doloop + for innermost loops which must be entered from the top. 
*/ +@@ -4793,32 +4794,32 @@ hwloop_optimize (hwloop_info loop) + if (loop->depth > 1) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d is not innermost\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d is not innermost\n", ++ loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d has more than one entry\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d has more than one entry\n", ++ loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d is not entered from head\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d is not entered from head\n", ++ loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d has invalid insn\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d has invalid insn\n", ++ loop->loop_no); + return false; + } + +@@ -4826,8 +4827,8 @@ hwloop_optimize (hwloop_info loop) + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d uses iterator\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d uses iterator\n", ++ loop->loop_no); + return false; + } + +@@ -4839,8 +4840,8 @@ hwloop_optimize (hwloop_info loop) + if (!insn) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d start_label not before loop_end\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d start_label not before loop_end\n", ++ loop->loop_no); + return false; + } + +@@ -4864,8 +4865,8 @@ hwloop_optimize (hwloop_info loop) + start_sequence (); + + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, +- loop->start_label, +- loop->iter_reg)); ++ loop->start_label, ++ loop->iter_reg)); + + seq = get_insns (); + +@@ -4881,21 +4882,21 @@ hwloop_optimize (hwloop_info loop) + seq = emit_label_before (gen_label_rtx (), seq); + new_bb = create_basic_block (seq, 
insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) +- { +- if (!(e->flags & EDGE_FALLTHRU)) +- redirect_edge_and_branch_force (e, new_bb); +- else +- redirect_edge_succ (e, new_bb); +- } ++ { ++ if (!(e->flags & EDGE_FALLTHRU)) ++ redirect_edge_and_branch_force (e, new_bb); ++ else ++ redirect_edge_succ (e, new_bb); ++ } + + make_edge (new_bb, loop->head, 0); + } + else + { + while (DEBUG_INSN_P (entry_after) +- || (NOTE_P (entry_after) ++ || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) +- entry_after = PREV_INSN (entry_after); ++ entry_after = PREV_INSN (entry_after); + + emit_insn_after (seq, entry_after); + } +@@ -4916,15 +4917,15 @@ hwloop_fail (hwloop_info loop) + rtx_insn *insn = loop->loop_end; + + emit_insn_before (gen_addsi3 (loop->iter_reg, +- loop->iter_reg, +- constm1_rtx), +- loop->loop_end); ++ loop->iter_reg, ++ constm1_rtx), ++ loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, +- loop->iter_reg, const0_rtx, +- loop->start_label), +- loop->loop_end); ++ loop->iter_reg, const0_rtx, ++ loop->start_label), ++ loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 4b08ee5c1..b5fec1cb3 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -63,7 +63,7 @@ along with GCC; see the file COPYING3. 
If not see + #define TARGET_S32C1I XCHAL_HAVE_S32C1I + #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS + #define TARGET_THREADPTR XCHAL_HAVE_THREADPTR +-#define TARGET_LOOPS XCHAL_HAVE_LOOPS ++#define TARGET_LOOPS XCHAL_HAVE_LOOPS + #define TARGET_WINDOWED_ABI (XSHAL_ABI == XTHAL_ABI_WINDOWED) + #define TARGET_DEBUG XCHAL_HAVE_DEBUG + #define TARGET_L32R XCHAL_HAVE_L32R +@@ -297,7 +297,7 @@ extern int leaf_function; + + /* Coprocessor registers */ + #define BR_REG_FIRST 18 +-#define BR_REG_LAST 18 ++#define BR_REG_LAST 18 + #define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1) + + /* 16 floating-point registers */ +@@ -743,7 +743,7 @@ typedef struct xtensa_args + + + /* Define output to appear before the constant pool. */ +-#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ ++#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ + do { \ + if ((SIZE) > 0 || !TARGET_WINDOWED_ABI) \ + { \ +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 31e5f1b28..08fb6f312 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -70,13 +70,13 @@ + + ;; This code iterator is for floating-point comparisons. + (define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered]) +-(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") ++(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") + (uneq "ueq") (unlt "ult") (unle "ule") + (unordered "un")]) + + ;; This iterator and attribute allow to combine most atomic operations. 
+ (define_code_iterator ATOMIC [and ior xor plus minus mult]) +-(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") ++(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") + (plus "add") (minus "sub") (mult "nand")]) + + ;; This mode iterator allows the HI and QI patterns to be defined from +@@ -195,7 +195,7 @@ + + (define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a") +- (minus:SI (match_operand:SI 1 "register_operand" "r") ++ (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub\t%0, %1, %2" +@@ -434,7 +434,7 @@ + + (define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=a") +- (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") ++ (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MINMAX" + "\t%0, %1, %2" +@@ -507,7 +507,7 @@ + + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") +- (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "!optimize_debug && optimize > 1" + { + /* GIMPLE manual byte-swapping recognition is now activated. 
+@@ -1025,7 +1025,7 @@ + %v0s32i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" +- [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") ++ [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + +@@ -1175,7 +1175,7 @@ + "((register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)) + && !(FP_REG_P (xt_true_regnum (operands[0])) +- && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" ++ && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" + "@ + mov.s\t%0, %1 + %v1lsi\t%0, %1 +@@ -1360,7 +1360,7 @@ + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] +- "" ++ "" + "@ + slli\t%0, %1, %R2 + ssl\t%2\;sll\t%0, %1" +@@ -1946,13 +1946,13 @@ + + (define_insn "zero_cost_loop_start" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "register_operand" "0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "register_operand" "0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] + "TARGET_LOOPS && optimize" + "loop\t%0, %l1_LEND" +@@ -1962,13 +1962,13 @@ + + (define_insn "zero_cost_loop_end" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a,m") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus 
(match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r"))] + "TARGET_LOOPS && optimize" +@@ -1979,13 +1979,13 @@ + + (define_insn "loop_end" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "register_operand" "0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "register_operand" "0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] + "TARGET_LOOPS && optimize" + { +@@ -1998,13 +1998,13 @@ + + (define_split + [(set (pc) +- (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 2 "nonimmediate_operand" "") +- (plus:SI (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch 3))] + "TARGET_LOOPS && optimize && reload_completed" +@@ -2020,7 +2020,7 @@ + emit_move_insn (operands[0], operands[3]); + test = gen_rtx_NE (VOIDmode, operands[3], const0_rtx); + emit_jump_insn (gen_cbranchsi4 (test, operands[3], +- const0_rtx, operands[1])); ++ const0_rtx, operands[1])); + } + else + { +@@ -2034,15 +2034,15 @@ + ; operand 1 is the label to jump to at the top of the loop + (define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else +- (ne (match_operand:SI 0 "" "") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) +- (set (match_dup 0) +- (plus:SI (match_dup 0) +- (const_int -1))) +- (unspec [(const_int 0)] UNSPEC_LSETUP_END) +- (clobber (match_dup 2))])] ; match_scratch ++ (ne (match_operand:SI 0 "" "") ++ 
(const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int -1))) ++ (unspec [(const_int 0)] UNSPEC_LSETUP_END) ++ (clobber (match_dup 2))])] ; match_scratch + "TARGET_LOOPS && optimize" + { + /* The loop optimizer doesn't check the predicates... */ +@@ -2281,8 +2281,8 @@ + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") +- (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) +- (match_operand 2 "" "i")))] ++ (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] + "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); +@@ -2387,9 +2387,9 @@ + + (define_expand "allocate_stack" + [(set (match_operand 0 "nonimmed_operand") +- (minus (reg A1_REG) (match_operand 1 "add_operand"))) ++ (minus (reg A1_REG) (match_operand 1 "add_operand"))) + (set (reg A1_REG) +- (minus (reg A1_REG) (match_dup 1)))] ++ (minus (reg A1_REG) (match_dup 1)))] + "TARGET_WINDOWED_ABI" + { + if (CONST_INT_P (operands[1])) +@@ -2514,7 +2514,7 @@ + + (define_expand "frame_blockage" + [(set (match_dup 0) +- (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] ++ (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); +@@ -2524,7 +2524,7 @@ + + (define_insn "*frame_blockage" + [(set (match_operand:BLK 0 "" "") +- (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] ++ (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" + [(set_attr "type" "nop") +@@ -2801,7 +2801,7 @@ + (define_expand "sync_new_" + [(set (match_operand:HQI 0 "register_operand") + (ATOMIC:HQI (match_operand:HQI 1 "memory_operand") +- (match_operand:HQI 2 "register_operand"))) ++ (match_operand:HQI 2 "register_operand"))) + (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))] + "TARGET_S32C1I" + { +-- +2.30.2 + diff --git 
a/patches/gcc10.2/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch b/patches/gcc10.2/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch new file mode 100644 index 0000000..8809c85 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch @@ -0,0 +1,42 @@ +From b1f4a90f366a3a5775f30507e2b7800ad366dcdc Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Clean up xtensa_expand_prologue + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): Modify to + exit the inspection loops as soon as the necessity of stack + pointer is found. +--- + gcc/config/xtensa/xtensa.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index d3dafa4aa..d4713cd8d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3362,12 +3362,18 @@ xtensa_expand_prologue (void) + ref; ref = DF_REF_NEXT_REG (ref)) + if (DF_REF_CLASS (ref) == DF_REF_REGULAR + && NONJUMP_INSN_P (DF_REF_INSN (ref))) +- stack_pointer_needed = true; ++ { ++ stack_pointer_needed = true; ++ break; ++ } + /* Check if callee-saved registers really need saving to the stack. 
*/ + if (!stack_pointer_needed) + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) + if (xtensa_call_save_reg (regno)) +- stack_pointer_needed = true; ++ { ++ stack_pointer_needed = true; ++ break; ++ } + + cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed; + +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch b/patches/gcc10.2/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch new file mode 100644 index 0000000..789c1a3 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch @@ -0,0 +1,71 @@ +From e3b1e99a383cbceb2c910a3a88392f37e58daeb2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Change GP_RETURN{,_REG_COUNT} to + GP_RETURN_{FIRST,LAST} + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (GP_RETURN, GP_RETURN_REG_COUNT): + Change to GP_RETURN_FIRST and GP_RETURN_LAST, respectively. + * config/xtensa/xtensa.c (xtensa_function_value, + xtensa_libcall_value, xtensa_function_value_regno_p): Ditto. +--- + gcc/config/xtensa/xtensa.c | 10 +++++----- + gcc/config/xtensa/xtensa.h | 4 ++-- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index d4713cd8d..054a44ea3 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4561,9 +4561,9 @@ xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + bool outgoing) + { + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) +- && TYPE_PRECISION (valtype) < BITS_PER_WORD) +- ? SImode : TYPE_MODE (valtype), +- outgoing ? GP_OUTGOING_RETURN : GP_RETURN); ++ && TYPE_PRECISION (valtype) < BITS_PER_WORD) ++ ? SImode : TYPE_MODE (valtype), ++ outgoing ? GP_OUTGOING_RETURN : GP_RETURN_FIRST); + } + + /* Worker function for TARGET_LIBCALL_VALUE. 
*/ +@@ -4573,7 +4573,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + { + return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) +- ? SImode : mode, GP_RETURN); ++ ? SImode : mode, GP_RETURN_FIRST); + } + + /* Worker function TARGET_FUNCTION_VALUE_REGNO_P. */ +@@ -4581,7 +4581,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + static bool + xtensa_function_value_regno_p (const unsigned int regno) + { +- return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT); ++ return IN_RANGE (regno, GP_RETURN_FIRST, GP_RETURN_LAST); + } + + /* The static chain is passed in memory. Provide rtx giving 'mem' +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index b5fec1cb3..e3f808c42 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -475,9 +475,9 @@ enum reg_class + + /* Symbolic macros for the registers used to return integer, floating + point, and values of coprocessor and user-defined modes. */ +-#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) ++#define GP_RETURN_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE) ++#define GP_RETURN_LAST (GP_RETURN_FIRST + 3) + #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) +-#define GP_RETURN_REG_COUNT 4 + + /* Symbolic macros for the first/last argument registers. 
*/ + #define GP_ARG_FIRST (GP_REG_FIRST + 2) +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch b/patches/gcc10.2/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch new file mode 100644 index 0000000..5b71081 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch @@ -0,0 +1,38 @@ +From 46b7c587fe47fa73811d7cd9b453ee32f7ba8ad8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Generate density instructions in set_frame_ptr + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (set_frame_ptr): Fix to reflect + TARGET_DENSITY. +--- + gcc/config/xtensa/xtensa.md | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 08fb6f312..06fda8aa5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2567,12 +2567,15 @@ + "" + { + if (frame_pointer_needed) +- return "mov\ta7, sp"; ++ return (TARGET_DENSITY ? "mov.n\ta7, sp" : "mov\ta7, sp"); + return ""; + } + [(set_attr "type" "move") + (set_attr "mode" "SI") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Post-reload splitter to remove fp assignment when it's not needed. 
+ (define_split +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch b/patches/gcc10.2/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch new file mode 100644 index 0000000..57976f3 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch @@ -0,0 +1,77 @@ +From 101c49b504fb567227291a381ada09273d8ec4a7 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 23 Dec 2022 12:17:09 -0800 +Subject: [PATCH] gcc: xtensa: use define_c_enums instead of + define_constants + +This improves RTL dumps readability. No functional changes. + +gcc/ + * config/xtensa/xtensa.md (unspec): Extract UNSPEC_* constants + into this enum. + (unspecv): Extract UNSPECV_* constants into this enum. +--- + gcc/config/xtensa/xtensa.md | 46 ++++++++++++++++++++----------------- + 1 file changed, 25 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 06fda8aa5..a2cfb3df7 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,28 +25,32 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++]) ++ ++(define_c_enum "unspec" [ ++ UNSPEC_NOP ++ UNSPEC_PLT ++ UNSPEC_RET_ADDR ++ UNSPEC_TPOFF ++ UNSPEC_DTPOFF ++ UNSPEC_TLS_FUNC ++ UNSPEC_TLS_ARG ++ UNSPEC_TLS_CALL ++ UNSPEC_TP ++ UNSPEC_MEMW ++ UNSPEC_LSETUP_START ++ UNSPEC_LSETUP_END ++ UNSPEC_FRAME_BLOCKAGE ++]) + +- (UNSPEC_NOP 2) +- (UNSPEC_PLT 3) +- (UNSPEC_RET_ADDR 4) +- (UNSPEC_TPOFF 5) +- (UNSPEC_DTPOFF 6) +- (UNSPEC_TLS_FUNC 7) +- (UNSPEC_TLS_ARG 8) +- (UNSPEC_TLS_CALL 9) +- (UNSPEC_TP 10) +- (UNSPEC_MEMW 11) +- (UNSPEC_LSETUP_START 12) +- (UNSPEC_LSETUP_END 13) +- (UNSPEC_FRAME_BLOCKAGE 14) +- +- (UNSPECV_SET_FP 1) +- (UNSPECV_ENTRY 2) +- (UNSPECV_S32RI 4) +- (UNSPECV_S32C1I 5) +- (UNSPECV_EH_RETURN 6) +- (UNSPECV_SET_TP 7) +- (UNSPECV_BLOCKAGE 8) ++(define_c_enum "unspecv" [ ++ UNSPECV_SET_FP ++ UNSPECV_ENTRY ++ UNSPECV_S32RI ++ UNSPECV_S32C1I ++ 
UNSPECV_EH_RETURN ++ UNSPECV_SET_TP ++ UNSPECV_BLOCKAGE + ]) + + ;; This code iterator allows signed and unsigned widening multiplications +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0063-Check-DF-availability-before-use.patch b/patches/gcc10.2/gcc-xtensa-0063-Check-DF-availability-before-use.patch new file mode 100644 index 0000000..afa8d82 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0063-Check-DF-availability-before-use.patch @@ -0,0 +1,31 @@ +From 4b938a83c19e3e7dc71b407e2f78f2ccbc57b742 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 29 Dec 2022 21:14:33 +0900 +Subject: [PATCH] xtensa: Check DF availability before use + +Perhaps no problem, but for safety. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): Fix to check + DF availability before use of DF_* macros. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 054a44ea3..8f748efa4 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3357,7 +3357,7 @@ xtensa_expand_prologue (void) + || crtl->calls_eh_return; + + /* Check if the function body really needs the stack pointer. 
*/ +- if (!stack_pointer_needed) ++ if (!stack_pointer_needed && df) + for (ref = DF_REG_USE_CHAIN (A1_REG); + ref; ref = DF_REF_NEXT_REG (ref)) + if (DF_REF_CLASS (ref) == DF_REF_REGULAR +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch b/patches/gcc10.2/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch new file mode 100644 index 0000000..93f3399 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch @@ -0,0 +1,28 @@ +From 4433ebb185be476704937b3a8b3fe5e568c22712 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Wed, 28 Dec 2022 11:27:21 -0800 +Subject: [PATCH] gcc: xtensa: use GP_RETURN_* instead of magic constant + +gcc/ + * config/xtensa/xtensa.c (xtensa_return_in_memory): Use + GP_RETURN_* instead of magic constant. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8f748efa4..ffd36217d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4551,7 +4551,7 @@ static bool + xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) + { + return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) +- > 4 * UNITS_PER_WORD); ++ > (unsigned) (GP_RETURN_LAST - GP_RETURN_FIRST + 1) * UNITS_PER_WORD); + } + + /* Worker function for TARGET_FUNCTION_VALUE. 
*/ +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch b/patches/gcc10.2/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch new file mode 100644 index 0000000..2964a2b --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch @@ -0,0 +1,245 @@ +From 385f165d92e68a58206005e9652e8e880a2ed2d7 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 28 Dec 2022 22:50:52 +0900 +Subject: [PATCH] xtensa: Optimize stack frame adjustment more + +This patch introduces a convenient helper function for integer immediate +addition with scratch register as needed, that splits and emits either +up to two ADDI/ADDMI machine instructions or an addition by register +following an integer immediate load (which may later be transformed by +constantsynth). + +By using the helper function, it makes stack frame adjustment logic +simplified and instruction count less in some cases. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c + (xtensa_split_imm_two_addends, xtensa_emit_add_imm): + New helper functions. + (xtensa_set_return_address, xtensa_output_mi_thunk): + Change to use the helper function. + (xtensa_emit_adjust_stack_ptr): Ditto. + And also change to try reusing the content of scratch register + A9 if the register is not modified in the function body. 
+--- + gcc/config/xtensa/xtensa.c | 151 ++++++++++++++++++++++++++----------- + 1 file changed, 106 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ffd36217d..b05ae9045 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -104,6 +104,7 @@ struct GTY(()) machine_function + bool frame_laid_out; + bool epilogue_done; + bool inhibit_logues_a1_adjusts; ++ rtx last_logues_a9_content; + }; + + /* Vector, indexed by hard register number, which contains 1 for a +@@ -2509,6 +2510,86 @@ xtensa_split_DI_reg_imm (rtx *operands) + } + + ++/* Try to split an integer value into what are suitable for two consecutive ++ immediate addition instructions, ADDI or ADDMI. */ ++ ++static bool ++xtensa_split_imm_two_addends (HOST_WIDE_INT imm, HOST_WIDE_INT v[2]) ++{ ++ HOST_WIDE_INT v0, v1; ++ ++ if (imm < -32768) ++ v0 = -32768, v1 = imm + 32768; ++ else if (imm > 32512) ++ v0 = 32512, v1 = imm - 32512; ++ else if (TARGET_DENSITY && xtensa_simm12b (imm)) ++ /* A pair of MOVI(.N) and ADD.N is one or two bytes less than two ++ immediate additions if TARGET_DENSITY. */ ++ return false; ++ else ++ v0 = (imm + 128) & ~255L, v1 = imm - v0; ++ ++ if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1)) ++ { ++ v[0] = v0, v[1] = v1; ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++/* Helper function for integer immediate addition with scratch register ++ as needed, that splits and emits either up to two ADDI/ADDMI machine ++ instructions or an addition by register following an integer immediate ++ load (which may later be transformed by constantsynth). ++ ++ If 'scratch' is NULL_RTX but still needed, a new pseudo-register will ++ be allocated. Thus, after the reload/LRA pass, the specified scratch ++ register must be a hard one. 
*/ ++ ++static bool ++xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch, ++ bool need_note) ++{ ++ bool retval = false; ++ HOST_WIDE_INT v[2]; ++ rtx_insn *insn; ++ ++ if (imm == 0) ++ return false; ++ ++ if (xtensa_simm8 (imm) || xtensa_simm8x256 (imm)) ++ insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm))); ++ else if (xtensa_split_imm_two_addends (imm, v)) ++ { ++ if (!scratch) ++ scratch = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (scratch, src, GEN_INT (v[0]))); ++ insn = emit_insn (gen_addsi3 (dst, scratch, GEN_INT (v[1]))); ++ } ++ else ++ { ++ if (scratch) ++ emit_move_insn (scratch, GEN_INT (imm)); ++ else ++ scratch = force_reg (SImode, GEN_INT (imm)); ++ retval = true; ++ insn = emit_insn (gen_addsi3 (dst, src, scratch)); ++ } ++ ++ if (need_note) ++ { ++ rtx note_rtx = gen_rtx_SET (dst, plus_constant (Pmode, src, imm)); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } ++ ++ return retval; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -3280,41 +3361,33 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED) + static void + xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) + { ++ rtx src, scratch; + rtx_insn *insn; +- rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx +- : stack_pointer_rtx; + + if (cfun->machine->inhibit_logues_a1_adjusts) + return; + +- if (xtensa_simm8 (offset) +- || xtensa_simm8x256 (offset)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ src = (flags & ADJUST_SP_FRAME_PTR) ++ ? 
hard_frame_pointer_rtx : stack_pointer_rtx; ++ scratch = gen_rtx_REG (Pmode, A9_REG); + +- if (offset < 0) +- { +- emit_move_insn (tmp_reg, GEN_INT (-offset)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg)); +- } +- else +- { +- emit_move_insn (tmp_reg, GEN_INT (offset)); +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg)); +- } +- } +- +- if (flags & ADJUST_SP_NEED_NOTE) ++ if (df && DF_REG_DEF_COUNT (A9_REG) == 0 ++ && cfun->machine->last_logues_a9_content ++ && -INTVAL (cfun->machine->last_logues_a9_content) == offset) + { +- rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- offset)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, src, scratch)); ++ if (flags & ADJUST_SP_NEED_NOTE) ++ { ++ rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, src, offset)); + +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } + } ++ else if (xtensa_emit_add_imm (stack_pointer_rtx, src, offset, scratch, ++ (flags & ADJUST_SP_NEED_NOTE))) ++ cfun->machine->last_logues_a9_content = GEN_INT (offset); + } + + /* minimum frame = reg save area (4 words) plus static chain (1 word) +@@ -3342,8 +3415,9 @@ xtensa_expand_prologue (void) + /* Use a8 as a temporary since a0-a7 may be live. 
*/ + rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG); + emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE))); +- emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE)); +- emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg)); ++ xtensa_emit_add_imm (tmp_reg, stack_pointer_rtx, ++ MIN_FRAME_SIZE - total_size, ++ tmp_reg, false); + insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg)); + } + } +@@ -3575,8 +3649,8 @@ xtensa_set_return_address (rtx address, rtx scratch) + + if (total_size > 1024) + { +- emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); +- emit_insn (gen_addsi3 (scratch, frame, scratch)); ++ xtensa_emit_add_imm (scratch, frame, total_size - UNITS_PER_WORD, ++ scratch, false); + a0_addr = scratch; + } + +@@ -5125,15 +5199,7 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no); + + if (delta) +- { +- if (xtensa_simm8 (delta)) +- emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta))); +- else +- { +- emit_move_insn (temp0, GEN_INT (delta)); +- emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0)); +- } +- } ++ xtensa_emit_add_imm (this_rtx, this_rtx, delta, temp0, false); + + if (vcall_offset) + { +@@ -5143,13 +5209,8 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx)); + if (xtensa_uimm8x4 (vcall_offset)) + addr = plus_constant (Pmode, temp0, vcall_offset); +- else if (xtensa_simm8 (vcall_offset)) +- emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset))); + else +- { +- emit_move_insn (temp1, GEN_INT (vcall_offset)); +- emit_insn (gen_addsi3 (temp1, temp0, temp1)); +- } ++ xtensa_emit_add_imm (temp1, temp0, vcall_offset, temp1, false); + emit_move_insn (temp1, gen_rtx_MEM (Pmode, addr)); + emit_insn (gen_add2_insn (this_rtx, temp1)); + } +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch 
b/patches/gcc10.2/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch new file mode 100644 index 0000000..193de88 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch @@ -0,0 +1,84 @@ +From fef84d1ba0cb5956687f776b22f51d9fa5e7d176 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 Jan 2023 14:08:06 +0900 +Subject: [PATCH] xtensa: Optimize bitwise splicing operation + +This patch optimizes the operation of cutting and splicing two register +values at a specified bit position, in other words, combining (bitwise +ORing) bits 0 through (C-1) of the register with bits C through 31 +of the other, where C is the specified immediate integer 17 through 31. + +This typically applies to signed copy of floating point number and +__builtin_return_address() if the windowed register ABI is used, and saves one +instruction compared to four shifts and a bitwise OR by the default RTL +combination pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*splice_bits): + New insn_and_split pattern. 
+--- + gcc/config/xtensa/xtensa.md | 47 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index a2cfb3df7..ba1c044c4 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -746,6 +746,53 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + ++(define_insn_and_split "*splice_bits" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")) ++ (and:SI (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i"))))] ++ ++ "!optimize_debug && optimize ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == -1 ++ && (exact_log2 (INTVAL (operands[3]) + 1) > 16 ++ || exact_log2 (INTVAL (operands[4]) + 1) > 16)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 5) ++ (ashift:SI (match_dup 1) ++ (match_dup 4))) ++ (set (match_dup 6) ++ (lshiftrt:SI (match_dup 2) ++ (match_dup 3))) ++ (set (match_dup 0) ++ (ior:SI (lshiftrt:SI (match_dup 5) ++ (match_dup 4)) ++ (ashift:SI (match_dup 6) ++ (match_dup 3))))] ++{ ++ int shift; ++ if (INTVAL (operands[3]) < 0) ++ { ++ rtx x; ++ x = operands[1], operands[1] = operands[2], operands[2] = x; ++ x = operands[3], operands[3] = operands[4], operands[4] = x; ++ } ++ shift = floor_log2 (INTVAL (operands[3]) + 1); ++ operands[3] = GEN_INT (shift); ++ operands[4] = GEN_INT (32 - shift); ++ operands[5] = gen_reg_rtx (SImode); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[3]) == 0x7FFFFFFF ++ || INTVAL (operands[4]) == 0x7FFFFFFF)") ++ (const_int 11) ++ (const_int 12)))]) ++ + + ;; Zero-extend instructions. 
+ +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch b/patches/gcc10.2/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch new file mode 100644 index 0000000..a146e25 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch @@ -0,0 +1,85 @@ +From 32f3873104faa4323d7db85262145b7895824e4a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 10 Jan 2023 01:44:09 +0900 +Subject: [PATCH] xtensa: Make instruction cost estimation for size more + accurate + +Until now, we applied COSTS_N_INSNS() (multiplying by 4) after dividing +the instruction length by 3, so we couldn't express the difference less +than modulo 3 in insn cost for size (e.g. 11 bytes and 12 bytes cost the +same). + +This patch fixes that. + +;; 2 bytes +addi.n a2, a2, -1 ; cost 3 + +;; 3 bytes +addmi a2, a2, 1024 ; cost 4 + +;; 4 bytes +movi.n a3, 80 ; cost 5 +bnez.n a2, a3, .L4 + +;; 5 bytes +srli a2, a3, 1 ; cost 7 +add.n a2, a2, a2 + +;; 6 bytes +ssai 8 ; cost 8 +src a4, a2, a3 + +;; 3 + 4 bytes +l32r a2, .L5 ; cost 9 + +;; 11 bytes ; cost 15 +;; 12 bytes ; cost 16 + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_insn_cost): + Let insn cost for size be obtained by applying COSTS_N_INSNS() + to instruction length and then dividing by 3. +--- + gcc/config/xtensa/xtensa.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b05ae9045..e0adf069e 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4554,13 +4554,15 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) + { + if (!(recog_memoized (insn) < 0)) + { +- int len = get_attr_length (insn), n = (len + 2) / 3; ++ int len = get_attr_length (insn); + + if (len == 0) + return COSTS_N_INSNS (0); + + if (speed) /* For speed cost. 
*/ + { ++ int n = (len + 2) / 3; ++ + /* "L32R" may be particular slow (implementation-dependent). */ + if (xtensa_is_insn_L32R_p (insn)) + return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); +@@ -4607,10 +4609,11 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) + { + /* "L32R" itself plus constant in litpool. */ + if (xtensa_is_insn_L32R_p (insn)) +- return COSTS_N_INSNS (2) + 1; ++ len = 3 + 4; + +- /* Consider ".n" short instructions. */ +- return COSTS_N_INSNS (n) - (n * 3 - len); ++ /* Consider fractional instruction length (for example, ".n" ++ short instructions or "L32R" litpool constants. */ ++ return (COSTS_N_INSNS (len) + 1) / 3; + } + } + } +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch b/patches/gcc10.2/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch new file mode 100644 index 0000000..40caadd --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch @@ -0,0 +1,55 @@ +From 5fe437012eb770e8fc2d2d9f859110e5cc707fc5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 11 Jan 2023 19:26:03 +0900 +Subject: [PATCH] xtensa: Tune "*btrue" insn pattern + +This branch instruction has short encoding if EQ/NE comparison against +immediate zero when the Code Density Option is enabled, but its "length" +attribute was only for normal encoding. This patch fixes it. + +This patch also prevents undesirable replacement of the comparison immediate +zero of the instruction (short encoding, as mentioned above) with a +register that has value of zero (normal encoding) by the postreload pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*btrue): + Correct value of the attribute "length" that depends on + TARGET_DENSITY and operands, and add '?' character to the register + constraint of the compared operand. 
+--- + gcc/config/xtensa/xtensa.md | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ba1c044c4..4b0b74368 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1684,7 +1684,7 @@ + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ (match_operand:SI 1 "branch_operand" "K,?r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +@@ -1693,7 +1693,14 @@ + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +- (set_attr "length" "3,3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && CONST_INT_P (operands[1]) ++ && INTVAL (operands[1]) == 0 ++ && (GET_CODE (operands[3]) == EQ ++ || GET_CODE (operands[3]) == NE)") ++ (const_int 2) ++ (const_int 3)))]) + + (define_insn "*ubtrue" + [(set (pc) +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch b/patches/gcc10.2/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch new file mode 100644 index 0000000..017244c --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch @@ -0,0 +1,44 @@ +From c7111ec38b7a1825f759804b021afc7b7b5b7491 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 12 Jan 2023 10:01:01 +0900 +Subject: [PATCH] xtensa: Optimize ctzsi2 and ffssi2 a bit + +This patch saves one byte when the Code Density Option is enabled, + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (ctzsi2, ffssi2): + Rearrange the emitting codes. 
+--- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4b0b74368..4f1e8fd13 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -477,8 +477,8 @@ + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); +- emit_insn (gen_negsi2 (temp, temp)); +- emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31))); ++ emit_move_insn (operands[0], GEN_INT (31)); ++ emit_insn (gen_subsi3 (operands[0], operands[0], temp)); + DONE; + }) + +@@ -491,8 +491,8 @@ + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); +- emit_insn (gen_negsi2 (temp, temp)); +- emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32))); ++ emit_move_insn (operands[0], GEN_INT (32)); ++ emit_insn (gen_subsi3 (operands[0], operands[0], temp)); + DONE; + }) + +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch b/patches/gcc10.2/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch new file mode 100644 index 0000000..e182227 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch @@ -0,0 +1,218 @@ +From 70feb8960c923e914f1e0bf8e7eae96300c708a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 14 Jan 2023 04:31:46 +0900 +Subject: [PATCH] xtensa: Remove old broken tweak for leaf function + +In the before-IRA era, ORDER_REGS_FOR_LOCAL_ALLOC was called for each +function in Xtensa, and there was register allocation table reordering +for leaf functions to compensate for the poor performance of local-alloc. 
+ +Today the adjustment hook is still called via its alternative +ADJUST_REG_ALLOC_ORDER, but it is only called once at the start of the IRA, +and leaf_function_p() erroneously returns true and also gives no argument +count. + +That straightforwardly misleads register allocation that all functions are +always leaves with no arguments, which leads to inefficiencies in allocation +results. + +Fortunately, IRA is smarter than local-alloc and does not need such assistance. + +This patch does away with the antiquated tweak by removing the wreckage that no +longer works. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (order_regs_for_local_alloc): + Rename to xtensa_adjust_reg_alloc_order. + * config/xtensa/xtensa.c (xtensa_adjust_reg_alloc_order): + Ditto. And also remove code to reorder register numbers for + leaf functions, rename the tables, and adjust the allocation + order for the call0 ABI to use register A0 more. + (xtensa_leaf_regs): Remove. + * config/xtensa/xtensa.h (REG_ALLOC_ORDER): Cosmetics. + (order_regs_for_local_alloc): Rename as the above. + (LEAF_REGISTERS, LEAF_REG_REMAP, leaf_function): Remove.
+--- + gcc/config/xtensa/xtensa-protos.h | 2 +- + gcc/config/xtensa/xtensa.c | 77 +++++++------------------------ + gcc/config/xtensa/xtensa.h | 51 ++++++-------------- + 3 files changed, 31 insertions(+), 99 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 63b147a90..39d5a5825 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -78,7 +78,7 @@ extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); + extern void xtensa_expand_epilogue (bool); +-extern void order_regs_for_local_alloc (void); ++extern void xtensa_adjust_reg_alloc_order (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e0adf069e..db7ac3599 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -107,18 +107,6 @@ struct GTY(()) machine_function + rtx last_logues_a9_content; + }; + +-/* Vector, indexed by hard register number, which contains 1 for a +- register that is allowable in a candidate for leaf function +- treatment. */ +- +-const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = +-{ +- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +- 1, 1, 1, +- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +- 1 +-}; +- + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); + static rtx gen_int_relational (enum rtx_code, rtx, rtx); +@@ -4175,58 +4163,25 @@ xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + return NO_REGS; + } + ++/* Called once at the start of IRA, by ADJUST_REG_ALLOC_ORDER. 
*/ + + void +-order_regs_for_local_alloc (void) ++xtensa_adjust_reg_alloc_order (void) + { +- if (!leaf_function_p ()) +- { +- static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = +- REG_ALLOC_ORDER; +- static const int reg_nonleaf_alloc_order_call0[FIRST_PSEUDO_REGISTER] = +- { +- 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 12, 13, 14, 15, +- 18, +- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, +- 0, 1, 16, 17, +- 35, +- }; +- +- memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ? +- reg_nonleaf_alloc_order : reg_nonleaf_alloc_order_call0, +- FIRST_PSEUDO_REGISTER * sizeof (int)); +- } +- else +- { +- int i, num_arg_regs; +- int nxt = 0; +- +- /* Use the AR registers in increasing order (skipping a0 and a1) +- but save the incoming argument registers for a last resort. */ +- num_arg_regs = crtl->args.info.arg_words; +- if (num_arg_regs > MAX_ARGS_IN_REGISTERS) +- num_arg_regs = MAX_ARGS_IN_REGISTERS; +- for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++) +- reg_alloc_order[nxt++] = i + num_arg_regs; +- for (i = 0; i < num_arg_regs; i++) +- reg_alloc_order[nxt++] = GP_ARG_FIRST + i; +- +- /* List the coprocessor registers in order. */ +- for (i = 0; i < BR_REG_NUM; i++) +- reg_alloc_order[nxt++] = BR_REG_FIRST + i; +- +- /* List the FP registers in order for now. */ +- for (i = 0; i < 16; i++) +- reg_alloc_order[nxt++] = FP_REG_FIRST + i; +- +- /* GCC requires that we list *all* the registers.... 
*/ +- reg_alloc_order[nxt++] = 0; /* a0 = return address */ +- reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */ +- reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */ +- reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */ +- +- reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */ +- } ++ static const int reg_windowed_alloc_order[FIRST_PSEUDO_REGISTER] = ++ REG_ALLOC_ORDER; ++ static const int reg_call0_alloc_order[FIRST_PSEUDO_REGISTER] = ++ { ++ 9, 10, 11, 7, 6, 5, 4, 3, 2, 8, 0, 12, 13, 14, 15, ++ 18, ++ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, ++ 1, 16, 17, ++ 35, ++ }; ++ ++ memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ? ++ reg_windowed_alloc_order : reg_call0_alloc_order, ++ FIRST_PSEUDO_REGISTER * sizeof (int)); + } + + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index e3f808c42..ef7f9e5d5 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -249,44 +249,21 @@ along with GCC; see the file COPYING3. If not see + 1, \ + } + +-/* For non-leaf procedures on Xtensa processors, the allocation order +- is as specified below by REG_ALLOC_ORDER. For leaf procedures, we +- want to use the lowest numbered registers first to minimize +- register window overflows. However, local-alloc is not smart +- enough to consider conflicts with incoming arguments. If an +- incoming argument in a2 is live throughout the function and +- local-alloc decides to use a2, then the incoming argument must +- either be spilled or copied to another register. To get around +- this, we define ADJUST_REG_ALLOC_ORDER to redefine +- reg_alloc_order for leaf functions such that lowest numbered +- registers are used first with the exception that the incoming +- argument registers are not used until after other register choices +- have been exhausted. 
*/ +- +-#define REG_ALLOC_ORDER \ +-{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ +- 18, \ +- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ +- 0, 1, 16, 17, \ +- 35, \ +-} +- +-#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () +- +-/* For Xtensa, the only point of this is to prevent GCC from otherwise +- giving preference to call-used registers. To minimize window +- overflows for the AR registers, we want to give preference to the +- lower-numbered AR registers. For other register files, which are +- not windowed, we still prefer call-used registers, if there are any. */ +-extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER]; +-#define LEAF_REGISTERS xtensa_leaf_regs ++/* For the windowed register ABI on Xtensa processors, the allocation ++ order is as specified below by REG_ALLOC_ORDER. ++ For the call0 ABI, on the other hand, ADJUST_REG_ALLOC_ORDER hook ++ will be called once at the start of IRA, replacing it with the ++ appropriate one. */ + +-/* For Xtensa, no remapping is necessary, but this macro must be +- defined if LEAF_REGISTERS is defined. */ +-#define LEAF_REG_REMAP(REGNO) (REGNO) +- +-/* This must be declared if LEAF_REGISTERS is set. */ +-extern int leaf_function; ++#define REG_ALLOC_ORDER \ ++{ \ ++ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ ++ 18, \ ++ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ ++ 0, 1, 16, 17, \ ++ 35, \ ++} ++#define ADJUST_REG_ALLOC_ORDER xtensa_adjust_reg_alloc_order () + + /* Internal macros to classify a register number. 
*/ + +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch b/patches/gcc10.2/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch new file mode 100644 index 0000000..323b830 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch @@ -0,0 +1,59 @@ +From 97538d16c11c17764aab63695ce3b5275fd50d56 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 18 Jan 2023 09:53:38 +0900 +Subject: [PATCH] xtensa: Optimize inversion of the MSB + +Such operation can be done either bitwise-XOR or addition with -2147483648, +but the latter is one byte less if TARGET_DENSITY. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (xorsi3_internal): + Rename from the original of "xorsi3". + (xorsi3): New expansion pattern that emits addition rather than + bitwise-XOR when the second source is a constant of -2147483648 + if TARGET_DENSITY. +--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4f1e8fd13..c6a299cc1 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -736,7 +736,31 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "xorsi3" ++(define_expand "xorsi3" ++ [(set (match_operand:SI 0 "register_operand") ++ (xor:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "nonmemory_operand")))] ++ "" ++{ ++ if (register_operand (operands[2], SImode)) ++ emit_insn (gen_xorsi3_internal (operands[0], operands[1], ++ operands[2])); ++ else ++ { ++ rtx (*gen_op)(rtx, rtx, rtx); ++ if (TARGET_DENSITY ++ && CONST_INT_P (operands[2]) ++ && INTVAL (operands[2]) == -2147483648L) ++ gen_op = gen_addsi3; ++ else ++ gen_op = gen_xorsi3_internal; ++ emit_insn (gen_op (operands[0], operands[1], ++ force_reg (SImode, operands[2]))); ++ } ++ DONE; ++}) ++ ++(define_insn "xorsi3_internal" + [(set (match_operand:SI 0 
"register_operand" "=a") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch b/patches/gcc10.2/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch new file mode 100644 index 0000000..157876b --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch @@ -0,0 +1,112 @@ +From 91b14e1f0de9a690b6c3b411d1c2706e05063977 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 20 Jan 2023 08:30:01 +0900 +Subject: [PATCH] xtensa: Revise complex hard register clobber elimination + +In the previously posted patch +"xtensa: Make complex hard register clobber elimination more robust and accurate", +the check code for insns that refer to the [DS]Cmode hard register before +it is overwritten after it is clobbered is incomplete. Fortunately such +insns are seldom emitted, so it didn't matter. + +This patch fixes that for the sake of completeness. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Fix exit from loops detecting references before overwriting in the + split pattern. 
+--- + gcc/config/xtensa/xtensa.md | 72 +++++++++++++++++++------------------ + 1 file changed, 37 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c6a299cc1..4d976ece5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2978,45 +2978,47 @@ + { + auto_sbitmap bmp (FIRST_PSEUDO_REGISTER); + rtx_insn *insn; +- rtx reg = gen_rtx_REG (SImode, 0); ++ rtx reg = gen_rtx_REG (SImode, 0), dest; ++ unsigned int regno; ++ sbitmap_iterator iter; + bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0])); + for (insn = next_nonnote_nondebug_insn_bb (curr_insn); + insn; insn = next_nonnote_nondebug_insn_bb (insn)) +- { +- sbitmap_iterator iter; +- unsigned int regno; +- if (NONJUMP_INSN_P (insn)) +- { +- EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) +- { +- set_regno_raw (reg, regno, REG_NREGS (reg)); +- if (reg_overlap_mentioned_p (reg, PATTERN (insn))) +- break; +- } +- if (GET_CODE (PATTERN (insn)) == SET) +- { +- rtx x = SET_DEST (PATTERN (insn)); +- if (REG_P (x) && HARD_REGISTER_P (x)) +- bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x)); +- else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x))) +- { +- struct subreg_info info; +- subreg_get_info (regno = REGNO (SUBREG_REG (x)), +- GET_MODE (SUBREG_REG (x)), +- SUBREG_BYTE (x), GET_MODE (x), &info); +- if (!info.representable_p) +- break; +- bitmap_clear_range (bmp, regno + info.offset, info.nregs); +- } +- } +- if (bitmap_empty_p (bmp)) +- goto FALLTHRU; +- } +- else if (CALL_P (insn)) ++ if (NONJUMP_INSN_P (insn)) ++ { + EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) +- if (call_used_or_fixed_reg_p (regno)) +- break; +- } ++ { ++ set_regno_raw (reg, regno, REG_NREGS (reg)); ++ if (reg_referenced_p (reg, PATTERN (insn))) ++ goto ABORT; ++ } ++ if (GET_CODE (PATTERN (insn)) == SET ++ || GET_CODE (PATTERN (insn)) == CLOBBER) ++ { ++ dest = SET_DEST (PATTERN (insn)); ++ if (REG_P (dest) && HARD_REGISTER_P (dest)) ++ 
bitmap_clear_range (bmp, REGNO (dest), REG_NREGS (dest)); ++ else if (SUBREG_P (dest) ++ && HARD_REGISTER_P (SUBREG_REG (dest))) ++ { ++ struct subreg_info info; ++ subreg_get_info (regno = REGNO (SUBREG_REG (dest)), ++ GET_MODE (SUBREG_REG (dest)), ++ SUBREG_BYTE (dest), GET_MODE (dest), ++ &info); ++ if (!info.representable_p) ++ break; ++ bitmap_clear_range (bmp, regno + info.offset, info.nregs); ++ } ++ } ++ if (bitmap_empty_p (bmp)) ++ goto FALLTHRU; ++ } ++ else if (CALL_P (insn)) ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ if (call_used_or_fixed_reg_p (regno)) ++ goto ABORT; ++ABORT: + FAIL; + FALLTHRU:; + }) +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch b/patches/gcc10.2/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch new file mode 100644 index 0000000..1a7f055 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch @@ -0,0 +1,39 @@ +From 7ef080074a2c422e20a8e4dae50f6f002c6c2928 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 30 Jan 2023 18:37:55 +0900 +Subject: [PATCH] xtensa: Enforce return address saving when -Og is + specified + +Leaf function often omits saving its return address to the stack slot, +and this feature often makes debugging very confusing, especially for +stack dump analysis. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_call_save_reg): Change to return + true if register A0 (return address register) when -Og is specified. 
+--- + gcc/config/xtensa/xtensa.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index db7ac3599..5c6ee7a8f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3258,8 +3258,11 @@ xtensa_call_save_reg (int regno) + return false; + + if (regno == A0_REG) +- return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || +- df_regs_ever_live_p (regno); ++ /* Ensure the return address to be saved to the stack slot in order ++ to assist stack dump analysis when -Og is specified. */ ++ return optimize_debug ++ || crtl->profile || !crtl->is_leaf || crtl->calls_eh_return ++ || df_regs_ever_live_p (regno); + + if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0074-fix-PR-target-108876.patch b/patches/gcc10.2/gcc-xtensa-0074-fix-PR-target-108876.patch new file mode 100644 index 0000000..9609f04 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0074-fix-PR-target-108876.patch @@ -0,0 +1,116 @@ +From 16cfee0871e5a6411b17adc2dc422b9760d17893 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Wed, 22 Feb 2023 22:08:21 -0800 +Subject: [PATCH 1/2] xtensa: fix PR target/108876 + +In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call +insn included (use (reg:SI A0_REG)) to fix the problem, which added +a USE chain unconditionally to the data flow of register A0 during +the sibling call. + +As a result, df_regs_ever_live_p (A0_REG) returns true, so even if +register A0 is not used outside of the sibling call insn, saves and +restores to stack slots are emitted in pro/epilogue, and finally +code size increases. 
+(This is why I never included (use A0) in sibling calls) + + /* example */ + extern int foo(int); + int test(int a) { + return foo(a * 3 + 1); + } + +;; before + test: + addi sp, sp, -16 ;; unneeded stack frame allocation (induced) + s32i.n a0, sp, 12 ;; unneeded saving of register A0 + l32i.n a0, sp, 12 ;; unneeded restoration of register A0 + addx2 a2, a2, a2 + addi.n a2, a2, 1 + addi sp, sp, 16 ;; unneeded stack frame freeing (induced) + j.l foo, a9 ;; sibling call (truly needs register A0) + +The essential cause is that we emit (use A0) *before* the insns that +does the stack pointer adjustment during epilogue expansion, so the +liveness of register A0 ends early, so register A0 is reused afterwards. + +This patch fixes the problem and avoids such regression by doing the +emit of (use A0) in the sibling call epilogue expansion at the end. + +;; after +test: + addx2 a2, a2, a2 + addi.n a2, a2, 1 + j.l foo, a9 + +>From RTL-pass "315r.rnreg" by +"gfortran -O3 -funroll-loops -mabi=call0 -S -da gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90": + + ;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, decl_uid=987, cgraph_uid=3, symbol_order=4) + ... 
+ (insn 3807 3806 3808 121 (set (reg:SI 15 a15) + (mem/c:SI (plus:SI (reg/f:SI 1 sp) + (const_int 268 [0x10c])) [31 S4 A32])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 {movsi_internal} + (nil)) + (insn 3808 3807 3809 121 (set (reg:SI 7 a7) + (const_int 288 [0x120])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 {movsi_internal} + (nil)) + (insn 3809 3808 3810 121 (set (reg/f:SI 1 sp) + (plus:SI (reg/f:SI 1 sp) + (reg:SI 7 a7))) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 {addsi3} + (expr_list:REG_DEAD (reg:SI 9 a9) + (nil))) + (insn 3810 3809 721 121 (use (reg:SI 0 a0)) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1 + (expr_list:REG_DEAD (reg:SI 0 a0) + (nil))) + (call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") [flags 0x41] ) [0 __builtin_free S4 A32]) + (const_int 0 [0])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 discrim 1 106 {sibcall_internal} + (expr_list:REG_DEAD (reg:SI 2 a2) + (expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41] ) + (expr_list:REG_EH_REGION (const_int 0 [0]) + (nil)))) + (expr_list:SI (use (reg:SI 2 a2)) + (nil))) + +(IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account; +it just seems to allocate registers in fixed_regs index order, +which may have hurt register A0 that became allocatable in the recent +patch) + +gcc/ChangeLog: + PR target/108876 + + * config/xtensa/xtensa.c (xtensa_expand_epilogue): + Emit (use (reg:SI A0_REG)) at the end in the sibling call + (i.e. the same place as (return) in the normal call). 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 5c6ee7a8f..3426494f5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3583,8 +3583,6 @@ xtensa_expand_epilogue (bool sibcall_p) + gen_frame_mem (SImode, x)); + } + } +- if (sibcall_p) +- emit_use (gen_rtx_REG (SImode, A0_REG)); + + if (cfun->machine->current_frame_size > 0) + { +@@ -3610,7 +3608,9 @@ xtensa_expand_epilogue (bool sibcall_p) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- if (!sibcall_p) ++ if (sibcall_p) ++ emit_use (gen_rtx_REG (SImode, A0_REG)); ++ else + emit_jump_insn (gen_return ()); + } + +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch b/patches/gcc10.2/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch new file mode 100644 index 0000000..0099b1c --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch @@ -0,0 +1,54 @@ +From bed35098a6d3d0032716f23e5c631e7aa183f227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 23 Feb 2023 12:42:32 +0900 +Subject: [PATCH 2/2] xtensa: Fix missing mode warnings in machine description + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (zero_cost_loop_start, zero_cost_loop_end, loop_end): + Add missing "SI:" to PLUS RTXes. 
+--- + gcc/config/xtensa/xtensa.md | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4d976ece5..9c017dd19 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2033,8 +2033,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] + "TARGET_LOOPS && optimize" + "loop\t%0, %l1_LEND" +@@ -2049,8 +2049,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a,m") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r"))] + "TARGET_LOOPS && optimize" +@@ -2066,8 +2066,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] + "TARGET_LOOPS && optimize" + { +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch b/patches/gcc10.2/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch new file mode 100644 index 0000000..99109d5 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch @@ -0,0 +1,303 @@ +From 19e3ee5197e1de1ec6228cb54ff4ad8f27af5138 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 27 Jan 2023 12:17:33 +0900 +Subject: [PATCH] xtensa: Eliminate the use of callee-saved register that + saves and restores only once + +In the case of the CALL0 ABI, values that must be retained before and +after function calls are placed in the callee-saved registers (A12 +through A15) and 
referenced later. However, it is often the case that +the save and the reference are each only once and a simple register- +register move (with two exceptions; i. the register saved to/restored +from is the stack pointer, ii. the function needs an additional stack +pointer adjustment to grow the stack). + +e.g. in the following example, if there are no other occurrences of +register A14: + +;; before + ; prologue { + ... + s32i.n a14, sp, 16 + ... ;; no frame pointer needed + ;; no additional stack growth + ; } prologue + ... + mov.n a14, a6 ;; A6 is not SP + ... + call0 foo + ... + mov.n a8, a14 ;; A8 is not SP + ... + ; epilogue { + ... + l32i.n a14, sp, 16 + ... + ; } epilogue + +It can be possible like this: + +;; after + ; prologue { + ... + (no save needed) + ... + ; } prologue + ... + s32i.n a6, sp, 16 ;; replaced with A14's slot + ... + call0 foo + ... + l32i.n a8, sp, 16 ;; through SP + ... + ; epilogue { + ... + (no restoration needed) + ... + ; } epilogue + +This patch adds the abovementioned logic to the function prologue/epilogue +RTL expander code. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (machine_function): Add new member + 'eliminated_callee_saved_bmp'. + (xtensa_can_eliminate_callee_saved_reg_p): New function to + determine whether the register can be eliminated or not. + (xtensa_expand_prologue): Add invoking the above function and + elimination the use of callee-saved register by using its stack + slot through the stack pointer (or the frame pointer if needed) + directly. + (xtensa_expand_epilogue): Modify to not emit register restoration + insn from its stack slot if the register is already eliminated. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/elim_callee_saved.c: New.
+--- + gcc/config/xtensa/xtensa.c | 132 ++++++++++++++---- + .../gcc.target/xtensa/elim_callee_saved.c | 38 +++++ + 2 files changed, 145 insertions(+), 25 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 3426494f5..6aea625d9 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -105,6 +105,7 @@ struct GTY(()) machine_function + bool epilogue_done; + bool inhibit_logues_a1_adjusts; + rtx last_logues_a9_content; ++ HOST_WIDE_INT eliminated_callee_saved_bmp; + }; + + static void xtensa_option_override (void); +@@ -3381,6 +3382,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) + cfun->machine->last_logues_a9_content = GEN_INT (offset); + } + ++static bool ++xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno, ++ rtx_insn **p_insnS, ++ rtx_insn **p_insnR) ++{ ++ df_ref ref; ++ rtx_insn *insn, *insnS = NULL, *insnR = NULL; ++ rtx pattern; ++ ++ if (!optimize || !df || call_used_or_fixed_reg_p (regno)) ++ return false; ++ ++ for (ref = DF_REG_DEF_CHAIN (regno); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) != DF_REF_REGULAR ++ || DEBUG_INSN_P (insn = DF_REF_INSN (ref))) ++ continue; ++ else if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (SET_DEST (pattern)) ++ && REGNO (SET_DEST (pattern)) == regno ++ && REG_NREGS (SET_DEST (pattern)) == 1 ++ && REG_P (SET_SRC (pattern)) ++ && REGNO (SET_SRC (pattern)) != A1_REG) ++ { ++ if (insnS) ++ return false; ++ insnS = insn; ++ continue; ++ } ++ else ++ return false; ++ ++ for (ref = DF_REG_USE_CHAIN (regno); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) != DF_REF_REGULAR ++ || DEBUG_INSN_P (insn = DF_REF_INSN (ref))) ++ continue; ++ else if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (SET_SRC (pattern)) ++ && REGNO (SET_SRC (pattern)) == regno ++ && REG_NREGS (SET_SRC (pattern)) == 1 ++ && REG_P (SET_DEST 
(pattern)) ++ && REGNO (SET_DEST (pattern)) != A1_REG) ++ { ++ if (insnR) ++ return false; ++ insnR = insn; ++ continue; ++ } ++ else ++ return false; ++ ++ if (!insnS || !insnR) ++ return false; ++ ++ *p_insnS = insnS, *p_insnR = insnR; ++ ++ return true; ++} ++ + /* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ + #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) +@@ -3420,6 +3481,7 @@ xtensa_expand_prologue (void) + df_ref ref; + bool stack_pointer_needed = frame_pointer_needed + || crtl->calls_eh_return; ++ bool large_stack_needed; + + /* Check if the function body really needs the stack pointer. */ + if (!stack_pointer_needed && df) +@@ -3468,23 +3530,41 @@ xtensa_expand_prologue (void) + } + } + ++ large_stack_needed = total_size > 1024 ++ || (!callee_save_size && total_size > 128); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) +- { +- if (xtensa_call_save_reg(regno)) +- { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- rtx mem = gen_frame_mem (SImode, x); +- rtx reg = gen_rtx_REG (SImode, regno); ++ if (xtensa_call_save_reg(regno)) ++ { ++ rtx x = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, GEN_INT (offset)); ++ rtx mem = gen_frame_mem (SImode, x); ++ rtx_insn *insnS, *insnR; ++ ++ if (!large_stack_needed ++ && xtensa_can_eliminate_callee_saved_reg_p (regno, ++ &insnS, &insnR)) ++ { ++ if (frame_pointer_needed) ++ mem = replace_rtx (mem, stack_pointer_rtx, ++ hard_frame_pointer_rtx); ++ SET_DEST (PATTERN (insnS)) = mem; ++ df_insn_rescan (insnS); ++ SET_SRC (PATTERN (insnR)) = copy_rtx (mem); ++ df_insn_rescan (insnR); ++ cfun->machine->eliminated_callee_saved_bmp |= 1 << regno; ++ } ++ else ++ { ++ rtx reg = gen_rtx_REG (SImode, regno); + +- offset -= UNITS_PER_WORD; +- insn = emit_move_insn (mem, reg); +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (mem, reg)); +- } +- } +- if (total_size > 
1024 +- || (!callee_save_size && total_size > 128)) ++ insn = emit_move_insn (mem, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (mem, reg)); ++ } ++ offset -= UNITS_PER_WORD; ++ } ++ if (large_stack_needed) + xtensa_emit_adjust_stack_ptr (callee_save_size - total_size, + ADJUST_SP_NEED_NOTE); + } +@@ -3573,16 +3653,18 @@ xtensa_expand_epilogue (bool sibcall_p) + emit_insn (gen_blockage ()); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) +- { +- if (xtensa_call_save_reg(regno)) +- { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- +- offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), +- gen_frame_mem (SImode, x)); +- } +- } ++ if (xtensa_call_save_reg(regno)) ++ { ++ if (! (cfun->machine->eliminated_callee_saved_bmp ++ & (1 << regno))) ++ { ++ rtx x = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, GEN_INT (offset)); ++ emit_move_insn (gen_rtx_REG (SImode, regno), ++ gen_frame_mem (SImode, x)); ++ } ++ offset -= UNITS_PER_WORD; ++ } + + if (cfun->machine->current_frame_size > 0) + { +diff --git a/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c +new file mode 100644 +index 000000000..cd3d6b9f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0" } */ ++ ++extern void foo(void); ++ ++/* eliminated one register (the reservoir of variable 'a') by its stack slot through the stack pointer. */ ++int test0(int a) { ++ int array[252]; /* the maximum bound of non-large stack. */ ++ foo(); ++ asm volatile("" : : "m"(array)); ++ return a; ++} ++ ++/* cannot eliminate if large stack is needed, because the offset from TOS cannot fit into single L32I/S32I instruction. */ ++int test1(int a) { ++ int array[10000]; /* requires large stack. 
*/ ++ foo(); ++ asm volatile("" : : "m"(array)); ++ return a; ++} ++ ++/* register A15 is the reservoir of the stack pointer and cannot be eliminated if the frame pointer is needed. ++ other registers still can be, but through the frame pointer rather the stack pointer. */ ++int test2(int a) { ++ int* p = __builtin_alloca(16); ++ foo(); ++ asm volatile("" : : "r"(p)); ++ return a; ++} ++ ++/* in -O0 the composite hard registers may still remain unsplitted at pro_and_epilogue and must be excluded. */ ++extern double bar(void); ++int __attribute__((optimize(0))) test3(int a) { ++ return bar() + a; ++} ++ ++/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 21 } } */ ++/* { dg-final { scan-assembler-times "a15, 8" 2 } } */ +-- +2.30.2 + diff --git a/patches/gcc10.2/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch b/patches/gcc10.2/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch new file mode 100644 index 0000000..f42c958 --- /dev/null +++ b/patches/gcc10.2/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch @@ -0,0 +1,159 @@ +From 33aef933318545ff759442b391d0a53aae43251e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 18 Feb 2023 13:43:34 +0900 +Subject: [PATCH] xtensa: Eliminate unnecessary general-purpose reg-reg + moves + +Register-register move instructions that can be easily seen as +unnecessary by the human eye may remain in the compiled result. 
+For example: + +/* example */ +double test(double a, double b) { + return __builtin_copysign(a, b); +} + +test: + add.n a3, a3, a3 + extui a5, a5, 31, 1 + ssai 1 + ;; Be in the same BB + src a7, a5, a3 ;; Replacing the destination doesn't + ;; violate any constraints of the + ;; operands + ;; No CALL insns in this span + ;; Both A3 and A7 are irrelevant to + ;; insns in this span + mov.n a3, a7 ;; An unnecessary reg-reg move + ;; A7 is not used after this + ret.n + +The last two instructions above, excluding the return instruction, +could be done like this: + + src a3, a5, a3 + +This symptom often occurs when handling DI/DFmode values with SImode +instructions. This patch solves the above problem using peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: New peephole2 pattern that eliminates + the occurrence of general-purpose register used only once and for + transferring intermediate value. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/elim_GP_regmove_0.c: New test. + * gcc.target/xtensa/elim_GP_regmove_1.c: New test. 
+--- + gcc/config/xtensa/xtensa.md | 46 +++++++++++++++++++ + .../gcc.target/xtensa/elim_GP_regmove_0.c | 23 ++++++++++ + .../gcc.target/xtensa/elim_GP_regmove_1.c | 10 ++++ + 3 files changed, 79 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9c017dd19..628b27b32 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -3055,3 +3055,49 @@ FALLTHRU:; + operands[1] = GEN_INT (imm0); + operands[2] = GEN_INT (imm1); + }) ++ ++(define_peephole2 ++ [(set (match_operand 0 "register_operand") ++ (match_operand 1 "register_operand"))] ++ "REG_NREGS (operands[0]) == 1 && GP_REG_P (REGNO (operands[0])) ++ && REG_NREGS (operands[1]) == 1 && GP_REG_P (REGNO (operands[1])) ++ && peep2_reg_dead_p (1, operands[1])" ++ [(const_int 0)] ++{ ++ basic_block bb = BLOCK_FOR_INSN (curr_insn); ++ rtx_insn *head = BB_HEAD (bb), *insn; ++ rtx dest = operands[0], src = operands[1], pattern, t_dest, dest_orig; ++ for (insn = PREV_INSN (curr_insn); ++ insn && insn != head; ++ insn = PREV_INSN (insn)) ++ if (CALL_P (insn)) ++ break; ++ else if (INSN_P (insn)) ++ { ++ if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (t_dest = SET_DEST (pattern)) ++ && REG_NREGS (t_dest) == 1 ++ && REGNO (t_dest) == REGNO (src)) ++ { ++ dest_orig = SET_DEST (pattern); ++ SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest), ++ REGNO (dest)); ++ extract_insn (insn); ++ if (!constrain_operands (true, get_enabled_alternatives (insn))) ++ { ++ SET_DEST (pattern) = dest_orig; ++ goto ABORT; ++ } ++ df_insn_rescan (insn); ++ goto FALLTHRU; ++ } ++ if (reg_overlap_mentioned_p (dest, pattern) ++ || reg_overlap_mentioned_p (src, pattern) ++ || set_of (dest, insn) ++ || set_of (src, insn)) ++ break; ++ } ++ABORT: ++ FAIL; ++FALLTHRU:; ++}) +diff --git 
a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c +new file mode 100644 +index 000000000..5c195c357 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fpeephole2" } */ ++ ++/* can be processed */ ++double test0(double a, double b) { ++ return __builtin_copysign(a, b); ++} ++ ++/* cannot be processed: due to violate '0' constraint of the 2nd source operand. */ ++int test1(int a, int b) { ++ int c; ++ asm volatile ("" : "=a"(c) : "r"(a), "0"(b)); ++ return c; ++} ++ ++/* cannot be processed: due to violate '&' constraint of the destination operand. */ ++int test2(int a) { ++ int b; ++ asm volatile ("" : "=&a"(b) : "r"(a)); ++ return b; ++} ++ ++/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c +new file mode 100644 +index 000000000..a13ef8188 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fpeephole2 -mabi=windowed" } */ ++ ++/* cannot be processed: due to violate 'a' constraint of the destination operand of the stack adjustment instruction. 
*/ ++void test(void) { ++ int buffer[8192]; ++ asm volatile ("" : : "m"(buffer)); ++} ++ ++/* { dg-final { scan-assembler-times "movsp" 1 } } */ +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-Improve-initialization-of-objects-when-the-initializ.patch b/patches/gcc10.3/gcc-Improve-initialization-of-objects-when-the-initializ.patch new file mode 100644 index 0000000..00fdb45 --- /dev/null +++ b/patches/gcc10.3/gcc-Improve-initialization-of-objects-when-the-initializ.patch @@ -0,0 +1,39 @@ +From a2cde0c6443c440c2a2b72b5eea060229a0cff57 Mon Sep 17 00:00:00 2001 +From: Jeff Law +Date: Sat, 9 Jul 2022 11:11:00 -0400 +Subject: [PATCH] [RFA] Improve initialization of objects when the initializer + +gcc/ + + * expr.c (store_expr): Identify trailing NULs in a STRING_CST + initializer and use clear_storage rather than copying the + NULs to the destination array. +--- + gcc/expr.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f33..6ff393462 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -5723,6 +5723,17 @@ store_expr (tree exp, rtx target, int call_param_p, + } + + str_copy_len = TREE_STRING_LENGTH (str); ++ ++ /* Trailing NUL bytes in EXP will be handled by the call to ++ clear_storage, which is more efficient than copying them from ++ the STRING_CST, so trim those from STR_COPY_LEN. 
*/ ++ while (str_copy_len) ++ { ++ if (TREE_STRING_POINTER (str)[str_copy_len - 1]) ++ break; ++ str_copy_len--; ++ } ++ + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) + { + str_copy_len += STORE_MAX_PIECES - 1; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch b/patches/gcc10.3/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch new file mode 100644 index 0000000..4c5418f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0006-Fix-non-robust-split-condition-in-define_insn.patch @@ -0,0 +1,44 @@ +From 2065a3fccb11e28ebcc42aa46c52a40b0fae9bea Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Sun, 21 Nov 2021 20:18:31 -0600 +Subject: [PATCH 01/31] xtensa: Fix non-robust split condition in + define_insn_and_split + +This patch is to fix some non-robust split conditions in some +define_insn_and_splits, to make each of them applied on top of +the corresponding condition for define_insn part, otherwise the +splitting could perform unexpectedly. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi_internal, movdf_internal): Fix split + condition. 
+--- + gcc/config/xtensa/xtensa.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a8e59ee9..123916957 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -782,7 +782,7 @@ + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +@@ -1058,7 +1058,7 @@ + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" +- "reload_completed" ++ "&& reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + { +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc10.3/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc10.3/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! 
optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 - diff --git a/patches/gcc10.3/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch b/patches/gcc10.3/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch deleted file mode 100644 index 9f8e00b..0000000 --- a/patches/gcc10.3/gcc-xtensa-0007-Backport-patches-from-upstream-master.patch +++ /dev/null @@ -1,3186 +0,0 @@ -From 989fc2c516206d7cf70177a416815f91998e2131 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Fri, 27 May 2022 21:34:37 +0900 -Subject: [PATCH 1/3] xtensa: Backport patches from upstream/master - -2b5b8610e985e23a0c2e0272339ab074a750e240 "xtensa: Fix non-robust split condition in define_insn_and_split" -7e5baa7e6f4caced6bdaef6d866d19e7656d8a16 "xtensa: fix -Wformat-diag warnings." -d543bac1631700f0da30d5ca555296f4938a82c6 "xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi" -112447f8564c0307c5da99a4094a3a99f204239f "xtensa: Reflect the 32-bit Integer Divide Option" -b753405a5f0d45eea97f4cc7df2c2089401b08bf "xtensa: Simplify EXTUI instruction maskimm validations" -9b251fe2e39a49c0d3ecd34cf8c5d55544efd159 "xtensa: Make use of IN_RANGE macro where appropriate" -3397563ad6c8fc5d9675faf507e52dd2ed284202 "xtensa: Fix instruction counting regarding block move expansion" -6454b4a8f5d90dd355c3c7e31a592a439223b645 "xtensa: Add setmemsi insn pattern" -9aad2b22436d5346fa224e5c14439dcef36cf3dd "xtensa: Improve bswap[sd]i2 insn patterns" -e94c6dbfb57a862dd8a8685eabc4886ad1aaea25 "xtensa: fix PR target/105879" -2fcc69d8ce4eddf6dea878a5383254d366e1bb14 "xtensa: Implement bswaphi2 insn pattern" -9777d446e2148ef9a6e9f35db3f4eab99ee8812c "xtensa: Make one_cmplsi2 optimizer-friendly" -e44e7face13f38f9b228e2619786ba0add9ef77b "xtensa: Optimize '(~x & y)' to '((x & y) ^ y)'" -29dc90a580bf45f503ed89eb1dc63b5676db776b "xtensa: Add clrsbsi2 insn 
pattern" -9489a1ab05ad1bda7126da5513f08282da3e531d "xtensa: Tweak some widen multiplications" -fddf0e1057fe24eff0d894fbc2959b4086464a96 "xtensa: Consider the Loop Option when setmemsi is expanded to small loop" -ccd02e734e0f1742629403b46e5b1c650b00fd65 "xtensa: Improve instruction cost estimation and suggestion" -cd02f15f1aecc45b2c2feae16840503549508619 "xtensa: Improve constant synthesis for both integer and floating-point" -1c68ec1f8ab531fba56cccf549ffe592bf622821 "xtensa: Improve shift operations more" -e1b193c1cce3a975a9ed60dd0f30182fe0255d7c "xtensa: Simplify conditional branch/move insn patterns" -70ce04ca353bb0cda8321b91a77c2477e26d339b "xtensa: Make use of BALL/BNALL instructions" -077438933cf94f00cc5edf974338c11ba4bf7a39 "xtensa: Optimize bitwise AND operation with some specific forms of constants" -96518f714e3fab53a966a05b8d48011e27c1a718 "xtensa: Document new -mextra-l32r-costs= Xtensa-specific option" -43b0c56fda4bc990e8ee8d6a0b376de7b663bb06 "xtensa: Add support for sibling call optimization" -c95e307e3a978166cd5d6817ec9d8293825ff3fb "xtensa: Add some dedicated patterns that correspond to GIMPLE canonicalizations" -cfad4856fa46abc878934a9433d0bfc2482ccf00 "xtensa: Eliminate unwanted reg-reg moves during DFmode input reloads" -ce3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9 "xtensa: Eliminate [DS]Cmode hard register clobber that is immediately followed by whole overwrite the register" -479b6f449ee999501ad6eff0b7db8d0cd5b2d28d "xtensa: Defer storing integer constants into litpool until reload" ---- - gcc/config/xtensa/constraints.md | 10 +- - gcc/config/xtensa/predicates.md | 41 +- - gcc/config/xtensa/xtensa-protos.h | 11 +- - gcc/config/xtensa/xtensa.c | 733 +++++++++--- - gcc/config/xtensa/xtensa.h | 7 +- - gcc/config/xtensa/xtensa.md | 1024 +++++++++++++---- - gcc/config/xtensa/xtensa.opt | 6 +- - gcc/doc/invoke.texi | 11 +- - gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 + - gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 + - 
gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 + - gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 + - .../gcc.target/xtensa/check_zero_byte.c | 9 + - .../gcc.target/xtensa/constsynth_2insns.c | 44 + - .../gcc.target/xtensa/constsynth_3insns.c | 24 + - .../gcc.target/xtensa/constsynth_double.c | 11 + - .../gcc.target/xtensa/funnel_shifter.c | 17 + - .../gcc.target/xtensa/one_cmpl_abs.c | 9 + - gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 + - libgcc/config/xtensa/lib1funcs.S | 23 + - libgcc/config/xtensa/t-xtensa | 2 +- - 21 files changed, 1796 insertions(+), 350 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c - create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c - -diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md -index 2062c8816..13b3daafc 100644 ---- a/gcc/config/xtensa/constraints.md -+++ b/gcc/config/xtensa/constraints.md -@@ -92,7 +92,7 @@ - "An integer constant in the range @minus{}32-95 for use with MOVI.N - instructions." - (and (match_code "const_int") -- (match_test "ival >= -32 && ival <= 95"))) -+ (match_test "IN_RANGE (ival, -32, 95)"))) - - (define_constraint "N" - "An unsigned 8-bit integer constant shifted left by 8 bits for use -@@ -103,7 +103,7 @@ - (define_constraint "O" - "An integer constant that can be used in ADDI.N instructions." 
- (and (match_code "const_int") -- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) -+ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) - - (define_constraint "P" - "An integer constant that can be used as a mask value in an EXTUI -@@ -113,8 +113,10 @@ - - (define_constraint "Y" - "A constant that can be used in relaxed MOVI instructions." -- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -- (match_test "TARGET_AUTO_LITPOOLS"))) -+ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") -+ (match_test "TARGET_AUTO_LITPOOLS")) -+ (and (match_code "const_int") -+ (match_test "can_create_pseudo_p ()")))) - - ;; Memory constraints. Do not use define_memory_constraint here. Doing so - ;; causes reload to force some constants into the constant pool, but since -diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md -index eb52b05aa..633cc6264 100644 ---- a/gcc/config/xtensa/predicates.md -+++ b/gcc/config/xtensa/predicates.md -@@ -25,8 +25,7 @@ - - (define_predicate "addsubx_operand" - (and (match_code "const_int") -- (match_test "INTVAL (op) >= 1 -- && INTVAL (op) <= 3"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) - - (define_predicate "arith_operand" - (ior (and (match_code "const_int") -@@ -53,9 +52,19 @@ - (match_test "xtensa_mask_immediate (INTVAL (op))")) - (match_operand 0 "register_operand"))) - -+(define_predicate "shifted_mask_operand" -+ (match_code "const_int") -+{ -+ HOST_WIDE_INT mask = INTVAL (op); -+ int shift = ctz_hwi (mask); -+ -+ return IN_RANGE (shift, 1, 31) -+ && xtensa_mask_immediate ((uint32_t)mask >> shift); -+}) -+ - (define_predicate "extui_fldsz_operand" - (and (match_code "const_int") -- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) -+ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) - - (define_predicate "sext_operand" - (if_then_else (match_test "TARGET_SEXT") -@@ -64,7 +73,7 @@ - - (define_predicate "sext_fldsz_operand" - (and (match_code 
"const_int") -- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) -+ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) - - (define_predicate "lsbitnum_operand" - (and (match_code "const_int") -@@ -138,8 +147,9 @@ - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) - (ior (and (match_code "const_int") -- (match_test "GET_MODE_CLASS (mode) == MODE_INT -- && xtensa_simm12b (INTVAL (op))")) -+ (match_test "(GET_MODE_CLASS (mode) == MODE_INT -+ && xtensa_simm12b (INTVAL (op))) -+ || can_create_pseudo_p ()")) - (and (match_code "const_int,const_double,const,symbol_ref,label_ref") - (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) - && CONSTANT_P (op) -@@ -156,6 +166,19 @@ - (and (match_code "const_int") - (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) - -+(define_predicate "reload_operand" -+ (match_code "mem") -+{ -+ const_rtx addr = XEXP (op, 0); -+ if (REG_P (addr)) -+ return REGNO (addr) == A1_REG; -+ if (GET_CODE (addr) == PLUS) -+ return REG_P (XEXP (addr, 0)) -+ && REGNO (XEXP (addr, 0)) == A1_REG -+ && CONST_INT_P (XEXP (addr, 1)); -+ return false; -+}) -+ - (define_predicate "branch_operator" - (match_code "eq,ne,lt,ge")) - -@@ -165,9 +188,15 @@ - (define_predicate "boolean_operator" - (match_code "eq,ne")) - -+(define_predicate "logical_shift_operator" -+ (match_code "ashift,lshiftrt")) -+ - (define_predicate "xtensa_cstoresi_operator" - (match_code "eq,ne,gt,ge,lt,le")) - -+(define_predicate "xtensa_shift_per_byte_operator" -+ (match_code "ashift,ashiftrt,lshiftrt")) -+ - (define_predicate "tls_symbol_operand" - (and (match_code "symbol_ref") - (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) -diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h -index 18d803581..75ed3bfb0 100644 ---- a/gcc/config/xtensa/xtensa-protos.h -+++ b/gcc/config/xtensa/xtensa-protos.h -@@ -41,18 +41,23 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); - extern int 
xtensa_expand_conditional_move (rtx *, int); - extern int xtensa_expand_scc (rtx *, machine_mode); - extern int xtensa_expand_block_move (rtx *); -+extern int xtensa_expand_block_set_unrolled_loop (rtx *); -+extern int xtensa_expand_block_set_small_loop (rtx *); - extern void xtensa_split_operand_pair (rtx *, machine_mode); -+extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); - extern int xtensa_emit_move_sequence (rtx *, machine_mode); - extern rtx xtensa_copy_incoming_a7 (rtx); - extern void xtensa_expand_nonlocal_goto (rtx *); - extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); - extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); - extern void xtensa_emit_loop_end (rtx_insn *, rtx *); --extern char *xtensa_emit_branch (bool, bool, rtx *); --extern char *xtensa_emit_bit_branch (bool, bool, rtx *); -+extern char *xtensa_emit_branch (bool, rtx *); - extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); -+extern void xtensa_prepare_expand_call (int, rtx *); - extern char *xtensa_emit_call (int, rtx *); -+extern char *xtensa_emit_sibcall (int, rtx *); - extern bool xtensa_tls_referenced_p (rtx); -+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); - - #ifdef TREE_CODE - extern void init_cumulative_args (CUMULATIVE_ARGS *, int); -@@ -70,7 +75,7 @@ extern int xtensa_dbx_register_number (int); - extern long compute_frame_size (poly_int64); - extern bool xtensa_use_return_instruction_p (void); - extern void xtensa_expand_prologue (void); --extern void xtensa_expand_epilogue (void); -+extern void xtensa_expand_epilogue (bool); - extern void order_regs_for_local_alloc (void); - extern enum reg_class xtensa_regno_to_class (int regno); - extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 6cd9d5528..5b1aa9b23 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -55,6 +55,7 @@ along with GCC; see the 
file COPYING3. If not see - #include "dumpfile.h" - #include "hw-doloop.h" - #include "rtl-iter.h" -+#include "insn-attr.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -117,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = - - static void xtensa_option_override (void); - static enum internal_test map_test_to_internal_test (enum rtx_code); --static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); -+static rtx gen_int_relational (enum rtx_code, rtx, rtx); - static rtx gen_float_relational (enum rtx_code, rtx, rtx); - static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); - static rtx fixup_subreg_mem (rtx); -@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, - static section *xtensa_select_rtx_section (machine_mode, rtx, - unsigned HOST_WIDE_INT); - static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); -+static int xtensa_insn_cost (rtx_insn *, bool); - static int xtensa_register_move_cost (machine_mode, reg_class_t, - reg_class_t); - static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); -@@ -185,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); - static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); - static HOST_WIDE_INT xtensa_starting_frame_offset (void); - static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); -+static bool xtensa_function_ok_for_sibcall (tree, tree); - - - -@@ -208,6 +211,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS xtensa_rtx_costs -+#undef TARGET_INSN_COST -+#define TARGET_INSN_COST xtensa_insn_cost - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 - -@@ -333,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); - #undef 
TARGET_HAVE_SPECULATION_SAFE_VALUE - #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed - -+#undef TARGET_FUNCTION_OK_FOR_SIBCALL -+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - -@@ -341,42 +349,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; - bool - xtensa_simm8 (HOST_WIDE_INT v) - { -- return v >= -128 && v <= 127; -+ return IN_RANGE (v, -128, 127); - } - - - bool - xtensa_simm8x256 (HOST_WIDE_INT v) - { -- return (v & 255) == 0 && (v >= -32768 && v <= 32512); -+ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); - } - - - bool - xtensa_simm12b (HOST_WIDE_INT v) - { -- return v >= -2048 && v <= 2047; -+ return IN_RANGE (v, -2048, 2047); - } - - - static bool - xtensa_uimm8 (HOST_WIDE_INT v) - { -- return v >= 0 && v <= 255; -+ return IN_RANGE (v, 0, 255); - } - - - static bool - xtensa_uimm8x2 (HOST_WIDE_INT v) - { -- return (v & 1) == 0 && (v >= 0 && v <= 510); -+ return (v & 1) == 0 && IN_RANGE (v, 0, 510); - } - - - static bool - xtensa_uimm8x4 (HOST_WIDE_INT v) - { -- return (v & 3) == 0 && (v >= 0 && v <= 1020); -+ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); - } - - -@@ -446,19 +454,7 @@ xtensa_b4constu (HOST_WIDE_INT v) - bool - xtensa_mask_immediate (HOST_WIDE_INT v) - { --#define MAX_MASK_SIZE 16 -- int mask_size; -- -- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) -- { -- if ((v & 1) == 0) -- return false; -- v = v >> 1; -- if (v == 0) -- return true; -- } -- -- return false; -+ return IN_RANGE (exact_log2 (v + 1), 1, 16); - } - - -@@ -539,7 +535,7 @@ smalloffset_mem_p (rtx op) - return FALSE; - - val = INTVAL (offset); -- return (val & 3) == 0 && (val >= 0 && val <= 60); -+ return (val & 3) == 0 && IN_RANGE (val, 0, 60); - } - } - return FALSE; -@@ -678,8 +674,7 @@ map_test_to_internal_test (enum rtx_code test_code) - static rtx - gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - rtx cmp0, /* 
first operand to compare */ -- rtx cmp1, /* second operand to compare */ -- int *p_invert /* whether branch needs to reverse test */) -+ rtx cmp1 /* second operand to compare */) - { - struct cmp_info - { -@@ -711,6 +706,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - enum internal_test test; - machine_mode mode; - struct cmp_info *p_info; -+ int invert; - - test = map_test_to_internal_test (test_code); - gcc_assert (test != ITEST_MAX); -@@ -747,9 +743,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - } - - /* See if we need to invert the result. */ -- *p_invert = ((GET_CODE (cmp1) == CONST_INT) -- ? p_info->invert_const -- : p_info->invert_reg); -+ invert = ((GET_CODE (cmp1) == CONST_INT) -+ ? p_info->invert_const -+ : p_info->invert_reg); - - /* Comparison to constants, may involve adding 1 to change a LT into LE. - Comparison between two registers, may involve switching operands. */ -@@ -766,7 +762,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ - cmp1 = temp; - } - -- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); -+ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) -+ : p_info->test_code, -+ VOIDmode, cmp0, cmp1); - } - - -@@ -825,45 +823,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) - enum rtx_code test_code = GET_CODE (operands[0]); - rtx cmp0 = operands[1]; - rtx cmp1 = operands[2]; -- rtx cmp; -- int invert; -- rtx label1, label2; -+ rtx cmp, label; - - switch (mode) - { -+ case E_SFmode: -+ if (TARGET_HARD_FLOAT) -+ { -+ cmp = gen_float_relational (test_code, cmp0, cmp1); -+ break; -+ } -+ /* FALLTHRU */ -+ - case E_DFmode: - default: - fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); - - case E_SImode: -- invert = FALSE; -- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); -- break; -- -- case E_SFmode: -- if (!TARGET_HARD_FLOAT) -- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, -- cmp0, cmp1)); -- invert = FALSE; -- cmp = gen_float_relational (test_code, cmp0, cmp1); -+ cmp = gen_int_relational (test_code, cmp0, cmp1); - break; - } - - /* Generate the branch. */ -- -- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); -- label2 = pc_rtx; -- -- if (invert) -- { -- label2 = label1; -- label1 = pc_rtx; -- } -- -+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); - emit_jump_insn (gen_rtx_SET (pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, -- label1, -- label2))); -+ label, -+ pc_rtx))); - } - - -@@ -1035,6 +1021,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) - } - - -+/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) -+ into dst with synthesizing a such constant value from a sequence of -+ load-immediate / arithmetic ones, instead of a L32R instruction -+ (plus a constant in litpool). 
*/ -+ -+static void -+xtensa_emit_constantsynth (rtx dst, enum rtx_code code, -+ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT imm2) -+{ -+ gcc_assert (REG_P (dst)); -+ emit_move_insn (dst, GEN_INT (imm0)); -+ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, -+ dst, GEN_INT (imm1))); -+ if (gen_op) -+ emit_move_insn (dst, gen_op (dst, imm2)); -+} -+ -+static int -+xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, -+ rtx (*gen_op)(rtx, HOST_WIDE_INT), -+ HOST_WIDE_INT op_imm) -+{ -+ int shift = exact_log2 (srcval + 1); -+ -+ if (IN_RANGE (shift, 1, 31)) -+ { -+ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) -+ { -+ HOST_WIDE_INT imm0, imm1; -+ -+ if (srcval < -32768) -+ imm1 = -32768; -+ else if (srcval > 32512) -+ imm1 = 32512; -+ else -+ imm1 = srcval & ~255; -+ imm0 = srcval - imm1; -+ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) -+ imm0 -= 256, imm1 += 256; -+ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); -+ return 1; -+ } -+ -+ shift = ctz_hwi (srcval); -+ if (xtensa_simm12b (srcval >> shift)) -+ { -+ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, -+ gen_op, op_imm); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static rtx -+xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) -+{ -+ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); -+} -+ -+static rtx -+xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) -+{ -+ return imm == 7 -+ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), -+ reg) -+ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, -+ GEN_INT (floor_log2 (imm - 1))), -+ reg); -+} -+ -+int -+xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) -+{ -+ /* No need for synthesizing for what fits into MOVI instruction. */ -+ if (xtensa_simm12b (srcval)) -+ return 0; -+ -+ /* 2-insns substitution. 
*/ -+ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) -+ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) -+ return 1; -+ -+ /* 3-insns substitution. */ -+ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) -+ { -+ int shift, divisor; -+ -+ /* 2-insns substitution followed by SLLI. */ -+ shift = ctz_hwi (srcval); -+ if (IN_RANGE (shift, 1, 31) && -+ xtensa_constantsynth_2insn (dst, srcval >> shift, -+ xtensa_constantsynth_rtx_SLLI, -+ shift)) -+ return 1; -+ -+ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ -+ if (TARGET_ADDX) -+ for (divisor = 3; divisor <= 9; divisor += 2) -+ if (srcval % divisor == 0 && -+ xtensa_constantsynth_2insn (dst, srcval / divisor, -+ xtensa_constantsynth_rtx_ADDSUBX, -+ divisor)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+ - /* Emit insns to move operands[1] into operands[0]. - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move -@@ -1070,24 +1173,9 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - return 1; - } - -- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) -+ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 -+ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) - { -- /* Try to emit MOVI + SLLI sequence, that is smaller -- than L32R + literal. */ -- if (optimize >= 1 && ! 
optimize_debug && mode == SImode -- && CONST_INT_P (src) && register_operand (dst, mode)) -- { -- HOST_WIDE_INT srcval = INTVAL (src); -- int shift = ctz_hwi (srcval); -- -- if (xtensa_simm12b (srcval >> shift)) -- { -- emit_move_insn (dst, GEN_INT (srcval >> shift)); -- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); -- return 1; -- } -- } -- - src = force_const_mem (SImode, src); - operands[1] = src; - } -@@ -1315,7 +1403,7 @@ xtensa_expand_block_move (rtx *operands) - move_ratio = 4; - if (optimize > 2) - move_ratio = LARGEST_MOVE_RATIO; -- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ -+ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); - if (num_pieces > move_ratio) - return 0; - -@@ -1352,7 +1440,7 @@ xtensa_expand_block_move (rtx *operands) - temp[next] = gen_reg_rtx (mode[next]); - - x = adjust_address (src_mem, mode[next], offset_ld); -- emit_insn (gen_rtx_SET (temp[next], x)); -+ emit_move_insn (temp[next], x); - - offset_ld += next_amount; - bytes -= next_amount; -@@ -1362,9 +1450,9 @@ xtensa_expand_block_move (rtx *operands) - if (active[phase]) - { - active[phase] = false; -- -+ - x = adjust_address (dst_mem, mode[phase], offset_st); -- emit_insn (gen_rtx_SET (x, temp[phase])); -+ emit_move_insn (x, temp[phase]); - - offset_st += amount[phase]; - } -@@ -1375,6 +1463,246 @@ xtensa_expand_block_move (rtx *operands) - } - - -+/* Try to expand a block set operation to a sequence of RTL move -+ instructions. If not optimizing, or if the block size is not a -+ constant, or if the block is too large, or if the value to -+ initialize the block with is not a constant, the expansion -+ fails and GCC falls back to calling memset(). -+ -+ operands[0] is the destination -+ operands[1] is the length -+ operands[2] is the initialization value -+ operands[3] is the alignment */ -+ -+static int -+xtensa_sizeof_MOVI (HOST_WIDE_INT imm) -+{ -+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; -+} -+ -+int -+xtensa_expand_block_set_unrolled_loop (rtx *operands) -+{ -+ rtx dst_mem = operands[0]; -+ HOST_WIDE_INT bytes, value, align; -+ int expand_len, funccall_len; -+ rtx x, reg; -+ int offset; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ /* Insn expansion: a series of aligned memory stores. -+ Consist of S8I, S16I or S32I(.N). */ -+ expand_len += (bytes / align) * (TARGET_DENSITY -+ && align == 4 ? 2 : 3); -+ /* Insn expansion: the remainder, sub-aligned memory stores. -+ A combination of S8I and S16I as needed. */ -+ expand_len += ((bytes % align + 1) / 2) * 3; -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (dst_mem, 0); -+ if (!REG_P (x)) -+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); -+ switch (align) -+ { -+ case 1: -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (SImode, GEN_INT (value)); -+ -+ offset = 0; -+ do -+ { -+ int unit_size = MIN (bytes, align); -+ machine_mode unit_mode = (unit_size >= 4 ? SImode : -+ (unit_size >= 2 ? HImode : -+ QImode)); -+ unit_size = GET_MODE_SIZE (unit_mode); -+ -+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), -+ unit_mode == SImode ? reg -+ : convert_to_mode (unit_mode, reg, true)); -+ -+ offset += unit_size; -+ bytes -= unit_size; -+ } -+ while (bytes > 0); -+ -+ return 1; -+} -+ -+int -+xtensa_expand_block_set_small_loop (rtx *operands) -+{ -+ HOST_WIDE_INT bytes, value, align, count; -+ int expand_len, funccall_len; -+ rtx x, dst, end, reg; -+ machine_mode unit_mode; -+ rtx_code_label *label; -+ -+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) -+ return 0; -+ -+ bytes = INTVAL (operands[1]); -+ if (bytes <= 0) -+ return 0; -+ value = (int8_t)INTVAL (operands[2]); -+ align = INTVAL (operands[3]); -+ if (align > MOVE_MAX) -+ align = MOVE_MAX; -+ -+ /* Totally-aligned block only. */ -+ if (bytes % align != 0) -+ return 0; -+ count = bytes / align; -+ -+ /* If the Loop Option (zero-overhead looping) is configured and active, -+ almost no restrictions about the length of the block. */ -+ if (! (TARGET_LOOPS && optimize)) -+ { -+ /* If 4-byte aligned, small loop substitution is almost optimal, -+ thus limited to only offset to the end address for ADDI/ADDMI -+ instruction. */ -+ if (align == 4 -+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) -+ return 0; -+ -+ /* If no 4-byte aligned, loop count should be treated as the -+ constraint. 
*/ -+ if (align != 4 -+ && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) -+ return 0; -+ } -+ -+ /* Insn expansion: holding the init value. -+ Either MOV(.N) or L32R w/litpool. */ -+ if (align == 1) -+ expand_len = xtensa_sizeof_MOVI (value); -+ else if (value == 0 || value == -1) -+ expand_len = TARGET_DENSITY ? 2 : 3; -+ else -+ expand_len = 3 + 4; -+ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ -+ { -+ /* Insn translation: Either MOV(.N) or L32R w/litpool for the -+ loop count. */ -+ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) -+ : 3 + 4; -+ /* Insn translation: LOOP, the zero-overhead looping setup -+ instruction. */ -+ expand_len += 3; -+ /* Insn expansion: the loop body instructions. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3); -+ } -+ else /* NO zero-overhead looping */ -+ { -+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ -+ expand_len += bytes > 127 ? 3 -+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; -+ /* Insn expansion: the loop body and branch instruction. -+ For store, one of S8I, S16I or S32I(.N). -+ For advance, ADDI(.N). -+ For branch, BNE. */ -+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) -+ + (TARGET_DENSITY ? 2 : 3) + 3; -+ } -+ -+ /* Function call: preparing two arguments. */ -+ funccall_len = xtensa_sizeof_MOVI (value); -+ funccall_len += xtensa_sizeof_MOVI (bytes); -+ /* Function call: calling memset(). */ -+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; -+ -+ /* Apply expansion bonus (2x) if optimizing for speed. */ -+ if (optimize > 1 && !optimize_size) -+ funccall_len *= 2; -+ -+ /* Decide whether to expand or not, based on the sum of the length -+ of instructions. 
*/ -+ if (expand_len > funccall_len) -+ return 0; -+ -+ x = XEXP (operands[0], 0); -+ if (!REG_P (x)) -+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); -+ dst = gen_reg_rtx (SImode); -+ emit_move_insn (dst, x); -+ end = gen_reg_rtx (SImode); -+ if (TARGET_LOOPS && optimize) -+ x = force_reg (SImode, operands[1] /* the length */); -+ else -+ x = operands[1]; -+ emit_insn (gen_addsi3 (end, dst, x)); -+ switch (align) -+ { -+ case 1: -+ unit_mode = QImode; -+ break; -+ case 2: -+ value = (int16_t)((uint8_t)value * 0x0101U); -+ unit_mode = HImode; -+ break; -+ case 4: -+ value = (int32_t)((uint8_t)value * 0x01010101U); -+ unit_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ reg = force_reg (unit_mode, GEN_INT (value)); -+ -+ label = gen_label_rtx (); -+ emit_label (label); -+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); -+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); -+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); -+ -+ return 1; -+} -+ -+ - void - xtensa_expand_nonlocal_goto (rtx *operands) - { -@@ -1725,21 +2053,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) - - - char * --xtensa_emit_branch (bool inverted, bool immed, rtx *operands) -+xtensa_emit_branch (bool immed, rtx *operands) - { - static char result[64]; -- enum rtx_code code; -+ enum rtx_code code = GET_CODE (operands[3]); - const char *op; - -- code = GET_CODE (operands[3]); - switch (code) - { -- case EQ: op = inverted ? "ne" : "eq"; break; -- case NE: op = inverted ? "eq" : "ne"; break; -- case LT: op = inverted ? "ge" : "lt"; break; -- case GE: op = inverted ? "lt" : "ge"; break; -- case LTU: op = inverted ? "geu" : "ltu"; break; -- case GEU: op = inverted ? 
"ltu" : "geu"; break; -+ case EQ: op = "eq"; break; -+ case NE: op = "ne"; break; -+ case LT: op = "lt"; break; -+ case GE: op = "ge"; break; -+ case LTU: op = "ltu"; break; -+ case GEU: op = "geu"; break; - default: gcc_unreachable (); - } - -@@ -1758,32 +2085,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) - } - - --char * --xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) --{ -- static char result[64]; -- const char *op; -- -- switch (GET_CODE (operands[3])) -- { -- case EQ: op = inverted ? "bs" : "bc"; break; -- case NE: op = inverted ? "bc" : "bs"; break; -- default: gcc_unreachable (); -- } -- -- if (immed) -- { -- unsigned bitnum = INTVAL (operands[1]) & 0x1f; -- operands[1] = GEN_INT (bitnum); -- sprintf (result, "b%si\t%%0, %%d1, %%2", op); -- } -- else -- sprintf (result, "b%s\t%%0, %%1, %%2", op); -- -- return result; --} -- -- - char * - xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { -@@ -1792,12 +2093,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - const char *op; - - code = GET_CODE (operands[4]); -+ if (inverted) -+ code = reverse_condition (code); - if (isbool) - { - switch (code) - { -- case EQ: op = inverted ? "t" : "f"; break; -- case NE: op = inverted ? "f" : "t"; break; -+ case EQ: op = "f"; break; -+ case NE: op = "t"; break; - default: gcc_unreachable (); - } - } -@@ -1805,10 +2108,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - { - switch (code) - { -- case EQ: op = inverted ? "nez" : "eqz"; break; -- case NE: op = inverted ? "eqz" : "nez"; break; -- case LT: op = inverted ? "gez" : "ltz"; break; -- case GE: op = inverted ? 
"ltz" : "gez"; break; -+ case EQ: op = "eqz"; break; -+ case NE: op = "nez"; break; -+ case LT: op = "ltz"; break; -+ case GE: op = "gez"; break; - default: gcc_unreachable (); - } - } -@@ -1819,6 +2122,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) - } - - -+void -+xtensa_prepare_expand_call (int callop, rtx *operands) -+{ -+ rtx addr = XEXP (operands[callop], 0); -+ -+ if (flag_pic && SYMBOL_REF_P (addr) -+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -+ addr = gen_sym_PLT (addr); -+ -+ if (!call_insn_operand (addr, VOIDmode)) -+ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); -+} -+ -+ - char * - xtensa_emit_call (int callop, rtx *operands) - { -@@ -1837,6 +2154,24 @@ xtensa_emit_call (int callop, rtx *operands) - } - - -+char * -+xtensa_emit_sibcall (int callop, rtx *operands) -+{ -+ static char result[64]; -+ rtx tgt = operands[callop]; -+ -+ if (GET_CODE (tgt) == CONST_INT) -+ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", -+ INTVAL (tgt)); -+ else if (register_operand (tgt, VOIDmode)) -+ sprintf (result, "jx\t%%%d", callop); -+ else -+ sprintf (result, "j.l\t%%%d, a9", callop); -+ -+ return result; -+} -+ -+ - bool - xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) - { -@@ -2061,6 +2396,20 @@ xtensa_tls_referenced_p (rtx x) - } - - -+/* Helper function for "*shlrd_..." patterns. */ -+ -+enum rtx_code -+xtensa_shlrd_which_direction (rtx op0, rtx op1) -+{ -+ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) -+ return ASHIFT; /* shld */ -+ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) -+ return LSHIFTRT; /* shrd */ -+ -+ return UNKNOWN; -+} -+ -+ - /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ - - static bool -@@ -2364,7 +2713,7 @@ static void - printx (FILE *file, signed int val) - { - /* Print a hexadecimal value in a nice way. 
*/ -- if ((val > -0xa) && (val < 0xa)) -+ if (IN_RANGE (val, -9, 9)) - fprintf (file, "%d", val); - else if (val < 0) - fprintf (file, "-0x%x", -val); -@@ -2379,7 +2728,7 @@ void - print_operand (FILE *file, rtx x, int letter) - { - if (!x) -- error ("PRINT_OPERAND null pointer"); -+ error ("% null pointer"); - - switch (letter) - { -@@ -2424,17 +2773,11 @@ print_operand (FILE *file, rtx x, int letter) - case 'K': - if (GET_CODE (x) == CONST_INT) - { -- int num_bits = 0; - unsigned val = INTVAL (x); -- while (val & 1) -- { -- num_bits += 1; -- val = val >> 1; -- } -- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) -+ if (!xtensa_mask_immediate (val)) - fatal_insn ("invalid mask", x); - -- fprintf (file, "%d", num_bits); -+ fprintf (file, "%d", floor_log2 (val + 1)); - } - else - output_operand_lossage ("invalid %%K value"); -@@ -2584,7 +2927,7 @@ void - print_operand_address (FILE *file, rtx addr) - { - if (!addr) -- error ("PRINT_OPERAND_ADDRESS, null pointer"); -+ error ("%, null pointer"); - - switch (GET_CODE (addr)) - { -@@ -2750,7 +3093,7 @@ xtensa_call_save_reg(int regno) - return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || - df_regs_ever_live_p (regno); - -- if (crtl->calls_eh_return && regno >= 2 && regno < 4) -+ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) - return true; - - return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); -@@ -2870,7 +3213,7 @@ xtensa_expand_prologue (void) - int callee_save_size = cfun->machine->callee_save_size; - - /* -128 is a limit of single addi instruction. 
*/ -- if (total_size > 0 && total_size <= 128) -+ if (IN_RANGE (total_size, 1, 128)) - { - insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-total_size))); -@@ -2999,7 +3342,7 @@ xtensa_expand_prologue (void) - } - - void --xtensa_expand_epilogue (void) -+xtensa_expand_epilogue (bool sibcall_p) - { - if (!TARGET_WINDOWED_ABI) - { -@@ -3033,10 +3376,13 @@ xtensa_expand_epilogue (void) - if (xtensa_call_save_reg(regno)) - { - rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ rtx reg; - - offset -= UNITS_PER_WORD; -- emit_move_insn (gen_rtx_REG (SImode, regno), -+ emit_move_insn (reg = gen_rtx_REG (SImode, regno), - gen_frame_mem (SImode, x)); -+ if (regno == A0_REG && sibcall_p) -+ emit_use (reg); - } - } - -@@ -3071,7 +3417,8 @@ xtensa_expand_epilogue (void) - EH_RETURN_STACKADJ_RTX)); - } - cfun->machine->epilogue_done = true; -- emit_jump_insn (gen_return ()); -+ if (!sibcall_p) -+ emit_jump_insn (gen_return ()); - } - - bool -@@ -3697,7 +4044,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) - flags |= SECTION_BSS; /* @nobits */ - else - warning (0, "only uninitialized variables can be placed in a " -- ".bss section"); -+ "%<.bss%> section"); - } - - return flags; -@@ -3750,7 +4097,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - static bool - xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - int opno ATTRIBUTE_UNUSED, -- int *total, bool speed ATTRIBUTE_UNUSED) -+ int *total, bool speed) - { - int code = GET_CODE (x); - -@@ -3838,9 +4185,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case CLZ: -+ case CLRSB: - *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); - return true; - -+ case BSWAP: -+ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); -+ return true; -+ - case NOT: - *total = COSTS_N_INSNS (mode == DImode ? 
3 : 2); - return true; -@@ -3864,13 +4216,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - - case ABS: -+ case NEG: - { - if (mode == SFmode) - *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); - else if (mode == DFmode) - *total = COSTS_N_INSNS (50); -- else -+ else if (mode == DImode) - *total = COSTS_N_INSNS (4); -+ else -+ *total = COSTS_N_INSNS (1); - return true; - } - -@@ -3886,10 +4241,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - return true; - } - -- case NEG: -- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); -- return true; -- - case MULT: - { - if (mode == SFmode) -@@ -3929,11 +4280,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - case UMOD: - { - if (mode == DImode) -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - else if (TARGET_DIV32) - *total = COSTS_N_INSNS (32); - else -- *total = COSTS_N_INSNS (50); -+ *total = COSTS_N_INSNS (speed ? 100 : 50); - return true; - } - -@@ -3966,6 +4317,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, - } - } - -+static bool -+xtensa_is_insn_L32R_p(const rtx_insn *insn) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = XEXP (x, 1); -+ if (GET_CODE (x) == MEM) -+ { -+ x = XEXP (x, 0); -+ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) -+ && CONSTANT_POOL_ADDRESS_P (x); -+ } -+ } -+ -+ return false; -+} -+ -+/* Compute a relative costs of RTL insns. This is necessary in order to -+ achieve better RTL insn splitting/combination result. */ -+ -+static int -+xtensa_insn_cost (rtx_insn *insn, bool speed) -+{ -+ if (!(recog_memoized (insn) < 0)) -+ { -+ int len = get_attr_length (insn), n = (len + 2) / 3; -+ -+ if (len == 0) -+ return COSTS_N_INSNS (0); -+ -+ if (speed) /* For speed cost. */ -+ { -+ /* "L32R" may be particular slow (implementation-dependent). 
*/ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); -+ -+ /* Cost based on the pipeline model. */ -+ switch (get_attr_type (insn)) -+ { -+ case TYPE_STORE: -+ case TYPE_MOVE: -+ case TYPE_ARITH: -+ case TYPE_MULTI: -+ case TYPE_NOP: -+ case TYPE_FSTORE: -+ return COSTS_N_INSNS (n); -+ -+ case TYPE_LOAD: -+ return COSTS_N_INSNS (n - 1 + 2); -+ -+ case TYPE_JUMP: -+ case TYPE_CALL: -+ return COSTS_N_INSNS (n - 1 + 3); -+ -+ case TYPE_FCONV: -+ case TYPE_FLOAD: -+ case TYPE_MUL16: -+ case TYPE_MUL32: -+ case TYPE_RSR: -+ return COSTS_N_INSNS (n * 2); -+ -+ case TYPE_FMADD: -+ return COSTS_N_INSNS (n * 4); -+ -+ case TYPE_DIV32: -+ return COSTS_N_INSNS (n * 16); -+ -+ default: -+ break; -+ } -+ } -+ else /* For size cost. */ -+ { -+ /* Cost based on the instruction length. */ -+ if (get_attr_type (insn) != TYPE_UNKNOWN) -+ { -+ /* "L32R" itself plus constant in litpool. */ -+ if (xtensa_is_insn_L32R_p (insn)) -+ return COSTS_N_INSNS (2) + 1; -+ -+ /* Consider ".n" short instructions. */ -+ return COSTS_N_INSNS (n) - (n * 3 - len); -+ } -+ } -+ } -+ -+ /* Fall back. */ -+ return pattern_cost (PATTERN (insn), speed); -+} -+ - /* Worker function for TARGET_RETURN_IN_MEMORY. */ - - static bool -@@ -4491,4 +4934,16 @@ xtensa_asan_shadow_offset (void) - return HOST_WIDE_INT_UC (0x10000000); - } - -+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ -+static bool -+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Do not allow sibcalls if the Windowed Register Option is -+ configured. */ -+ if (TARGET_WINDOWED_ABI) -+ return false; -+ -+ return true; -+} -+ - #include "gt-xtensa.h" -diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h -index fa86a245e..3e9cbc943 100644 ---- a/gcc/config/xtensa/xtensa.h -+++ b/gcc/config/xtensa/xtensa.h -@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. 
If not see - #define HAVE_AS_TLS 0 - #endif - -+/* Define this if the target has no hardware divide instructions. */ -+#if !TARGET_DIV32 -+#define TARGET_HAS_NO_HW_DIVIDE -+#endif -+ - - /* Target CPU builtins. */ - #define TARGET_CPU_CPP_BUILTINS() \ -@@ -488,7 +493,7 @@ enum reg_class - used for this purpose since all function arguments are pushed on - the stack. */ - #define FUNCTION_ARG_REGNO_P(N) \ -- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) -+ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) - - /* Record the number of argument words seen so far, along with a flag to - indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 2a8e59ee9..124548dfe 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -25,6 +25,7 @@ - (A7_REG 7) - (A8_REG 8) - (A9_REG 9) -+ (A10_REG 10) - - (UNSPEC_NOP 2) - (UNSPEC_PLT 3) -@@ -83,6 +84,13 @@ - ;; the same template. - (define_mode_iterator HQI [HI QI]) - -+;; This code iterator is for *shlrd and its variants. -+(define_code_iterator ior_op [ior plus]) -+ -+;; This mode iterator allows the DC and SC patterns to be defined from -+;; the same template. -+(define_mode_iterator DSC [DC SC]) -+ - - ;; Attributes. - -@@ -98,7 +106,10 @@ - - ;; Describe a user's asm statement. - (define_asm_attributes -- [(set_attr "type" "multi")]) -+ [(set_attr "type" "multi") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) ;; Should be the maximum possible length -+ ;; of a single machine instruction. - - - ;; Pipeline model. -@@ -224,20 +235,42 @@ - - ;; Multiplication. 
- --(define_expand "mulsidi3" -+(define_expand "mulsidi3" - [(set (match_operand:DI 0 "register_operand") -- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) -- (any_extend:DI (match_operand:SI 2 "register_operand"))))] -+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] - "TARGET_MUL32_HIGH" - { - rtx temp = gen_reg_rtx (SImode); - emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -- emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -- operands[1], operands[2])); -+ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); - emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); - DONE; - }) - -+(define_expand "umulsidi3" -+ [(set (match_operand:DI 0 "register_operand") -+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] -+ "" -+{ -+ if (TARGET_MUL32_HIGH) -+ { -+ rtx temp = gen_reg_rtx (SImode); -+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); -+ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), -+ operands[1], operands[2])); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); -+ } -+ else -+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], SImode, -+ operands[2], SImode); -+ DONE; -+}) -+ - (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=a") - (truncate:SI -@@ -261,30 +294,16 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_insn "mulhisi3" -- [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (sign_extend:SI -- (match_operand:HI 1 "register_operand" "%r,r")) -- (sign_extend:SI -- (match_operand:HI 2 "register_operand" "r,r"))))] -- "TARGET_MUL16 || TARGET_MAC16" -- "@ -- mul16s\t%0, %1, %2 -- mul.aa.ll\t%1, %2" -- [(set_attr "type" 
"mul16,mac16") -- (set_attr "mode" "SI") -- (set_attr "length" "3,3")]) -- --(define_insn "umulhisi3" -+(define_insn "mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") -- (mult:SI (zero_extend:SI -+ (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) -- (zero_extend:SI -+ (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] - "TARGET_MUL16 || TARGET_MAC16" - "@ -- mul16u\t%0, %1, %2 -- umul.aa.ll\t%1, %2" -+ mul16\t%0, %1, %2 -+ mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) -@@ -429,7 +448,17 @@ - (set_attr "length" "3")]) - - --;; Count leading/trailing zeros and find first bit. -+;; Count redundant leading sign bits and leading/trailing zeros, -+;; and find first bit. -+ -+(define_insn "clrsbsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] -+ "TARGET_NSA" -+ "nsa\t%0, %1" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "3")]) - - (define_insn "clzsi2" - [(set (match_operand:SI 0 "register_operand" "=a") -@@ -471,23 +500,78 @@ - - ;; Byte swap. 
- --(define_insn "bswapsi2" -- [(set (match_operand:SI 0 "register_operand" "=&a") -- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "15")]) -+(define_insn "bswaphi2" -+ [(set (match_operand:HI 0 "register_operand" "=a") -+ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) -+ (clobber (match_scratch:HI 2 "=&a"))] -+ "" -+ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "HI") -+ (set_attr "length" "9")]) - --(define_insn "bswapdi2" -- [(set (match_operand:DI 0 "register_operand" "=&a") -- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] -- "!optimize_size" -- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" -- [(set_attr "type" "arith") -- (set_attr "mode" "DI") -- (set_attr "length" "27")]) -+(define_expand "bswapsi2" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (bswap:SI (match_operand:SI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1" -+{ -+ /* GIMPLE manual byte-swapping recognition is now activated. -+ For both built-in and manual bswaps, emit corresponding library call -+ if optimizing for size, or a series of dedicated machine instructions -+ if otherwise. 
*/ -+ if (optimize_size) -+ emit_library_call_value (optab_libfunc (bswap_optab, SImode), -+ operands[0], LCT_NORMAL, SImode, -+ operands[1], SImode); -+ else -+ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "bswapsi2_internal" -+ [(set (match_operand:SI 0 "register_operand" "=a,&a") -+ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) -+ (clobber (match_scratch:SI 2 "=&a,X"))] -+ "!optimize_debug && optimize > 1 && !optimize_size" -+{ -+ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); -+ const char *init = "ssai\t8\;"; -+ static char result[128]; -+ if (prev_insn && NONJUMP_INSN_P (prev_insn)) -+ { -+ rtx x = PATTERN (prev_insn); -+ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 -+ && GET_CODE (XVECEXP (x, 0, 0)) == SET -+ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) -+ { -+ x = XEXP (XVECEXP (x, 0, 0), 1); -+ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) -+ init = ""; -+ } -+ } -+ sprintf (result, -+ (which_alternative == 0) -+ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" -+ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", -+ init); -+ return result; -+} -+ [(set_attr "type" "arith,arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "15,15")]) -+ -+(define_expand "bswapdi2" -+ [(set (match_operand:DI 0 "register_operand" "") -+ (bswap:DI (match_operand:DI 1 "register_operand" "")))] -+ "!optimize_debug && optimize > 1 && optimize_size" -+{ -+ /* Replace with a single DImode library call. -+ Without this, two SImode library calls are emitted. */ -+ emit_library_call_value (optab_libfunc (bswap_optab, DImode), -+ operands[0], LCT_NORMAL, DImode, -+ operands[1], DImode); -+ DONE; -+}) - - - ;; Negation and one's complement. 
-@@ -501,16 +585,26 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "one_cmplsi2" -- [(set (match_operand:SI 0 "register_operand" "") -- (not:SI (match_operand:SI 1 "register_operand" "")))] -+(define_insn_and_split "one_cmplsi2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (not:SI (match_operand:SI 1 "register_operand" "r")))] - "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (const_int -1)) -+ (set (match_dup 0) -+ (xor:SI (match_dup 1) -+ (match_dup 2)))] - { -- rtx temp = gen_reg_rtx (SImode); -- emit_insn (gen_movsi (temp, constm1_rtx)); -- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); -- DONE; --}) -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) - - (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") -@@ -536,6 +630,103 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -+(define_insn_and_split "*andsi3_bitcmpl" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) -+ (match_operand:SI 2 "register_operand" "r")))] -+ "" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 3) -+ (and:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (xor:SI (match_dup 3) -+ (match_dup 2)))] -+{ -+ operands[3] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_pow2_minus_one" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ashift:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (lshiftrt:SI (match_dup 0) -+ 
(match_dup 2)))] -+{ -+ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[2]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*andsi3_const_negative_pow2" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "const_int_operand" "i")))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (lshiftrt:SI (match_dup 1) -+ (match_dup 2))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*andsi3_const_shifted_mask" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "shifted_mask_operand" "i")))] -+ "! 
xtensa_simm12b (INTVAL (operands[2]))" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (zero_extract:SI (match_dup 1) -+ (match_dup 3) -+ (match_dup 4))) -+ (set (match_dup 0) -+ (ashift:SI (match_dup 0) -+ (match_dup 2)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[2]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[2] = GEN_INT (shift); -+ operands[3] = GEN_INT (mask_size); -+ operands[4] = GEN_INT (mask_pos); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && ctz_hwi (INTVAL (operands[2])) == 1") -+ (const_int 5) -+ (const_int 6)))]) -+ - (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=a") - (ior:SI (match_operand:SI 1 "register_operand" "%r") -@@ -634,7 +825,7 @@ - - ;; Field extract instructions. - --(define_expand "extv" -+(define_expand "extvsi" - [(set (match_operand:SI 0 "register_operand" "") - (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -649,12 +840,12 @@ - if (!lsbitnum_operand (operands[3], SImode)) - FAIL; - -- emit_insn (gen_extv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extv_internal" -+(define_insn "extvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (sign_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "sext_fldsz_operand" "i") -@@ -669,7 +860,7 @@ - (set_attr "mode" "SI") - (set_attr "length" "3")]) - --(define_expand "extzv" -+(define_expand "extzvsi" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -678,12 +869,12 @@ - 
{ - if (!extui_fldsz_operand (operands[2], SImode)) - FAIL; -- emit_insn (gen_extzv_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_extzvsi_internal (operands[0], operands[1], -+ operands[2], operands[3])); - DONE; - }) - --(define_insn "extzv_internal" -+(define_insn "extzvsi_internal" - [(set (match_operand:SI 0 "register_operand" "=a") - (zero_extract:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "extui_fldsz_operand" "i") -@@ -757,11 +948,14 @@ - because of offering further optimization opportunities. */ - if (register_operand (operands[0], DImode)) - { -- rtx first, second; -- -- split_double (operands[1], &first, &second); -- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); -- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); -+ rtx lowpart, highpart; -+ -+ if (TARGET_BIG_ENDIAN) -+ split_double (operands[1], &highpart, &lowpart); -+ else -+ split_double (operands[1], &lowpart, &highpart); -+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); -+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); - DONE; - } - -@@ -782,7 +976,7 @@ - "register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -831,6 +1025,19 @@ - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) - -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ rtx x = avoid_constant_pool_reference (operands[1]); -+ if (! CONST_INT_P (x)) -+ FAIL; -+ if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) -+ emit_move_insn (operands[0], x); -+}) -+ - ;; 16-bit Integer moves - - (define_expand "movhi" -@@ -1035,6 +1242,43 @@ - (set_attr "mode" "SF") - (set_attr "length" "3")]) - -+(define_split -+ [(set (match_operand:SF 0 "register_operand") -+ (match_operand:SF 1 "constantpool_operand"))] -+ "! optimize_debug && reload_completed" -+ [(const_int 0)] -+{ -+ int i = 0; -+ rtx x = XEXP (operands[1], 0); -+ long l[2]; -+ if (GET_CODE (x) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (x)) -+ x = get_pool_constant (x); -+ else if (GET_CODE (x) == CONST) -+ { -+ x = XEXP (x, 0); -+ gcc_assert (GET_CODE (x) == PLUS -+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) -+ && CONST_INT_P (XEXP (x, 1))); -+ i = INTVAL (XEXP (x, 1)); -+ gcc_assert (i == 0 || i == 4); -+ i /= 4; -+ x = get_pool_constant (XEXP (x, 0)); -+ } -+ else -+ gcc_unreachable (); -+ if (GET_MODE (x) == SFmode) -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); -+ else if (GET_MODE (x) == DFmode) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ else -+ FAIL; -+ x = gen_rtx_REG (SImode, REGNO (operands[0])); -+ if (! 
xtensa_constantsynth (x, l[i])) -+ emit_move_insn (x, GEN_INT (l[i])); -+}) -+ - ;; 64-bit floating point moves - - (define_expand "movdf" -@@ -1058,7 +1302,7 @@ - "register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode)" - "#" -- "reload_completed" -+ "&& reload_completed" - [(set (match_dup 0) (match_dup 2)) - (set (match_dup 1) (match_dup 3))] - { -@@ -1085,6 +1329,22 @@ - DONE; - }) - -+;; Block sets -+ -+(define_expand "setmemsi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:SI 1 "") -+ (match_operand:SI 2 "") -+ (match_operand:SI 3 "const_int_operand")] -+ "!optimize_debug && optimize" -+{ -+ if (xtensa_expand_block_set_unrolled_loop (operands)) -+ DONE; -+ if (xtensa_expand_block_set_small_loop (operands)) -+ DONE; -+ FAIL; -+}) -+ - - ;; Shift instructions. - -@@ -1097,16 +1357,6 @@ - operands[1] = xtensa_copy_incoming_a7 (operands[1]); - }) - --(define_insn "*ashlsi3_1" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (const_int 1)))] -- "TARGET_DENSITY" -- "add.n\t%0, %1, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "2")]) -- - (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1119,16 +1369,14 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashlsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashift:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8b\t%2\;sll\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -+(define_split -+ [(set (match_operand:SI 0 "register_operand") -+ (ashift:SI (match_operand:SI 1 "register_operand") -+ (const_int 1)))] -+ "TARGET_DENSITY" -+ [(set (match_dup 0) -+ (plus:SI (match_dup 1) -+ (match_dup 1)))]) - - 
(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") -@@ -1142,17 +1390,6 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*ashrsi3_3x" -- [(set (match_operand:SI 0 "register_operand" "=a") -- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -- "" -- "ssa8l\t%2\;sra\t%0, %1" -- [(set_attr "type" "arith") -- (set_attr "mode" "SI") -- (set_attr "length" "6")]) -- - (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") -@@ -1162,9 +1399,9 @@ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) -- return "srli\t%0, %1, %R2"; -+ return "srli\t%0, %1, %R2"; - else -- return "extui\t%0, %1, %R2, %L2"; -+ return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; - } -@@ -1172,13 +1409,170 @@ - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) - --(define_insn "*lshrsi3_3x" -+(define_insn "*shift_per_byte" - [(set (match_operand:SI 0 "register_operand" "=a") -- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") -- (ashift:SI (match_operand:SI 2 "register_operand" "r") -- (const_int 3))))] -+ (match_operator:SI 3 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]))] -+ "!optimize_debug && optimize" -+{ -+ switch (GET_CODE (operands[3])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; -+ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; -+ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_0" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 
"register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i"))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shift_per_byte_omit_AND_1" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (match_operator:SI 4 "xtensa_shift_per_byte_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 3 "const_int_operand" "i")))]))] -+ "!optimize_debug && optimize -+ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 5) -+ (neg:SI (match_dup 2))) -+ (set (match_dup 0) -+ (match_op_dup 4 -+ [(match_dup 1) -+ (ashift:SI (match_dup 5) -+ (const_int 3))]))] -+{ -+ operands[5] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "9")]) -+ -+(define_insn "*shlrd_reg_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r")]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (match_dup 2))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr 
"length" "6")]) -+ -+(define_insn "*shlrd_const_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 3 "const_int_operand" "i")]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 2 "register_operand" "r") -+ (match_operand:SI 4 "const_int_operand" "i")])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && IN_RANGE (INTVAL (operands[3]), 1, 31) -+ && IN_RANGE (INTVAL (operands[4]), 1, 31) -+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" -+{ -+ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) -+ { -+ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; -+ case LSHIFTRT: return "ssai\t%R3\;src\t%0, %2, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn "*shlrd_per_byte_" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" -+ [(match_operand:SI 1 "register_operand" "r") -+ (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3))]) -+ (match_operator:SI 5 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" -+{ -+ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) -+ { -+ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; -+ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; -+ default: gcc_unreachable (); -+ } -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*shlrd_per_byte__omit_AND" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" -+ 
[(match_operand:SI 1 "register_operand" "r") -+ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") -+ (const_int 3)) -+ (match_operand:SI 4 "const_int_operand" "i"))]) -+ (match_operator:SI 6 "logical_shift_operator" -+ [(match_operand:SI 3 "register_operand" "r") -+ (neg:SI (and:SI (ashift:SI (match_dup 2) -+ (const_int 3)) -+ (match_dup 4)))])))] -+ "!optimize_debug && optimize -+ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN -+ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (ior_op:SI (match_op_dup 5 -+ [(match_dup 1) -+ (ashift:SI (match_dup 2) -+ (const_int 3))]) -+ (match_op_dup 6 -+ [(match_dup 3) -+ (neg:SI (ashift:SI (match_dup 2) -+ (const_int 3)))])))] - "" -- "ssa8l\t%2\;srl\t%0, %1" - [(set_attr "type" "arith") - (set_attr "mode" "SI") - (set_attr "length" "6")]) -@@ -1239,28 +1633,13 @@ - (define_insn "*btrue" - [(set (pc) - (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "branch_operand" "K,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*bfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "branch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "branch_operand" "K,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1269,28 +1648,13 @@ - (define_insn "*ubtrue" - [(set (pc) - (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 
"register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -+ [(match_operand:SI 0 "register_operand" "r,r") -+ (match_operand:SI 1 "ubranch_operand" "L,r")]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_branch (false, which_alternative == 0, operands); --} -- [(set_attr "type" "jump,jump") -- (set_attr "mode" "none") -- (set_attr "length" "3,3")]) -- --(define_insn "*ubfalse" -- [(set (pc) -- (if_then_else (match_operator 3 "ubranch_operator" -- [(match_operand:SI 0 "register_operand" "r,r") -- (match_operand:SI 1 "ubranch_operand" "L,r")]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" --{ -- return xtensa_emit_branch (true, which_alternative == 0, operands); -+ return xtensa_emit_branch (which_alternative == 0, operands); - } - [(set_attr "type" "jump,jump") - (set_attr "mode" "none") -@@ -1301,80 +1665,178 @@ - (define_insn "*bittrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") -+ (const_int 1) -+ (match_operand:SI 1 "arith_operand" "J,r")) - (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { -- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); -+ static char result[64]; -+ char op; -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: op = 'c'; break; -+ case NE: op = 's'; break; -+ default: gcc_unreachable (); -+ } -+ if (which_alternative == 0) -+ { -+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); -+ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); -+ } -+ else -+ sprintf (result, "bb%c\t%%0, %%1, %%2", op); -+ return result; - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*bitfalse" -+(define_insn "*masktrue" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- 
[(zero_extract:SI -- (match_operand:SI 0 "register_operand" "r,r") -- (const_int 1) -- (match_operand:SI 1 "arith_operand" "J,r")) -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "register_operand" "r")) - (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] - "" - { -- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); -+ switch (GET_CODE (operands[3])) -+ { -+ case EQ: return "bnone\t%0, %1, %2"; -+ case NE: return "bany\t%0, %1, %2"; -+ default: gcc_unreachable (); -+ } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*masktrue" -+(define_insn "*masktrue_bitcmpl" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -+ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) -+ (match_operand:SI 1 "register_operand" "r")) -+ (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - switch (GET_CODE (operands[3])) - { -- case EQ: return "bnone\t%0, %1, %2"; -- case NE: return "bany\t%0, %1, %2"; -- default: gcc_unreachable (); -+ case EQ: return "ball\t%0, %1, %2"; -+ case NE: return "bnall\t%0, %1, %2"; -+ default: gcc_unreachable (); - } - } - [(set_attr "type" "jump") - (set_attr "mode" "none") - (set_attr "length" "3")]) - --(define_insn "*maskfalse" -+(define_insn_and_split "*masktrue_const_pow2_minus_one" - [(set (pc) - (if_then_else (match_operator 3 "boolean_operator" -- [(and:SI (match_operand:SI 0 "register_operand" "r") -- (match_operand:SI 1 "register_operand" "r")) -- (const_int 0)]) -- (pc) -- (label_ref (match_operand 2 "" ""))))] -- "" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE 
(exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (ashift:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] - { -- switch (GET_CODE (operands[3])) -- { -- case EQ: return "bany\t%0, %1, %2"; -- case NE: return "bnone\t%0, %1, %2"; -- default: gcc_unreachable (); -- } -+ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); -+ operands[4] = gen_reg_rtx (SImode); - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && INTVAL (operands[1]) == 0x7FFFFFFF") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*masktrue_const_negative_pow2" -+ [(set (pc) -+ (if_then_else (match_operator 3 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "const_int_operand" "i")) -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 4) -+ (lshiftrt:SI (match_dup 0) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 3 -+ [(match_dup 4) -+ (const_int 0)]) -+ (label_ref (match_dup 2)) -+ (pc)))] -+{ -+ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); -+ operands[4] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set_attr "length" "6")]) -+ -+(define_insn_and_split "*masktrue_const_shifted_mask" -+ [(set (pc) -+ (if_then_else (match_operator 4 "boolean_operator" -+ [(and:SI (match_operand:SI 0 "register_operand" "r") -+ (match_operand:SI 1 "shifted_mask_operand" "i")) -+ (match_operand:SI 2 "const_int_operand" "i")]) -+ (label_ref (match_operand 3 "" "")) -+ (pc)))] -+ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 -+ && 
xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" -+ "#" -+ "&& can_create_pseudo_p ()" -+ [(set (match_dup 6) -+ (zero_extract:SI (match_dup 0) -+ (match_dup 5) -+ (match_dup 1))) -+ (set (pc) -+ (if_then_else (match_op_dup 4 -+ [(match_dup 6) -+ (match_dup 2)]) -+ (label_ref (match_dup 3)) -+ (pc)))] -+{ -+ HOST_WIDE_INT mask = INTVAL (operands[1]); -+ int shift = ctz_hwi (mask); -+ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); -+ int mask_pos = shift; -+ if (BITS_BIG_ENDIAN) -+ mask_pos = (32 - (mask_size + shift)) & 0x1f; -+ operands[1] = GEN_INT (mask_pos); -+ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); -+ operands[5] = GEN_INT (mask_size); -+ operands[6] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "jump") -+ (set_attr "mode" "none") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY -+ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") -+ (const_int 5) -+ (const_int 6)))]) - - - ;; Zero-overhead looping support. 
-@@ -1696,18 +2158,13 @@ - (match_operand 1 "" ""))] - "" - { -- rtx addr = XEXP (operands[0], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (0, operands); - }) - - (define_insn "call_internal" - [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) - (match_operand 1 "" "i"))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (0, operands); - } -@@ -1721,19 +2178,14 @@ - (match_operand 2 "" "")))] - "" - { -- rtx addr = XEXP (operands[1], 0); -- if (flag_pic && GET_CODE (addr) == SYMBOL_REF -- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) -- addr = gen_sym_PLT (addr); -- if (!call_insn_operand (addr, VOIDmode)) -- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); -+ xtensa_prepare_expand_call (1, operands); - }) - - (define_insn "call_value_internal" - [(set (match_operand 0 "register_operand" "=a") - (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) - (match_operand 2 "" "i")))] -- "" -+ "!SIBLING_CALL_P (insn)" - { - return xtensa_emit_call (1, operands); - } -@@ -1741,6 +2193,70 @@ - (set_attr "mode" "none") - (set_attr "length" "3")]) - -+(define_expand "sibcall" -+ [(call (match_operand 0 "memory_operand" "") -+ (match_operand 1 "" ""))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (0, operands); -+}) -+ -+(define_insn "sibcall_internal" -+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) -+ (match_operand 1 "" "i"))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (0, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(call (mem:SI (match_operand:SI 0 "register_operand")) -+ (match_operand 1 ""))] -+ "reload_completed -+ && 
!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[0]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 0)) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 1))]) -+ -+(define_expand "sibcall_value" -+ [(set (match_operand 0 "register_operand" "") -+ (call (match_operand 1 "memory_operand" "") -+ (match_operand 2 "" "")))] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_prepare_expand_call (1, operands); -+}) -+ -+(define_insn "sibcall_value_internal" -+ [(set (match_operand 0 "register_operand" "=a") -+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) -+ (match_operand 2 "" "i")))] -+ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" -+{ -+ return xtensa_emit_sibcall (1, operands); -+} -+ [(set_attr "type" "call") -+ (set_attr "mode" "none") -+ (set_attr "length" "3")]) -+ -+(define_split -+ [(set (match_operand 0 "register_operand") -+ (call (mem:SI (match_operand:SI 1 "register_operand")) -+ (match_operand 2 "")))] -+ "reload_completed -+ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) -+ && IN_RANGE (REGNO (operands[1]), 12, 15)" -+ [(set (reg:SI A10_REG) -+ (match_dup 1)) -+ (set (match_dup 0) -+ (call (mem:SI (reg:SI A10_REG)) -+ (match_dup 2)))]) -+ - (define_insn "entry" - [(set (reg:SI A1_REG) - (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] -@@ -1762,7 +2278,10 @@ - } - [(set_attr "type" "jump") - (set_attr "mode" "none") -- (set_attr "length" "2")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - - ;; Miscellaneous instructions. 
-@@ -1805,7 +2324,15 @@ - [(return)] - "" - { -- xtensa_expand_epilogue (); -+ xtensa_expand_epilogue (false); -+ DONE; -+}) -+ -+(define_expand "sibcall_epilogue" -+ [(return)] -+ "!TARGET_WINDOWED_ABI" -+{ -+ xtensa_expand_epilogue (true); - DONE; - }) - -@@ -1817,7 +2344,10 @@ - } - [(set_attr "type" "nop") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - (define_expand "nonlocal_goto" - [(match_operand:SI 0 "general_operand" "") -@@ -1881,8 +2411,9 @@ - [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] - "" - "" -- [(set_attr "length" "0") -- (set_attr "type" "nop")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - ;; Do not schedule instructions accessing memory before this point. - -@@ -1901,7 +2432,9 @@ - (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] - "" - "" -- [(set_attr "length" "0")]) -+ [(set_attr "type" "nop") -+ (set_attr "mode" "none") -+ (set_attr "length" "0")]) - - (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] -@@ -1914,7 +2447,10 @@ - } - [(set_attr "type" "trap") - (set_attr "mode" "none") -- (set_attr "length" "3")]) -+ (set (attr "length") -+ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") -+ (const_int 2) -+ (const_int 3)))]) - - ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't - ;; know if a frame pointer is required until the reload pass, and -@@ -2177,3 +2713,103 @@ - xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); - DONE; - }) -+ -+(define_insn_and_split "*round_up_to_even" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 1)) -+ (const_int -2)))] -+ "" -+ "#" -+ "can_create_pseudo_p ()" -+ [(set (match_dup 2) -+ (and:SI (match_dup 1) -+ (const_int 1))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 2) -+ 
(match_dup 1)))] -+{ -+ operands[2] = gen_reg_rtx (SImode); -+} -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_insn_and_split "*signed_ge_zero" -+ [(set (match_operand:SI 0 "register_operand" "=a") -+ (ge:SI (match_operand:SI 1 "register_operand" "r") -+ (const_int 0)))] -+ "" -+ "#" -+ "" -+ [(set (match_dup 0) -+ (ashiftrt:SI (match_dup 1) -+ (const_int 31))) -+ (set (match_dup 0) -+ (plus:SI (match_dup 0) -+ (const_int 1)))] -+ "" -+ [(set_attr "type" "arith") -+ (set_attr "mode" "SI") -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_DENSITY") -+ (const_int 5) -+ (const_int 6)))]) -+ -+(define_peephole2 -+ [(set (match_operand:SI 0 "register_operand") -+ (match_operand:SI 6 "reload_operand")) -+ (set (match_operand:SI 1 "register_operand") -+ (match_operand:SI 7 "reload_operand")) -+ (set (match_operand:SF 2 "register_operand") -+ (match_operand:SF 4 "register_operand")) -+ (set (match_operand:SF 3 "register_operand") -+ (match_operand:SF 5 "register_operand"))] -+ "REGNO (operands[0]) == REGNO (operands[4]) -+ && REGNO (operands[1]) == REGNO (operands[5]) -+ && peep2_reg_dead_p (4, operands[0]) -+ && peep2_reg_dead_p (4, operands[1])" -+ [(set (match_dup 2) -+ (match_dup 6)) -+ (set (match_dup 3) -+ (match_dup 7))] -+{ -+ uint32_t check = 0; -+ int i; -+ for (i = 0; i <= 3; ++i) -+ { -+ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); -+ if (check & mask) -+ FAIL; -+ check |= mask; -+ } -+ operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); -+ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); -+}) -+ -+(define_split -+ [(clobber (match_operand:DSC 0 "register_operand"))] -+ "GP_REG_P (REGNO (operands[0]))" -+ [(const_int 0)] -+{ -+ unsigned int regno = REGNO (operands[0]); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ rtx_insn *insn; -+ rtx x; -+ if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno -+ && (insn = next_nonnote_nondebug_insn (insn)) -+ && NONJUMP_INSN_P (insn) -+ && GET_CODE (x = PATTERN (insn)) == SET -+ && REG_P (x = XEXP (x, 0)) -+ && GET_MODE (x) == inner_mode -+ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) -+ FAIL; -+}) -diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt -index aef67970b..97aa44f92 100644 ---- a/gcc/config/xtensa/xtensa.opt -+++ b/gcc/config/xtensa/xtensa.opt -@@ -27,9 +27,13 @@ Target Report Mask(FORCE_NO_PIC) - Disable position-independent code (PIC) for use in OS kernel code. - - mlongcalls --Target -+Target Mask(LONGCALLS) - Use indirect CALLXn instructions for large programs. - -+mextra-l32r-costs= -+Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) -+Set extra memory access cost for L32R instruction, in clock-cycle units. -+ - mtarget-align - Target - Automatically align branch targets to reduce branch penalties. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index eabeec944..c35f51afb 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. - -mtext-section-literals -mno-text-section-literals @gol - -mauto-litpools -mno-auto-litpools @gol - -mtarget-align -mno-target-align @gol ---mlongcalls -mno-longcalls} -+-mlongcalls -mno-longcalls @gol -+-mextra-l32r-costs=@var{cycles}} - - @emph{zSeries Options} - See S/390 and zSeries Options. -@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call - instructions---look at the disassembled object code to see the actual - instructions. Note that the assembler uses an indirect call for - every cross-file call, not just those that really are out of range. 
-+ -+@item -mextra-l32r-costs=@var{n} -+@opindex mextra-l32r-costs -+Specify an extra cost of instruction RAM/ROM access for @code{L32R} -+instructions, in clock cycles. This affects, when optimizing for speed, -+whether loading a constant from literal pool using @code{L32R} or -+synthesizing the constant from a small one with a couple of arithmetic -+instructions. The default value is 0. - @end table - - @node zSeries Options -diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -new file mode 100644 -index 000000000..ba61c6f37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+extern void foo(void); -+ -+void BNONE_test(int a, int b) -+{ -+ if (a & b) -+ foo(); -+} -+ -+void BANY_test(int a, int b) -+{ -+ if (!(a & b)) -+ foo(); -+} -+ -+void BALL_test(int a, int b) -+{ -+ if (~a & b) -+ foo(); -+} -+ -+void BNALL_test(int a, int b) -+{ -+ if (!(~a & b)) -+ foo(); -+} -+ -+/* { dg-final { scan-assembler-times "bnone" 1 } } */ -+/* { dg-final { scan-assembler-times "bany" 1 } } */ -+/* { dg-final { scan-assembler-times "ball" 1 } } */ -+/* { dg-final { scan-assembler-times "bnall" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -new file mode 100644 -index 000000000..a0c885baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return 
__builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -new file mode 100644 -index 000000000..4cf95b925 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "ssai" 4 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -new file mode 100644 -index 000000000..1e010fd62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os" } */ -+ -+unsigned int test_0(unsigned int a) -+{ -+ return (a & 0x000000FF) << 24 | -+ (a & 0x0000FF00) << 8 | -+ (a & 0x00FF0000) >> 8 | -+ (a & 0xFF000000) >> 24; -+} -+ -+unsigned int test_1(unsigned int a) -+{ -+ union -+ { -+ unsigned int i; -+ unsigned char a[4]; -+ } u, v; -+ u.i = a; -+ v.a[0] = u.a[3]; -+ v.a[1] = u.a[2]; -+ v.a[2] = u.a[1]; -+ v.a[3] = u.a[0]; -+ return v.i; -+} -+ -+unsigned int test_2(unsigned int a) -+{ -+ return __builtin_bswap32(a); -+} -+ -+unsigned long long test_3(unsigned long long a) -+{ -+ return __builtin_bswap64(a); -+} -+ -+/* { dg-final { scan-assembler-times "call" 4 } } */ 
-diff --git a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -new file mode 100644 -index 000000000..6a04aaeef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int check_zero_byte(int v) -+{ -+ return (v - 0x01010101) & ~v & 0x80808080; -+} -+ -+/* { dg-final { scan-assembler-not "movi" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -new file mode 100644 -index 000000000..ec2606ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+int test_0(void) -+{ -+ return 4095; -+} -+ -+int test_1(void) -+{ -+ return 2147483647; -+} -+ -+int test_2(void) -+{ -+ return -34816; -+} -+ -+int test_3(void) -+{ -+ return -2049; -+} -+ -+int test_4(void) -+{ -+ return 2048; -+} -+ -+int test_5(void) -+{ -+ return 34559; -+} -+ -+int test_6(void) -+{ -+ return 43680; -+} -+ -+void test_7(int *p) -+{ -+ *p = -1432354816; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -new file mode 100644 -index 000000000..f3c4a1c7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mextra-l32r-costs=3" } */ -+ -+int test_0(void) -+{ -+ return 134217216; -+} -+ -+int test_1(void) -+{ -+ return -27604992; -+} -+ -+int test_2(void) -+{ -+ return -162279; -+} -+ -+void test_3(int *p) -+{ -+ *p = 192437; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -new file mode 100644 -index 000000000..11e5d5242 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os } */ -+ -+void test(unsigned int count, double array[]) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ array[i] = 1.0; -+} -+ -+/* { dg-final { scan-assembler-not "l32r" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -new file mode 100644 -index 000000000..c8f987ccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+unsigned int test_0(const void *addr) -+{ -+ unsigned int n = (unsigned int)addr; -+ const unsigned int *a = (const unsigned int*)(n & ~3); -+ n = (n & 3) * 8; -+ return (a[0] >> n) | (a[1] << (32 - n)); -+} -+ -+unsigned int test_1(unsigned int a, unsigned int b) -+{ -+ return (a >> 16) + (b << 16); -+} -+ -+/* { dg-final { scan-assembler-times "src" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -new file mode 100644 -index 000000000..608f65fd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1" } */ -+ -+int one_cmpl_abs(int a) -+{ -+ return a < 0 ? 
~a : a; -+} -+ -+/* { dg-final { scan-assembler-not "bgez" } } */ -diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -new file mode 100644 -index 000000000..7a4018796 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -foptimize-sibling-calls" } */ -+ -+extern int foo(int); -+extern void bar(int); -+ -+int test_0(int a) { -+ return foo(a); -+} -+ -+void test_1(int a) { -+ bar(a); -+} -+ -+int test_2(int (*a)(void)) { -+ bar(0); -+ return a(); -+} -+ -+/* { dg-final { scan-assembler-not "ret" } } */ -diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S -index b19deae14..ad9072c40 100644 ---- a/libgcc/config/xtensa/lib1funcs.S -+++ b/libgcc/config/xtensa/lib1funcs.S -@@ -456,6 +456,29 @@ __nsau_data: - #endif /* L_clz */ - - -+#ifdef L_clrsbsi2 -+ .align 4 -+ .global __clrsbsi2 -+ .type __clrsbsi2, @function -+__clrsbsi2: -+ leaf_entry sp, 16 -+#if XCHAL_HAVE_NSA -+ nsa a2, a2 -+#else -+ srai a3, a2, 31 -+ xor a3, a3, a2 -+ movi a2, 31 -+ beqz a3, .Lreturn -+ do_nsau a2, a3, a4, a5 -+ addi a2, a2, -1 -+.Lreturn: -+#endif -+ leaf_return -+ .size __clrsbsi2, . 
- __clrsbsi2 -+ -+#endif /* L_clrsbsi2 */ -+ -+ - #ifdef L_clzsi2 - .align 4 - .global __clzsi2 -diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa -index 9836c96ae..084618b38 100644 ---- a/libgcc/config/xtensa/t-xtensa -+++ b/libgcc/config/xtensa/t-xtensa -@@ -1,6 +1,6 @@ - LIB1ASMSRC = xtensa/lib1funcs.S - LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ -- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ -+ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _bswapsi2 _bswapdi2 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ --- -2.20.1 - diff --git a/patches/gcc10.3/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch b/patches/gcc10.3/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch new file mode 100644 index 0000000..5aebddc --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0007-fix-Wformat-diag-warnings.patch @@ -0,0 +1,48 @@ +From 76ee6b24125c885150e5b493b26b594801998b74 Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Tue, 18 Jan 2022 14:51:40 +0100 +Subject: [PATCH 02/31] xtensa: fix -Wformat-diag warnings. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (print_operand): Fix warnings. + (print_operand_address): Likewise. + (xtensa_multibss_section_type_flags): Likewise. 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 37c6ac1fd..b1dbe8520 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2379,7 +2379,7 @@ void + print_operand (FILE *file, rtx x, int letter) + { + if (!x) +- error ("PRINT_OPERAND null pointer"); ++ error ("%<PRINT_OPERAND%> null pointer"); + + switch (letter) + { +@@ -2584,7 +2584,7 @@ void + print_operand_address (FILE *file, rtx addr) + { + if (!addr) +- error ("PRINT_OPERAND_ADDRESS, null pointer"); ++ error ("%<PRINT_OPERAND_ADDRESS%>, null pointer"); + + switch (GET_CODE (addr)) + { +@@ -3697,7 +3697,7 @@ xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " +- ".bss section"); ++ "%<.bss%> section"); + } + + return flags; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch b/patches/gcc10.3/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch new file mode 100644 index 0000000..46260ef --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0008-Rename-deprecated-extv-extzv-insn-patterns-to.patch @@ -0,0 +1,74 @@ +From b5b9fd01c4db135893c44e82a9f33c2411e993d0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:06 +0900 +Subject: [PATCH 03/31] xtensa: Rename deprecated extv/extzv insn patterns to + extvsi/extzvsi + +These patterns were deprecated since GCC 4.8. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi, + extzvsi_internal): Rename from extv, extv_internal, extzv and + extzv_internal, respectively. 
+--- + gcc/config/xtensa/xtensa.md | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 123916957..251c313d5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -634,7 +634,7 @@ + + ;; Field extract instructions. + +-(define_expand "extv" ++(define_expand "extvsi" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -649,12 +649,12 @@ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + +- emit_insn (gen_extv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extv_internal" ++(define_insn "extvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") +@@ -669,7 +669,7 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "extzv" ++(define_expand "extzvsi" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -678,12 +678,12 @@ + { + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; +- emit_insn (gen_extzv_internal (operands[0], operands[1], +- operands[2], operands[3])); ++ emit_insn (gen_extzvsi_internal (operands[0], operands[1], ++ operands[2], operands[3])); + DONE; + }) + +-(define_insn "extzv_internal" ++(define_insn "extzvsi_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch 
b/patches/gcc10.3/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch new file mode 100644 index 0000000..607367c --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0009-Reflect-the-32-bit-Integer-Divide-Option.patch @@ -0,0 +1,41 @@ +From 12fa0b13b6f0c52e5c4d75f39822771a7f780f94 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 May 2022 19:34:19 +0900 +Subject: [PATCH 04/31] xtensa: Reflect the 32-bit Integer Divide Option + +On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider), +this patch reduces the size of each: + + __moddi3() @ libgcc.a : 969 -> 301 (saves 668) + __divmoddi4() : 1111 -> 426 (saves 685) + __udivmoddi4() : 1043 -> 319 (saves 724) + +in bytes, respectively. + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro + definition. +--- + gcc/config/xtensa/xtensa.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index fa86a245e..5b102de51 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -74,6 +74,11 @@ along with GCC; see the file COPYING3. If not see + #define HAVE_AS_TLS 0 + #endif + ++/* Define this if the target has no hardware divide instructions. */ ++#if !TARGET_DIV32 ++#define TARGET_HAS_NO_HW_DIVIDE ++#endif ++ + + /* Target CPU builtins. 
*/ + #define TARGET_CPU_CPP_BUILTINS() \ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch b/patches/gcc10.3/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch new file mode 100644 index 0000000..8d257cd --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0010-Simplify-EXTUI-instruction-maskimm-validation.patch @@ -0,0 +1,78 @@ +From 49383c9381a937b360adeb14f5e7bd4472f7c386 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:26:30 +0900 +Subject: [PATCH 05/31] xtensa: Simplify EXTUI instruction maskimm validations + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (extui_fldsz_operand): Simplify. + * config/xtensa/xtensa.c (xtensa_mask_immediate, print_operand): + Ditto. +--- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa.c | 24 +++--------------------- + 2 files changed, 4 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index eb52b05aa..3f84859b6 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -55,7 +55,7 @@ + + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") +- (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) + + (define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b1dbe8520..4043f40ce 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -446,19 +446,7 @@ xtensa_b4constu (HOST_WIDE_INT v) + bool + xtensa_mask_immediate (HOST_WIDE_INT v) + { +-#define MAX_MASK_SIZE 16 +- int mask_size; +- +- for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) +- { +- if ((v & 1) == 0) +- return false; +- v = v >> 1; +- if (v == 0) +- return true; +- } +- +- return false; ++ return IN_RANGE (exact_log2 
(v + 1), 1, 16); + } + + +@@ -2424,17 +2412,11 @@ print_operand (FILE *file, rtx x, int letter) + case 'K': + if (GET_CODE (x) == CONST_INT) + { +- int num_bits = 0; + unsigned val = INTVAL (x); +- while (val & 1) +- { +- num_bits += 1; +- val = val >> 1; +- } +- if ((val != 0) || (num_bits == 0) || (num_bits > 16)) ++ if (!xtensa_mask_immediate (val)) + fatal_insn ("invalid mask", x); + +- fprintf (file, "%d", num_bits); ++ fprintf (file, "%d", floor_log2 (val + 1)); + } + else + output_operand_lossage ("invalid %%K value"); +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch b/patches/gcc10.3/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch new file mode 100644 index 0000000..419ebfe --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0011-Make-use-of-IN_RANGE-macro-where-appropriate.patch @@ -0,0 +1,174 @@ +From fa7073ff572c248896057a5a7841a3e1d98380ad Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:27:36 +0900 +Subject: [PATCH 06/31] xtensa: Make use of IN_RANGE macro where appropriate + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/constraints.md (M, O): Use the macro. + * config/xtensa/predicates.md (addsubx_operand, extui_fldsz_operand, + sext_fldsz_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_simm8, xtensa_simm8x256, + xtensa_simm12b, xtensa_uimm8, xtensa_uimm8x2, xtensa_uimm8x4, + xtensa_mask_immediate, smalloffset_mem_p, printx, xtensa_call_save_reg, + xtensa_expand_prologue): Ditto. + * config/xtensa/xtensa.h (FUNCTION_ARG_REGNO_P): Ditto. 
+--- + gcc/config/xtensa/constraints.md | 4 ++-- + gcc/config/xtensa/predicates.md | 5 ++--- + gcc/config/xtensa/xtensa.c | 20 ++++++++++---------- + gcc/config/xtensa/xtensa.h | 2 +- + 4 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 2062c8816..9a8caab4f 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -92,7 +92,7 @@ + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") +- (match_test "ival >= -32 && ival <= 95"))) ++ (match_test "IN_RANGE (ival, -32, 95)"))) + + (define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use +@@ -103,7 +103,7 @@ + (define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") +- (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) ++ (match_test "ival == -1 || IN_RANGE (ival, 1, 15)"))) + + (define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 3f84859b6..91b9343a2 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -25,8 +25,7 @@ + + (define_predicate "addsubx_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 1 +- && INTVAL (op) <= 3"))) ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + + (define_predicate "arith_operand" + (ior (and (match_code "const_int") +@@ -64,7 +63,7 @@ + + (define_predicate "sext_fldsz_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 23)"))) + + (define_predicate "lsbitnum_operand" + (and (match_code "const_int") +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 4043f40ce..02dc5799a 100644 +--- a/gcc/config/xtensa/xtensa.c 
++++ b/gcc/config/xtensa/xtensa.c +@@ -341,42 +341,42 @@ struct gcc_target targetm = TARGET_INITIALIZER; + bool + xtensa_simm8 (HOST_WIDE_INT v) + { +- return v >= -128 && v <= 127; ++ return IN_RANGE (v, -128, 127); + } + + + bool + xtensa_simm8x256 (HOST_WIDE_INT v) + { +- return (v & 255) == 0 && (v >= -32768 && v <= 32512); ++ return (v & 255) == 0 && IN_RANGE (v, -32768, 32512); + } + + + bool + xtensa_simm12b (HOST_WIDE_INT v) + { +- return v >= -2048 && v <= 2047; ++ return IN_RANGE (v, -2048, 2047); + } + + + static bool + xtensa_uimm8 (HOST_WIDE_INT v) + { +- return v >= 0 && v <= 255; ++ return IN_RANGE (v, 0, 255); + } + + + static bool + xtensa_uimm8x2 (HOST_WIDE_INT v) + { +- return (v & 1) == 0 && (v >= 0 && v <= 510); ++ return (v & 1) == 0 && IN_RANGE (v, 0, 510); + } + + + static bool + xtensa_uimm8x4 (HOST_WIDE_INT v) + { +- return (v & 3) == 0 && (v >= 0 && v <= 1020); ++ return (v & 3) == 0 && IN_RANGE (v, 0, 1020); + } + + +@@ -527,7 +527,7 @@ smalloffset_mem_p (rtx op) + return FALSE; + + val = INTVAL (offset); +- return (val & 3) == 0 && (val >= 0 && val <= 60); ++ return (val & 3) == 0 && IN_RANGE (val, 0, 60); + } + } + return FALSE; +@@ -2352,7 +2352,7 @@ static void + printx (FILE *file, signed int val) + { + /* Print a hexadecimal value in a nice way. */ +- if ((val > -0xa) && (val < 0xa)) ++ if (IN_RANGE (val, -9, 9)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); +@@ -2732,7 +2732,7 @@ xtensa_call_save_reg(int regno) + return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || + df_regs_ever_live_p (regno); + +- if (crtl->calls_eh_return && regno >= 2 && regno < 4) ++ if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; + + return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno); +@@ -2852,7 +2852,7 @@ xtensa_expand_prologue (void) + int callee_save_size = cfun->machine->callee_save_size; + + /* -128 is a limit of single addi instruction. 
*/ +- if (total_size > 0 && total_size <= 128) ++ if (IN_RANGE (total_size, 1, 128)) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-total_size))); +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 5b102de51..3e9cbc943 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -493,7 +493,7 @@ enum reg_class + used for this purpose since all function arguments are pushed on + the stack. */ + #define FUNCTION_ARG_REGNO_P(N) \ +- ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) ++ IN_RANGE ((N), GP_OUTGOING_ARG_FIRST, GP_OUTGOING_ARG_LAST) + + /* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch b/patches/gcc10.3/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch new file mode 100644 index 0000000..dae4a21 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0012-Fix-instruction-counting-regarding-block-move.patch @@ -0,0 +1,54 @@ +From 5cda5b41a7646d220f7351226b5da78955b0fc7f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:29:22 +0900 +Subject: [PATCH 07/31] xtensa: Fix instruction counting regarding block move + expansion + +This patch makes counting the number of instructions of the remainder +(modulo 4) part more accurate. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_move): + Make instruction counting more accurate, and simplify emitting insns. 
+--- + gcc/config/xtensa/xtensa.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 02dc5799a..0fe8b73ad 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1303,7 +1303,7 @@ xtensa_expand_block_move (rtx *operands) + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; +- num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ ++ num_pieces = (bytes / align) + ((bytes % align + 1) / 2); + if (num_pieces > move_ratio) + return 0; + +@@ -1340,7 +1340,7 @@ xtensa_expand_block_move (rtx *operands) + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); +- emit_insn (gen_rtx_SET (temp[next], x)); ++ emit_move_insn (temp[next], x); + + offset_ld += next_amount; + bytes -= next_amount; +@@ -1350,9 +1350,9 @@ xtensa_expand_block_move (rtx *operands) + if (active[phase]) + { + active[phase] = false; +- ++ + x = adjust_address (dst_mem, mode[phase], offset_st); +- emit_insn (gen_rtx_SET (x, temp[phase])); ++ emit_move_insn (x, temp[phase]); + + offset_st += amount[phase]; + } +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch new file mode 100644 index 0000000..a7212ce --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0013-Add-setmemsi-insn-pattern.patch @@ -0,0 +1,303 @@ +From 02572a935a2cbabc96387289300fb78d61dde555 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 24 May 2022 00:52:44 +0900 +Subject: [PATCH 08/31] xtensa: Add setmemsi insn pattern + +This patch introduces setmemsi insn pattern of two kinds, unrolled loop and +small loop, for fixed small length and constant initialization value. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New prototypes. 
+ * config/xtensa/xtensa.c (xtensa_sizeof_MOVI, + xtensa_expand_block_set_unrolled_loop, + xtensa_expand_block_set_small_loop): New functions. + * config/xtensa/xtensa.md (setmemsi): New expansion pattern. + * config/xtensa/xtensa.opt (mlongcalls): Add target mask. +--- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 211 ++++++++++++++++++++++++++++++ + gcc/config/xtensa/xtensa.md | 16 +++ + gcc/config/xtensa/xtensa.opt | 2 +- + 4 files changed, 230 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 18d803581..80b1da2bb 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); + extern int xtensa_expand_conditional_move (rtx *, int); + extern int xtensa_expand_scc (rtx *, machine_mode); + extern int xtensa_expand_block_move (rtx *); ++extern int xtensa_expand_block_set_unrolled_loop (rtx *); ++extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0fe8b73ad..a6d76a953 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1363,6 +1363,217 @@ xtensa_expand_block_move (rtx *operands) + } + + ++/* Try to expand a block set operation to a sequence of RTL move ++ instructions. If not optimizing, or if the block size is not a ++ constant, or if the block is too large, or if the value to ++ initialize the block with is not a constant, the expansion ++ fails and GCC falls back to calling memset(). 
++ ++ operands[0] is the destination ++ operands[1] is the length ++ operands[2] is the initialization value ++ operands[3] is the alignment */ ++ ++static int ++xtensa_sizeof_MOVI (HOST_WIDE_INT imm) ++{ ++ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3; ++} ++ ++int ++xtensa_expand_block_set_unrolled_loop (rtx *operands) ++{ ++ rtx dst_mem = operands[0]; ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, reg; ++ int offset; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. */ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: a series of aligned memory stores. ++ Consist of S8I, S16I or S32I(.N). */ ++ expand_len += (bytes / align) * (TARGET_DENSITY ++ && align == 4 ? 2 : 3); ++ /* Insn expansion: the remainder, sub-aligned memory stores. ++ A combination of S8I and S16I as needed. */ ++ expand_len += ((bytes % align + 1) / 2) * 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (dst_mem, 0); ++ if (!REG_P (x)) ++ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); ++ switch (align) ++ { ++ case 1: ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (SImode, GEN_INT (value)); ++ ++ offset = 0; ++ do ++ { ++ int unit_size = MIN (bytes, align); ++ machine_mode unit_mode = (unit_size >= 4 ? SImode : ++ (unit_size >= 2 ? HImode : ++ QImode)); ++ unit_size = GET_MODE_SIZE (unit_mode); ++ ++ emit_move_insn (adjust_address (dst_mem, unit_mode, offset), ++ unit_mode == SImode ? reg ++ : convert_to_mode (unit_mode, reg, true)); ++ ++ offset += unit_size; ++ bytes -= unit_size; ++ } ++ while (bytes > 0); ++ ++ return 1; ++} ++ ++int ++xtensa_expand_block_set_small_loop (rtx *operands) ++{ ++ HOST_WIDE_INT bytes, value, align; ++ int expand_len, funccall_len; ++ rtx x, dst, end, reg; ++ machine_mode unit_mode; ++ rtx_code_label *label; ++ ++ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) ++ return 0; ++ ++ bytes = INTVAL (operands[1]); ++ if (bytes <= 0) ++ return 0; ++ value = (int8_t)INTVAL (operands[2]); ++ align = INTVAL (operands[3]); ++ if (align > MOVE_MAX) ++ align = MOVE_MAX; ++ ++ /* Totally-aligned block only. */ ++ if (bytes % align != 0) ++ return 0; ++ ++ /* If 4-byte aligned, small loop substitution is almost optimal, thus ++ limited to only offset to the end address for ADDI/ADDMI instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; ++ ++ /* If no 4-byte aligned, loop count should be treated as the constraint. */ ++ if (align != 4 ++ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) ++ return 0; ++ ++ /* Insn expansion: holding the init value. ++ Either MOV(.N) or L32R w/litpool. 
*/ ++ if (align == 1) ++ expand_len = xtensa_sizeof_MOVI (value); ++ else if (value == 0 || value == -1) ++ expand_len = TARGET_DENSITY ? 2 : 3; ++ else ++ expand_len = 3 + 4; ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ ++ /* Function call: preparing two arguments. */ ++ funccall_len = xtensa_sizeof_MOVI (value); ++ funccall_len += xtensa_sizeof_MOVI (bytes); ++ /* Function call: calling memset(). */ ++ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; ++ ++ /* Apply expansion bonus (2x) if optimizing for speed. */ ++ if (optimize > 1 && !optimize_size) ++ funccall_len *= 2; ++ ++ /* Decide whether to expand or not, based on the sum of the length ++ of instructions. 
*/ ++ if (expand_len > funccall_len) ++ return 0; ++ ++ x = XEXP (operands[0], 0); ++ if (!REG_P (x)) ++ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); ++ dst = gen_reg_rtx (SImode); ++ emit_move_insn (dst, x); ++ end = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ switch (align) ++ { ++ case 1: ++ unit_mode = QImode; ++ break; ++ case 2: ++ value = (int16_t)((uint8_t)value * 0x0101U); ++ unit_mode = HImode; ++ break; ++ case 4: ++ value = (int32_t)((uint8_t)value * 0x01010101U); ++ unit_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ reg = force_reg (unit_mode, GEN_INT (value)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); ++ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); ++ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); ++ ++ return 1; ++} ++ ++ + void + xtensa_expand_nonlocal_goto (rtx *operands) + { +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 251c313d5..9eb689efa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1085,6 +1085,22 @@ + DONE; + }) + ++;; Block sets ++ ++(define_expand "setmemsi" ++ [(match_operand:BLK 0 "memory_operand") ++ (match_operand:SI 1 "") ++ (match_operand:SI 2 "") ++ (match_operand:SI 3 "const_int_operand")] ++ "!optimize_debug && optimize" ++{ ++ if (xtensa_expand_block_set_unrolled_loop (operands)) ++ DONE; ++ if (xtensa_expand_block_set_small_loop (operands)) ++ DONE; ++ FAIL; ++}) ++ + + ;; Shift instructions. + +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index aef67970b..e1d992f5d 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -27,7 +27,7 @@ Target Report Mask(FORCE_NO_PIC) + Disable position-independent code (PIC) for use in OS kernel code. 
+ + mlongcalls +-Target ++Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + + mtarget-align +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch b/patches/gcc10.3/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch new file mode 100644 index 0000000..a5fb6f1 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0014-Improve-bswap-sd-i2-insn-patterns.patch @@ -0,0 +1,254 @@ +From be1ca3aa6e9754ed16d1b7a60657912af02844da Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 13 May 2022 22:33:59 +0900 +Subject: [PATCH 09/31] xtensa: Improve bswap[sd]i2 insn patterns + +This patch makes bswap[sd]i2 better register allocation, and reconstructs +bswapsi2 in order to take advantage of GIMPLE manual byte-swapping +recognition. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2): New expansion pattern. + (bswapsi2_internal): Revise the template and condition, and add + detection code for preceding the same insn in order to omit a + "SSAI 8" instruction of the latter. + (bswapdi2): Suppress built-in insn expansion with the corresponding + library call when optimizing for size. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/bswap-O1.c: New. + * gcc.target/xtensa/bswap-O2.c: Ditto. + * gcc.target/xtensa/bswap-Os.c: Ditto. 
+--- + gcc/config/xtensa/xtensa.md | 77 +++++++++++++++++----- + gcc/testsuite/gcc.target/xtensa/bswap-O1.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-O2.c | 37 +++++++++++ + gcc/testsuite/gcc.target/xtensa/bswap-Os.c | 37 +++++++++++ + 4 files changed, 172 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O1.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-O2.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-Os.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eb689efa..cea280061 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,23 +471,68 @@ + + ;; Byte swap. + +-(define_insn "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=&a") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "15")]) ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1" ++{ ++ /* GIMPLE manual byte-swapping recognition is now activated. ++ For both built-in and manual bswaps, emit corresponding library call ++ if optimizing for size, or a series of dedicated machine instructions ++ if otherwise. 
*/ ++ if (optimize_size) ++ emit_library_call_value (optab_libfunc (bswap_optab, SImode), ++ operands[0], LCT_NORMAL, SImode, ++ operands[1], SImode); ++ else ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++}) + +-(define_insn "bswapdi2" +- [(set (match_operand:DI 0 "register_operand" "=&a") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] +- "!optimize_size" +- "ssai\t8\;srli\t%0, %D1, 16\;src\t%0, %0, %D1\;src\t%0, %0, %0\;src\t%0, %D1, %0\;srli\t%D0, %1, 16\;src\t%D0, %D0, %1\;src\t%D0, %D0, %D0\;src\t%D0, %1, %D0" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "length" "27")]) ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=a,&a") ++ (bswap:SI (match_operand:SI 1 "register_operand" "0,r"))) ++ (clobber (match_scratch:SI 2 "=&a,X"))] ++ "!optimize_debug && optimize > 1 && !optimize_size" ++{ ++ rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); ++ const char *init = "ssai\t8\;"; ++ static char result[64]; ++ if (prev_insn && NONJUMP_INSN_P (prev_insn)) ++ { ++ rtx x = PATTERN (prev_insn); ++ if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2 ++ && GET_CODE (XVECEXP (x, 0, 0)) == SET ++ && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER) ++ { ++ x = XEXP (XVECEXP (x, 0, 0), 1); ++ if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode) ++ init = ""; ++ } ++ } ++ sprintf (result, ++ (which_alternative == 0) ++ ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2" ++ : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0", ++ init); ++ return result; ++} ++ [(set_attr "type" "arith,arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "15,15")]) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (bswap:DI (match_operand:DI 1 "register_operand" "")))] ++ "!optimize_debug && optimize > 1 && optimize_size" ++{ ++ /* Replace with a single DImode library call. 
++ Without this, two SImode library calls are emitted. */ ++ emit_library_call_value (optab_libfunc (bswap_optab, DImode), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], DImode); ++ DONE; ++}) + + + ;; Negation and one's complement. +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O1.c b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +new file mode 100644 +index 000000000..a0c885baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-O2.c b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +new file mode 100644 +index 000000000..4cf95b925 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-O2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} 
++ ++/* { dg-final { scan-assembler-times "ssai" 4 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-Os.c b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +new file mode 100644 +index 000000000..1e010fd62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/bswap-Os.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++unsigned int test_0(unsigned int a) ++{ ++ return (a & 0x000000FF) << 24 | ++ (a & 0x0000FF00) << 8 | ++ (a & 0x00FF0000) >> 8 | ++ (a & 0xFF000000) >> 24; ++} ++ ++unsigned int test_1(unsigned int a) ++{ ++ union ++ { ++ unsigned int i; ++ unsigned char a[4]; ++ } u, v; ++ u.i = a; ++ v.a[0] = u.a[3]; ++ v.a[1] = u.a[2]; ++ v.a[2] = u.a[1]; ++ v.a[3] = u.a[0]; ++ return v.i; ++} ++ ++unsigned int test_2(unsigned int a) ++{ ++ return __builtin_bswap32(a); ++} ++ ++unsigned long long test_3(unsigned long long a) ++{ ++ return __builtin_bswap64(a); ++} ++ ++/* { dg-final { scan-assembler-times "call" 4 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0015-fix-PR-target-105879.patch b/patches/gcc10.3/gcc-xtensa-0015-fix-PR-target-105879.patch new file mode 100644 index 0000000..2c21f47 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0015-fix-PR-target-105879.patch @@ -0,0 +1,48 @@ +From 1848b547a6ac69a002d068239a5bc9463f3fae25 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Tue, 7 Jun 2022 21:01:01 -0700 +Subject: [PATCH 10/31] gcc: xtensa: fix PR target/105879 + +split_double operates with the 'word that comes first in memory in the +target' terminology, while gen_lowpart operates with the 'value +representing some low-order bits of X' terminology. They are not +equivalent and must be dealt with differently on little- and big-endian +targets. + +gcc/ + PR target/105879 + * config/xtensa/xtensa.md (movdi): Rename 'first' and 'second' + to 'lowpart' and 'highpart' so that they match 'gen_lowpart' and + 'gen_highpart' bitwise semantics and fix order of highpart and + lowpart depending on target endianness. 
+--- + gcc/config/xtensa/xtensa.md | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index cea280061..30d8ef96c 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -802,11 +802,14 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx first, second; +- +- split_double (operands[1], &first, &second); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), first)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), second)); ++ rtx lowpart, highpart; ++ ++ if (TARGET_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); ++ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); + DONE; + } + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch new file mode 100644 index 0000000..3a31e62 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0016-Implement-bswaphi2-insn-pattern.patch @@ -0,0 +1,39 @@ +From f47a902c9a94d2e9df879de4613dae62c8e9cc4f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:44:32 +0900 +Subject: [PATCH 11/31] xtensa: Implement bswaphi2 insn pattern + +This patch adds bswaphi2 insn pattern that is one instruction less than the +default expansion. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswaphi2): New insn pattern. +--- + gcc/config/xtensa/xtensa.md | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 30d8ef96c..c1f44777d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -471,6 +471,16 @@ + + ;; Byte swap. 
+ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=a") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r"))) ++ (clobber (match_scratch:HI 2 "=&a"))] ++ "" ++ "extui\t%2, %1, 8, 8\;slli\t%0, %1, 8\;or\t%0, %0, %2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "HI") ++ (set_attr "length" "9")]) ++ + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch b/patches/gcc10.3/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch new file mode 100644 index 0000000..017a30f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0017-Make-one_cmplsi2-optimizer-friendly.patch @@ -0,0 +1,86 @@ +From 22b5756399ef63a4102334724b12a4c186075227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:46:16 +0900 +Subject: [PATCH 12/31] xtensa: Make one_cmplsi2 optimizer-friendly + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. But a few optimizers assume that bitwise negation can be +done by a single insn. + +As a result, '((x < 0) ? ~x : x)' cannot be optimized to '(x ^ (x >> 31))' +ever before, for example. + +This patch relaxes such limitation, by putting the insn expansion off till +the split pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (one_cmplsi2): + Rearrange as an insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/one_cmpl_abs.c: New. 
+--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++------ + .../gcc.target/xtensa/one_cmpl_abs.c | 9 +++++++ + 2 files changed, 27 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c1f44777d..2f6d48d03 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -556,16 +556,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_expand "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "") +- (not:SI (match_operand:SI 1 "register_operand" "")))] ++(define_insn_and_split "one_cmplsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (const_int -1)) ++ (set (match_dup 0) ++ (xor:SI (match_dup 1) ++ (match_dup 2)))] + { +- rtx temp = gen_reg_rtx (SImode); +- emit_insn (gen_movsi (temp, constm1_rtx)); +- emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); +- DONE; +-}) ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +new file mode 100644 +index 000000000..608f65fd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/one_cmpl_abs.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++int one_cmpl_abs(int a) ++{ ++ return a < 0 ? 
~a : a; ++} ++ ++/* { dg-final { scan-assembler-not "bgez" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch b/patches/gcc10.3/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch new file mode 100644 index 0000000..d1167a1 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0018-Optimize-x-y-to-x-y-y.patch @@ -0,0 +1,71 @@ +From cc259b2801c8d04c39169214041305fdd5b87acd Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:55:44 +0900 +Subject: [PATCH 13/31] xtensa: Optimize '(~x & y)' to '((x & y) ^ y)' + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*andsi3_bitcmpl): + New insn_and_split pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/check_zero_byte.c: New. +--- + gcc/config/xtensa/xtensa.md | 20 +++++++++++++++++++ + .../gcc.target/xtensa/check_zero_byte.c | 9 +++++++++ + 2 files changed, 29 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/check_zero_byte.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2f6d48d03..28ed1d34e 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -601,6 +601,26 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + ++(define_insn_and_split "*andsi3_bitcmpl" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 3) ++ (and:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (xor:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +diff --git 
a/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +new file mode 100644 +index 000000000..6a04aaeef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/check_zero_byte.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++int check_zero_byte(int v) ++{ ++ return (v - 0x01010101) & ~v & 0x80808080; ++} ++ ++/* { dg-final { scan-assembler-not "movi" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch new file mode 100644 index 0000000..ebaa985 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0019-Add-clrsbsi2-insn-pattern.patch @@ -0,0 +1,98 @@ +From ebd48d915076589f04b5c1ed50f9f5ddfae088e8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 29 May 2022 19:57:35 +0900 +Subject: [PATCH 14/31] xtensa: Add clrsbsi2 insn pattern + +> (clrsb:m x) +> Represents the number of redundant leading sign bits in x, represented +> as an integer of mode m, starting at the most significant bit position. + +This explanation is just what the NSA instruction (not ever emitted before) +calculates in Xtensa ISA. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (clrsbsi2): New insn pattern. + +libgcc/ChangeLog: + + * config/xtensa/lib1funcs.S (__clrsbsi2): New function. + * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _clrsbsi2. +--- + gcc/config/xtensa/xtensa.md | 12 +++++++++++- + libgcc/config/xtensa/lib1funcs.S | 23 +++++++++++++++++++++++ + libgcc/config/xtensa/t-xtensa | 2 +- + 3 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 28ed1d34e..6c76fb942 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -429,7 +429,17 @@ + (set_attr "length" "3")]) + + +-;; Count leading/trailing zeros and find first bit. 
++;; Count redundant leading sign bits and leading/trailing zeros, ++;; and find first bit. ++ ++(define_insn "clrsbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (clrsb:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_NSA" ++ "nsa\t%0, %1" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "3")]) + + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") +diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S +index b19deae14..ad9072c40 100644 +--- a/libgcc/config/xtensa/lib1funcs.S ++++ b/libgcc/config/xtensa/lib1funcs.S +@@ -456,6 +456,29 @@ __nsau_data: + #endif /* L_clz */ + + ++#ifdef L_clrsbsi2 ++ .align 4 ++ .global __clrsbsi2 ++ .type __clrsbsi2, @function ++__clrsbsi2: ++ leaf_entry sp, 16 ++#if XCHAL_HAVE_NSA ++ nsa a2, a2 ++#else ++ srai a3, a2, 31 ++ xor a3, a3, a2 ++ movi a2, 31 ++ beqz a3, .Lreturn ++ do_nsau a2, a3, a4, a5 ++ addi a2, a2, -1 ++.Lreturn: ++#endif ++ leaf_return ++ .size __clrsbsi2, . 
- __clrsbsi2 ++ ++#endif /* L_clrsbsi2 */ ++ ++ + #ifdef L_clzsi2 + .align 4 + .global __clzsi2 +diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa +index 9836c96ae..084618b38 100644 +--- a/libgcc/config/xtensa/t-xtensa ++++ b/libgcc/config/xtensa/t-xtensa +@@ -1,6 +1,6 @@ + LIB1ASMSRC = xtensa/lib1funcs.S + LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ +- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ ++ _umulsidi3 _clz _clrsbsi2 _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _bswapsi2 _bswapdi2 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch b/patches/gcc10.3/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch new file mode 100644 index 0000000..8de8a89 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0020-Tweak-some-widen-multiplications.patch @@ -0,0 +1,110 @@ +From 1ba9369255749ccf9ec82565a192b1a523b0e374 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:17:40 +0900 +Subject: [PATCH 15/31] xtensa: Tweak some widen multiplications + +umulsidi3 is faster than umuldi3 even if library call, and is also +prerequisite for fast constant division by multiplication. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (mulsidi3, umulsidi3): + Split into individual signedness, in order to use libcall + "__umulsidi3" but not the other. + (<u>mulhisi3): Merge into one by using code iterator. + (<u>mulsidi3, mulhisi3, umulhisi3): Remove. +--- + gcc/config/xtensa/xtensa.md | 56 +++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6c76fb942..3314b3fd6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -224,20 +224,42 @@ + + ;; Multiplication. 
+ +-(define_expand "<u>mulsidi3" ++(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") +- (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) +- (any_extend:DI (match_operand:SI 2 "register_operand"))))] ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" + { + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); +- emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), +- operands[1], operands[2])); ++ emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; + }) + ++(define_expand "umulsidi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "" ++{ ++ if (TARGET_MUL32_HIGH) ++ { ++ rtx temp = gen_reg_rtx (SImode); ++ emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); ++ emit_insn (gen_umulsi3_highpart (gen_highpart (SImode, operands[0]), ++ operands[1], operands[2])); ++ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); ++ } ++ else ++ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__umulsidi3"), ++ operands[0], LCT_NORMAL, DImode, ++ operands[1], SImode, ++ operands[2], SImode); ++ DONE; ++}) ++ + (define_insn "<u>mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI +@@ -261,30 +283,16 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "mulhisi3" +- [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (sign_extend:SI +- (match_operand:HI 1 "register_operand" "%r,r")) +- (sign_extend:SI +- (match_operand:HI 2 "register_operand" "r,r"))))] +- "TARGET_MUL16 || TARGET_MAC16" +- "@ +- mul16s\t%0, %1, %2 +- mul.aa.ll\t%1, %2" +- [(set_attr "type" 
"mul16,mac16") +- (set_attr "mode" "SI") +- (set_attr "length" "3,3")]) +- +-(define_insn "umulhisi3" ++(define_insn "<u>mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") +- (mult:SI (zero_extend:SI ++ (mult:SI (any_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) +- (zero_extend:SI ++ (any_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ +- mul16u\t%0, %1, %2 +- umul.aa.ll\t%1, %2" ++ mul16<su>\t%0, %1, %2 ++ <u>mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch b/patches/gcc10.3/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch new file mode 100644 index 0000000..491da47 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0021-Consider-the-Loop-Option-when-setmemsi-is-exp.patch @@ -0,0 +1,125 @@ +From bc108c84544d5a0e6289628e8749a92c9695f006 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:18:24 +0900 +Subject: [PATCH 16/31] xtensa: Consider the Loop Option when setmemsi is + expanded to small loop + +Now apply to almost any size of aligned block under such circumstances. 
+ +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_block_set_small_loop): + Pass through the block length / loop count conditions if + zero-overhead looping is configured and active, +--- + gcc/config/xtensa/xtensa.c | 71 +++++++++++++++++++++++++++----------- + 1 file changed, 50 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a6d76a953..e2f97b79c 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1473,7 +1473,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) + int + xtensa_expand_block_set_small_loop (rtx *operands) + { +- HOST_WIDE_INT bytes, value, align; ++ HOST_WIDE_INT bytes, value, align, count; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; +@@ -1493,17 +1493,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; ++ count = bytes / align; + +- /* If 4-byte aligned, small loop substitution is almost optimal, thus +- limited to only offset to the end address for ADDI/ADDMI instruction. */ +- if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) +- return 0; ++ /* If the Loop Option (zero-overhead looping) is configured and active, ++ almost no restrictions about the length of the block. */ ++ if (! (TARGET_LOOPS && optimize)) ++ { ++ /* If 4-byte aligned, small loop substitution is almost optimal, ++ thus limited to only offset to the end address for ADDI/ADDMI ++ instruction. */ ++ if (align == 4 ++ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ return 0; + +- /* If no 4-byte aligned, loop count should be treated as the constraint. */ +- if (align != 4 +- && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) +- return 0; ++ /* If no 4-byte aligned, loop count should be treated as the ++ constraint. */ ++ if (align != 4 ++ && count > ((optimize > 1 && !optimize_size) ? 
8 : 15)) ++ return 0; ++ } + + /* Insn expansion: holding the init value. + Either MOV(.N) or L32R w/litpool. */ +@@ -1513,16 +1521,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; +- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ +- expand_len += bytes > 127 ? 3 +- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; +- +- /* Insn expansion: the loop body and branch instruction. +- For store, one of S8I, S16I or S32I(.N). +- For advance, ADDI(.N). +- For branch, BNE. */ +- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) +- + (TARGET_DENSITY ? 2 : 3) + 3; ++ if (TARGET_LOOPS && optimize) /* zero-overhead looping */ ++ { ++ /* Insn translation: Either MOV(.N) or L32R w/litpool for the ++ loop count. */ ++ expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) ++ : 3 + 4; ++ /* Insn translation: LOOP, the zero-overhead looping setup ++ instruction. */ ++ expand_len += 3; ++ /* Insn expansion: the loop body instructions. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3); ++ } ++ else /* NO zero-overhead looping */ ++ { ++ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ ++ expand_len += bytes > 127 ? 3 ++ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; ++ /* Insn expansion: the loop body and branch instruction. ++ For store, one of S8I, S16I or S32I(.N). ++ For advance, ADDI(.N). ++ For branch, BNE. */ ++ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) ++ + (TARGET_DENSITY ? 2 : 3) + 3; ++ } + + /* Function call: preparing two arguments. 
*/ + funccall_len = xtensa_sizeof_MOVI (value); +@@ -1545,7 +1570,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); +- emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); ++ if (TARGET_LOOPS && optimize) ++ x = force_reg (SImode, operands[1] /* the length */); ++ else ++ x = operands[1]; ++ emit_insn (gen_addsi3 (end, dst, x)); + switch (align) + { + case 1: +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch b/patches/gcc10.3/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch new file mode 100644 index 0000000..5792a6f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0022-Improve-instruction-cost-estimation-and-sugge.patch @@ -0,0 +1,325 @@ +From de854e2348b8159bc389471e68023986c8878c92 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 10 Jun 2022 13:19:32 +0900 +Subject: [PATCH 17/31] xtensa: Improve instruction cost estimation and + suggestion + +This patch implements a new target-specific relative RTL insn cost function +because of suboptimal cost estimation by default, and fixes several "length" +insn attributes (related to the cost estimation). + +And also introduces a new machine-dependent option "-mextra-l32r-costs=" +that tells implementation-specific InstRAM/ROM access penalty for L32R +instruction to the compiler (in clock-cycle units, 0 by default). + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): Correct wrong case + for ABS and NEG, add missing case for BSWAP and CLRSB, and + double the costs for integer divisions using libfuncs if + optimizing for speed, in order to take advantage of fast constant + division by multiplication. + (TARGET_INSN_COST): New macro definition. + (xtensa_is_insn_L32R_p, xtensa_insn_cost): New functions for + calculating relative costs of a RTL insns, for both of speed and + size. 
+ * config/xtensa/xtensa.md (return, nop, trap): Correct values of + the attribute "length" that depends on TARGET_DENSITY. + (define_asm_attributes, blockage, frame_blockage): Add missing + attributes. + * config/xtensa/xtensa.opt (-mextra-l32r-costs=): New machine- + dependent option, however, preparatory work for now. +--- + gcc/config/xtensa/xtensa.c | 116 ++++++++++++++++++++++++++++++++--- + gcc/config/xtensa/xtensa.md | 29 ++++++--- + gcc/config/xtensa/xtensa.opt | 4 ++ + 3 files changed, 134 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e2f97b79c..94ff901c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "hw-doloop.h" + #include "rtl-iter.h" ++#include "insn-attr.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -134,6 +135,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + static section *xtensa_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static bool xtensa_rtx_costs (rtx, machine_mode, int, int, int *, bool); ++static int xtensa_insn_cost (rtx_insn *, bool); + static int xtensa_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool); +@@ -208,6 +210,8 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS xtensa_rtx_costs ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST xtensa_insn_cost + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +@@ -3972,7 +3976,7 @@ xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + static bool + xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, +- int *total, bool 
speed ATTRIBUTE_UNUSED) ++ int *total, bool speed) + { + int code = GET_CODE (x); + +@@ -4060,9 +4064,14 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case CLZ: ++ case CLRSB: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + ++ case BSWAP: ++ *total = COSTS_N_INSNS (mode == HImode ? 3 : 5); ++ return true; ++ + case NOT: + *total = COSTS_N_INSNS (mode == DImode ? 3 : 2); + return true; +@@ -4086,13 +4095,16 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + + case ABS: ++ case NEG: + { + if (mode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (mode == DFmode) + *total = COSTS_N_INSNS (50); +- else ++ else if (mode == DImode) + *total = COSTS_N_INSNS (4); ++ else ++ *total = COSTS_N_INSNS (1); + return true; + } + +@@ -4108,10 +4120,6 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + return true; + } + +- case NEG: +- *total = COSTS_N_INSNS (mode == DImode ? 4 : 2); +- return true; +- + case MULT: + { + if (mode == SFmode) +@@ -4151,11 +4159,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case UMOD: + { + if (mode == DImode) +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else +- *total = COSTS_N_INSNS (50); ++ *total = COSTS_N_INSNS (speed ? 100 : 50); + return true; + } + +@@ -4188,6 +4196,98 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + ++static bool ++xtensa_is_insn_L32R_p(const rtx_insn *insn) ++{ ++ rtx x = PATTERN (insn); ++ ++ if (GET_CODE (x) == SET) ++ { ++ x = XEXP (x, 1); ++ if (GET_CODE (x) == MEM) ++ { ++ x = XEXP (x, 0); ++ return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); ++ } ++ } ++ ++ return false; ++} ++ ++/* Compute a relative costs of RTL insns. This is necessary in order to ++ achieve better RTL insn splitting/combination result. 
*/ ++ ++static int ++xtensa_insn_cost (rtx_insn *insn, bool speed) ++{ ++ if (!(recog_memoized (insn) < 0)) ++ { ++ int len = get_attr_length (insn), n = (len + 2) / 3; ++ ++ if (len == 0) ++ return COSTS_N_INSNS (0); ++ ++ if (speed) /* For speed cost. */ ++ { ++ /* "L32R" may be particular slow (implementation-dependent). */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); ++ ++ /* Cost based on the pipeline model. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_STORE: ++ case TYPE_MOVE: ++ case TYPE_ARITH: ++ case TYPE_MULTI: ++ case TYPE_NOP: ++ case TYPE_FSTORE: ++ return COSTS_N_INSNS (n); ++ ++ case TYPE_LOAD: ++ return COSTS_N_INSNS (n - 1 + 2); ++ ++ case TYPE_JUMP: ++ case TYPE_CALL: ++ return COSTS_N_INSNS (n - 1 + 3); ++ ++ case TYPE_FCONV: ++ case TYPE_FLOAD: ++ case TYPE_MUL16: ++ case TYPE_MUL32: ++ case TYPE_RSR: ++ return COSTS_N_INSNS (n * 2); ++ ++ case TYPE_FMADD: ++ return COSTS_N_INSNS (n * 4); ++ ++ case TYPE_DIV32: ++ return COSTS_N_INSNS (n * 16); ++ ++ default: ++ break; ++ } ++ } ++ else /* For size cost. */ ++ { ++ /* Cost based on the instruction length. */ ++ if (get_attr_type (insn) != TYPE_UNKNOWN) ++ { ++ /* "L32R" itself plus constant in litpool. */ ++ if (xtensa_is_insn_L32R_p (insn)) ++ return COSTS_N_INSNS (2) + 1; ++ ++ /* Consider ".n" short instructions. */ ++ return COSTS_N_INSNS (n) - (n * 3 - len); ++ } ++ } ++ } ++ ++ /* Fall back. */ ++ return pattern_cost (PATTERN (insn), speed); ++} ++ + /* Worker function for TARGET_RETURN_IN_MEMORY. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3314b3fd6..da6b71d1d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -98,7 +98,10 @@ + + ;; Describe a user's asm statement. 
+ (define_asm_attributes +- [(set_attr "type" "multi")]) ++ [(set_attr "type" "multi") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ;; Should be the maximum possible length ++ ;; of a single machine instruction. + + + ;; Pipeline model. +@@ -1884,7 +1887,10 @@ + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "2")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + + ;; Miscellaneous instructions. +@@ -1939,7 +1945,10 @@ + } + [(set_attr "type" "nop") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + (define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") +@@ -2003,8 +2012,9 @@ + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" +- [(set_attr "length" "0") +- (set_attr "type" "nop")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + ;; Do not schedule instructions accessing memory before this point. 
+ +@@ -2023,7 +2033,9 @@ + (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" +- [(set_attr "length" "0")]) ++ [(set_attr "type" "nop") ++ (set_attr "mode" "none") ++ (set_attr "length" "0")]) + + (define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] +@@ -2036,7 +2048,10 @@ + } + [(set_attr "type" "trap") + (set_attr "mode" "none") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "!TARGET_DEBUG && TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't + ;; know if a frame pointer is required until the reload pass, and +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index e1d992f5d..97aa44f92 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -30,6 +30,10 @@ mlongcalls + Target Mask(LONGCALLS) + Use indirect CALLXn instructions for large programs. + ++mextra-l32r-costs= ++Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) ++Set extra memory access cost for L32R instruction, in clock-cycle units. ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch b/patches/gcc10.3/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch new file mode 100644 index 0000000..0e14673 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0023-Improve-constant-synthesis-for-both-integer-a.patch @@ -0,0 +1,400 @@ +From ed2c4b57807470b386e9abdf145282e197d9da65 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 11 Jun 2022 00:26:17 +0900 +Subject: [PATCH 18/31] xtensa: Improve constant synthesis for both integer and + floating-point + +This patch revises the previous implementation of constant synthesis. 
+ +First, changed to use define_split machine description pattern and to run +after reload pass, in order not to interfere some optimizations such as +the loop invariant motion. + +Second, not only integer but floating-point is subject to processing. + +Third, several new synthesis patterns - when the constant cannot fit into +a "MOVI Ax, simm12" instruction, but: + +I. can be represented as a power of two minus one (eg. 32767, 65535 or + 0x7fffffffUL) + => "MOVI(.N) Ax, -1" + "SRLI Ax, Ax, 1 ... 31" (or "EXTUI") +II. is between -34816 and 34559 + => "MOVI(.N) Ax, -2048 ... 2047" + "ADDMI Ax, Ax, -32768 ... 32512" +III. (existing case) can fit into a signed 12-bit if the trailing zero bits + are stripped + => "MOVI(.N) Ax, -2048 ... 2047" + "SLLI Ax, Ax, 1 ... 31" + +The above sequences consist of 5 or 6 bytes and have latency of 2 clock cycles, +in contrast with "L32R Ax, " (3 bytes and one clock latency, but may +suffer additional one clock pipeline stall and implementation-specific +InstRAM/ROM access penalty) plus 4 bytes of constant value. + +In addition, 3-instructions synthesis patterns (8 or 9 bytes, 3 clock latency) +are also provided when optimizing for speed and L32R instruction has +considerable access penalty: + +IV. 2-instructions synthesis (any of I ... III) followed by + "SLLI Ax, Ax, 1 ... 31" +V. 2-instructions synthesis followed by either "ADDX[248] Ax, Ax, Ax" + or "SUBX8 Ax, Ax, Ax" (multiplying by 3, 5, 7 or 9) + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_constantsynth): + New prototype. + * config/xtensa/xtensa.c (xtensa_emit_constantsynth, + xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI, + xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): + New backend functions that process the abovementioned logic. + (xtensa_emit_move_sequence): Revert the previous changes. + * config/xtensa/xtensa.md: New split patterns for integer + and floating-point, as the frontend part. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_2insns.c: New. + * gcc.target/xtensa/constsynth_3insns.c: Ditto. + * gcc.target/xtensa/constsynth_double.c: Ditto. +--- + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 133 +++++++++++++++--- + gcc/config/xtensa/xtensa.md | 50 +++++++ + .../gcc.target/xtensa/constsynth_2insns.c | 44 ++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 24 ++++ + .../gcc.target/xtensa/constsynth_double.c | 11 ++ + 6 files changed, 247 insertions(+), 16 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 80b1da2bb..d65bc2954 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -44,6 +44,7 @@ extern int xtensa_expand_block_move (rtx *); + extern int xtensa_expand_block_set_unrolled_loop (rtx *); + extern int xtensa_expand_block_set_small_loop (rtx *); + extern void xtensa_split_operand_pair (rtx *, machine_mode); ++extern int xtensa_constantsynth (rtx, HOST_WIDE_INT); + extern int xtensa_emit_move_sequence (rtx *, machine_mode); + extern rtx xtensa_copy_incoming_a7 (rtx); + extern void xtensa_expand_nonlocal_goto (rtx *); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 94ff901c5..ba36d7244 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1027,6 +1027,123 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + } + + ++/* Try to emit insns to load srcval (that cannot fit into signed 12-bit) ++ into dst with synthesizing a such constant value from a sequence of ++ load-immediate / arithmetic ones, instead of a L32R instruction ++ (plus a constant in litpool). 
*/ ++ ++static void ++xtensa_emit_constantsynth (rtx dst, enum rtx_code code, ++ HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT imm2) ++{ ++ gcc_assert (REG_P (dst)); ++ emit_move_insn (dst, GEN_INT (imm0)); ++ emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, ++ dst, GEN_INT (imm1))); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, imm2)); ++} ++ ++static int ++xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, ++ rtx (*gen_op)(rtx, HOST_WIDE_INT), ++ HOST_WIDE_INT op_imm) ++{ ++ int shift = exact_log2 (srcval + 1); ++ ++ if (IN_RANGE (shift, 1, 31)) ++ { ++ xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ { ++ HOST_WIDE_INT imm0, imm1; ++ ++ if (srcval < -32768) ++ imm1 = -32768; ++ else if (srcval > 32512) ++ imm1 = 32512; ++ else ++ imm1 = srcval & ~255; ++ imm0 = srcval - imm1; ++ if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) ++ imm0 -= 256, imm1 += 256; ++ xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); ++ return 1; ++ } ++ ++ shift = ctz_hwi (srcval); ++ if (xtensa_simm12b (srcval >> shift)) ++ { ++ xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, ++ gen_op, op_imm); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static rtx ++xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm) ++{ ++ return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm)); ++} ++ ++static rtx ++xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm) ++{ ++ return imm == 7 ++ ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)), ++ reg) ++ : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg, ++ GEN_INT (floor_log2 (imm - 1))), ++ reg); ++} ++ ++int ++xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) ++{ ++ /* No need for synthesizing for what fits into MOVI instruction. */ ++ if (xtensa_simm12b (srcval)) ++ return 0; ++ ++ /* 2-insns substitution. 
*/ ++ if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1)) ++ && xtensa_constantsynth_2insn (dst, srcval, NULL, 0)) ++ return 1; ++ ++ /* 3-insns substitution. */ ++ if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2) ++ { ++ int shift, divisor; ++ ++ /* 2-insns substitution followed by SLLI. */ ++ shift = ctz_hwi (srcval); ++ if (IN_RANGE (shift, 1, 31) && ++ xtensa_constantsynth_2insn (dst, srcval >> shift, ++ xtensa_constantsynth_rtx_SLLI, ++ shift)) ++ return 1; ++ ++ /* 2-insns substitution followed by ADDX[248] or SUBX8. */ ++ if (TARGET_ADDX) ++ for (divisor = 3; divisor <= 9; divisor += 2) ++ if (srcval % divisor == 0 && ++ xtensa_constantsynth_2insn (dst, srcval / divisor, ++ xtensa_constantsynth_rtx_ADDSUBX, ++ divisor)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++ + /* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move +@@ -1064,22 +1181,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + + if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) + { +- /* Try to emit MOVI + SLLI sequence, that is smaller +- than L32R + literal. 
*/ +- if (optimize_size && mode == SImode && CONST_INT_P (src) +- && register_operand (dst, mode)) +- { +- HOST_WIDE_INT srcval = INTVAL (src); +- int shift = ctz_hwi (srcval); +- +- if (xtensa_simm12b (srcval >> shift)) +- { +- emit_move_insn (dst, GEN_INT (srcval >> shift)); +- emit_insn (gen_ashlsi3_internal (dst, dst, GEN_INT (shift))); +- return 1; +- } +- } +- + src = force_const_mem (SImode, src); + operands[1] = src; + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index da6b71d1d..ddc3087fa 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -940,6 +940,19 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "constantpool_operand"))] ++ "! optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ if (! CONST_INT_P (x)) ++ FAIL; ++ if (! xtensa_constantsynth (operands[0], INTVAL (x))) ++ emit_move_insn (operands[0], x); ++}) ++ + ;; 16-bit Integer moves + + (define_expand "movhi" +@@ -1144,6 +1157,43 @@ + (set_attr "mode" "SF") + (set_attr "length" "3")]) + ++(define_split ++ [(set (match_operand:SF 0 "register_operand") ++ (match_operand:SF 1 "constantpool_operand"))] ++ "! 
optimize_debug && reload_completed" ++ [(const_int 0)] ++{ ++ int i = 0; ++ rtx x = XEXP (operands[1], 0); ++ long l[2]; ++ if (GET_CODE (x) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (x)) ++ x = get_pool_constant (x); ++ else if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ gcc_assert (GET_CODE (x) == PLUS ++ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) ++ && CONST_INT_P (XEXP (x, 1))); ++ i = INTVAL (XEXP (x, 1)); ++ gcc_assert (i == 0 || i == 4); ++ i /= 4; ++ x = get_pool_constant (XEXP (x, 0)); ++ } ++ else ++ gcc_unreachable (); ++ if (GET_MODE (x) == SFmode) ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); ++ else if (GET_MODE (x) == DFmode) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ else ++ FAIL; ++ x = gen_rtx_REG (SImode, REGNO (operands[0])); ++ if (! xtensa_constantsynth (x, l[i])) ++ emit_move_insn (x, GEN_INT (l[i])); ++}) ++ + ;; 64-bit floating point moves + + (define_expand "movdf" +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +new file mode 100644 +index 000000000..43c85a250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++int test_0(void) ++{ ++ return 4095; ++} ++ ++int test_1(void) ++{ ++ return 2147483647; ++} ++ ++int test_2(void) ++{ ++ return -34816; ++} ++ ++int test_3(void) ++{ ++ return -2049; ++} ++ ++int test_4(void) ++{ ++ return 2048; ++} ++ ++int test_5(void) ++{ ++ return 34559; ++} ++ ++int test_6(void) ++{ ++ return 43680; ++} ++ ++void test_7(int *p) ++{ ++ *p = -1432354816; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +new file mode 100644 +index 000000000..f3c4a1c7c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mextra-l32r-costs=3" } */ ++ ++int test_0(void) ++{ ++ return 134217216; ++} ++ ++int test_1(void) ++{ ++ return -27604992; ++} ++ ++int test_2(void) ++{ ++ return -162279; ++} ++ ++void test_3(int *p) ++{ ++ *p = 192437; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +new file mode 100644 +index 000000000..890ca5047 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void test(unsigned int count, double array[]) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ array[i] = 1.0; ++} ++ ++/* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0024-Improve-shift-operations-more.patch b/patches/gcc10.3/gcc-xtensa-0024-Improve-shift-operations-more.patch new file mode 100644 index 0000000..9c44b89 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0024-Improve-shift-operations-more.patch @@ -0,0 +1,383 @@ +From fd3771fcc13b8712c91cec70f4533760f72b54e1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:38:31 +0900 +Subject: [PATCH 19/31] xtensa: Improve shift operations more + +This patch introduces funnel shifter utilization, and rearranges existing +"per-byte shift" insn patterns. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (logical_shift_operator, + xtensa_shift_per_byte_operator): New predicates. + * config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction): + New prototype. + * config/xtensa/xtensa.c (xtensa_shlrd_which_direction): + New helper function for funnel shift patterns. + * config/xtensa/xtensa.md (ior_op): New code iterator. + (*ashlsi3_1): Replace with new split pattern. 
+ (*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x. + (*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1): + New insn-and-split patterns that redirect to *xtensa_shift_per_byte, + in order to omit unnecessary bitwise AND operation. + (*shlrd_reg_, *shlrd_const_, *shlrd_per_byte_, + *shlrd_per_byte__omit_AND): + New insn patterns for funnel shifts. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/funnel_shifter.c: New. +--- + gcc/config/xtensa/predicates.md | 6 + + gcc/config/xtensa/xtensa-protos.h | 1 + + gcc/config/xtensa/xtensa.c | 14 ++ + gcc/config/xtensa/xtensa.md | 213 ++++++++++++++---- + .../gcc.target/xtensa/funnel_shifter.c | 17 ++ + 5 files changed, 213 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 91b9343a2..e7836f0ec 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -164,9 +164,15 @@ + (define_predicate "boolean_operator" + (match_code "eq,ne")) + ++(define_predicate "logical_shift_operator" ++ (match_code "ashift,lshiftrt")) ++ + (define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + ++(define_predicate "xtensa_shift_per_byte_operator" ++ (match_code "ashift,ashiftrt,lshiftrt")) ++ + (define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index d65bc2954..32743bc67 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); ++extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + + #ifdef TREE_CODE + extern void 
init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba36d7244..473cfaf9d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -2394,6 +2394,20 @@ xtensa_tls_referenced_p (rtx x) + } + + ++/* Helper function for "*shlrd_..." patterns. */ ++ ++enum rtx_code ++xtensa_shlrd_which_direction (rtx op0, rtx op1) ++{ ++ if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) ++ return ASHIFT; /* shld */ ++ if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) ++ return LSHIFTRT; /* shrd */ ++ ++ return UNKNOWN; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ddc3087fa..58bba89af 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -83,6 +83,9 @@ + ;; the same template. + (define_mode_iterator HQI [HI QI]) + ++;; This code iterator is for *shlrd and its variants. ++(define_code_iterator ior_op [ior plus]) ++ + + ;; Attributes. 
+ +@@ -1272,16 +1275,6 @@ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + }) + +-(define_insn "*ashlsi3_1" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (const_int 1)))] +- "TARGET_DENSITY" +- "add.n\t%0, %1, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "2")]) +- + (define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1294,16 +1287,14 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashlsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashift:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8b\t%2\;sll\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "register_operand") ++ (const_int 1)))] ++ "TARGET_DENSITY" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 1) ++ (match_dup 1)))]) + + (define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") +@@ -1317,17 +1308,6 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*ashrsi3_3x" +- [(set (match_operand:SI 0 "register_operand" "=a") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] +- "" +- "ssa8l\t%2\;sra\t%0, %1" +- [(set_attr "type" "arith") +- (set_attr "mode" "SI") +- (set_attr "length" "6")]) +- + (define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") +@@ -1337,9 +1317,9 @@ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) +- return "srli\t%0, %1, %R2"; ++ return "srli\t%0, %1, %R2"; + else +- 
return "extui\t%0, %1, %R2, %L2"; ++ return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; + } +@@ -1347,13 +1327,170 @@ + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +-(define_insn "*lshrsi3_3x" ++(define_insn "*shift_per_byte" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 3 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]))] ++ "!optimize_debug && optimize" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;sll\t%0, %1"; ++ case ASHIFTRT: return "ssa8l\t%2\;sra\t%0, %1"; ++ case LSHIFTRT: return "ssa8l\t%2\;srl\t%0, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_0" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i"))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shift_per_byte_omit_AND_1" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (match_operator:SI 4 "xtensa_shift_per_byte_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 3 "const_int_operand" "i")))]))] ++ "!optimize_debug && optimize ++ && (INTVAL (operands[3]) & 0x1f) == 3 << 3" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 
5) ++ (neg:SI (match_dup 2))) ++ (set (match_dup 0) ++ (match_op_dup 4 ++ [(match_dup 1) ++ (ashift:SI (match_dup 5) ++ (const_int 3))]))] ++{ ++ operands[5] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "9")]) ++ ++(define_insn "*shlrd_reg_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (match_dup 2))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssl\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_const_" + [(set (match_operand:SI 0 "register_operand" "=a") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int 3))))] ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i")])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && IN_RANGE (INTVAL (operands[3]), 1, 31) ++ && IN_RANGE (INTVAL (operands[4]), 1, 31) ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == 32" ++{ ++ switch (xtensa_shlrd_which_direction (operands[5], operands[6])) ++ { ++ case ASHIFT: return "ssai\t%L3\;src\t%0, %1, %2"; ++ case LSHIFTRT: return 
"ssai\t%R3\;src\t%0, %2, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn "*shlrd_per_byte_" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 4 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3))]) ++ (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN" ++{ ++ switch (xtensa_shlrd_which_direction (operands[4], operands[5])) ++ { ++ case ASHIFT: return "ssa8b\t%2\;src\t%0, %1, %3"; ++ case LSHIFTRT: return "ssa8l\t%2\;src\t%0, %3, %1"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*shlrd_per_byte__omit_AND" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior_op:SI (match_operator:SI 5 "logical_shift_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 3)) ++ (match_operand:SI 4 "const_int_operand" "i"))]) ++ (match_operator:SI 6 "logical_shift_operator" ++ [(match_operand:SI 3 "register_operand" "r") ++ (neg:SI (and:SI (ashift:SI (match_dup 2) ++ (const_int 3)) ++ (match_dup 4)))])))] ++ "!optimize_debug && optimize ++ && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN ++ && (INTVAL (operands[4]) & 0x1f) == 3 << 3" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ior_op:SI (match_op_dup 5 ++ [(match_dup 1) ++ (ashift:SI (match_dup 2) ++ (const_int 3))]) ++ (match_op_dup 6 ++ [(match_dup 3) ++ (neg:SI (ashift:SI (match_dup 2) ++ (const_int 3)))])))] + "" +- "ssa8l\t%2\;srl\t%0, %1" + [(set_attr "type" "arith") + 
(set_attr "mode" "SI") + (set_attr "length" "6")]) +diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +new file mode 100644 +index 000000000..c8f987ccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++unsigned int test_0(const void *addr) ++{ ++ unsigned int n = (unsigned int)addr; ++ const unsigned int *a = (const unsigned int*)(n & ~3); ++ n = (n & 3) * 8; ++ return (a[0] >> n) | (a[1] << (32 - n)); ++} ++ ++unsigned int test_1(unsigned int a, unsigned int b) ++{ ++ return (a >> 16) + (b << 16); ++} ++ ++/* { dg-final { scan-assembler-times "src" 2 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch new file mode 100644 index 0000000..cdb96ff --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0025-Simplify-conditional-branch-move-insn-pattern.patch @@ -0,0 +1,427 @@ +From 0690bcdd42d0aa6671f9ec3ccbbe70faa04ffb6b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 31 Jan 2022 09:56:21 +0900 +Subject: [PATCH 20/31] xtensa: Simplify conditional branch/move insn patterns + +No need to describe the "false side" conditional insn patterns anymore. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_emit_branch): + Remove the first argument. + (xtensa_emit_bit_branch): Remove it because now called only from the + output statement of *bittrue insn pattern. + * config/xtensa/xtensa.c (gen_int_relational): Remove the last + argument 'p_invert', and make so that the condition is reversed by + itself as needed. + (xtensa_expand_conditional_branch): Share the common path, and remove + condition inversion code. + (xtensa_emit_branch, xtensa_emit_movcc): Simplify by removing the + "false side" pattern. 
+ (xtensa_emit_bit_branch): Remove it because of the abovementioned + reason, and move the function body to *bittrue insn pattern. + * config/xtensa/xtensa.md (*bittrue): Transplant the output + statement from removed xtensa_emit_bit_branch(). + (*bfalse, *ubfalse, *bitfalse, *maskfalse): Remove the "false side" + insn patterns. +--- + gcc/config/xtensa/xtensa-protos.h | 3 +- + gcc/config/xtensa/xtensa.c | 111 ++++++++++------------------ + gcc/config/xtensa/xtensa.md | 117 ++++++++---------------------- + 3 files changed, 70 insertions(+), 161 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 32743bc67..e4b2d2f06 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -51,8 +51,7 @@ extern void xtensa_expand_nonlocal_goto (rtx *); + extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); + extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); +-extern char *xtensa_emit_branch (bool, bool, rtx *); +-extern char *xtensa_emit_bit_branch (bool, bool, rtx *); ++extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); + extern char *xtensa_emit_call (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 473cfaf9d..8deae3d51 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -118,7 +118,7 @@ const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = + + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); +-static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); ++static rtx gen_int_relational (enum rtx_code, rtx, rtx); + static rtx gen_float_relational (enum rtx_code, rtx, rtx); + static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx); + static rtx 
fixup_subreg_mem (rtx); +@@ -670,8 +670,7 @@ map_test_to_internal_test (enum rtx_code test_code) + static rtx + gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ +- rtx cmp1, /* second operand to compare */ +- int *p_invert /* whether branch needs to reverse test */) ++ rtx cmp1 /* second operand to compare */) + { + struct cmp_info + { +@@ -703,6 +702,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + enum internal_test test; + machine_mode mode; + struct cmp_info *p_info; ++ int invert; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); +@@ -739,9 +739,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- *p_invert = ((GET_CODE (cmp1) == CONST_INT) +- ? p_info->invert_const +- : p_info->invert_reg); ++ invert = ((GET_CODE (cmp1) == CONST_INT) ++ ? p_info->invert_const ++ : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. */ +@@ -758,7 +758,9 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + cmp1 = temp; + } + +- return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); ++ return gen_rtx_fmt_ee (invert ? 
reverse_condition (p_info->test_code) ++ : p_info->test_code, ++ VOIDmode, cmp0, cmp1); + } + + +@@ -817,45 +819,33 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode) + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; +- rtx cmp; +- int invert; +- rtx label1, label2; ++ rtx cmp, label; + + switch (mode) + { ++ case E_SFmode: ++ if (TARGET_HARD_FLOAT) ++ { ++ cmp = gen_float_relational (test_code, cmp0, cmp1); ++ break; ++ } ++ /* FALLTHRU */ ++ + case E_DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case E_SImode: +- invert = FALSE; +- cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); +- break; +- +- case E_SFmode: +- if (!TARGET_HARD_FLOAT) +- fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, +- cmp0, cmp1)); +- invert = FALSE; +- cmp = gen_float_relational (test_code, cmp0, cmp1); ++ cmp = gen_int_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ +- +- label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); +- label2 = pc_rtx; +- +- if (invert) +- { +- label2 = label1; +- label1 = pc_rtx; +- } +- ++ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, +- label1, +- label2))); ++ label, ++ pc_rtx))); + } + + +@@ -2058,21 +2048,20 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + + + char * +-xtensa_emit_branch (bool inverted, bool immed, rtx *operands) ++xtensa_emit_branch (bool immed, rtx *operands) + { + static char result[64]; +- enum rtx_code code; ++ enum rtx_code code = GET_CODE (operands[3]); + const char *op; + +- code = GET_CODE (operands[3]); + switch (code) + { +- case EQ: op = inverted ? "ne" : "eq"; break; +- case NE: op = inverted ? "eq" : "ne"; break; +- case LT: op = inverted ? "ge" : "lt"; break; +- case GE: op = inverted ? "lt" : "ge"; break; +- case LTU: op = inverted ? 
"geu" : "ltu"; break; +- case GEU: op = inverted ? "ltu" : "geu"; break; ++ case EQ: op = "eq"; break; ++ case NE: op = "ne"; break; ++ case LT: op = "lt"; break; ++ case GE: op = "ge"; break; ++ case LTU: op = "ltu"; break; ++ case GEU: op = "geu"; break; + default: gcc_unreachable (); + } + +@@ -2091,32 +2080,6 @@ xtensa_emit_branch (bool inverted, bool immed, rtx *operands) + } + + +-char * +-xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +-{ +- static char result[64]; +- const char *op; +- +- switch (GET_CODE (operands[3])) +- { +- case EQ: op = inverted ? "bs" : "bc"; break; +- case NE: op = inverted ? "bc" : "bs"; break; +- default: gcc_unreachable (); +- } +- +- if (immed) +- { +- unsigned bitnum = INTVAL (operands[1]) & 0x1f; +- operands[1] = GEN_INT (bitnum); +- sprintf (result, "b%si\t%%0, %%d1, %%2", op); +- } +- else +- sprintf (result, "b%s\t%%0, %%1, %%2", op); +- +- return result; +-} +- +- + char * + xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { +@@ -2125,12 +2088,14 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + const char *op; + + code = GET_CODE (operands[4]); ++ if (inverted) ++ code = reverse_condition (code); + if (isbool) + { + switch (code) + { +- case EQ: op = inverted ? "t" : "f"; break; +- case NE: op = inverted ? "f" : "t"; break; ++ case EQ: op = "f"; break; ++ case NE: op = "t"; break; + default: gcc_unreachable (); + } + } +@@ -2138,10 +2103,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + { + switch (code) + { +- case EQ: op = inverted ? "nez" : "eqz"; break; +- case NE: op = inverted ? "eqz" : "nez"; break; +- case LT: op = inverted ? "gez" : "ltz"; break; +- case GE: op = inverted ? 
"ltz" : "gez"; break; ++ case EQ: op = "eqz"; break; ++ case NE: op = "nez"; break; ++ case LT: op = "ltz"; break; ++ case GE: op = "gez"; break; + default: gcc_unreachable (); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 58bba89af..40000859d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1551,28 +1551,13 @@ + (define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*bfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "branch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1581,28 +1566,13 @@ + (define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" +- [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) ++ [(match_operand:SI 0 "register_operand" "r,r") ++ (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- return xtensa_emit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump,jump") +- (set_attr "mode" "none") +- (set_attr "length" "3,3")]) +- +-(define_insn "*ubfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "ubranch_operator" +- 
[(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "ubranch_operand" "L,r")]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_branch (true, which_alternative == 0, operands); ++ return xtensa_emit_branch (which_alternative == 0, operands); + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +@@ -1613,75 +1583,50 @@ + (define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) +- (pc)))] +- "" +-{ +- return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*bitfalse" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(zero_extract:SI +- (match_operand:SI 0 "register_operand" "r,r") +- (const_int 1) +- (match_operand:SI 1 "arith_operand" "J,r")) ++ [(zero_extract:SI (match_operand:SI 0 "register_operand" "r,r") ++ (const_int 1) ++ (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] +- "" +-{ +- return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +-} +- [(set_attr "type" "jump") +- (set_attr "mode" "none") +- (set_attr "length" "3")]) +- +-(define_insn "*masktrue" +- [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { ++ static char result[64]; ++ char op; + switch (GET_CODE (operands[3])) + { +- case EQ: return "bnone\t%0, %1, %2"; +- case NE: return "bany\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: op = 'c'; break; ++ case NE: op = 's'; break; ++ default: 
gcc_unreachable (); + } ++ if (which_alternative == 0) ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) & 0x1f); ++ sprintf (result, "bb%ci\t%%0, %%d1, %%2", op); ++ } ++ else ++ sprintf (result, "bb%c\t%%0, %%1, %%2", op); ++ return result; + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_insn "*maskfalse" ++(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" +- [(and:SI (match_operand:SI 0 "register_operand" "r") +- (match_operand:SI 1 "register_operand" "r")) +- (const_int 0)]) +- (pc) +- (label_ref (match_operand 2 "" ""))))] ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] + "" + { + switch (GET_CODE (operands[3])) + { +- case EQ: return "bany\t%0, %1, %2"; +- case NE: return "bnone\t%0, %1, %2"; +- default: gcc_unreachable (); ++ case EQ: return "bnone\t%0, %1, %2"; ++ case NE: return "bany\t%0, %1, %2"; ++ default: gcc_unreachable (); + } + } + [(set_attr "type" "jump") +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch b/patches/gcc10.3/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch new file mode 100644 index 0000000..e1d2790 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0026-Make-use-of-BALL-BNALL-instructions.patch @@ -0,0 +1,101 @@ +From a7cf439409089eab17341a1a24fb9be2b967ca7c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 27 May 2021 19:04:12 +0900 +Subject: [PATCH 21/31] xtensa: Make use of BALL/BNALL instructions + +In Xtensa ISA, there is no single machine instruction that calculates unary +bitwise negation, but a few similar fused instructions are exist: + + "BALL Ax, Ay, label" // if ((~Ax & Ay) == 0) goto label; + "BNALL Ax, Ay, label" // if ((~Ax & Ay) != 0) goto label; + +These instructions have never been emitted before, but it seems no reason 
not +to make use of them. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_bitcmpl): New insn pattern. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/BALL-BNALL.c: New. +--- + gcc/config/xtensa/xtensa.md | 21 +++++++++++++ + gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c | 33 ++++++++++++++++++++ + 2 files changed, 54 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 40000859d..b34b2afb6 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1633,6 +1633,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn "*masktrue_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "register_operand" "r")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "" ++{ ++ switch (GET_CODE (operands[3])) ++ { ++ case EQ: return "ball\t%0, %1, %2"; ++ case NE: return "bnall\t%0, %1, %2"; ++ default: gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ + + ;; Zero-overhead looping support. 
+ +diff --git a/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +new file mode 100644 +index 000000000..ba61c6f37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/BALL-BNALL.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++extern void foo(void); ++ ++void BNONE_test(int a, int b) ++{ ++ if (a & b) ++ foo(); ++} ++ ++void BANY_test(int a, int b) ++{ ++ if (!(a & b)) ++ foo(); ++} ++ ++void BALL_test(int a, int b) ++{ ++ if (~a & b) ++ foo(); ++} ++ ++void BNALL_test(int a, int b) ++{ ++ if (!(~a & b)) ++ foo(); ++} ++ ++/* { dg-final { scan-assembler-times "bnone" 1 } } */ ++/* { dg-final { scan-assembler-times "bany" 1 } } */ ++/* { dg-final { scan-assembler-times "ball" 1 } } */ ++/* { dg-final { scan-assembler-times "bnall" 1 } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch b/patches/gcc10.3/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch new file mode 100644 index 0000000..b13350f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0027-Optimize-bitwise-AND-operation-with-some-spec.patch @@ -0,0 +1,252 @@ +From 43c7f8333028ff03d8a4681ab62de2febcc43f5c Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 01:28:43 +0900 +Subject: [PATCH 22/31] xtensa: Optimize bitwise AND operation with some + specific forms of constants + +This patch offers several insn-and-split patterns for bitwise AND with +register and constant that can be represented as: + +i. 1's least significant N bits and the others 0's (17 <= N <= 31) +ii. 1's most significant N bits and the others 0's (12 <= N <= 31) +iii. M 1's sequence of bits and trailing N 0's bits, that cannot fit into a + "MOVI Ax, simm12" instruction (1 <= M <= 16, 1 <= N <= 30) + +And also offers shortcuts for conditional branch if each of the abovementioned +operations is (not) equal to zero. 
+ +gcc/ChangeLog: + + * config/xtensa/predicates.md (shifted_mask_operand): + New predicate. + * config/xtensa/xtensa.md (*andsi3_const_pow2_minus_one): + New insn-and-split pattern. + (*andsi3_const_negative_pow2, *andsi3_const_shifted_mask, + *masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): Ditto. +--- + gcc/config/xtensa/predicates.md | 10 ++ + gcc/config/xtensa/xtensa.md | 179 ++++++++++++++++++++++++++++++++ + 2 files changed, 189 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index e7836f0ec..367fc17f3 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -52,6 +52,16 @@ + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + ++(define_predicate "shifted_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT mask = INTVAL (op); ++ int shift = ctz_hwi (mask); ++ ++ return IN_RANGE (shift, 1, 31) ++ && xtensa_mask_immediate ((uint32_t)mask >> shift); ++}) ++ + (define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 16)"))) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b34b2afb6..355fb7742 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -645,6 +645,83 @@ + (set_attr "mode" "SI") + (set_attr "length" "6")]) + ++(define_insn_and_split "*andsi3_const_pow2_minus_one" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (ashift:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (32 - floor_log2 (INTVAL (operands[2]) + 1)); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" 
"SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[2]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*andsi3_const_negative_pow2" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[2])), 12, 31)" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (lshiftrt:SI (match_dup 1) ++ (match_dup 2))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ operands[2] = GEN_INT (floor_log2 (-INTVAL (operands[2]))); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*andsi3_const_shifted_mask" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "shifted_mask_operand" "i")))] ++ "! xtensa_simm12b (INTVAL (operands[2]))" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (zero_extract:SI (match_dup 1) ++ (match_dup 3) ++ (match_dup 4))) ++ (set (match_dup 0) ++ (ashift:SI (match_dup 0) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[2]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[2] = GEN_INT (shift); ++ operands[3] = GEN_INT (mask_size); ++ operands[4] = GEN_INT (mask_pos); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && ctz_hwi (INTVAL (operands[2])) == 1") ++ (const_int 5) ++ (const_int 6)))]) ++ + (define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") +@@ -1654,6 +1731,108 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split 
"*masktrue_const_pow2_minus_one" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (ashift:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && INTVAL (operands[1]) == 0x7FFFFFFF") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*masktrue_const_negative_pow2" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (lshiftrt:SI (match_dup 0) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(match_dup 4) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set_attr "length" "6")]) ++ ++(define_insn_and_split "*masktrue_const_shifted_mask" ++ [(set (pc) ++ (if_then_else (match_operator 4 "boolean_operator" ++ [(and:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "shifted_mask_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 
"" "")) ++ (pc)))] ++ "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 ++ && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 6) ++ (zero_extract:SI (match_dup 0) ++ (match_dup 5) ++ (match_dup 1))) ++ (set (pc) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 6) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) ++ (pc)))] ++{ ++ HOST_WIDE_INT mask = INTVAL (operands[1]); ++ int shift = ctz_hwi (mask); ++ int mask_size = floor_log2 (((uint32_t)mask >> shift) + 1); ++ int mask_pos = shift; ++ if (BITS_BIG_ENDIAN) ++ mask_pos = (32 - (mask_size + shift)) & 0x1f; ++ operands[1] = GEN_INT (mask_pos); ++ operands[2] = GEN_INT ((uint32_t)INTVAL (operands[2]) >> shift); ++ operands[5] = GEN_INT (mask_size); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])) == 0") ++ (const_int 5) ++ (const_int 6)))]) ++ + + ;; Zero-overhead looping support. + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch b/patches/gcc10.3/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch new file mode 100644 index 0000000..ebe9eb0 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0028-Document-new-mextra-l32r-costs-Xtensa-specifi.patch @@ -0,0 +1,44 @@ +From 7856e5d6344828b2a72aeef671a169dbd1a85a55 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:34:48 +0900 +Subject: [PATCH 23/31] xtensa: Document new -mextra-l32r-costs= + Xtensa-specific option + +gcc/ChangeLog: + * doc/invoke.texi: Document -mextra-l32r-costs= option. 
+--- + gcc/doc/invoke.texi | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index eabeec944..c35f51afb 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1385,7 +1385,8 @@ See RS/6000 and PowerPC Options. + -mtext-section-literals -mno-text-section-literals @gol + -mauto-litpools -mno-auto-litpools @gol + -mtarget-align -mno-target-align @gol +--mlongcalls -mno-longcalls} ++-mlongcalls -mno-longcalls @gol ++-mextra-l32r-costs=@var{cycles}} + + @emph{zSeries Options} + See S/390 and zSeries Options. +@@ -30519,6 +30520,14 @@ assembly code generated by GCC still shows direct call + instructions---look at the disassembled object code to see the actual + instructions. Note that the assembler uses an indirect call for + every cross-file call, not just those that really are out of range. ++ ++@item -mextra-l32r-costs=@var{cycles} ++@opindex mextra-l32r-costs ++Specify an extra cost of instruction RAM/ROM access for @code{L32R} ++instructions, in clock cycles. When optimizing for speed, this affects ++the choice between loading a constant from the literal pool using ++@code{L32R} and synthesizing the constant from a small one with a couple ++of arithmetic instructions. The default value is 0. 
+ @end table + + @node zSeries Options +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch b/patches/gcc10.3/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch new file mode 100644 index 0000000..f5c0f78 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0029-Add-support-for-sibling-call-optimization.patch @@ -0,0 +1,354 @@ +From c985f67f0b9a35ca5f22647c326c6b43a2b237fa Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 15 Jun 2022 21:21:21 +0900 +Subject: [PATCH 24/31] xtensa: Add support for sibling call optimization + +This patch introduces support for sibling call optimization, when the Windowed +Register Option is NOT configured. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call, + xtensa_emit_sibcall): New prototypes. + (xtensa_expand_epilogue): Add new argument that specifies whether + or not sibling call. + * config/xtensa/xtensa.c (TARGET_FUNCTION_OK_FOR_SIBCALL): + New macro definition. + (xtensa_prepare_expand_call): New function in order to share + the common code. + (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall): + New functions. + (xtensa_expand_epilogue): Add new argument sibcall_p and use it + for sibling call handling. + * config/xtensa/xtensa.md (call, call_value): + Use xtensa_prepare_expand_call. + (call_internal, call_value_internal): + Add the condition in order to be disabled if sibling call. + (sibcall, sibcall_value, sibcall_epilogue): New expansions. + (sibcall_internal, sibcall_value_internal): New insn patterns, + and split ones in order to take care of the indirect sibcalls. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: New. 
+--- + gcc/config/xtensa/xtensa-protos.h | 4 +- + gcc/config/xtensa/xtensa.c | 57 ++++++++++++- + gcc/config/xtensa/xtensa.md | 93 ++++++++++++++++++---- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++ + 4 files changed, 155 insertions(+), 19 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index e4b2d2f06..75ed3bfb0 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); + extern void xtensa_emit_loop_end (rtx_insn *, rtx *); + extern char *xtensa_emit_branch (bool, rtx *); + extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); ++extern void xtensa_prepare_expand_call (int, rtx *); + extern char *xtensa_emit_call (int, rtx *); ++extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); + +@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int); + extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); +-extern void xtensa_expand_epilogue (void); ++extern void xtensa_expand_epilogue (bool); + extern void order_regs_for_local_alloc (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8deae3d51..a714b980a 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -187,6 +187,7 @@ static bool xtensa_modes_tieable_p (machine_mode, machine_mode); + static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); 
++static bool xtensa_function_ok_for_sibcall (tree, tree); + + + +@@ -337,6 +338,9 @@ static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2117,6 +2121,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) + } + + ++void ++xtensa_prepare_expand_call (int callop, rtx *operands) ++{ ++ rtx addr = XEXP (operands[callop], 0); ++ ++ if (flag_pic && SYMBOL_REF_P (addr) ++ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) ++ addr = gen_sym_PLT (addr); ++ ++ if (!call_insn_operand (addr, VOIDmode)) ++ XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++} ++ ++ + char * + xtensa_emit_call (int callop, rtx *operands) + { +@@ -2135,6 +2153,24 @@ xtensa_emit_call (int callop, rtx *operands) + } + + ++char * ++xtensa_emit_sibcall (int callop, rtx *operands) ++{ ++ static char result[64]; ++ rtx tgt = operands[callop]; ++ ++ if (GET_CODE (tgt) == CONST_INT) ++ sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", ++ INTVAL (tgt)); ++ else if (register_operand (tgt, VOIDmode)) ++ sprintf (result, "jx\t%%%d", callop); ++ else ++ sprintf (result, "j.l\t%%%d, a9", callop); ++ ++ return result; ++} ++ ++ + bool + xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict) + { +@@ -3305,7 +3341,7 @@ xtensa_expand_prologue (void) + } + + void +-xtensa_expand_epilogue (void) ++xtensa_expand_epilogue (bool sibcall_p) + { + if (!TARGET_WINDOWED_ABI) + { +@@ -3339,10 +3375,13 @@ xtensa_expand_epilogue (void) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), ++ emit_move_insn 
(reg = gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); ++ if (regno == A0_REG && sibcall_p) ++ emit_use (reg); + } + } + +@@ -3377,7 +3416,8 @@ xtensa_expand_epilogue (void) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- emit_jump_insn (gen_return ()); ++ if (!sibcall_p) ++ emit_jump_insn (gen_return ()); + } + + bool +@@ -4893,4 +4933,15 @@ xtensa_asan_shadow_offset (void) + return HOST_WIDE_INT_UC (0x10000000); + } + ++/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ ++static bool ++xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Do not allow sibcalls when windowed registers ABI is in effect. */ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 355fb7742..2a11d1c86 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,6 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++ (A10_REG 10) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2153,18 +2154,13 @@ + (match_operand 1 "" ""))] + "" + { +- rtx addr = XEXP (operands[0], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (0, operands); + }) + + (define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (0, operands); + } +@@ -2178,19 +2174,14 @@ + (match_operand 2 "" "")))] + "" + { +- rtx addr = XEXP (operands[1], 0); +- if (flag_pic && GET_CODE (addr) == SYMBOL_REF +- && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) +- addr = gen_sym_PLT (addr); +- if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[1], 0) = 
copy_to_mode_reg (Pmode, addr); ++ xtensa_prepare_expand_call (1, operands); + }) + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] +- "" ++ "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); + } +@@ -2198,6 +2189,70 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "sibcall" ++ [(call (match_operand 0 "memory_operand" "") ++ (match_operand 1 "" ""))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (0, operands); ++}) ++ ++(define_insn "sibcall_internal" ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ (match_operand 1 "" "i"))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (0, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(call (mem:SI (match_operand:SI 0 "register_operand")) ++ (match_operand 1 ""))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 0)) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 1))]) ++ ++(define_expand "sibcall_value" ++ [(set (match_operand 0 "register_operand" "") ++ (call (match_operand 1 "memory_operand" "") ++ (match_operand 2 "" "")))] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_prepare_expand_call (1, operands); ++}) ++ ++(define_insn "sibcall_value_internal" ++ [(set (match_operand 0 "register_operand" "=a") ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] ++ "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" ++{ ++ return xtensa_emit_sibcall (1, operands); ++} ++ [(set_attr "type" "call") ++ (set_attr "mode" "none") ++ (set_attr "length" "3")]) ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (call (mem:SI (match_operand:SI 1 "register_operand")) 
++ (match_operand 2 "")))] ++ "reload_completed ++ && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) ++ && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ [(set (reg:SI A10_REG) ++ (match_dup 1)) ++ (set (match_dup 0) ++ (call (mem:SI (reg:SI A10_REG)) ++ (match_dup 2)))]) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +@@ -2265,7 +2320,15 @@ + [(return)] + "" + { +- xtensa_expand_epilogue (); ++ xtensa_expand_epilogue (false); ++ DONE; ++}) ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "!TARGET_WINDOWED_ABI" ++{ ++ xtensa_expand_epilogue (true); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +new file mode 100644 +index 000000000..d2b3fccf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */ ++ ++extern int foo(int); ++extern void bar(int); ++ ++int test_0(int a) { ++ return foo(a); ++} ++ ++void test_1(int a) { ++ bar(a); ++} ++ ++int test_2(int (*a)(void)) { ++ bar(0); ++ return a(); ++} ++ ++/* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch b/patches/gcc10.3/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch new file mode 100644 index 0000000..ad60202 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0030-Add-some-dedicated-patterns-that-correspond-t.patch @@ -0,0 +1,81 @@ +From 16878066a57f917814a8d6fe45f7f7d2eebdbbc0 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:37:54 +0900 +Subject: [PATCH 25/31] xtensa: Add some dedicated patterns that correspond to + GIMPLE canonicalizations + +This patch offers better RTL representations against straightforward +derivations from some tree optimizers' canonicalized forms. 
+ +- rounding up to even, such as '(x + (x & 1))', is canonicalized to + '((x + 1) & -2)', but the former is one instruction less than the latter + in Xtensa ISA. +- signed greater or equal to zero as logical value '((signed)x >= 0)', + is canonicalized to '((unsigned)(x ^ -1) >> 31)', but the equivalent + '(((signed)x >> 31) + 1)' is one instruction less. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*round_up_to_even): + New insn-and-split pattern. + (*signed_ge_zero): Ditto. +--- + gcc/config/xtensa/xtensa.md | 45 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2a11d1c86..3e8e2e76f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2709,3 +2709,48 @@ + xtensa_expand_atomic (, operands[0], operands[1], operands[2], true); + DONE; + }) ++ ++(define_insn_and_split "*round_up_to_even" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 1)) ++ (const_int -2)))] ++ "" ++ "#" ++ "can_create_pseudo_p ()" ++ [(set (match_dup 2) ++ (and:SI (match_dup 1) ++ (const_int 1))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[2] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) ++ ++(define_insn_and_split "*signed_ge_zero" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ge:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 0)))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) ++ (ashiftrt:SI (match_dup 1) ++ (const_int 31))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int 1)))] ++ "" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 5) ++ (const_int 6)))]) +-- +2.20.1 + 
diff --git a/patches/gcc10.3/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch b/patches/gcc10.3/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch new file mode 100644 index 0000000..28bb494 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0031-Eliminate-unwanted-reg-reg-moves-during-DFmod.patch @@ -0,0 +1,90 @@ +From a0f2dfa2e952111dbd85d2b2f1caaf570facce8a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:39:49 +0900 +Subject: [PATCH 26/31] xtensa: Eliminate unwanted reg-reg moves during DFmode + input reloads + +When spilled DFmode registers are reloaded in, they are first loaded into a pair of +SImode regs and then copied from those regs. Such unwanted reg-reg moves +seem not to be eliminated at the "cprop_hardreg" stage, despite no problem +in output reloads. + +Luckily it is easy to resolve such inefficiencies, with the use of a peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/predicates.md (reload_operand): + New predicate. + * config/xtensa/xtensa.md: New peephole2 pattern. 
+--- + gcc/config/xtensa/predicates.md | 13 +++++++++++++ + gcc/config/xtensa/xtensa.md | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 367fc17f3..c1cddb733 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -165,6 +165,19 @@ + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + ++(define_predicate "reload_operand" ++ (match_code "mem") ++{ ++ const_rtx addr = XEXP (op, 0); ++ if (REG_P (addr)) ++ return REGNO (addr) == A1_REG; ++ if (GET_CODE (addr) == PLUS) ++ return REG_P (XEXP (addr, 0)) ++ && REGNO (XEXP (addr, 0)) == A1_REG ++ && CONST_INT_P (XEXP (addr, 1)); ++ return false; ++}) ++ + (define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 3e8e2e76f..2598c09c9 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2754,3 +2754,34 @@ + (if_then_else (match_test "TARGET_DENSITY") + (const_int 5) + (const_int 6)))]) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 6 "reload_operand")) ++ (set (match_operand:SI 1 "register_operand") ++ (match_operand:SI 7 "reload_operand")) ++ (set (match_operand:SF 2 "register_operand") ++ (match_operand:SF 4 "register_operand")) ++ (set (match_operand:SF 3 "register_operand") ++ (match_operand:SF 5 "register_operand"))] ++ "REGNO (operands[0]) == REGNO (operands[4]) ++ && REGNO (operands[1]) == REGNO (operands[5]) ++ && peep2_reg_dead_p (4, operands[0]) ++ && peep2_reg_dead_p (4, operands[1])" ++ [(set (match_dup 2) ++ (match_dup 6)) ++ (set (match_dup 3) ++ (match_dup 7))] ++{ ++ uint32_t check = 0; ++ int i; ++ for (i = 0; i <= 3; ++i) ++ { ++ uint32_t mask = (uint32_t)1 << REGNO (operands[i]); ++ if (check & mask) ++ FAIL; ++ check |= mask; ++ } ++ operands[6] = gen_rtx_MEM 
(SFmode, XEXP (operands[6], 0)); ++ operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch b/patches/gcc10.3/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch new file mode 100644 index 0000000..7c4a869 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0032-Eliminate-DS-Cmode-hard-register-clobber-that.patch @@ -0,0 +1,99 @@ +From d6c2b11e9ce88f3b1a7ddcf9a2712b070ad4dbfb Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 14 Jun 2022 12:53:04 +0900 +Subject: [PATCH 27/31] xtensa: Eliminate [DS]Cmode hard register clobber that + is immediately followed by whole overwrite the register + +RTL expansion of substitution to [DS]Cmode hard register includes obstructive +register clobber. + +A simplest example: + + double _Complex test(double _Complex c) { + return c; + } + +will be converted to: + + (set (reg:DF 42 [ c ]) (reg:DF 2 a2)) + (set (reg:DF 43 [ c+8 ]) (reg:DF 4 a4)) + (clobber (reg:DC 2 a2)) + (set (reg:DF 2 a2) (reg:DF 42 [ c ])) + (set (reg:DF 4 a4) (reg:DF 43 [ c+8 ])) + (use (reg:DC 2 a2)) + (return) + +and then finally: + + test: + mov a8, a2 + mov a9, a3 + mov a6, a4 + mov a7, a5 + mov a2, a8 + mov a3, a9 + mov a4, a6 + mov a5, a7 + ret + +As you see, it is so ridiculous. + +This patch eliminates such clobber in order to prune away the wasted move +instructions by the optimizer: + + test: + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (DSC): New split pattern and mode iterator. +--- + gcc/config/xtensa/xtensa.md | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 2598c09c9..124548dfe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -87,6 +87,10 @@ + ;; This code iterator is for *shlrd and its variants. 
+ (define_code_iterator ior_op [ior plus]) + ++;; This mode iterator allows the DC and SC patterns to be defined from ++;; the same template. ++(define_mode_iterator DSC [DC SC]) ++ + + ;; Attributes. + +@@ -2785,3 +2789,27 @@ + operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0)); + operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0)); + }) ++ ++(define_split ++ [(clobber (match_operand:DSC 0 "register_operand"))] ++ "GP_REG_P (REGNO (operands[0]))" ++ [(const_int 0)] ++{ ++ unsigned int regno = REGNO (operands[0]); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ rtx_insn *insn; ++ rtx x; ++ if (! ((insn = next_nonnote_nondebug_insn (curr_insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno ++ && (insn = next_nonnote_nondebug_insn (insn)) ++ && NONJUMP_INSN_P (insn) ++ && GET_CODE (x = PATTERN (insn)) == SET ++ && REG_P (x = XEXP (x, 0)) ++ && GET_MODE (x) == inner_mode ++ && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) ++ FAIL; ++}) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch b/patches/gcc10.3/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch new file mode 100644 index 0000000..6007b49 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0033-Defer-storing-integer-constants-into-litpool-.patch @@ -0,0 +1,111 @@ +From e37c151ca3beacb7f4f116a94c9c80223b0c6fbf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 17 Jun 2022 22:47:49 +0900 +Subject: [PATCH 28/31] xtensa: Defer storing integer constants into litpool + until reload + +Storing integer constants into litpool in the early stage of compilation +hinders some integer optimizations. In fact, such integer constants are +not subject to the constant folding process. 
+ +For example: + + extern unsigned short value; + extern void foo(void); + void test(void) { + if (value == 30001) + foo(); + } + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + extui a2, a2, 0, 16 // runtime zero-extension despite constant + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +This patch defers the placement of integer constants into litpool until +the start of reload: + + .literal_position + .literal .LC0, value + .literal .LC1, 30001 + test: + l32r a3, .LC0 + l32r a2, .LC1 + l16ui a3, a3, 0 + bne a3, a2, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/constraints.md (Y): + Change to include integer constants until reload begins. + * config/xtensa/predicates.md (move_operand): Ditto. + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Change to allow storing integer constants into litpool only after + reload begins. +--- + gcc/config/xtensa/constraints.md | 6 ++++-- + gcc/config/xtensa/predicates.md | 5 +++-- + gcc/config/xtensa/xtensa.c | 3 ++- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 9a8caab4f..13b3daafc 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -113,8 +113,10 @@ + + (define_constraint "Y" + "A constant that can be used in relaxed MOVI instructions." +- (and (match_code "const_int,const_double,const,symbol_ref,label_ref") +- (match_test "TARGET_AUTO_LITPOOLS"))) ++ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") ++ (match_test "TARGET_AUTO_LITPOOLS")) ++ (and (match_code "const_int") ++ (match_test "can_create_pseudo_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. 
Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index c1cddb733..633cc6264 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -147,8 +147,9 @@ + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") +- (match_test "GET_MODE_CLASS (mode) == MODE_INT +- && xtensa_simm12b (INTVAL (op))")) ++ (match_test "(GET_MODE_CLASS (mode) == MODE_INT ++ && xtensa_simm12b (INTVAL (op))) ++ || can_create_pseudo_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a714b980a..1d64e2c76 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1173,7 +1173,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + return 1; + } + +- if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16) ++ if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 ++ && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + { + src = force_const_mem (SImode, src); + operands[1] = src; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch b/patches/gcc10.3/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch new file mode 100644 index 0000000..5ecac42 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0034-Apply-a-few-minor-fixes.patch @@ -0,0 +1,129 @@ +From dfaefed18297218392071039325baabac59d5c43 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 19 Jun 2022 22:32:45 +0900 +Subject: [PATCH 29/31] xtensa: Apply a few minor fixes + +No functional changes. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_move_sequence): + Use can_create_pseudo_p(), instead of using individual + reload_in_progress and reload_completed. 
+ (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), + the existing predicate function. + (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): + Use the standard RTX code predicate macros such as MEM_P, + SYMBOL_REF_P and/or CONST_INT_P. + * config/xtensa/xtensa.md: Avoid using numeric literals to determine + if callee-saved register, at the split patterns for indirect sibcall + fixups. +--- + gcc/config/xtensa/xtensa.c | 16 ++++++++-------- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 2 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 1d64e2c76..595c5f96f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -743,7 +743,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + } + + /* See if we need to invert the result. */ +- invert = ((GET_CODE (cmp1) == CONST_INT) ++ invert = (CONST_INT_P (cmp1) + ? p_info->invert_const + : p_info->invert_reg); + +@@ -1200,7 +1200,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) + } + } + +- if (!(reload_in_progress | reload_completed) ++ if (can_create_pseudo_p () + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + +@@ -1603,7 +1603,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 +- && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) ++ && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) + return 0; + + /* If no 4-byte aligned, loop count should be treated as the +@@ -2160,7 +2160,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) + static char result[64]; + rtx tgt = operands[callop]; + +- if (GET_CODE (tgt) == CONST_INT) ++ if (CONST_INT_P (tgt)) + sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", + INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) +@@ -4318,17 +4318,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + + static bool +-xtensa_is_insn_L32R_p(const rtx_insn *insn) ++xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + + if (GET_CODE (x) == SET) + { +- x = XEXP (x, 1); +- if (GET_CODE (x) == MEM) ++ x = SET_SRC (x); ++ if (MEM_P (x)) + { + x = XEXP (x, 0); +- return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) + && CONSTANT_POOL_ADDRESS_P (x); + } + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 124548dfe..6f51a5357 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1251,14 +1251,14 @@ + int i = 0; + rtx x = XEXP (operands[1], 0); + long l[2]; +- if (GET_CODE (x) == SYMBOL_REF ++ if (SYMBOL_REF_P (x) + && CONSTANT_POOL_ADDRESS_P (x)) + x = get_pool_constant (x); + else if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS +- && GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_P (XEXP (x, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))); + i = INTVAL (XEXP (x, 1)); +@@ -2217,7 +2217,7 @@ + (match_operand 1 ""))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[0]), 12, 15)" ++ && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" + [(set (reg:SI A10_REG) + (match_dup 0)) + (call (mem:SI (reg:SI A10_REG)) +@@ -2250,7 +2250,7 @@ + (match_operand 2 "")))] + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && IN_RANGE (REGNO (operands[1]), 12, 15)" ++ && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" + [(set (reg:SI A10_REG) + (match_dup 1)) + (set (match_dup 0) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch b/patches/gcc10.3/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch new file mode 100644 index 0000000..d65c44d --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0035-Fix-RTL-insn-cost-estimation-about-relaxed-MO.patch @@ -0,0 +1,56 @@ +From 48c657f23a61a41a46842b25bce4f287a56223a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 20 Jun 2022 01:56:16 +0900 +Subject: [PATCH 30/31] xtensa: Fix RTL insn cost estimation about relaxed MOVI + instructions + +These instructions will all be converted to L32R ones with litpool entries +by the assembler. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_is_insn_L32R_p): + Consider relaxed MOVI instructions as L32R. 
+--- + gcc/config/xtensa/xtensa.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 595c5f96f..b92ec9caa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4322,17 +4322,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) + { + rtx x = PATTERN (insn); + +- if (GET_CODE (x) == SET) ++ if (GET_CODE (x) != SET) ++ return false; ++ ++ x = XEXP (x, 1); ++ if (MEM_P (x)) + { +- x = SET_SRC (x); +- if (MEM_P (x)) +- { +- x = XEXP (x, 0); +- return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +- && CONSTANT_POOL_ADDRESS_P (x); +- } ++ x = XEXP (x, 0); ++ return (SYMBOL_REF_P (x) || CONST_INT_P (x)) ++ && CONSTANT_POOL_ADDRESS_P (x); + } + ++ /* relaxed MOVI instructions, that will be converted to L32R by the ++ assembler. */ ++ if (CONST_INT_P (x) ++ && ! xtensa_simm12b (INTVAL (x))) ++ return true; ++ + return false; + } + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0036-Fix-buffer-overflow.patch b/patches/gcc10.3/gcc-xtensa-0036-Fix-buffer-overflow.patch new file mode 100644 index 0000000..35f9f10 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0036-Fix-buffer-overflow.patch @@ -0,0 +1,33 @@ +From 75c341c7de5c6f325d6ded7bd91d77793fe358d5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 22 Jun 2022 04:04:45 +0900 +Subject: [PATCH 31/31] xtensa: Fix buffer overflow + +Fortify buffer overflow message reported. +(see https://github.com/earlephilhower/esp-quick-toolchain/issues/36) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (bswapsi2_internal): + Enlarge the buffer that is obviously smaller than the template + string given to sprintf(). 
+--- + gcc/config/xtensa/xtensa.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6f51a5357..81b016859 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -536,7 +536,7 @@ + { + rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn); + const char *init = "ssai\t8\;"; +- static char result[64]; ++ static char result[128]; + if (prev_insn && NONJUMP_INSN_P (prev_insn)) + { + rtx x = PATTERN (prev_insn); +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch b/patches/gcc10.3/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch new file mode 100644 index 0000000..0ea6d48 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0037-Optimize-integer-constant-addition-that-is-be.patch @@ -0,0 +1,95 @@ +From 9308911796a46bd689bbcc1cedef1b63ae9b871e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 26 Jun 2022 14:07:56 +0900 +Subject: [PATCH] xtensa: Optimize integer constant addition that is + between -32896 and 32639 + +Such constants are often subject to the constant synthesis: + + int test(int a) { + return a - 31999; + } + + test: + movi a3, 1 + addmi a3, a3, -0x7d00 + add a2, a2, a3 + ret + +This patch optimizes such case as follows: + + test: + addi a2, a2, 1 + addmi a2, a2, -0x7d00 + ret + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Suppress unnecessary emitting nop insn in the split patterns for + integer/FP constant synthesis, and add new peephole2 pattern that + folds such synthesized additions. +--- + gcc/config/xtensa/xtensa.md | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 81b016859..b697e16db 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1036,6 +1036,7 @@ + FAIL; + if (! 
xtensa_constantsynth (operands[0], INTVAL (x))) + emit_move_insn (operands[0], x); ++ DONE; + }) + + ;; 16-bit Integer moves +@@ -1277,6 +1278,7 @@ + x = gen_rtx_REG (SImode, REGNO (operands[0])); + if (! xtensa_constantsynth (x, l[i])) + emit_move_insn (x, GEN_INT (l[i])); ++ DONE; + }) + + ;; 64-bit floating point moves +@@ -2813,3 +2815,36 @@ + && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) + FAIL; + }) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand")) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (match_operand:SI 2 "const_int_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (plus:SI (match_operand:SI 4 "register_operand") ++ (match_dup 0)))] ++ "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]), ++ (-128 - 32768), (127 + 32512)) ++ && REGNO (operands[0]) != REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[4]) ++ && peep2_reg_dead_p (3, operands[0])" ++ [(set (match_dup 3) ++ (plus:SI (match_dup 4) ++ (match_dup 1))) ++ (set (match_dup 3) ++ (plus:SI (match_dup 3) ++ (match_dup 2)))] ++{ ++ HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]); ++ int imm0, imm1; ++ value += 128; ++ if (value > 32512) ++ imm1 = 32512; ++ else ++ imm1 = value & ~255; ++ imm0 = value - imm1 - 128; ++ operands[1] = GEN_INT (imm0); ++ operands[2] = GEN_INT (imm1); ++}) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch b/patches/gcc10.3/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch new file mode 100644 index 0000000..8fc23d8 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0038-Minor-fix-for-FP-constant-synthesis.patch @@ -0,0 +1,92 @@ +From 7bed998154345cb072cd425b5d61734d3e0bac5d Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 1 Jul 2022 13:39:34 +0900 +Subject: [PATCH] xtensa: Minor fix for FP constant synthesis + +This patch fixes an non-fatal issue about negative 
constant values derived +from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'. + +And also replaces the dedicated code in FP constant synthesis split +pattern with the appropriate existing function call. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + In FP constant synthesis split pattern, subcontract to + avoid_constant_pool_reference() as in the case of integer, + because it can handle well too. And cast to int32_t before + calling xtensa_constantsynth() in order to ignore upper 32-bit. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_double.c: + Modify in order to catch the issue. +--- + gcc/config/xtensa/xtensa.md | 35 +++++-------------- + .../gcc.target/xtensa/constsynth_double.c | 2 +- + 2 files changed, 9 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index b697e16db..6ef84b4f2 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1249,35 +1249,16 @@ + "! optimize_debug && reload_completed" + [(const_int 0)] + { +- int i = 0; +- rtx x = XEXP (operands[1], 0); +- long l[2]; +- if (SYMBOL_REF_P (x) +- && CONSTANT_POOL_ADDRESS_P (x)) +- x = get_pool_constant (x); +- else if (GET_CODE (x) == CONST) +- { +- x = XEXP (x, 0); +- gcc_assert (GET_CODE (x) == PLUS +- && SYMBOL_REF_P (XEXP (x, 0)) +- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) +- && CONST_INT_P (XEXP (x, 1))); +- i = INTVAL (XEXP (x, 1)); +- gcc_assert (i == 0 || i == 4); +- i /= 4; +- x = get_pool_constant (XEXP (x, 0)); +- } +- else +- gcc_unreachable (); +- if (GET_MODE (x) == SFmode) +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]); +- else if (GET_MODE (x) == DFmode) +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); +- else ++ rtx x = avoid_constant_pool_reference (operands[1]); ++ long l; ++ HOST_WIDE_INT value; ++ if (! 
CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode) + FAIL; ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + x = gen_rtx_REG (SImode, REGNO (operands[0])); +- if (! xtensa_constantsynth (x, l[i])) +- emit_move_insn (x, GEN_INT (l[i])); ++ value = (int32_t)l; ++ if (! xtensa_constantsynth (x, value)) ++ emit_move_insn (x, GEN_INT (value)); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +index 890ca5047..5fba6a986 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c +@@ -5,7 +5,7 @@ void test(unsigned int count, double array[]) + { + unsigned int i; + for (i = 0; i < count; ++i) +- array[i] = 1.0; ++ array[i] = 8.988474246316506e+307; + } + + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch b/patches/gcc10.3/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch new file mode 100644 index 0000000..fcb3c72 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0039-constantsynth-Make-try-to-find-shorter-instru.patch @@ -0,0 +1,132 @@ +From afcf727f9c4174b104b594cbd14cba9c57de71d1 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 15 Jul 2022 08:46:55 +0900 +Subject: [PATCH] xtensa: constantsynth: Make try to find shorter + instruction + +This patch allows the constant synthesis to choose shorter instruction +if possible. + + /* example */ + int test(void) { + return 128 << 8; + } + + ;; before + test: + movi a2, 0x100 + addmi a2, a2, 0x7f00 + ret.n + + ;; after + test: + movi.n a2, 1 + slli a2, a2, 15 + ret.n + +When the Code Density Option is configured, the latter is one byte smaller +than the former. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_emit_constantsynth): Remove. 
+ (xtensa_constantsynth_2insn): Change to try all three synthetic + methods and to use the one that fits the immediate value of + the seed into a Narrow Move Immediate instruction "MOVI.N" + when the Code Density Option is configured. +--- + gcc/config/xtensa/xtensa.c | 58 +++++++++++++++++++------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b92ec9caa..a5330e52b 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1026,35 +1026,35 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode) + load-immediate / arithmetic ones, instead of a L32R instruction + (plus a constant in litpool). */ + +-static void +-xtensa_emit_constantsynth (rtx dst, enum rtx_code code, +- HOST_WIDE_INT imm0, HOST_WIDE_INT imm1, +- rtx (*gen_op)(rtx, HOST_WIDE_INT), +- HOST_WIDE_INT imm2) +-{ +- gcc_assert (REG_P (dst)); +- emit_move_insn (dst, GEN_INT (imm0)); +- emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode, +- dst, GEN_INT (imm1))); +- if (gen_op) +- emit_move_insn (dst, gen_op (dst, imm2)); +-} +- + static int + xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + rtx (*gen_op)(rtx, HOST_WIDE_INT), + HOST_WIDE_INT op_imm) + { +- int shift = exact_log2 (srcval + 1); ++ HOST_WIDE_INT imm = INT_MAX; ++ rtx x = NULL_RTX; ++ int shift; + ++ gcc_assert (REG_P (dst)); ++ ++ shift = exact_log2 (srcval + 1); + if (IN_RANGE (shift, 1, 31)) + { +- xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift, +- gen_op, op_imm); +- return 1; ++ imm = -1; ++ x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift)); + } + +- if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) ++ ++ shift = ctz_hwi (srcval); ++ if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95))) ++ && xtensa_simm12b (srcval >> shift)) ++ { ++ imm = srcval >> shift; ++ x = gen_ashlsi3 (dst, dst, GEN_INT (shift)); ++ } ++ ++ if ((!x || (TARGET_DENSITY && ! 
IN_RANGE (imm, -32, 95))) ++ && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512))) + { + HOST_WIDE_INT imm0, imm1; + +@@ -1067,19 +1067,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval, + imm0 = srcval - imm1; + if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255)) + imm0 -= 256, imm1 += 256; +- xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm); +- return 1; ++ imm = imm0; ++ x = gen_addsi3 (dst, dst, GEN_INT (imm1)); + } + +- shift = ctz_hwi (srcval); +- if (xtensa_simm12b (srcval >> shift)) +- { +- xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift, +- gen_op, op_imm); +- return 1; +- } ++ if (!x) ++ return 0; + +- return 0; ++ emit_move_insn (dst, GEN_INT (imm)); ++ emit_insn (x); ++ if (gen_op) ++ emit_move_insn (dst, gen_op (dst, op_imm)); ++ ++ return 1; + } + + static rtx +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch b/patches/gcc10.3/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch new file mode 100644 index 0000000..acf6d99 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0040-Optimize-bitwise-AND-with-imm1-followed-by-br.patch @@ -0,0 +1,177 @@ +From 5776497b68fcce6bf31835cf0a4d693e336bb2ca Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 14 Jul 2022 20:47:46 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND with imm1" followed by + "branch if (not) equal to imm2" + +This patch enhances the effectiveness of the previously posted one: +"xtensa: Optimize bitwise AND operation with some specific forms of constants". 
+ + /* example */ + extern void foo(int); + void test(int a) { + if ((a & (-1U << 8)) == (128 << 8)) /* 0 or one of "b4const" */ + foo(a); + } + + ;; before + .global test + test: + movi a3, -0x100 + movi.n a4, 1 + and a3, a2, a3 + slli a4, a4, 15 + bne a3, a4, .L3 + j.l foo, a9 + .L1: + ret.n + + ;; after + .global test + test: + srli a3, a2, 8 + bnei a3, 128, .L1 + j.l foo, a9 + .L1: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2, + *masktrue_const_shifted_mask): If the immediate for bitwise AND is + represented as '-(1 << N)', decrease the lower bound of N from 12 + to 1. And the other immediate for conditional branch is now no + longer limited to zero, but also one of some positive integers. + Finally, remove the checks of some conditions, because the comparison + expressions that don't satisfy such checks are determined as + compile-time constants and thus will be optimized away before + RTL expansion. +--- + gcc/config/xtensa/xtensa.md | 73 ++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 6ef84b4f2..ca8b3913d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1721,63 +1721,78 @@ + + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)" ++ "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && 
xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL (operands[1]) + 1)))" + "#" + "&& can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (ashift:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1)); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) << shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set (attr "length") +- (if_then_else (match_test "TARGET_DENSITY +- && INTVAL (operands[1]) == 0x7FFFFFFF") +- (const_int 5) +- (const_int 6)))]) ++ (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 0x7FFFFFFF) ++ && INTVAL (operands[2]) == 0") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[1]) == 0x7FFFFFFF ++ || INTVAL (operands[2]) == 0)") ++ (const_int 5) ++ (const_int 6))))]) + + (define_insn_and_split "*masktrue_const_negative_pow2" + [(set (pc) +- (if_then_else (match_operator 3 "boolean_operator" ++ (if_then_else (match_operator 4 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) +- (const_int 0)]) +- (label_ref (match_operand 2 "" "")) ++ (match_operand:SI 2 "const_int_operand" "i")]) ++ (label_ref (match_operand 3 "" "")) + (pc)))] +- "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)" ++ "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30) ++ /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0 // can be omitted */ ++ && xtensa_b4const_or_zero (INTVAL (operands[2]) >> floor_log2 (-INTVAL (operands[1])))" + "#" + "&& 
can_create_pseudo_p ()" +- [(set (match_dup 4) ++ [(set (match_dup 5) + (lshiftrt:SI (match_dup 0) + (match_dup 1))) + (set (pc) +- (if_then_else (match_op_dup 3 +- [(match_dup 4) +- (const_int 0)]) +- (label_ref (match_dup 2)) ++ (if_then_else (match_op_dup 4 ++ [(match_dup 5) ++ (match_dup 2)]) ++ (label_ref (match_dup 3)) + (pc)))] + { +- operands[1] = GEN_INT (floor_log2 (-INTVAL (operands[1]))); +- operands[4] = gen_reg_rtx (SImode); ++ int shift = floor_log2 (-INTVAL (operands[1])); ++ operands[1] = GEN_INT (shift); ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> shift); ++ operands[5] = gen_reg_rtx (SImode); + } + [(set_attr "type" "jump") + (set_attr "mode" "none") +- (set_attr "length" "6")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[2]) == 0") ++ (const_int 5) ++ (const_int 6)))]) + + (define_insn_and_split "*masktrue_const_shifted_mask" + [(set (pc) +@@ -1787,8 +1802,8 @@ + (match_operand:SI 2 "const_int_operand" "i")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +- "(INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 +- && xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" ++ "/* (INTVAL (operands[2]) & ((1 << ctz_hwi (INTVAL (operands[1]))) - 1)) == 0 // can be omitted ++ && */ xtensa_b4const_or_zero ((uint32_t)INTVAL (operands[2]) >> ctz_hwi (INTVAL (operands[1])))" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch b/patches/gcc10.3/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch new file mode 100644 index 0000000..ec12c18 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0041-Correct-the-relative-RTX-cost-that-correspond.patch @@ -0,0 +1,167 @@ +From 7435ec0392c1f36bf3740c3a9748e7149c0c153e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 16 Jul 2022 14:44:02 +0900 +Subject: 
[PATCH] xtensa: Correct the relative RTX cost that corresponds to the + Move Immediate "MOVI" instruction + +This patch corrects the overestimation of the relative cost of +'(set (reg) (const_int N))' where N fits into the instruction itself. + +In fact, such overestimation confuses the RTL loop invariant motion pass. +As a result, it brings almost no negative impact from the speed point of +view, but additional reg-reg move instructions and register allocation +pressure about the size. + + /* example, optimized for size */ + extern int foo(void); + extern int array[16]; + void test_0(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = 1024; + } + void test_1(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = array[i] ? 1024 : 0; + } + void test_2(void) { + unsigned int i; + for (i = 0; i < sizeof(array)/sizeof(*array); ++i) + array[i] = foo() ? 0 : 1024; + } + + ;; before + .literal_position + .literal .LC0, array + test_0: + l32r a3, .LC0 + movi.n a2, 0 + movi a4, 0x400 // OK + .L2: + s32i.n a4, a3, 0 + addi.n a2, a2, 1 + addi.n a3, a3, 4 + bnei a2, 16, .L2 + ret.n + .literal_position + .literal .LC1, array + test_1: + l32r a2, .LC1 + movi.n a3, 0 + movi a5, 0x400 // NG + .L6: + l32i.n a4, a2, 0 + beqz.n a4, .L5 + mov.n a4, a5 // should be "movi a4, 0x400" + .L5: + s32i.n a4, a2, 0 + addi.n a3, a3, 1 + addi.n a2, a2, 4 + bnei a3, 16, .L6 + ret.n + .literal_position + .literal .LC2, array + test_2: + addi sp, sp, -32 + s32i.n a12, sp, 24 + l32r a12, .LC2 + s32i.n a13, sp, 20 + s32i.n a14, sp, 16 + s32i.n a15, sp, 12 + s32i.n a0, sp, 28 + addi a13, a12, 64 + movi.n a15, 0 // NG + movi a14, 0x400 // and wastes callee-saved registers (only 4) + .L11: + call0 foo + mov.n a3, a14 // should be "movi a3, 0x400" + movnez a3, a15, a2 + s32i.n a3, a12, 0 + addi.n a12, a12, 4 + bne a12, a13, .L11 + l32i.n a0, sp, 28 + l32i.n a12, sp, 24 + l32i.n a13, sp, 20 + l32i.n a14, sp, 16 + l32i.n a15, sp, 
12 + addi sp, sp, 32 + ret.n + + ;; after + .literal_position + .literal .LC0, array + test_0: + l32r a3, .LC0 + movi.n a2, 0 + movi a4, 0x400 // OK + .L2: + s32i.n a4, a3, 0 + addi.n a2, a2, 1 + addi.n a3, a3, 4 + bnei a2, 16, .L2 + ret.n + .literal_position + .literal .LC1, array + test_1: + l32r a2, .LC1 + movi.n a3, 0 + .L6: + l32i.n a4, a2, 0 + beqz.n a4, .L5 + movi a4, 0x400 // OK + .L5: + s32i.n a4, a2, 0 + addi.n a3, a3, 1 + addi.n a2, a2, 4 + bnei a3, 16, .L6 + ret.n + .literal_position + .literal .LC2, array + test_2: + addi sp, sp, -16 + s32i.n a12, sp, 8 + l32r a12, .LC2 + s32i.n a13, sp, 4 + s32i.n a0, sp, 12 + addi a13, a12, 64 + .L11: + call0 foo + movi.n a3, 0 // OK + movi a4, 0x400 // and less register allocation pressure + moveqz a3, a4, a2 + s32i.n a3, a12, 0 + addi.n a12, a12, 4 + bne a12, a13, .L11 + l32i.n a0, sp, 12 + l32i.n a12, sp, 8 + l32i.n a13, sp, 4 + addi sp, sp, 16 + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): + Change the relative cost of '(set (reg) (const_int N))' where + N fits into signed 12-bit from 4 to 0 if optimizing for size. + And use the appropriate macro instead of the bare number 4. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a5330e52b..bd3489bfe 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4109,7 +4109,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + case SET: + if (xtensa_simm12b (INTVAL (x))) + { +- *total = 4; ++ *total = speed ? 
COSTS_N_INSNS (1) : 0; + return true; + } + break; +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch b/patches/gcc10.3/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch new file mode 100644 index 0000000..f020eaa --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0042-Optimize-bitwise-AND-NOT-with-imm-followed-by.patch @@ -0,0 +1,179 @@ +From f4d76407c8c33229f9b1d7b81e713ed10a5d408b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 21 Jul 2022 16:10:47 +0900 +Subject: [PATCH] xtensa: Optimize "bitwise AND NOT with imm" followed by + "branch if (not) equal to zero" + +The RTL combiner will transform "if ((x & C) == C) goto label;" +into "if ((~x & C) == 0) goto label;" and will try to match it with +the insn patterns. + + /* example */ + void test_0(int a) { + if ((char)a == 255) + foo(); + } + void test_1(int a) { + if ((unsigned short)a == 0xFFFF) + foo(); + } + void test_2(int a) { + if ((a & 0x00003F80) != 0x00003F80) + foo(); + } + + ;; before + test_0: + extui a2, a2, 0, 8 + movi a3, 0xff + bne a2, a3, .L1 + j.l foo, a9 + .L1: + ret.n + test_1: + movi.n a3, -1 + extui a2, a2, 0, 16 + extui a3, a3, 16, 16 + bne a2, a3, .L3 + j.l foo, a9 + .L3: + ret.n + test_2: + movi a3, 0x80 + extui a2, a2, 7, 7 + addmi a3, a3, 0x3f00 + slli a2, a2, 7 + beq a2, a3, .L5 + j.l foo, a9 + .L5: + ret.n + + ;; after + test_0: + movi a3, 0xff + bnall a2, a3, .L1 + j.l foo, a9 + .L1: + ret.n + test_1: + movi.n a3, -1 + extui a3, a3, 16, 16 + bnall a2, a3, .L3 + j.l foo, a9 + .L3: + ret.n + test_2: + movi a3, 0x80 + addmi a3, a3, 0x3f00 + ball a2, a3, .L5 + j.l foo, a9 + .L5: + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*masktrue_const_bitcmpl): + Add a new insn_and_split pattern, and a few split patterns for + special cases. 
+--- + gcc/config/xtensa/xtensa.md | 84 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 84 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ca8b3913d..ed1e072fe 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1719,6 +1719,90 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_insn_and_split "*masktrue_const_bitcmpl" ++ [(set (pc) ++ (if_then_else (match_operator 3 "boolean_operator" ++ [(and:SI (not:SI (match_operand:SI 0 "register_operand" "r")) ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (const_int 0)]) ++ (label_ref (match_operand 2 "" "")) ++ (pc)))] ++ "exact_log2 (INTVAL (operands[1])) < 0" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 4) ++ (match_dup 1)) ++ (set (pc) ++ (if_then_else (match_op_dup 3 ++ [(and:SI (not:SI (match_dup 0)) ++ (match_dup 4)) ++ (const_int 0)]) ++ (label_ref (match_dup 2)) ++ (pc)))] ++{ ++ operands[4] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && IN_RANGE (INTVAL (operands[1]), -32, 95)") ++ (const_int 5) ++ (if_then_else (match_test "xtensa_simm12b (INTVAL (operands[1]))") ++ (const_int 6) ++ (const_int 10))))]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:HQI (not:SI (match_operand:SI 0 "register_operand")) 0) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] ++ "!BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (match_dup 3)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))] ++{ ++ operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (mode)) - 1); ++}) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:HI (not:SI (match_operand:SI 0 "register_operand")) 2) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] 
++ "BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (const_int 65535)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (match_operator 2 "boolean_operator" ++ [(subreg:QI (not:SI (match_operand:SI 0 "register_operand")) 3) ++ (const_int 0)]) ++ (label_ref (match_operand 1 "")) ++ (pc)))] ++ "BYTES_BIG_ENDIAN" ++ [(set (pc) ++ (if_then_else (match_op_dup 2 ++ [(and:SI (not:SI (match_dup 0)) ++ (const_int 255)) ++ (const_int 0)]) ++ (label_ref (match_dup 1)) ++ (pc)))]) ++ + (define_insn_and_split "*masktrue_const_pow2_minus_one" + [(set (pc) + (if_then_else (match_operator 4 "boolean_operator" +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch b/patches/gcc10.3/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch new file mode 100644 index 0000000..2f20939 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0043-Add-RTX-costs-for-if_then_else.patch @@ -0,0 +1,30 @@ +From 6c4824b9ee3272c7621639f873b6a4b38b5e117e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 28 Jul 2022 11:59:00 +0900 +Subject: [PATCH] xtensa: Add RTX costs for if_then_else + +It takes one machine instruction for both conditional branch and move. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_rtx_costs): + Add new case for IF_THEN_ELSE. 
+--- + gcc/config/xtensa/xtensa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index bd3489bfe..b6f41a478 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4309,6 +4309,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case ZERO_EXTRACT: + case ZERO_EXTEND: ++ case IF_THEN_ELSE: + *total = COSTS_N_INSNS (1); + return true; + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch b/patches/gcc10.3/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch new file mode 100644 index 0000000..1840f9f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0044-Fix-conflicting-hard-regno-between-indirect-s.patch @@ -0,0 +1,60 @@ +From 25b8acf68c6d262f75a84bbc8238e5c326c1b1bf Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 30 Jul 2022 03:25:04 +0900 +Subject: [PATCH] xtensa: Fix conflicting hard regno between indirect + sibcall fixups and EH_RETURN_STACKADJ_RTX + +The hard register A10 was already allocated for EH_RETURN_STACKADJ_RTX. +(although exception handling and sibling call may not apply at the same time, + but for safety) + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: Change hard register number used in + the split patterns for indirect sibling call fixups from 10 to 11, + the last free one for the CALL0 ABI. +--- + gcc/config/xtensa/xtensa.md | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ed1e072fe..9eeb73915 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,7 +25,7 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) +- (A10_REG 10) ++ (A11_REG 11) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2300,9 +2300,9 @@ + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) + && ! 
call_used_or_fixed_reg_p (REGNO (operands[0]))" +- [(set (reg:SI A10_REG) ++ [(set (reg:SI A11_REG) + (match_dup 0)) +- (call (mem:SI (reg:SI A10_REG)) ++ (call (mem:SI (reg:SI A11_REG)) + (match_dup 1))]) + + (define_expand "sibcall_value" +@@ -2333,10 +2333,10 @@ + "reload_completed + && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) + && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" +- [(set (reg:SI A10_REG) ++ [(set (reg:SI A11_REG) + (match_dup 1)) + (set (match_dup 0) +- (call (mem:SI (reg:SI A10_REG)) ++ (call (mem:SI (reg:SI A11_REG)) + (match_dup 2)))]) + + (define_insn "entry" +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch b/patches/gcc10.3/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch new file mode 100644 index 0000000..e381a8d --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0045-Turn-on-fsplit-wide-types-early-by-default.patch @@ -0,0 +1,38 @@ +From 624bf9fd927ada2d6d6dc34f5e0de704e7ee268f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 4 Aug 2022 19:56:27 +0900 +Subject: [PATCH] xtensa: Turn on -fsplit-wide-types-early by default + +Since GCC10, the "subreg2" optimization pass was no longer tied to enabling +"subreg1" unless -fsplit-wide-types-early was turned on (PR88233). However +on the Xtensa port, the lack of "subreg2" can degrade the quality of the +output code, especially for those that produce many D[FC]mode pseudos. + +This patch turns on -fsplit-wide-types-early by default in order to restore +the previous behavior. + +gcc/ChangeLog: + + * common/config/xtensa/xtensa-common.c + (xtensa_option_optimization_table): Add OPT_fsplit_wide_types_early + for OPT_LEVELS_ALL in order to restore pre-GCC10 behavior. 
+--- + gcc/common/config/xtensa/xtensa-common.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/gcc/common/config/xtensa/xtensa-common.c b/gcc/common/config/xtensa/xtensa-common.c +index dd751a14d..697a9eb22 100644 +--- a/gcc/common/config/xtensa/xtensa-common.c ++++ b/gcc/common/config/xtensa/xtensa-common.c +@@ -34,6 +34,8 @@ static const struct default_options xtensa_option_optimization_table[] = + assembler, so GCC cannot do a good job of reordering blocks. + Do not enable reordering unless it is explicitly requested. */ + { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 }, ++ /* Split multi-word types early (pre-GCC10 behavior). */ ++ { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch b/patches/gcc10.3/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch new file mode 100644 index 0000000..d94e38e --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0046-Optimize-stack-pointer-updates-in-function-pr.patch @@ -0,0 +1,171 @@ +From 745e9839f8d18724f31015a1dcbde2c2c513d3c5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 17 Aug 2022 14:54:16 +0900 +Subject: [PATCH] xtensa: Optimize stack pointer updates in function + pro/epilogue under certain conditions + +This patch enforces the use of "addmi" machine instruction instead of +addition/subtraction with two source registers for adjusting the stack +pointer, if the adjustment fits into a signed 16-bit and is also a multiple +of 256. 
+ + /* example */ + void test(void) { + char buffer[4096]; + __asm__(""::"m"(buffer)); + } + + ;; before + test: + movi.n a9, 1 + slli a9, a9, 12 + sub sp, sp, a9 + movi.n a9, 1 + slli a9, a9, 12 + add.n sp, sp, a9 + addi sp, sp, 0 + ret.n + + ;; after + test: + addmi sp, sp, -0x1000 + addmi sp, sp, 0x1000 + ret.n + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): + Use an "addmi" machine instruction for updating the stack pointer + rather than addition/subtraction via hard register A9, if the amount + of change satisfies the literal value conditions of that instruction + when the CALL0 ABI is used. + (xtensa_expand_epilogue): Ditto. + And also inhibit the stack pointer addition of constant zero. +--- + gcc/config/xtensa/xtensa.c | 79 ++++++++++++++++++++++++++------------ + 1 file changed, 54 insertions(+), 25 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b6f41a478..a93b15f4d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3186,7 +3186,6 @@ xtensa_expand_prologue (void) + rtx_insn *insn = NULL; + rtx note_rtx; + +- + total_size = compute_frame_size (get_frame_size ()); + + if (flag_stack_usage_info) +@@ -3242,10 +3241,17 @@ xtensa_expand_prologue (void) + } + else + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); ++ if (xtensa_simm8x256 (-total_size)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-total_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (total_size)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, tmp_reg)); ++ } + RTX_FRAME_RELATED_P (insn) = 1; + note_rtx = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -3273,11 +3279,19 @@ xtensa_expand_prologue (void) + if 
(total_size > 1024 + || (!callee_save_size && total_size > 128)) + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size - +- callee_save_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); ++ if (xtensa_simm8x256 (callee_save_size - total_size)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (callee_save_size - ++ total_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (total_size - ++ callee_save_size)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, tmp_reg)); ++ } + RTX_FRAME_RELATED_P (insn) = 1; + note_rtx = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -3351,12 +3365,21 @@ xtensa_expand_epilogue (bool sibcall_p) + + if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- tmp_reg)); ++ if (xtensa_simm8x256 (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size)) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? ++ hard_frame_pointer_rtx : stack_pointer_rtx, ++ GEN_INT (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size)); ++ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? 
++ hard_frame_pointer_rtx : stack_pointer_rtx, ++ tmp_reg)); ++ } + offset = cfun->machine->callee_save_size - UNITS_PER_WORD; + } + else +@@ -3396,18 +3419,24 @@ xtensa_expand_epilogue (bool sibcall_p) + offset = cfun->machine->current_frame_size; + else + offset = cfun->machine->callee_save_size; +- +- emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (offset))); ++ if (offset) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (offset))); + } + else + { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, +- GEN_INT (cfun->machine->current_frame_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- tmp_reg)); ++ if (xtensa_simm8x256 (cfun->machine->current_frame_size)) ++ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (cfun->machine->current_frame_size))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ emit_move_insn (tmp_reg, ++ GEN_INT (cfun->machine->current_frame_size)); ++ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ tmp_reg)); ++ } + } + } + +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch b/patches/gcc10.3/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch new file mode 100644 index 0000000..a6e870f --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0047-Improve-indirect-sibling-call-handling.patch @@ -0,0 +1,166 @@ +From d8f7137070d92c297e1deecd6dabdb471ddaa9ab Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 18 Aug 2022 01:11:32 +0900 +Subject: [PATCH] xtensa: Improve indirect sibling call handling + +No longer needs the dedicated hard register (A11) for the address of the +call and the split patterns for fixups, due to the introduction of appropriate +register class and constraint. 
+ +(Note: "ISC_REGS" contains a hard register A8 used as a "static chain" + pointer for nested functions, but no problem; Pointer to nested function + actually points to "trampoline", and trampoline itself doesn't receive + "static chain" pointer to its parent's stack frame from the caller.) + +gcc/ChangeLog: + + * config/xtensa/xtensa.h + (enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): + Add new register class "ISC_REGS". + * config/xtensa/constraints.md (c): Add new register constraint. + * config/xtensa/xtensa.md (define_constants): Remove "A11_REG". + (sibcall_internal, sibcall_value_internal): + Change to use the new register constraint, and remove two split + patterns for fixups that are no longer needed. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/sibcalls.c: Add a new test function to ensure + that registers for arguments (occupy from A2 to A7) and for indirect + sibcall (should be assigned to A8) neither conflict nor spill out. +--- + gcc/config/xtensa/constraints.md | 5 ++++ + gcc/config/xtensa/xtensa.h | 3 +++ + gcc/config/xtensa/xtensa.md | 29 ++-------------------- + gcc/testsuite/gcc.target/xtensa/sibcalls.c | 5 ++++ + 4 files changed, 15 insertions(+), 27 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index 13b3daafc..f590dcf3a 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -27,6 +27,11 @@ + "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa + Boolean Option is configured.") + ++(define_register_constraint "c" "TARGET_WINDOWED_ABI ? NO_REGS : ISC_REGS" ++ "@internal ++ General-purpose AR registers for indirect sibling calls, @code{a2}- ++ @code{a8}.") ++ + (define_register_constraint "d" "TARGET_DENSITY ? 
AR_REGS: NO_REGS" + "@internal + All AR registers, including sp, but only if the Xtensa Code Density +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 3e9cbc943..ee2238606 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -372,6 +372,7 @@ enum reg_class + FP_REGS, /* floating point registers */ + ACC_REG, /* MAC16 accumulator */ + SP_REG, /* sp register (aka a1) */ ++ ISC_REGS, /* registers for indirect sibling calls */ + RL_REGS, /* preferred reload regs (not sp or fp) */ + GR_REGS, /* integer registers except sp */ + AR_REGS, /* all integer registers */ +@@ -393,6 +394,7 @@ enum reg_class + "FP_REGS", \ + "ACC_REG", \ + "SP_REG", \ ++ "ISC_REGS", \ + "RL_REGS", \ + "GR_REGS", \ + "AR_REGS", \ +@@ -409,6 +411,7 @@ enum reg_class + { 0xfff80000, 0x00000007 }, /* floating-point registers */ \ + { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \ + { 0x00000002, 0x00000000 }, /* stack pointer register */ \ ++ { 0x000001fc, 0x00000000 }, /* registers for indirect sibling calls */ \ + { 0x0000fffd, 0x00000000 }, /* preferred reload registers */ \ + { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \ + { 0x0003ffff, 0x00000000 }, /* integer registers */ \ +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9eeb73915..0c05c16b1 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,7 +25,6 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) +- (A11_REG 11) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) +@@ -2284,7 +2283,7 @@ + }) + + (define_insn "sibcall_internal" +- [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir")) ++ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nic")) + (match_operand 1 "" "i"))] + "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" + { +@@ -2294,17 +2293,6 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_split +- [(call (mem:SI (match_operand:SI 0 "register_operand")) +- (match_operand 1 ""))] +- 
"reload_completed +- && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && ! call_used_or_fixed_reg_p (REGNO (operands[0]))" +- [(set (reg:SI A11_REG) +- (match_dup 0)) +- (call (mem:SI (reg:SI A11_REG)) +- (match_dup 1))]) +- + (define_expand "sibcall_value" + [(set (match_operand 0 "register_operand" "") + (call (match_operand 1 "memory_operand" "") +@@ -2316,7 +2304,7 @@ + + (define_insn "sibcall_value_internal" + [(set (match_operand 0 "register_operand" "=a") +- (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir")) ++ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "nic")) + (match_operand 2 "" "i")))] + "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)" + { +@@ -2326,19 +2314,6 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + +-(define_split +- [(set (match_operand 0 "register_operand") +- (call (mem:SI (match_operand:SI 1 "register_operand")) +- (match_operand 2 "")))] +- "reload_completed +- && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) +- && ! call_used_or_fixed_reg_p (REGNO (operands[1]))" +- [(set (reg:SI A11_REG) +- (match_dup 1)) +- (set (match_dup 0) +- (call (mem:SI (reg:SI A11_REG)) +- (match_dup 2)))]) +- + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +index d2b3fccf1..dff6750e2 100644 +--- a/gcc/testsuite/gcc.target/xtensa/sibcalls.c ++++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c +@@ -17,4 +17,9 @@ int test_2(int (*a)(void)) { + return a(); + } + ++_Complex double test_3(_Complex double a, _Complex double (*b)(_Complex double, double)) { ++ bar(-1); ++ return b(a, 3.141592653589795); ++} ++ + /* { dg-final { scan-assembler-not "ret" } } */ +-- +2.20.1 + diff --git a/patches/gcc10.3/gcc-xtensa-0048-add-static-PIE-support.patch b/patches/gcc10.3/gcc-xtensa-0048-add-static-PIE-support.patch new file mode 100644 index 0000000..f16832b --- /dev/null +++ 
b/patches/gcc10.3/gcc-xtensa-0048-add-static-PIE-support.patch @@ -0,0 +1,31 @@ +From 5773838c193d36476109de77d230391f6738bb62 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 12 Aug 2022 21:02:15 -0700 +Subject: [PATCH] xtensa: gcc: add static PIE support + +gcc/ + * config/xtensa/linux.h (LINK_SPEC): Add static-pie. +--- + gcc/config/xtensa/linux.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h +index 62a33a9bc..49796c97f 100644 +--- a/gcc/config/xtensa/linux.h ++++ b/gcc/config/xtensa/linux.h +@@ -52,9 +52,10 @@ along with GCC; see the file COPYING3. If not see + #define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ +- %{!static: \ ++ %{!static:%{!static-pie: \ + %{rdynamic:-export-dynamic} \ +- -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ ++ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ ++ %{static-pie:-static -pie --no-dynamic-linker -z text} \ + %{static:-static}}" + + #undef LOCAL_LABEL_PREFIX +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch b/patches/gcc10.3/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch new file mode 100644 index 0000000..0c11972 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0049-Eliminate-unused-stack-frame-allocation-freei.patch @@ -0,0 +1,300 @@ +From 55fbffc224d951aca1eab3cbfb74c540e7ef2f3f Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sun, 28 Aug 2022 22:42:25 +0900 +Subject: [PATCH] xtensa: Eliminate unused stack frame allocation/freeing + +In the example below, 'x' is once placed on the stack frame and then read +into registers as the argument value of bar(): + + /* example */ + struct foo { + int a, b; + }; + extern struct foo bar(struct foo); + struct foo test(void) { + struct foo x = { 0, 1 }; + return bar(x); + } + +Thanks to the dead store elimination, the initialization of 'x' turns into +merely loading the immediates to 
registers, but corresponding stack frame +growth is not rolled back. As a result: + + ;; prereq: the CALL0 ABI + ;; before + test: + addi sp, sp, -16 // unused stack frame allocation/freeing + movi.n a2, 0 + movi.n a3, 1 + addi sp, sp, 16 // because no instructions that refer to + j.l bar, a9 // the stack pointer between the two + +This patch eliminates such unused stack frame allocation/freeing: + + ;; after + test: + movi.n a2, 0 + movi.n a3, 1 + j.l bar, a9 + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (machine_function): New boolean member as + a flag that controls whether to emit the insns for stack pointer + adjustment inside of the pro/epilogue. + (xtensa_emit_adjust_stack_ptr): New function to share the common + codes and to emit insns if not inhibited. + (xtensa_expand_epilogue): Change to use the function mentioned + above when using the CALL0 ABI. + (xtensa_expand_prologue): Ditto. + And also change to set the inhibit flag used by + xtensa_emit_adjust_stack_ptr() to true if the stack pointer is only + used for its own adjustment. 
+--- + gcc/config/xtensa/xtensa.c | 164 ++++++++++++++++++------------------- + 1 file changed, 80 insertions(+), 84 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index a93b15f4d..97291fc0f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -102,6 +102,7 @@ struct GTY(()) machine_function + int callee_save_size; + bool frame_laid_out; + bool epilogue_done; ++ bool inhibit_logues_a1_adjusts; + }; + + /* Vector, indexed by hard register number, which contains 1 for a +@@ -3084,7 +3085,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode mode, int labelno) + } + + static bool +-xtensa_call_save_reg(int regno) ++xtensa_call_save_reg (int regno) + { + if (TARGET_WINDOWED_ABI) + return false; +@@ -3120,7 +3121,7 @@ compute_frame_size (poly_int64 size) + cfun->machine->callee_save_size = 0; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) + { +- if (xtensa_call_save_reg(regno)) ++ if (xtensa_call_save_reg (regno)) + cfun->machine->callee_save_size += UNITS_PER_WORD; + } + +@@ -3175,6 +3176,49 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED) + return offset; + } + ++#define ADJUST_SP_NONE 0x0 ++#define ADJUST_SP_NEED_NOTE 0x1 ++#define ADJUST_SP_FRAME_PTR 0x2 ++static void ++xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) ++{ ++ rtx_insn *insn; ++ rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? 
hard_frame_pointer_rtx ++ : stack_pointer_rtx; ++ ++ if (cfun->machine->inhibit_logues_a1_adjusts) ++ return; ++ ++ if (xtensa_simm8 (offset) ++ || xtensa_simm8x256 (offset)) ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset))); ++ else ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ ++ if (offset < 0) ++ { ++ emit_move_insn (tmp_reg, GEN_INT (-offset)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg)); ++ } ++ else ++ { ++ emit_move_insn (tmp_reg, GEN_INT (offset)); ++ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg)); ++ } ++ } ++ ++ if (flags & ADJUST_SP_NEED_NOTE) ++ { ++ rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ offset)); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } ++} ++ + /* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ + #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) +@@ -3210,17 +3254,30 @@ xtensa_expand_prologue (void) + int regno; + HOST_WIDE_INT offset = 0; + int callee_save_size = cfun->machine->callee_save_size; ++ df_ref ref; ++ bool stack_pointer_needed = frame_pointer_needed ++ || crtl->calls_eh_return; ++ ++ /* Check if the function body really needs the stack pointer. */ ++ if (!stack_pointer_needed) ++ for (ref = DF_REG_USE_CHAIN (A1_REG); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) == DF_REF_REGULAR ++ && NONJUMP_INSN_P (DF_REF_INSN (ref))) ++ stack_pointer_needed = true; ++ /* Check if callee-saved registers really need saving to the stack. */ ++ if (!stack_pointer_needed) ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (xtensa_call_save_reg (regno)) ++ stack_pointer_needed = true; ++ ++ cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed; + + /* -128 is a limit of single addi instruction. 
*/ + if (IN_RANGE (total_size, 1, 128)) + { +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-total_size))); +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-total_size, ++ ADJUST_SP_NEED_NOTE); + offset = total_size - UNITS_PER_WORD; + } + else if (callee_save_size) +@@ -3230,33 +3287,14 @@ xtensa_expand_prologue (void) + * move it to its final location. */ + if (total_size > 1024) + { +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-callee_save_size))); +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -callee_save_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-callee_save_size, ++ ADJUST_SP_NEED_NOTE); + offset = callee_save_size - UNITS_PER_WORD; + } + else + { +- if (xtensa_simm8x256 (-total_size)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-total_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); +- } +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ xtensa_emit_adjust_stack_ptr (-total_size, ++ ADJUST_SP_NEED_NOTE); + offset = total_size - UNITS_PER_WORD; + } + } +@@ -3278,27 +3316,8 @@ xtensa_expand_prologue (void) + } + if (total_size > 1024 + || (!callee_save_size && total_size > 128)) +- { +- if (xtensa_simm8x256 (callee_save_size - total_size)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT 
(callee_save_size - +- total_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (total_size - +- callee_save_size)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, +- stack_pointer_rtx, tmp_reg)); +- } +- RTX_FRAME_RELATED_P (insn) = 1; +- note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- callee_save_size - +- total_size)); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); +- } ++ xtensa_emit_adjust_stack_ptr (callee_save_size - total_size, ++ ADJUST_SP_NEED_NOTE); + } + + if (frame_pointer_needed) +@@ -3365,21 +3384,11 @@ xtensa_expand_epilogue (bool sibcall_p) + + if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) + { +- if (xtensa_simm8x256 (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)) +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - +- cfun->machine->callee_save_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? +- hard_frame_pointer_rtx : stack_pointer_rtx, +- tmp_reg)); +- } ++ xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size - ++ cfun->machine->callee_save_size, ++ frame_pointer_needed ++ ? 
ADJUST_SP_FRAME_PTR ++ : ADJUST_SP_NONE); + offset = cfun->machine->callee_save_size - UNITS_PER_WORD; + } + else +@@ -3420,24 +3429,11 @@ xtensa_expand_epilogue (bool sibcall_p) + else + offset = cfun->machine->callee_save_size; + if (offset) +- emit_insn (gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (offset))); ++ xtensa_emit_adjust_stack_ptr (offset, ADJUST_SP_NONE); + } + else +- { +- if (xtensa_simm8x256 (cfun->machine->current_frame_size)) +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (cfun->machine->current_frame_size))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); +- emit_move_insn (tmp_reg, +- GEN_INT (cfun->machine->current_frame_size)); +- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, +- tmp_reg)); +- } +- } ++ xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size, ++ ADJUST_SP_NONE); + } + + if (crtl->calls_eh_return) +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch b/patches/gcc10.3/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch new file mode 100644 index 0000000..c39608c --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0050-Make-complex-hard-register-clobber-eliminatio.patch @@ -0,0 +1,111 @@ +From 78eac52fe49e1463bec7a838dd172b970412927b Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 30 Aug 2022 21:28:51 +0900 +Subject: [PATCH] xtensa: Make complex hard register clobber elimination + more robust and accurate + +This patch eliminates all clobbers for complex hard registers that will +be overwritten entirely afterwards (supersedence of +3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9). + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: Rewrite the split pattern that performs + the abovementioned process so that insns that overwrite clobbered + register no longer need to be contiguous. + (DSC): Remove as no longer needed. 
+--- + gcc/config/xtensa/xtensa.md | 67 +++++++++++++++++++++++++------------ + 1 file changed, 45 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 0c05c16b1..ec4a69e30 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -86,10 +86,6 @@ + ;; This code iterator is for *shlrd and its variants. + (define_code_iterator ior_op [ior plus]) + +-;; This mode iterator allows the DC and SC patterns to be defined from +-;; the same template. +-(define_mode_iterator DSC [DC SC]) +- + + ;; Attributes. + +@@ -2848,27 +2844,54 @@ + }) + + (define_split +- [(clobber (match_operand:DSC 0 "register_operand"))] +- "GP_REG_P (REGNO (operands[0]))" ++ [(clobber (match_operand 0 "register_operand"))] ++ "HARD_REGISTER_P (operands[0]) ++ && COMPLEX_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] + { +- unsigned int regno = REGNO (operands[0]); +- machine_mode inner_mode = GET_MODE_INNER (mode); ++ auto_sbitmap bmp (FIRST_PSEUDO_REGISTER); + rtx_insn *insn; +- rtx x; +- if (! 
((insn = next_nonnote_nondebug_insn (curr_insn)) +- && NONJUMP_INSN_P (insn) +- && GET_CODE (x = PATTERN (insn)) == SET +- && REG_P (x = XEXP (x, 0)) +- && GET_MODE (x) == inner_mode +- && REGNO (x) == regno +- && (insn = next_nonnote_nondebug_insn (insn)) +- && NONJUMP_INSN_P (insn) +- && GET_CODE (x = PATTERN (insn)) == SET +- && REG_P (x = XEXP (x, 0)) +- && GET_MODE (x) == inner_mode +- && REGNO (x) == regno + REG_NREGS (operands[0]) / 2)) +- FAIL; ++ rtx reg = gen_rtx_REG (SImode, 0); ++ bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0])); ++ for (insn = next_nonnote_nondebug_insn_bb (curr_insn); ++ insn; insn = next_nonnote_nondebug_insn_bb (insn)) ++ { ++ sbitmap_iterator iter; ++ unsigned int regno; ++ if (NONJUMP_INSN_P (insn)) ++ { ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ { ++ set_regno_raw (reg, regno, REG_NREGS (reg)); ++ if (reg_overlap_mentioned_p (reg, PATTERN (insn))) ++ break; ++ } ++ if (GET_CODE (PATTERN (insn)) == SET) ++ { ++ rtx x = SET_DEST (PATTERN (insn)); ++ if (REG_P (x) && HARD_REGISTER_P (x)) ++ bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x)); ++ else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x))) ++ { ++ struct subreg_info info; ++ subreg_get_info (regno = REGNO (SUBREG_REG (x)), ++ GET_MODE (SUBREG_REG (x)), ++ SUBREG_BYTE (x), GET_MODE (x), &info); ++ if (!info.representable_p) ++ break; ++ bitmap_clear_range (bmp, regno + info.offset, info.nregs); ++ } ++ } ++ if (bitmap_empty_p (bmp)) ++ goto FALLTHRU; ++ } ++ else if (CALL_P (insn)) ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ if (call_used_or_fixed_reg_p (regno)) ++ break; ++ } ++ FAIL; ++FALLTHRU:; + }) + + (define_peephole2 +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch b/patches/gcc10.3/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch new file mode 100644 index 0000000..0f6d156 --- /dev/null +++ 
b/patches/gcc10.3/gcc-xtensa-0051-constantsynth-Add-new-3-insns-synthesis-patte.patch @@ -0,0 +1,91 @@ +From dc825d410b7a3025d3b902f83bb3e360ac42f477 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 3 Sep 2022 12:27:51 +0900 +Subject: [PATCH] xtensa: constantsynth: Add new 3-insns synthesis pattern + +This patch adds a new 3-instructions constant synthesis pattern: + +- A value that can fit into a signed 12-bit after a number of either bitwise + left or right rotations: + => "MOVI(.N) Ax, simm12" + "SSAI (1 ... 11) or (21 ... 31)" + + "SRC Ax, Ax, Ax" + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_constantsynth): + Add new pattern for the abovementioned case. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/constsynth_3insns.c (test_4): + Add new test function. +--- + gcc/config/xtensa/xtensa.c | 31 +++++++++++++++++++ + .../gcc.target/xtensa/constsynth_3insns.c | 11 +++++++ + 2 files changed, 42 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 97291fc0f..baee55ce3 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -1133,6 +1133,37 @@ xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval) + xtensa_constantsynth_rtx_ADDSUBX, + divisor)) + return 1; ++ ++ /* loading simm12 followed by left/right bitwise rotation: ++ MOVI + SSAI + SRC. 
*/ ++ if ((srcval & 0x001FF800) == 0 ++ || (srcval & 0x001FF800) == 0x001FF800) ++ { ++ int32_t v; ++ ++ for (shift = 1; shift < 12; ++shift) ++ { ++ v = (int32_t)(((uint32_t)srcval >> shift) ++ | ((uint32_t)srcval << (32 - shift))); ++ if (xtensa_simm12b(v)) ++ { ++ emit_move_insn (dst, GEN_INT (v)); ++ emit_insn (gen_rotlsi3 (dst, dst, GEN_INT (shift))); ++ return 1; ++ } ++ } ++ for (shift = 1; shift < 12; ++shift) ++ { ++ v = (int32_t)(((uint32_t)srcval << shift) ++ | ((uint32_t)srcval >> (32 - shift))); ++ if (xtensa_simm12b(v)) ++ { ++ emit_move_insn (dst, GEN_INT (v)); ++ emit_insn (gen_rotrsi3 (dst, dst, GEN_INT (shift))); ++ return 1; ++ } ++ } ++ } + } + + return 0; +diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +index f3c4a1c7c..831288c7d 100644 +--- a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c ++++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c +@@ -21,4 +21,15 @@ void test_3(int *p) + *p = 192437; + } + ++struct foo ++{ ++ unsigned int b : 10; ++ unsigned int g : 11; ++ unsigned int r : 11; ++}; ++void test_4(struct foo *p, unsigned int v) ++{ ++ p->g = v; ++} ++ + /* { dg-final { scan-assembler-not "l32r" } } */ +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0052-fix-builtin_apply-return-value.patch b/patches/gcc10.3/gcc-xtensa-0052-fix-builtin_apply-return-value.patch new file mode 100644 index 0000000..375f437 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0052-fix-builtin_apply-return-value.patch @@ -0,0 +1,81 @@ +From 4df06b8339667e15107034842185300cca85c51c Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Sat, 10 Sep 2022 17:31:07 -0700 +Subject: [PATCH] xtensa: gcc: fix builtin_apply return value + +xtensa may use up to 4 registers to return a value from a function, but +recognition of only one register in the xtensa_function_value_regno_p +and missing untyped_call pattern result in that only one register is +saved by the __builtin_apply and 
returned by the __builtin_apply_return. + +gcc/ + * config/xtensa/xtensa.c (xtensa_function_value_regno_p): + Recognize all 4 return registers. + * config/xtensa/xtensa.h (GP_RETURN_REG_COUNT): New definition. + * config/xtensa/xtensa.md (untyped_call): New pattern. +--- + gcc/config/xtensa/xtensa.c | 2 +- + gcc/config/xtensa/xtensa.h | 1 + + gcc/config/xtensa/xtensa.md | 21 +++++++++++++++++++++ + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index baee55ce3..ad4940913 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4508,7 +4508,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + static bool + xtensa_function_value_regno_p (const unsigned int regno) + { +- return (regno == GP_RETURN); ++ return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT); + } + + /* The static chain is passed in memory. Provide rtx giving 'mem' +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index ee2238606..3a986fa1c 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -477,6 +477,7 @@ enum reg_class + point, and values of coprocessor and user-defined modes. */ + #define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) + #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) ++#define GP_RETURN_REG_COUNT 4 + + /* Symbolic macros for the first/last argument registers. 
*/ + #define GP_ARG_FIRST (GP_REG_FIRST + 2) +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ec4a69e30..c18640b25 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2310,6 +2310,27 @@ + (set_attr "mode" "none") + (set_attr "length" "3")]) + ++(define_expand "untyped_call" ++ [(parallel [(call (match_operand 0 "") ++ (const_int 0)) ++ (match_operand 1 "") ++ (match_operand 2 "")])] ++ "" ++{ ++ int i; ++ ++ emit_call_insn (gen_call (operands[0], const0_rtx)); ++ ++ for (i = 0; i < XVECLEN (operands[2], 0); i++) ++ { ++ rtx set = XVECEXP (operands[2], 0, i); ++ emit_move_insn (SET_DEST (set), SET_SRC (set)); ++ } ++ ++ emit_insn (gen_blockage ()); ++ DONE; ++}) ++ + (define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch b/patches/gcc10.3/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch new file mode 100644 index 0000000..4384596 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0053-implement-MI-thunk-generation-for-call0-API.patch @@ -0,0 +1,164 @@ +From 3778aeadea08b3f630b89d711f634f967e8d24b3 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Thu, 14 Jul 2022 02:39:59 -0700 +Subject: [PATCH] xtensa: gcc: implement MI thunk generation for call0 ABI + +gcc/ + * config/xtensa/xtensa.c (xtensa_can_output_mi_thunk) + (xtensa_output_mi_thunk): New functions. + (TARGET_ASM_CAN_OUTPUT_MI_THUNK) + (TARGET_ASM_OUTPUT_MI_THUNK): New macro definitions. + (xtensa_prepare_expand_call): Use fixed register a8 as temporary + when called with reload_completed set to 1. 
+--- + gcc/config/xtensa/xtensa.c | 115 ++++++++++++++++++++++++++++++++++++- + 1 file changed, 114 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ad4940913..0ccc63fdf 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -189,6 +189,14 @@ static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT); + static HOST_WIDE_INT xtensa_starting_frame_offset (void); + static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); + static bool xtensa_function_ok_for_sibcall (tree, tree); ++static bool xtensa_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, ++ const_tree function ATTRIBUTE_UNUSED); ++static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, ++ tree function); + + + +@@ -342,6 +350,12 @@ static bool xtensa_function_ok_for_sibcall (tree, tree); + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall + ++#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK xtensa_can_output_mi_thunk ++ ++#undef TARGET_ASM_OUTPUT_MI_THUNK ++#define TARGET_ASM_OUTPUT_MI_THUNK xtensa_output_mi_thunk ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +@@ -2164,7 +2178,16 @@ xtensa_prepare_expand_call (int callop, rtx *operands) + addr = gen_sym_PLT (addr); + + if (!call_insn_operand (addr, VOIDmode)) +- XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr); ++ { ++ /* This may be called while generating MI thunk when we pretend ++ that reload is over. Use a8 as a temporary register in that case. */ ++ rtx reg = can_create_pseudo_p () ++ ? 
copy_to_mode_reg (Pmode, addr) ++ : copy_to_suggested_reg (addr, ++ gen_rtx_REG (Pmode, A8_REG), ++ Pmode); ++ XEXP (operands[callop], 0) = reg; ++ } + } + + +@@ -5008,4 +5031,94 @@ xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_U + return true; + } + ++static bool ++xtensa_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, ++ const_tree function ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_WINDOWED_ABI) ++ return false; ++ ++ return true; ++} ++ ++/* Output code to add DELTA to the first argument, and then jump ++ to FUNCTION. Used for C++ multiple inheritance. */ ++static void ++xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, ++ tree function) ++{ ++ rtx this_rtx; ++ rtx funexp; ++ rtx_insn *insn; ++ int this_reg_no; ++ rtx temp0 = gen_rtx_REG (Pmode, A9_REG); ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); ++ ++ reload_completed = 1; ++ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_reg_no = 3; ++ else ++ this_reg_no = 2; ++ ++ this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no); ++ ++ if (delta) ++ { ++ if (xtensa_simm8 (delta)) ++ emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta))); ++ else ++ { ++ emit_move_insn (temp0, GEN_INT (delta)); ++ emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0)); ++ } ++ } ++ ++ if (vcall_offset) ++ { ++ rtx temp1 = gen_rtx_REG (Pmode, A0_REG + 10); ++ rtx addr = temp1; ++ ++ emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx)); ++ if (xtensa_uimm8x4 (vcall_offset)) ++ addr = plus_constant (Pmode, temp0, vcall_offset); ++ else if (xtensa_simm8 (vcall_offset)) ++ emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset))); ++ else ++ { ++ emit_move_insn (temp1, GEN_INT (vcall_offset)); ++ emit_insn (gen_addsi3 (temp1, temp0, temp1)); ++ } ++ emit_move_insn (temp1, 
gen_rtx_MEM (Pmode, addr)); ++ emit_insn (gen_add2_insn (this_rtx, temp1)); ++ } ++ ++ /* Generate a tail call to the target function. */ ++ if (!TREE_USED (function)) ++ { ++ assemble_external (function); ++ TREE_USED (function) = 1; ++ } ++ ++ funexp = XEXP (DECL_RTL (function), 0); ++ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); ++ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ ++ insn = get_insns (); ++ shorten_branches (insn); ++ assemble_start_function (thunk, fnname); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); ++ assemble_end_function (thunk, fnname); ++ ++ /* Stop pretending to be a post-reload pass. */ ++ reload_completed = 0; ++} ++ + #include "gt-xtensa.h" +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0054-enable-section-anchors-support.patch b/patches/gcc10.3/gcc-xtensa-0054-enable-section-anchors-support.patch new file mode 100644 index 0000000..cca2ff4 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0054-enable-section-anchors-support.patch @@ -0,0 +1,29 @@ +From 6e38872f54f49c0b3b3f72668dcdbfa66007ceb6 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 16 Sep 2022 20:56:39 -0700 +Subject: [PATCH] xtensa: gcc: enable section anchors support + +gcc/ + * config/xtensa/xtensa.c (TARGET_MAX_ANCHOR_OFFSET): New + definition. 
+--- + gcc/config/xtensa/xtensa.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 0ccc63fdf..ba4dd47c5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -356,6 +356,9 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK xtensa_output_mi_thunk + ++#undef TARGET_MAX_ANCHOR_OFFSET ++#define TARGET_MAX_ANCHOR_OFFSET 1020 ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch b/patches/gcc10.3/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch new file mode 100644 index 0000000..11f1d50 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0055-Prepare-the-transition-from-Reload-to-LRA.patch @@ -0,0 +1,301 @@ +From 2fa3f80877ab2b7a06403097c09fbc4bc892d6e3 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 14 Oct 2022 19:43:23 +0900 +Subject: [PATCH] xtensa: Prepare the transition from Reload to LRA + +This patch provides the first step in the transition from Reload to LRA +in Xtensa. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h + (xtensa_split1_finished_p, xtensa_split_DI_reg_imm): New prototypes. + * config/xtensa/xtensa.c + (xtensa_split1_finished_p, xtensa_split_DI_reg_imm, xtensa_lra_p): + New functions. + (TARGET_LRA_P): Replace the dummy hook with xtensa_lra_p. + (xt_true_regnum): Rework. + * config/xtensa/xtensa.h (CALL_REALLY_USED_REGISTERS): + Switch from CALL_USED_REGISTERS, and revise the comment. + * config/xtensa/constraints.md (Y): + Use !xtensa_split1_finished_p() instead of can_create_pseudo_p(). + * config/xtensa/predicates.md (move_operand): Ditto. 
+ * config/xtensa/xtensa.md: Add two new split patterns: + - splits DImode immediate load into two SImode ones + - puts out-of-constraint SImode constants into the constant pool + * config/xtensa/xtensa.opt (-mlra): New target-specific option + for testing purposes. +--- + gcc/config/xtensa/constraints.md | 2 +- + gcc/config/xtensa/predicates.md | 2 +- + gcc/config/xtensa/xtensa-protos.h | 2 + + gcc/config/xtensa/xtensa.c | 69 ++++++++++++++++++++++++++----- + gcc/config/xtensa/xtensa.h | 8 ++-- + gcc/config/xtensa/xtensa.md | 36 ++++++++++++---- + gcc/config/xtensa/xtensa.opt | 4 ++ + 7 files changed, 99 insertions(+), 24 deletions(-) + +diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md +index f590dcf3a..a2cb57000 100644 +--- a/gcc/config/xtensa/constraints.md ++++ b/gcc/config/xtensa/constraints.md +@@ -121,7 +121,7 @@ + (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "TARGET_AUTO_LITPOOLS")) + (and (match_code "const_int") +- (match_test "can_create_pseudo_p ()")))) ++ (match_test "! xtensa_split1_finished_p ()")))) + + ;; Memory constraints. Do not use define_memory_constraint here. Doing so + ;; causes reload to force some constants into the constant pool, but since +diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md +index 633cc6264..09d9a5770 100644 +--- a/gcc/config/xtensa/predicates.md ++++ b/gcc/config/xtensa/predicates.md +@@ -149,7 +149,7 @@ + (ior (and (match_code "const_int") + (match_test "(GET_MODE_CLASS (mode) == MODE_INT + && xtensa_simm12b (INTVAL (op))) +- || can_create_pseudo_p ()")) ++ || ! 
xtensa_split1_finished_p ()")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) + && CONSTANT_P (op) +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 75ed3bfb0..63b147a90 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -58,6 +58,8 @@ extern char *xtensa_emit_call (int, rtx *); + extern char *xtensa_emit_sibcall (int, rtx *); + extern bool xtensa_tls_referenced_p (rtx); + extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); ++extern bool xtensa_split1_finished_p (void); ++extern void xtensa_split_DI_reg_imm (rtx *); + + #ifdef TREE_CODE + extern void init_cumulative_args (CUMULATIVE_ARGS *, int); +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ba4dd47c5..658d19924 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see + #include "hw-doloop.h" + #include "rtl-iter.h" + #include "insn-attr.h" ++#include "tree-pass.h" + + /* This file should be included last. 
*/ + #include "target-def.h" +@@ -197,6 +198,7 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function); ++static bool xtensa_lra_p (void); + + + +@@ -291,7 +293,7 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem + + #undef TARGET_LRA_P +-#define TARGET_LRA_P hook_bool_void_false ++#define TARGET_LRA_P xtensa_lra_p + + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p +@@ -482,21 +484,30 @@ xtensa_mask_immediate (HOST_WIDE_INT v) + int + xt_true_regnum (rtx x) + { +- if (GET_CODE (x) == REG) ++ if (REG_P (x)) + { +- if (reg_renumber +- && REGNO (x) >= FIRST_PSEUDO_REGISTER +- && reg_renumber[REGNO (x)] >= 0) ++ if (! HARD_REGISTER_P (x) ++ && reg_renumber ++ && (lra_in_progress || reg_renumber[REGNO (x)] >= 0)) + return reg_renumber[REGNO (x)]; + return REGNO (x); + } +- if (GET_CODE (x) == SUBREG) ++ if (SUBREG_P (x)) + { + int base = xt_true_regnum (SUBREG_REG (x)); +- if (base >= 0 && base < FIRST_PSEUDO_REGISTER) +- return base + subreg_regno_offset (REGNO (SUBREG_REG (x)), +- GET_MODE (SUBREG_REG (x)), +- SUBREG_BYTE (x), GET_MODE (x)); ++ ++ if (base >= 0 ++ && HARD_REGISTER_NUM_P (base)) ++ { ++ struct subreg_info info; ++ ++ subreg_get_info (lra_in_progress ++ ? (unsigned) base : REGNO (SUBREG_REG (x)), ++ GET_MODE (SUBREG_REG (x)), ++ SUBREG_BYTE (x), GET_MODE (x), &info); ++ if (info.representable_p) ++ return base + info.offset; ++ } + } + return -1; + } +@@ -2468,6 +2479,36 @@ xtensa_shlrd_which_direction (rtx op0, rtx op1) + } + + ++/* Return true after "split1" pass has been finished. 
*/ ++ ++bool ++xtensa_split1_finished_p (void) ++{ ++ return cfun && (cfun->curr_properties & PROP_rtl_split_insns); ++} ++ ++ ++/* Split a DImode pair of reg (operand[0]) and const_int (operand[1]) into ++ two SImode pairs, the low-part (operands[0] and [1]) and the high-part ++ (operands[2] and [3]). */ ++ ++void ++xtensa_split_DI_reg_imm (rtx *operands) ++{ ++ rtx lowpart, highpart; ++ ++ if (WORDS_BIG_ENDIAN) ++ split_double (operands[1], &highpart, &lowpart); ++ else ++ split_double (operands[1], &lowpart, &highpart); ++ ++ operands[3] = highpart; ++ operands[2] = gen_highpart (SImode, operands[0]); ++ operands[1] = lowpart; ++ operands[0] = gen_lowpart (SImode, operands[0]); ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -5124,4 +5165,12 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + reload_completed = 0; + } + ++/* Implement TARGET_LRA_P. */ ++ ++static bool ++xtensa_lra_p (void) ++{ ++ return TARGET_LRA; ++} ++ + #include "gt-xtensa.h" +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 3a986fa1c..4b08ee5c1 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -228,7 +228,7 @@ along with GCC; see the file COPYING3. If not see + } + + /* 1 for registers not available across function calls. +- These must include the FIXED_REGISTERS and also any ++ These need not include the FIXED_REGISTERS but must include any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. +@@ -241,10 +241,10 @@ along with GCC; see the file COPYING3. If not see + + Proper values are computed in TARGET_CONDITIONAL_REGISTER_USAGE. 
*/ + +-#define CALL_USED_REGISTERS \ ++#define CALL_REALLY_USED_REGISTERS \ + { \ +- 1, 1, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, \ +- 1, 1, 1, \ ++ 1, 0, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, \ ++ 0, 0, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, \ + } +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c18640b25..7c248ed2d 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -943,14 +943,9 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- rtx lowpart, highpart; +- +- if (TARGET_BIG_ENDIAN) +- split_double (operands[1], &highpart, &lowpart); +- else +- split_double (operands[1], &lowpart, &highpart); +- emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart)); +- emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart)); ++ xtensa_split_DI_reg_imm (operands); ++ emit_move_insn (operands[0], operands[1]); ++ emit_move_insn (operands[2], operands[3]); + DONE; + } + +@@ -984,6 +979,19 @@ + } + }) + ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "const_int_operand"))] ++ "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS ++ && ! xtensa_split1_finished_p ()" ++ [(set (match_dup 0) ++ (match_dup 1)) ++ (set (match_dup 2) ++ (match_dup 3))] ++{ ++ xtensa_split_DI_reg_imm (operands); ++}) ++ + ;; 32-bit Integer moves + + (define_expand "movsi" +@@ -1020,6 +1028,18 @@ + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "const_int_operand"))] ++ "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS ++ && ! xtensa_split1_finished_p () ++ && ! 
xtensa_simm12b (INTVAL (operands[1]))" ++ [(set (match_dup 0) ++ (match_dup 1))] ++{ ++ operands[1] = force_const_mem (SImode, operands[1]); ++}) ++ + (define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "constantpool_operand"))] +diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt +index 97aa44f92..106af4e30 100644 +--- a/gcc/config/xtensa/xtensa.opt ++++ b/gcc/config/xtensa/xtensa.opt +@@ -34,6 +34,10 @@ mextra-l32r-costs= + Target RejectNegative Joined UInteger Var(xtensa_extra_l32r_costs) Init(0) + Set extra memory access cost for L32R instruction, in clock-cycle units. + ++mlra ++Target Mask(LRA) ++Use LRA instead of reload (transitional). ++ + mtarget-align + Target + Automatically align branch targets to reduce branch penalties. +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch b/patches/gcc10.3/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch new file mode 100644 index 0000000..9f0ab74 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0056-Make-register-A0-allocable-for-the-CALL0-ABI.patch @@ -0,0 +1,66 @@ +From d262f47d2f1c0dec78b59b0b3eec26d17b60bf83 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 21 Oct 2022 11:58:35 +0900 +Subject: [PATCH] xtensa: Make register A0 allocable for the CALL0 ABI + +This patch offers an additional allocable register by RA for the CALL0 +ABI. + +> Register a0 holds the return address upon entry to a function, but +> unlike the windowed register ABI, it is not reserved for this purpose +> and may hold other values after the return address has been saved. + - Xtensa ISA Reference Manual, + 8.1.2 "CALL0 Register Usage and Stack Layout" [p.589] + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_conditional_register_usage): + Remove register A0 from FIXED_REGS if the CALL0 ABI. 
+ (xtensa_expand_epilogue): Change to emit '(use (reg:SI A0_REG))' + unconditionally after restoring callee-saved registers for + sibling-call functions, in order to prevent misleading that + register A0 is free to use. +--- + gcc/config/xtensa/xtensa.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 658d19924..767264641 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3506,15 +3506,14 @@ xtensa_expand_epilogue (bool sibcall_p) + if (xtensa_call_save_reg(regno)) + { + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- rtx reg; + + offset -= UNITS_PER_WORD; +- emit_move_insn (reg = gen_rtx_REG (SImode, regno), ++ emit_move_insn (gen_rtx_REG (SImode, regno), + gen_frame_mem (SImode, x)); +- if (regno == A0_REG && sibcall_p) +- emit_use (reg); + } + } ++ if (sibcall_p) ++ emit_use (gen_rtx_REG (SImode, A0_REG)); + + if (cfun->machine->current_frame_size > 0) + { +@@ -5005,6 +5004,13 @@ xtensa_conditional_register_usage (void) + /* Remove hard FP register from the preferred reload registers set. */ + CLEAR_HARD_REG_BIT (reg_class_contents[(int)RL_REGS], + HARD_FRAME_POINTER_REGNUM); ++ ++ /* Register A0 holds the return address upon entry to a function ++ for the CALL0 ABI, but unlike the windowed register ABI, it is ++ not reserved for this purpose and may hold other values after ++ the return address has been saved. 
*/ ++ if (!TARGET_WINDOWED_ABI) ++ fixed_regs[A0_REG] = 0; + } + + /* Map hard register number to register class */ +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch b/patches/gcc10.3/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch new file mode 100644 index 0000000..ec28936 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0057-Fix-out-of-bounds-array-access-in-the-movdi-p.patch @@ -0,0 +1,74 @@ +From 3092ce3d24acb6ca10d8c980fb49685832566ae4 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 26 Oct 2022 15:27:51 +0900 +Subject: [PATCH] xtensa: Fix out-of-bounds array access in the movdi pattern + +The following new warnings were introduced in the commit +4f3f0296acbb ("xtensa: Prepare the transition from Reload to LRA"): + +gcc/config/xtensa/xtensa.md:945:26: error: array subscript 3 is above + array bounds of 'rtx_def* [2]' [-Werror=array-bounds] + 945 | emit_move_insn (operands[2], operands[3]); +gcc/config/xtensa/xtensa.md:945:26: error: array subscript 2 is above + array bounds of 'rtx_def* [2]' [-Werror=array-bounds] + 945 | emit_move_insn (operands[2], operands[3]); + +From gcc/insn-emit.cc (generated by building): + +> /* ../../gcc/config/xtensa/xtensa.md:932 */ +> rtx +> gen_movdi (rtx operand0, +> rtx operand1) +> { +> rtx_insn *_val = 0; +> start_sequence (); +> { +> rtx operands[2]; // only 2 elements +> operands[0] = operand0; +> operands[1] = operand1; +> #define FAIL return (end_sequence (), _val) +> #define DONE return (_val = get_insns (), end_sequence (), _val) +> #line 936 "../../gcc/config/xtensa/xtensa.md" +> { +> if (CONSTANT_P (operands[1])) +> { +> /* Split in halves if 64-bit Const-to-Reg moves +> because of offering further optimization opportunities. */ +> if (register_operand (operands[0], DImode)) +> { +> xtensa_split_DI_reg_imm (operands); // out-of-bounds! 
+> emit_move_insn (operands[0], operands[1]); +> emit_move_insn (operands[2], operands[3]); // out-of-bounds! +> DONE; +> } + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (movdi): + Copy operands[0...1] to ops[0...3] and then use the latter before + calling xtensa_split_DI_reg_imm() and emitting insns. +--- + gcc/config/xtensa/xtensa.md | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 7c248ed2d..31e5f1b28 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -943,9 +943,10 @@ + because of offering further optimization opportunities. */ + if (register_operand (operands[0], DImode)) + { +- xtensa_split_DI_reg_imm (operands); +- emit_move_insn (operands[0], operands[1]); +- emit_move_insn (operands[2], operands[3]); ++ rtx ops[4] = { operands[0], operands[1] }; ++ xtensa_split_DI_reg_imm (ops); ++ emit_move_insn (ops[0], ops[1]); ++ emit_move_insn (ops[2], ops[3]); + DONE; + } + +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch b/patches/gcc10.3/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch new file mode 100644 index 0000000..6ae21e1 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0058-Tabify-and-trim-trailing-spaces.patch @@ -0,0 +1,576 @@ +From b326051c7a6d15e15b4410ef658d2e67c0a604af Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Tabify, and trim trailing spaces + +Cosmetic and no functional changes. + +gcc/ChangeLog: + + * config/xtensa/elf.h: Tabify, and trim trailing spaces. + * config/xtensa/linux.h: Likewise. + * config/xtensa/uclinux.h: Likewise. + * config/xtensa/xtensa.c: Likewise. + * config/xtensa/xtensa.h: Likewise. + * config/xtensa/xtensa.md: Likewise. 
+--- + gcc/config/xtensa/elf.h | 32 ++++++------ + gcc/config/xtensa/linux.h | 1 - + gcc/config/xtensa/uclinux.h | 1 - + gcc/config/xtensa/xtensa.c | 85 ++++++++++++++++---------------- + gcc/config/xtensa/xtensa.h | 6 +-- + gcc/config/xtensa/xtensa.md | 98 ++++++++++++++++++------------------- + 6 files changed, 110 insertions(+), 113 deletions(-) + +diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h +index 6fd589fed..e0d1d7275 100644 +--- a/gcc/config/xtensa/elf.h ++++ b/gcc/config/xtensa/elf.h +@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see + "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s" + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC "crtend%O%s crtn%O%s" ++#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + + #undef LINK_SPEC + #define LINK_SPEC \ +@@ -82,19 +82,17 @@ along with GCC; see the file COPYING3. If not see + /* Search for headers in $tooldir/arch/include and for libraries and + startfiles in $tooldir/arch/lib. */ + #define GCC_DRIVER_HOST_INITIALIZATION \ +-do \ +-{ \ +- char *tooldir, *archdir; \ +- tooldir = concat (tooldir_base_prefix, spec_machine, \ +- dir_separator_str, NULL); \ +- if (!IS_ABSOLUTE_PATH (tooldir)) \ +- tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ +- spec_version, dir_separator_str, tooldir, NULL); \ +- archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ +- add_prefix (&startfile_prefixes, \ +- concat (archdir, "lib", dir_separator_str, NULL), \ +- "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ +- add_prefix (&include_prefixes, archdir, \ +- "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ +- } \ +-while (0) ++ do { \ ++ char *tooldir, *archdir; \ ++ tooldir = concat (tooldir_base_prefix, spec_machine, \ ++ dir_separator_str, NULL); \ ++ if (!IS_ABSOLUTE_PATH (tooldir)) \ ++ tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ ++ spec_version, dir_separator_str, tooldir, NULL); \ ++ archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ ++ 
add_prefix (&startfile_prefixes, \ ++ concat (archdir, "lib", dir_separator_str, NULL), \ ++ "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ ++ add_prefix (&include_prefixes, archdir, \ ++ "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ ++ } while (0) +diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h +index 49796c97f..51ea065bd 100644 +--- a/gcc/config/xtensa/linux.h ++++ b/gcc/config/xtensa/linux.h +@@ -65,4 +65,3 @@ along with GCC; see the file COPYING3. If not see + #define XTENSA_ALWAYS_PIC 1 + + #undef DBX_REGISTER_NUMBER +- +diff --git a/gcc/config/xtensa/uclinux.h b/gcc/config/xtensa/uclinux.h +index 64ba26f39..51b6f2f95 100644 +--- a/gcc/config/xtensa/uclinux.h ++++ b/gcc/config/xtensa/uclinux.h +@@ -66,4 +66,3 @@ along with GCC; see the file COPYING3. If not see + #define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function + + #undef DBX_REGISTER_NUMBER +- +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 767264641..d3dafa4aa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -176,7 +176,7 @@ static bool constantpool_address_p (const_rtx addr); + static bool xtensa_legitimate_constant_p (machine_mode, rtx); + static void xtensa_reorg (void); + static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int &, +- unsigned int, bool); ++ unsigned int, bool); + static const char *xtensa_invalid_within_doloop (const rtx_insn *); + + static bool xtensa_member_type_forces_blk (const_tree, +@@ -2105,7 +2105,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands) + done = 1; + } + break; +- } ++ } + } + + output_asm_insn ("%1_LEND:", operands); +@@ -2305,7 +2305,7 @@ xtensa_tls_module_base (void) + xtensa_tls_module_base_symbol = + gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol) +- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; ++ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return xtensa_tls_module_base_symbol; +@@ -3444,7 
+3444,7 @@ xtensa_expand_prologue (void) + } + } + else +- { ++ { + insn = emit_insn (gen_movsi (hard_frame_pointer_rtx, + stack_pointer_rtx)); + if (!TARGET_WINDOWED_ABI) +@@ -3567,11 +3567,12 @@ xtensa_set_return_address (rtx address, rtx scratch) + gen_rtx_REG (SImode, A0_REG)); + rtx insn; + +- if (total_size > 1024) { +- emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); +- emit_insn (gen_addsi3 (scratch, frame, scratch)); +- a0_addr = scratch; +- } ++ if (total_size > 1024) ++ { ++ emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); ++ emit_insn (gen_addsi3 (scratch, frame, scratch)); ++ a0_addr = scratch; ++ } + + insn = emit_move_insn (gen_frame_mem (SImode, a0_addr), address); + RTX_FRAME_RELATED_P (insn) = 1; +@@ -3853,8 +3854,8 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + /* Check if the argument is in registers: + + if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4 +- && !must_pass_in_stack (type)) +- __array = (AP).__va_reg; */ ++ && !must_pass_in_stack (type)) ++ __array = (AP).__va_reg; */ + + array = create_tmp_var (ptr_type_node); + +@@ -4550,8 +4551,8 @@ xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) + /* Worker function for TARGET_FUNCTION_VALUE. */ + + rtx +-xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, +- bool outgoing) ++xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, ++ bool outgoing) + { + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) + && TYPE_PRECISION (valtype) < BITS_PER_WORD) +@@ -4754,7 +4755,7 @@ xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + + static bool + xtensa_can_use_doloop_p (const widest_int &, const widest_int &, +- unsigned int loop_depth, bool entered_at_top) ++ unsigned int loop_depth, bool entered_at_top) + { + /* Considering limitations in the hardware, only use doloop + for innermost loops which must be entered from the top. 
*/ +@@ -4793,32 +4794,32 @@ hwloop_optimize (hwloop_info loop) + if (loop->depth > 1) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d is not innermost\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d is not innermost\n", ++ loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d has more than one entry\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d has more than one entry\n", ++ loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d is not entered from head\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d is not entered from head\n", ++ loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d has invalid insn\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d has invalid insn\n", ++ loop->loop_no); + return false; + } + +@@ -4826,8 +4827,8 @@ hwloop_optimize (hwloop_info loop) + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d uses iterator\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d uses iterator\n", ++ loop->loop_no); + return false; + } + +@@ -4839,8 +4840,8 @@ hwloop_optimize (hwloop_info loop) + if (!insn) + { + if (dump_file) +- fprintf (dump_file, ";; loop %d start_label not before loop_end\n", +- loop->loop_no); ++ fprintf (dump_file, ";; loop %d start_label not before loop_end\n", ++ loop->loop_no); + return false; + } + +@@ -4864,8 +4865,8 @@ hwloop_optimize (hwloop_info loop) + start_sequence (); + + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, +- loop->start_label, +- loop->iter_reg)); ++ loop->start_label, ++ loop->iter_reg)); + + seq = get_insns (); + +@@ -4881,21 +4882,21 @@ hwloop_optimize (hwloop_info loop) + seq = emit_label_before (gen_label_rtx (), seq); + new_bb = create_basic_block (seq, 
insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) +- { +- if (!(e->flags & EDGE_FALLTHRU)) +- redirect_edge_and_branch_force (e, new_bb); +- else +- redirect_edge_succ (e, new_bb); +- } ++ { ++ if (!(e->flags & EDGE_FALLTHRU)) ++ redirect_edge_and_branch_force (e, new_bb); ++ else ++ redirect_edge_succ (e, new_bb); ++ } + + make_edge (new_bb, loop->head, 0); + } + else + { + while (DEBUG_INSN_P (entry_after) +- || (NOTE_P (entry_after) ++ || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) +- entry_after = PREV_INSN (entry_after); ++ entry_after = PREV_INSN (entry_after); + + emit_insn_after (seq, entry_after); + } +@@ -4916,15 +4917,15 @@ hwloop_fail (hwloop_info loop) + rtx_insn *insn = loop->loop_end; + + emit_insn_before (gen_addsi3 (loop->iter_reg, +- loop->iter_reg, +- constm1_rtx), +- loop->loop_end); ++ loop->iter_reg, ++ constm1_rtx), ++ loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, +- loop->iter_reg, const0_rtx, +- loop->start_label), +- loop->loop_end); ++ loop->iter_reg, const0_rtx, ++ loop->start_label), ++ loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index 4b08ee5c1..b5fec1cb3 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -63,7 +63,7 @@ along with GCC; see the file COPYING3. 
If not see + #define TARGET_S32C1I XCHAL_HAVE_S32C1I + #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS + #define TARGET_THREADPTR XCHAL_HAVE_THREADPTR +-#define TARGET_LOOPS XCHAL_HAVE_LOOPS ++#define TARGET_LOOPS XCHAL_HAVE_LOOPS + #define TARGET_WINDOWED_ABI (XSHAL_ABI == XTHAL_ABI_WINDOWED) + #define TARGET_DEBUG XCHAL_HAVE_DEBUG + #define TARGET_L32R XCHAL_HAVE_L32R +@@ -297,7 +297,7 @@ extern int leaf_function; + + /* Coprocessor registers */ + #define BR_REG_FIRST 18 +-#define BR_REG_LAST 18 ++#define BR_REG_LAST 18 + #define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1) + + /* 16 floating-point registers */ +@@ -743,7 +743,7 @@ typedef struct xtensa_args + + + /* Define output to appear before the constant pool. */ +-#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ ++#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ + do { \ + if ((SIZE) > 0 || !TARGET_WINDOWED_ABI) \ + { \ +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 31e5f1b28..08fb6f312 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -70,13 +70,13 @@ + + ;; This code iterator is for floating-point comparisons. + (define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered]) +-(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") ++(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") + (uneq "ueq") (unlt "ult") (unle "ule") + (unordered "un")]) + + ;; This iterator and attribute allow to combine most atomic operations. 
+ (define_code_iterator ATOMIC [and ior xor plus minus mult]) +-(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") ++(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") + (plus "add") (minus "sub") (mult "nand")]) + + ;; This mode iterator allows the HI and QI patterns to be defined from +@@ -195,7 +195,7 @@ + + (define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a") +- (minus:SI (match_operand:SI 1 "register_operand" "r") ++ (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub\t%0, %1, %2" +@@ -434,7 +434,7 @@ + + (define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=a") +- (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") ++ (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MINMAX" + "\t%0, %1, %2" +@@ -507,7 +507,7 @@ + + (define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") +- (bswap:SI (match_operand:SI 1 "register_operand" "")))] ++ (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "!optimize_debug && optimize > 1" + { + /* GIMPLE manual byte-swapping recognition is now activated. 
+@@ -1025,7 +1025,7 @@ + %v0s32i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" +- [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") ++ [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + +@@ -1175,7 +1175,7 @@ + "((register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)) + && !(FP_REG_P (xt_true_regnum (operands[0])) +- && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" ++ && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" + "@ + mov.s\t%0, %1 + %v1lsi\t%0, %1 +@@ -1360,7 +1360,7 @@ + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] +- "" ++ "" + "@ + slli\t%0, %1, %R2 + ssl\t%2\;sll\t%0, %1" +@@ -1946,13 +1946,13 @@ + + (define_insn "zero_cost_loop_start" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "register_operand" "0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "register_operand" "0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] + "TARGET_LOOPS && optimize" + "loop\t%0, %l1_LEND" +@@ -1962,13 +1962,13 @@ + + (define_insn "zero_cost_loop_end" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a,m") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus 
(match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r"))] + "TARGET_LOOPS && optimize" +@@ -1979,13 +1979,13 @@ + + (define_insn "loop_end" + [(set (pc) +- (if_then_else (ne (match_operand:SI 2 "register_operand" "0") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 2 "register_operand" "0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] + "TARGET_LOOPS && optimize" + { +@@ -1998,13 +1998,13 @@ + + (define_split + [(set (pc) +- (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) ++ (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) + (set (match_operand:SI 2 "nonimmediate_operand" "") +- (plus:SI (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch 3))] + "TARGET_LOOPS && optimize && reload_completed" +@@ -2020,7 +2020,7 @@ + emit_move_insn (operands[0], operands[3]); + test = gen_rtx_NE (VOIDmode, operands[3], const0_rtx); + emit_jump_insn (gen_cbranchsi4 (test, operands[3], +- const0_rtx, operands[1])); ++ const0_rtx, operands[1])); + } + else + { +@@ -2034,15 +2034,15 @@ + ; operand 1 is the label to jump to at the top of the loop + (define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else +- (ne (match_operand:SI 0 "" "") +- (const_int 1)) +- (label_ref (match_operand 1 "" "")) +- (pc))) +- (set (match_dup 0) +- (plus:SI (match_dup 0) +- (const_int -1))) +- (unspec [(const_int 0)] UNSPEC_LSETUP_END) +- (clobber (match_dup 2))])] ; match_scratch ++ (ne (match_operand:SI 0 "" "") ++ 
(const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int -1))) ++ (unspec [(const_int 0)] UNSPEC_LSETUP_END) ++ (clobber (match_dup 2))])] ; match_scratch + "TARGET_LOOPS && optimize" + { + /* The loop optimizer doesn't check the predicates... */ +@@ -2281,8 +2281,8 @@ + + (define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") +- (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) +- (match_operand 2 "" "i")))] ++ (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) ++ (match_operand 2 "" "i")))] + "!SIBLING_CALL_P (insn)" + { + return xtensa_emit_call (1, operands); +@@ -2387,9 +2387,9 @@ + + (define_expand "allocate_stack" + [(set (match_operand 0 "nonimmed_operand") +- (minus (reg A1_REG) (match_operand 1 "add_operand"))) ++ (minus (reg A1_REG) (match_operand 1 "add_operand"))) + (set (reg A1_REG) +- (minus (reg A1_REG) (match_dup 1)))] ++ (minus (reg A1_REG) (match_dup 1)))] + "TARGET_WINDOWED_ABI" + { + if (CONST_INT_P (operands[1])) +@@ -2514,7 +2514,7 @@ + + (define_expand "frame_blockage" + [(set (match_dup 0) +- (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] ++ (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); +@@ -2524,7 +2524,7 @@ + + (define_insn "*frame_blockage" + [(set (match_operand:BLK 0 "" "") +- (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] ++ (unspec:BLK [(match_operand:SI 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" + [(set_attr "type" "nop") +@@ -2801,7 +2801,7 @@ + (define_expand "sync_new_" + [(set (match_operand:HQI 0 "register_operand") + (ATOMIC:HQI (match_operand:HQI 1 "memory_operand") +- (match_operand:HQI 2 "register_operand"))) ++ (match_operand:HQI 2 "register_operand"))) + (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))] + "TARGET_S32C1I" + { +-- +2.30.2 + diff --git 
a/patches/gcc10.3/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch b/patches/gcc10.3/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch new file mode 100644 index 0000000..8809c85 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0059-Clean-up-xtensa_expand_prologue.patch @@ -0,0 +1,42 @@ +From b1f4a90f366a3a5775f30507e2b7800ad366dcdc Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Clean up xtensa_expand_prologue + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): Modify to + exit the inspection loops as soon as the necessity of stack + pointer is found. +--- + gcc/config/xtensa/xtensa.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index d3dafa4aa..d4713cd8d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3362,12 +3362,18 @@ xtensa_expand_prologue (void) + ref; ref = DF_REF_NEXT_REG (ref)) + if (DF_REF_CLASS (ref) == DF_REF_REGULAR + && NONJUMP_INSN_P (DF_REF_INSN (ref))) +- stack_pointer_needed = true; ++ { ++ stack_pointer_needed = true; ++ break; ++ } + /* Check if callee-saved registers really need saving to the stack. 
*/ + if (!stack_pointer_needed) + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) + if (xtensa_call_save_reg (regno)) +- stack_pointer_needed = true; ++ { ++ stack_pointer_needed = true; ++ break; ++ } + + cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed; + +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch b/patches/gcc10.3/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch new file mode 100644 index 0000000..789c1a3 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0060-Change-GP_RETURN-_REG_COUNT-to-GP_RETURN_-FIR.patch @@ -0,0 +1,71 @@ +From e3b1e99a383cbceb2c910a3a88392f37e58daeb2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Change GP_RETURN{,_REG_COUNT} to + GP_RETURN_{FIRST,LAST} + +gcc/ChangeLog: + + * config/xtensa/xtensa.h (GP_RETURN, GP_RETURN_REG_COUNT): + Change to GP_RETURN_FIRST and GP_RETURN_LAST, respectively. + * config/xtensa/xtensa.c (xtensa_function_value, + xtensa_libcall_value, xtensa_function_value_regno_p): Ditto. +--- + gcc/config/xtensa/xtensa.c | 10 +++++----- + gcc/config/xtensa/xtensa.h | 4 ++-- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index d4713cd8d..054a44ea3 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4561,9 +4561,9 @@ xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + bool outgoing) + { + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) +- && TYPE_PRECISION (valtype) < BITS_PER_WORD) +- ? SImode : TYPE_MODE (valtype), +- outgoing ? GP_OUTGOING_RETURN : GP_RETURN); ++ && TYPE_PRECISION (valtype) < BITS_PER_WORD) ++ ? SImode : TYPE_MODE (valtype), ++ outgoing ? GP_OUTGOING_RETURN : GP_RETURN_FIRST); + } + + /* Worker function for TARGET_LIBCALL_VALUE. 
*/ +@@ -4573,7 +4573,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + { + return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) +- ? SImode : mode, GP_RETURN); ++ ? SImode : mode, GP_RETURN_FIRST); + } + + /* Worker function TARGET_FUNCTION_VALUE_REGNO_P. */ +@@ -4581,7 +4581,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) + static bool + xtensa_function_value_regno_p (const unsigned int regno) + { +- return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT); ++ return IN_RANGE (regno, GP_RETURN_FIRST, GP_RETURN_LAST); + } + + /* The static chain is passed in memory. Provide rtx giving 'mem' +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index b5fec1cb3..e3f808c42 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -475,9 +475,9 @@ enum reg_class + + /* Symbolic macros for the registers used to return integer, floating + point, and values of coprocessor and user-defined modes. */ +-#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) ++#define GP_RETURN_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE) ++#define GP_RETURN_LAST (GP_RETURN_FIRST + 3) + #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) +-#define GP_RETURN_REG_COUNT 4 + + /* Symbolic macros for the first/last argument registers. 
*/ + #define GP_ARG_FIRST (GP_REG_FIRST + 2) +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch b/patches/gcc10.3/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch new file mode 100644 index 0000000..5b71081 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0061-Generate-density-instructions-in-set_frame_pt.patch @@ -0,0 +1,38 @@ +From 46b7c587fe47fa73811d7cd9b453ee32f7ba8ad8 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 27 Dec 2022 15:30:12 +0900 +Subject: [PATCH] xtensa: Generate density instructions in set_frame_ptr + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (set_frame_ptr): Fix to reflect + TARGET_DENSITY. +--- + gcc/config/xtensa/xtensa.md | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 08fb6f312..06fda8aa5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2567,12 +2567,15 @@ + "" + { + if (frame_pointer_needed) +- return "mov\ta7, sp"; ++ return (TARGET_DENSITY ? "mov.n\ta7, sp" : "mov\ta7, sp"); + return ""; + } + [(set_attr "type" "move") + (set_attr "mode" "SI") +- (set_attr "length" "3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY") ++ (const_int 2) ++ (const_int 3)))]) + + ;; Post-reload splitter to remove fp assignment when it's not needed. 
+ (define_split +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch b/patches/gcc10.3/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch new file mode 100644 index 0000000..57976f3 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0062-use-define_c_enums-instead-of-define_constant.patch @@ -0,0 +1,77 @@ +From 101c49b504fb567227291a381ada09273d8ec4a7 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Fri, 23 Dec 2022 12:17:09 -0800 +Subject: [PATCH] gcc: xtensa: use define_c_enums instead of + define_constants + +This improves RTL dumps readability. No functional changes. + +gcc/ + * config/xtensa/xtensa.md (unspec): Extract UNSPEC_* constants + into this enum. + (unspecv): Extract UNSPECV_* constants into this enum. +--- + gcc/config/xtensa/xtensa.md | 46 ++++++++++++++++++++----------------- + 1 file changed, 25 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 06fda8aa5..a2cfb3df7 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -25,28 +25,32 @@ + (A7_REG 7) + (A8_REG 8) + (A9_REG 9) ++]) ++ ++(define_c_enum "unspec" [ ++ UNSPEC_NOP ++ UNSPEC_PLT ++ UNSPEC_RET_ADDR ++ UNSPEC_TPOFF ++ UNSPEC_DTPOFF ++ UNSPEC_TLS_FUNC ++ UNSPEC_TLS_ARG ++ UNSPEC_TLS_CALL ++ UNSPEC_TP ++ UNSPEC_MEMW ++ UNSPEC_LSETUP_START ++ UNSPEC_LSETUP_END ++ UNSPEC_FRAME_BLOCKAGE ++]) + +- (UNSPEC_NOP 2) +- (UNSPEC_PLT 3) +- (UNSPEC_RET_ADDR 4) +- (UNSPEC_TPOFF 5) +- (UNSPEC_DTPOFF 6) +- (UNSPEC_TLS_FUNC 7) +- (UNSPEC_TLS_ARG 8) +- (UNSPEC_TLS_CALL 9) +- (UNSPEC_TP 10) +- (UNSPEC_MEMW 11) +- (UNSPEC_LSETUP_START 12) +- (UNSPEC_LSETUP_END 13) +- (UNSPEC_FRAME_BLOCKAGE 14) +- +- (UNSPECV_SET_FP 1) +- (UNSPECV_ENTRY 2) +- (UNSPECV_S32RI 4) +- (UNSPECV_S32C1I 5) +- (UNSPECV_EH_RETURN 6) +- (UNSPECV_SET_TP 7) +- (UNSPECV_BLOCKAGE 8) ++(define_c_enum "unspecv" [ ++ UNSPECV_SET_FP ++ UNSPECV_ENTRY ++ UNSPECV_S32RI ++ UNSPECV_S32C1I ++ 
UNSPECV_EH_RETURN ++ UNSPECV_SET_TP ++ UNSPECV_BLOCKAGE + ]) + + ;; This code iterator allows signed and unsigned widening multiplications +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0063-Check-DF-availability-before-use.patch b/patches/gcc10.3/gcc-xtensa-0063-Check-DF-availability-before-use.patch new file mode 100644 index 0000000..afa8d82 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0063-Check-DF-availability-before-use.patch @@ -0,0 +1,31 @@ +From 4b938a83c19e3e7dc71b407e2f78f2ccbc57b742 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 29 Dec 2022 21:14:33 +0900 +Subject: [PATCH] xtensa: Check DF availability before use + +Perhaps no problem, but for safety. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_expand_prologue): Fix to check + DF availability before use of DF_* macros. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 054a44ea3..8f748efa4 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3357,7 +3357,7 @@ xtensa_expand_prologue (void) + || crtl->calls_eh_return; + + /* Check if the function body really needs the stack pointer. 
*/ +- if (!stack_pointer_needed) ++ if (!stack_pointer_needed && df) + for (ref = DF_REG_USE_CHAIN (A1_REG); + ref; ref = DF_REF_NEXT_REG (ref)) + if (DF_REF_CLASS (ref) == DF_REF_REGULAR +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch b/patches/gcc10.3/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch new file mode 100644 index 0000000..93f3399 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0064-use-GP_RETURN_-instead-of-magic-constant.patch @@ -0,0 +1,28 @@ +From 4433ebb185be476704937b3a8b3fe5e568c22712 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Wed, 28 Dec 2022 11:27:21 -0800 +Subject: [PATCH] gcc: xtensa: use GP_RETURN_* instead of magic constant + +gcc/ + * config/xtensa/xtensa.c (xtensa_return_in_memory): Use + GP_RETURN_* instead of magic constant. +--- + gcc/config/xtensa/xtensa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 8f748efa4..ffd36217d 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4551,7 +4551,7 @@ static bool + xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) + { + return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) +- > 4 * UNITS_PER_WORD); ++ > (unsigned) (GP_RETURN_LAST - GP_RETURN_FIRST + 1) * UNITS_PER_WORD); + } + + /* Worker function for TARGET_FUNCTION_VALUE. 
*/ +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch b/patches/gcc10.3/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch new file mode 100644 index 0000000..2964a2b --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0065-Optimize-stack-frame-adjustment-more.patch @@ -0,0 +1,245 @@ +From 385f165d92e68a58206005e9652e8e880a2ed2d7 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 28 Dec 2022 22:50:52 +0900 +Subject: [PATCH] xtensa: Optimize stack frame adjustment more + +This patch introduces a convenient helper function for integer immediate +addition with scratch register as needed, that splits and emits either +up to two ADDI/ADDMI machine instructions or an addition by register +following an integer immediate load (which may later be transformed by +constantsynth). + +By using the helper function, it makes stack frame adjustment logic +simplified and instruction count less in some cases. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c + (xtensa_split_imm_two_addends, xtensa_emit_add_imm): + New helper functions. + (xtensa_set_return_address, xtensa_output_mi_thunk): + Change to use the helper function. + (xtensa_emit_adjust_stack_ptr): Ditto. + And also change to try reusing the content of scratch register + A9 if the register is not modified in the function body. 
+--- + gcc/config/xtensa/xtensa.c | 151 ++++++++++++++++++++++++++----------- + 1 file changed, 106 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ffd36217d..b05ae9045 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -104,6 +104,7 @@ struct GTY(()) machine_function + bool frame_laid_out; + bool epilogue_done; + bool inhibit_logues_a1_adjusts; ++ rtx last_logues_a9_content; + }; + + /* Vector, indexed by hard register number, which contains 1 for a +@@ -2509,6 +2510,86 @@ xtensa_split_DI_reg_imm (rtx *operands) + } + + ++/* Try to split an integer value into what are suitable for two consecutive ++ immediate addition instructions, ADDI or ADDMI. */ ++ ++static bool ++xtensa_split_imm_two_addends (HOST_WIDE_INT imm, HOST_WIDE_INT v[2]) ++{ ++ HOST_WIDE_INT v0, v1; ++ ++ if (imm < -32768) ++ v0 = -32768, v1 = imm + 32768; ++ else if (imm > 32512) ++ v0 = 32512, v1 = imm - 32512; ++ else if (TARGET_DENSITY && xtensa_simm12b (imm)) ++ /* A pair of MOVI(.N) and ADD.N is one or two bytes less than two ++ immediate additions if TARGET_DENSITY. */ ++ return false; ++ else ++ v0 = (imm + 128) & ~255L, v1 = imm - v0; ++ ++ if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1)) ++ { ++ v[0] = v0, v[1] = v1; ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++/* Helper function for integer immediate addition with scratch register ++ as needed, that splits and emits either up to two ADDI/ADDMI machine ++ instructions or an addition by register following an integer immediate ++ load (which may later be transformed by constantsynth). ++ ++ If 'scratch' is NULL_RTX but still needed, a new pseudo-register will ++ be allocated. Thus, after the reload/LRA pass, the specified scratch ++ register must be a hard one. 
*/ ++ ++static bool ++xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch, ++ bool need_note) ++{ ++ bool retval = false; ++ HOST_WIDE_INT v[2]; ++ rtx_insn *insn; ++ ++ if (imm == 0) ++ return false; ++ ++ if (xtensa_simm8 (imm) || xtensa_simm8x256 (imm)) ++ insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm))); ++ else if (xtensa_split_imm_two_addends (imm, v)) ++ { ++ if (!scratch) ++ scratch = gen_reg_rtx (SImode); ++ emit_insn (gen_addsi3 (scratch, src, GEN_INT (v[0]))); ++ insn = emit_insn (gen_addsi3 (dst, scratch, GEN_INT (v[1]))); ++ } ++ else ++ { ++ if (scratch) ++ emit_move_insn (scratch, GEN_INT (imm)); ++ else ++ scratch = force_reg (SImode, GEN_INT (imm)); ++ retval = true; ++ insn = emit_insn (gen_addsi3 (dst, src, scratch)); ++ } ++ ++ if (need_note) ++ { ++ rtx note_rtx = gen_rtx_SET (dst, plus_constant (Pmode, src, imm)); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } ++ ++ return retval; ++} ++ ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + + static bool +@@ -3280,41 +3361,33 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED) + static void + xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) + { ++ rtx src, scratch; + rtx_insn *insn; +- rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx +- : stack_pointer_rtx; + + if (cfun->machine->inhibit_logues_a1_adjusts) + return; + +- if (xtensa_simm8 (offset) +- || xtensa_simm8x256 (offset)) +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset))); +- else +- { +- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); ++ src = (flags & ADJUST_SP_FRAME_PTR) ++ ? 
hard_frame_pointer_rtx : stack_pointer_rtx; ++ scratch = gen_rtx_REG (Pmode, A9_REG); + +- if (offset < 0) +- { +- emit_move_insn (tmp_reg, GEN_INT (-offset)); +- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg)); +- } +- else +- { +- emit_move_insn (tmp_reg, GEN_INT (offset)); +- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg)); +- } +- } +- +- if (flags & ADJUST_SP_NEED_NOTE) ++ if (df && DF_REG_DEF_COUNT (A9_REG) == 0 ++ && cfun->machine->last_logues_a9_content ++ && -INTVAL (cfun->machine->last_logues_a9_content) == offset) + { +- rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- offset)); ++ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, src, scratch)); ++ if (flags & ADJUST_SP_NEED_NOTE) ++ { ++ rtx note_rtx = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, src, offset)); + +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); ++ } + } ++ else if (xtensa_emit_add_imm (stack_pointer_rtx, src, offset, scratch, ++ (flags & ADJUST_SP_NEED_NOTE))) ++ cfun->machine->last_logues_a9_content = GEN_INT (offset); + } + + /* minimum frame = reg save area (4 words) plus static chain (1 word) +@@ -3342,8 +3415,9 @@ xtensa_expand_prologue (void) + /* Use a8 as a temporary since a0-a7 may be live. 
*/ + rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG); + emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE))); +- emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE)); +- emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg)); ++ xtensa_emit_add_imm (tmp_reg, stack_pointer_rtx, ++ MIN_FRAME_SIZE - total_size, ++ tmp_reg, false); + insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg)); + } + } +@@ -3575,8 +3649,8 @@ xtensa_set_return_address (rtx address, rtx scratch) + + if (total_size > 1024) + { +- emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD)); +- emit_insn (gen_addsi3 (scratch, frame, scratch)); ++ xtensa_emit_add_imm (scratch, frame, total_size - UNITS_PER_WORD, ++ scratch, false); + a0_addr = scratch; + } + +@@ -5125,15 +5199,7 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no); + + if (delta) +- { +- if (xtensa_simm8 (delta)) +- emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta))); +- else +- { +- emit_move_insn (temp0, GEN_INT (delta)); +- emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0)); +- } +- } ++ xtensa_emit_add_imm (this_rtx, this_rtx, delta, temp0, false); + + if (vcall_offset) + { +@@ -5143,13 +5209,8 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx)); + if (xtensa_uimm8x4 (vcall_offset)) + addr = plus_constant (Pmode, temp0, vcall_offset); +- else if (xtensa_simm8 (vcall_offset)) +- emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset))); + else +- { +- emit_move_insn (temp1, GEN_INT (vcall_offset)); +- emit_insn (gen_addsi3 (temp1, temp0, temp1)); +- } ++ xtensa_emit_add_imm (temp1, temp0, vcall_offset, temp1, false); + emit_move_insn (temp1, gen_rtx_MEM (Pmode, addr)); + emit_insn (gen_add2_insn (this_rtx, temp1)); + } +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch 
b/patches/gcc10.3/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch new file mode 100644 index 0000000..193de88 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0066-Optimize-bitwise-splicing-operation.patch @@ -0,0 +1,84 @@ +From fef84d1ba0cb5956687f776b22f51d9fa5e7d176 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 6 Jan 2023 14:08:06 +0900 +Subject: [PATCH] xtensa: Optimize bitwise splicing operation + +This patch optimizes the operation of cutting and splicing two register +values at a specified bit position, in other words, combining (bitwise +ORing) bits 0 through (C-1) of the register with bits C through 31 +of the other, where C is the specified immediate integer 17 through 31. + +This typically applies to signed copy of floating point number and +__builtin_return_address() if the windowed register ABI, and saves one +instruction compared to four shifts and a bitwise OR by the default RTL +combination pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*splice_bits): + New insn_and_split pattern. 
+--- + gcc/config/xtensa/xtensa.md | 47 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index a2cfb3df7..ba1c044c4 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -746,6 +746,53 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + ++(define_insn_and_split "*splice_bits" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 3 "const_int_operand" "i")) ++ (and:SI (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 4 "const_int_operand" "i"))))] ++ ++ "!optimize_debug && optimize ++ && INTVAL (operands[3]) + INTVAL (operands[4]) == -1 ++ && (exact_log2 (INTVAL (operands[3]) + 1) > 16 ++ || exact_log2 (INTVAL (operands[4]) + 1) > 16)" ++ "#" ++ "&& can_create_pseudo_p ()" ++ [(set (match_dup 5) ++ (ashift:SI (match_dup 1) ++ (match_dup 4))) ++ (set (match_dup 6) ++ (lshiftrt:SI (match_dup 2) ++ (match_dup 3))) ++ (set (match_dup 0) ++ (ior:SI (lshiftrt:SI (match_dup 5) ++ (match_dup 4)) ++ (ashift:SI (match_dup 6) ++ (match_dup 3))))] ++{ ++ int shift; ++ if (INTVAL (operands[3]) < 0) ++ { ++ rtx x; ++ x = operands[1], operands[1] = operands[2], operands[2] = x; ++ x = operands[3], operands[3] = operands[4], operands[4] = x; ++ } ++ shift = floor_log2 (INTVAL (operands[3]) + 1); ++ operands[3] = GEN_INT (shift); ++ operands[4] = GEN_INT (32 - shift); ++ operands[5] = gen_reg_rtx (SImode); ++ operands[6] = gen_reg_rtx (SImode); ++} ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI") ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && (INTVAL (operands[3]) == 0x7FFFFFFF ++ || INTVAL (operands[4]) == 0x7FFFFFFF)") ++ (const_int 11) ++ (const_int 12)))]) ++ + + ;; Zero-extend instructions. 
+ +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch b/patches/gcc10.3/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch new file mode 100644 index 0000000..a146e25 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0067-Make-instruction-cost-estimation-for-size-mor.patch @@ -0,0 +1,85 @@ +From 32f3873104faa4323d7db85262145b7895824e4a Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Tue, 10 Jan 2023 01:44:09 +0900 +Subject: [PATCH] xtensa: Make instruction cost estimation for size more + accurate + +Until now, we applied COSTS_N_INSNS() (multiplying by 4) after dividing +the instruction length by 3, so we couldn't express the difference less +than modulo 3 in insn cost for size (e.g. 11 Bytes and 12 bytes cost the +same). + +This patch fixes that. + +;; 2 bytes +addi.n a2, a2, -1 ; cost 3 + +;; 3 bytes +addmi a2, a2, 1024 ; cost 4 + +;; 4 bytes +movi.n a3, 80 ; cost 5 +bnez.n a2, a3, .L4 + +;; 5 bytes +srli a2, a3, 1 ; cost 7 +add.n a2, a2, a2 + +;; 6 bytes +ssai 8 ; cost 8 +src a4, a2, a3 + +:: 3 + 4 bytes +l32r a2, .L5 ; cost 9 + +;; 11 bytes ; cost 15 +;; 12 bytes ; cost 16 + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_insn_cost): + Let insn cost for size be obtained by applying COSTS_N_INSNS() + to instruction length and then dividing by 3. +--- + gcc/config/xtensa/xtensa.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index b05ae9045..e0adf069e 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -4554,13 +4554,15 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) + { + if (!(recog_memoized (insn) < 0)) + { +- int len = get_attr_length (insn), n = (len + 2) / 3; ++ int len = get_attr_length (insn); + + if (len == 0) + return COSTS_N_INSNS (0); + + if (speed) /* For speed cost. 
*/ + { ++ int n = (len + 2) / 3; ++ + /* "L32R" may be particular slow (implementation-dependent). */ + if (xtensa_is_insn_L32R_p (insn)) + return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); +@@ -4607,10 +4609,11 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) + { + /* "L32R" itself plus constant in litpool. */ + if (xtensa_is_insn_L32R_p (insn)) +- return COSTS_N_INSNS (2) + 1; ++ len = 3 + 4; + +- /* Consider ".n" short instructions. */ +- return COSTS_N_INSNS (n) - (n * 3 - len); ++ /* Consider fractional instruction length (for example, ".n" ++ short instructions or "L32R" litpool constants. */ ++ return (COSTS_N_INSNS (len) + 1) / 3; + } + } + } +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch b/patches/gcc10.3/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch new file mode 100644 index 0000000..40caadd --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0068-Tune-btrue-insn-pattern.patch @@ -0,0 +1,55 @@ +From 5fe437012eb770e8fc2d2d9f859110e5cc707fc5 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 11 Jan 2023 19:26:03 +0900 +Subject: [PATCH] xtensa: Tune "*btrue" insn pattern + +This branch instruction has short encoding if EQ/NE comparison against +immediate zero when the Code Density Option is enabled, but its "length" +attribute was only for normal encoding. This patch fixes it. + +This patch also prevents undesireable replacement the comparison immediate +zero of the instruction (short encoding, as mentioned above) with a +register that has value of zero (normal encoding) by the postreload pass. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (*btrue): + Correct value of the attribute "length" that depends on + TARGET_DENSITY and operands, and add '?' character to the register + constraint of the compared operand. 
+--- + gcc/config/xtensa/xtensa.md | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index ba1c044c4..4b0b74368 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1684,7 +1684,7 @@ + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") +- (match_operand:SI 1 "branch_operand" "K,r")]) ++ (match_operand:SI 1 "branch_operand" "K,?r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +@@ -1693,7 +1693,14 @@ + } + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") +- (set_attr "length" "3,3")]) ++ (set (attr "length") ++ (if_then_else (match_test "TARGET_DENSITY ++ && CONST_INT_P (operands[1]) ++ && INTVAL (operands[1]) == 0 ++ && (GET_CODE (operands[3]) == EQ ++ || GET_CODE (operands[3]) == NE)") ++ (const_int 2) ++ (const_int 3)))]) + + (define_insn "*ubtrue" + [(set (pc) +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch b/patches/gcc10.3/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch new file mode 100644 index 0000000..017244c --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0069-Optimize-ctzsi2-and-ffssi2-a-bit.patch @@ -0,0 +1,44 @@ +From c7111ec38b7a1825f759804b021afc7b7b5b7491 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 12 Jan 2023 10:01:01 +0900 +Subject: [PATCH] xtensa: Optimize ctzsi2 and ffssi2 a bit + +This patch saves one byte when the Code Density Option is enabled, + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (ctzsi2, ffssi2): + Rearrange the emitting codes. 
+--- + gcc/config/xtensa/xtensa.md | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4b0b74368..4f1e8fd13 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -477,8 +477,8 @@ + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); +- emit_insn (gen_negsi2 (temp, temp)); +- emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31))); ++ emit_move_insn (operands[0], GEN_INT (31)); ++ emit_insn (gen_subsi3 (operands[0], operands[0], temp)); + DONE; + }) + +@@ -491,8 +491,8 @@ + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); +- emit_insn (gen_negsi2 (temp, temp)); +- emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32))); ++ emit_move_insn (operands[0], GEN_INT (32)); ++ emit_insn (gen_subsi3 (operands[0], operands[0], temp)); + DONE; + }) + +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch b/patches/gcc10.3/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch new file mode 100644 index 0000000..e182227 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0070-Remove-old-broken-tweak-for-leaf-function.patch @@ -0,0 +1,218 @@ +From 70feb8960c923e914f1e0bf8e7eae96300c708a2 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 14 Jan 2023 04:31:46 +0900 +Subject: [PATCH] xtensa: Remove old broken tweak for leaf function + +In the before-IRA era, ORDER_REGS_FOR_LOCAL_ALLOC was called for each +function in Xtensa, and there was register allocation table reordering +for leaf functions to compensate for the poor performance of local-alloc. 
+ +Today the adjustment hook is still called via its alternative +ADJUST_REG_ALLOC_ORDER, but it is only called once at the start of the IRA, +and leaf_function_p() erroneously returns true and also gives no argument +count. + +That straightforwardly misleads register allocation that all functions are +always leaves with no arguments, which leads to inefficiencies in allocation +results. + +Fortunately, IRA is smart enough than local-alloc to not need such assistance. + +This patch does away with the antiquated by removing the wreckage that no +longer works. + +gcc/ChangeLog: + + * config/xtensa/xtensa-protos.h (order_regs_for_local_alloc): + Rename to xtensa_adjust_reg_alloc_order. + * config/xtensa/xtensa.c (xtensa_adjust_reg_alloc_order): + Ditto. And also remove code to reorder register numbers for + leaf functions, rename the tables, and adjust the allocation + order for the call0 ABI to use register A0 more. + (xtensa_leaf_regs): Remove. + * config/xtensa/xtensa.h (REG_ALLOC_ORDER): Cosmetics. + (order_regs_for_local_alloc): Rename as the above. + (LEAF_REGISTERS, LEAF_REG_REMAP, leaf_function): Remove. 
+--- + gcc/config/xtensa/xtensa-protos.h | 2 +- + gcc/config/xtensa/xtensa.c | 77 +++++++------------------------ + gcc/config/xtensa/xtensa.h | 51 ++++++-------------- + 3 files changed, 31 insertions(+), 99 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h +index 63b147a90..39d5a5825 100644 +--- a/gcc/config/xtensa/xtensa-protos.h ++++ b/gcc/config/xtensa/xtensa-protos.h +@@ -78,7 +78,7 @@ extern long compute_frame_size (poly_int64); + extern bool xtensa_use_return_instruction_p (void); + extern void xtensa_expand_prologue (void); + extern void xtensa_expand_epilogue (bool); +-extern void order_regs_for_local_alloc (void); ++extern void xtensa_adjust_reg_alloc_order (void); + extern enum reg_class xtensa_regno_to_class (int regno); + extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index e0adf069e..db7ac3599 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -107,18 +107,6 @@ struct GTY(()) machine_function + rtx last_logues_a9_content; + }; + +-/* Vector, indexed by hard register number, which contains 1 for a +- register that is allowable in a candidate for leaf function +- treatment. */ +- +-const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = +-{ +- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +- 1, 1, 1, +- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +- 1 +-}; +- + static void xtensa_option_override (void); + static enum internal_test map_test_to_internal_test (enum rtx_code); + static rtx gen_int_relational (enum rtx_code, rtx, rtx); +@@ -4175,58 +4163,25 @@ xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + return NO_REGS; + } + ++/* Called once at the start of IRA, by ADJUST_REG_ALLOC_ORDER. 
*/ + + void +-order_regs_for_local_alloc (void) ++xtensa_adjust_reg_alloc_order (void) + { +- if (!leaf_function_p ()) +- { +- static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = +- REG_ALLOC_ORDER; +- static const int reg_nonleaf_alloc_order_call0[FIRST_PSEUDO_REGISTER] = +- { +- 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 12, 13, 14, 15, +- 18, +- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, +- 0, 1, 16, 17, +- 35, +- }; +- +- memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ? +- reg_nonleaf_alloc_order : reg_nonleaf_alloc_order_call0, +- FIRST_PSEUDO_REGISTER * sizeof (int)); +- } +- else +- { +- int i, num_arg_regs; +- int nxt = 0; +- +- /* Use the AR registers in increasing order (skipping a0 and a1) +- but save the incoming argument registers for a last resort. */ +- num_arg_regs = crtl->args.info.arg_words; +- if (num_arg_regs > MAX_ARGS_IN_REGISTERS) +- num_arg_regs = MAX_ARGS_IN_REGISTERS; +- for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++) +- reg_alloc_order[nxt++] = i + num_arg_regs; +- for (i = 0; i < num_arg_regs; i++) +- reg_alloc_order[nxt++] = GP_ARG_FIRST + i; +- +- /* List the coprocessor registers in order. */ +- for (i = 0; i < BR_REG_NUM; i++) +- reg_alloc_order[nxt++] = BR_REG_FIRST + i; +- +- /* List the FP registers in order for now. */ +- for (i = 0; i < 16; i++) +- reg_alloc_order[nxt++] = FP_REG_FIRST + i; +- +- /* GCC requires that we list *all* the registers.... 
*/ +- reg_alloc_order[nxt++] = 0; /* a0 = return address */ +- reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */ +- reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */ +- reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */ +- +- reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */ +- } ++ static const int reg_windowed_alloc_order[FIRST_PSEUDO_REGISTER] = ++ REG_ALLOC_ORDER; ++ static const int reg_call0_alloc_order[FIRST_PSEUDO_REGISTER] = ++ { ++ 9, 10, 11, 7, 6, 5, 4, 3, 2, 8, 0, 12, 13, 14, 15, ++ 18, ++ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, ++ 1, 16, 17, ++ 35, ++ }; ++ ++ memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ? ++ reg_windowed_alloc_order : reg_call0_alloc_order, ++ FIRST_PSEUDO_REGISTER * sizeof (int)); + } + + +diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h +index e3f808c42..ef7f9e5d5 100644 +--- a/gcc/config/xtensa/xtensa.h ++++ b/gcc/config/xtensa/xtensa.h +@@ -249,44 +249,21 @@ along with GCC; see the file COPYING3. If not see + 1, \ + } + +-/* For non-leaf procedures on Xtensa processors, the allocation order +- is as specified below by REG_ALLOC_ORDER. For leaf procedures, we +- want to use the lowest numbered registers first to minimize +- register window overflows. However, local-alloc is not smart +- enough to consider conflicts with incoming arguments. If an +- incoming argument in a2 is live throughout the function and +- local-alloc decides to use a2, then the incoming argument must +- either be spilled or copied to another register. To get around +- this, we define ADJUST_REG_ALLOC_ORDER to redefine +- reg_alloc_order for leaf functions such that lowest numbered +- registers are used first with the exception that the incoming +- argument registers are not used until after other register choices +- have been exhausted. 
*/ +- +-#define REG_ALLOC_ORDER \ +-{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ +- 18, \ +- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ +- 0, 1, 16, 17, \ +- 35, \ +-} +- +-#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () +- +-/* For Xtensa, the only point of this is to prevent GCC from otherwise +- giving preference to call-used registers. To minimize window +- overflows for the AR registers, we want to give preference to the +- lower-numbered AR registers. For other register files, which are +- not windowed, we still prefer call-used registers, if there are any. */ +-extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER]; +-#define LEAF_REGISTERS xtensa_leaf_regs ++/* For the windowed register ABI on Xtensa processors, the allocation ++ order is as specified below by REG_ALLOC_ORDER. ++ For the call0 ABI, on the other hand, ADJUST_REG_ALLOC_ORDER hook ++ will be called once at the start of IRA, replacing it with the ++ appropriate one. */ + +-/* For Xtensa, no remapping is necessary, but this macro must be +- defined if LEAF_REGISTERS is defined. */ +-#define LEAF_REG_REMAP(REGNO) (REGNO) +- +-/* This must be declared if LEAF_REGISTERS is set. */ +-extern int leaf_function; ++#define REG_ALLOC_ORDER \ ++{ \ ++ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ ++ 18, \ ++ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ ++ 0, 1, 16, 17, \ ++ 35, \ ++} ++#define ADJUST_REG_ALLOC_ORDER xtensa_adjust_reg_alloc_order () + + /* Internal macros to classify a register number. 
*/ + +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch b/patches/gcc10.3/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch new file mode 100644 index 0000000..323b830 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0071-Optimize-inversion-of-the-MSB.patch @@ -0,0 +1,59 @@ +From 97538d16c11c17764aab63695ce3b5275fd50d56 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Wed, 18 Jan 2023 09:53:38 +0900 +Subject: [PATCH] xtensa: Optimize inversion of the MSB + +Such operation can be done either bitwise-XOR or addition with -2147483648, +but the latter is one byte less if TARGET_DENSITY. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md (xorsi3_internal): + Rename from the original of "xorsi3". + (xorsi3): New expansion pattern that emits addition rather than + bitwise-XOR when the second source is a constant of -2147483648 + if TARGET_DENSITY. +--- + gcc/config/xtensa/xtensa.md | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4f1e8fd13..c6a299cc1 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -736,7 +736,31 @@ + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +-(define_insn "xorsi3" ++(define_expand "xorsi3" ++ [(set (match_operand:SI 0 "register_operand") ++ (xor:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "nonmemory_operand")))] ++ "" ++{ ++ if (register_operand (operands[2], SImode)) ++ emit_insn (gen_xorsi3_internal (operands[0], operands[1], ++ operands[2])); ++ else ++ { ++ rtx (*gen_op)(rtx, rtx, rtx); ++ if (TARGET_DENSITY ++ && CONST_INT_P (operands[2]) ++ && INTVAL (operands[2]) == -2147483648L) ++ gen_op = gen_addsi3; ++ else ++ gen_op = gen_xorsi3_internal; ++ emit_insn (gen_op (operands[0], operands[1], ++ force_reg (SImode, operands[2]))); ++ } ++ DONE; ++}) ++ ++(define_insn "xorsi3_internal" + [(set (match_operand:SI 0 
"register_operand" "=a") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch b/patches/gcc10.3/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch new file mode 100644 index 0000000..157876b --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0072-Revise-complex-hard-register-clobber-eliminat.patch @@ -0,0 +1,112 @@ +From 91b14e1f0de9a690b6c3b411d1c2706e05063977 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 20 Jan 2023 08:30:01 +0900 +Subject: [PATCH] xtensa: Revise complex hard register clobber elimination + +In the previously posted patch +"xtensa: Make complex hard register clobber elimination more robust and accurate", +the check code for insns that refer to the [DS]Cmode hard register before +it is overwritten after it is clobbered is incomplete. Fortunately such +insns are seldom emitted, so it didn't matter. + +This patch fixes that for the sake of completeness. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: + Fix exit from loops detecting references before overwriting in the + split pattern. 
+--- + gcc/config/xtensa/xtensa.md | 72 +++++++++++++++++++------------------ + 1 file changed, 37 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index c6a299cc1..4d976ece5 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2978,45 +2978,47 @@ + { + auto_sbitmap bmp (FIRST_PSEUDO_REGISTER); + rtx_insn *insn; +- rtx reg = gen_rtx_REG (SImode, 0); ++ rtx reg = gen_rtx_REG (SImode, 0), dest; ++ unsigned int regno; ++ sbitmap_iterator iter; + bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0])); + for (insn = next_nonnote_nondebug_insn_bb (curr_insn); + insn; insn = next_nonnote_nondebug_insn_bb (insn)) +- { +- sbitmap_iterator iter; +- unsigned int regno; +- if (NONJUMP_INSN_P (insn)) +- { +- EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) +- { +- set_regno_raw (reg, regno, REG_NREGS (reg)); +- if (reg_overlap_mentioned_p (reg, PATTERN (insn))) +- break; +- } +- if (GET_CODE (PATTERN (insn)) == SET) +- { +- rtx x = SET_DEST (PATTERN (insn)); +- if (REG_P (x) && HARD_REGISTER_P (x)) +- bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x)); +- else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x))) +- { +- struct subreg_info info; +- subreg_get_info (regno = REGNO (SUBREG_REG (x)), +- GET_MODE (SUBREG_REG (x)), +- SUBREG_BYTE (x), GET_MODE (x), &info); +- if (!info.representable_p) +- break; +- bitmap_clear_range (bmp, regno + info.offset, info.nregs); +- } +- } +- if (bitmap_empty_p (bmp)) +- goto FALLTHRU; +- } +- else if (CALL_P (insn)) ++ if (NONJUMP_INSN_P (insn)) ++ { + EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) +- if (call_used_or_fixed_reg_p (regno)) +- break; +- } ++ { ++ set_regno_raw (reg, regno, REG_NREGS (reg)); ++ if (reg_referenced_p (reg, PATTERN (insn))) ++ goto ABORT; ++ } ++ if (GET_CODE (PATTERN (insn)) == SET ++ || GET_CODE (PATTERN (insn)) == CLOBBER) ++ { ++ dest = SET_DEST (PATTERN (insn)); ++ if (REG_P (dest) && HARD_REGISTER_P (dest)) ++ 
bitmap_clear_range (bmp, REGNO (dest), REG_NREGS (dest)); ++ else if (SUBREG_P (dest) ++ && HARD_REGISTER_P (SUBREG_REG (dest))) ++ { ++ struct subreg_info info; ++ subreg_get_info (regno = REGNO (SUBREG_REG (dest)), ++ GET_MODE (SUBREG_REG (dest)), ++ SUBREG_BYTE (dest), GET_MODE (dest), ++ &info); ++ if (!info.representable_p) ++ break; ++ bitmap_clear_range (bmp, regno + info.offset, info.nregs); ++ } ++ } ++ if (bitmap_empty_p (bmp)) ++ goto FALLTHRU; ++ } ++ else if (CALL_P (insn)) ++ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter) ++ if (call_used_or_fixed_reg_p (regno)) ++ goto ABORT; ++ABORT: + FAIL; + FALLTHRU:; + }) +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch b/patches/gcc10.3/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch new file mode 100644 index 0000000..1a7f055 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0073-Enforce-return-address-saving-when-Og-is-spec.patch @@ -0,0 +1,39 @@ +From 7ef080074a2c422e20a8e4dae50f6f002c6c2928 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Mon, 30 Jan 2023 18:37:55 +0900 +Subject: [PATCH] xtensa: Enforce return address saving when -Og is + specified + +Leaf function often omits saving its return address to the stack slot, +and this feature often makes debugging very confusing, especially for +stack dump analysis. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (xtensa_call_save_reg): Change to return + true if register A0 (return address register) when -Og is specified. 
+--- + gcc/config/xtensa/xtensa.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index db7ac3599..5c6ee7a8f 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3258,8 +3258,11 @@ xtensa_call_save_reg (int regno) + return false; + + if (regno == A0_REG) +- return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return || +- df_regs_ever_live_p (regno); ++ /* Ensure the return address to be saved to the stack slot in order ++ to assist stack dump analysis when -Og is specified. */ ++ return optimize_debug ++ || crtl->profile || !crtl->is_leaf || crtl->calls_eh_return ++ || df_regs_ever_live_p (regno); + + if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3)) + return true; +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0074-fix-PR-target-108876.patch b/patches/gcc10.3/gcc-xtensa-0074-fix-PR-target-108876.patch new file mode 100644 index 0000000..9609f04 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0074-fix-PR-target-108876.patch @@ -0,0 +1,116 @@ +From 16cfee0871e5a6411b17adc2dc422b9760d17893 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Wed, 22 Feb 2023 22:08:21 -0800 +Subject: [PATCH 1/2] xtensa: fix PR target/108876 + +In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call +insn included (use (reg:SI A0_REG)) to fix the problem, which added +a USE chain unconditionally to the data flow of register A0 during +the sibling call. + +As a result, df_regs_ever_live_p (A0_REG) returns true, so even if +register A0 is not used outside of the sibling call insn, saves and +restores to stack slots are emitted in pro/epilogue, and finally +code size increases. 
+(This is why I never included (use A0) in sibling calls) + + /* example */ + extern int foo(int); + int test(int a) { + return foo(a * 3 + 1); + } + +;; before + test: + addi sp, sp, -16 ;; unneeded stack frame allocation (induced) + s32i.n a0, sp, 12 ;; unneeded saving of register A0 + l32i.n a0, sp, 12 ;; unneeded restoration of register A0 + addx2 a2, a2, a2 + addi.n a2, a2, 1 + addi sp, sp, 16 ;; unneeded stack frame freeing (induced) + j.l foo, a9 ;; sibling call (truly needs register A0) + +The essential cause is that we emit (use A0) *before* the insns that +do the stack pointer adjustment during epilogue expansion, so the +liveness of register A0 ends early, so register A0 is reused afterwards. + +This patch fixes the problem and avoids such regression by doing the +emit of (use A0) in the sibling call epilogue expansion at the end. + +;; after +test: + addx2 a2, a2, a2 + addi.n a2, a2, 1 + j.l foo, a9 + +>From RTL-pass "315r.rnreg" by +"gfortran -O3 -funroll-loops -mabi=call0 -S -da gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90": + + ;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, decl_uid=987, cgraph_uid=3, symbol_order=4) + ... 
+ (insn 3807 3806 3808 121 (set (reg:SI 15 a15) + (mem/c:SI (plus:SI (reg/f:SI 1 sp) + (const_int 268 [0x10c])) [31 S4 A32])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 {movsi_internal} + (nil)) + (insn 3808 3807 3809 121 (set (reg:SI 7 a7) + (const_int 288 [0x120])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 {movsi_internal} + (nil)) + (insn 3809 3808 3810 121 (set (reg/f:SI 1 sp) + (plus:SI (reg/f:SI 1 sp) + (reg:SI 7 a7))) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 {addsi3} + (expr_list:REG_DEAD (reg:SI 9 a9) + (nil))) + (insn 3810 3809 721 121 (use (reg:SI 0 a0)) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1 + (expr_list:REG_DEAD (reg:SI 0 a0) + (nil))) + (call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") [flags 0x41] ) [0 __builtin_free S4 A32]) + (const_int 0 [0])) "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 discrim 1 106 {sibcall_internal} + (expr_list:REG_DEAD (reg:SI 2 a2) + (expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41] ) + (expr_list:REG_EH_REGION (const_int 0 [0]) + (nil)))) + (expr_list:SI (use (reg:SI 2 a2)) + (nil))) + +(IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account; +it just seems to allocate registers in fixed_regs index order, +which may have hurt register A0 that became allocatable in the recent +patch) + +gcc/ChangeLog: + PR target/108876 + + * config/xtensa/xtensa.c (xtensa_expand_epilogue): + Emit (use (reg:SI A0_REG)) at the end in the sibling call + (i.e. the same place as (return) in the normal call). 
+--- + gcc/config/xtensa/xtensa.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 5c6ee7a8f..3426494f5 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3583,8 +3583,6 @@ xtensa_expand_epilogue (bool sibcall_p) + gen_frame_mem (SImode, x)); + } + } +- if (sibcall_p) +- emit_use (gen_rtx_REG (SImode, A0_REG)); + + if (cfun->machine->current_frame_size > 0) + { +@@ -3610,7 +3608,9 @@ xtensa_expand_epilogue (bool sibcall_p) + EH_RETURN_STACKADJ_RTX)); + } + cfun->machine->epilogue_done = true; +- if (!sibcall_p) ++ if (sibcall_p) ++ emit_use (gen_rtx_REG (SImode, A0_REG)); ++ else + emit_jump_insn (gen_return ()); + } + +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch b/patches/gcc10.3/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch new file mode 100644 index 0000000..0099b1c --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0075-Fix-missing-mode-warnings-in-machine-descript.patch @@ -0,0 +1,54 @@ +From bed35098a6d3d0032716f23e5c631e7aa183f227 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Thu, 23 Feb 2023 12:42:32 +0900 +Subject: [PATCH 2/2] xtensa: Fix missing mode warnings in machine description + +gcc/ChangeLog: + + * config/xtensa/xtensa.md + (zero_cost_loop_start, zero_cost_loop_end, loop_end): + Add missing "SI:" to PLUS RTXes. 
+--- + gcc/config/xtensa/xtensa.md | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 4d976ece5..9c017dd19 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -2033,8 +2033,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] + "TARGET_LOOPS && optimize" + "loop\t%0, %l1_LEND" +@@ -2049,8 +2049,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a,m") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r"))] + "TARGET_LOOPS && optimize" +@@ -2066,8 +2066,8 @@ + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "register_operand" "=a") +- (plus (match_dup 0) +- (const_int -1))) ++ (plus:SI (match_dup 0) ++ (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] + "TARGET_LOOPS && optimize" + { +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch b/patches/gcc10.3/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch new file mode 100644 index 0000000..99109d5 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0076-Eliminate-the-use-of-callee-saved-register-th.patch @@ -0,0 +1,303 @@ +From 19e3ee5197e1de1ec6228cb54ff4ad8f27af5138 Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Fri, 27 Jan 2023 12:17:33 +0900 +Subject: [PATCH] xtensa: Eliminate the use of callee-saved register that + saves and restores only once + +In the case of the CALL0 ABI, values that must be retained before and +after function calls are placed in the callee-saved registers (A12 +through A15) and 
referenced later. However, it is often the case that +the save and the reference are each only once and a simple register- +register move (with two exceptions; i. the register saved to/restored +from is the stack pointer, ii. the function needs an additional stack +pointer adjustment to grow the stack). + +e.g. in the following example, if there are no other occurrences of +register A14: + +;; before + ; prologue { + ... + s32i.n a14, sp, 16 + ... ;; no frame pointer needed + ;; no additional stack growth + ; } prologue + ... + mov.n a14, a6 ;; A6 is not SP + ... + call0 foo + ... + mov.n a8, a14 ;; A8 is not SP + ... + ; epilogue { + ... + l32i.n a14, sp, 16 + ... + ; } epilogue + +It can be possible like this: + +;; after + ; prologue { + ... + (no save needed) + ... + ; } prologue + ... + s32i.n a6, sp, 16 ;; replaced with A14's slot + ... + call0 foo + ... + l32i.n a8, sp, 16 ;; through SP + ... + ; epilogue { + ... + (no restoration needed) + ... + ; } epilogue + +This patch adds the abovementioned logic to the function prologue/epilogue +RTL expander code. + +gcc/ChangeLog: + + * config/xtensa/xtensa.c (machine_function): Add new member + 'eliminated_callee_saved_bmp'. + (xtensa_can_eliminate_callee_saved_reg_p): New function to + determine whether the register can be eliminated or not. + (xtensa_expand_prologue): Add invoking the above function and + elimination the use of callee-saved register by using its stack + slot through the stack pointer (or the frame pointer if needed) + directly. + (xtensa_expand_epilogue): Modify to not emit register restoration + insn from its stack slot if the register is already eliminated. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/elim_callee_saved.c: New. 
+--- + gcc/config/xtensa/xtensa.c | 132 ++++++++++++++---- + .../gcc.target/xtensa/elim_callee_saved.c | 38 +++++ + 2 files changed, 145 insertions(+), 25 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c + +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index 3426494f5..6aea625d9 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -105,6 +105,7 @@ struct GTY(()) machine_function + bool epilogue_done; + bool inhibit_logues_a1_adjusts; + rtx last_logues_a9_content; ++ HOST_WIDE_INT eliminated_callee_saved_bmp; + }; + + static void xtensa_option_override (void); +@@ -3381,6 +3382,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags) + cfun->machine->last_logues_a9_content = GEN_INT (offset); + } + ++static bool ++xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno, ++ rtx_insn **p_insnS, ++ rtx_insn **p_insnR) ++{ ++ df_ref ref; ++ rtx_insn *insn, *insnS = NULL, *insnR = NULL; ++ rtx pattern; ++ ++ if (!optimize || !df || call_used_or_fixed_reg_p (regno)) ++ return false; ++ ++ for (ref = DF_REG_DEF_CHAIN (regno); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) != DF_REF_REGULAR ++ || DEBUG_INSN_P (insn = DF_REF_INSN (ref))) ++ continue; ++ else if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (SET_DEST (pattern)) ++ && REGNO (SET_DEST (pattern)) == regno ++ && REG_NREGS (SET_DEST (pattern)) == 1 ++ && REG_P (SET_SRC (pattern)) ++ && REGNO (SET_SRC (pattern)) != A1_REG) ++ { ++ if (insnS) ++ return false; ++ insnS = insn; ++ continue; ++ } ++ else ++ return false; ++ ++ for (ref = DF_REG_USE_CHAIN (regno); ++ ref; ref = DF_REF_NEXT_REG (ref)) ++ if (DF_REF_CLASS (ref) != DF_REF_REGULAR ++ || DEBUG_INSN_P (insn = DF_REF_INSN (ref))) ++ continue; ++ else if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (SET_SRC (pattern)) ++ && REGNO (SET_SRC (pattern)) == regno ++ && REG_NREGS (SET_SRC (pattern)) == 1 ++ && REG_P (SET_DEST 
(pattern)) ++ && REGNO (SET_DEST (pattern)) != A1_REG) ++ { ++ if (insnR) ++ return false; ++ insnR = insn; ++ continue; ++ } ++ else ++ return false; ++ ++ if (!insnS || !insnR) ++ return false; ++ ++ *p_insnS = insnS, *p_insnR = insnR; ++ ++ return true; ++} ++ + /* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ + #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) +@@ -3420,6 +3481,7 @@ xtensa_expand_prologue (void) + df_ref ref; + bool stack_pointer_needed = frame_pointer_needed + || crtl->calls_eh_return; ++ bool large_stack_needed; + + /* Check if the function body really needs the stack pointer. */ + if (!stack_pointer_needed && df) +@@ -3468,23 +3530,41 @@ xtensa_expand_prologue (void) + } + } + ++ large_stack_needed = total_size > 1024 ++ || (!callee_save_size && total_size > 128); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) +- { +- if (xtensa_call_save_reg(regno)) +- { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- rtx mem = gen_frame_mem (SImode, x); +- rtx reg = gen_rtx_REG (SImode, regno); ++ if (xtensa_call_save_reg(regno)) ++ { ++ rtx x = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, GEN_INT (offset)); ++ rtx mem = gen_frame_mem (SImode, x); ++ rtx_insn *insnS, *insnR; ++ ++ if (!large_stack_needed ++ && xtensa_can_eliminate_callee_saved_reg_p (regno, ++ &insnS, &insnR)) ++ { ++ if (frame_pointer_needed) ++ mem = replace_rtx (mem, stack_pointer_rtx, ++ hard_frame_pointer_rtx); ++ SET_DEST (PATTERN (insnS)) = mem; ++ df_insn_rescan (insnS); ++ SET_SRC (PATTERN (insnR)) = copy_rtx (mem); ++ df_insn_rescan (insnR); ++ cfun->machine->eliminated_callee_saved_bmp |= 1 << regno; ++ } ++ else ++ { ++ rtx reg = gen_rtx_REG (SImode, regno); + +- offset -= UNITS_PER_WORD; +- insn = emit_move_insn (mem, reg); +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (mem, reg)); +- } +- } +- if (total_size > 
1024 +- || (!callee_save_size && total_size > 128)) ++ insn = emit_move_insn (mem, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (mem, reg)); ++ } ++ offset -= UNITS_PER_WORD; ++ } ++ if (large_stack_needed) + xtensa_emit_adjust_stack_ptr (callee_save_size - total_size, + ADJUST_SP_NEED_NOTE); + } +@@ -3573,16 +3653,18 @@ xtensa_expand_epilogue (bool sibcall_p) + emit_insn (gen_blockage ()); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) +- { +- if (xtensa_call_save_reg(regno)) +- { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- +- offset -= UNITS_PER_WORD; +- emit_move_insn (gen_rtx_REG (SImode, regno), +- gen_frame_mem (SImode, x)); +- } +- } ++ if (xtensa_call_save_reg(regno)) ++ { ++ if (! (cfun->machine->eliminated_callee_saved_bmp ++ & (1 << regno))) ++ { ++ rtx x = gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, GEN_INT (offset)); ++ emit_move_insn (gen_rtx_REG (SImode, regno), ++ gen_frame_mem (SImode, x)); ++ } ++ offset -= UNITS_PER_WORD; ++ } + + if (cfun->machine->current_frame_size > 0) + { +diff --git a/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c +new file mode 100644 +index 000000000..cd3d6b9f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=call0" } */ ++ ++extern void foo(void); ++ ++/* eliminated one register (the reservoir of variable 'a') by its stack slot through the stack pointer. */ ++int test0(int a) { ++ int array[252]; /* the maximum bound of non-large stack. */ ++ foo(); ++ asm volatile("" : : "m"(array)); ++ return a; ++} ++ ++/* cannot eliminate if large stack is needed, because the offset from TOS cannot fit into single L32I/S32I instruction. */ ++int test1(int a) { ++ int array[10000]; /* requires large stack. 
*/ ++ foo(); ++ asm volatile("" : : "m"(array)); ++ return a; ++} ++ ++/* register A15 is the reservoir of the stack pointer and cannot be eliminated if the frame pointer is needed. ++ other registers still can be, but through the frame pointer rather the stack pointer. */ ++int test2(int a) { ++ int* p = __builtin_alloca(16); ++ foo(); ++ asm volatile("" : : "r"(p)); ++ return a; ++} ++ ++/* in -O0 the composite hard registers may still remain unsplitted at pro_and_epilogue and must be excluded. */ ++extern double bar(void); ++int __attribute__((optimize(0))) test3(int a) { ++ return bar() + a; ++} ++ ++/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 21 } } */ ++/* { dg-final { scan-assembler-times "a15, 8" 2 } } */ +-- +2.30.2 + diff --git a/patches/gcc10.3/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch b/patches/gcc10.3/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch new file mode 100644 index 0000000..f42c958 --- /dev/null +++ b/patches/gcc10.3/gcc-xtensa-0077-Eliminate-unnecessary-general-purpose-reg-reg.patch @@ -0,0 +1,159 @@ +From 33aef933318545ff759442b391d0a53aae43251e Mon Sep 17 00:00:00 2001 +From: Takayuki 'January June' Suwa +Date: Sat, 18 Feb 2023 13:43:34 +0900 +Subject: [PATCH] xtensa: Eliminate unnecessary general-purpose reg-reg + moves + +Register-register move instructions that can be easily seen as +unnecessary by the human eye may remain in the compiled result. 
+For example: + +/* example */ +double test(double a, double b) { + return __builtin_copysign(a, b); +} + +test: + add.n a3, a3, a3 + extui a5, a5, 31, 1 + ssai 1 + ;; Be in the same BB + src a7, a5, a3 ;; Replacing the destination doesn't + ;; violate any constraints of the + ;; operands + ;; No CALL insns in this span + ;; Both A3 and A7 are irrelevant to + ;; insns in this span + mov.n a3, a7 ;; An unnecessary reg-reg move + ;; A7 is not used after this + ret.n + +The last two instructions above, excluding the return instruction, +could be done like this: + + src a3, a5, a3 + +This symptom often occurs when handling DI/DFmode values with SImode +instructions. This patch solves the above problem using peephole2 +pattern. + +gcc/ChangeLog: + + * config/xtensa/xtensa.md: New peephole2 pattern that eliminates + the occurrence of general-purpose register used only once and for + transferring intermediate value. + +gcc/testsuite/ChangeLog: + + * gcc.target/xtensa/elim_GP_regmove_0.c: New test. + * gcc.target/xtensa/elim_GP_regmove_1.c: New test. 
+--- + gcc/config/xtensa/xtensa.md | 46 +++++++++++++++++++ + .../gcc.target/xtensa/elim_GP_regmove_0.c | 23 ++++++++++ + .../gcc.target/xtensa/elim_GP_regmove_1.c | 10 ++++ + 3 files changed, 79 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c + create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 9c017dd19..628b27b32 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -3055,3 +3055,49 @@ FALLTHRU:; + operands[1] = GEN_INT (imm0); + operands[2] = GEN_INT (imm1); + }) ++ ++(define_peephole2 ++ [(set (match_operand 0 "register_operand") ++ (match_operand 1 "register_operand"))] ++ "REG_NREGS (operands[0]) == 1 && GP_REG_P (REGNO (operands[0])) ++ && REG_NREGS (operands[1]) == 1 && GP_REG_P (REGNO (operands[1])) ++ && peep2_reg_dead_p (1, operands[1])" ++ [(const_int 0)] ++{ ++ basic_block bb = BLOCK_FOR_INSN (curr_insn); ++ rtx_insn *head = BB_HEAD (bb), *insn; ++ rtx dest = operands[0], src = operands[1], pattern, t_dest, dest_orig; ++ for (insn = PREV_INSN (curr_insn); ++ insn && insn != head; ++ insn = PREV_INSN (insn)) ++ if (CALL_P (insn)) ++ break; ++ else if (INSN_P (insn)) ++ { ++ if (GET_CODE (pattern = PATTERN (insn)) == SET ++ && REG_P (t_dest = SET_DEST (pattern)) ++ && REG_NREGS (t_dest) == 1 ++ && REGNO (t_dest) == REGNO (src)) ++ { ++ dest_orig = SET_DEST (pattern); ++ SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest), ++ REGNO (dest)); ++ extract_insn (insn); ++ if (!constrain_operands (true, get_enabled_alternatives (insn))) ++ { ++ SET_DEST (pattern) = dest_orig; ++ goto ABORT; ++ } ++ df_insn_rescan (insn); ++ goto FALLTHRU; ++ } ++ if (reg_overlap_mentioned_p (dest, pattern) ++ || reg_overlap_mentioned_p (src, pattern) ++ || set_of (dest, insn) ++ || set_of (src, insn)) ++ break; ++ } ++ABORT: ++ FAIL; ++FALLTHRU:; ++}) +diff --git 
a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c +new file mode 100644 +index 000000000..5c195c357 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fpeephole2" } */ ++ ++/* can be processed */ ++double test0(double a, double b) { ++ return __builtin_copysign(a, b); ++} ++ ++/* cannot be processed: due to violate '0' constraint of the 2nd source operand. */ ++int test1(int a, int b) { ++ int c; ++ asm volatile ("" : "=a"(c) : "r"(a), "0"(b)); ++ return c; ++} ++ ++/* cannot be processed: due to violate '&' constraint of the destination operand. */ ++int test2(int a) { ++ int b; ++ asm volatile ("" : "=&a"(b) : "r"(a)); ++ return b; ++} ++ ++/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c +new file mode 100644 +index 000000000..a13ef8188 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fpeephole2 -mabi=windowed" } */ ++ ++/* cannot be processed: due to violate 'a' constraint of the destination operand of the stack adjustment instruction. 
*/ ++void test(void) { ++ int buffer[8192]; ++ asm volatile ("" : : "m"(buffer)); ++} ++ ++/* { dg-final { scan-assembler-times "movsp" 1 } } */ +-- +2.30.2 + diff --git a/patches/gcc11.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch b/patches/gcc11.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch deleted file mode 100644 index 336b961..0000000 --- a/patches/gcc11.1/gcc-xtensa-0006-make-trying-to-replace-l32r-with-movi-sll.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f1568d0597ffd3027eebefc2cf31646ab5d5ca19 Mon Sep 17 00:00:00 2001 -From: Takayuki 'January June' Suwa -Date: Sun, 19 Dec 2021 22:44:03 +0900 -Subject: [PATCH] gcc: xtensa: make trying to replace 'l32r' with 'movi' + - 'slli' regardless of optimizing for size or not, because 'l32r' is much - slower than the latter on ESP8266 - ---- - gcc/config/xtensa/xtensa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index 37c6ac1fd..6cd9d5528 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -1074,8 +1074,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) - { - /* Try to emit MOVI + SLLI sequence, that is smaller - than L32R + literal. */ -- if (optimize_size && mode == SImode && CONST_INT_P (src) -- && register_operand (dst, mode)) -+ if (optimize >= 1 && ! optimize_debug && mode == SImode -+ && CONST_INT_P (src) && register_operand (dst, mode)) - { - HOST_WIDE_INT srcval = INTVAL (src); - int shift = ctz_hwi (srcval); --- -2.20.1 -