Message ID | f9eac5a4-0567-4da8-8eb7-803a5d6e8324@foss.arm.com |
---|---|
State | New |
Headers | show |
Series | [AArch64] Refactor of aarch64-ldpstp.md | expand |
Hi, Since I rebased the patch that this is based on, I have also rebased this patch. Jackson. On 09/12/2017 07:15 PM, Jackson Woodruff wrote: > Hi all, > > This patch removes a lot of duplicated code in aarch64-ldpstp.md. > > The patterns that did not previously generate a base register now > do not check for aarch64_mem_pair_operand in the pattern. This has > been extracted to a check in aarch64_operands_ok_for_ldpstp. > > All patterns in the file used to have explicit switching code to > swap loads and stores that were in the wrong order. > > This has been extracted into aarch64_swap_ldrstr_operands > and aarch64_gen_adjusted_ldpstp. > > This patch is based on my patch here: > https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00346.html so should go in > after it. > > > Bootstrap and regtest OK on AArch64. > > OK for trunk? > > Jackson. > > gcc/ > > 2017-09-07 Jackson Woodruff <jackson.woodruff@arm.com> > > * config/aarch64/aarch64-ldpstp.md: Replace uses of > aarch64_mem_pair_operand with memory_operand and delete > operand swapping code. > * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): > Add check for legitimate_address. > (aarch64_gen_adjusted_ldpstp): Add swap. > (aarch64_swap_ldrstr_operands): New. > * config/aarch64/aarch64-protos.h: Add > aarch64_swap_ldrstr_operands. 
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md index 14e860d258e548d4118d957675f8bdbb74615337..126bb702f6399d13ab2dc6c8b99bcbbf3b3a7516 100644 --- a/gcc/config/aarch64/aarch64-ldpstp.md +++ b/gcc/config/aarch64/aarch64-ldpstp.md @@ -20,26 +20,18 @@ (define_peephole2 [(set (match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "aarch64_mem_pair_operand" "")) + (match_operand:GPI 1 "memory_operand" "")) (set (match_operand:GPI 2 "register_operand" "") (match_operand:GPI 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) (define_peephole2 - [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:GPI 0 "memory_operand" "") (match_operand:GPI 1 "aarch64_reg_or_zero" "")) (set (match_operand:GPI 2 "memory_operand" "") (match_operand:GPI 3 "aarch64_reg_or_zero" ""))] @@ -47,39 +39,23 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 0); }) (define_peephole2 [(set (match_operand:GPF 0 "register_operand" "") - (match_operand:GPF 1 "aarch64_mem_pair_operand" "")) + (match_operand:GPF 1 "memory_operand" "")) (set (match_operand:GPF 2 "register_operand" "") (match_operand:GPF 3 
"memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) (define_peephole2 - [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:GPF 0 "memory_operand" "") (match_operand:GPF 1 "aarch64_reg_or_fp_zero" "")) (set (match_operand:GPF 2 "memory_operand" "") (match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))] @@ -87,39 +63,23 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 0); }) (define_peephole2 [(set (match_operand:DREG 0 "register_operand" "") - (match_operand:DREG 1 "aarch64_mem_pair_operand" "")) + (match_operand:DREG 1 "memory_operand" "")) (set (match_operand:DREG2 2 "register_operand" "") (match_operand:DREG2 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) 
(define_peephole2 - [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:DREG 0 "memory_operand" "") (match_operand:DREG 1 "register_operand" "")) (set (match_operand:DREG2 2 "memory_operand" "") (match_operand:DREG2 3 "register_operand" ""))] @@ -128,57 +88,33 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 0); }) ;; Handle sign/zero extended consecutive load/store. (define_peephole2 [(set (match_operand:DI 0 "register_operand" "") - (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" ""))) + (sign_extend:DI (match_operand:SI 1 "memory_operand" ""))) (set (match_operand:DI 2 "register_operand" "") (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))] "aarch64_operands_ok_for_ldpstp (operands, true, SImode)" [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1))) (set (match_dup 2) (sign_extend:DI (match_dup 3)))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) (define_peephole2 [(set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" ""))) + (zero_extend:DI (match_operand:SI 1 "memory_operand" ""))) (set (match_operand:DI 2 "register_operand" "") (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))] "aarch64_operands_ok_for_ldpstp (operands, true, SImode)" [(parallel [(set (match_dup 0) 
(zero_extend:DI (match_dup 1))) (set (match_dup 2) (zero_extend:DI (match_dup 3)))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) ;; Handle storing of a floating point zero. @@ -186,7 +122,7 @@ ;; as aarch64_operands_ok_for_ldpstp checks that the modes are ;; compatible. (define_peephole2 - [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:DSX 0 "memory_operand" "") (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" "")) (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "") (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))] @@ -224,18 +160,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN)) DONE; else @@ -256,18 +180,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN)) 
DONE; else @@ -288,18 +200,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND)) DONE; else @@ -320,18 +220,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND)) DONE; else @@ -352,18 +240,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN)) DONE; else @@ -384,18 +260,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > 
INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN)) DONE; else diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e67c2ed5e80a2e4f4858cfcd40573efd2a8a4416..2cee3ff5b695db2fecd73471c398408e4657b8ca 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -469,6 +469,7 @@ int aarch64_ccmp_mode_to_code (machine_mode mode); bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset); bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode); bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, scalar_mode); +void aarch64_swap_ldrstr_operands (rtx *, bool); extern void aarch64_asm_output_pool_epilogue (FILE *, const char *, tree, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ea2ff88f91a18b3fcd43bd0dcafd9ebdcc0b2366..4c5ed9610cb8bbb337bbfcb9260d7fd227c68ce8 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14770,9 +14770,18 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, /* In increasing order, the last load can clobber the address. */ if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2)) - return false; + return false; } + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. 
*/ + if (!(aarch64_legitimate_address_p (GET_MODE (mem_1), + XEXP (mem_1, 0), PARALLEL, 0) + || aarch64_legitimate_address_p (GET_MODE (mem_2), + XEXP (mem_2, 0), PARALLEL, 0))) + return false; + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) rclass_1 = FP_REGS; else @@ -14790,6 +14799,40 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, return true; } +/* Given OPERANDS of consecutive load/store that can be merged, + swap them if they are not in ascending order. */ +void +aarch64_swap_ldrstr_operands (rtx* operands, bool load) +{ + rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2; + HOST_WIDE_INT offval_1, offval_2; + + if (load) + { + mem_1 = operands[1]; + mem_2 = operands[3]; + } + else + { + mem_1 = operands[0]; + mem_2 = operands[2]; + } + + extract_base_offset_in_addr (mem_1, &base_1, &offset_1); + extract_base_offset_in_addr (mem_2, &base_2, &offset_2); + + offval_1 = INTVAL (offset_1); + offval_2 = INTVAL (offset_2); + + if (offval_1 > offval_2) + { + /* Irrespective of whether this is a load or a store, + we do the same swap. */ + std::swap (operands[0], operands[2]); + std::swap (operands[1], operands[3]); + } +} + /* Given OPERANDS of consecutive load/store, check if we can merge them into ldp/stp by adjusting the offset. LOAD is true if they are load instructions. MODE is the mode of memory operands. 
@@ -14949,7 +14992,7 @@ bool aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, scalar_mode mode, RTX_CODE code) { - rtx base, offset, t1, t2; + rtx base, offset_1, offset_2, t1, t2; rtx mem_1, mem_2, mem_3, mem_4; HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize; @@ -14969,13 +15012,24 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, gcc_assert (code == UNKNOWN); } - extract_base_offset_in_addr (mem_1, &base, &offset); - gcc_assert (base != NULL_RTX && offset != NULL_RTX); + extract_base_offset_in_addr (mem_1, &base, &offset_1); + extract_base_offset_in_addr (mem_2, &base, &offset_2); + gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX + && offset_2 != NULL_RTX); + + if (INTVAL (offset_1) > INTVAL (offset_2)) + { + std::swap (operands[0], operands[6]); + std::swap (operands[1], operands[7]); + std::swap (operands[2], operands[4]); + std::swap (operands[3], operands[5]); + } + /* Adjust offset thus it can fit in ldp/stp instruction. */ msize = GET_MODE_SIZE (mode); stp_off_limit = msize * 0x40; - off_val = INTVAL (offset); + off_val = INTVAL (offset_1); abs_off = (off_val < 0) ? -off_val : off_val; new_off = abs_off % stp_off_limit; adj_off = abs_off - new_off;
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md index 14e860d258e548d4118d957675f8bdbb74615337..126bb702f6399d13ab2dc6c8b99bcbbf3b3a7516 100644 --- a/gcc/config/aarch64/aarch64-ldpstp.md +++ b/gcc/config/aarch64/aarch64-ldpstp.md @@ -20,26 +20,18 @@ (define_peephole2 [(set (match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "aarch64_mem_pair_operand" "")) + (match_operand:GPI 1 "memory_operand" "")) (set (match_operand:GPI 2 "register_operand" "") (match_operand:GPI 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) (define_peephole2 - [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:GPI 0 "memory_operand" "") (match_operand:GPI 1 "aarch64_reg_or_zero" "")) (set (match_operand:GPI 2 "memory_operand" "") (match_operand:GPI 3 "aarch64_reg_or_zero" ""))] @@ -47,39 +39,23 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 0); }) (define_peephole2 [(set (match_operand:GPF 0 "register_operand" "") - (match_operand:GPF 1 "aarch64_mem_pair_operand" "")) + (match_operand:GPF 1 "memory_operand" "")) (set (match_operand:GPF 2 "register_operand" "") (match_operand:GPF 3 
"memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) (define_peephole2 - [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:GPF 0 "memory_operand" "") (match_operand:GPF 1 "aarch64_reg_or_fp_zero" "")) (set (match_operand:GPF 2 "memory_operand" "") (match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))] @@ -87,39 +63,23 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 0); }) (define_peephole2 [(set (match_operand:DREG 0 "register_operand" "") - (match_operand:DREG 1 "aarch64_mem_pair_operand" "")) + (match_operand:DREG 1 "memory_operand" "")) (set (match_operand:DREG2 2 "register_operand" "") (match_operand:DREG2 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) 
(define_peephole2 - [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:DREG 0 "memory_operand" "") (match_operand:DREG 1 "register_operand" "")) (set (match_operand:DREG2 2 "memory_operand" "") (match_operand:DREG2 3 "register_operand" ""))] @@ -128,57 +88,33 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 0); }) ;; Handle sign/zero extended consecutive load/store. (define_peephole2 [(set (match_operand:DI 0 "register_operand" "") - (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" ""))) + (sign_extend:DI (match_operand:SI 1 "memory_operand" ""))) (set (match_operand:DI 2 "register_operand" "") (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))] "aarch64_operands_ok_for_ldpstp (operands, true, SImode)" [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1))) (set (match_dup 2) (sign_extend:DI (match_dup 3)))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) (define_peephole2 [(set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" ""))) + (zero_extend:DI (match_operand:SI 1 "memory_operand" ""))) (set (match_operand:DI 2 "register_operand" "") (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))] "aarch64_operands_ok_for_ldpstp (operands, true, SImode)" [(parallel [(set (match_dup 0) 
(zero_extend:DI (match_dup 1))) (set (match_dup 2) (zero_extend:DI (match_dup 3)))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, 1); }) ;; Handle storing of a floating point zero. @@ -186,7 +122,7 @@ ;; as aarch64_operands_ok_for_ldpstp checks that the modes are ;; compatible. (define_peephole2 - [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:DSX 0 "memory_operand" "") (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" "")) (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "") (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))] @@ -224,18 +160,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN)) DONE; else @@ -256,18 +180,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN)) 
DONE; else @@ -288,18 +200,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND)) DONE; else @@ -320,18 +220,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND)) DONE; else @@ -352,18 +240,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN)) DONE; else @@ -384,18 +260,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > 
INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN)) DONE; else diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index ed5d373dc105feec5bff3885cef0f4d7f8c75b52..3ba1cfbf73f0a313ead91a08b83bc582457d952e 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -470,6 +470,7 @@ int aarch64_ccmp_mode_to_code (machine_mode mode); bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset); bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode); bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, scalar_mode); +void aarch64_swap_ldrstr_operands (rtx *, bool); extern void aarch64_asm_output_pool_epilogue (FILE *, const char *, tree, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index dec920f996d7591e180ac844d8fdf1b18a3e7a8d..276e63c733f596a24ee1c5e2411d7a5fb6b88964 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14772,9 +14772,18 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, /* In increasing order, the last load can clobber the address. */ if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2)) - return false; + return false; } + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. 
*/ + if (!(aarch64_legitimate_address_p (GET_MODE (mem_1), + XEXP (mem_1, 0), PARALLEL, 0) + || aarch64_legitimate_address_p (GET_MODE (mem_2), + XEXP (mem_2, 0), PARALLEL, 0))) + return false; + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) rclass_1 = FP_REGS; else @@ -14792,6 +14801,40 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, return true; } +/* Given OPERANDS of consecutive load/store that can be merged, + swap them if they are not in ascending order. */ +void +aarch64_swap_ldrstr_operands (rtx* operands, bool load) +{ + rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2; + HOST_WIDE_INT offval_1, offval_2; + + if (load) + { + mem_1 = operands[1]; + mem_2 = operands[3]; + } + else + { + mem_1 = operands[0]; + mem_2 = operands[2]; + } + + extract_base_offset_in_addr (mem_1, &base_1, &offset_1); + extract_base_offset_in_addr (mem_2, &base_2, &offset_2); + + offval_1 = INTVAL (offset_1); + offval_2 = INTVAL (offset_2); + + if (offval_1 > offval_2) + { + /* Irrespective of whether this is a load or a store, + we do the same swap. */ + std::swap (operands[0], operands[2]); + std::swap (operands[1], operands[3]); + } +} + /* Given OPERANDS of consecutive load/store, check if we can merge them into ldp/stp by adjusting the offset. LOAD is true if they are load instructions. MODE is the mode of memory operands. 
@@ -14951,7 +14994,7 @@ bool aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, scalar_mode mode, RTX_CODE code) { - rtx base, offset, t1, t2; + rtx base, offset_1, offset_2, t1, t2; rtx mem_1, mem_2, mem_3, mem_4; HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize; @@ -14971,13 +15014,24 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, gcc_assert (code == UNKNOWN); } - extract_base_offset_in_addr (mem_1, &base, &offset); - gcc_assert (base != NULL_RTX && offset != NULL_RTX); + extract_base_offset_in_addr (mem_1, &base, &offset_1); + extract_base_offset_in_addr (mem_2, &base, &offset_2); + gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX + && offset_2 != NULL_RTX); + + if (INTVAL (offset_1) > INTVAL (offset_2)) + { + std::swap (operands[0], operands[6]); + std::swap (operands[1], operands[7]); + std::swap (operands[2], operands[4]); + std::swap (operands[3], operands[5]); + } + /* Adjust offset thus it can fit in ldp/stp instruction. */ msize = GET_MODE_SIZE (mode); stp_off_limit = msize * 0x40; - off_val = INTVAL (offset); + off_val = INTVAL (offset_1); abs_off = (off_val < 0) ? -off_val : off_val; new_off = abs_off % stp_off_limit; adj_off = abs_off - new_off;