Message ID | 07fc21e1-8747-eca0-4dde-4f364ef1a414@foss.arm.com |
---|---|
State | New |
Headers | show |
Series | [AArch64] Merge stores of D register values of different modes | expand |
Thanks for doing this, looks good to me FWIW. I was just wondering:

Jackson Woodruff <jackson.woodruff@foss.arm.com> writes:
> @@ -14712,6 +14712,11 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
>    if (!rtx_equal_p (base_1, base_2))
>      return false;
>
> +  /* Check that the operands are of the same size. */
> +  if (GET_MODE_SIZE (GET_MODE (mem_1))
> +      != GET_MODE_SIZE (GET_MODE (mem_2)))
> +    return false;
> +
>    offval_1 = INTVAL (offset_1);
>    offval_2 = INTVAL (offset_2);
>    msize = GET_MODE_SIZE (mode);

when can this trigger? Your iterators always seem to enforce correct
pairings, so maybe this should be an assert instead.

Thanks,
Richard
On 09/12/2017 07:32 PM, Richard Sandiford wrote:
> Thanks for doing this, looks good to me FWIW. I was just wondering:
>
> Jackson Woodruff <jackson.woodruff@foss.arm.com> writes:
>> @@ -14712,6 +14712,11 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
>>    if (!rtx_equal_p (base_1, base_2))
>>      return false;
>>
>> +  /* Check that the operands are of the same size. */
>> +  if (GET_MODE_SIZE (GET_MODE (mem_1))
>> +      != GET_MODE_SIZE (GET_MODE (mem_2)))
>> +    return false;
>> +
>>    offval_1 = INTVAL (offset_1);
>>    offval_2 = INTVAL (offset_2);
>>    msize = GET_MODE_SIZE (mode);
>
> when can this trigger? Your iterators always seem to enforce correct
> pairings, so maybe this should be an assert instead.

Yes, it's true that this should never be triggered. I've changed it to an assert.

I have also rebased on top of the renaming of load/store attributes patch https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00702.html which had some conflicts with this.

Is the updated patch OK for trunk?

Thanks,
Jackson.
> > Thanks, > Richard > diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md index e8dda42c2dd1e30c4607c67a2156ff7813bd89ea..14e860d258e548d4118d957675f8bdbb74615337 100644 --- a/gcc/config/aarch64/aarch64-ldpstp.md +++ b/gcc/config/aarch64/aarch64-ldpstp.md @@ -99,10 +99,10 @@ }) (define_peephole2 - [(set (match_operand:VD 0 "register_operand" "") - (match_operand:VD 1 "aarch64_mem_pair_operand" "")) - (set (match_operand:VD 2 "register_operand" "") - (match_operand:VD 3 "memory_operand" ""))] + [(set (match_operand:DREG 0 "register_operand" "") + (match_operand:DREG 1 "aarch64_mem_pair_operand" "")) + (set (match_operand:DREG2 2 "register_operand" "") + (match_operand:DREG2 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] @@ -119,11 +119,12 @@ }) (define_peephole2 - [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "") - (match_operand:VD 1 "register_operand" "")) - (set (match_operand:VD 2 "memory_operand" "") - (match_operand:VD 3 "register_operand" ""))] - "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)" + [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "") + (match_operand:DREG 1 "register_operand" "")) + (set (match_operand:DREG2 2 "memory_operand" "") + (match_operand:DREG2 3 "register_operand" ""))] + "TARGET_SIMD + && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { @@ -138,7 +139,6 @@ } }) - ;; Handle sign/zero extended consecutive load/store. (define_peephole2 @@ -181,6 +181,30 @@ } }) +;; Handle storing of a floating point zero. +;; We can match modes that won't work for a stp instruction +;; as aarch64_operands_ok_for_ldpstp checks that the modes are +;; compatible. 
+(define_peephole2 + [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "") + (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" "")) + (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "") + (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))] + "aarch64_operands_ok_for_ldpstp (operands, false, DImode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +{ + rtx base, offset_1, offset_2; + + extract_base_offset_in_addr (operands[0], &base, &offset_1); + extract_base_offset_in_addr (operands[2], &base, &offset_2); + if (INTVAL (offset_1) > INTVAL (offset_2)) + { + std::swap (operands[0], operands[2]); + std::swap (operands[1], operands[3]); + } +}) + ;; Handle consecutive load/store whose offset is out of the range ;; supported by ldp/ldpsw/stp. We firstly adjust offset in a scratch ;; register, then merge them into ldp/ldpsw/stp by using the adjusted diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8f045c210502330af9d47f6adfd46a9e36328b74..90f9415b3986eb737ecdfeed43fe798cdbb8334e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -172,11 +172,11 @@ [(set_attr "type" "neon_store1_1reg<q>")] ) -(define_insn "load_pair<mode>" - [(set (match_operand:VD 0 "register_operand" "=w") - (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) - (set (match_operand:VD 2 "register_operand" "=w") - (match_operand:VD 3 "memory_operand" "m"))] +(define_insn "load_pair<DREG:mode><DREG2:mode>" + [(set (match_operand:DREG 0 "register_operand" "=w") + (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:DREG2 2 "register_operand" "=w") + (match_operand:DREG2 3 "memory_operand" "m"))] "TARGET_SIMD && rtx_equal_p (XEXP (operands[3], 0), plus_constant (Pmode, @@ -186,11 +186,11 @@ [(set_attr "type" "neon_ldp")] ) -(define_insn "store_pair<mode>" - [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") - 
(match_operand:VD 1 "register_operand" "w")) - (set (match_operand:VD 2 "memory_operand" "=m") - (match_operand:VD 3 "register_operand" "w"))] +(define_insn "vec_store_pair<DREG:mode><DREG2:mode>" + [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:DREG 1 "register_operand" "w")) + (set (match_operand:DREG2 2 "memory_operand" "=m") + (match_operand:DREG2 3 "register_operand" "w"))] "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0), plus_constant (Pmode, diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index de1fbdca950b452f5616f37eb0ea719ee793cfdc..ea2ff88f91a18b3fcd43bd0dcafd9ebdcc0b2366 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3191,10 +3191,10 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2, switch (mode) { case E_DImode: - return gen_store_pairdi (mem1, reg1, mem2, reg2); + return gen_store_pair_dw_DIDI (mem1, reg1, mem2, reg2); case E_DFmode: - return gen_store_pairdf (mem1, reg1, mem2, reg2); + return gen_store_pair_dw_DFDF (mem1, reg1, mem2, reg2); default: gcc_unreachable (); @@ -3211,10 +3211,10 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2, switch (mode) { case E_DImode: - return gen_load_pairdi (reg1, mem1, reg2, mem2); + return gen_load_pair_dw_DIDI (reg1, mem1, reg2, mem2); case E_DFmode: - return gen_load_pairdf (reg1, mem1, reg2, mem2); + return gen_load_pair_dw_DFDF (reg1, mem1, reg2, mem2); default: gcc_unreachable (); @@ -14751,6 +14751,10 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, if (!rtx_equal_p (base_1, base_2)) return false; + /* The operands must be of the same size. 
*/ + gcc_assert (GET_MODE_SIZE (GET_MODE (mem_1)) + == GET_MODE_SIZE (GET_MODE (mem_2))); + offval_1 = INTVAL (offset_1); offval_2 = INTVAL (offset_2); msize = GET_MODE_SIZE (mode); diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f8cdb063546afaf3ca977d078da6417729af88a6..46af41379621927ab54835c7adc4cd2b5057fbfe 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1224,15 +1224,15 @@ ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. -(define_insn "load_pairsi" - [(set (match_operand:SI 0 "register_operand" "=r,*w") - (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SI 2 "register_operand" "=r,*w") - (match_operand:SI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "load_pair_sw_<SX:MODE><SX2:MODE>" + [(set (match_operand:SX 0 "register_operand" "=r,w") + (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:SX2 2 "register_operand" "=r,w") + (match_operand:SX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ ldp\\t%w0, %w2, %1 ldp\\t%s0, %s2, %1" @@ -1240,15 +1240,16 @@ (set_attr "fp" "*,yes")] ) -(define_insn "load_pairdi" - [(set (match_operand:DI 0 "register_operand" "=r,*w") - (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DI 2 "register_operand" "=r,*w") - (match_operand:DI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DImode)))" +;; Storing different modes that can still be merged +(define_insn "load_pair_dw_<DX:MODE><DX2:MODE>" + [(set (match_operand:DX 0 "register_operand" "=r,w") + (match_operand:DX 1 "aarch64_mem_pair_operand" 
"Ump,Ump")) + (set (match_operand:DX2 2 "register_operand" "=r,w") + (match_operand:DX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ ldp\\t%x0, %x2, %1 ldp\\t%d0, %d2, %1" @@ -1257,17 +1258,18 @@ ) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. -(define_insn "store_pairsi" - [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:SI 2 "memory_operand" "=m,m") - (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "store_pair_sw_<SX:MODE><SX2:MODE>" + [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:SX2 2 "memory_operand" "=m,m") + (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ stp\\t%w1, %w3, %0 stp\\t%s1, %s3, %0" @@ -1275,15 +1277,16 @@ (set_attr "fp" "*,yes")] ) -(define_insn "store_pairdi" - [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:DI 2 "memory_operand" "=m,m") - (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DImode)))" +;; Storing different modes that can still be merged +(define_insn "store_pair_dw_<DX:MODE><DX2:MODE>" + [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:DX2 2 "memory_operand" "=m,m") + (match_operand:DX2 3 
"aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ stp\\t%x1, %x3, %0 stp\\t%d1, %d3, %0" @@ -1291,74 +1294,6 @@ (set_attr "fp" "*,yes")] ) -;; Operands 1 and 3 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. -(define_insn "load_pairsf" - [(set (match_operand:SF 0 "register_operand" "=w,*r") - (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SF 2 "register_operand" "=w,*r") - (match_operand:SF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SFmode)))" - "@ - ldp\\t%s0, %s2, %1 - ldp\\t%w0, %w2, %1" - [(set_attr "type" "neon_load1_2reg,load_8") - (set_attr "fp" "yes,*")] -) - -(define_insn "load_pairdf" - [(set (match_operand:DF 0 "register_operand" "=w,*r") - (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DF 2 "register_operand" "=w,*r") - (match_operand:DF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DFmode)))" - "@ - ldp\\t%d0, %d2, %1 - ldp\\t%x0, %x2, %1" - [(set_attr "type" "neon_load1_2reg,load_16") - (set_attr "fp" "yes,*")] -) - -;; Operands 0 and 2 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. 
-(define_insn "store_pairsf" - [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:SF 2 "memory_operand" "=m,m") - (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SFmode)))" - "@ - stp\\t%s1, %s3, %0 - stp\\t%w1, %w3, %0" - [(set_attr "type" "neon_store1_2reg,store_8") - (set_attr "fp" "yes,*")] -) - -(define_insn "store_pairdf" - [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:DF 2 "memory_operand" "=m,m") - (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DFmode)))" - "@ - stp\\t%d1, %d3, %0 - stp\\t%x1, %x3, %0" - [(set_attr "type" "neon_store1_2reg,store_16") - (set_attr "fp" "yes,*")] -) - ;; Load pair with post-index writeback. This is primarily used in function ;; epilogues. (define_insn "loadwb_pair<GPI:mode>_<P:mode>" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 477dc35daf6a1184be15d942c62a111604f62f3c..0e1e9704e3866136959c08ba10077fa31c72c0ce 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -69,6 +69,12 @@ ;; Double vector modes. (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF]) +;; All modes stored in registers d0-d31. +(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF]) + +;; Copy of the above. 
+(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF]) + ;; vector, 64-bit container, all integer modes (define_mode_iterator VD_BHSI [V8QI V4HI V2SI]) @@ -235,6 +241,18 @@ ;; Double scalar modes (define_mode_iterator DX [DI DF]) +;; Duplicate of the above +(define_mode_iterator DX2 [DI DF]) + +;; Single scalar modes +(define_mode_iterator SX [SI SF]) + +;; Duplicate of the above +(define_mode_iterator SX2 [SI SF]) + +;; Single and double integer and float modes +(define_mode_iterator DSX [DF DI SF SI]) + ;; Modes available for <f>mul lane operations. (define_mode_iterator VMUL [V4HI V8HI V2SI V4SI (V4HF "TARGET_SIMD_F16INST") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 11243c4ce00aa7d16a886bb24b01180801c68f4e..ee6e050dd839c329baa05bdfe878b786f1def969 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -62,6 +62,10 @@ (and (match_code "const_double") (match_test "aarch64_float_const_zero_rtx_p (op)")))) +(define_predicate "aarch64_reg_zero_or_fp_zero" + (ior (match_operand 0 "aarch64_reg_or_fp_zero") + (match_operand 0 "aarch64_reg_or_zero"))) + (define_predicate "aarch64_reg_zero_or_m1_or_1" (and (match_code "reg,subreg,const_int") (ior (match_operand 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c new file mode 100644 index 0000000000000000000000000000000000000000..2d982f3389b668f2042d48ba3db04e619fd999f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c @@ -0,0 +1,20 @@ +/* { dg-options "-O2" } */ + +typedef float __attribute__ ((vector_size (8))) vec; + +struct pair +{ + vec e1; + double e2; +}; + +vec tmp; + +void +stp (struct pair *p) +{ + p->e1 = tmp; + p->e2 = 1.0; + + /* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c new file mode 100644 
index 0000000000000000000000000000000000000000..06607de6b3e36a4d759d915a9f7880284391aa08 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c @@ -0,0 +1,47 @@ +/* { dg-options "-O2" } */ + +struct pair +{ + double a; + long int b; +}; + +void +stp (struct pair *p) +{ + p->a = 0.0; + p->b = 1; +} + +/* { dg-final { scan-assembler "stp\txzr, x\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */ + +void +stp2 (struct pair *p) +{ + p->a = 0.0; + p->b = 0; +} + +struct reverse_pair +{ + long int a; + double b; +}; + +void +stp_reverse (struct reverse_pair *p) +{ + p->a = 1; + p->b = 0.0; +} + +/* { dg-final { scan-assembler "stp\tx\[0-9\]+, xzr, \\\[x\[0-9\]+\\\]" } } */ + +void +stp_reverse2 (struct reverse_pair *p) +{ + p->a = 0; + p->b = 0.0; +} + +/* { dg-final { scan-assembler-times "stp\txzr, xzr, \\\[x\[0-9\]+\\\]" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c new file mode 100644 index 0000000000000000000000000000000000000000..1a47e233814e564d549245683a4e59fdb422bdad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c @@ -0,0 +1,30 @@ +/* { dg-options "-O2" } */ + +typedef float __attribute__ ((vector_size (8))) fvec; +typedef int __attribute__ ((vector_size (8))) ivec; + +struct pair +{ + double a; + fvec b; +}; + +void ldp (double *a, fvec *b, struct pair *p) +{ + *a = p->a; + *b = p->b; +} + +struct vec_pair +{ + fvec a; + ivec b; +}; + +void ldp2 (fvec *a, ivec *b, struct vec_pair *p) +{ + *a = p->a; + *b = p->b; +} + +/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md index e8dda42c2dd1e30c4607c67a2156ff7813bd89ea..14e860d258e548d4118d957675f8bdbb74615337 100644 --- a/gcc/config/aarch64/aarch64-ldpstp.md +++ b/gcc/config/aarch64/aarch64-ldpstp.md @@ -99,10 +99,10 @@ }) (define_peephole2 - [(set (match_operand:VD 0 "register_operand" "") - (match_operand:VD 1 "aarch64_mem_pair_operand" "")) - (set (match_operand:VD 2 "register_operand" "") - (match_operand:VD 3 "memory_operand" ""))] + [(set (match_operand:DREG 0 "register_operand" "") + (match_operand:DREG 1 "aarch64_mem_pair_operand" "")) + (set (match_operand:DREG2 2 "register_operand" "") + (match_operand:DREG2 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] @@ -119,11 +119,12 @@ }) (define_peephole2 - [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "") - (match_operand:VD 1 "register_operand" "")) - (set (match_operand:VD 2 "memory_operand" "") - (match_operand:VD 3 "register_operand" ""))] - "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)" + [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "") + (match_operand:DREG 1 "register_operand" "")) + (set (match_operand:DREG2 2 "memory_operand" "") + (match_operand:DREG2 3 "register_operand" ""))] + "TARGET_SIMD + && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { @@ -138,7 +139,6 @@ } }) - ;; Handle sign/zero extended consecutive load/store. (define_peephole2 @@ -181,6 +181,30 @@ } }) +;; Handle storing of a floating point zero. +;; We can match modes that won't work for a stp instruction +;; as aarch64_operands_ok_for_ldpstp checks that the modes are +;; compatible. 
+(define_peephole2 + [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "") + (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" "")) + (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "") + (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))] + "aarch64_operands_ok_for_ldpstp (operands, false, DImode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +{ + rtx base, offset_1, offset_2; + + extract_base_offset_in_addr (operands[0], &base, &offset_1); + extract_base_offset_in_addr (operands[2], &base, &offset_2); + if (INTVAL (offset_1) > INTVAL (offset_2)) + { + std::swap (operands[0], operands[2]); + std::swap (operands[1], operands[3]); + } +}) + ;; Handle consecutive load/store whose offset is out of the range ;; supported by ldp/ldpsw/stp. We firstly adjust offset in a scratch ;; register, then merge them into ldp/ldpsw/stp by using the adjusted diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f3e084f8778d70c82823b92fa80ff96021ad26db..34f321a117cb96211a69119939fc518504bbf1a4 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -172,11 +172,11 @@ [(set_attr "type" "neon_store1_1reg<q>")] ) -(define_insn "load_pair<mode>" - [(set (match_operand:VD 0 "register_operand" "=w") - (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) - (set (match_operand:VD 2 "register_operand" "=w") - (match_operand:VD 3 "memory_operand" "m"))] +(define_insn "load_pair<DREG:mode><DREG2:mode>" + [(set (match_operand:DREG 0 "register_operand" "=w") + (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:DREG2 2 "register_operand" "=w") + (match_operand:DREG2 3 "memory_operand" "m"))] "TARGET_SIMD && rtx_equal_p (XEXP (operands[3], 0), plus_constant (Pmode, @@ -186,11 +186,11 @@ [(set_attr "type" "neon_ldp")] ) -(define_insn "store_pair<mode>" - [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") - 
(match_operand:VD 1 "register_operand" "w")) - (set (match_operand:VD 2 "memory_operand" "=m") - (match_operand:VD 3 "register_operand" "w"))] +(define_insn "vec_store_pair<DREG:mode><DREG2:mode>" + [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:DREG 1 "register_operand" "w")) + (set (match_operand:DREG2 2 "memory_operand" "=m") + (match_operand:DREG2 3 "register_operand" "w"))] "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0), plus_constant (Pmode, diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 28c4e0e64766060851c0c7cd6b86995fae25353d..a3bd1b1180903703d33ca822d06afc74f1748c44 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3179,10 +3179,10 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2, switch (mode) { case DImode: - return gen_store_pairdi (mem1, reg1, mem2, reg2); + return gen_store_pair_dw_DIDI (mem1, reg1, mem2, reg2); case DFmode: - return gen_store_pairdf (mem1, reg1, mem2, reg2); + return gen_store_pair_dw_DFDF (mem1, reg1, mem2, reg2); default: gcc_unreachable (); @@ -3199,10 +3199,10 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2, switch (mode) { case DImode: - return gen_load_pairdi (reg1, mem1, reg2, mem2); + return gen_load_pair_dw_DIDI (reg1, mem1, reg2, mem2); case DFmode: - return gen_load_pairdf (reg1, mem1, reg2, mem2); + return gen_load_pair_dw_DFDF (reg1, mem1, reg2, mem2); default: gcc_unreachable (); @@ -14712,6 +14712,11 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, if (!rtx_equal_p (base_1, base_2)) return false; + /* Check that the operands are of the same size. 
*/ + if (GET_MODE_SIZE (GET_MODE (mem_1)) + != GET_MODE_SIZE (GET_MODE (mem_2))) + return false; + offval_1 = INTVAL (offset_1); offval_2 = INTVAL (offset_2); msize = GET_MODE_SIZE (mode); diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c1bca07308d84f50a6fa5af116f0fa20589882db..46affe8c63a58bd60b993349555e81c4c5008113 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1220,141 +1220,76 @@ ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. -(define_insn "load_pairsi" - [(set (match_operand:SI 0 "register_operand" "=r,*w") - (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SI 2 "register_operand" "=r,*w") - (match_operand:SI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "load_pair_sw_<SX:MODE><SX2:MODE>" + [(set (match_operand:SX 0 "register_operand" "=r,w") + (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:SX2 2 "register_operand" "=r,w") + (match_operand:SX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ - ldp\\t%w0, %w2, %1 - ldp\\t%s0, %s2, %1" + ldp\t%w0, %w2, %1 + ldp\t%s0, %s2, %1" [(set_attr "type" "load2,neon_load1_2reg") (set_attr "fp" "*,yes")] ) -(define_insn "load_pairdi" - [(set (match_operand:DI 0 "register_operand" "=r,*w") - (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DI 2 "register_operand" "=r,*w") - (match_operand:DI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DImode)))" +;; Storing different modes that can still be merged +(define_insn "load_pair_dw_<DX:MODE><DX2:MODE>" + [(set (match_operand:DX 0 
"register_operand" "=r,w") + (match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:DX2 2 "register_operand" "=r,w") + (match_operand:DX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ - ldp\\t%x0, %x2, %1 - ldp\\t%d0, %d2, %1" + ldp\t%x0, %x2, %1 + ldp\t%d0, %d2, %1" [(set_attr "type" "load2,neon_load1_2reg") (set_attr "fp" "*,yes")] ) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. -(define_insn "store_pairsi" - [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:SI 2 "memory_operand" "=m,m") - (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "store_pair_sw_<SX:MODE><SX2:MODE>" + [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:SX2 2 "memory_operand" "=m,m") + (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ - stp\\t%w1, %w3, %0 - stp\\t%s1, %s3, %0" + stp\t%w1, %w3, %0 + stp\t%s1, %s3, %0" [(set_attr "type" "store2,neon_store1_2reg") (set_attr "fp" "*,yes")] ) -(define_insn "store_pairdi" - [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:DI 2 "memory_operand" "=m,m") - (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DImode)))" +;; Storing different modes that can still be merged +(define_insn 
"store_pair_dw_<DX:MODE><DX2:MODE>" + [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:DX2 2 "memory_operand" "=m,m") + (match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode)))" "@ - stp\\t%x1, %x3, %0 - stp\\t%d1, %d3, %0" + stp\t%x1, %x3, %0 + stp\t%d1, %d3, %0" [(set_attr "type" "store2,neon_store1_2reg") (set_attr "fp" "*,yes")] ) -;; Operands 1 and 3 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. -(define_insn "load_pairsf" - [(set (match_operand:SF 0 "register_operand" "=w,*r") - (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SF 2 "register_operand" "=w,*r") - (match_operand:SF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SFmode)))" - "@ - ldp\\t%s0, %s2, %1 - ldp\\t%w0, %w2, %1" - [(set_attr "type" "neon_load1_2reg,load2") - (set_attr "fp" "yes,*")] -) - -(define_insn "load_pairdf" - [(set (match_operand:DF 0 "register_operand" "=w,*r") - (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DF 2 "register_operand" "=w,*r") - (match_operand:DF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DFmode)))" - "@ - ldp\\t%d0, %d2, %1 - ldp\\t%x0, %x2, %1" - [(set_attr "type" "neon_load1_2reg,load2") - (set_attr "fp" "yes,*")] -) - -;; Operands 0 and 2 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. 
-(define_insn "store_pairsf" - [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:SF 2 "memory_operand" "=m,m") - (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SFmode)))" - "@ - stp\\t%s1, %s3, %0 - stp\\t%w1, %w3, %0" - [(set_attr "type" "neon_store1_2reg,store2") - (set_attr "fp" "yes,*")] -) - -(define_insn "store_pairdf" - [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:DF 2 "memory_operand" "=m,m") - (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DFmode)))" - "@ - stp\\t%d1, %d3, %0 - stp\\t%x1, %x3, %0" - [(set_attr "type" "neon_store1_2reg,store2") - (set_attr "fp" "yes,*")] -) - ;; Load pair with post-index writeback. This is primarily used in function ;; epilogues. (define_insn "loadwb_pair<GPI:mode>_<P:mode>" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index cceb57525c7aa44933419bd317b1f03a7b76f4c4..6147d93f56649cbc9fe577a433bca610e476ab2c 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -69,6 +69,12 @@ ;; Double vector modes. (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF]) +;; All modes stored in registers d0-d31. +(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF]) + +;; Copy of the above. 
+(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF]) + ;; vector, 64-bit container, all integer modes (define_mode_iterator VD_BHSI [V8QI V4HI V2SI]) @@ -235,6 +241,18 @@ ;; Double scalar modes (define_mode_iterator DX [DI DF]) +;; Duplicate of the above +(define_mode_iterator DX2 [DI DF]) + +;; Single scalar modes +(define_mode_iterator SX [SI SF]) + +;; Duplicate of the above +(define_mode_iterator SX2 [SI SF]) + +;; Single and double integer and float modes +(define_mode_iterator DSX [DF DI SF SI]) + ;; Modes available for <f>mul lane operations. (define_mode_iterator VMUL [V4HI V8HI V2SI V4SI (V4HF "TARGET_SIMD_F16INST") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 11243c4ce00aa7d16a886bb24b01180801c68f4e..ee6e050dd839c329baa05bdfe878b786f1def969 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -62,6 +62,10 @@ (and (match_code "const_double") (match_test "aarch64_float_const_zero_rtx_p (op)")))) +(define_predicate "aarch64_reg_zero_or_fp_zero" + (ior (match_operand 0 "aarch64_reg_or_fp_zero") + (match_operand 0 "aarch64_reg_or_zero"))) + (define_predicate "aarch64_reg_zero_or_m1_or_1" (and (match_code "reg,subreg,const_int") (ior (match_operand 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c new file mode 100644 index 0000000000000000000000000000000000000000..2d982f3389b668f2042d48ba3db04e619fd999f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c @@ -0,0 +1,20 @@ +/* { dg-options "-O2" } */ + +typedef float __attribute__ ((vector_size (8))) vec; + +struct pair +{ + vec e1; + double e2; +}; + +vec tmp; + +void +stp (struct pair *p) +{ + p->e1 = tmp; + p->e2 = 1.0; + + /* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c new file mode 100644 
index 0000000000000000000000000000000000000000..06607de6b3e36a4d759d915a9f7880284391aa08 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c @@ -0,0 +1,47 @@ +/* { dg-options "-O2" } */ + +struct pair +{ + double a; + long int b; +}; + +void +stp (struct pair *p) +{ + p->a = 0.0; + p->b = 1; +} + +/* { dg-final { scan-assembler "stp\txzr, x\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */ + +void +stp2 (struct pair *p) +{ + p->a = 0.0; + p->b = 0; +} + +struct reverse_pair +{ + long int a; + double b; +}; + +void +stp_reverse (struct reverse_pair *p) +{ + p->a = 1; + p->b = 0.0; +} + +/* { dg-final { scan-assembler "stp\tx\[0-9\]+, xzr, \\\[x\[0-9\]+\\\]" } } */ + +void +stp_reverse2 (struct reverse_pair *p) +{ + p->a = 0; + p->b = 0.0; +} + +/* { dg-final { scan-assembler-times "stp\txzr, xzr, \\\[x\[0-9\]+\\\]" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c new file mode 100644 index 0000000000000000000000000000000000000000..1a47e233814e564d549245683a4e59fdb422bdad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c @@ -0,0 +1,30 @@ +/* { dg-options "-O2" } */ + +typedef float __attribute__ ((vector_size (8))) fvec; +typedef int __attribute__ ((vector_size (8))) ivec; + +struct pair +{ + double a; + fvec b; +}; + +void ldp (double *a, fvec *b, struct pair *p) +{ + *a = p->a; + *b = p->b; +} + +struct vec_pair +{ + fvec a; + ivec b; +}; + +void ldp2 (fvec *a, ivec *b, struct vec_pair *p) +{ + *a = p->a; + *b = p->b; +} + +/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */