Message ID | 7aad32511cb04e4f9707b3626e1116a2@SN2PR07MB029.namprd07.prod.outlook.com |
---|---|
State | New |
Headers | show |
You are better off CCing the maintainers for such reviews. Let me do that for you. I cannot approve or reject this patch but I have a few comments as below. On 10/29/13 09:22, Hurugalawadi, Naveen wrote: > > diff -uprN '-x*.orig' mainline-orig/gcc/config/aarch64/aarch64.md gcc-4.8.0/gcc/config/aarch64/aarch64.md > --- mainline-orig/gcc/config/aarch64/aarch64.md 2013-10-28 17:15:52.363975264 +0530 > +++ gcc-4.8.0/gcc/config/aarch64/aarch64.md 2013-10-29 10:11:09.656082201 +0530 > @@ -1068,6 +1068,27 @@ > (set_attr "mode" "<MODE>")] > ) > > +(define_peephole2 > + [(set (match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_mem_pair_operand")) > + (set (match_operand:GPI 2 "register_operand") > + (match_operand:GPI 3 "memory_operand"))] > + "GET_CODE (operands[1]) == MEM > + && GET_CODE (XEXP (operands[1], 0)) == PLUS > + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG Use the predicates REG_P , CONST_INT_P and so on. Don't do this in this form. > + && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT > + && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0)) > + && REGNO (operands[0]) != REGNO (operands[2]) > + && REGNO_REG_CLASS (REGNO (operands[0])) > + == REGNO_REG_CLASS (REGNO (operands[2])) > + && rtx_equal_p (XEXP (operands[3], 0), > + plus_constant (Pmode, XEXP (operands[1], 0), > + GET_MODE_SIZE (<MODE>mode))) > + && optimize_size" Why use optimize_size here ? I would have thought it was always good to generate ldp and stp instructions. If you want to make it a core specific decision make it a tuning decision but don't disable just based on size. Additionally if this check is common between all the patterns it would be better to factor this out into a predicate or a function in its own right. > + [(parallel [(set (match_dup 0) (match_dup 1)) > + (set (match_dup 2) (match_dup 3))])] > +) > + > ;; Operands 0 and 2 are tied together by the final condition; so we allow > ;; fairly lax checking on the second memory operation. > (define_insn "store_pair<mode>" > @@ -1085,6 +1106,27 @@ > (set_attr "mode" "<MODE>")] > ) > > +(define_peephole2 > + [(set (match_operand:GPI 0 "aarch64_mem_pair_operand") > + (match_operand:GPI 1 "register_operand")) > + (set (match_operand:GPI 2 "memory_operand") > + (match_operand:GPI 3 "register_operand"))] > + "GET_CODE (operands[0]) == MEM > + && GET_CODE (XEXP (operands[0], 0)) == PLUS > + && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG > + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT > + && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0)) > + && REGNO (operands[1]) != REGNO (operands[3]) > + && REGNO_REG_CLASS (REGNO (operands[1])) > + == REGNO_REG_CLASS (REGNO (operands[3])) > + && rtx_equal_p (XEXP (operands[2], 0), > + plus_constant (Pmode, XEXP (operands[0], 0), > + GET_MODE_SIZE (<MODE>mode))) > + && optimize_size" > + [(parallel [(set (match_dup 0) (match_dup 1)) > + (set (match_dup 2) (match_dup 3))])] > +) > + > ;; Operands 1 and 3 are tied together by the final condition; so we allow > ;; fairly lax checking on the second memory operation. > (define_insn "load_pair<mode>" > @@ -1102,6 +1144,28 @@ > (set_attr "mode" "<MODE>")] > ) > > +(define_peephole2 > + [(set (match_operand:GPF 0 "register_operand") > + (match_operand:GPF 1 "aarch64_mem_pair_operand")) > + (set (match_operand:GPF 2 "register_operand") > + (match_operand:GPF 3 "memory_operand"))] > + "GET_CODE (operands[1]) == MEM > + && GET_CODE (XEXP (operands[1], 0)) == PLUS > + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG > + && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT > + && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0)) > + && REGNO (operands[0]) != REGNO (operands[2]) > + && REGNO (operands[0]) >= 32 && REGNO (operands[2]) >= 32 I'm not sure what this means here but this check doesn't look right - 32 is V0_REGNUM - why are you checking that here ? If you really need to do that check then use V0_REGNUM or it may be better to see if this really is a SIMD regnum ? Can you not use something like FP_REGNUM_P here instead ? > + && REGNO_REG_CLASS (REGNO (operands[0])) > + == REGNO_REG_CLASS (REGNO (operands[2])) > + && rtx_equal_p (XEXP (operands[3], 0), > + plus_constant (Pmode, XEXP (operands[1], 0), > + GET_MODE_SIZE (<MODE>mode))) > + && optimize_size" > + [(parallel [(set (match_dup 0) (match_dup 1)) > + (set (match_dup 2) (match_dup 3))])] > +) > + > ;; Operands 0 and 2 are tied together by the final condition; so we allow > ;; fairly lax checking on the second memory operation. > (define_insn "store_pair<mode>" > @@ -1119,6 +1183,28 @@ > (set_attr "mode" "<MODE>")] > ) > > +(define_peephole2 > + [(set (match_operand:GPF 0 "aarch64_mem_pair_operand") > + (match_operand:GPF 1 "register_operand")) > + (set (match_operand:GPF 2 "memory_operand") > + (match_operand:GPF 3 "register_operand"))] > + "GET_CODE (operands[0]) == MEM > + && GET_CODE (XEXP (operands[0], 0)) == PLUS > + && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG > + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT > + && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0)) > + && REGNO (operands[1]) != REGNO (operands[3]) > + && REGNO (operands[1]) >= 32 && REGNO (operands[3]) >= 32 > + && REGNO_REG_CLASS (REGNO (operands[1])) > + == REGNO_REG_CLASS (REGNO (operands[3])) > + && rtx_equal_p (XEXP (operands[2], 0), > + plus_constant (Pmode, XEXP (operands[0], 0), > + GET_MODE_SIZE (<MODE>mode))) > + && optimize_size" > + [(parallel [(set (match_dup 0) (match_dup 1)) > + (set (match_dup 2) (match_dup 3))])] > +) > + > ;; Load pair with writeback. This is primarily used in function epilogues > ;; when restoring [fp,lr] > (define_insn "loadwb_pair<GPI:mode>_<P:mode>" > diff -uprN '-x*.orig' mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c > --- mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c 1970-01-01 05:30:00.000000000 +0530 > +++ gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c 2013-10-28 19:01:11.695986357 +0530 > @@ -0,0 +1,33 @@ > +/* { dg-options "-Os" } */ > + > +extern void abort (void); > + > +typedef struct > +{ > + long int x, y; > +} ldst; > + > +void > +f (ldst p0, ldst p1, ldst p2, ldst p3, ldst p4, ldst p5) > +{ > + if (p2.x != 1 || p2.y != -1 > + || p3.x != -1 || p3.y != 1 || p4.x != 0 || p4.y != -1) > + abort (); > +} > + > +void > +foo () > +{ > + ldst p0, p1, p2, p3, p4, p5; > + > + p4.x = 0; > + p4.y = -1; > + > + p5.x = 1; > + p5.y = 0; > + > + f (p0, p1, p2, p3, p4, p5); > +} > + > +/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */ > +/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 3 } } */
diff -uprN '-x*.orig' mainline-orig/gcc/config/aarch64/aarch64.md gcc-4.8.0/gcc/config/aarch64/aarch64.md --- mainline-orig/gcc/config/aarch64/aarch64.md 2013-10-28 17:15:52.363975264 +0530 +++ gcc-4.8.0/gcc/config/aarch64/aarch64.md 2013-10-29 10:11:09.656082201 +0530 @@ -1068,6 +1068,27 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_mem_pair_operand")) + (set (match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "memory_operand"))] + "GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT + && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0)) + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[2])) + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "store_pair<mode>" @@ -1085,6 +1106,27 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPI 0 "aarch64_mem_pair_operand") + (match_operand:GPI 1 "register_operand")) + (set (match_operand:GPI 2 "memory_operand") + (match_operand:GPI 3 "register_operand"))] + "GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT + && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0)) + && REGNO (operands[1]) != REGNO (operands[3]) + && REGNO_REG_CLASS (REGNO (operands[1])) + == REGNO_REG_CLASS (REGNO (operands[3])) + && rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "load_pair<mode>" @@ -1102,6 +1144,28 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "aarch64_mem_pair_operand")) + (set (match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "memory_operand"))] + "GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT + && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0)) + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO (operands[0]) >= 32 && REGNO (operands[2]) >= 32 + && REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[2])) + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "store_pair<mode>" @@ -1119,6 +1183,28 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPF 0 "aarch64_mem_pair_operand") + (match_operand:GPF 1 "register_operand")) + (set (match_operand:GPF 2 "memory_operand") + (match_operand:GPF 3 "register_operand"))] + "GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT + && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0)) + && REGNO (operands[1]) != REGNO (operands[3]) + && REGNO (operands[1]) >= 32 && REGNO (operands[3]) >= 32 + && REGNO_REG_CLASS (REGNO (operands[1])) + == REGNO_REG_CLASS (REGNO (operands[3])) + && rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Load pair with writeback. This is primarily used in function epilogues ;; when restoring [fp,lr] (define_insn "loadwb_pair<GPI:mode>_<P:mode>" diff -uprN '-x*.orig' mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c --- mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c 1970-01-01 05:30:00.000000000 +0530 +++ gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c 2013-10-28 19:01:11.695986357 +0530 @@ -0,0 +1,33 @@ +/* { dg-options "-Os" } */ + +extern void abort (void); + +typedef struct +{ + long int x, y; +} ldst; + +void +f (ldst p0, ldst p1, ldst p2, ldst p3, ldst p4, ldst p5) +{ + if (p2.x != 1 || p2.y != -1 + || p3.x != -1 || p3.y != 1 || p4.x != 0 || p4.y != -1) + abort (); +} + +void +foo () +{ + ldst p0, p1, p2, p3, p4, p5; + + p4.x = 0; + p4.y = -1; + + p5.x = 1; + p5.y = 0; + + f (p0, p1, p2, p3, p4, p5); +} + +/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 3 } } */