Patchwork [ARM,1/n] PR53447: optimizations of 64bit ALU operation with constant

login
register
mail settings
Submitter Carrot Wei
Date July 1, 2012, 3:29 p.m.
Message ID <CAEe8uED6R5Vo5TP-1ad3N5EsR-Zp43X6K6dCf3Hcemi54+TR8A@mail.gmail.com>
Download mbox | patch
Permalink /patch/168441/
State New
Headers show

Comments

Carrot Wei - July 1, 2012, 3:29 p.m.
On Fri, Jun 29, 2012 at 9:57 PM, Ramana Radhakrishnan
<ramana.radhakrishnan@linaro.org> wrote:
>
> On 29 June 2012 12:23, Carrot Wei <carrot@google.com> wrote:
> > Hi
> >
> > So the following is updated patch. Tested on qemu with arm/thumb modes
>
> Assuming this testing was with and without neon ? Because the patterns
> changed are different whether you use Neon or not.
>
Now the patch has been tested with all combination of arm/thumb
neon/non-neon modes.

> > without regression.
>
> Can you add some tests for all 4 cases ? See comments inline below for
> some changes ?
>
New test cases added.

> Ok with those changes if no regressions for above mentioned testing.
>
> > Index: config/arm/arm.md
> > ===================================================================
> > --- config/arm/arm.md   (revision 187751)
> > +++ config/arm/arm.md   (working copy)
> > @@ -574,7 +574,7 @@
> >  [(parallel
> >    [(set (match_operand:DI           0 "s_register_operand" "")
> >          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> > -                  (match_operand:DI 2 "s_register_operand" "")))
> > +                  (match_operand:DI 2 "arm_adddi_operand"  "")))
> >     (clobber (reg:CC CC_REGNUM))])]
> >   "TARGET_EITHER"
> >   "
> > @@ -609,10 +609,10 @@
> >   [(set_attr "length" "4")]
> >  )
> >
> > -(define_insn_and_split "*arm_adddi3"
> > -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> > -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> > -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> > +(define_insn_and_split "arm_adddi3"
> > +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> > +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> > +                (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
> >    (clobber (reg:CC CC_REGNUM))]
> >   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
> >   "#"
> > @@ -630,8 +630,17 @@
> >     operands[0] = gen_lowpart (SImode, operands[0]);
> >     operands[4] = gen_highpart (SImode, operands[1]);
> >     operands[1] = gen_lowpart (SImode, operands[1]);
> > -    operands[5] = gen_highpart (SImode, operands[2]);
> > -    operands[2] = gen_lowpart (SImode, operands[2]);
> > +    if (GET_CODE (operands[2]) == CONST_INT)
> > +      {
> > +       HOST_WIDE_INT v = INTVAL (operands[2]);
> > +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> > +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> > +      }
> > +    else
> > +      {
> > +       operands[5] = gen_highpart (SImode, operands[2]);
> > +       operands[2] = gen_lowpart (SImode, operands[2]);
> > +      }
>
> Instead
>
>
>  operands[5] = gen_highpart_mode (SImode, DImode, operands[2]);
>  operands[2] = gen_lowpart (SImode, operands[2]);
>
> So you don't need a check there.
>
A good method.
>
>
> >   }"
> >   [(set_attr "conds" "clob")
> >    (set_attr "length" "8")]
> > @@ -980,12 +989,14 @@
> >  )
> >
> >  (define_insn "*addsi3_carryin_<optab>"
> > -  [(set (match_operand:SI 0 "s_register_operand" "=r")
> > -       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
> > -                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
> > +  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
> > +       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r")
> > +                         (match_operand:SI 2 "arm_not_operand" "rI,K"))
> >                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
> >   "TARGET_32BIT"
> > -  "adc%?\\t%0, %1, %2"
> > +  "@
> > +   adc%?\\t%0, %1, %2
> > +   sbc%?\\t%0, %1, #%B2"
> >   [(set_attr "conds" "use")]
> >  )
>
> Any reason why you didn't consider making these changes to the
> *addsi3_carryin_alt2<optab> pattern ?
>
It's simply because of my ignorance.

thanks
Carrot

The actually committed patch is as following


2012-07-01  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.
	* gcc.target/arm/pr53447-2.c: New testcase.
	* gcc.target/arm/pr53447-3.c: New testcase.
	* gcc.target/arm/pr53447-4.c: New testcase.


2012-07-01  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/arm-protos.h (const_ok_for_dimode_op): New prototype.
	* config/arm/arm.c (const_ok_for_dimode_op): New function.
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/predicates.md (arm_adddi_operand): New predicate.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_adddi3): Likewise.
	(addsi3_carryin_<optab>): Extend it to handle sbc case.
	(addsi3_carryin_alt2_<optab>): Likewise.
	* config/arm/neon.md (adddi3_neon): Extend it to handle constants.

Patch

Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@ 
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: testsuite/gcc.target/arm/pr53447-2.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-2.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-2.c	(revision 0)
@@ -0,0 +1,8 @@ 
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p -= 0x100000008;
+}
Index: testsuite/gcc.target/arm/pr53447-3.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-3.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-3.c	(revision 0)
@@ -0,0 +1,9 @@ 
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+
+void t0p(long long * p)
+{
+  *p +=0x1fffffff8;
+}
Index: testsuite/gcc.target/arm/pr53447-4.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-4.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-4.c	(revision 0)
@@ -0,0 +1,9 @@ 
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+
+void t0p(long long * p)
+{
+  *p -=0x1fffffff8;
+}


Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 189101)
+++ config/arm/arm.c	(working copy)
@@ -2507,6 +2507,28 @@ 
     }
 }

+/* Return true if I is a valid di mode constant for the operation CODE.  */
+int
+const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
+{
+  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
+  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
+  rtx hi = GEN_INT (hi_val);
+  rtx lo = GEN_INT (lo_val);
+
+  if (TARGET_THUMB1)
+    return 0;
+
+  switch (code)
+    {
+    case PLUS:
+      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
+
+    default:
+      return 0;
+    }
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h	(revision 189101)
+++ config/arm/arm-protos.h	(working copy)
@@ -50,6 +50,7 @@ 
 extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
+extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
 			       HOST_WIDE_INT, rtx, rtx, int);
 extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 189101)
+++ config/arm/neon.md	(working copy)
@@ -587,9 +587,9 @@ 
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "arm_adddi_operand"
"w,r,0,w,r,Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -599,13 +599,16 @@ 
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 189101)
+++ config/arm/constraints.md	(working copy)
@@ -31,7 +31,7 @@ 
 ;; 'H' was previously used for FPA.

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -242,6 +242,12 @@ 
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+  In ARM/Thumb-2 state a const_int that can be used by insn adddi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/predicates.md
===================================================================
--- config/arm/predicates.md	(revision 189101)
+++ config/arm/predicates.md	(working copy)
@@ -141,6 +141,11 @@ 
   (ior (match_operand 0 "arm_rhs_operand")
        (match_operand 0 "arm_neg_immediate_operand")))

+(define_predicate "arm_adddi_operand"
+  (ior (match_operand 0 "s_register_operand")
+       (and (match_code "const_int")
+	    (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))
+
 (define_predicate "arm_addimm_operand"
   (ior (match_operand 0 "arm_immediate_operand")
        (match_operand 0 "arm_neg_immediate_operand")))
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 189101)
+++ config/arm/arm.md	(working copy)
@@ -604,7 +604,7 @@ 
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "arm_adddi_operand"  "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
@@ -630,9 +630,9 @@ 
 )

 (define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !TARGET_NEON"
   "#"
@@ -650,7 +650,7 @@ 
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[5] = gen_highpart_mode (SImode, DImode, operands[2]);
     operands[2] = gen_lowpart (SImode, operands[2]);
   }"
   [(set_attr "conds" "clob")
@@ -1001,22 +1001,26 @@ 
 )

 (define_insn "*addsi3_carryin_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
-			  (match_operand:SI 2 "arm_rhs_operand" "rI"))
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r")
+			  (match_operand:SI 2 "arm_not_operand" "rI,K"))
 		 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
   "TARGET_32BIT"
-  "adc%?\\t%0, %1, %2"
+  "@
+   adc%?\\t%0, %1, %2
+   sbc%?\\t%0, %1, #%B2"
   [(set_attr "conds" "use")]
 )

 (define_insn "*addsi3_carryin_alt2_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
 	(plus:SI (plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
-			  (match_operand:SI 1 "s_register_operand" "%r"))
-		 (match_operand:SI 2 "arm_rhs_operand" "rI")))]
+			  (match_operand:SI 1 "s_register_operand" "%r,r"))
+		 (match_operand:SI 2 "arm_rhs_operand" "rI,K")))]
   "TARGET_32BIT"
-  "adc%?\\t%0, %1, %2"
+  "@
+   adc%?\\t%0, %1, %2
+   sbc%?\\t%0, %1, #%B2"
   [(set_attr "conds" "use")]
 )