Patchwork [ARM,1/n] PR53447: optimizations of 64bit ALU operation with constant

login
register
mail settings
Submitter Carrot Wei
Date June 8, 2012, 9:12 a.m.
Message ID <CAEe8uEB8QyrkV2ntxPeF+V_FdEHvfpLJ-e==CbZA8-9tkzfBLw@mail.gmail.com>
Download mbox | patch
Permalink /patch/163756/
State New
Headers show

Comments

Carrot Wei - June 8, 2012, 9:12 a.m.
Hi

In rtl expression, substract a constant c is expressed as add a value -c, so it
is alse processed by adddi3, and I extend it more to handle a subtraction of
64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
represent substraction with 64bit constant while continue keeping the add rtl
expression.

Tested on arm qemu with both arm/thumb modes. OK for trunk?

thanks
Carrot


2012-06-08  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.
	* gcc.target/arm/pr53447-5.c: New testcase.


2012-06-08  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/predicates.md (arm_neg_immediate_di_operand): New
	predicate.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_subdi3_immediate): New insn pattern.
	(arm_adddi3): Extend it to handle constants.
	* config/arm/neon.md (adddi3_neon): Likewise.





On Wed, Jun 6, 2012 at 7:16 PM, Carrot Wei <carrot@google.com> wrote:
> In the original patch, if "add r0, c" is not possible, but "sub r0,
> -c" is possible, it will use the sub instruction. Although they
> generate same result, but they may generate different CF flag, and
> cause subsequent adc to compute out wrong result. So I updated the
> patch to avoid using sub instruction.
>
> Tested on arm qemu with both arm/thumb mode.
>
> thanks
> Carrot
>
>
> 2012-06-06  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>
>
> 2012-06-06  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_adddi3): Likewise.
>        * config/arm/neon.md (adddi3_neon): Likewise.
>
>
> Index: testsuite/gcc.target/arm/pr53447-1.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p += 0x100000001;
> +}
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 187751)
> +++ config/arm/neon.md  (working copy)
> @@ -588,9 +588,9 @@
>  )
>
>  (define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
> +                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_NEON"
>  {
> @@ -600,13 +600,16 @@
>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>     case 1: return "#";
>     case 2: return "#";
> +    case 4: return "#";
> +    case 5: return "#";
> +    case 6: return "#";
>     default: gcc_unreachable ();
>     }
>  }
> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
> -   (set_attr "conds" "*,clob,clob,*")
> -   (set_attr "length" "*,8,8,*")
> -   (set_attr "arch" "nota8,*,*,onlya8")]
> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> +   (set_attr "length" "*,8,8,*,8,8,8")
> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>  )
>
>  (define_insn "*sub<mode>3_neon"
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md   (revision 187751)
> +++ config/arm/arm.md   (working copy)
> @@ -574,10 +574,21 @@
>  [(parallel
>    [(set (match_operand:DI           0 "s_register_operand" "")
>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> -                  (match_operand:DI 2 "s_register_operand" "")))
> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>     (clobber (reg:CC CC_REGNUM))])]
>   "TARGET_EITHER"
>   "
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
> +       {
> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
> +         DONE;
> +       }
> +      else
> +       operands[2] = force_reg (DImode, operands[2]);
> +    }
> +
>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>     {
>       if (!cirrus_fp_register (operands[0], DImode))
> @@ -609,10 +620,10 @@
>   [(set_attr "length" "4")]
>  )
>
> -(define_insn_and_split "*arm_adddi3"
> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> +(define_insn_and_split "arm_adddi3"
> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> +                (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>   "#"
> @@ -630,8 +641,17 @@
>     operands[0] = gen_lowpart (SImode, operands[0]);
>     operands[4] = gen_highpart (SImode, operands[1]);
>     operands[1] = gen_lowpart (SImode, operands[1]);
> -    operands[5] = gen_highpart (SImode, operands[2]);
> -    operands[2] = gen_lowpart (SImode, operands[2]);
> +    if (GET_CODE (operands[2]) == CONST_INT)
> +      {
> +       HOST_WIDE_INT v = INTVAL (operands[2]);
> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +      }
> +    else
> +      {
> +       operands[5] = gen_highpart (SImode, operands[2]);
> +       operands[2] = gen_lowpart (SImode, operands[2]);
> +      }
>   }"
>   [(set_attr "conds" "clob")
>    (set_attr "length" "8")]
>

Patch

Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@ 
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: testsuite/gcc.target/arm/pr53447-5.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-5.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-5.c	(revision 0)
@@ -0,0 +1,8 @@ 
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p -= 0x100000008;
+}
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 187751)
+++ config/arm/neon.md	(working copy)
@@ -588,9 +588,9 @@ 
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "arm_di_operand"     "w,r,0,w,r,Di,Di")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -600,13 +600,16 @@ 
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 187751)
+++ config/arm/constraints.md	(working copy)
@@ -29,7 +29,7 @@ 
 ;; in Thumb-1 state: I, J, K, L, M, N, O

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -251,6 +251,13 @@ 
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+  In ARM/Thumb-2 state a const_int whose negative value satisfy constraint
+  Di."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && arm_const_double_by_immediates
(GEN_INT (-ival))")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/predicates.md
===================================================================
--- config/arm/predicates.md	(revision 187751)
+++ config/arm/predicates.md	(working copy)
@@ -117,6 +117,10 @@ 
   (and (match_code "const_int,const_double")
        (match_test "arm_const_double_by_immediates (op)")))

+(define_predicate "arm_neg_immediate_di_operand"
+  (and (match_code "const_int")
+       (match_test "arm_const_double_by_immediates (GEN_INT (-INTVAL (op)))")))
+
 (define_predicate "arm_neg_immediate_operand"
   (and (match_code "const_int")
        (match_test "const_ok_for_arm (-INTVAL (op))")))
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 187751)
+++ config/arm/arm.md	(working copy)
@@ -574,10 +574,29 @@ 
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "reg_or_int_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      rtx neg_val = GEN_INT (-INTVAL (operands[2]));
+      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
+	{
+	  emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+      else if (TARGET_32BIT && arm_const_double_by_immediates (neg_val))
+	{
+	  emit_insn (gen_arm_subdi3_immediate (operands[0],
+					       operands[1],
+					       operands[2]));
+	  DONE;
+	}
+      else
+	operands[2] = force_reg (DImode, operands[2]);
+    }
+
   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], DImode))
@@ -609,10 +628,10 @@ 
   [(set_attr "length" "4")]
 )

-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+(define_insn_and_split "arm_adddi3"
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "arm_di_operand"     "r,  0, r, Di,Di")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
   "#"
@@ -630,8 +649,17 @@ 
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
+    if (GET_CODE (operands[2]) == CONST_INT)
+      {
+	HOST_WIDE_INT v = INTVAL (operands[2]);
+	operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+	operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+      }
+    else
+      {
+	operands[5] = gen_highpart (SImode, operands[2]);
+	operands[2] = gen_lowpart (SImode, operands[2]);
+      }
   }"
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]
@@ -1122,6 +1150,25 @@ 
    (set_attr "length" "8")]
 )

+(define_insn "arm_subdi3_immediate"
+  [(set (match_operand:DI          0 "s_register_operand"           "=&r,&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand"           "0, r")
+                 (match_operand:DI 2 "arm_neg_immediate_di_operand" "Dd,Dd")))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_32BIT"
+  "*
+  {
+    HOST_WIDE_INT v = -INTVAL (operands[2]);
+    operands[3] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+    operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+    output_asm_insn (\"subs\\t%Q0, %Q1, %2\", operands);
+    output_asm_insn (\"sbc\\t%R0, %R1, %3\", operands);
+    return \"\";
+  }"
+  [(set_attr "conds" "clob")
+   (set_attr "length" "8")]
+)
+
 (define_insn "*thumb_subdi3"
   [(set (match_operand:DI           0 "register_operand" "=l")
 	(minus:DI (match_operand:DI 1 "register_operand"  "0")