Patchwork [PATCH:,PR,target/44072] Replace "cmp r0, -1" by "adds r0, 1" in thumb2

login
register
mail settings
Submitter Richard Earnshaw
Date June 19, 2010, 11:11 p.m.
Message ID <4C1D4EAC.8030103@buzzard.freeserve.co.uk>
Download mbox | patch
Permalink /patch/56251/
State New
Headers show

Comments

Richard Earnshaw - June 19, 2010, 11:11 p.m.
On 07/06/10 09:48, Carrot Wei wrote:
> Hi
>
> I've modified the patch a little. Please take another look.
>
> 1. Add code to clobber cc register.
> 2. Change the constraint "l,?h,0" to "l,0,h".
>
> It should be noted that the change of constraint doesn't bring the expected
> result. For the following simple code:
>
> if (x == -8)
>      foo1();
> 	
> GCC generates:
>
>         mov     ip, r0
>         cmp     ip, #-8
>         bne     .L1
>         ...
> 	
> It is even worse than current default behavior. Ian explained as:
>
> "At first glance, I would say that the third alternative does not
> require a scratch register whereas the second alternative does require
> one.  Therefore, the second alternative costs more.  Scratch registers
> are, rightly, expensive, because in the general case (though not in
> this tiny case) they require spilling values onto the stack."
>
> So gcc can't correctly model the costs of the second and third alternatives
> in a situation without high register pressure. I will open a bug entry after
> check in this patch.
>
> In practice it is not a big problem. Because -1 is the most frequently compared
> negative constant since it is usually used to indicate an error return value.
> Testing with CSiBE confirms this. There are only less than 10 functions get
> larger but more than 10 times number of functions get smaller. This patch also
> passed regression testing on qemu.
>
> thanks
> Guozhi
>
>
> ChangeLog:
> 2010-06-07  Wei Guozhi<carrot@google.com>
>
>          PR target/44072
>          * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.
>
> ChangeLog:
> 2010-06-07  Wei Guozhi<carrot@google.com>
>
>          PR target/44072
>          * gcc.target/arm/pr44072.c: New testcase.
>
> 	
> Index: thumb2.md
> ===================================================================
> --- thumb2.md   (revision 160356)
> +++ thumb2.md   (working copy)
> @@ -1591,3 +1591,75 @@
>                  (const_int 8)
>                  (const_int 10)))))]
>   )
> +
> +(define_insn "thumb2_cbranchsi4_scratch"
> +  [(set (pc) (if_then_else
> +             (match_operator 4 "arm_comparison_operator"
> +              [(match_operand:SI 1 "s_register_operand" "l,0,h")
> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
> +             (label_ref (match_operand 3 "" ""))
> +             (pc)))
> +   (clobber (match_scratch:SI 0 "=l,l,X"))
> +   (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_THUMB2"
> +  "*
> +  if (which_alternative == 2)
> +    {
> +      output_asm_insn (\"cmp\\t%1, %2\", operands);
> +      switch (get_attr_length (insn))
> +       {
> +         case 6:  return \"b%d4\\t%l3\";
> +         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
> +       }
> +    }
> +  else
> +    {
> +      if (which_alternative == 0)
> +       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
> +      else
> +       output_asm_insn (\"adds\\t%0, #%n2\", operands);
> +
> +      switch (get_attr_length (insn))
> +       {
> +         case 4:  return \"b%d4\\t%l3\";
> +         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
> +       }
> +    }
> +  "
> +  [(set (attr "far_jump")
> +       (if_then_else
> +         (eq (symbol_ref ("which_alternative"))
> +                         (const_int 2))
> +         (if_then_else
> +           (eq_attr "length" "10")
> +           (const_string "yes")
> +           (const_string "no"))
> +         (if_then_else
> +           (eq_attr "length" "8")
> +           (const_string "yes")
> +           (const_string "no"))))
> +   (set (attr "length")
> +       (if_then_else
> +         (eq (symbol_ref ("which_alternative"))
> +                         (const_int 2))
> +         (if_then_else
> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
> +           (const_int 6)
> +           (if_then_else
> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
> +               (const_int 8)
> +               (const_int 10)))
> +         (if_then_else
> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
> +           (const_int 4)
> +           (if_then_else
> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
> +               (const_int 6)
> +               (const_int 8)))))]
> +)
>
> Index: pr44072.c
> ===================================================================
> --- pr44072.c   (revision 0)
> +++ pr44072.c   (revision 0)
> @@ -0,0 +1,9 @@
> +/* { dg-options "-march=armv7-a -mthumb -Os" }  */
> +/* { dg-final { scan-assembler "adds" } } */
> +
> +void foo1();
> +void bar5(int x)
> +{
> +  if (x == -1)
> +    foo1();
> +}
>
>
> On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw<rearnsha@arm.com>  wrote:
>> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>>> Hi
>>>
>>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>>> conditions for following branch. But adds with small positive immediate is a
>>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>>> we prefer adds in thumb2.
>>>
>>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>>> adds the corresponding insn pattern for thumb2.
>>>
>>> +
>>> +(define_insn "thumb2_cbranchsi4_scratch"
>>> +  [(set (pc) (if_then_else
>>> +             (match_operator 4 "arm_comparison_operator"
>>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>>
>> You should put the constrains in the order that they are preferred,
>> rather than trying to force the behaviour with '?'.  So the constraints
>> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>>
>> OK with that change.

My apologies for the delay responding to this, I've been pondering Ian's 
comments about the cost of adding an extra scratch register.  We really 
don't want to do that if there isn't one available for free (using CMN 
will be much cheaper).

I think the best way to handle that is the way we handle similar 
conversions to use 16-bit thumb instructions, namely to use peephole2. 
That runs after register allocation and will tell us quickly if we can 
either clobber the input operand, or if there's a suitable scratch 
register elsewhere that's available.

To that effect, I've just committed the attached patch.

R.

2010-06-19  Richard Earnshaw  <rearnsha@arm.com>

	PR target/44072
	* arm.md (cmpsi2_addneg): Prefer emitting adds to subs with a negative
	immediate.
	* constraints.md (Pw, Px): New constraints.
	* thumb2.md (cmpsi2_addneg peephole2): New peepholes.

	PR target/44072
	* gcc.target/arm/thumb2-cmpneg2add-1.c: New test.
	* gcc.target/arm/thumb2-cmpneg2add-2.c: New test.

Patch

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 628bd62..1608929 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -737,14 +737,14 @@ 
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
 	 (match_operand:SI 1 "s_register_operand" "r,r")
-	 (match_operand:SI 2 "arm_addimm_operand" "I,L")))
+	 (match_operand:SI 2 "arm_addimm_operand" "L,I")))
    (set (match_operand:SI 0 "s_register_operand" "=r,r")
 	(plus:SI (match_dup 1)
-		 (match_operand:SI 3 "arm_addimm_operand" "L,I")))]
+		 (match_operand:SI 3 "arm_addimm_operand" "I,L")))]
   "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])"
   "@
-   sub%.\\t%0, %1, %2
-   add%.\\t%0, %1, #%n2"
+   add%.\\t%0, %1, %3
+   sub%.\\t%0, %1, #%n3"
   [(set_attr "conds" "set")]
 )
 
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 575d0ac..6d6c77d 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -31,7 +31,7 @@ 
 ;; The following multi-letter normal constraints have been used:
 ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy
 ;; in Thumb-1 state: Pa, Pb
-;; in Thumb-2 state: Ps, Pt, Pu, Pv
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -168,6 +168,16 @@ 
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 0")))
 
+(define_constraint "Pw"
+  "@internal In Thumb-2 state a constant in the range -255 to -1"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1")))
+
+(define_constraint "Px"
+  "@internal In Thumb-2 state a constant in the range -7 to -1"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 76a3b98..7045d14 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1231,6 +1231,32 @@ 
    (set_attr "length" "2")]
 )
 
+(define_peephole2
+  [(set (match_operand:CC 0 "cc_register" "")
+	(compare:CC (match_operand:SI 1 "low_register_operand" "")
+		    (match_operand:SI 2 "const_int_operand" "")))]
+  "TARGET_THUMB2
+   && peep2_reg_dead_p (1, operands[1])
+   && satisfies_constraint_Pw (operands[2])"
+  [(parallel
+    [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+     (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])]
+  "operands[3] = GEN_INT (- INTVAL (operands[2]));"
+)
+
+(define_peephole2
+  [(match_scratch:SI 3 "l")
+   (set (match_operand:CC 0 "cc_register" "")
+	(compare:CC (match_operand:SI 1 "low_register_operand" "")
+		    (match_operand:SI 2 "const_int_operand" "")))]
+  "TARGET_THUMB2
+   && satisfies_constraint_Px (operands[2])"
+  [(parallel
+    [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+     (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])]
+  "operands[4] = GEN_INT (- INTVAL (operands[2]));"
+)
+
 (define_insn "*thumb2_addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-1.c b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-1.c
new file mode 100644
index 0000000..d75f13a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-1.c
@@ -0,0 +1,12 @@ 
+/* Use ADDS clobbering source operand, rather than CMN */
+/* { dg-options "-mthumb -Os" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "adds" } } */
+/* { dg-final { scan-assembler-not "cmn" } } */
+
+void foo1(void);
+void bar5(int x)
+{
+  if (x == -15)
+    foo1();
+}
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-2.c b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-2.c
new file mode 100644
index 0000000..358bc6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-2.c
@@ -0,0 +1,12 @@ 
+/* Use ADDS with a scratch, rather than CMN */
+/* { dg-options "-mthumb -Os" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "adds" } } */
+/* { dg-final { scan-assembler-not "cmn" } } */
+
+void foo1(int);
+void bar5(int x)
+{
+  if (x == -1)
+    foo1(x);
+}