Patchwork [ARM] Improve use of conditional execution in thumb mode.

login
register
mail settings
Submitter Andrew Stubbs
Date Feb. 17, 2012, 3:30 p.m.
Message ID <4F3E72A1.9080600@codesourcery.com>
Download mbox | patch
Permalink /patch/141847/
State New
Headers show

Comments

Andrew Stubbs - Feb. 17, 2012, 3:30 p.m.
On 14/02/12 18:00, Andrew Stubbs wrote:
> On Tue 14 Feb 2012 17:35:36 GMT, Richard Earnshaw wrote:
>>> Bernds checked in a patch last year (or maybe even before that) to make
>>> the selection of flags clobbered insns run very late (certainly after
>>> condexec had run), so I'm not sure why you're not seeing this.
>>
>> Hm, you mentioned some peepholes. Try removing them....
>
> Hmmm, it seems you're right. The machine reorg pass now takes care of
> this. Well ... that was easy!
>
> Here's a patch to remove the obsolete peepholes. I've confirmed it works
> with the testcase.

Testing revealed that the new thumb2_reorg code was not as thorough as 
the old peepholes at converting insns to 16-bit encodings.

This updated patch rewrites thumb2_reorg to fix up all the missing cases, 
and adds a testcase to make sure it's all good. The parts that were in 
the old patch are unchanged.

I did initially try to insert the new cases into the old code, but the 
if conditions got way out of hand, and/or I ended up duplicating the 
active code. I've therefore recast the code to make it more scalable, 
but it boils down to the exact same thing.

I've got a full test run going again.

OK for 4.8, again?

Andrew

Patch

2012-02-17  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.c (thumb2_reorg): Add complete support
	for 16-bit instructions.
	* config/arm/thumb2.md: Delete obsolete flag-clobbering peepholes.

	gcc/testsuite/
	* gcc.target/arm/thumb-16bit-ops.c: New file.
	* gcc.target/arm/thumb-ifcvt.c: New file.

---
 gcc/config/arm/arm.c                           |  132 +++++++++++++++----
 gcc/config/arm/thumb2.md                       |  107 ----------------
 gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c |  164 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/arm/thumb-ifcvt.c     |   19 +++
 4 files changed, 286 insertions(+), 136 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c
 create mode 100644 gcc/testsuite/gcc.target/arm/thumb-ifcvt.c

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0bded8d..c3a19e4 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -13246,47 +13246,121 @@  thumb2_reorg (void)
       FOR_BB_INSNS_REVERSE (bb, insn)
 	{
 	  if (NONJUMP_INSN_P (insn)
-	      && !REGNO_REG_SET_P (&live, CC_REGNUM))
+	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
+	      && GET_CODE (PATTERN (insn)) == SET)
 	    {
+	      enum {SKIP, CONV, SWAP_CONV} action = SKIP;
 	      rtx pat = PATTERN (insn);
-	      if (GET_CODE (pat) == SET
-		  && low_register_operand (XEXP (pat, 0), SImode)
-		  && thumb_16bit_operator (XEXP (pat, 1), SImode)
-		  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
-		  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+	      rtx dst = XEXP (pat, 0);
+	      rtx src = XEXP (pat, 1);
+	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
+
+	      if (!OBJECT_P (src))
+		  op0 = XEXP (src, 0);
+
+	      if (BINARY_P (src))
+		  op1 = XEXP (src, 1);
+
+	      if (low_register_operand (dst, SImode))
 		{
-		  rtx dst = XEXP (pat, 0);
-		  rtx src = XEXP (pat, 1);
-		  rtx op0 = XEXP (src, 0);
-		  rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
-			     ? XEXP (src, 1) : NULL);
-
-		  if (rtx_equal_p (dst, op0)
-		      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+		  switch (GET_CODE (src))
 		    {
-		      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
-		      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
-		      rtvec vec = gen_rtvec (2, pat, clobber);
-
-		      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
-		      INSN_CODE (insn) = -1;
+		    case PLUS:
+		    case MINUS:
+		      if (low_register_operand (op0, SImode))
+			{
+			  /* ADDS <Rd>,<Rn>,<Rm>  */
+			  if (low_register_operand (op1, SImode))
+			    action = CONV;
+			  /* ADDS <Rdn>,#<imm8>  */
+			  else if (rtx_equal_p (dst, op0)
+				   && CONST_INT_P (op1)
+				   && IN_RANGE (INTVAL (op1), 0, 255))
+			    action = CONV;
+			  /* ADDS <Rd>,<Rn>,#<imm3>  */
+			  else if (CONST_INT_P (op1)
+				   && IN_RANGE (INTVAL (op1), 0, 7))
+			    action = CONV;
+			}
+		      break;
+		    case MULT:
+		      /* MULS <Rdm>,<Rn>,<Rdm>
+			 As an exception to the rule, this is only used
+			 when optimizing for size since MULS is slow on all
+			 known implementations.  */
+		      if (!optimize_function_for_size_p (cfun))
+			break;
+		      /* else fall through.  */
+		    case AND:
+		    case IOR:
+		    case XOR:
+		      /* ANDS <Rdn>,<Rm>  */
+		      if (rtx_equal_p (dst, op0)
+			  && low_register_operand (op1, SImode))
+			action = CONV;
+		      else if (rtx_equal_p (dst, op1)
+			       && low_register_operand (op0, SImode))
+			action = SWAP_CONV;
+		      break;
+		    case ASHIFTRT:
+		    case ASHIFT:
+		    case LSHIFTRT:
+		      /* ASRS <Rdn>,<Rm> */
+		      if (rtx_equal_p (dst, op0)
+			  && low_register_operand (op1, SImode))
+			action = CONV;
+		      /* ASRS <Rd>,<Rm>,#<imm5> */
+		      else if (low_register_operand (op0, SImode)
+			       && CONST_INT_P (op1)
+			       && IN_RANGE (INTVAL (op1), 0, 31))
+			action = CONV;
+		      break;
+		    case ROTATERT:
+		      /* RORS <Rdn>,<Rm>  */
+		      if (rtx_equal_p (dst, op0)
+			  && low_register_operand (op1, SImode))
+			action = CONV;
+		      break;
+		    case NOT:
+		    case NEG:
+		      /* MVNS <Rd>,<Rm>  */
+		      if (low_register_operand (op0, SImode))
+			action = CONV;
+		      break;
+		    case CONST_INT:
+		      /* MOVS <Rd>,#<imm8>  */
+		      if (CONST_INT_P (src)
+			  && IN_RANGE (INTVAL (src), 0, 255))
+			action = CONV;
+		      break;
+		    case REG:
+		      /* MOVS and MOV<c> with registers have different
+			 encodings, so are not relevant here.  */
+		      break;
+		    default:
+		      break;
 		    }
-		  /* We can also handle a commutative operation where the
-		     second operand matches the destination.  */
-		  else if (op1 && rtx_equal_p (dst, op1))
-		    {
-		      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
-		      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
-		      rtvec vec;
+		}
+
+	      if (action != SKIP)
+		{
+		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+		  rtvec vec;
 
+		  if (action == SWAP_CONV)
+		    {
 		      src = copy_rtx (src);
 		      XEXP (src, 0) = op1;
 		      XEXP (src, 1) = op0;
 		      pat = gen_rtx_SET (VOIDmode, dst, src);
 		      vec = gen_rtvec (2, pat, clobber);
-		      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
-		      INSN_CODE (insn) = -1;
 		    }
+		  else /* action == CONV */
+		    vec = gen_rtvec (2, pat, clobber);
+
+		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+		  INSN_CODE (insn) = -1;
 		}
 	    }
 
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..799a3df 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -677,26 +677,6 @@ 
    (set_attr "length" "2")]
 )
 
-;; Similarly for 16-bit shift instructions
-;; There is no 16-bit rotate by immediate instruction.
-(define_peephole2
-  [(set (match_operand:SI   0 "low_register_operand" "")
-	(match_operator:SI  3 "shift_operator"
-	 [(match_operand:SI 1 "low_register_operand" "")
-	  (match_operand:SI 2 "low_reg_or_int_operand" "")]))]
-  "TARGET_THUMB2
-   && peep2_regno_dead_p(0, CC_REGNUM)
-   && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT)
-       || REG_P(operands[2]))"
-  [(parallel
-    [(set (match_dup 0)
-	  (match_op_dup 3
-	   [(match_dup 1)
-	    (match_dup 2)]))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_shiftsi3_short"
   [(set (match_operand:SI   0 "low_register_operand" "=l")
 	(match_operator:SI  3 "shift_operator"
@@ -715,20 +695,6 @@ 
 		      (const_string "alu_shift_reg")))]
 )
 
-;; 16-bit load immediate
-(define_peephole2
-  [(set (match_operand:QHSI 0 "low_register_operand" "")
-	(match_operand:QHSI 1 "const_int_operand" ""))]
-  "TARGET_THUMB2
-   && peep2_regno_dead_p(0, CC_REGNUM)
-   && (unsigned HOST_WIDE_INT) INTVAL(operands[1]) < 256"
-  [(parallel
-    [(set (match_dup 0)
-	  (match_dup 1))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_mov<mode>_shortim"
   [(set (match_operand:QHSI 0 "low_register_operand" "=l")
 	(match_operand:QHSI 1 "const_int_operand" "I"))
@@ -739,24 +705,6 @@ 
    (set_attr "length" "2")]
 )
 
-;; 16-bit add/sub immediate
-(define_peephole2
-  [(set (match_operand:SI 0 "low_register_operand" "")
-	(plus:SI (match_operand:SI 1 "low_register_operand" "")
-		 (match_operand:SI 2 "const_int_operand" "")))]
-  "TARGET_THUMB2
-   && peep2_regno_dead_p(0, CC_REGNUM)
-   && ((rtx_equal_p(operands[0], operands[1])
-	&& INTVAL(operands[2]) > -256 && INTVAL(operands[2]) < 256)
-       || (INTVAL(operands[2]) > -8 && INTVAL(operands[2]) < 8))"
-  [(parallel
-    [(set (match_dup 0)
-	  (plus:SI (match_dup 1)
-		   (match_dup 2)))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_addsi_short"
   [(set (match_operand:SI 0 "low_register_operand" "=l,l")
 	(plus:SI (match_operand:SI 1 "low_register_operand" "l,0")
@@ -868,35 +816,6 @@ 
    (set_attr "length" "2,4")]
 )
 
-;; 16-bit encodings of "muls" and "mul<c>".  We only use these when
-;; optimizing for size since "muls" is slow on all known
-;; implementations and since "mul<c>" will be generated by
-;; "*arm_mulsi3_v6" anyhow.  The assembler will use a 16-bit encoding
-;; for "mul<c>" whenever possible anyhow.
-(define_peephole2
-  [(set (match_operand:SI 0 "low_register_operand" "")
-        (mult:SI (match_operand:SI 1 "low_register_operand" "")
-                 (match_dup 0)))]
-  "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
-  [(parallel
-    [(set (match_dup 0)
-           (mult:SI (match_dup 0) (match_dup 1)))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
-(define_peephole2
-  [(set (match_operand:SI 0 "low_register_operand" "")
-        (mult:SI (match_dup 0)
-	         (match_operand:SI 1 "low_register_operand" "")))]
-  "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
-  [(parallel
-    [(set (match_dup 0)
-           (mult:SI (match_dup 0) (match_dup 1)))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_mulsi_short"
   [(set (match_operand:SI 0 "low_register_operand" "=l")
         (mult:SI (match_operand:SI 1 "low_register_operand" "%0")
@@ -979,19 +898,6 @@ 
 	    (const_int 8)))]
 )
 
-;; 16-bit complement
-(define_peephole2
-  [(set (match_operand:SI 0 "low_register_operand" "")
-	(not:SI (match_operand:SI 1 "low_register_operand" "")))]
-  "TARGET_THUMB2
-   && peep2_regno_dead_p(0, CC_REGNUM)"
-  [(parallel
-    [(set (match_dup 0)
-	  (not:SI (match_dup 1)))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_one_cmplsi2_short"
   [(set (match_operand:SI 0 "low_register_operand" "=l")
 	(not:SI (match_operand:SI 1 "low_register_operand" "l")))
@@ -1002,19 +908,6 @@ 
    (set_attr "length" "2")]
 )
 
-;; 16-bit negate
-(define_peephole2
-  [(set (match_operand:SI 0 "low_register_operand" "")
-	(neg:SI (match_operand:SI 1 "low_register_operand" "")))]
-  "TARGET_THUMB2
-   && peep2_regno_dead_p(0, CC_REGNUM)"
-  [(parallel
-    [(set (match_dup 0)
-	  (neg:SI (match_dup 1)))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_negsi2_short"
   [(set (match_operand:SI 0 "low_register_operand" "=l")
 	(neg:SI (match_operand:SI 1 "low_register_operand" "l")))
diff --git a/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c b/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c
new file mode 100644
index 0000000..3e43e4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c
@@ -0,0 +1,164 @@ 
+/* Check that the compiler properly uses 16-bit encodings where available.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-Os -fno-builtin" } */
+
+int
+f (int a, int b )
+{
+  return a + b;
+}
+
+/* { dg-final { scan-assembler "adds	r0, r0, r1" } } */
+
+int
+g1 (int a)
+{
+  return a + 255;
+}
+
+/* { dg-final { scan-assembler "adds	r0, r0, #255" } } */
+
+int
+g2 (int a)
+{
+  return a + 256;
+}
+
+/* { dg-final { scan-assembler "add	r0, r0, #256" } } */
+
+int
+h1 (int a, int b)
+{
+  return b + 7;
+}
+
+/* { dg-final { scan-assembler "adds	r0, r1, #7" } } */
+
+int
+h2 (int a, int b)
+{
+  return b + 8;
+}
+
+/* { dg-final { scan-assembler "add	r0, r1, #8" } } */
+
+int
+i (int a, int b)
+{
+  return b;
+}
+
+/* { dg-final { scan-assembler "mov	r0, r1" } } */
+
+int
+j1 ()
+{
+  return 255;
+}
+
+/* { dg-final { scan-assembler "movs	r0, #255" } } */
+
+int
+j2 ()
+{
+  return 256;
+}
+
+/* { dg-final { scan-assembler "mov	r0, #256" } } */
+
+int
+k (int a, int b)
+{
+  return b << 15;
+}
+
+/* { dg-final { scan-assembler "lsls	r0, r1, #15" } } */
+
+int
+l1 (int a, int b)
+{
+  return a << b;
+}
+
+/* { dg-final { scan-assembler "lsls	r0, r0, r1" } } */
+
+int
+l2 (int a, int b, int c)
+{
+  return b << c;
+}
+
+/* { dg-final { scan-assembler "lsl	r0, r1, r2" } } */
+
+int
+m (int a, int b)
+{
+  return b >> 15;
+}
+
+/* { dg-final { scan-assembler "asrs	r0, r1, #15" } } */
+
+int
+n1 (int a, int b)
+{
+  return a >> b;
+}
+
+/* { dg-final { scan-assembler "asrs	r0, r0, r1" } } */
+
+int
+n2 (int a, int b, int c)
+{
+  return b >> c;
+}
+
+/* { dg-final { scan-assembler "asr	r0, r1, r2" } } */
+
+unsigned int
+o (unsigned int a, unsigned int b)
+{
+  return b >> 15;
+}
+
+/* { dg-final { scan-assembler "lsrs	r0, r1, #15" } } */
+
+unsigned int
+p1 (unsigned int a, unsigned int b)
+{
+  return a >> b;
+}
+
+/* { dg-final { scan-assembler "lsrs	r0, r0, r1" } } */
+
+unsigned int
+p2 (unsigned int a, unsigned int b, unsigned int c)
+{
+  return b >> c;
+}
+
+/* { dg-final { scan-assembler "lsr	r0, r1, r2" } } */
+
+int
+q (int a, int b)
+{
+  return b * a;
+}
+
+/* { dg-final { scan-assembler "muls	r0, r1, r0" } } */
+
+int
+r (int a, int b)
+{
+  return ~b;
+}
+
+/* { dg-final { scan-assembler "mvns	r0, r1" } } */
+
+int
+s (int a, int b)
+{
+  return -b;
+}
+
+/* { dg-final { scan-assembler "negs	r0, r1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
new file mode 100644
index 0000000..b03bbce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
@@ -0,0 +1,19 @@ 
+/* Check that Thumb 16-bit shifts can be if-converted.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-O2" } */
+
+int
+foo (int a, int b)
+{
+  if (a != b)
+    {
+      a = a << b;
+      a = a >> 1;
+    }
+
+  return a + b;
+}
+
+/* { dg-final { scan-assembler "lslne" } } */
+/* { dg-final { scan-assembler "asrne" } } */