Patchwork [SH] PR 54089 - Add support for rotcl instruction

login
register
mail settings
Submitter Oleg Endo
Date Nov. 5, 2012, 9:05 a.m.
Message ID <1352106318.8110.114.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/197164/
State New
Headers show

Comments

Oleg Endo - Nov. 5, 2012, 9:05 a.m.
Hello,

This patch adds support for SH's rotcl instruction.
While working on it, I've noticed that the DImode left shift by one insn
was not used anymore, and instead ended up as 'x + x'.  This
transformation was happening before/during RTL expansion.  The fix for
it was to adjust the costs for DImode plus/minus.

Tested on rev 193135 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
OK?

Cheers,
Oleg

gcc/ChangeLog:

	PR target/54089
	* config/sh/sh.c (and_xor_ior_costs, addsubcosts): Double the 	
	costs for ops larger than SImode.
	* config/sh/sh.md (rotcl, *rotcl): New insns and splits.
	(ashldi3_k): Convert to insn_and_split and use new rotcl insn.

testsuite/ChangeLog:

	PR target/54089
	* gcc.target/sh/pr54089-8.c: New.
	* gcc.target/sh/pr54089-9.c: New.
Kaz Kojima - Nov. 6, 2012, 10:35 a.m.
Oleg Endo <oleg.endo@t-online.de> wrote:
> This patch adds support for SH's rotcl instruction.
> While working on it, I've noticed that the DImode left shift by one insn
> was not used anymore, and instead ended up as 'x + x'.  This
> transformation was happening before/during RTL expansion.  The fix for
> it was to adjust the costs for DImode plus/minus.
> 
> Tested on rev 193135 with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> OK?

OK.

Regards,
	kaz

Patch

Index: gcc/testsuite/gcc.target/sh/pr54089-9.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr54089-9.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr54089-9.c	(revision 0)
@@ -0,0 +1,63 @@ 
+/* Check that the rotcr instruction is generated.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } }  */
+/* { dg-final { scan-assembler-times "rotcl" 4 } } */
+/* { dg-final { scan-assembler-not "movt" } } */
+/* { dg-final { scan-assembler-not "or\t" } } */
+/* { dg-final { scan-assembler-not "rotl" } } */
+/* { dg-final { scan-assembler-not "and" } } */
+
+typedef char bool;
+
+int
+test_00 (int* a, int* b)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i < 16; ++i)
+    {
+      bool t = a[i] == b[i];
+      r = (r << 1) | t;
+    }
+  return r;
+}
+
+int
+test_01 (int* a, int* b)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i < 16; ++i)
+    {
+      bool t = a[i] == b[i];
+      r = (r << 2) | t;
+    }
+  return r;
+}
+
+int
+test_02 (int* a, int* b)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i < 16; ++i)
+    {
+      bool t = a[i] == b[i];
+      r = (r << 3) | t;
+    }
+  return r;
+}
+
+int
+test_03 (const bool* a)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i < 16; ++i)
+    {
+      bool t = a[i];
+      r = (r << 1) | (t & 1);
+    }
+  return r;
+}
Index: gcc/testsuite/gcc.target/sh/pr54089-8.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr54089-8.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr54089-8.c	(revision 0)
@@ -0,0 +1,203 @@ 
+/* Check that the rotcl instruction is generated.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } }  */
+/* { dg-final { scan-assembler-times "rotcl" 28 } } */
+
+typedef char bool;
+
+long long
+test_00 (long long a)
+{
+  return a << 1;
+}
+
+unsigned int
+test_01 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 1) | r);
+}
+
+unsigned int
+test_02 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 2) | r);
+}
+
+unsigned int
+test_03 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 3) | r);
+}
+
+unsigned int
+test_04 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 4) | r);
+}
+
+unsigned int
+test_05 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 5) | r);
+}
+
+unsigned int
+test_06 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 6) | r);
+}
+
+unsigned int
+test_07 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 7) | r);
+}
+
+unsigned int
+test_08 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 8) | r);
+}
+
+unsigned int
+test_09 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a << 31) | r);
+}
+
+unsigned int
+test_10 (unsigned int a, int b)
+{
+  /* 1x shlr, 1x rotcl  */
+  return (a << 1) | (b & 1);
+}
+
+unsigned int
+test_11 (unsigned int a, int b)
+{
+  /* 1x shlr, 1x rotcl (+1x add as shll)  */
+  return (a << 2) | (b & 1);
+}
+
+unsigned int
+test_12 (unsigned int a, int b)
+{
+  /* 1x shlr, 1x shll2, 1x rotcl  */
+  return (a << 3) | (b & 1);
+}
+
+unsigned int
+test_13 (unsigned int a, int b)
+{
+  /* 1x shll, 1x rotcl  */
+  bool r = b < 0;
+  return (a << 1) | r;
+}
+
+unsigned int
+test_14 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 1) | r);
+}
+
+unsigned int
+test_15 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 11) | r);
+}
+
+unsigned int
+test_16 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 3) | r);
+}
+
+unsigned int
+test_17 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 4) | r);
+}
+
+unsigned int
+test_18 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 5) | r);
+}
+
+unsigned int
+test_19 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 6) | r);
+}
+
+unsigned int
+test_20 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 7) | r);
+}
+
+unsigned int
+test_21 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 8) | r);
+}
+
+unsigned int
+test_22 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a << 31) | r);
+}
+
+unsigned int
+test_23 (unsigned int a, int b, int c)
+{
+  /* 1x shll, 1x rotcl  */
+  return (a >> 31) | (b << 13);
+}
+
+unsigned int
+test_24 (unsigned int a, unsigned int b)
+{
+  /* 1x shll, 1x rotcl  */
+  return (a >> 31) | (b << 1);
+}
+
+unsigned int
+test_25 (unsigned int a, unsigned int b)
+{
+  /* 1x shll, 1x rotcl  */
+  return (a >> 31) | (b << 3);
+}
+
+unsigned int
+test_26 (unsigned int a, unsigned int b)
+{
+  /* 1x shll, 1x rotcl  */
+  return (b << 3) | (a >> 31);
+}
+
+unsigned int
+test_27 (unsigned int a, unsigned int b)
+{
+  /* 1x shlr, 1x rotcl  */
+  return (a << 1) | ((b >> 4) & 1);
+}
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 193135)
+++ gcc/config/sh/sh.c	(working copy)
@@ -3224,14 +3224,18 @@ 
 static inline int
 and_xor_ior_costs (rtx x, int code)
 {
-  int i;
+  /* On SH1-4 we have only max. SImode operations.
+     Double the cost for modes > SImode.  */
+  const int cost_scale = !TARGET_SHMEDIA
+			 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
+			 ? 2 : 1;
 
   /* A logical operation with two registers is a single cycle
      instruction.  */
   if (!CONST_INT_P (XEXP (x, 1)))
-    return 1;
+    return 1 * cost_scale;
 
-  i = INTVAL (XEXP (x, 1));
+  int i = INTVAL (XEXP (x, 1));
 
   if (TARGET_SHMEDIA)
     {
@@ -3244,19 +3248,19 @@ 
 
   /* These constants are single cycle extu.[bw] instructions.  */
   if ((i == 0xff || i == 0xffff) && code == AND)
-    return 1;
+    return 1 * cost_scale;
   /* Constants that can be used in an instruction as an immediate are
      a single cycle, but this requires r0, so make it a little more
      expensive.  */
   if (CONST_OK_FOR_K08 (i))
-    return 2;
+    return 2 * cost_scale;
   /* Constants that can be loaded with a mov immediate need one more cycle.
      This case is probably unnecessary.  */
   if (CONST_OK_FOR_I08 (i))
-    return 2;
+    return 2 * cost_scale;
   /* Any other constant requires an additional 2 cycle pc-relative load.
      This case is probably unnecessary.  */
-  return 3;
+  return 3 * cost_scale;
 }
 
 /* Return the cost of an addition or a subtraction.  */
@@ -3264,15 +3268,21 @@ 
 static inline int
 addsubcosts (rtx x)
 {
+  /* On SH1-4 we have only max. SImode operations.
+     Double the cost for modes > SImode.  */
+  const int cost_scale = !TARGET_SHMEDIA
+			 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
+			 ? 2 : 1;
+
   /* Adding a register is a single cycle insn.  */
   if (REG_P (XEXP (x, 1))
       || GET_CODE (XEXP (x, 1)) == SUBREG)
-    return 1;
+    return 1 * cost_scale;
 
   /* Likewise for small constants.  */
   if (CONST_INT_P (XEXP (x, 1))
       && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
-    return 1;
+    return 1 * cost_scale;
 
   if (TARGET_SHMEDIA)
     switch (GET_CODE (XEXP (x, 1)))
@@ -3297,7 +3307,7 @@ 
 
   /* Any other constant requires a 2 cycle pc-relative load plus an
      addition.  */
-  return 3;
+  return 3 * cost_scale;
 }
 
 /* Return the cost of a multiply.  */
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 193135)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4006,10 +4006,11 @@ 
     FAIL;
 })
 
-;; The rotcr insn is used primarily in DImode right shifts (arithmetic
-;; and logical).  It can also be used to implement things like
+;; The rotcr and rotcl insns are used primarily in DImode shifts by one.
+;; They can also be used to implement things like
 ;;	bool t = a == b;
-;;	int x = (y >> 1) | (t << 31);
+;;	int x0 = (y >> 1) | (t << 31);	// rotcr
+;;	int x1 = (y << 1) | t;		// rotcl
 (define_insn "rotcr"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
 	(ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
@@ -4022,6 +4023,17 @@ 
   "rotcr	%0"
   [(set_attr "type" "arith")])
 
+(define_insn "rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+			   (const_int 1))
+		(match_operand:SI 2 "t_reg_operand")))
+   (set (reg:SI T_REG)
+	(lshiftrt:SI (match_dup 1) (const_int 31)))]
+  "TARGET_SH1"
+  "rotcl	%0"
+  [(set_attr "type" "arith")])
+
 ;; Simplified rotcr version for combine, which allows arbitrary shift
 ;; amounts for the reg.  If the shift amount is '1' rotcr can be used
 ;; directly.  Otherwise we have to insert a shift in between.
@@ -4121,6 +4133,160 @@ 
 			   (ashift:SI (match_dup 1) (const_int 31))))
 	      (clobber (reg:SI T_REG))])])
 
+;; Basically the same as the rotcr pattern above, but for rotcl.
+;; FIXME: Fold copy pasted split code for rotcr and rotcl.
+(define_insn_and_split "*rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+			   (match_operand:SI 2 "const_int_operand"))
+		(and:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand")
+			(const_int 1))))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  gcc_assert (INTVAL (operands[2]) > 0);
+
+  if (INTVAL (operands[2]) > 1)
+    {
+      const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1);
+      rtx prev_set_t_insn = NULL_RTX;
+      rtx tmp_t_reg = NULL_RTX;
+
+      /* If we're going to emit a shift sequence that clobbers the T_REG,
+	 try to find the previous insn that sets the T_REG and emit the 
+	 shift insn before that insn, to remove the T_REG dependency.
+	 If the insn that sets the T_REG cannot be found, store the T_REG
+	 in a temporary reg and restore it after the shift.  */
+      if (sh_ashlsi_clobbers_t_reg_p (shift_count)
+	  && ! sh_dynamicalize_shift_p (shift_count))
+	{
+	  prev_set_t_insn = prev_nonnote_insn_bb (curr_insn);
+
+	  /* Skip the nott insn, which was probably inserted by the splitter
+	     of *rotcl_neg_t.  Don't use one of the recog functions
+	     here during insn splitting, since that causes problems in later
+	     passes.  */
+	  if (prev_set_t_insn != NULL_RTX)
+	    {
+	      rtx pat = PATTERN (prev_set_t_insn);
+	      if (GET_CODE (pat) == SET
+		  && t_reg_operand (XEXP (pat, 0), SImode)
+		  && negt_reg_operand (XEXP (pat, 1), SImode))
+	      prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn);
+	    }
+
+	  if (! (prev_set_t_insn != NULL_RTX
+		 && reg_set_p (get_t_reg_rtx (), prev_set_t_insn)
+		 && ! reg_referenced_p (get_t_reg_rtx (),
+					PATTERN (prev_set_t_insn))))
+	    {
+	      prev_set_t_insn = NULL_RTX;
+	      tmp_t_reg = gen_reg_rtx (SImode);
+	      emit_insn (gen_move_insn (tmp_t_reg, get_t_reg_rtx ()));
+	    } 
+	}
+
+      rtx shift_result = gen_reg_rtx (SImode);
+      rtx shift_insn = gen_ashlsi3 (shift_result, operands[1], shift_count);
+      operands[1] = shift_result;
+
+      /* Emit the shift insn before the insn that sets T_REG, if possible.  */
+      if (prev_set_t_insn != NULL_RTX)
+	emit_insn_before (shift_insn, prev_set_t_insn);
+      else
+	emit_insn (shift_insn);
+
+      /* Restore T_REG if it has been saved before.  */
+      if (tmp_t_reg != NULL_RTX)
+	emit_insn (gen_cmpgtsi_t (tmp_t_reg, const0_rtx));
+    }
+
+  /* For the rotcl insn to work, operands[3] must be in T_REG.
+     If it is not we can get it there by shifting it right one bit.
+     In this case T_REG is not an input for this insn, thus we don't have to
+     pay attention as of where to insert the shlr insn.  */
+  if (! t_reg_operand (operands[3], SImode))
+    {
+      /* We don't care about the shifted result here, only the T_REG.  */
+      emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[3]));
+      operands[3] = get_t_reg_rtx ();
+    }
+
+  emit_insn (gen_rotcl (operands[0], operands[1], operands[3]));
+  DONE;
+})
+
+;; rotcl combine pattern variations
+(define_insn_and_split "*rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+			   (match_operand:SI 2 "const_int_operand"))
+		(match_operand:SI 3 "t_reg_operand")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 1) (match_dup 2))
+			   (and:SI (match_dup 3) (const_int 1))))
+	      (clobber (reg:SI T_REG))])])
+
+(define_insn_and_split "*rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (and:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand")
+			(const_int 1))
+		(ashift:SI (match_operand:SI 2 "arith_reg_operand")
+			   (match_operand:SI 3 "const_int_operand"))))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 2) (match_dup 3))
+			   (and:SI (match_dup 1) (const_int 1))))
+	      (clobber (reg:SI T_REG))])])
+
+(define_insn_and_split "*rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+			   (match_operand:SI 2 "const_int_operand"))
+		(lshiftrt:SI (match_operand:SI 3 "arith_reg_operand")
+			     (const_int 31))))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 1) (match_dup 2))
+			   (and:SI (reg:SI T_REG) (const_int 1))))
+	      (clobber (reg:SI T_REG))])]
+{
+  /* We don't care about the result of the left shift, only the T_REG.  */
+  emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3]));
+})
+
+(define_insn_and_split "*rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand")
+			     (const_int 31))
+		(ashift:SI (match_operand:SI 1 "arith_reg_operand")
+			   (match_operand:SI 2 "const_int_operand"))))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 1) (match_dup 2))
+			   (and:SI (reg:SI T_REG) (const_int 1))))
+	      (clobber (reg:SI T_REG))])]
+{
+  /* We don't care about the result of the left shift, only the T_REG.  */
+  emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3]));
+})
+
 ;; rotcr combine bridge pattern which will make combine try out more
 ;; complex patterns.
 (define_insn_and_split "*rotcr"
@@ -4189,6 +4355,35 @@ 
   emit_insn (gen_nott (get_t_reg_rtx ()));
 })
 
+;; rotcl combine patterns for rotating in the negated T_REG value.
+;; For some strange reason these have to be specified as splits which combine
+;; will pick up.  If they are specified as insn_and_split like the
+;; *rotcr_neg_t patterns above, combine would recognize them successfully
+;; but not emit them on non-SH2A targets.
+(define_split
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (match_operand:SI 1 "negt_reg_operand")
+		(ashift:SI (match_operand:SI 2 "arith_reg_operand")
+			   (match_operand:SI 3 "const_int_operand"))))]
+  "TARGET_SH1"
+  [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))
+   (parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 2) (match_dup 3))
+			   (and:SI (reg:SI T_REG) (const_int 1))))
+	      (clobber (reg:SI T_REG))])])
+
+(define_split
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(ior:SI (ashift:SI (match_operand:SI 2 "arith_reg_operand")
+			   (match_operand:SI 3 "const_int_operand"))
+		(match_operand:SI 1 "negt_reg_operand")))]
+  "TARGET_SH1"
+  [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))
+   (parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 2) (match_dup 3))
+			   (and:SI (reg:SI T_REG) (const_int 1))))
+	      (clobber (reg:SI T_REG))])])
+
 ;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 ;; SImode shift left
 
@@ -4480,16 +4675,22 @@ 
   DONE;
 })
 
-;; This should be an define_insn_and_split.
-(define_insn "ashldi3_k"
+(define_insn_and_split "ashldi3_k"
   [(set (match_operand:DI 0 "arith_reg_dest" "=r")
 	(ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
 		   (const_int 1)))
    (clobber (reg:SI T_REG))]
   "TARGET_SH1"
-  "shll	%R0\;rotcl	%S0"
-  [(set_attr "length" "4")
-   (set_attr "type" "arith")])
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx high = gen_highpart (SImode, operands[0]);
+  rtx low = gen_lowpart (SImode, operands[0]);
+  emit_insn (gen_shll (low, low));
+  emit_insn (gen_rotcl (high, high, get_t_reg_rtx ()));
+  DONE;
+})
 
 (define_insn "ashldi3_media"
   [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")