Patchwork [GCC/ARM] Fix big size regression in register renaming for thumb2 instruction set

login
register
mail settings
Submitter Bin Cheng
Date Nov. 30, 2012, 5:01 a.m.
Message ID <002e01cdceb7$d4d37fa0$7e7a7ee0$@cheng@arm.com>
Download mbox | patch
Permalink /patch/202862/
State New
Headers show

Comments

Bin Cheng - Nov. 30, 2012, 5:01 a.m.
Hi,
A big code size regression is introduced by the register renaming pass on the
Thumb-2 instruction set. The root cause has two aspects:
1. The pass renames LO_REGS registers into high registers, turning 2-byte
instructions into 4-byte ones. This happens because find_best_rename_reg
does not forbid renaming a LO_REGS register into a high register.
2. The pass renames two-operand instructions into three-operand ones,
turning 2-byte instructions into 4-byte ones. Although the renaming pass
already takes two-operand instructions into account, it relies on the backend
providing matching-constraint alternatives for such instructions, and these
are missing from arm.md. The affected instruction patterns are listed below:
	arm_addsi3
	arm_subsi3_insn
	arm_mulsi3_v6
	arm_andsi3_insn
	andsi_notsi_si
	iorsi3_insn
	arm_xorsi3
	arm_shiftsi3

This patch fixes the problems:
1. Change the find_best_rename_reg function.
If PREFERRED_CLASS is a register class other than NO_REGS, we do not rename
old_reg into a non-preferred register when old_reg is already in
PREFERRED_CLASS; otherwise we rename old_reg into a preferred register
whenever possible, and fall back to the other registers only when no preferred
register is available. As a side effect, the loop is reduced to a single pass.
2. Add matching-operand alternatives to arm.md for the patterns listed above.

With this patch, the code size of CSiBE built with "-mthumb -mcpu=cortex-m3 -Os
-frename-registers" improves by 0.8% on trunk, and it is now better than with
register renaming disabled.

I tested the patches on cortex-m3 with and without register renaming, and on
x86 with register renaming.

I will apply this patch to the ARM Embedded-4_7-branch and commit it to trunk
after approval.

Thanks.


2012-11-30  Bin Cheng  <bin.cheng@arm.com>

	* config/arm/arm-cores.def (cortex-m3, cortex-m4): Use v7m.
	* config/arm/arm-protos.h (tune_params): Add
	preferred_renaming_class.
	* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
	(arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune)
	(arm_cortex_tune, arm_cortex_a15_tune, arm_cortex_a5_tune)
	(arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Set
	preferred_renaming_class field.
	(arm_v7m_tune): New.
	(arm_preferred_rename_class): Return preferred renaming register
	class.
	* config/arm/arm.md (*arm_addsi3, *arm_subsi3_insn, *arm_mulsi3_v6)
	(*arm_andsi3_insn, andsi_notsi_si, *iorsi3_insn, *arm_xorsi3)
	(*arm_shiftsi3): Add alternatives for Thumb2 set.
	* regrename.c (find_best_rename_reg): Don't rename preferred
	register to non-preferred register.

Patch

Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c	(revision 193920)
+++ gcc/regrename.c	(working copy)
@@ -357,8 +357,9 @@  find_best_rename_reg (du_head_p this_head, enum re
 {
   bool has_preferred_class;
   enum reg_class preferred_class;
-  int pass;
-  int best_new_reg = old_reg;
+  int new_reg;
+  int best_reg = old_reg;
+  int best_preferred_reg = old_reg;
 
   /* Further narrow the set of registers we can use for renaming.
      If the chain needs a call-saved register, mark the call-used
@@ -374,39 +375,36 @@  find_best_rename_reg (du_head_p this_head, enum re
   preferred_class
     = (enum reg_class) targetm.preferred_rename_class (super_class);
 
-  /* If PREFERRED_CLASS is not NO_REGS, we iterate in the first pass
-     over registers that belong to PREFERRED_CLASS and try to find the
-     best register within the class.  If that failed, we iterate in
-     the second pass over registers that don't belong to the class.
-     If PREFERRED_CLASS is NO_REGS, we iterate over all registers in
-     ascending order without any preference.  */
+  /* If PREFERRED_CLASS is defined as register class other than NO_REGS:
+     we don't rename old_reg into non-preferred register if old_reg is in
+     PREFERRED_CLASS; otherwise we rename old_reg into preferred register
+     whenever possible, and only after that we try to rename it into other
+     registers.  */
   has_preferred_class = (preferred_class != NO_REGS);
-  for (pass = (has_preferred_class ? 0 : 1); pass < 2; pass++)
+  for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
     {
-      int new_reg;
-      for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
+      /* Don't rename to non-preferred register if old_reg is in
+	 PREFERRED_CLASS.  */
+      if (has_preferred_class
+	  && TEST_HARD_REG_BIT (reg_class_contents[preferred_class], old_reg)
+	  && !TEST_HARD_REG_BIT (reg_class_contents[preferred_class], new_reg))
+	continue;
+
+      if (check_new_reg_p (old_reg, new_reg, this_head, *unavailable))
 	{
+	  /* Record new_reg in best_preferred_reg if it's in PREFERRED_CLASS,
+	     otherwise record it in best_reg.  */
 	  if (has_preferred_class
-	      && (pass == 0)
-	      != TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
-				    new_reg))
-	    continue;
-
-	  /* In the first pass, we force the renaming of registers that
-	     don't belong to PREFERRED_CLASS to registers that do, even
-	     though the latters were used not very long ago.  */
-	  if (check_new_reg_p (old_reg, new_reg, this_head,
-			       *unavailable)
-	      && ((pass == 0
-		   && !TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
-					  best_new_reg))
-		  || tick[best_new_reg] > tick[new_reg]))
-	    best_new_reg = new_reg;
+	      && TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
+				    new_reg)
+	      && tick[best_preferred_reg] > tick[new_reg])
+	    best_preferred_reg = new_reg;
+	  else if (tick[best_reg] > tick[new_reg])
+	    best_reg = new_reg;
 	}
-      if (pass == 0 && best_new_reg != old_reg)
-	break;
     }
-  return best_new_reg;
+
+  return (best_preferred_reg != old_reg) ? best_preferred_reg : best_reg;
 }
 
 /* Perform register renaming on the current function.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 193920)
+++ gcc/config/arm/arm.c	(working copy)
@@ -895,6 +895,7 @@  const struct tune_params arm_slowmul_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -908,6 +909,7 @@  const struct tune_params arm_fastmul_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -924,6 +926,7 @@  const struct tune_params arm_strongarm_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_xscale_tune =
@@ -937,6 +940,7 @@  const struct tune_params arm_xscale_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_9e_tune =
@@ -950,6 +954,7 @@  const struct tune_params arm_9e_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -963,6 +968,7 @@  const struct tune_params arm_v6t2_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -977,6 +983,7 @@  const struct tune_params arm_cortex_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_cortex_a15_tune =
@@ -990,6 +997,7 @@  const struct tune_params arm_cortex_a15_tune =
   arm_default_branch_cost,
   true,						/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1006,6 +1014,7 @@  const struct tune_params arm_cortex_a5_tune =
   arm_cortex_a5_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {false, false},				/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -1019,7 +1028,22 @@  const struct tune_params arm_cortex_a9_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
+ 
+const struct tune_params arm_v7m_tune =
+{
+  arm_9e_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,					/* Prefer constant pool.  */
+  arm_default_branch_cost,
+  false,					/* Prefer LDRD/STRD.  */
+  {true, true},					/* Prefer non short circuit.  */
+  LO_REGS,					/* Preferred rename class.  */
+};
 
 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
    arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
@@ -1034,6 +1058,7 @@  const struct tune_params arm_v6m_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {false, false},				/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -1047,6 +1072,7 @@  const struct tune_params arm_fa726te_tune =
   arm_default_branch_cost,
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 
@@ -25727,10 +25753,11 @@  arm_preferred_rename_class (reg_class_t rclass)
   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
      using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
      and code size can be reduced.  */
-  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
-    return LO_REGS;
+  if (optimize_size)
+    return (TARGET_THUMB2 && reg_class_subset_p (rclass, CORE_REGS))
+	    ? LO_REGS : NO_REGS;
   else
-    return NO_REGS;
+    return TARGET_THUMB2 ? current_tune->preferred_renaming_class : NO_REGS;
 }
 
 /* Compute the atrribute "length" of insn "*push_multi".
Index: gcc/config/arm/arm-cores.def
===================================================================
--- gcc/config/arm/arm-cores.def	(revision 193920)
+++ gcc/config/arm/arm-cores.def	(working copy)
@@ -133,8 +133,8 @@  ARM_CORE("cortex-a15",	  cortexa15,	7A,				 FL_LDS
 ARM_CORE("cortex-r4",	  cortexr4,	7R,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-r4f",	  cortexr4f,	7R,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-r5",	  cortexr5,	7R,				 FL_LDSCHED | FL_ARM_DIV, cortex)
-ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, cortex)
-ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, v7m)
+ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, v7m)
 ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, v6m)
 ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, v6m)
 ARM_CORE("cortex-m0plus", cortexm0plus,	6M,				 FL_LDSCHED, v6m)
Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h	(revision 193920)
+++ gcc/config/arm/arm-protos.h	(working copy)
@@ -247,6 +247,8 @@  struct tune_params
      performance. The first element covers Thumb state and the second one
      is for ARM state.  */
   bool logical_op_non_short_circuit[2];
+  /* Preferred reg class for register renaming.  */
+  enum reg_class preferred_renaming_class;
 };
 
 extern const struct tune_params *current_tune;
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 193920)
+++ gcc/config/arm/arm.md	(working copy)
@@ -780,11 +780,12 @@ 
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=rk, r,k, r,r, k, r, k,k,r, k, r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "%0, rk,k, r,rk,k, rk,k,r,rk,k, rk")
-		 (match_operand:SI 2 "reg_or_int_operand" "rk, rI,rI,k,Pj,Pj,L, L,L,PJ,PJ,?n")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=l, rk, r,k, r,r, k, l, r, k,k,r, k, r")
+	(plus:SI (match_operand:SI 1 "s_register_operand" "%0, 0, rk,k, r,rk,k, 0, rk,k,r,rk,k, rk")
+		 (match_operand:SI 2 "reg_or_int_operand" "Py,rk, rI,rI,k,Pj,Pj,Pv,L, L,L,PJ,PJ,?n")))]
   "TARGET_32BIT"
   "@
+   add%?\\t%0, %1, %2
    add%?\\t%0, %0, %2
    add%?\\t%0, %1, %2
    add%?\\t%0, %1, %2
@@ -794,6 +795,7 @@ 
    sub%?\\t%0, %1, #%n2
    sub%?\\t%0, %1, #%n2
    sub%?\\t%0, %1, #%n2
+   sub%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    #"
@@ -808,9 +810,9 @@ 
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,16")
+  [(set_attr "length" "2,2,4,4,4,4,4,2,4,4,4,4,4,16")
    (set_attr "predicable" "yes")
-   (set_attr "arch" "t2,*,*,*,t2,t2,*,*,a,t2,t2,*")]
+   (set_attr "arch" "t2,t2,*,*,*,t2,t2,t2,*,*,a,t2,t2,*")]
 )
 
 (define_insn_and_split "*thumb1_addsi3"
@@ -1267,14 +1269,15 @@ 
 
 ; ??? Check Thumb-2 split length
 (define_insn_and_split "*arm_subsi3_insn"
-  [(set (match_operand:SI           0 "s_register_operand" "=r,r,rk,r")
-	(minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n")
-		  (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
+  [(set (match_operand:SI           0 "s_register_operand" "=r,l, r,rk,r")
+	(minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,0, r,k,?n")
+		  (match_operand:SI 2 "reg_or_int_operand" "r,Py,rI,r, r")))]
   "TARGET_32BIT"
   "@
    rsb%?\\t%0, %2, %1
    sub%?\\t%0, %1, %2
    sub%?\\t%0, %1, %2
+   sub%?\\t%0, %1, %2
    #"
   "&& (CONST_INT_P (operands[1])
        && !const_ok_for_arm (INTVAL (operands[1])))"
@@ -1284,8 +1287,9 @@ 
                       INTVAL (operands[1]), operands[0], operands[2], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "4,2,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "*,t2,*,*,*")]
 )
 
 (define_peephole2
@@ -1390,13 +1394,15 @@ 
 )
 
 (define_insn "*arm_mulsi3_v6"
-  [(set (match_operand:SI          0 "s_register_operand" "=r")
-	(mult:SI (match_operand:SI 1 "s_register_operand" "r")
-		 (match_operand:SI 2 "s_register_operand" "r")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=l,r")
+	(mult:SI (match_operand:SI 1 "s_register_operand" "%l,r")
+		 (match_operand:SI 2 "s_register_operand" "0, r")))]
   "TARGET_32BIT && arm_arch6"
   "mul%?\\t%0, %1, %2"
-  [(set_attr "insn" "mul")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4")
+   (set_attr "insn" "mul")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*")]
 )
 
 ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands 
@@ -2204,12 +2210,14 @@ 
 
 ; ??? Check split length for Thumb-2
 (define_insn_and_split "*arm_andsi3_insn"
-  [(set (match_operand:SI         0 "s_register_operand" "=r,r,r")
-	(and:SI (match_operand:SI 1 "s_register_operand" "r,r,r")
-		(match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+  [(set (match_operand:SI         0 "s_register_operand" "=l,l,r,r,r")
+	(and:SI (match_operand:SI 1 "s_register_operand" "0, l,r,r,r")
+		(match_operand:SI 2 "reg_or_int_operand" "l, 0,rI,K,?n")))]
   "TARGET_32BIT"
   "@
    and%?\\t%0, %1, %2
+   and%?\\t%0, %2, %1
+   and%?\\t%0, %1, %2
    bic%?\\t%0, %1, #%B2
    #"
   "TARGET_32BIT
@@ -2222,8 +2230,9 @@ 
 	               INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,2,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,t2,*,*,*")]
 )
 
 (define_insn "*thumb1_andsi3_insn"
@@ -2800,12 +2809,14 @@ 
 )
   
 (define_insn "andsi_notsi_si"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
-		(match_operand:SI 1 "s_register_operand" "r")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "l,r"))
+		(match_operand:SI 1 "s_register_operand" "0,r")))]
   "TARGET_32BIT"
   "bic%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*")]
 )
 
 (define_insn "thumb1_bicsi3"
@@ -2930,12 +2941,13 @@ 
 )
 
 (define_insn_and_split "*iorsi3_insn"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
-	(ior:SI (match_operand:SI 1 "s_register_operand" "%r,r,r")
-		(match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r,r,r")
+	(ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r,r")
+		(match_operand:SI 2 "reg_or_int_operand" "l,rI,K,?n")))]
   "TARGET_32BIT"
   "@
    orr%?\\t%0, %1, %2
+   orr%?\\t%0, %1, %2
    orn%?\\t%0, %1, #%B2
    #"
   "TARGET_32BIT
@@ -2948,8 +2960,8 @@ 
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
-  [(set_attr "length" "4,4,16")
-   (set_attr "arch" "32,t2,32")
+  [(set_attr "length" "2,4,4,16")
+   (set_attr "arch" "t2,32,t2,32")
    (set_attr "predicable" "yes")])
 
 (define_insn "*thumb1_iorsi3_insn"
@@ -3068,12 +3080,13 @@ 
 )
 
 (define_insn_and_split "*arm_xorsi3"
-  [(set (match_operand:SI         0 "s_register_operand" "=r,r")
-	(xor:SI (match_operand:SI 1 "s_register_operand" "%r,r")
-		(match_operand:SI 2 "reg_or_int_operand" "rI,?n")))]
+  [(set (match_operand:SI         0 "s_register_operand" "=l,r,r")
+	(xor:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
+		(match_operand:SI 2 "reg_or_int_operand" "l,rI,?n")))]
   "TARGET_32BIT"
   "@
    eor%?\\t%0, %1, %2
+   eor%?\\t%0, %1, %2
    #"
   "TARGET_32BIT
    && CONST_INT_P (operands[2])
@@ -3084,8 +3097,9 @@ 
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
-  [(set_attr "length" "4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*,*")]
 )
 
 (define_insn "*thumb1_xorsi3_insn"
@@ -3818,17 +3832,19 @@ 
 )
 
 (define_insn "*arm_shiftsi3"
-  [(set (match_operand:SI   0 "s_register_operand" "=r")
+  [(set (match_operand:SI   0 "s_register_operand" "=l,r")
 	(match_operator:SI  3 "shift_operator"
-	 [(match_operand:SI 1 "s_register_operand"  "r")
-	  (match_operand:SI 2 "reg_or_int_operand" "rM")]))]
+	 [(match_operand:SI 1 "s_register_operand"  "0,r")
+	  (match_operand:SI 2 "reg_or_int_operand" "l,rM")]))]
   "TARGET_32BIT"
   "* return arm_output_shift(operands, 0);"
-  [(set_attr "predicable" "yes")
+  [(set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
    (set_attr "shift" "1")
    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
 		      (const_string "alu_shift")
-		      (const_string "alu_shift_reg")))]
+		      (const_string "alu_shift_reg")))
+   (set_attr "arch" "t2,*")]
 )
 
 (define_insn "*shiftsi3_compare0"