Patchwork [ARM-Embedded-4_7/Commit] Fix size regression of regrename on Thumb2

login
register
mail settings
Submitter Bin Cheng
Date Nov. 30, 2012, 6:05 a.m.
Message ID <003601cdcec0$b9a91100$2cfb3300$@cheng@arm.com>
Download mbox | patch
Permalink /patch/202870/
State New
Headers show

Comments

Bin Cheng - Nov. 30, 2012, 6:05 a.m.
Hi,
I committed the patch fixing the size regression of regrename on Thumb2 to the
ARM Embedded-4_7 branch as r193980.

Thanks.

gcc/ChangeLog.arm
2012-11-30  Bin Cheng  <bin.cheng@arm.com>

	* config/arm/arm-protos.h (tune_params): Add
	preferred_renaming_class.
	* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
	(arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune)
	(arm_cortex_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
	(arm_cortex_v7m_tune, arm_cortex_v6m_tune, arm_fa726te_tune): Set
	preferred_renaming_class field.
	(arm_preferred_rename_class): Return preferred renaming register
	class.
	* config/arm/arm.md (*arm_addsi3, *arm_subsi3_insn, *arm_mulsi3_v6)
	(*arm_andsi3_insn, andsi_notsi_si, *iorsi3_insn, *arm_xorsi3)
	(*arm_shiftsi3): Add alternatives for Thumb2 set.
	* regrename.c (find_best_rename_reg): Don't rename preferred
	register to non-preferred register.

Patch

Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c	(revision 193979)
+++ gcc/regrename.c	(revision 193980)
@@ -358,8 +358,9 @@ 
 {
   bool has_preferred_class;
   enum reg_class preferred_class;
-  int pass;
-  int best_new_reg = old_reg;
+  int new_reg;
+  int best_reg = old_reg;
+  int best_preferred_reg = old_reg;
 
   /* Further narrow the set of registers we can use for renaming.
      If the chain needs a call-saved register, mark the call-used
@@ -375,39 +376,36 @@ 
   preferred_class
     = (enum reg_class) targetm.preferred_rename_class (super_class);
 
-  /* If PREFERRED_CLASS is not NO_REGS, we iterate in the first pass
-     over registers that belong to PREFERRED_CLASS and try to find the
-     best register within the class.  If that failed, we iterate in
-     the second pass over registers that don't belong to the class.
-     If PREFERRED_CLASS is NO_REGS, we iterate over all registers in
-     ascending order without any preference.  */
+  /* If PREFERRED_CLASS is defined as register class other than NO_REGS:
+     we don't rename old_reg into non-preferred register if old_reg is in
+     PREFERRED_CLASS; otherwise we rename old_reg into preferred register
+     whenever possible, and only after that we try to rename it into other
+     registers.  */
   has_preferred_class = (preferred_class != NO_REGS);
-  for (pass = (has_preferred_class ? 0 : 1); pass < 2; pass++)
+  for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
     {
-      int new_reg;
-      for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
+      /* Don't rename to non-preferred register if old_reg is in
+	 PREFERRED_CLASS.  */
+      if (has_preferred_class
+	  && TEST_HARD_REG_BIT (reg_class_contents[preferred_class], old_reg)
+	  && !TEST_HARD_REG_BIT (reg_class_contents[preferred_class], new_reg))
+	continue;
+
+      if (check_new_reg_p (old_reg, new_reg, this_head, *unavailable))
 	{
+	  /* Record new_reg in best_preferred_reg if it's in PREFERRED_CLASS,
+	     otherwise record it in best_reg.  */
 	  if (has_preferred_class
-	      && (pass == 0)
-	      != TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
-				    new_reg))
-	    continue;
-
-	  /* In the first pass, we force the renaming of registers that
-	     don't belong to PREFERRED_CLASS to registers that do, even
-	     though the latters were used not very long ago.  */
-	  if (check_new_reg_p (old_reg, new_reg, this_head,
-			       *unavailable)
-	      && ((pass == 0
-		   && !TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
-					  best_new_reg))
-		  || tick[best_new_reg] > tick[new_reg]))
-	    best_new_reg = new_reg;
+	      && TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
+				    new_reg)
+	      && tick[best_preferred_reg] > tick[new_reg])
+	    best_preferred_reg = new_reg;
+	  else if (tick[best_reg] > tick[new_reg])
+	    best_reg = new_reg;
 	}
-      if (pass == 0 && best_new_reg != old_reg)
-	break;
     }
-  return best_new_reg;
+
+  return (best_preferred_reg != old_reg) ? best_preferred_reg : best_reg;
 }
 
 /* Perform register renaming on the current function.  */
Index: gcc/ChangeLog.arm
===================================================================
--- gcc/ChangeLog.arm	(revision 193979)
+++ gcc/ChangeLog.arm	(revision 193980)
@@ -1,5 +1,22 @@ 
 2012-11-30  Bin Cheng  <bin.cheng@arm.com>
 
+	* config/arm/arm-protos.h (tune_params): Add
+	preferred_renaming_class.
+	* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
+	(arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune)
+	(arm_cortex_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
+	(arm_cortex_v7m_tune, arm_cortex_v6m_tune, arm_fa726te_tune): Set
+	preferred_renaming_class field.
+	(arm_preferred_rename_class): Return preferred renaming register
+	class.
+	* config/arm/arm.md (*arm_addsi3, *arm_subsi3_insn, *arm_mulsi3_v6)
+	(*arm_andsi3_insn, andsi_notsi_si, *iorsi3_insn, *arm_xorsi3)
+	(*arm_shiftsi3): Add alternatives for Thumb2 set.
+	* regrename.c (find_best_rename_reg): Don't rename preferred
+	register to non-preferred register.
+
+2012-11-30  Bin Cheng  <bin.cheng@arm.com>
+
 	* config/arm/arm.c (arm_option_override): Disable option
 	-fira-hoist-pressure on Thumb2.
 
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 193979)
+++ gcc/config/arm/arm.c	(revision 193980)
@@ -883,6 +883,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -896,6 +897,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -912,6 +914,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_xscale_tune =
@@ -925,6 +928,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_9e_tune =
@@ -938,6 +942,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -951,6 +956,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -965,6 +971,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -981,6 +988,7 @@ 
   arm_cortex_a5_branch_cost,
   arm_default_unroll_times,
   {false, false},				/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -994,6 +1002,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1008,6 +1017,7 @@ 
   arm_cortex_v7m_branch_cost,
   arm_cortex_m_unroll_times,
   {false, false},				/* Prefer non short circuit.  */
+  LO_REGS,					/* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1022,6 +1032,7 @@ 
   arm_default_branch_cost,
   arm_cortex_m_unroll_times,
   {false, false},				/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -1035,6 +1046,7 @@ 
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},					/* Prefer non short circuit.  */
+  NO_REGS,					/* Preferred rename class.  */
 };
 
 
@@ -24834,10 +24846,11 @@ 
   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
      using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
      and code size can be reduced.  */
-  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
-    return LO_REGS;
+  if (optimize_size)
+    return (TARGET_THUMB2 && reg_class_subset_p (rclass, CORE_REGS))
+	    ? LO_REGS : NO_REGS;
   else
-    return NO_REGS;
+    return TARGET_THUMB2 ? current_tune->preferred_renaming_class : NO_REGS;
 }
 
 /* Compute the atrribute "length" of insn "*push_multi".
Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h	(revision 193979)
+++ gcc/config/arm/arm-protos.h	(revision 193980)
@@ -243,6 +243,8 @@ 
      performance. The first element covers Thumb state and the second one
      is for ARM state.  */
   bool logical_op_non_short_circuit[2];
+  /* Preferred reg class for register renaming.  */
+  enum reg_class preferred_renaming_class;
 };
 
 extern const struct tune_params *current_tune;
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 193979)
+++ gcc/config/arm/arm.md	(revision 193980)
@@ -718,18 +718,20 @@ 
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
-		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=l, r, k,r,r, k, l, r, k,r, k, r")
+	(plus:SI (match_operand:SI 1 "s_register_operand" "%0, rk,k,r,rk,k, 0, rk,k,rk,k, rk")
+		 (match_operand:SI 2 "reg_or_int_operand" "Py,rI,rI,k,Pj,Pj,Pv,L, L,PJ,PJ,?n")))]
   "TARGET_32BIT"
   "@
    add%?\\t%0, %1, %2
    add%?\\t%0, %1, %2
+   add%?\\t%0, %1, %2
    add%?\\t%0, %2, %1
    addw%?\\t%0, %1, %2
    addw%?\\t%0, %1, %2
    sub%?\\t%0, %1, #%n2
    sub%?\\t%0, %1, #%n2
+   sub%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    #"
@@ -744,9 +746,9 @@ 
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+  [(set_attr "length" "2,4,4,4,4,4,2,4,4,4,4,16")
    (set_attr "predicable" "yes")
-   (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
+   (set_attr "arch" "t2,*,*,*,t2,t2,t2,*,*,t2,t2,*")]
 )
 
 (define_insn_and_split "*thumb1_addsi3"
@@ -1214,14 +1216,15 @@ 
 
 ; ??? Check Thumb-2 split length
 (define_insn_and_split "*arm_subsi3_insn"
-  [(set (match_operand:SI           0 "s_register_operand" "=r,r,rk,r")
-	(minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n")
-		  (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
+  [(set (match_operand:SI           0 "s_register_operand" "=r,l, r,rk,r")
+	(minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,0, r,k,?n")
+		  (match_operand:SI 2 "reg_or_int_operand" "r,Py,rI,r, r")))]
   "TARGET_32BIT"
   "@
    rsb%?\\t%0, %2, %1
    sub%?\\t%0, %1, %2
    sub%?\\t%0, %1, %2
+   sub%?\\t%0, %1, %2
    #"
   "&& (GET_CODE (operands[1]) == CONST_INT
        && !const_ok_for_arm (INTVAL (operands[1])))"
@@ -1231,8 +1234,9 @@ 
                       INTVAL (operands[1]), operands[0], operands[2], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "4,2,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "*,t2,*,*,*")]
 )
 
 (define_peephole2
@@ -1351,13 +1355,15 @@ 
 )
 
 (define_insn "*arm_mulsi3_v6"
-  [(set (match_operand:SI          0 "s_register_operand" "=r")
-	(mult:SI (match_operand:SI 1 "s_register_operand" "r")
-		 (match_operand:SI 2 "s_register_operand" "r")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=l,r")
+	(mult:SI (match_operand:SI 1 "s_register_operand" "%l,r")
+		 (match_operand:SI 2 "s_register_operand" "0,r")))]
   "TARGET_32BIT && arm_arch6"
   "mul%?\\t%0, %1, %2"
-  [(set_attr "insn" "mul")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4")
+   (set_attr "insn" "mul")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*")]
 )
 
 ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands 
@@ -2187,12 +2193,14 @@ 
 
 ; ??? Check split length for Thumb-2
 (define_insn_and_split "*arm_andsi3_insn"
-  [(set (match_operand:SI         0 "s_register_operand" "=r,r,r")
-	(and:SI (match_operand:SI 1 "s_register_operand" "r,r,r")
-		(match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+  [(set (match_operand:SI         0 "s_register_operand" "=l,l,r,r,r")
+	(and:SI (match_operand:SI 1 "s_register_operand" "0, l,r,r,r")
+		(match_operand:SI 2 "reg_or_int_operand" "l, 0,rI,K,?n")))]
   "TARGET_32BIT"
   "@
    and%?\\t%0, %1, %2
+   and%?\\t%0, %2, %1
+   and%?\\t%0, %1, %2
    bic%?\\t%0, %1, #%B2
    #"
   "TARGET_32BIT
@@ -2205,8 +2213,9 @@ 
 	               INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,2,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,t2,*,*,*")]
 )
 
 (define_insn "*thumb1_andsi3_insn"
@@ -2783,12 +2792,14 @@ 
 )
   
 (define_insn "andsi_notsi_si"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
-		(match_operand:SI 1 "s_register_operand" "r")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "l,r"))
+		(match_operand:SI 1 "s_register_operand" "0,r")))]
   "TARGET_32BIT"
   "bic%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*")]
 )
 
 (define_insn "thumb1_bicsi3"
@@ -2913,12 +2924,13 @@ 
 )
 
 (define_insn_and_split "*iorsi3_insn"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
-	(ior:SI (match_operand:SI 1 "s_register_operand" "%r,r,r")
-		(match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r,r,r")
+	(ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r,r")
+		(match_operand:SI 2 "reg_or_int_operand" "l,rI,K,?n")))]
   "TARGET_32BIT"
   "@
    orr%?\\t%0, %1, %2
+   orr%?\\t%0, %1, %2
    orn%?\\t%0, %1, #%B2
    #"
   "TARGET_32BIT
@@ -2931,8 +2943,8 @@ 
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
-  [(set_attr "length" "4,4,16")
-   (set_attr "arch" "32,t2,32")
+  [(set_attr "length" "2,4,4,16")
+   (set_attr "arch" "t2,32,t2,32")
    (set_attr "predicable" "yes")])
 
 (define_insn "*thumb1_iorsi3_insn"
@@ -3051,12 +3063,13 @@ 
 )
 
 (define_insn_and_split "*arm_xorsi3"
-  [(set (match_operand:SI         0 "s_register_operand" "=r,r")
-	(xor:SI (match_operand:SI 1 "s_register_operand" "%r,r")
-		(match_operand:SI 2 "reg_or_int_operand" "rI,?n")))]
+  [(set (match_operand:SI         0 "s_register_operand" "=l,r,r")
+	(xor:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
+		(match_operand:SI 2 "reg_or_int_operand" "l,rI,?n")))]
   "TARGET_32BIT"
   "@
    eor%?\\t%0, %1, %2
+   eor%?\\t%0, %1, %2
    #"
   "TARGET_32BIT
    && GET_CODE (operands[2]) == CONST_INT
@@ -3067,8 +3080,9 @@ 
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
-  [(set_attr "length" "4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*,*")]
 )
 
 (define_insn "*thumb1_xorsi3_insn"
@@ -3672,17 +3686,19 @@ 
 )
 
 (define_insn "*arm_shiftsi3"
-  [(set (match_operand:SI   0 "s_register_operand" "=r")
+  [(set (match_operand:SI   0 "s_register_operand" "=l,r")
 	(match_operator:SI  3 "shift_operator"
-	 [(match_operand:SI 1 "s_register_operand"  "r")
-	  (match_operand:SI 2 "reg_or_int_operand" "rM")]))]
+	 [(match_operand:SI 1 "s_register_operand"  "0,r")
+	  (match_operand:SI 2 "reg_or_int_operand" "l,rM")]))]
   "TARGET_32BIT"
   "* return arm_output_shift(operands, 0);"
-  [(set_attr "predicable" "yes")
+  [(set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
    (set_attr "shift" "1")
    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
 		      (const_string "alu_shift")
-		      (const_string "alu_shift_reg")))]
+		      (const_string "alu_shift_reg")))
+   (set_attr "arch" "t2,*")]
 )
 
 (define_insn "*shiftsi3_compare0"