Patchwork [lra] patch to fix ppc32 code size degradation and a small clean up

login
register
mail settings
Submitter Vladimir Makarov
Date Aug. 26, 2011, 7:10 p.m.
Message ID <4E57EFBC.6060601@redhat.com>
Download mbox | patch
Permalink /patch/111825/
State New
Headers show

Comments

Vladimir Makarov - Aug. 26, 2011, 7:10 p.m.
LRA on ppc32 had some code size degradation in comparison with the
reload pass.  The reason is the systematic use of memory-to-memory
moves through two integer registers for DFmode instead of one
floating-point register, as reload does.

   The following patch solves the problem.  It does so by
preferring the insn alternative with the smallest number of registers
involved when higher-priority rules (such as the number of needed
reloads) give the same result.

   I wish I could also use register pressure information for choosing an
alternative, but unfortunately that would make LRA slower because the
info is not available at this subpass (constraints).

   Another wish would be to use insn length, but again that needs (a
temporary) transformation to the final insn, which is not known yet at
this stage because we have not yet assigned hard registers to reload
pseudos or memory to spilled pseudos.

The patch also contains a cleanup of the function mark_not_eliminable.

The patch was bootstrapped on x86-64 and ppc64.

2011-08-26  Vladimir Makarov <vmakarov@redhat.com>

         * lra-constraints.c (best_reload_nregs): New variable.
         (process_alt_operands): Prefer alternatives involving fewer hard
         registers.  Increase reject for all failed non-register operands.

         * lra-eliminations.c (mark_not_eliminable): Add check on hard
         register before looping on eliminations.

Patch

Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 178120)
+++ lra-constraints.c	(working copy)
@@ -1143,6 +1143,10 @@  static int best_losers, best_overall;
 /* Number of small register classes used for operands of the best
    alternative.  */
 static int best_small_class_operands_num;
+/* Overall number of hard registers used for reloads.  For example, on
+   some targets we need 2 general registers to reload DFmode and only
+   one floating point register.  */
+static int best_reload_nregs;
 /* Overall number reflecting distances of previous reloading the same
    value.  It is used to improve inheritance chances.  */
 static int best_reload_sum;
@@ -1415,7 +1419,7 @@  process_alt_operands (int only_alternati
   rtx no_subreg_operand[MAX_RECOG_OPERANDS], operand_reg[MAX_RECOG_OPERANDS];
   int hard_regno[MAX_RECOG_OPERANDS];
   enum machine_mode biggest_mode[MAX_RECOG_OPERANDS];
-  int reload_sum;
+  int reload_nregs, reload_sum;
 
   /* Calculate some data common for all alternatives to speed up the
      function.  */
@@ -1460,7 +1464,7 @@  process_alt_operands (int only_alternati
 	  (only_alternative >= 0 && nalt != only_alternative))
 	continue;
 
-      overall = losers = reject = reload_sum = 0;
+      overall = losers = reject = reload_nregs = reload_sum = 0;
       for (nop = 0; nop < n_operands; nop++)
 	reject += (curr_static_id
 		   ->operand_alternative[nalt * n_operands + nop].reject);
@@ -2003,7 +2007,7 @@  process_alt_operands (int only_alternati
 	      /* Input reloads can be inherited more often than output
 		 reloads can be removed, so penalize output
 		 reloads.  */
-	      if (curr_static_id->operand[nop].type != OP_IN)
+	      if (!REG_P (op) || curr_static_id->operand[nop].type != OP_IN)
 		reject++;
 	      /* SUBREGS ??? */
 	      if (this_alternative_matches >= 0)
@@ -2012,6 +2016,9 @@  process_alt_operands (int only_alternati
 		}
 	      else if (no_regs_p && ! this_alternative_offmemok && ! constmemok)
 		goto fail;
+
+	      if (! no_regs_p)
+		reload_nregs += ira_reg_class_max_nregs[this_alternative][mode];
 	    }
   
 	  if (early_clobber_p)
@@ -2128,7 +2135,9 @@  process_alt_operands (int only_alternati
 			  < best_small_class_operands_num
 			  || (small_class_operands_num
 			      == best_small_class_operands_num
-			      && best_reload_sum < reload_sum))))))
+			      && (reload_nregs < best_reload_nregs
+				  || (reload_nregs == best_reload_nregs
+				      && best_reload_sum < reload_sum))))))))
 	{
 	  for (nop = 0; nop < n_operands; nop++)
 	    {
@@ -2145,6 +2154,7 @@  process_alt_operands (int only_alternati
 	  best_overall = overall;
 	  best_losers = losers;
 	  best_small_class_operands_num = small_class_operands_num;
+	  best_reload_nregs = reload_nregs;
 	  best_reload_sum = reload_sum;
 	  goal_alt_number = nalt;
 	}
Index: lra-eliminations.c
===================================================================
--- lra-eliminations.c	(revision 178120)
+++ lra-eliminations.c	(working copy)
@@ -671,49 +671,46 @@  mark_not_eliminable (rtx x)
     case POST_DEC:
     case POST_MODIFY:
     case PRE_MODIFY:
-      /* If we modify the source of an elimination rule, disable it.  */
-      for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
-	if (ep->from_rtx == XEXP (x, 0)
-	    || (ep->to_rtx == XEXP (x, 0)
-		&& ep->to_rtx != hard_frame_pointer_rtx))
-	  setup_can_eliminate (ep, false);
-
-      /* These two aren't unary operators.  */
-      if (code == POST_MODIFY || code == PRE_MODIFY)
-	break;
-
-      mark_not_eliminable (XEXP (x, 0));
-      return;
-
-    case SUBREG:
-      mark_not_eliminable (SUBREG_REG (x));
+      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)
+	/* If we modify the source of an elimination rule, disable it.  */
+	for (ep = reg_eliminate;
+	     ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
+	       ep++)
+	  if (ep->from_rtx == XEXP (x, 0)
+	      || (ep->to_rtx == XEXP (x, 0)
+		  && ep->to_rtx != hard_frame_pointer_rtx))
+	    setup_can_eliminate (ep, false);
       return;
 
     case USE:
-      /* If using a register that is the source of an eliminate we still
-	 think can be performed, note it cannot be performed since we don't
-	 know how this register is used.  */
-      for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
-	if (ep->from_rtx == XEXP (x, 0) && ep->to_rtx != hard_frame_pointer_rtx)
-	  setup_can_eliminate (ep, false);
-
-      mark_not_eliminable (XEXP (x, 0));
+      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)
+	/* If using a register that is the source of an eliminate we
+	   still think can be performed, note it cannot be performed
+	   since we don't know how this register is used.  */
+	for (ep = reg_eliminate;
+	     ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
+	     ep++)
+	  if (ep->from_rtx == XEXP (x, 0)
+	      && ep->to_rtx != hard_frame_pointer_rtx)
+	    setup_can_eliminate (ep, false);
       return;
 
     case CLOBBER:
-      /* If clobbering a register that is the replacement register for an
-	 elimination we still think can be performed, note that it cannot
-	 be performed.  Otherwise, we need not be concerned about it.  */
-      for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
-	if (ep->to_rtx == XEXP (x, 0) && ep->to_rtx != hard_frame_pointer_rtx)
-	  setup_can_eliminate (ep, false);
-
-      mark_not_eliminable (XEXP (x, 0));
+      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)
+	/* If clobbering a register that is the replacement register for an
+	   elimination we still think can be performed, note that it cannot
+	   be performed.  Otherwise, we need not be concerned about it.  */
+	for (ep = reg_eliminate;
+	     ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
+	     ep++)
+	  if (ep->to_rtx == XEXP (x, 0)
+	      && ep->to_rtx != hard_frame_pointer_rtx)
+	    setup_can_eliminate (ep, false);
       return;
 
     case SET:
       /* Check for setting a register that we know about.  */
-      if (REG_P (SET_DEST (x)))
+      if (REG_P (SET_DEST (x)) && REGNO (SET_DEST (x)) < FIRST_PSEUDO_REGISTER)
 	{
 	  /* See if this is setting the replacement register for an
 	     elimination.