Patchwork [lra] patch to speed up LRA

login
register
mail settings
Submitter Vladimir Makarov
Date June 9, 2011, 3:56 p.m.
Message ID <4DF0ED14.4090402@redhat.com>
Download mbox | patch
Permalink /patch/99778/
State New
Headers show

Comments

Vladimir Makarov - June 9, 2011, 3:56 p.m.
The following patch removes coalescing when it is not necessary,
decreases the number of insns to process for constraints, fixes a typo, and
implements Joseph Myers' proposals.

The patch was successfully bootstrapped (with LRA, which is the default) on
x86-64, itanium, and ppc64.

2011-06-09  Vladimir Makarov <vmakarov@redhat.com>

         * lra-spills.c: Don't include toplev.h.

         * lra-eliminations.c: Ditto.

         * lra-lives.c: Ditto.

         * lra-saves.c: Ditto.

         * lra-coalesce.c: Ditto.

         * lra-equivs.c: Ditto.

         * lra.c: Ditto.
         (lra): Skip coalescing after spills and some assignments. Remove
         live range creation before spills.

         * lra-constraints.c: Don't include toplev.h.
         (inherit_reload_reg): Invalidate used
         alternative for changed insn.

         * lra-assigns.c: Don't include toplev.h.
         (assign_by_spills): Remove parameter. Clear all_spilled_pseudos if
         necessary.  Fix typo in setting changed_pseudo_bitmap.  Don't set
         up changed_pseudo_bitmap for reload pseudos.
         (lra_assign): Return flag of spilling non-reload and
         non-inheritance pseudos.

         * Makefile.in (lra.o, lra-assigns.o, lra-coalesce.o): Remove
         dependence on toplev.h.
         (lra-constraints.o, lra-eliminations.o, lra-lives.o,
         lra-saves.o): Ditto.
         (lra-spills.o): Ditto.

         * lra-int.h (lra_assign): Don't include toplev.h.
         Change return type.
         * target.def (register_bank): Move description from tm.texi.in.

         * doc/tm.texi.in (TARGET_REGISTER_BANK): Move description to
         target.def.

         * doc/tm.texi: Update.

Patch

Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 174485)
+++ doc/tm.texi	(working copy)
@@ -2858,16 +2858,7 @@  as below:
 @end defmac
 
 @deftypefn {Target Hook} int TARGET_REGISTER_BANK (int)
-A target hook which returns the register bank number to which the
-register @var{hard_regno} belongs to.  The smaller the number, the
-more preferable the hard register usage (when all other conditions are
-the same).  This hook can be used to prefer some hard register over
-others in LRA.  For example, some x86-64 register usage needs
-additional prefix which makes instructions longer.  The hook can
-return bigger bank number for such registers make them less favorable
-and as result making the generated code smaller.
-
-The default version of this target hook returns always zero.
+A target hook which returns the register bank number to which the  register @var{hard_regno} belongs to.  The smaller the number, the  more preferable the hard register usage (when all other conditions are  the same).  This hook can be used to prefer some hard register over  others in LRA.  For example, some x86-64 register usage needs  additional prefix which makes instructions longer.  The hook can  return bigger bank number for such registers make them less favorable  and as result making the generated code smaller.    The default version of this target hook returns always zero.
 @end deftypefn
 
 @node Old Constraints
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in	(revision 174485)
+++ doc/tm.texi.in	(working copy)
@@ -2846,17 +2846,6 @@  as below:
 @end defmac
 
 @hook TARGET_REGISTER_BANK
-A target hook which returns the register bank number to which the
-register @var{hard_regno} belongs to.  The smaller the number, the
-more preferable the hard register usage (when all other conditions are
-the same).  This hook can be used to prefer some hard register over
-others in LRA.  For example, some x86-64 register usage needs
-additional prefix which makes instructions longer.  The hook can
-return bigger bank number for such registers make them less favorable
-and as result making the generated code smaller.
-
-The default version of this target hook returns always zero.
-@end deftypefn
 
 @node Old Constraints
 @section Obsolete Macros for Defining Constraints
Index: lra-assigns.c
===================================================================
--- lra-assigns.c	(revision 174485)
+++ lra-assigns.c	(working copy)
@@ -35,7 +35,6 @@  along with GCC; see the file COPYING3.  
 #include "expr.h"
 #include "basic-block.h"
 #include "except.h"
-#include "toplev.h"
 #include "df.h"
 #include "ira.h"
 #include "sparseset.h"
@@ -889,11 +888,9 @@  static bitmap_head all_spilled_pseudos;
 /* All pseudos whose allocation was changed.  */
 static bitmap_head changed_pseudo_bitmap;
 
-/* Assign hard registers to reload pseudos and other pseudos.  Setup
-   insns should be processed on the next constraint pass in
-   TO_PROCESS.  */
+/* Assign hard registers to reload pseudos and other pseudos.  */
 static void
-assign_by_spills (bitmap to_process)
+assign_by_spills (void)
 {
   int i, n, nfails, iter, regno, hard_regno, cost;
   rtx insn, set;
@@ -943,7 +940,9 @@  assign_by_spills (bitmap to_process)
 	    }
 	  else
 	    {
-	      bitmap_set_bit (&changed_pseudo_bitmap, regno);
+	      /* Remember that reload pseudos can be spilled on the
+		 1st pass.  */
+	      bitmap_clear_bit (&all_spilled_pseudos, regno);
 	      assign_hard_regno (hard_regno, regno);
 	    }
 	}
@@ -991,12 +990,10 @@  assign_by_spills (bitmap to_process)
 			     lra_reg_info[regno].freq);
 		  update_lives (regno, true);
 		  lra_setup_reg_renumber (regno, -1, false);
-		  bitmap_set_bit (&changed_pseudo_bitmap, regno);
 		}
 	    }
       n = nfails;
     }
-  bitmap_ior_into (to_process, &changed_insns);
   bitmap_clear (&changed_insns);
   for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
     if ((i < lra_constraint_new_regno_start
@@ -1024,7 +1021,7 @@  assign_by_spills (bitmap to_process)
 	  for (curr_regno = lra_reg_info[regno].first;
 	       curr_regno >= 0;
 	       curr_regno = lra_reg_info[curr_regno].next)
-	    bitmap_set_bit (&all_spilled_pseudos, curr_regno);
+	    bitmap_set_bit (&changed_pseudo_bitmap, curr_regno);
 	}
     }
   free (assigned_pseudos);
@@ -1040,14 +1037,19 @@  assign_by_spills (bitmap to_process)
 /* Entry function to assign hard registers to new reload pseudos
    starting with LRA_CONSTRAINT_NEW_REGNO_START (by possible spilling
    of old pseudos) and possibly to the old pseudos.  The function adds
-   what insns to process to TO_PROCESS.  That is all insns who
-   contains pseudos with changed allocation.  */
-void
+   what insns to process for the next constraint pass.  Those are all
+   insns who contains non-reload and non-inheritance pseudos with
+   changed allocation.
+
+   Return true if we did not spill any non-reload and non-inheritance
+   pseudos.  */
+bool
 lra_assign (void)
 {
   unsigned int u;
   bitmap_iterator bi;
   bitmap_head insns_to_process;
+  bool no_spills_p;
 
   init_lives ();
   sorted_pseudos = (int *) xmalloc (sizeof (int) * max_reg_num ());
@@ -1055,13 +1057,15 @@  lra_assign (void)
   bitmap_initialize (&all_spilled_pseudos, &reg_obstack);
   setup_live_pseudos_and_spill_after_equiv_moves (&all_spilled_pseudos);
   /* Setup insns to process.  */
-  bitmap_initialize (&insns_to_process, &reg_obstack);
   bitmap_initialize (&changed_pseudo_bitmap, &reg_obstack);
   init_live_reload_pseudos ();
-  assign_by_spills (&insns_to_process);
+  assign_by_spills ();
   finish_live_reload_pseudos ();
   bitmap_ior_into (&changed_pseudo_bitmap, &all_spilled_pseudos);
+  bitmap_and_compl_into (&all_spilled_pseudos, &lra_inheritance_pseudos);
+  no_spills_p = bitmap_empty_p (&all_spilled_pseudos);
   bitmap_clear (&all_spilled_pseudos);
+  bitmap_initialize (&insns_to_process, &reg_obstack);
   EXECUTE_IF_SET_IN_BITMAP (&changed_pseudo_bitmap, 0, u, bi)
     bitmap_ior_into (&insns_to_process, &lra_reg_info[u].insn_bitmap);
   bitmap_clear (&changed_pseudo_bitmap);
@@ -1075,4 +1079,5 @@  lra_assign (void)
   free (sorted_pseudos);
   free (sorted_reload_pseudos);
   finish_lives ();
+  return no_spills_p;
 }
Index: target.def
===================================================================
--- target.def	(revision 174485)
+++ target.def	(working copy)
@@ -2231,7 +2231,16 @@  DEFHOOK
 /* Return register bank of given hard regno for the current target.  */
 DEFHOOK
 (register_bank,
- "",
+ "A target hook which returns the register bank number to which the\
+  register @var{hard_regno} belongs to.  The smaller the number, the\
+  more preferable the hard register usage (when all other conditions are\
+  the same).  This hook can be used to prefer some hard register over\
+  others in LRA.  For example, some x86-64 register usage needs\
+  additional prefix which makes instructions longer.  The hook can\
+  return bigger bank number for such registers make them less favorable\
+  and as result making the generated code smaller.\
+  \
+  The default version of this target hook returns always zero.",
  int, (int),
  default_register_bank)
 
Index: lra-int.h
===================================================================
--- lra-int.h	(revision 174485)
+++ lra-int.h	(working copy)
@@ -333,7 +333,7 @@  extern void lra_setup_reload_pseudo_pref
 /* lra-assigns.c: */
 
 extern void lra_setup_reg_renumber (int, int, bool);
-extern void lra_assign (void);
+extern bool lra_assign (void);
 
 
 /* lra-coalesce.c: */
Index: lra.c
===================================================================
--- lra.c	(revision 174485)
+++ lra.c	(working copy)
@@ -37,7 +37,6 @@  along with GCC; see the file COPYING3.  
 #include "expr.h"
 #include "basic-block.h"
 #include "except.h"
-#include "toplev.h"
 #include "tree-pass.h"
 #include "timevar.h"
 #include "target.h"
@@ -2037,7 +2036,7 @@  void
 lra (FILE *f)
 {
   int i;
-  bool first_p, scratch_p, inserted_p;
+  bool first_p, scratch_p, inserted_p, coalesce_skip_p;
   
   lra_dump_file = f;
 
@@ -2099,11 +2098,13 @@  lra (FILE *f)
   lra_constraint_new_insn_uid_start = get_max_uid ();
   bitmap_initialize (&lra_inheritance_pseudos, &reg_obstack);
   bitmap_clear (&lra_dont_inherit_pseudos);
+  coalesce_skip_p = false;
   for (;;)
     {
       for (;;)
 	{
-	  lra_coalesce ();
+	  if (! coalesce_skip_p)
+	    lra_coalesce ();
 	  /* We should try to assign hard registers to scratches even
 	     if there were no RTL transformations in
 	     lra_constraints.  */
@@ -2112,16 +2113,20 @@  lra (FILE *f)
 	  lra_inheritance ();
 	  /* We need live ranges for lra_assign -- so build them.  */
 	  lra_create_live_ranges (true);
-	  lra_assign ();
+	  /* If we don't spill non-reload and non-inheritance pseudos,
+	     there is no sense to run memory-memory move coalescing.
+	     If inheritance pseudos were spilled, the memory-memory
+	     moves involving them will be removed by pass undoing
+	     inheritance.  */
+	  coalesce_skip_p = lra_assign ();
 	  if (lra_undo_inheritance ())
 	    lra_create_live_ranges (false);
 	  first_p = false;
 	}
-      /* We need live ranges for lra_assign -- so build them.  */
       first_p = false;
-      lra_create_live_ranges (false);
       if (! lra_spill ())
 	break;
+      coalesce_skip_p = true;
       lra_eliminate (false);
       lra_constraint_new_regno_start = max_reg_num ();
       lra_constraint_new_insn_uid_start = get_max_uid ();
Index: lra-eliminations.c
===================================================================
--- lra-eliminations.c	(revision 174546)
+++ lra-eliminations.c	(working copy)
@@ -38,7 +38,6 @@  along with GCC; see the file COPYING3.  
 #include "basic-block.h"
 #include "except.h"
 #include "optabs.h"
-#include "toplev.h"
 #include "df.h"
 #include "ira.h"
 #include "rtl-error.h"
Index: lra-spills.c
===================================================================
--- lra-spills.c	(revision 174485)
+++ lra-spills.c	(working copy)
@@ -38,7 +38,6 @@  along with GCC; see the file COPYING3.  
 #include "expr.h"
 #include "basic-block.h"
 #include "except.h"
-#include "toplev.h"
 #include "timevar.h"
 #include "target.h"
 #include "lra-int.h"
Index: lra-lives.c
===================================================================
--- lra-lives.c	(revision 174485)
+++ lra-lives.c	(working copy)
@@ -34,7 +34,6 @@  along with GCC; see the file COPYING3.  
 #include "expr.h"
 #include "basic-block.h"
 #include "except.h"
-#include "toplev.h"
 #include "df.h"
 #include "ira.h"
 #include "sparseset.h"
Index: lra-saves.c
===================================================================
--- lra-saves.c	(revision 174485)
+++ lra-saves.c	(working copy)
@@ -38,7 +38,6 @@  along with GCC; see the file COPYING3.  
 #include "expr.h"
 #include "basic-block.h"
 #include "except.h"
-#include "toplev.h"
 #include "cfgloop.h"
 #include "df.h"
 #include "ira.h"
Index: lra-coalesce.c
===================================================================
--- lra-coalesce.c	(revision 174485)
+++ lra-coalesce.c	(working copy)
@@ -39,7 +39,6 @@  along with GCC; see the file COPYING3.  
 #include "expr.h"
 #include "basic-block.h"
 #include "except.h"
-#include "toplev.h"
 #include "timevar.h"
 #include "ira.h"
 #include "lra-int.h"
Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 174546)
+++ lra-constraints.c	(working copy)
@@ -44,7 +44,6 @@ 
 #include "basic-block.h"
 #include "except.h"
 #include "optabs.h"
-#include "toplev.h"
 #include "df.h"
 #include "ira.h"
 #include "rtl-error.h"
@@ -3331,6 +3330,8 @@  inherit_reload_reg (rtx reload_reg, int 
 	      && (int) REGNO (dst) >= lra_constraint_new_regno_start);
   SET_SRC (set) = new_reg;
   lra_push_insn (last_reload_insn);
+  /* Invalidate alternatives for insn should be processed.  */
+  lra_set_used_insn_alternative_by_uid (INSN_UID (last_reload_insn), -1);
   if (lra_dump_file != NULL)
     {
       fprintf (lra_dump_file, "    Inheritance reuse change:\n");
Index: lra-equivs.c
===================================================================
--- lra-equivs.c	(revision 174485)
+++ lra-equivs.c	(working copy)
@@ -33,7 +33,6 @@  along with GCC; see the file COPYING3.  
 #include "addresses.h"
 #include "insn-config.h"
 #include "recog.h"
-#include "toplev.h"
 #include "tree-pass.h"
 #include "target.h"
 #include "params.h"
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 174485)
+++ Makefile.in	(working copy)
@@ -3417,42 +3417,42 @@  ira.o: ira.c $(CONFIG_H) $(SYSTEM_H) cor
 lra.o : lra.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(REGS_H) insn-config.h insn-codes.h $(TIMEVAR_H) $(TREE_PASS_H) \
    $(DF_H) $(RECOG_H) output.h addresses.h $(REGS_H) hard-reg-set.h \
-   $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) \
+   $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) \
    $(EXCEPT_H) ira.h $(LRA_INT_H)
 lra-assigns.o : lra-assigns.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(REGS_H) insn-config.h $(DF_H) \
    $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \
-   $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) ira.h \
+   $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) ira.h \
    rtl-error.h $(LRA_INT_H)
 lra-coalesce.o : lra-coalesce.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(REGS_H) insn-config.h $(DF_H) \
    $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \
-   $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) ira.h \
+   $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) ira.h \
    rtl-error.h ira.h $(LRA_INT_H)
 lra-constraints.o : lra-constraints.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(REGS_H) insn-config.h insn-codes.h $(DF_H) \
    $(RECOG_H) output.h addresses.h $(REGS_H) hard-reg-set.h $(FLAGS_H) \
-   $(FUNCTION_H) $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) \
+   $(FUNCTION_H) $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) \
    ira.h rtl-error.h $(LRA_INT_H)
 lra-eliminations.o : lra-eliminations.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(REGS_H) insn-config.h $(DF_H) \
    $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \
-   $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) ira.h \
+   $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) ira.h \
    rtl-error.h $(LRA_INT_H)
 lra-lives.o : lra-lives.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(REGS_H) insn-config.h $(DF_H) \
    $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \
-   $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) \
+   $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) \
    $(LRA_INT_H)
 lra-saves.o : lra-saves.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(REGS_H) insn-config.h $(DF_H) \
    $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \
-   $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) \
+   $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) \
    ira.h $(LRA_INT_H)
 lra-spills.o : lra-spills.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(REGS_H) insn-config.h $(DF_H) \
    $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \
-   $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) $(EXCEPT_H) \
+   $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) $(EXCEPT_H) \
    $(LRA_INT_H)
 regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(TIMEVAR_H) $(TREE_PASS_H) $(DF_H) \