Patchwork patch to remove temporary clobbers in LRA

login
register
mail settings
Submitter Vladimir Makarov
Date Oct. 28, 2012, 8:06 p.m.
Message ID <508D903F.8030208@redhat.com>
Download mbox | patch
Permalink /patch/194747/
State New
Headers show

Comments

Vladimir Makarov - Oct. 28, 2012, 8:06 p.m.
Working on a problem where var-tracking started spending more time 
after LRA was switched on, I found that LRA creates a significant number 
of clobbers for x86 code (I saw 20% of all insns). Clobbers are created for 
correct live-range analysis in LRA in cases where matching pseudos of 
*different modes* should be in the same hard register.
   As these clobbers are not needed anymore after LRA, the following 
patch removes them.

   Unfortunately, it does not solve the problem I mentioned.  Now, LRA 
generates approximately the same number of insns and debug_insns.  Moreover, 
the two functions (canonicalize_values_star and set_slot_part) in 
var-tracking that spend most of the time have approximately the same coverage; 
in other words, the execution counts for each line are the same 
(differences < 1%).  Still, var-tracking after LRA spends about 20% more time 
than after reload. I have no idea what to investigate further, but I'll 
continue my work on the problem next week.

   The patch was successfully bootstrapped on x86/x86-64.

   Committed as rev. 192897.

2012-10-28  Vladimir Makarov  <vmakarov@redhat.com>

     * rtl.h (struct rtx_def): Add a comment for member unchanging.
     * lra-int.h (LRA_TEMP_CLOBBER_P): New macro.
     (lra_hard_reg_substitution): Rename to lra_final_code_change.
     * lra-constraints.c (match_reload): Mark temporary clobbers.
     * lra-spills.c (lra_hard_reg_substitution): Rename to
     lra_final_code_change.  Remove temporary clobbers.
     * lra.c (lra): Call lra_final_code_change instead of
     lra_hard_reg_substitution.
Steven Bosscher - Oct. 28, 2012, 8:11 p.m.
On Sun, Oct 28, 2012 at 9:06 PM, Vladimir Makarov wrote:
>   Unfortunately, it does not solve the problem I mentioned.  Now, LRA
> generates approximately the same # of insns and debug_insn.  Moreover, two
> functions (canonicalize_values_star and set_slot_part) in var-tracking
> spending most of time have approximately the same coverage, in other words
> numbers of execution for each line are the same (differences < 1%).  Still
> var-tracking after LRA spent on 20% more time than after reload. I have no
> idea what to investigate more but I'll continue my work on the problem next
> week.

Well, it may be more fruitful to see why var-tracking is so slow. The
problem existed before LRA was merged, it's just triggered more often
after the LRA merge. But the real problem is var-tracking.

Ciao!
Steven

Patch

Index: rtl.h
===================================================================
--- rtl.h	(revision 192896)
+++ rtl.h	(working copy)
@@ -283,7 +283,8 @@  struct GTY((chain_next ("RTX_NEXT (&%h)"
      1 in a NOTE, or EXPR_LIST for a const call.
      1 in a JUMP_INSN of an annulling branch.
      1 in a CONCAT is VAL_EXPR_IS_CLOBBERED in var-tracking.c.
-     1 in a preserved VALUE is PRESERVED_VALUE_P in cselib.c.  */
+     1 in a preserved VALUE is PRESERVED_VALUE_P in cselib.c.
+     1 in a clobber temporarily created for LRA.  */
   unsigned int unchanging : 1;
   /* 1 in a MEM or ASM_OPERANDS expression if the memory reference is volatile.
      1 in an INSN, CALL_INSN, JUMP_INSN, CODE_LABEL, BARRIER, or NOTE
Index: lra.c
===================================================================
--- lra.c	(revision 192896)
+++ lra.c	(working copy)
@@ -2334,7 +2334,7 @@  lra (FILE *f)
     }
   restore_scratches ();
   lra_eliminate (true);
-  lra_hard_reg_substitution ();
+  lra_final_code_change ();
   lra_in_progress = 0;
   lra_clear_live_ranges ();
   lra_live_ranges_finish ();
Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 192896)
+++ lra-constraints.c	(working copy)
@@ -663,7 +663,7 @@  match_reload (signed char out, signed ch
 	      rtx *before, rtx *after)
 {
   int i, in;
-  rtx new_in_reg, new_out_reg, reg;
+  rtx new_in_reg, new_out_reg, reg, clobber;
   enum machine_mode inmode, outmode;
   rtx in_rtx = *curr_id->operand_loc[ins[0]];
   rtx out_rtx = *curr_id->operand_loc[out];
@@ -694,8 +694,10 @@  match_reload (signed char out, signed ch
 	    new_in_reg = gen_rtx_SUBREG (inmode, reg, 0);
 	  /* NEW_IN_REG is non-paradoxical subreg.  We don't want
 	     NEW_OUT_REG living above.  We add clobber clause for
-	     this.  */
-	  emit_clobber (new_out_reg);
+	     this.  This is just a temporary clobber.  We can remove
+	     it at the end of LRA work.  */
+	  clobber = emit_clobber (new_out_reg);
+	  LRA_TEMP_CLOBBER_P (PATTERN (clobber)) = 1;
 	}
     }
   else
Index: lra-int.h
===================================================================
--- lra-int.h	(revision 192896)
+++ lra-int.h	(working copy)
@@ -241,6 +241,10 @@  struct lra_insn_recog_data
 
 typedef struct lra_insn_recog_data *lra_insn_recog_data_t;
 
+/* Whether the clobber is used temporary in LRA.  */
+#define LRA_TEMP_CLOBBER_P(x) \
+  (RTL_FLAG_CHECK1 ("TEMP_CLOBBER_P", (x), CLOBBER)->unchanging)
+
 /* lra.c: */
 
 extern FILE *lra_dump_file;
@@ -346,7 +350,7 @@  extern bool lra_coalesce (void);
 
 extern bool lra_need_for_spills_p (void);
 extern void lra_spill (void);
-extern void lra_hard_reg_substitution (void);
+extern void lra_final_code_change (void);
 
 
 /* lra-elimination.c: */
Index: lra-spills.c
===================================================================
--- lra-spills.c	(revision 192896)
+++ lra-spills.c	(working copy)
@@ -614,13 +614,13 @@  alter_subregs (rtx *loc, bool final_p)
 }
 
 /* Final change of pseudos got hard registers into the corresponding
-   hard registers.  */
+   hard registers and removing temporary clobbers.  */
 void
-lra_hard_reg_substitution (void)
+lra_final_code_change (void)
 {
   int i, hard_regno;
   basic_block bb;
-  rtx insn;
+  rtx insn, curr;
   int max_regno = max_reg_num ();
 
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
@@ -628,9 +628,21 @@  lra_hard_reg_substitution (void)
 	&& (hard_regno = lra_get_regno_hard_regno (i)) >= 0)
       SET_REGNO (regno_reg_rtx[i], hard_regno);
   FOR_EACH_BB (bb)
-    FOR_BB_INSNS (bb, insn)
+    FOR_BB_INSNS_SAFE (bb, insn, curr)
       if (INSN_P (insn))
 	{
+	  rtx pat = PATTERN (insn);
+
+	  if (GET_CODE (pat) == CLOBBER && LRA_TEMP_CLOBBER_P (pat))
+	    {
+	      /* Remove clobbers temporarily created in LRA.  We don't
+		 need them anymore and don't want to waste compiler
+		 time processing them in a few subsequent passes.  */
+	      lra_invalidate_insn_data (insn);
+	      remove_insn (insn);
+	      continue;
+	    }
+
 	  lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
 	  bool insn_change_p = false;