Patchwork [lra] patch to improve elimination and inheritance

login
register
mail settings
Submitter Vladimir Makarov
Date Oct. 13, 2011, 8:14 p.m.
Message ID <4E9746B7.80206@redhat.com>
Download mbox | patch
Permalink /patch/119601/
State New
Headers show

Comments

Vladimir Makarov - Oct. 13, 2011, 8:14 p.m.
The following patch contains some of my work for last 2 weeks.

First of all, it improves register elimination to permit elimination of a 
register to itself.  It resulted in fixing SPEC2000 code size 
degradation for ppc64.

The patch also improves inheritance by assigning the same hard 
register to an inheritance pseudo and its connected reload pseudos.

And, finally, different changes to speed up LRA and some bug fixes are 
also in the patch.

The patch was successfully bootstrapped on x86/x86-64 and ppc64.

Committed as revision 179942.

2011-10-13  Vladimir Makarov  <vmakarov@redhat.com>

     * lra-assigns.c (process_copy_to_form_allocno): Rename to
     process_copy_to_form_thread.
     (conflict_reload_pseudos): Rename to
     conflict_reload_and_inheritance_pseudos.
     (live_reload_pseudos): Rename to
     live_reload_and_inheritance_pseudos.
     (init_live_reload_pseudos): Rename to
     init_live_reload_and_inheritance_pseudos.
     (finish_live_reload_pseudos): Rename to
     finish_live_reload_and_inheritance_pseudos.
     (find_hard_regno_for): Add new argument try_only_hard_regno.  Use
     live_pseudos_reg_renumber instead of reg_renumber.  Check
     live_pseudos_reg_renumber when adding to
     conflict_reload_and_inheritance_pseudos.  Process
     preferred_hard_regno2 only if preferred_hard_regno1 is
     non-negative.
     (setup_try_hard_regno_pseudos): Use live_pseudos_reg_renumber
     instead of reg_renumber.
     (spill_for): Ditto.  Pass new parameter to find_hard_regno_for.
     (assign_temporarily): Don't change reg_renumber.
     (setup_live_pseudos_and_spill_after_equiv_moves): Only call
     update_lives if lra_risky_equiv_subst_p is false.
     (improve_inheritance): New function.
     (assign_by_spills): Call improve_inheritance.

     * lra.c (lra): Call lra_eliminate after lra_constraints.

     * lra-constraints.c (contains_pseudo_p): Rename to contains_reg_p.
     Add a new argument.  Modify it for new semantics.
     (lra_risky_equiv_subst_p): New global var.
     (lra_constraints): Set up lra_risky_equiv_subst_p.  Compare
     get_equiv_substitution with right value.

     * lra-eliminations.c (eliminate_regs_in_insn): Remove a dead
     code.
     (spill_pseudos): Clear to_process after its processing.
     (update_reg_eliminate): Add a new argument.  Set it up.  Modify
     lra_no_alloc_regs and eliminable_regset.
     (init_elim_table): Remove the argument.  Permit elimination from a
     register to itself.
     (lra_init_elimination): Remove the argument.  Don't set
     up liveness for HARD_FRAME_POINTER_REGNUM.
     (lra_eliminate): Rename to_process to insns_with_changed_offsets.
     Call update_reg_eliminate with insns_with_changed_offsets.
     Restructure code.  Add additional assertion on
     insns_with_changed_offsets.

     * lra.h (lra_init_elimination): New argument.

     * lra-int.h (lra_risky_equiv_subst_p): New flag declaration.

     * ira.c (ira_setup_eliminable_regset): Add a new argument.  Set
     up liveness for HARD_FRAME_POINTER_REGNUM if necessary.
     (ira): Call ira_setup_eliminable_regset with a new parameter.

     * ira.h (ira_setup_eliminable_regset): New argument.

     * loop-invariant.c (calculate_loop_reg_pressure): Call
     ira_setup_eliminable_regset with a new parameter.

     * haifa-sched.c (sched_init): Ditto.

Patch

Index: lra-assigns.c
===================================================================
--- lra-assigns.c	(revision 179932)
+++ lra-assigns.c	(working copy)
@@ -64,7 +64,7 @@  static struct regno_assign_info *regno_a
 /* Process a pseudo copy with frequency COPY_FREQ connecting REGNO1
    and REGNO2 to form threads.  */
 static void
-process_copy_to_form_allocno (int regno1, int regno2, int copy_freq)
+process_copy_to_form_thread (int regno1, int regno2, int copy_freq)
 {
   int last, regno1_first, regno2_first;
 
@@ -111,7 +111,7 @@  init_regno_assign_info (void)
 	&& reg_renumber[regno2] < 0 && lra_reg_info[regno2].nrefs != 0
 	&& (ira_available_class_regs[regno_allocno_class_array[regno1]]
 	    == ira_available_class_regs[regno_allocno_class_array[regno2]]))
-      process_copy_to_form_allocno (regno1, regno2, cp->freq);
+      process_copy_to_form_thread (regno1, regno2, cp->freq);
 }
 
 /* Free REGNO_ASSIGN_INFO.  */
@@ -243,42 +243,42 @@  update_lives (int regno, bool free_p)
 /* Sparseset used to calculate reload pseudos conflicting with a given
    pseudo when we are trying to find a hard register for the given
    pseudo.  */
-static sparseset conflict_reload_pseudos;
+static sparseset conflict_reload_and_inheritance_pseudos;
 
-/* Map: program point -> bitmap of all reload pseudos living at the
-   point.  */
-static bitmap_head *live_reload_pseudos;
+/* Map: program point -> bitmap of all reload and inheritance pseudos
+   living at the point.  */
+static bitmap_head *live_reload_and_inheritance_pseudos;
 
 /* Allocate and initialize data about living reload pseudos at any
    given program point.  */
 static void
-init_live_reload_pseudos (void)
+init_live_reload_and_inheritance_pseudos (void)
 {
   int i, p;
   lra_live_range_t r;
   
-  conflict_reload_pseudos = sparseset_alloc (max_reg_num ());
-  live_reload_pseudos
+  conflict_reload_and_inheritance_pseudos = sparseset_alloc (max_reg_num ());
+  live_reload_and_inheritance_pseudos
     = (bitmap_head *) xmalloc (sizeof (bitmap_head) * lra_live_max_point);
   for (p = 0; p < lra_live_max_point; p++)
-    bitmap_initialize (&live_reload_pseudos[p], &reg_obstack);
+    bitmap_initialize (&live_reload_and_inheritance_pseudos[p], &reg_obstack);
   for (i = lra_constraint_new_regno_start; i < max_reg_num (); i++)
     for (r = lra_reg_info[i].live_ranges; r != NULL; r = r->next)
       for (p = r->start; p <= r->finish; p++)
-	bitmap_set_bit (&live_reload_pseudos[p], i);
+	bitmap_set_bit (&live_reload_and_inheritance_pseudos[p], i);
 }
 
 /* Finalize data about living reload pseudos at any given program
    point.  */
 static void
-finish_live_reload_pseudos (void)
+finish_live_reload_and_inheritance_pseudos (void)
 {
   int p;
 
-  sparseset_free (conflict_reload_pseudos);
+  sparseset_free (conflict_reload_and_inheritance_pseudos);
   for (p = 0; p < lra_live_max_point; p++)
-    bitmap_clear (&live_reload_pseudos[p]);
-  free (live_reload_pseudos);
+    bitmap_clear (&live_reload_and_inheritance_pseudos[p]);
+  free (live_reload_and_inheritance_pseudos);
 }
 
 /* The value used to check that cost of given hard reg is defined
@@ -292,13 +292,13 @@  static int hard_regno_costs_check[FIRST_
    CURR_HARD_REGNO_COSTS_CHECK.  */
 static int hard_regno_costs[FIRST_PSEUDO_REGISTER];
 
-/* Find and return best free hard register for pseudo REGNO.  In
-   failure case, return a negative number.  Return through *COST the
-   cost of usage of the hard register for the pseudo.  Best free hard
-   register has smallest cost of usage for REGNO or smallest register
-   bank if the cost is the same.  */
+/* Find and return best (or TRY_ONLY_HARD_REGNO) free hard register
+   for pseudo REGNO.  In failure case, return a negative number.
+   Return through *COST the cost of usage of the hard register for the
+   pseudo.  Best free hard register has smallest cost of usage for
+   REGNO or smallest register bank if the cost is the same.  */
 static int
-find_hard_regno_for (int regno, int *cost)
+find_hard_regno_for (int regno, int *cost, int try_only_hard_regno)
 {
   HARD_REG_SET conflict_set;
   int best_cost = INT_MAX, best_bank = INT_MAX, best_usage = INT_MAX;
@@ -314,7 +314,7 @@  find_hard_regno_for (int regno, int *cos
   rclass = regno_allocno_class_array[regno];
   rclass_intersect_p = ira_reg_classes_intersect_p[rclass];
   curr_hard_regno_costs_check++;
-  sparseset_clear (conflict_reload_pseudos);
+  sparseset_clear (conflict_reload_and_inheritance_pseudos);
   sparseset_clear (live_range_hard_reg_pseudos);
   IOR_HARD_REG_SET (conflict_set, lra_reg_info[regno].conflict_hard_regs);
   for (r = lra_reg_info[regno].live_ranges; r != NULL; r = r->next)
@@ -322,10 +322,12 @@  find_hard_regno_for (int regno, int *cos
       EXECUTE_IF_SET_IN_BITMAP (&live_hard_reg_pseudos[r->start], 0, k, bi)
 	if (rclass_intersect_p[regno_allocno_class_array[k]])
 	  sparseset_set_bit (live_range_hard_reg_pseudos, k);
-      EXECUTE_IF_SET_IN_BITMAP (&live_reload_pseudos[r->start], 0, k, bi)
-	if (reg_renumber[k] < 0
+      EXECUTE_IF_SET_IN_BITMAP (&live_reload_and_inheritance_pseudos[r->start],
+				0, k, bi)
+	if (lra_reg_info[k].preferred_hard_regno1 >= 0
+	    && live_pseudos_reg_renumber[k] < 0
 	    && rclass_intersect_p[regno_allocno_class_array[k]])
-	  sparseset_set_bit (conflict_reload_pseudos, k);
+	  sparseset_set_bit (conflict_reload_and_inheritance_pseudos, k);
       for (p = r->start + 1; p <= r->finish; p++)
 	{
 	  lra_live_range_t r2;
@@ -333,9 +335,11 @@  find_hard_regno_for (int regno, int *cos
 	  for (r2 = lra_start_point_ranges[p]; r2 != NULL; r2 = r2->start_next)
 	    {
 	      if (r2->regno >= lra_constraint_new_regno_start
-		  && reg_renumber[r2->regno] < 0
+		  && lra_reg_info[r2->regno].preferred_hard_regno1 >= 0
+		  && live_pseudos_reg_renumber[r2->regno] < 0
 		  && rclass_intersect_p[regno_allocno_class_array[r2->regno]])
-		sparseset_set_bit (conflict_reload_pseudos, r2->regno);
+		sparseset_set_bit (conflict_reload_and_inheritance_pseudos,
+				   r2->regno);
 	      if (live_pseudos_reg_renumber[r2->regno] >= 0
 		  && rclass_intersect_p[regno_allocno_class_array[r2->regno]])
 		sparseset_set_bit (live_range_hard_reg_pseudos, r2->regno);
@@ -349,34 +353,38 @@  find_hard_regno_for (int regno, int *cos
       hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
       hard_regno_costs[hard_regno]
 	-= lra_reg_info[regno].preferred_hard_regno_profit1;
-    }
-  if ((hard_regno = lra_reg_info[regno].preferred_hard_regno2) >= 0)
-    {
-      if (hard_regno_costs_check[hard_regno] != curr_hard_regno_costs_check)
-	hard_regno_costs[hard_regno] = 0;
-      hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
-      hard_regno_costs[hard_regno]
-	-= lra_reg_info[regno].preferred_hard_regno_profit2;
+      if ((hard_regno = lra_reg_info[regno].preferred_hard_regno2) >= 0)
+	{
+	  if (hard_regno_costs_check[hard_regno]
+	      != curr_hard_regno_costs_check)
+	    hard_regno_costs[hard_regno] = 0;
+	  hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
+	  hard_regno_costs[hard_regno]
+	    -= lra_reg_info[regno].preferred_hard_regno_profit2;
+	}
     }
 #ifdef STACK_REGS
   if (lra_reg_info[regno].no_stack_p)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       SET_HARD_REG_BIT (conflict_set, i);
 #endif
-  sparseset_clear_bit (conflict_reload_pseudos, regno);
+  sparseset_clear_bit (conflict_reload_and_inheritance_pseudos, regno);
   val = lra_reg_info[regno].val;
   EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno)
     if (val != lra_reg_info[conflict_regno].val)
-      lra_add_hard_reg_set (reg_renumber[conflict_regno],
-			    lra_reg_info[conflict_regno].biggest_mode,
-			    &conflict_set);
-  if (hard_reg_set_subset_p (reg_class_contents[rclass],
-			     conflict_set))
-    return -1;
-  EXECUTE_IF_SET_IN_SPARSESET (conflict_reload_pseudos, conflict_regno)
-    if (reg_renumber[conflict_regno] < 0
-	&& val != lra_reg_info[conflict_regno].val)
       {
+	lra_add_hard_reg_set (live_pseudos_reg_renumber[conflict_regno],
+			      lra_reg_info[conflict_regno].biggest_mode,
+			      &conflict_set);
+	if (hard_reg_set_subset_p (reg_class_contents[rclass],
+				   conflict_set))
+	  return -1;
+      }
+  EXECUTE_IF_SET_IN_SPARSESET (conflict_reload_and_inheritance_pseudos,
+			       conflict_regno)
+    if (val != lra_reg_info[conflict_regno].val)
+      {
+	gcc_assert (live_pseudos_reg_renumber[conflict_regno] < 0);
 	if ((hard_regno
 	     = lra_reg_info[conflict_regno].preferred_hard_regno1) >= 0)
 	  {
@@ -386,16 +394,17 @@  find_hard_regno_for (int regno, int *cos
 	    hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
 	    hard_regno_costs[hard_regno]
 	      += lra_reg_info[conflict_regno].preferred_hard_regno_profit1;
-	  }
-	if ((hard_regno
-	     = lra_reg_info[conflict_regno].preferred_hard_regno2) >= 0)
-	  {
-	    if (hard_regno_costs_check[hard_regno]
-		!= curr_hard_regno_costs_check)
-	      hard_regno_costs[hard_regno] = 0;
-	    hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
-	    hard_regno_costs[hard_regno]
-	      += lra_reg_info[conflict_regno].preferred_hard_regno_profit2;
+	    if ((hard_regno
+		 = lra_reg_info[conflict_regno].preferred_hard_regno2) >= 0)
+	      {
+		if (hard_regno_costs_check[hard_regno]
+		    != curr_hard_regno_costs_check)
+		  hard_regno_costs[hard_regno] = 0;
+		hard_regno_costs_check[hard_regno]
+		  = curr_hard_regno_costs_check;
+		hard_regno_costs[hard_regno]
+		  += lra_reg_info[conflict_regno].preferred_hard_regno_profit2;
+	      }
 	  }
       }
   /* That is important for allocation of multi-word pseudos.  */
@@ -406,44 +415,51 @@  find_hard_regno_for (int regno, int *cos
   best_hard_regno = -1;
   for (i = 0; i < rclass_size; i++)
     {
-      hard_regno = ira_class_hard_regs[rclass][i];
-      if (lra_hard_reg_set_intersection_p (hard_regno,
-					   PSEUDO_REGNO_MODE (regno),
-					   conflict_set)
+      if (try_only_hard_regno >= 0)
+	hard_regno = try_only_hard_regno;
+      else
+	hard_regno = ira_class_hard_regs[rclass][i];
+      if (! lra_hard_reg_set_intersection_p (hard_regno,
+					     PSEUDO_REGNO_MODE (regno),
+					     conflict_set)
 	  /* We can not use prohibited_class_mode_regs because it is
 	     defined not for all classes.  */
-	  || ! HARD_REGNO_MODE_OK (hard_regno, PSEUDO_REGNO_MODE (regno)))
-	continue;
-      if (hard_regno_costs_check[hard_regno] != curr_hard_regno_costs_check)
+	  && HARD_REGNO_MODE_OK (hard_regno, PSEUDO_REGNO_MODE (regno)))
 	{
-	  hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
-	  hard_regno_costs[hard_regno] = 0;
-	}
-      for (j = 0;
-	   j < hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (regno)];
-	   j++)
-	if (! TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j)
-	    && ! df_regs_ever_live_p (hard_regno + j))
-	  /* It needs save restore.  */
-	  hard_regno_costs[hard_regno]
-	    += 2 * ENTRY_BLOCK_PTR->next_bb->frequency;
-      bank = targetm.register_bank (hard_regno);
-      if (best_hard_regno < 0 || hard_regno_costs[hard_regno] < best_cost
-	  || (hard_regno_costs[hard_regno] == best_cost
-	      && (bank < best_bank
-		  /* Hard register usage leveling actually results
-		     in bigger code for targets with conditional
-		     execution like ARM because it reduces chance
-		     of if-conversion after LRA.  */
-		  || (! targetm.have_conditional_execution ()
-		      && bank == best_bank
-		      && best_usage > lra_hard_reg_usage[hard_regno]))))
-	{
-	  best_hard_regno = hard_regno;
-	  best_cost = hard_regno_costs[hard_regno];
-	  best_bank = bank;
-	  best_usage = lra_hard_reg_usage[hard_regno];
+	  if (hard_regno_costs_check[hard_regno]
+	      != curr_hard_regno_costs_check)
+	    {
+	      hard_regno_costs_check[hard_regno] = curr_hard_regno_costs_check;
+	      hard_regno_costs[hard_regno] = 0;
+	    }
+	  for (j = 0;
+	       j < hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (regno)];
+	       j++)
+	    if (! TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j)
+		&& ! df_regs_ever_live_p (hard_regno + j))
+	      /* It needs save restore.  */
+	      hard_regno_costs[hard_regno]
+		+= 2 * ENTRY_BLOCK_PTR->next_bb->frequency;
+	  bank = targetm.register_bank (hard_regno);
+	  if (best_hard_regno < 0 || hard_regno_costs[hard_regno] < best_cost
+	      || (hard_regno_costs[hard_regno] == best_cost
+		  && (bank < best_bank
+		      /* Hard register usage leveling actually results
+			 in bigger code for targets with conditional
+			 execution like ARM because it reduces chance
+			 of if-conversion after LRA.  */
+		      || (! targetm.have_conditional_execution ()
+			  && bank == best_bank
+			  && best_usage > lra_hard_reg_usage[hard_regno]))))
+	    {
+	      best_hard_regno = hard_regno;
+	      best_cost = hard_regno_costs[hard_regno];
+	      best_bank = bank;
+	      best_usage = lra_hard_reg_usage[hard_regno];
+	    }
 	}
+      if (try_only_hard_regno >= 0)
+	break;
     }
   if (best_hard_regno >= 0)
     *cost = best_cost - lra_reg_info[regno].freq;
@@ -575,10 +591,10 @@  setup_try_hard_regno_pseudos (int p, enu
   EXECUTE_IF_SET_IN_BITMAP (&live_hard_reg_pseudos[p], 0, spill_regno, bi)
     {
       mode = PSEUDO_REGNO_MODE (spill_regno);
-      if (lra_hard_reg_set_intersection_p (reg_renumber[spill_regno], mode,
+      if (lra_hard_reg_set_intersection_p (live_pseudos_reg_renumber[spill_regno], mode,
 					   reg_class_contents[rclass]))
 	{
-	  hard_regno = reg_renumber[spill_regno];
+	  hard_regno = live_pseudos_reg_renumber[spill_regno];
 	  for (i = hard_regno_nregs[hard_regno][mode] - 1; i >= 0; i--)
 	    {
 	      if (try_hard_reg_pseudos_check[hard_regno + i]
@@ -612,7 +628,6 @@  assign_temporarily (int regno, int hard_
 	  bitmap_set_bit (&live_hard_reg_pseudos[p], regno);
     }
   live_pseudos_reg_renumber[regno] = hard_regno;
-  reg_renumber[regno] = hard_regno;
 }
 
 /* Array used for sorting reload pseudos for subsequent allocation
@@ -711,24 +726,26 @@  spill_for (int regno, bitmap spilled_pse
       /* We are trying to spill reload pseudo.  That is wrong we
 	 should assign all reload pseudos, otherwise we cannot reuse
 	 selected alternativies.  */
-      hard_regno = find_hard_regno_for (regno, &cost);
+      hard_regno = find_hard_regno_for (regno, &cost, -1);
       if (hard_regno >= 0)
 	{
 	  assign_temporarily (regno, hard_regno);
 	  n = 0;
 	  EXECUTE_IF_SET_IN_SPARSESET (live_range_reload_pseudos, reload_regno)
-	    if (reg_renumber[reload_regno] < 0
+	    if (live_pseudos_reg_renumber[reload_regno] < 0
 		&& (hard_reg_set_intersect_p
 		    (reg_class_contents[regno_allocno_class_array[reload_regno]],
 		     spilled_hard_regs)))
 	      sorted_reload_pseudos[n++] = reload_regno;
-	  qsort (sorted_reload_pseudos, n, sizeof (int), reload_pseudo_compare_func);
+	  qsort (sorted_reload_pseudos, n, sizeof (int),
+		 reload_pseudo_compare_func);
 	  for (j = 0; j < n; j++)
 	    {
 	      reload_regno = sorted_reload_pseudos[j];
-	      if (reg_renumber[reload_regno] < 0
+	      if (live_pseudos_reg_renumber[reload_regno] < 0
 		  && (reload_hard_regno
-		      = find_hard_regno_for (reload_regno, &reload_cost)) >= 0
+		      = find_hard_regno_for (reload_regno,
+					     &reload_cost, -1)) >= 0
 		  && (lra_hard_reg_set_intersection_p
 		      (reload_hard_regno, PSEUDO_REGNO_MODE (reload_regno),
 		       spilled_hard_regs)))
@@ -767,7 +784,7 @@  spill_for (int regno, bitmap spilled_pse
 	  for (j = 0; j < n; j++)
 	    {
 	      reload_regno = sorted_reload_pseudos[j];
-	      if (reg_renumber[reload_regno] >= 0)
+	      if (live_pseudos_reg_renumber[reload_regno] >= 0)
 		assign_temporarily (reload_regno, -1);
 	    }
 	}
@@ -837,7 +854,14 @@  setup_live_pseudos_and_spill_after_equiv
 
   for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
     if (reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
-      sorted_pseudos[n++] = i;
+      {
+	if (lra_risky_equiv_subst_p)
+	  sorted_pseudos[n++] = i;
+	else
+	  update_lives (i, false);
+      }
+  if (! lra_risky_equiv_subst_p)
+    return;
   qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
   for (i = 0; i < n; i++)
     {
@@ -884,6 +908,64 @@  setup_live_pseudos_and_spill_after_equiv
     }
 }
 
+/* Improve allocation by assigning the same hard regno of inheritance
+   pseudos to the connected pseudos.  We need this because inheritance
+   pseudos are allocated after reload pseudos in the thread and when
+   we assign a hard register to a reload pseudo we don't know yet that
+   the connected inheritance pseudos can get the same hard
+   register.  */
+static void
+improve_inheritance (void)
+{
+  unsigned int k;
+  int regno, another_regno, hard_regno, another_hard_regno, cost, i, n;
+  lra_copy_t cp, next_cp;
+  bitmap_iterator bi;
+
+  n = 0;
+  EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, k, bi)
+    if (reg_renumber[k] >= 0 && lra_reg_info[k].nrefs != 0)
+      sorted_pseudos[n++] = k;
+  qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
+  for (i = 0; i < n; i++)
+    {
+      regno = sorted_pseudos[i];
+      hard_regno = reg_renumber[regno];
+      gcc_assert (hard_regno >= 0);
+      for (cp = lra_reg_info[regno].copies; cp != NULL; cp = next_cp)
+	{
+	  if (cp->regno1 == regno)
+	    {
+	      next_cp = cp->regno1_next;
+	      another_regno = cp->regno2;
+	    }
+	  else if (cp->regno2 == regno)
+	    {
+	      next_cp = cp->regno2_next;
+	      another_regno = cp->regno1;
+	    }
+	  else
+	    gcc_unreachable ();
+	  if ((another_hard_regno = reg_renumber[another_regno]) >= 0
+	      && another_hard_regno != hard_regno)
+	    {
+	      if (lra_dump_file != NULL)
+		fprintf (lra_dump_file,
+			 "    Improving inheritance for %d(%d) and %d(%d)...\n",
+			 regno, hard_regno, another_regno, another_hard_regno);
+	      update_lives (another_regno, true);
+	      lra_setup_reg_renumber (another_regno, -1, false);
+	      if (hard_regno
+		  == find_hard_regno_for (another_regno, &cost, hard_regno))
+		assign_hard_regno (hard_regno, another_regno);
+	      else
+		assign_hard_regno (another_hard_regno, another_regno);
+	    }
+	}
+    }
+}
+
+
 /* Bitmap finaly containing all pseudos spilled on this assignment
    pass.  */
 static bitmap_head all_spilled_pseudos;
@@ -935,8 +1017,9 @@  assign_by_spills (void)
 		     ORIGINAL_REGNO (regno_reg_rtx[regno]),
 		     lra_reg_info[regno].freq, regno_assign_info[regno].first,
 		     regno_assign_info[regno_assign_info[regno].first].freq);
-	  hard_regno = find_hard_regno_for (regno, &cost);
-	  if (hard_regno < 0 && ! bitmap_bit_p (&lra_inheritance_pseudos, regno))
+	  hard_regno = find_hard_regno_for (regno, &cost, -1);
+	  if (hard_regno < 0
+	      && ! bitmap_bit_p (&lra_inheritance_pseudos, regno))
 	    hard_regno = spill_for (regno, &all_spilled_pseudos);
 	  if (hard_regno < 0)
 	    {
@@ -999,6 +1082,7 @@  assign_by_spills (void)
 	  }
       n = nfails;
     }
+  improve_inheritance ();
   bitmap_clear (&changed_insns);
   /* Inheritance pseudo can be assigned and after that spilled.  We
      should look at the final result.  */
@@ -1021,7 +1105,7 @@  assign_by_spills (void)
   for (i = 0; i < n; i++)
     {
       regno = sorted_pseudos[i];
-      hard_regno = find_hard_regno_for (regno, &cost);
+      hard_regno = find_hard_regno_for (regno, &cost, -1);
       if (hard_regno >= 0)
 	{
 	  bitmap_set_bit (&changed_pseudo_bitmap, regno);
@@ -1072,9 +1156,9 @@  lra_assign (void)
   setup_live_pseudos_and_spill_after_equiv_moves (&all_spilled_pseudos);
   /* Setup insns to process.  */
   bitmap_initialize (&changed_pseudo_bitmap, &reg_obstack);
-  init_live_reload_pseudos ();
+  init_live_reload_and_inheritance_pseudos ();
   assign_by_spills ();
-  finish_live_reload_pseudos ();
+  finish_live_reload_and_inheritance_pseudos ();
   bitmap_ior_into (&changed_pseudo_bitmap, &all_spilled_pseudos);
   no_spills_p = true;
   EXECUTE_IF_SET_IN_BITMAP (&all_spilled_pseudos, 0, u, bi)
Index: lra-int.h
===================================================================
--- lra-int.h	(revision 179932)
+++ lra-int.h	(working copy)
@@ -288,6 +288,7 @@  extern rtx lra_secondary_memory[NUM_MACH
 extern int lra_constraint_offset (int, enum machine_mode);
 
 extern int lra_constraint_iter;
+extern bool lra_risky_equiv_subst_p;
 extern int lra_inheritance_iter;
 extern int lra_undo_inheritance_iter;
 extern bool lra_constraints (bool);
Index: lra.c
===================================================================
--- lra.c	(revision 179932)
+++ lra.c	(working copy)
@@ -2154,6 +2154,15 @@  lra (FILE *f)
 	  if (! lra_constraints (lra_constraint_iter == 0)
 	      && (lra_constraint_iter > 1 || ! scratch_p))
 	    break;
+	  /* Constraint transformations may result in that eliminable
+	     hard regs become uneliminable and pseudos which use them
+	     should be spilled.  It is better to do it before pseudo
+	     assignments.
+
+	     For example, rs6000 can make
+	     RS6000_PIC_OFFSET_TABLE_REGNUM uneliminable if we started
+	     to use a constant pool.  */
+	  lra_eliminate (false);
 	  lra_inheritance ();
 	  /* We need live ranges for lra_assign -- so build them.  */
 	  lra_create_live_ranges (true);
@@ -2176,6 +2185,8 @@  lra (FILE *f)
       if (! lra_spill ())
 	break;
       coalesce_skip_p = true;
+      /* Assignment of stack slots changes elimination offsets for
+	 some eliminations.  So update the offsets here.  */
       lra_eliminate (false);
       lra_constraint_new_regno_start = max_reg_num ();
       lra_constraint_new_insn_uid_start = get_max_uid ();
Index: lra.h
===================================================================
--- lra.h	(revision 179932)
+++ lra.h	(working copy)
@@ -20,7 +20,7 @@  You should have received a copy of the G
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
-extern void lra_init_elimination (HARD_REG_SET *);
+extern void lra_init_elimination (void);
 extern rtx lra_eliminate_regs (rtx, enum machine_mode, rtx);
 extern void lra (FILE *);
 extern void lra_init_once (void);
Index: lra-eliminations.c
===================================================================
--- lra-eliminations.c	(revision 179932)
+++ lra-eliminations.c	(working copy)
@@ -763,9 +763,7 @@  static void
 eliminate_regs_in_insn (rtx insn, bool replace_p)
 {
   int icode = recog_memoized (insn);
-  rtx old_body = PATTERN (insn);
   rtx old_set = single_set (insn);
-  rtx new_body;
   bool val;
   int i, regno;
   rtx substed_operand[MAX_RECOG_OPERANDS];
@@ -775,7 +773,7 @@  eliminate_regs_in_insn (rtx insn, bool r
   lra_insn_recog_data_t id;
   struct lra_static_insn_data *static_id;
 
-  if (icode < 0 && asm_noperands (old_body) < 0 && ! DEBUG_INSN_P (insn))
+  if (icode < 0 && asm_noperands (PATTERN (insn)) < 0 && ! DEBUG_INSN_P (insn))
     {
       gcc_assert (GET_CODE (PATTERN (insn)) == USE
 		  || GET_CODE (PATTERN (insn)) == CLOBBER
@@ -834,14 +832,6 @@  eliminate_regs_in_insn (rtx insn, bool r
 			    : offset);
 		  src = plus_constant (ep->to_rtx, offset);
 		  
-		  new_body = old_body;
-		  if (0 && ! replace_p)
-		    {
-		      new_body = copy_insn (old_body);
-		      if (REG_NOTES (insn))
-			REG_NOTES (insn) = copy_insn_1 (REG_NOTES (insn));
-		    }
-		  PATTERN (insn) = new_body;
 		  old_set = single_set (insn);
 		  
 		  /* First see if this insn remains valid when we make
@@ -930,14 +920,6 @@  eliminate_regs_in_insn (rtx insn, bool r
 	    {
 	      rtx new_src = plus_constant (to_rtx, offset);
 	      
-	      new_body = old_body;
-	      if (0 && ! replace_p)
-		{
-		  new_body = copy_insn (old_body);
-		  if (REG_NOTES (insn))
-		    REG_NOTES (insn) = copy_insn_1 (REG_NOTES (insn));
-		}
-	      PATTERN (insn) = new_body;
 	      old_set = single_set (insn);
 
 	      /* First see if this insn remains valid when we make the
@@ -1017,26 +999,11 @@  eliminate_regs_in_insn (rtx insn, bool r
 
   if (val)
     {
-      /* If we aren't replacing things permanently and we changed something,
-	 make another copy to ensure that all the RTL is new.  Otherwise
-	 things can go wrong if curr_insn_transform swaps commutative operands
-	 and one is inside RTL that has been copied while the other is not.  */
-      new_body = old_body;
-      if (0 && ! replace_p)
-	{
-	  new_body = copy_insn (old_body);
-	  if (REG_NOTES (insn))
-	    REG_NOTES (insn) = copy_insn_1 (REG_NOTES (insn));
-	}
-      PATTERN (insn) = new_body;
-
-      val = false;
-
-      /* If we had a move insn but now we don't, re-recognize it.  This will
-	 cause spurious re-recognition if the old move had a PARALLEL since
-	 the new one still will, but we can't call single_set without
-	 having put NEW_BODY into the insn and the re-recognition won't
-	 hurt in this rare case.  */
+      /* If we had a move insn but now we don't, re-recognize it.
+	 This will cause spurious re-recognition if the old move had a
+	 PARALLEL since the new one still will, but we can't call
+	 single_set without having put new body into the insn and the
+	 re-recognition won't hurt in this rare case.  */
       id = lra_update_insn_recog_data (insn);
       static_id = id->insn_static_data;
     }
@@ -1075,25 +1042,24 @@  spill_pseudos (HARD_REG_SET set)
 	bitmap_ior_into (&to_process, &lra_reg_info[i].insn_bitmap);
       }
   IOR_HARD_REG_SET (lra_no_alloc_regs, set);
-  bitmap_clear (&to_process);
   for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
     if (bitmap_bit_p (&to_process, INSN_UID (insn)))
       {
 	lra_push_insn (insn);
 	lra_set_used_insn_alternative (insn, -1);
       }
+  bitmap_clear (&to_process);
 }
 
 /* Update all offsets and possibility for elimination on eliminable
-   registers.  See if anything that happened changes which
-   eliminations are valid.  If there are no any changes return false.
-   Otherwise, spill pseudos assigned to not eliminable pseudos,
-   restore offsets, and return true.  */
-static bool
-update_reg_eliminate (void)
+   registers.  Spill pseudos assigned to registers which became
+   uneliminable, update LRA_NO_ALLOC_REGS and ELIMINABLE_REG_SET.  Add
+   insns to INSNS_WITH_CHANGED_OFFSETS containing eliminable hard
+   registers whose offsets should be changed.  */
+static void
+update_reg_eliminate (bitmap insns_with_changed_offsets)
 {
   bool prev;
-  bool change_p = false;
   struct elim_table *ep, *ep1;
   HARD_REG_SET temp_hard_reg_set;
 
@@ -1127,7 +1093,6 @@  update_reg_eliminate (void)
 	    fprintf (lra_dump_file,
 		     "  Elimination %d to %d is not possible anymore\n",
 		     ep->from, ep->to);
-	  change_p = true;
 	  /* Mark that is not eliminable anymore.  */
 	  elimination_map[ep->from] = NULL;
 	  for (ep1 = ep + 1; ep1 < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep1++)
@@ -1151,6 +1116,9 @@  update_reg_eliminate (void)
 			 ep->from);
 	      self_elim_offsets[ep->from] = -ep->offset;
 	      SET_HARD_REG_BIT (temp_hard_reg_set, ep->from);
+	      if (ep->offset != 0)
+		bitmap_ior_into (insns_with_changed_offsets,
+				 &lra_reg_info[ep->from].insn_bitmap);
 	    }
 	}
 
@@ -1159,15 +1127,15 @@  update_reg_eliminate (void)
 #else
       INITIAL_FRAME_POINTER_OFFSET (ep->offset);
 #endif
-      if (elimination_map[ep->from] == ep
-	  && ep->previous_offset != ep->offset)
-	change_p = true;
     }
-  if (! change_p)
-    return false;
+  IOR_HARD_REG_SET (lra_no_alloc_regs, temp_hard_reg_set);
+  AND_COMPL_HARD_REG_SET (eliminable_regset, temp_hard_reg_set);
   spill_pseudos (temp_hard_reg_set);
   setup_elimination_map ();
-  return true;
+  for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
+    if (elimination_map[ep->from] == ep && ep->previous_offset != ep->offset)
+      bitmap_ior_into (insns_with_changed_offsets,
+		       &lra_reg_info[ep->from].insn_bitmap);
 }
 
 /* Initialize the table of registers to eliminate.  Pre-condition:
@@ -1176,7 +1144,7 @@  update_reg_eliminate (void)
    used for allocation because their identical elimination is not
    possible.  */
 static void
-init_elim_table (HARD_REG_SET *dont_use_regs)
+init_elim_table (void)
 {
   bool value_p;
   struct elim_table *ep;
@@ -1184,7 +1152,6 @@  init_elim_table (HARD_REG_SET *dont_use_
   const struct elim_table_1 *ep1;
 #endif
 
-  CLEAR_HARD_REG_SET (*dont_use_regs);
   if (!reg_eliminate)
     reg_eliminate = XCNEWVEC (struct elim_table, NUM_ELIMINABLE_REGS);
 
@@ -1204,13 +1171,6 @@  init_elim_table (HARD_REG_SET *dont_use_
 		       && frame_pointer_needed 
 		       && (! SUPPORTS_STACK_ALIGNMENT
 			   || ! stack_realign_fp)));
-      if (value_p && ep->from == ep->to)
-	{
-	  /* We do not support tricks to use elimination to find is
-	     the hard register needed.  */
-	  value_p = false;
-	  SET_HARD_REG_BIT (*dont_use_regs, ep->from);
-	}
       setup_can_eliminate (ep, value_p);
     }
 #else
@@ -1232,22 +1192,19 @@  init_elim_table (HARD_REG_SET *dont_use_
     }
 }
 
-/* Entry function for initializeation of elimination once per
-   function.  Set up hard registers which can not be used in
-   allocation in *DONT_USE_REGS.  */
+/* Entry function for initialization of elimination once per
+   function.  */
 void
-lra_init_elimination (HARD_REG_SET *dont_use_regs)
+lra_init_elimination (void)
 {
   basic_block bb;
   rtx insn;
 
-  init_elim_table (dont_use_regs);
+  init_elim_table ();
   FOR_EACH_BB (bb)
     FOR_BB_INSNS (bb, insn)
     if (NONDEBUG_INSN_P (insn))
       mark_not_eliminable (PATTERN (insn));
-  if (frame_pointer_needed)
-    df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
   setup_elimination_map ();
 }
 
@@ -1299,17 +1256,35 @@  lra_eliminate (bool final_p)
   int i;
   basic_block bb;
   rtx insn, temp, mem_loc, invariant;
-  bitmap_head to_process;
+  bitmap_head insns_with_changed_offsets;
   struct elim_table *ep;
   int regs_num = max_reg_num ();
 #ifdef SECONDARY_MEMORY_NEEDED
   int mode;
 #endif
 
+  bitmap_initialize (&insns_with_changed_offsets, &reg_obstack);
   if (final_p)
-    gcc_assert (! update_reg_eliminate ());
-  else if (! update_reg_eliminate ())
-    return;
+    {
+#ifdef ENABLE_CHECKING
+      update_reg_eliminate (&insns_with_changed_offsets);
+      if (! bitmap_empty_p (&insns_with_changed_offsets))
+	gcc_unreachable ();
+#endif
+      /* We change eliminable hard registers in insns so we should do
+	 this for all insns containing any eliminable hard
+	 register.  */
+      for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
+	if (elimination_map[ep->from] != NULL)
+	  bitmap_ior_into (&insns_with_changed_offsets,
+			   &lra_reg_info[ep->from].insn_bitmap);
+    }
+  else
+    {
+      update_reg_eliminate (&insns_with_changed_offsets);
+      if (bitmap_empty_p (&insns_with_changed_offsets))
+	return;
+    }
   if (lra_dump_file != NULL)
     {
       fprintf (lra_dump_file, "New elimination table:\n");
@@ -1339,15 +1314,11 @@  lra_eliminate (bool final_p)
 	= lra_eliminate_regs_1 (lra_secondary_memory[mode],
 				VOIDmode, final_p, ! final_p, false);
 #endif
-  bitmap_initialize (&to_process, &reg_obstack);
-  for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
-    if (elimination_map[ep->from] != NULL)
-      bitmap_ior_into (&to_process, &lra_reg_info[ep->from].insn_bitmap);
   FOR_EACH_BB (bb)
     FOR_BB_INSNS_SAFE (bb, insn, temp)
       {
-	if (bitmap_bit_p (&to_process, INSN_UID (insn)))
+	if (bitmap_bit_p (&insns_with_changed_offsets, INSN_UID (insn)))
 	  process_insn_for_elimination (insn, final_p);
       }
-  bitmap_clear (&to_process);
+  bitmap_clear (&insns_with_changed_offsets);
 }
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 179885)
+++ ChangeLog	(working copy)
@@ -1,3 +1,67 @@ 
+2011-10-13  Vladimir Makarov  <vmakarov@redhat.com>
+
+	* lra-assign.c (process_copy_to_form_allocno): Rename to
+	process_copy_to_form_thread.
+	(conflict_reload_pseudos): Rename to
+	conflict_reload_and_inheritance_pseudos.
+	(live_reload_pseudos): Rename to
+	live_reload_and_inheritance_pseudos.
+	(init_live_reload_pseudos): Rename to
+	init_live_reload_and_inheritance_pseudos.
+	(finish_live_reload_pseudos): Rename to
+	finish_live_reload_and_inheritance_pseudos.
+	(find_hard_regno_for): Add new argument try_only_hard_regno.  Use
+	live_pseudos_reg_renumber instead of reg_renumber.  Check
+	live_pseudos_reg_renumber when adding to
+	conflict_reload_and_inheritance_pseudos.  Process
+	preferred_hard_regno2 only if preferred_hard_regno1 is
+	non-negative.
+	(setup_try_hard_regno_pseudos): Use live_pseudos_reg_renumber
+	instead of reg_renumber.
+	(spill_for): Ditto.  Pass new parameter to find_hard_regno_for.
+	(assign_temporarily): Don't change reg_renumber.
+	(setup_live_pseudos_and_spill_after_equiv_moves): Only call
+	update_lives if lra_risky_equiv_subst_p is false.
+	(improve_inheritance): New function.
+	(assign_by_spills): Call improve_inheritance.
+
+	* lra.c (lra): Call lra_eliminate after lra_constraints.
+
+	* lra-constraints.c (contains_pseudo_p): Rename to contains_reg_p.
+	Add a new argument.  Modify it for new semantics.
+	(lra_risky_equiv_subst_p): New global var.
+	(lra_constraints): Set up lra_risky_equiv_subst_p.  Compare
+	get_equiv_substitution with right value.
+
+	* lra-eliminations.c (eliminate_regs_in_insn): Remove a dead
+	code.
+	(spill_pseudos): Clear to_process after its processing.
+	(update_reg_eliminate): Add a new argument.  Set it up.  Modify
+	lra_no_alloc_regs and eliminable_regset.
+	(init_elim_table): Remove the argument.  Permit elimination from a
+	register to itself.
+	(lra_init_elimination): Add a new argument.  Set it up.  Don't set
+	up liveness for HARD_FRAME_POINTER_REGNUM.
+	(lra_eliminate): Rename to_process to insns_with_changed_offsets.
+	Call update_reg_eliminate with insns_with_changed_offsets.
+	Restructure code.  Add additional assertion on
+	insns_with_changed_offsets.
+
+	* lra.h (lra_init_elimination): New argument.
+
+	* lra-int.h (lra_risky_equiv_subst_p): New flag declaration.
+
+	* ira.c (ira_setup_eliminable_regset): Add a new argument.  Set
+	up liveness for HARD_FRAME_POINTER_REGNUM if necessary.
+	(ira): Call ira_setup_eliminable_regset with a new parameter.
+
+	* ira.h (ira_setup_eliminable_regset): New argument.
+
+	* loop-invariant.c (calculate_loop_reg_pressure): Call
+	ira_setup_eliminable_regset with a new parameter.
+
+	* haifa-sched.c (sched_init): Ditto.
+	
 2011-09-29  Vladimir Makarov  <vmakarov@redhat.com>
 
 	* lra-int.h (lra_get_copy): New prototype.
Index: haifa-sched.c
===================================================================
--- haifa-sched.c	(revision 179932)
+++ haifa-sched.c	(working copy)
@@ -4440,7 +4440,7 @@  sched_init (void)
 		      && common_sched_info->sched_pass_id == SCHED_RGN_PASS);
 
   if (sched_pressure_p)
-    ira_setup_eliminable_regset ();
+    ira_setup_eliminable_regset (false);
 
   /* Initialize SPEC_INFO.  */
   if (targetm.sched.set_sched_flags)
Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 179932)
+++ lra-constraints.c	(working copy)
@@ -3165,9 +3165,10 @@  in_list_p (rtx x, rtx list)
   return false;
 }
 
-/* Return true if X contains a (spilled if SPILLED_P) pseudo.  */
+/* Return true if X contains an allocatable hard register (if
+   HARD_REG_P) or a (spilled if SPILLED_P) pseudo.  */
 static bool
-contains_pseudo_p (rtx x, bool spilled_p)
+contains_reg_p (rtx x, bool hard_reg_p, bool spilled_p)
 {
   int i, j;
   const char *fmt;
@@ -3176,24 +3177,40 @@  contains_pseudo_p (rtx x, bool spilled_p
   code = GET_CODE (x);
   if (REG_P (x))
     {
-      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
-	return false;
-      if (! spilled_p)
-	return true;
-      return lra_get_regno_hard_regno (REGNO (x)) < 0;
+      int regno = REGNO (x);
+      HARD_REG_SET alloc_regs;
+
+      if (hard_reg_p)
+	{
+	  if (regno >= FIRST_PSEUDO_REGISTER)
+	    regno = lra_get_regno_hard_regno (regno);
+	  if (regno < 0)
+	    return false;
+	  COMPL_HARD_REG_SET (alloc_regs, lra_no_alloc_regs);
+	  return lra_hard_reg_set_intersection_p (regno, GET_MODE (x),
+						  alloc_regs);
+	}
+      else
+	{
+	  if (regno < FIRST_PSEUDO_REGISTER)
+	    return false;
+	  if (! spilled_p)
+	    return true;
+	  return lra_get_regno_hard_regno (regno) < 0;
+	}
     }
   fmt = GET_RTX_FORMAT (code);
   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
     {
       if (fmt[i] == 'e')
 	{
-	  if (contains_pseudo_p (XEXP (x, i), spilled_p))
+	  if (contains_reg_p (XEXP (x, i), hard_reg_p, spilled_p))
 	    return true;
 	}
       else if (fmt[i] == 'E')
 	{
 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
-	    if (contains_pseudo_p (XVECEXP (x, i, j), spilled_p))
+	    if (contains_reg_p (XVECEXP (x, i, j), hard_reg_p, spilled_p))
 	      return true;
 	}
     }
@@ -3255,6 +3272,11 @@  debug_loc_equivalence_change_p (rtx *loc
 /* The current iteration number of this LRA pass.  */
 int lra_constraint_iter;
 
+/* True if we substituted equiv which needs checking register
+   allocation correctness because the equivalent value contains
+   allocatable hard registers.  */
+bool lra_risky_equiv_subst_p;
+
 /* Entry function of LRA constraint pass.  Return true if the
    constraint pass did change the code.  */
 bool
@@ -3275,6 +3297,7 @@  lra_constraints (bool first_p)
       ("Maximum number of LRA constraint passes is achieved (%d)\n",
        MAX_CONSTRAINT_ITERATION_NUMBER);
   changed_p = false;
+  lra_risky_equiv_subst_p = false;
   new_insn_uid_start = get_max_uid ();
   new_regno_start = first_p ? lra_constraint_new_regno_start : max_reg_num ();
   for (i = FIRST_PSEUDO_REGISTER; i < new_regno_start; i++)
@@ -3291,7 +3314,7 @@  lra_constraints (bool first_p)
 	  }
 	else if ((x = get_equiv_substitution (regno_reg_rtx[i])) != NULL_RTX)
 	  {
-	    if (! first_p && contains_pseudo_p (x, false))
+	    if (! first_p && contains_reg_p (x, false, false))
 	      /* After RTL transformation, we can not guarantee that
 		 pseudo in the susbtitution was not reloaded which
 		 might make equivalence invalid.  For example, in
@@ -3304,7 +3327,7 @@  lra_constraints (bool first_p)
 		 the memory address register was reloaded before the
 		 2nd insn.  */
 	      ira_reg_equiv[i].defined_p = false;
-	    if (contains_pseudo_p (x, true))
+	    if (contains_reg_p (x, false, true))
 	      ira_reg_equiv[i].profitable_p = false;
 	  }
       }
@@ -3371,7 +3394,7 @@  lra_constraints (bool first_p)
 		       || in_list_p (curr_insn,
 				     ira_reg_equiv
 				     [REGNO (dest_reg)].init_insns)))
-		  || (SET_SRC (set) != get_equiv_substitution (SET_SRC (set))
+		  || ((x = get_equiv_substitution (SET_SRC (set))) != SET_SRC (set)
 		      && in_list_p (curr_insn,
 				    ira_reg_equiv
 				    [REGNO (SET_SRC (set))].init_insns)))
@@ -3386,6 +3409,8 @@  lra_constraints (bool first_p)
 			       BLOCK_FOR_INSN (curr_insn)->frequency);
 		      print_rtl_slim (lra_dump_file, curr_insn, curr_insn, -1, 0);
 		    }
+		  if (contains_reg_p (x, true, false))
+		    lra_risky_equiv_subst_p = true;
 		  lra_set_insn_deleted (curr_insn);
 		  continue;
 		}
Index: loop-invariant.c
===================================================================
--- loop-invariant.c	(revision 179932)
+++ loop-invariant.c	(working copy)
@@ -1806,7 +1806,7 @@  calculate_loop_reg_pressure (void)
 	bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
 	bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
       }
-  ira_setup_eliminable_regset ();
+  ira_setup_eliminable_regset (false);
   bitmap_initialize (&curr_regs_live, &reg_obstack);
   FOR_EACH_BB (bb)
     {
Index: ira.c
===================================================================
--- ira.c	(revision 179932)
+++ ira.c	(working copy)
@@ -1797,11 +1797,12 @@  compute_regs_asm_clobbered (void)
 }
 
 
-/* Set up ELIMINABLE_REGSET, IRA_NO_ALLOC_REGS, and REGS_EVER_LIVE.  */
+/* Set up ELIMINABLE_REGSET, IRA_NO_ALLOC_REGS, and REGS_EVER_LIVE.
+   If the function is called from IRA (not from the insn scheduler or
+   RTL loop invariant motion), FROM_IRA_P is true.  */
 void
-ira_setup_eliminable_regset (void)
+ira_setup_eliminable_regset (bool from_ira_p)
 {
-  HARD_REG_SET dont_use_regs;
 #ifdef ELIMINABLE_REGS
   int i;
   static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
@@ -1820,13 +1821,15 @@  ira_setup_eliminable_regset (void)
        || crtl->stack_realign_needed
        || targetm.frame_pointer_required ());
 
-  if (flag_lra)
-    lra_init_elimination (&dont_use_regs);
-  else
-    CLEAR_HARD_REG_SET (dont_use_regs);
+  if (from_ira_p && flag_lra)
+    /* It can change FRAME_POINTER_NEEDED.  We call it only from IRA
+       because it is expensive.  */
+    lra_init_elimination ();
 
+  if (frame_pointer_needed)
+    df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+    
   COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs);
-  IOR_HARD_REG_SET (ira_no_alloc_regs, dont_use_regs);
   CLEAR_HARD_REG_SET (eliminable_regset);
 
   compute_regs_asm_clobbered ();
@@ -3803,7 +3806,7 @@  ira (FILE *f)
     }
 
   max_regno_before_ira = allocated_reg_info_size = max_reg_num ();
-  ira_setup_eliminable_regset ();
+  ira_setup_eliminable_regset (true);
 
   ira_overall_cost = ira_reg_cost = ira_mem_cost = 0;
   ira_load_cost = ira_store_cost = ira_shuffle_cost = 0;
Index: ira.h
===================================================================
--- ira.h	(revision 179932)
+++ ira.h	(working copy)
@@ -175,7 +175,7 @@  extern struct ira_reg_equiv *ira_reg_equ
 extern void ira_init_once (void);
 extern void ira_init (void);
 extern void ira_finish_once (void);
-extern void ira_setup_eliminable_regset (void);
+extern void ira_setup_eliminable_regset (bool);
 extern rtx ira_eliminate_regs (rtx, enum machine_mode);
 extern void ira_set_pseudo_classes (FILE *);
 extern void ira_implicitly_set_insn_hard_regs (HARD_REG_SET *);