diff mbox

patch to fix PR69847

Message ID 57A0C5BB.1070104@redhat.com
State New
Headers show

Commit Message

Vladimir Makarov Aug. 2, 2016, 4:09 p.m. UTC
The following patch is for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69847

   The patch implements an invariant inheritance and some hard reg 
assignment improvements in LRA.  Actually I tried several approaches to 
implement this.  This patch is what worked best (a compromise between 
complexity and performance results).

   The patch was successfully tested and bootstrapped on x86-64 and 
ppc64.  As the patch changes a complicated code (inheritance code) in 
LRA, there is non-zero probability of regressions but I will work to fix 
them if/when they are reported.

   Committed as rev. 238991.

Comments

H.J. Lu Aug. 2, 2016, 6:27 p.m. UTC | #1
On Tue, Aug 2, 2016 at 9:09 AM, Vladimir Makarov <vmakarov@redhat.com> wrote:
>   The following patch is for
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69847
>
>   The patch implements an invariant inheritance and some hard reg assignment
> improvements in LRA.  Actually I tried several approaches to implement this.
> This patch is what worked best (a compromise between complexity and
> performance results).
>
>   The patch was successfully tested and bootstrapped on x86-64 and ppc64.
> As the patch changes a complicated code (inheritance code) in LRA, there is
> non-zero probability of regressions but I will work to fix them if/when they
> are reported.
>
>   Committed as rev. 238991.

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72778
diff mbox

Patch

2016-08-02  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/69847
	* lra-int.h (struct lra-reg): Use restore_rtx instead of
	restore_regno.
	(lra_rtx_hash): New.
	* lra.c (initialize_lra_reg_info_element): Use restore_rtx instead
	of restore_regno.
	(lra_rtx_hash): Rename and move lra-remat.c::rtx_hash.
	* lra-remat.c (rtx_hash): Rename and Move to lra.c.
	* lra-spills.c (lra_final_code_change): Don't delete insn when the
	next insn is USE with the same reg as the current insn source.
	* lra-constraints.c (curr_insn_transform): Use restore_rtx instead
	of restore_regno.
	(lra_constraints_init): Call initiate_invariants.
	(lra_constraints_finish): Call finish_invariants.
	(struct invariant, invariant_t, invariant_ptr_t): New.
	(const_invariant_ptr_t, invariants, invariants_pool): New.
	(invariant_table, invariant_hash, invariant_eq_p): New.
	(insert_invariant, initiate_invariants, finish_invariants): New.
	(clear_invariants, invalid_invariant_regs): New.
	(inherit_reload_reg, split_reg, fix_bb_live_info): Use restore_rtx
	instead of restore_regno.
	(invariant_p, process_invariant_for_inheritance): New.
	(inherit_in_ebb): Implement invariant inheritance.
	(lra_inheritance): Initialize and finalize invalid_invariant_regs.
	(remove_inheritance_pseudos): Implement undoing invariant
	inheritance.
	(undo_optional_reloads, lra_undo_inheritance): Use restore_rtx
	instead of restore_regno.
	* lra-assigns.c (regno_live_length): New.
	(reload_pseudo_compare_func): Use regno_live_length.
	(assign_by_spills): Use restore_rtx instead of restore_regno.
	(lra_assign): Ditto.  Initiate regno_live_length.

Index: lra-int.h
===================================================================
--- lra-int.h	(revision 238990)
+++ lra-int.h	(working copy)
@@ -99,9 +99,9 @@  struct lra_reg
      *non-debug* insns.	 */
   int nrefs, freq;
   int last_reload;
-  /* Regno used to undo the inheritance.  It can be non-zero only
-     between couple of inheritance and undo inheritance passes.	 */
-  int restore_regno;
+  /* rtx used to undo the inheritance.  It can be non-null only
+     between subsequent inheritance and undo inheritance passes.  */
+  rtx restore_rtx;
   /* Value holding by register.	 If the pseudos have the same value
      they do not conflict.  */
   int val;
@@ -285,6 +285,7 @@  extern lra_insn_recog_data_t *lra_insn_r
 extern int lra_curr_reload_num;
 
 extern void lra_dump_bitmap_with_title (const char *, bitmap, int);
+extern hashval_t lra_rtx_hash (rtx x);
 extern void lra_push_insn (rtx_insn *);
 extern void lra_push_insn_by_uid (unsigned int);
 extern void lra_push_insn_and_update_insn_regno_info (rtx_insn *);
Index: lra-assigns.c
===================================================================
--- lra-assigns.c	(revision 238990)
+++ lra-assigns.c	(working copy)
@@ -107,6 +107,10 @@  static bool former_reload_pseudo_spill_p
    lra_get_allocno_class.  It is used to speed up the code.  */
 static enum reg_class *regno_allocno_class_array;
 
+/* Array containing lengths of pseudo live ranges.  It is used to
+   speed up the code.  */
+static int *regno_live_length;
+
 /* Information about the thread to which a pseudo belongs.  Threads are
    a set of connected reload and inheritance pseudos with the same set of
    available hard registers.  Lone registers belong to their own threads.  */
@@ -227,6 +231,11 @@  reload_pseudo_compare_func (const void *
   /* Put pseudos from the thread nearby.  */
   if ((diff = regno_assign_info[r1].first - regno_assign_info[r2].first) != 0)
     return diff;
+  /* Prefer pseudos with longer live ranges.  It sets up better
+     prefered hard registers for the thread pseudos and decreases
+     register-register moves between the thread pseudos.  */
+  if ((diff = regno_live_length[r2] - regno_live_length[r1]) != 0)
+    return diff;
   /* If regs are equally good, sort by their numbers, so that the
      results of qsort leave nothing to chance.	*/
   return r1 - r2;
@@ -1300,7 +1309,8 @@  find_all_spills_for (int regno)
 static void
 assign_by_spills (void)
 {
-  int i, n, nfails, iter, regno, hard_regno, cost, restore_regno;
+  int i, n, nfails, iter, regno, hard_regno, cost;
+  rtx restore_rtx;
   rtx_insn *insn;
   bitmap_head changed_insns, do_not_assign_nonreload_pseudos;
   unsigned int u, conflict_regno;
@@ -1333,6 +1343,8 @@  assign_by_spills (void)
       for (i = 0; i < n; i++)
 	{
 	  regno = sorted_pseudos[i];
+	  if (reg_renumber[regno] >= 0)
+	    continue;
 	  if (lra_dump_file != NULL)
 	    fprintf (lra_dump_file, "	 Assigning to %d "
 		     "(cl=%s, orig=%d, freq=%d, tfirst=%d, tfreq=%d)...\n",
@@ -1471,21 +1483,25 @@  assign_by_spills (void)
 	 such inheritance or split pseudos.  */
       bitmap_initialize (&do_not_assign_nonreload_pseudos, &reg_obstack);
       EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, u, bi)
-	if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
+	if ((restore_rtx = lra_reg_info[u].restore_rtx) != NULL_RTX
+	    && REG_P (restore_rtx)
 	    && reg_renumber[u] < 0
 	    && bitmap_bit_p (&lra_inheritance_pseudos, u))
-	  bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
+	  bitmap_set_bit (&do_not_assign_nonreload_pseudos, REGNO (restore_rtx));
       EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, u, bi)
-	if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
+	if ((restore_rtx = lra_reg_info[u].restore_rtx) != NULL_RTX
 	    && reg_renumber[u] >= 0)
-	  bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
+	  {
+	    lra_assert (REG_P (restore_rtx));
+	    bitmap_set_bit (&do_not_assign_nonreload_pseudos, REGNO (restore_rtx));
+	  }
       for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
 	if (((i < lra_constraint_new_regno_start
 	      && ! bitmap_bit_p (&do_not_assign_nonreload_pseudos, i))
 	     || (bitmap_bit_p (&lra_inheritance_pseudos, i)
-		 && lra_reg_info[i].restore_regno >= 0)
+		 && lra_reg_info[i].restore_rtx != NULL_RTX)
 	     || (bitmap_bit_p (&lra_split_regs, i)
-		 && lra_reg_info[i].restore_regno >= 0)
+		 && lra_reg_info[i].restore_rtx != NULL_RTX)
 	     || bitmap_bit_p (&lra_subreg_reload_pseudos, i)
 	     || bitmap_bit_p (&lra_optional_reload_pseudos, i))
 	    && reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
@@ -1513,7 +1529,7 @@  assign_by_spills (void)
 	      enum reg_class spill_class;
 	      
 	      if (targetm.spill_class == NULL
-		  || lra_reg_info[regno].restore_regno < 0
+		  || lra_reg_info[regno].restore_rtx == NULL_RTX
 		  || ! bitmap_bit_p (&lra_inheritance_pseudos, regno)
 		  || (spill_class
 		      = ((enum reg_class)
@@ -1570,8 +1586,17 @@  lra_assign (void)
   sorted_pseudos = XNEWVEC (int, max_regno);
   sorted_reload_pseudos = XNEWVEC (int, max_regno);
   regno_allocno_class_array = XNEWVEC (enum reg_class, max_regno);
+  regno_live_length = XNEWVEC (int, max_regno);
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
-    regno_allocno_class_array[i] = lra_get_allocno_class (i);
+    {
+      int l;
+      lra_live_range_t r;
+
+      regno_allocno_class_array[i] = lra_get_allocno_class (i);
+      for (l = 0, r = lra_reg_info[i].live_ranges; r != NULL; r = r->next)
+	l  += r->finish - r->start + 1;
+      regno_live_length[i] = l;
+    }
   former_reload_pseudo_spill_p = false;
   init_regno_assign_info ();
   bitmap_initialize (&all_spilled_pseudos, &reg_obstack);
@@ -1594,7 +1619,7 @@  lra_assign (void)
   EXECUTE_IF_SET_IN_BITMAP (&all_spilled_pseudos, 0, u, bi)
     /* We ignore spilled pseudos created on last inheritance pass
        because they will be removed.  */
-    if (lra_reg_info[u].restore_regno < 0)
+    if (lra_reg_info[u].restore_rtx == NULL_RTX)
       {
 	no_spills_p = false;
 	break;
@@ -1613,6 +1638,7 @@  lra_assign (void)
     }
   bitmap_clear (&insns_to_process);
   finish_regno_assign_info ();
+  free (regno_live_length);
   free (regno_allocno_class_array);
   free (sorted_pseudos);
   free (sorted_reload_pseudos);
@@ -1631,3 +1657,4 @@  lra_assign (void)
        LRA_MAX_ASSIGNMENT_ITERATION_NUMBER);
   return no_spills_p;
 }
+
Index: lra.c
===================================================================
--- lra.c	(revision 238990)
+++ lra.c	(working copy)
@@ -1286,7 +1286,7 @@  initialize_lra_reg_info_element (int i)
   lra_reg_info[i].live_ranges = NULL;
   lra_reg_info[i].nrefs = lra_reg_info[i].freq = 0;
   lra_reg_info[i].last_reload = 0;
-  lra_reg_info[i].restore_regno = -1;
+  lra_reg_info[i].restore_rtx = NULL_RTX;
   lra_reg_info[i].val = get_new_reg_value ();
   lra_reg_info[i].offset = 0;
   lra_reg_info[i].copies = NULL;
@@ -1620,6 +1620,92 @@  lra_get_insn_regs (int uid)
 }
 
 
+
+/* Recursive hash function for RTL X.  */
+hashval_t
+lra_rtx_hash (rtx x)
+{
+  int i, j;
+  enum rtx_code code;
+  const char *fmt;
+  hashval_t val = 0;
+
+  if (x == 0)
+    return val;
+
+  code = GET_CODE (x);
+  val += (int) code + 4095;
+
+  /* Some RTL can be compared nonrecursively.  */
+  switch (code)
+    {
+    case REG:
+      return val + REGNO (x);
+
+    case LABEL_REF:
+      return iterative_hash_object (XEXP (x, 0), val);
+
+    case SYMBOL_REF:
+      return iterative_hash_object (XSTR (x, 0), val);
+
+    case SCRATCH:
+    case CONST_DOUBLE:
+    case CONST_INT:
+    case CONST_VECTOR:
+      return val;
+
+    default:
+      break;
+    }
+
+  /* Hash the elements.  */
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      switch (fmt[i])
+	{
+	case 'w':
+	  val += XWINT (x, i);
+	  break;
+
+	case 'n':
+	case 'i':
+	  val += XINT (x, i);
+	  break;
+
+	case 'V':
+	case 'E':
+	  val += XVECLEN (x, i);
+
+	  for (j = 0; j < XVECLEN (x, i); j++)
+	    val += lra_rtx_hash (XVECEXP (x, i, j));
+	  break;
+
+	case 'e':
+	  val += lra_rtx_hash (XEXP (x, i));
+	  break;
+
+	case 'S':
+	case 's':
+	  val += htab_hash_string (XSTR (x, i));
+	  break;
+
+	case 'u':
+	case '0':
+	case 't':
+	  break;
+
+	  /* It is believed that rtx's at this level will never
+	     contain anything but integers and other rtx's, except for
+	     within LABEL_REFs and SYMBOL_REFs.  */
+	default:
+	  abort ();
+	}
+    }
+  return val;
+}
+
+
 
 /* This page contains code dealing with stack of the insns which
    should be processed by the next constraint pass.  */
Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 238990)
+++ lra-constraints.c	(working copy)
@@ -4105,14 +4105,16 @@  curr_insn_transform (bool check_only_p)
 
       if (optional_p)
 	{
-	  lra_assert (REG_P (op));
-	  regno = REGNO (op);
+	  rtx reg = op;
+
+	  lra_assert (REG_P (reg));
+	  regno = REGNO (reg);
 	  op = *curr_id->operand_loc[i]; /* Substitution.  */
 	  if (GET_CODE (op) == SUBREG)
 	    op = SUBREG_REG (op);
 	  gcc_assert (REG_P (op) && (int) REGNO (op) >= new_regno_start);
 	  bitmap_set_bit (&lra_optional_reload_pseudos, REGNO (op));
-	  lra_reg_info[REGNO (op)].restore_regno = regno;
+	  lra_reg_info[REGNO (op)].restore_rtx = reg;
 	  if (lra_dump_file != NULL)
 	    fprintf (lra_dump_file,
 		     "      Making reload reg %d for reg %d optional\n",
@@ -4646,11 +4648,15 @@  lra_constraints (bool first_p)
   return changed_p;
 }
 
+static void initiate_invariants (void);
+static void finish_invariants (void);
+
 /* Initiate the LRA constraint pass.  It is done once per
    function.  */
 void
 lra_constraints_init (void)
 {
+  initiate_invariants ();
 }
 
 /* Finalize the LRA constraint pass.  It is done once per
@@ -4658,6 +4664,100 @@  lra_constraints_init (void)
 void
 lra_constraints_finish (void)
 {
+  finish_invariants ();
+}
+
+
+
+/* Structure describes invariants for ineheritance.  */
+struct invariant
+{
+  /* The order number of the invariant.  */
+  int num;
+  /* The invariant RTX.  */
+  rtx invariant_rtx;
+  /* The origin insn of the invariant.  */
+  rtx_insn *insn;
+};
+
+typedef struct invariant invariant_t;
+typedef invariant_t *invariant_ptr_t;
+typedef const invariant_t *const_invariant_ptr_t;
+
+/* Pointer to the inheritance invariants.  */
+static vec<invariant_ptr_t> invariants;
+
+/* Allocation pool for the invariants.  */
+static object_allocator<struct invariant> *invariants_pool;
+
+/* Hash table for the invariants.  */
+static htab_t invariant_table;
+
+/* Hash function for INVARIANT.  */
+static hashval_t
+invariant_hash (const void *invariant)
+{
+  rtx inv = ((const_invariant_ptr_t) invariant)->invariant_rtx;
+  return lra_rtx_hash (inv);
+}
+
+/* Equal function for invariants INVARIANT1 and INVARIANT2.  */
+static int
+invariant_eq_p (const void *invariant1, const void *invariant2)
+{
+  rtx inv1 = ((const_invariant_ptr_t) invariant1)->invariant_rtx;
+  rtx inv2 = ((const_invariant_ptr_t) invariant2)->invariant_rtx;
+
+  return rtx_equal_p (inv1, inv2);
+}
+
+/* Insert INVARIANT_RTX into the table if it is not there yet.  Return
+   invariant which is in the table.  */
+static invariant_ptr_t
+insert_invariant (rtx invariant_rtx)
+{
+  void **entry_ptr;
+  invariant_t invariant;
+  invariant_ptr_t invariant_ptr;
+
+  invariant.invariant_rtx = invariant_rtx;
+  entry_ptr = htab_find_slot (invariant_table, &invariant, INSERT);
+  if (*entry_ptr == NULL)
+    {
+      invariant_ptr = invariants_pool->allocate ();
+      invariant_ptr->invariant_rtx = invariant_rtx;
+      invariant_ptr->insn = NULL;
+      invariants.safe_push (invariant_ptr);
+      *entry_ptr = (void *) invariant_ptr;
+    }
+  return (invariant_ptr_t) *entry_ptr;
+}
+
+/* Initiate the invariant table.  */
+static void
+initiate_invariants (void)
+{
+  invariants.create (100);
+  invariants_pool = new object_allocator<struct invariant> ("Inheritance invariants");
+  invariant_table = htab_create (100, invariant_hash, invariant_eq_p, NULL);
+}
+
+/* Finish the invariant table.  */
+static void
+finish_invariants (void)
+{
+  htab_delete (invariant_table);
+  delete invariants_pool;
+  invariants.release ();
+}
+
+/* Make the invariant table empty.  */
+static void
+clear_invariants (void)
+{
+  htab_empty (invariant_table);
+  invariants_pool->release ();
+  invariants.truncate (0);
 }
 
 
@@ -4789,6 +4889,10 @@  check_secondary_memory_needed_p (enum re
    (inheritance/split pseudos and original registers).	*/
 static bitmap_head check_only_regs;
 
+/* Reload pseudos can not be involded in invariant inheritance in the
+   current EBB.  */
+static bitmap_head invalid_invariant_regs;
+
 /* Do inheritance transformations for insn INSN, which defines (if
    DEF_P) or uses ORIGINAL_REGNO.  NEXT_USAGE_INSNS specifies which
    instruction in the EBB next uses ORIGINAL_REGNO; it has the same
@@ -4910,7 +5014,7 @@  inherit_reload_reg (bool def_p, int orig
   if (lra_dump_file != NULL)
     fprintf (lra_dump_file, "    Original reg change %d->%d (bb%d):\n",
 	     original_regno, REGNO (new_reg), BLOCK_FOR_INSN (insn)->index);
-  lra_reg_info[REGNO (new_reg)].restore_regno = original_regno;
+  lra_reg_info[REGNO (new_reg)].restore_rtx = regno_reg_rtx[original_regno];
   bitmap_set_bit (&check_only_regs, REGNO (new_reg));
   bitmap_set_bit (&check_only_regs, original_regno);
   bitmap_set_bit (&lra_inheritance_pseudos, REGNO (new_reg));
@@ -5194,7 +5298,7 @@  split_reg (bool before_p, int original_r
       return false;
     }
   after_p = usage_insns[original_regno].after_p;
-  lra_reg_info[REGNO (new_reg)].restore_regno = original_regno;
+  lra_reg_info[REGNO (new_reg)].restore_rtx = regno_reg_rtx[original_regno];
   bitmap_set_bit (&check_only_regs, REGNO (new_reg));
   bitmap_set_bit (&check_only_regs, original_regno);
   bitmap_set_bit (&lra_split_regs, REGNO (new_reg));
@@ -5273,6 +5377,134 @@  split_if_necessary (int regno, machine_m
   return res;
 }
 
+/* Return TRUE if rtx X is considered as an invariant for
+   inheritance.  */
+static bool
+invariant_p (const_rtx x)
+{
+  machine_mode mode;
+  const char *fmt;
+  enum rtx_code code;
+  int i, j;
+
+  code = GET_CODE (x);
+  mode = GET_MODE (x);
+  if (code == SUBREG)
+    {
+      x = SUBREG_REG (x);
+      code = GET_CODE (x);
+      if (GET_MODE_SIZE (GET_MODE (x)) > GET_MODE_SIZE (mode))
+	mode = GET_MODE (x);
+    }
+
+  if (MEM_P (x))
+    return false;
+
+  if (REG_P (x))
+    {
+      int i, nregs, regno = REGNO (x);
+
+      if (regno >= FIRST_PSEUDO_REGISTER || regno == STACK_POINTER_REGNUM
+	  || TEST_HARD_REG_BIT (eliminable_regset, regno)
+	  || GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
+	return false;
+      nregs = hard_regno_nregs[regno][mode];
+      for (i = 0; i < nregs; i++)
+	if (! fixed_regs[regno + i]
+	    /* A hard register may be clobbered in the current insn
+	       but we can ignore this case because if the hard
+	       register is used it should be set somewhere after the
+	       clobber.  */
+	    || bitmap_bit_p (&invalid_invariant_regs, regno + i))
+	  return false;
+    }
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	{
+	  if (! invariant_p (XEXP (x, i)))
+	    return false;
+	}
+      else if (fmt[i] == 'E')
+	{
+	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	    if (! invariant_p (XVECEXP (x, i, j)))
+	      return false;
+	}
+    }
+  return true;
+}
+
+/* We have 'dest_reg <- invariant'.  Let us try to make an invariant
+   inheritance transformation (using dest_reg instead invariant in a
+   subsequent insn).  */
+static bool
+process_invariant_for_inheritance (rtx dst_reg, rtx invariant_rtx)
+{
+  invariant_ptr_t invariant_ptr;
+  rtx_insn *insn, *new_insns;
+  rtx insn_set, insn_reg, new_reg;
+  int insn_regno;
+  bool succ_p = false;
+  int dst_regno = REGNO (dst_reg);
+  enum machine_mode dst_mode = GET_MODE (dst_reg);
+  enum reg_class cl = lra_get_allocno_class (dst_regno), insn_reg_cl;
+
+  invariant_ptr = insert_invariant (invariant_rtx);
+  if ((insn = invariant_ptr->insn) != NULL_RTX)
+    {
+      /* We have a subsequent insn using the invariant.  */
+      insn_set = single_set (insn);
+      lra_assert (insn_set != NULL);
+      insn_reg = SET_DEST (insn_set);
+      lra_assert (REG_P (insn_reg));
+      insn_regno = REGNO (insn_reg);
+      insn_reg_cl = lra_get_allocno_class (insn_regno);
+
+      if (dst_mode == GET_MODE (insn_reg)
+	  /* We should consider only result move reg insns which are
+	     cheap.  */
+	  && targetm.register_move_cost (dst_mode, cl, insn_reg_cl) == 2
+	  && targetm.register_move_cost (dst_mode, cl, cl) == 2)
+	{
+	  if (lra_dump_file != NULL)
+	    fprintf (lra_dump_file,
+		     "    [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\n");
+	  new_reg = lra_create_new_reg (dst_mode, dst_reg,
+					cl, "invariant inheritance");
+	  bitmap_set_bit (&lra_inheritance_pseudos, REGNO (new_reg));
+	  bitmap_set_bit (&check_only_regs, REGNO (new_reg));
+	  lra_reg_info[REGNO (new_reg)].restore_rtx = invariant_rtx;
+	  start_sequence ();
+	  lra_emit_move (new_reg, dst_reg);
+	  new_insns = get_insns ();
+	  end_sequence ();
+	  lra_process_new_insns (curr_insn, NULL, new_insns,
+				 "Add invariant inheritance<-original");
+	  start_sequence ();
+	  lra_emit_move (SET_DEST (insn_set), new_reg);
+	  new_insns = get_insns ();
+	  end_sequence ();
+	  lra_process_new_insns (insn, NULL, new_insns,
+				 "Changing reload<-inheritance");
+	  lra_set_insn_deleted (insn);
+	  succ_p = true;
+	  if (lra_dump_file != NULL)
+	    {
+	      fprintf (lra_dump_file,
+		       "    Invariant inheritance reuse change %d (bb%d):\n",
+		       REGNO (new_reg), BLOCK_FOR_INSN (insn)->index);
+	      dump_insn_slim (lra_dump_file, insn);
+	      fprintf (lra_dump_file,
+		       "	  ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\n");
+	    }
+	}
+    }
+  invariant_ptr->insn = curr_insn;
+  return succ_p;
+}
+
 /* Check only registers living at the current program point in the
    current EBB.	 */
 static bitmap_head live_regs;
@@ -5495,7 +5727,7 @@  inherit_in_ebb (rtx_insn *head, rtx_insn
   int i, src_regno, dst_regno, nregs;
   bool change_p, succ_p, update_reloads_num_p;
   rtx_insn *prev_insn, *last_insn;
-  rtx next_usage_insns, set;
+  rtx next_usage_insns, curr_set;
   enum reg_class cl;
   struct lra_insn_reg *reg;
   basic_block last_processed_bb, curr_bb = NULL;
@@ -5507,8 +5739,10 @@  inherit_in_ebb (rtx_insn *head, rtx_insn
 
   change_p = false;
   curr_usage_insns_check++;
+  clear_invariants ();
   reloads_num = calls_num = 0;
   bitmap_clear (&check_only_regs);
+  bitmap_clear (&invalid_invariant_regs);
   last_processed_bb = NULL;
   CLEAR_HARD_REG_SET (potential_reload_hard_regs);
   COPY_HARD_REG_SET (live_hard_regs, eliminable_regset);
@@ -5556,13 +5790,11 @@  inherit_in_ebb (rtx_insn *head, rtx_insn
 	    }
 	}
       src_regno = dst_regno = -1;
-      if (NONDEBUG_INSN_P (curr_insn)
-	  && (set = single_set (curr_insn)) != NULL_RTX
-	  && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
-	{
-	  src_regno = REGNO (SET_SRC (set));
-	  dst_regno = REGNO (SET_DEST (set));
-	}
+      curr_set = single_set (curr_insn);
+      if (curr_set != NULL_RTX && REG_P (SET_DEST (curr_set)))
+	dst_regno = REGNO (SET_DEST (curr_set));
+      if (curr_set != NULL_RTX && REG_P (SET_SRC (curr_set)))
+	src_regno = REGNO (SET_SRC (curr_set));
       update_reloads_num_p = true;
       if (src_regno < lra_constraint_new_regno_start
 	  && src_regno >= FIRST_PSEUDO_REGISTER
@@ -5587,6 +5819,22 @@  inherit_in_ebb (rtx_insn *head, rtx_insn
 	    IOR_HARD_REG_SET (potential_reload_hard_regs,
 			      reg_class_contents[cl]);
 	}
+      else if (src_regno < 0
+	       && dst_regno >= lra_constraint_new_regno_start
+	       && invariant_p (SET_SRC (curr_set))
+	       && (cl = lra_get_allocno_class (dst_regno)) != NO_REGS
+	       && ! bitmap_bit_p (&invalid_invariant_regs, dst_regno))
+	{
+	  /* 'reload_pseudo <- invariant'.  */
+	  if (ira_class_hard_regs_num[cl] <= max_small_class_regs_num)
+	    reloads_num++;
+	  update_reloads_num_p = false;
+	  if (process_invariant_for_inheritance (SET_DEST (curr_set), SET_SRC (curr_set)))
+	    change_p = true;
+	  if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs))
+	    IOR_HARD_REG_SET (potential_reload_hard_regs,
+			      reg_class_contents[cl]);
+	}
       else if (src_regno >= lra_constraint_new_regno_start
 	       && dst_regno < lra_constraint_new_regno_start
 	       && dst_regno >= FIRST_PSEUDO_REGISTER
@@ -5819,16 +6067,15 @@  inherit_in_ebb (rtx_insn *head, rtx_insn
 	    }
 	}
       if (update_reloads_num_p
-	  && NONDEBUG_INSN_P (curr_insn)
-          && (set = single_set (curr_insn)) != NULL_RTX)
+	  && NONDEBUG_INSN_P (curr_insn) && curr_set != NULL_RTX)
 	{
 	  int regno = -1;
-	  if ((REG_P (SET_DEST (set))
-	       && (regno = REGNO (SET_DEST (set))) >= lra_constraint_new_regno_start
+	  if ((REG_P (SET_DEST (curr_set))
+	       && (regno = REGNO (SET_DEST (curr_set))) >= lra_constraint_new_regno_start
 	       && reg_renumber[regno] < 0
 	       && (cl = lra_get_allocno_class (regno)) != NO_REGS)
-	      || (REG_P (SET_SRC (set))
-	          && (regno = REGNO (SET_SRC (set))) >= lra_constraint_new_regno_start
+	      || (REG_P (SET_SRC (curr_set))
+	          && (regno = REGNO (SET_SRC (curr_set))) >= lra_constraint_new_regno_start
 	          && reg_renumber[regno] < 0
 	          && (cl = lra_get_allocno_class (regno)) != NO_REGS))
 	    {
@@ -5839,6 +6086,25 @@  inherit_in_ebb (rtx_insn *head, rtx_insn
 	                          reg_class_contents[cl]);
 	    }
 	}
+      if (NONDEBUG_INSN_P (curr_insn))
+	{
+	  int regno;
+
+	  /* Invalidate invariants with changed regs.  */
+	  curr_id = lra_get_insn_recog_data (curr_insn);
+	  for (reg = curr_id->regs; reg != NULL; reg = reg->next)
+	    if (reg->type != OP_IN)
+	      bitmap_set_bit (&invalid_invariant_regs, reg->regno);
+	  curr_static_id = curr_id->insn_static_data;
+	  for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next)
+	    if (reg->type != OP_IN)
+	      bitmap_set_bit (&invalid_invariant_regs, reg->regno);
+	  if (curr_id->arg_hard_regs != NULL)
+	    for (i = 0; (regno = curr_id->arg_hard_regs[i]) >= 0; i++)
+		bitmap_set_bit (&invalid_invariant_regs,
+				regno >= FIRST_PSEUDO_REGISTER
+				? regno : regno - FIRST_PSEUDO_REGISTER);
+	}
       /* We reached the start of the current basic block.  */
       if (prev_insn == NULL_RTX || prev_insn == PREV_INSN (head)
 	  || BLOCK_FOR_INSN (prev_insn) != curr_bb)
@@ -5911,6 +6177,7 @@  lra_inheritance (void)
   for (i = 0; i < lra_constraint_new_regno_start; i++)
     usage_insns[i].check = 0;
   bitmap_initialize (&check_only_regs, &reg_obstack);
+  bitmap_initialize (&invalid_invariant_regs, &reg_obstack);
   bitmap_initialize (&live_regs, &reg_obstack);
   bitmap_initialize (&temp_bitmap, &reg_obstack);
   bitmap_initialize (&ebb_global_regs, &reg_obstack);
@@ -5947,6 +6214,7 @@  lra_inheritance (void)
   bitmap_clear (&ebb_global_regs);
   bitmap_clear (&temp_bitmap);
   bitmap_clear (&live_regs);
+  bitmap_clear (&invalid_invariant_regs);
   bitmap_clear (&check_only_regs);
   free (usage_insns);
 
@@ -5970,8 +6238,9 @@  fix_bb_live_info (bitmap live, bitmap re
   bitmap_iterator bi;
 
   EXECUTE_IF_SET_IN_BITMAP (removed_pseudos, 0, regno, bi)
-    if (bitmap_clear_bit (live, regno))
-      bitmap_set_bit (live, lra_reg_info[regno].restore_regno);
+    if (bitmap_clear_bit (live, regno)
+	&& REG_P (lra_reg_info[regno].restore_rtx))
+      bitmap_set_bit (live, REGNO (lra_reg_info[regno].restore_rtx));
 }
 
 /* Return regno of the (subreg of) REG. Otherwise, return a negative
@@ -6024,7 +6293,8 @@  static bool
 remove_inheritance_pseudos (bitmap remove_pseudos)
 {
   basic_block bb;
-  int regno, sregno, prev_sregno, dregno, restore_regno;
+  int regno, sregno, prev_sregno, dregno;
+  rtx restore_rtx;
   rtx set, prev_set;
   rtx_insn *prev_insn;
   bool change_p, done_p;
@@ -6053,13 +6323,42 @@  remove_inheritance_pseudos (bitmap remov
 
 	  if (sregno >= 0 && dregno >= 0)
 	    {
-	      if ((bitmap_bit_p (remove_pseudos, sregno)
-		   && (lra_reg_info[sregno].restore_regno == dregno
+	      if (bitmap_bit_p (remove_pseudos, dregno)
+		  && ! REG_P (lra_reg_info[dregno].restore_rtx))
+		{
+		  /* invariant inheritance pseudo <- original pseudo */
+		  if (lra_dump_file != NULL)
+		    {
+		      fprintf (lra_dump_file, "	   Removing invariant inheritance:\n");
+		      dump_insn_slim (lra_dump_file, curr_insn);
+		      fprintf (lra_dump_file, "\n");
+		    }
+		  delete_move_and_clobber (curr_insn, dregno);
+		  done_p = true;
+		}
+	      else if (bitmap_bit_p (remove_pseudos, sregno)
+		       && ! REG_P (lra_reg_info[sregno].restore_rtx))
+		{
+		  /* reload pseudo <- invariant inheritance pseudo */
+		  start_sequence ();
+		  /* We can not just change the source.  It might be
+		     an insn different from the move.  */
+		  lra_emit_move (SET_DEST (set), lra_reg_info[sregno].restore_rtx);
+		  rtx_insn *new_insns = get_insns ();
+		  end_sequence ();
+		  lra_process_new_insns (curr_insn, NULL, new_insns,
+					 "Changing reload<-invariant inheritance");
+		  delete_move_and_clobber (curr_insn, dregno);
+		  done_p = true;
+		}
+	      else if ((bitmap_bit_p (remove_pseudos, sregno)
+			&& (get_regno (lra_reg_info[sregno].restore_rtx) == dregno
+			    || (bitmap_bit_p (remove_pseudos, dregno)
+				&& get_regno (lra_reg_info[sregno].restore_rtx) >= 0
+				&& (get_regno (lra_reg_info[sregno].restore_rtx)
+				    == get_regno (lra_reg_info[dregno].restore_rtx)))))
 		       || (bitmap_bit_p (remove_pseudos, dregno)
-			   && (lra_reg_info[sregno].restore_regno
-			       == lra_reg_info[dregno].restore_regno))))
-		  || (bitmap_bit_p (remove_pseudos, dregno)
-		      && lra_reg_info[dregno].restore_regno == sregno))
+			   && get_regno (lra_reg_info[dregno].restore_rtx) == sregno))
 		/* One of the following cases:
 		     original <- removed inheritance pseudo
 		     removed inherit pseudo <- another removed inherit pseudo
@@ -6104,13 +6403,16 @@  remove_inheritance_pseudos (bitmap remov
 		      && (int) REGNO (SET_DEST (prev_set)) == sregno
 		      && ((prev_sregno = REGNO (SET_SRC (prev_set)))
 			  >= FIRST_PSEUDO_REGISTER)
-		      /* As we consider chain of inheritance or
-			 splitting described in above comment we should
-			 check that sregno and prev_sregno were
-			 inheritance/split pseudos created from the
-			 same original regno.  */
-		      && (lra_reg_info[sregno].restore_regno
-			  == lra_reg_info[prev_sregno].restore_regno)
+		      && (lra_reg_info[prev_sregno].restore_rtx == NULL_RTX
+			  ||
+			  /* As we consider chain of inheritance or
+			     splitting described in above comment we should
+			     check that sregno and prev_sregno were
+			     inheritance/split pseudos created from the
+			     same original regno.  */
+			  (get_regno (lra_reg_info[sregno].restore_rtx) >= 0
+			   && (get_regno (lra_reg_info[sregno].restore_rtx)
+			       == get_regno (lra_reg_info[prev_sregno].restore_rtx))))
 		      && ! bitmap_bit_p (remove_pseudos, prev_sregno))
 		    {
 		      lra_assert (GET_MODE (SET_SRC (prev_set))
@@ -6124,14 +6426,13 @@  remove_inheritance_pseudos (bitmap remov
 			 inheritance pseudo for another pseudo.  */
 		      if (bitmap_bit_p (remove_pseudos, dregno)
 			  && bitmap_bit_p (&lra_inheritance_pseudos, dregno)
-			  && (restore_regno
-			      = lra_reg_info[dregno].restore_regno) >= 0)
+			  && (restore_rtx
+			      = lra_reg_info[dregno].restore_rtx) != NULL_RTX)
 			{
 			  if (GET_CODE (SET_DEST (set)) == SUBREG)
-			    SUBREG_REG (SET_DEST (set))
-			      = regno_reg_rtx[restore_regno];
+			    SUBREG_REG (SET_DEST (set)) = restore_rtx;
 			  else
-			    SET_DEST (set) = regno_reg_rtx[restore_regno];
+			    SET_DEST (set) = restore_rtx;
 			}
 		      lra_push_insn_and_update_insn_regno_info (curr_insn);
 		      lra_set_used_insn_alternative_by_uid
@@ -6155,14 +6456,13 @@  remove_inheritance_pseudos (bitmap remov
 	      for (reg = curr_id->regs; reg != NULL; reg = reg->next)
 		{
 		  regno = reg->regno;
-		  restore_regno = lra_reg_info[regno].restore_regno;
-		  if (restore_regno >= 0)
+		  restore_rtx = lra_reg_info[regno].restore_rtx;
+		  if (restore_rtx != NULL_RTX)
 		    {
 		      if (change_p && bitmap_bit_p (remove_pseudos, regno))
 			{
 			  lra_substitute_pseudo_within_insn
-			    (curr_insn, regno, regno_reg_rtx[restore_regno],
-			     false);
+			    (curr_insn, regno, restore_rtx, false);
 			  restored_regs_p = true;
 			}
 		      else
@@ -6213,11 +6513,11 @@  undo_optional_reloads (void)
     {
       keep_p = false;
       /* Keep optional reloads from previous subpasses.  */
-      if (lra_reg_info[regno].restore_regno < 0
+      if (lra_reg_info[regno].restore_rtx == NULL_RTX
 	  /* If the original pseudo changed its allocation, just
 	     removing the optional pseudo is dangerous as the original
 	     pseudo will have longer live range.  */
-	  || reg_renumber[lra_reg_info[regno].restore_regno] >= 0)
+	  || reg_renumber[REGNO (lra_reg_info[regno].restore_rtx)] >= 0)
 	keep_p = true;
       else if (reg_renumber[regno] >= 0)
 	EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi2)
@@ -6231,7 +6531,7 @@  undo_optional_reloads (void)
 	      continue;
 	    if (REGNO (dest) == regno
 		/* Ignore insn for optional reloads itself.  */
-		&& lra_reg_info[regno].restore_regno != (int) REGNO (src)
+		&& REGNO (lra_reg_info[regno].restore_rtx) != REGNO (src)
 		/* Check only inheritance on last inheritance pass.  */
 		&& (int) REGNO (src) >= new_regno_start
 		/* Check that the optional reload was inherited.  */
@@ -6264,11 +6564,11 @@  undo_optional_reloads (void)
 	      dest = SET_DEST (set);
 	      if (REG_P (src) && REG_P (dest)
 		  && ((REGNO (src) == regno
-		       && (lra_reg_info[regno].restore_regno
-			   == (int) REGNO (dest)))
+		       && (REGNO (lra_reg_info[regno].restore_rtx)
+			   == REGNO (dest)))
 		      || (REGNO (dest) == regno
-			  && (lra_reg_info[regno].restore_regno
-			      == (int) REGNO (src)))))
+			  && (REGNO (lra_reg_info[regno].restore_rtx)
+			      == REGNO (src)))))
 		{
 		  if (lra_dump_file != NULL)
 		    {
@@ -6286,8 +6586,7 @@  undo_optional_reloads (void)
 		 reload.  */
 	    }
 	  lra_substitute_pseudo_within_insn
-	    (insn, regno, regno_reg_rtx[lra_reg_info[regno].restore_regno],
-	     false);
+	    (insn, regno, lra_reg_info[regno].restore_rtx, false);
 	  lra_update_insn_regno_info (insn);
 	  if (lra_dump_file != NULL)
 	    {
@@ -6299,7 +6598,7 @@  undo_optional_reloads (void)
     }
   /* Clear restore_regnos.  */
   EXECUTE_IF_SET_IN_BITMAP (&lra_optional_reload_pseudos, 0, regno, bi)
-    lra_reg_info[regno].restore_regno = -1;
+    lra_reg_info[regno].restore_rtx = NULL_RTX;
   bitmap_clear (&insn_bitmap);
   bitmap_clear (&removed_optional_reload_pseudos);
   return change_p;
@@ -6311,8 +6610,9 @@  bool
 lra_undo_inheritance (void)
 {
   unsigned int regno;
-  int restore_regno, hard_regno;
+  int hard_regno;
   int n_all_inherit, n_inherit, n_all_split, n_split;
+  rtx restore_rtx;
   bitmap_head remove_pseudos;
   bitmap_iterator bi;
   bool change_p;
@@ -6327,14 +6627,15 @@  lra_undo_inheritance (void)
   bitmap_initialize (&remove_pseudos, &reg_obstack);
   n_inherit = n_all_inherit = 0;
   EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, regno, bi)
-    if (lra_reg_info[regno].restore_regno >= 0)
+    if (lra_reg_info[regno].restore_rtx != NULL_RTX)
       {
 	n_all_inherit++;
 	if (reg_renumber[regno] < 0
 	    /* If the original pseudo changed its allocation, just
 	       removing inheritance is dangerous as for changing
 	       allocation we used shorter live-ranges.  */
-	    && reg_renumber[lra_reg_info[regno].restore_regno] < 0)
+	    && (! REG_P (lra_reg_info[regno].restore_rtx)
+		|| reg_renumber[REGNO (lra_reg_info[regno].restore_rtx)] < 0))
 	  bitmap_set_bit (&remove_pseudos, regno);
 	else
 	  n_inherit++;
@@ -6345,8 +6646,10 @@  lra_undo_inheritance (void)
 	     (double) n_inherit / n_all_inherit * 100);
   n_split = n_all_split = 0;
   EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi)
-    if ((restore_regno = lra_reg_info[regno].restore_regno) >= 0)
+    if ((restore_rtx = lra_reg_info[regno].restore_rtx) != NULL_RTX)
       {
+	int restore_regno = REGNO (restore_rtx);
+
 	n_all_split++;
 	hard_regno = (restore_regno >= FIRST_PSEUDO_REGISTER
 		      ? reg_renumber[restore_regno] : restore_regno);
@@ -6368,9 +6671,9 @@  lra_undo_inheritance (void)
   bitmap_clear (&remove_pseudos);
   /* Clear restore_regnos.  */
   EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, regno, bi)
-    lra_reg_info[regno].restore_regno = -1;
+    lra_reg_info[regno].restore_rtx = NULL_RTX;
   EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi)
-    lra_reg_info[regno].restore_regno = -1;
+    lra_reg_info[regno].restore_rtx = NULL_RTX;
   change_p = undo_optional_reloads () || change_p;
   return change_p;
 }
Index: lra-remat.c
===================================================================
--- lra-remat.c	(revision 238990)
+++ lra-remat.c	(working copy)
@@ -167,92 +167,6 @@  get_remat_bb_data_by_index (int index)
 
 
 
-/* Recursive hash function for RTL X.  */
-static hashval_t
-rtx_hash (rtx x)
-{
-  int i, j;
-  enum rtx_code code;
-  const char *fmt;
-  hashval_t val = 0;
-
-  if (x == 0)
-    return val;
-
-  code = GET_CODE (x);
-  val += (int) code + 4095;
-
-  /* Some RTL can be compared nonrecursively.  */
-  switch (code)
-    {
-    case REG:
-      return val + REGNO (x);
-
-    case LABEL_REF:
-      return iterative_hash_object (XEXP (x, 0), val);
-
-    case SYMBOL_REF:
-      return iterative_hash_object (XSTR (x, 0), val);
-
-    case SCRATCH:
-    case CONST_DOUBLE:
-    case CONST_INT:
-    case CONST_VECTOR:
-      return val;
-
-    default:
-      break;
-    }
-
-  /* Hash the elements.  */
-  fmt = GET_RTX_FORMAT (code);
-  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
-    {
-      switch (fmt[i])
-	{
-	case 'w':
-	  val += XWINT (x, i);
-	  break;
-
-	case 'n':
-	case 'i':
-	  val += XINT (x, i);
-	  break;
-
-	case 'V':
-	case 'E':
-	  val += XVECLEN (x, i);
-
-	  for (j = 0; j < XVECLEN (x, i); j++)
-	    val += rtx_hash (XVECEXP (x, i, j));
-	  break;
-
-	case 'e':
-	  val += rtx_hash (XEXP (x, i));
-	  break;
-
-	case 'S':
-	case 's':
-	  val += htab_hash_string (XSTR (x, i));
-	  break;
-
-	case 'u':
-	case '0':
-	case 't':
-	  break;
-
-	  /* It is believed that rtx's at this level will never
-	     contain anything but integers and other rtx's, except for
-	     within LABEL_REFs and SYMBOL_REFs.  */
-	default:
-	  abort ();
-	}
-    }
-  return val;
-}
-
-
-
 /* Hash table for the candidates.  Different insns (e.g. structurally
    the same insns or even insns with different unused output regs) can
    be represented by the same candidate in the table.  */
Index: lra-spills.c
===================================================================
--- lra-spills.c	(revision 238990)
+++ lra-spills.c	(working copy)
@@ -693,7 +693,7 @@  lra_final_code_change (void)
 {
   int i, hard_regno;
   basic_block bb;
-  rtx_insn *insn, *curr;
+  rtx_insn *insn, *curr, *next_insn;
   int max_regno = max_reg_num ();
 
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
@@ -727,7 +727,12 @@  lra_final_code_change (void)
 	  if (NONJUMP_INSN_P (insn) && GET_CODE (pat) == SET
 	      && REG_P (SET_SRC (pat)) && REG_P (SET_DEST (pat))
 	      && REGNO (SET_SRC (pat)) == REGNO (SET_DEST (pat))
-	      && ! return_regno_p (REGNO (SET_SRC (pat))))
+	      && (! return_regno_p (REGNO (SET_SRC (pat)))
+		  || (next_insn = next_nondebug_insn (insn)) == NULL_RTX
+		  || ! INSN_P (next_insn)
+		  || GET_CODE (PATTERN (next_insn)) != USE
+		  || ! REG_P (XEXP (PATTERN (next_insn), 0))
+		  || REGNO (SET_SRC (pat)) != REGNO (XEXP (PATTERN (next_insn), 0))))
 	    {
 	      lra_invalidate_insn_data (insn);
 	      delete_insn (insn);