diff mbox

Rename across basic block boundaries

Message ID 4E00A296.4000306@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt June 21, 2011, 1:54 p.m. UTC
This patch requires
  http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02193.html
as a prerequisite, and supersedes
  http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02194.html

The idea here is to allow regrename to operate across basic block
boundaries. This helps for targets that use sched_ebb (such as C6X), and
by exposing more chains, we also help on targets that can benefit from
PREFERRED_RENAME_CLASS (again, C6X).

The previous patch I posted only works on extended basic blocks, but
what I really wanted was to have this analysis also work for loops.
Another drawback of the previous patch was that it scanned ebbs as one
large block, which could potentially lead to worse optimization if we
encountered a situation that makes us fail the entire block.

This version, when reaching the end of a basic block, records state
which is used to initialize open chains when starting to scan its
successors. We still only ever scan each block once; for loops, it's
typically good enough to have data from one incoming edge. If for some
reason we can't match up all outgoing and incoming edges, we just fail
to merge them, behaviour then reverts to what we currently have
(renaming inside a single block).

One change in behaviour is that tick isn't reset for each basic block.
However, this shouldn't be a problem as it just randomizes the order in
which we choose registers.

Bootstrapped and tested on i686-linux (with -frename-registers forced on
for -O2); and a variant of it tested in a 4.5 C6X tree.  Benchmarking on
C6X suggested that it helps with certain performance issues.  Ok (this
and the preliminary patch)?


Bernd
* regrename.c (struct du_head): Make nregs signed.
	(scan_rtx_reg, scan_rtx_address, dump_def_use_chain): Remove
	declarations.
	(closed_chains): Remove.
	(chain_from_id): New static function.
	(dump_def_use_chain): Change argument to be an int, indicating
	the first ID to print.  All callers changed.
	(mark_conflict, create_new_chain): Move upwards in the file.
	(merge_overlapping_regs): Use chain_from_id.  Assert that
	chains don't conflict with themselves.
	(rename_chains): Take no argument.  Iterate over id_to_chain
	rather to find chains to rename.  Clear tick before the main
	loop.
	(struct incoming_reg_info): New struct.
	(struct bb_rename_info): New struct.
	(init_rename_info, set_incoming_from_chain, merge_chains): New
	static functions.
	(regrename_analyze): New static function, broken out of
	regrename_optimize.  Record and make use of open chain information
	at basic block boundaries, and merge chains where possible.
	(scan_rtx_reg): Make this_nregs signed.  Don't update
	closed_chains.
	(build_def_use): Return a bool to indicate success.  All callers
	changed.  Don't initialize global data here.
	(regrename_optimize): Move most code out of here into
	regrename_analyze.
	* regs.h (add_range_to_hard_reg_set, remove_range_from_hard_reg_set,
	range_overlaps_hard_reg_set_p, range_in_hard_reg_set_p): New
	static inline functions.

Comments

Bernd Schmidt Aug. 23, 2011, 10:15 a.m. UTC | #1
Ping for the patch at
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01595.html

> This patch requires
>   http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02193.html
> as a prerequisite, and supersedes
>   http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02194.html
> 
> The idea here is to allow regrename to operate across basic block
> boundaries. This helps for targets that use sched_ebb (such as C6X), and
> by exposing more chains, we also help on targets that can benefit from
> PREFERRED_RENAME_CLASS (again, C6X).


Bernd
Richard Sandiford Aug. 24, 2011, 11:12 a.m. UTC | #2
Sorry, I'm find this a bit tough to review.  Could you provide some
overview comments somewhere to say what the new algorithm is?
The comment at the head of regrename.c still describes the current
bb-local algorithm.

One thing though:

Bernd Schmidt <bernds@codesourcery.com> writes:
> @@ -215,8 +306,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
>    IOR_HARD_REG_SET (*pset, head->hard_conflicts);
>    EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
>      {
> -      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
> +      du_head_p other = chain_from_id (i);
>        unsigned j = other->nregs;
> +      gcc_assert (other != head);
>        while (j-- > 0)
>  	SET_HARD_REG_BIT (*pset, other->regno + j);
>      }

Is this effectively cubic in the number of chains?  There are O(chains)
calls to merge_overlapping_regs, O(chains) nodes in the conflicts bitmap,
and chain_from_id chases O(chains) merges.

I realise this probably isn't a problem in practice.  Just looking
for reassurance. :-)

Richard
diff mbox

Patch

Index: trunk/gcc/regrename.c
===================================================================
--- trunk.orig/gcc/regrename.c
+++ trunk/gcc/regrename.c
@@ -75,8 +75,9 @@  struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -138,14 +139,9 @@  static int this_tick = 0;
 static struct obstack rename_obstack;
 
 static void do_replace (struct du_head *, int);
-static void scan_rtx_reg (rtx, rtx *, enum reg_class,
-			  enum scan_actions, enum op_type);
-static void scan_rtx_address (rtx, rtx *, enum reg_class,
-			      enum scan_actions, enum machine_mode);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
-static struct du_head *build_def_use (basic_block);
-static void dump_def_use_chain (struct du_head *);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -157,9 +153,8 @@  static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -172,14 +167,32 @@  static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  for (;;)
+    {
+      du_head_p chain = VEC_index (du_head_p, id_to_chain, id);
+      if (chain->id == id)
+	return chain;
+      id = chain->id;
+    }
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, head)
     {
       struct du_chain *this_du = head->first;
+      if (i < from)
+	continue;
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -204,6 +217,84 @@  free_chain_data (void)
   VEC_free (du_head_p, heap, id_to_chain);
 }
 
+/* Walk all chains starting with CHAINS and record that they conflict with
+   another chain whose id is ID.  */
+
+static void
+mark_conflict (struct du_head *chains, unsigned id)
+{
+  while (chains)
+    {
+      bitmap_set_bit (&chains->conflicts, id);
+      chains = chains->next_chain;
+    }
+}
+
+/* Create a new chain for THIS_NREGS registers starting at THIS_REGNO,
+   and record its occurrence in *LOC, which is being written to in INSN.
+   This access requires a register of class CL.  */
+
+static du_head_p
+create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
+		  rtx insn, enum reg_class cl)
+{
+  struct du_head *head = XOBNEW (&rename_obstack, struct du_head);
+  struct du_chain *this_du;
+  int nregs;
+
+  head->next_chain = open_chains;
+  head->regno = this_regno;
+  head->nregs = this_nregs;
+  head->need_caller_save_reg = 0;
+  head->cannot_rename = 0;
+
+  VEC_safe_push (du_head_p, heap, id_to_chain, head);
+  head->id = current_id++;
+
+  bitmap_initialize (&head->conflicts, &bitmap_default_obstack);
+  bitmap_copy (&head->conflicts, &open_chains_set);
+  mark_conflict (open_chains, head->id);
+
+  /* Since we're tracking this as a chain now, remove it from the
+     list of conflicting live hard registers and track it in
+     live_in_chains instead.  */
+  nregs = head->nregs;
+  while (nregs-- > 0)
+    {
+      SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
+      CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
+    }
+
+  COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
+  bitmap_set_bit (&open_chains_set, head->id);
+
+  open_chains = head;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Creating chain %s (%d)",
+	       reg_names[head->regno], head->id);
+      if (insn != NULL_RTX)
+	fprintf (dump_file, " at insn %d", INSN_UID (insn));
+      fprintf (dump_file, "\n");
+    }
+
+  if (insn == NULL_RTX)
+    {
+      head->first = head->last = NULL;
+      return head;
+    }
+
+  this_du = XOBNEW (&rename_obstack, struct du_chain);
+  head->first = head->last = this_du;
+
+  this_du->next_use = 0;
+  this_du->loc = loc;
+  this_du->insn = insn;
+  this_du->cl = cl;
+  return head;
+}
+
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
    set the corresponding bits in *PSET.  */
 
@@ -215,8 +306,9 @@  merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -269,13 +361,15 @@  check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
 
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -287,11 +381,10 @@  rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -300,8 +393,6 @@  rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -418,50 +509,401 @@  rename_chains (du_head_p all_chains)
     }
 }
 
-/* Perform register renaming on the current function.  */
+struct incoming_reg_info {
+  int nregs;
+  bool unusable;
+};
 
-static unsigned int
-regrename_optimize (void)
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.  */
+struct bb_rename_info
 {
+  /* The basic block corresponding to this structure.  */
   basic_block bb;
-  char *first_obj;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  unsigned n_preds;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
 
-  df_set_flags (DF_LR_RUN_DCE);
-  df_note_add_problem ();
-  df_analyze ();
-  df_set_flags (DF_DEFER_INSN_RESCAN);
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
 
-  memset (tick, 0, sizeof tick);
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
 
-  gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int min_reg, max_reg, i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Set the unusable for all
+     overlapping registers.  */
+  min_reg = chain->regno;
+  if (incoming_nregs < 0)
+    min_reg += incoming_nregs;
+  max_reg = chain->regno + chain->nregs;
+  for (i = min_reg; i < max_reg; i++)
+    {
+      ri->incoming[i].unusable = true;
+    }
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
 
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i, ri_index, n_processed;
+  basic_block bb;
+  bitmap_head processed_bbs;
+  VEC (basic_block, heap) *bb_queue;
+
+  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
+  ri_index = 0;
   FOR_EACH_BB (bb)
     {
-      struct du_head *all_chains = 0;
+      struct bb_rename_info *ri = rename_info + ri_index;
+      ri->bb = bb;
+      ri->n_preds = EDGE_COUNT (bb->preds);
+      bb->aux = ri;
+      ri_index++;
+    }
 
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  bb_queue = VEC_alloc (basic_block, heap, 0);
+
+  n_processed = 0;
+  bitmap_initialize (&processed_bbs, &bitmap_default_obstack);
+  VEC_safe_push (basic_block, heap, bb_queue, single_succ (ENTRY_BLOCK_PTR));
+
+  while (!VEC_empty (basic_block, bb_queue))
+    {
+      bool success;
+      edge e;
+      edge_iterator ei;
+      basic_block bb1 = VEC_pop (basic_block, bb_queue);
+      struct bb_rename_info *this_info = (struct bb_rename_info *)bb1->aux;
+      int old_length = VEC_length (du_head_p, id_to_chain);
 
       if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      bitmap_set_bit (&processed_bbs, bb1->index);
+      n_processed++;
+      init_rename_info (this_info, bb1);
 
-      all_chains = build_def_use (bb);
+      success = build_def_use (bb1);
+      if (success)
+	{
+	  if (dump_file)
+	    dump_def_use_chain (old_length);
+	  bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	}
+
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  gcc_assert (dest_ri->n_preds > 0);
+	  dest_ri->n_preds--;
+	  if (dest_ri->n_preds == 0
+	      && !bitmap_bit_p (&processed_bbs, dest_ri->bb->index))
+	    VEC_safe_push (basic_block, heap, bb_queue, e->dest);
+	  if (success)
+	    for (chain = open_chains; chain; chain = chain->next_chain)
+	      set_incoming_from_chain (dest_ri, chain);
+	}
+
+      if (VEC_empty (basic_block, bb_queue)
+	  && n_processed < ri_index)
+	{
+	  basic_block best = NULL;
+	  FOR_EACH_BB (bb)
+	    {
+	      struct bb_rename_info *ri = (struct bb_rename_info *)bb->aux;
+	      if (!bitmap_bit_p (&processed_bbs, bb->index)
+		  && (best == 0
+		      || ri->n_preds != EDGE_COUNT (bb->preds)))
+		best = bb;
+	    }
+	  if (dump_file)
+	    fprintf (dump_file, "\npicking new start block %d\n", best->index);
+	  VEC_safe_push (basic_block, heap, bb_queue, best);
+	}
+    }
+  bitmap_clear (&processed_bbs);
+  VEC_free (basic_block, heap, bb_queue);
+
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
 
       if (dump_file)
-	dump_def_use_chain (all_chains);
+	fprintf (dump_file, "processing bb %d in edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
 
-      rename_chains (all_chains);
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
     }
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
 
-  obstack_free (&rename_obstack, NULL);
+      if (bb_ri->bb->aux == NULL)
+	continue;
 
-  if (dump_file)
-    fputc ('\n', dump_file);
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb_ri->bb->index);
 
-  return 0;
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file, "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file, "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
 }
 
 static void
@@ -470,8 +912,6 @@  do_replace (struct du_head *head, int re
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -494,19 +934,6 @@  do_replace (struct du_head *head, int re
 }
 
 
-/* Walk all chains starting with CHAINS and record that they conflict with
-   another chain whose id is ID.  */
-
-static void
-mark_conflict (struct du_head *chains, unsigned id)
-{
-  while (chains)
-    {
-      bitmap_set_bit (&chains->conflicts, id);
-      chains = chains->next_chain;
-    }
-}
-
 /* True if we found a register with a size mismatch, which means that we
    can't track its lifetime accurately.  If so, we abort the current block
    without renaming.  */
@@ -572,71 +999,6 @@  note_sets_clobbers (rtx x, const_rtx set
     add_to_hard_reg_set (&chain->hard_conflicts, GET_MODE (x), REGNO (x));
 }
 
-/* Create a new chain for THIS_NREGS registers starting at THIS_REGNO,
-   and record its occurrence in *LOC, which is being written to in INSN.
-   This access requires a register of class CL.  */
-
-static void
-create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
-		  rtx insn, enum reg_class cl)
-{
-  struct du_head *head = XOBNEW (&rename_obstack, struct du_head);
-  struct du_chain *this_du;
-  int nregs;
-
-  head->next_chain = open_chains;
-  open_chains = head;
-  head->regno = this_regno;
-  head->nregs = this_nregs;
-  head->need_caller_save_reg = 0;
-  head->cannot_rename = 0;
-
-  VEC_safe_push (du_head_p, heap, id_to_chain, head);
-  head->id = current_id++;
-
-  bitmap_initialize (&head->conflicts, &bitmap_default_obstack);
-  bitmap_copy (&head->conflicts, &open_chains_set);
-  mark_conflict (open_chains, head->id);
-
-  /* Since we're tracking this as a chain now, remove it from the
-     list of conflicting live hard registers and track it in
-     live_in_chains instead.  */
-  nregs = head->nregs;
-  while (nregs-- > 0)
-    {
-      SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
-      CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
-    }
-
-  COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
-  bitmap_set_bit (&open_chains_set, head->id);
-
-  open_chains = head;
-
-  if (dump_file)
-    {
-      fprintf (dump_file, "Creating chain %s (%d)",
-	       reg_names[head->regno], head->id);
-      if (insn != NULL_RTX)
-	fprintf (dump_file, " at insn %d", INSN_UID (insn));
-      fprintf (dump_file, "\n");
-    }
-
-  if (insn == NULL_RTX)
-    {
-      head->first = head->last = NULL;
-      return;
-    }
-
-  this_du = XOBNEW (&rename_obstack, struct du_chain);
-  head->first = head->last = this_du;
-
-  this_du->next_use = 0;
-  this_du->loc = loc;
-  this_du->insn = insn;
-  this_du->cl = cl;
-}
-
 static void
 scan_rtx_reg (rtx insn, rtx *loc, enum reg_class cl, enum scan_actions action,
 	      enum op_type type)
@@ -645,7 +1007,7 @@  scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -745,8 +1107,6 @@  scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1132,28 +1492,14 @@  record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1392,14 +1738,32 @@  build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
+
+  return true;
+}
+
+/* Perform register renaming on the current function.  */
+
+static unsigned int
+regrename_optimize (void)
+{
+  df_set_flags (DF_LR_RUN_DCE);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  gcc_obstack_init (&rename_obstack);
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  regrename_analyze ();
+
+  rename_chains ();
+
+  free_chain_data ();
+  obstack_free (&rename_obstack, NULL);
+
+  return 0;
 }
 
 static bool
Index: trunk/gcc/regs.h
===================================================================
--- trunk.orig/gcc/regs.h
+++ trunk/gcc/regs.h
@@ -397,4 +397,48 @@  overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */