diff mbox

Rename across basic block boundaries

Message ID 4E569522.7080008@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt Aug. 25, 2011, 6:32 p.m. UTC
On 08/24/11 13:12, Richard Sandiford wrote:
> Sorry, I'm find this a bit tough to review.  Could you provide some
> overview comments somewhere to say what the new algorithm is?
> The comment at the head of regrename.c still describes the current
> bb-local algorithm.

New patch below, with extra comments.  Let me know if more are needed.

> One thing though:
> 
> Bernd Schmidt <bernds@codesourcery.com> writes:
>> @@ -215,8 +306,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
>>    IOR_HARD_REG_SET (*pset, head->hard_conflicts);
>>    EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
>>      {
>> -      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
>> +      du_head_p other = chain_from_id (i);
>>        unsigned j = other->nregs;
>> +      gcc_assert (other != head);
>>        while (j-- > 0)
>>  	SET_HARD_REG_BIT (*pset, other->regno + j);
>>      }
> 
> Is this effectively cubic in the number of chains?  There are O(chains)
> calls to merge_overlapping_regs, O(chains) nodes in the conflicts bitmap,
> and chain_from_id chases O(chains) merges.

I've made chain_from_id store its final result in the original chain, so
it'll take O(chains) only once per chain.

Bootstrapped and tested on i686-linux (with rr enabled at O2). I've also
redone performance tests with a popular embedded benchmark on C6X; some
fluctuations around +/-5%, and 20% improvement on one benchmark.


Bernd
* regrename.c (struct du_head): Make nregs signed.
	(scan_rtx_reg, scan_rtx_address, dump_def_use_chain): Remove
	declarations.
	(closed_chains): Remove.
	(chain_from_id): New static function.
	(dump_def_use_chain): Change argument to be an int, indicating
	the first ID to print.  All callers changed.
	(mark_conflict, create_new_chain): Move upwards in the file.
	(merge_overlapping_regs): Use chain_from_id.  Assert that
	chains don't conflict with themselves.
	(rename_chains): Take no argument.  Iterate over id_to_chain
	rather to find chains to rename.  Clear tick before the main
	loop.
	(struct incoming_reg_info): New struct.
	(struct bb_rename_info): New struct.
	(init_rename_info, set_incoming_from_chain, merge_chains): New
	static functions.
	(regrename_analyze): New static function, broken out of
	regrename_optimize.  Record and make use of open chain information
	at basic block boundaries, and merge chains where possible.
	(scan_rtx_reg): Make this_nregs signed.  Don't update
	closed_chains.
	(build_def_use): Return a bool to indicate success.  All callers
	changed.  Don't initialize global data here.
	(regrename_optimize): Move most code out of here into
	regrename_analyze.
	* regs.h (add_range_to_hard_reg_set, remove_range_from_hard_reg_set,
	range_overlaps_hard_reg_set_p, range_in_hard_reg_set_p): New
	static inline functions.

Comments

Richard Sandiford Aug. 26, 2011, 12:57 p.m. UTC | #1
Rather than using global variables and then copying them into a bb
structure, would it be possible to write directly into the bb structure?
The answer's probably "no", just asking. :-)

Bernd Schmidt <bernds@codesourcery.com> writes:
> 	* regrename.c (struct du_head): Make nregs signed.
> 	(scan_rtx_reg, scan_rtx_address, dump_def_use_chain): Remove
> 	declarations.

This bit was split out.

> 	(mark_conflict, create_new_chain): Move upwards in the file.

Same here.  Should mention the change to create_new_chain's interface
though.

> -     2. For each chain, the set of possible renaming registers is computed.
> +     2. We try combine the local chains across basic block boundaries by
> +        comparing chains that were open at the start or end of a block to
> +	those in successor/predecessor blocks.

"try to combine"

> +/* Dump all def/use chains, starting at id FROM.  */
>  
>  static void
> -dump_def_use_chain (struct du_head *head)
> +dump_def_use_chain (int from)
>  {
> -  while (head)
> +  du_head_p head;
> +  int i;
> +  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, head)
>      {
>        struct du_chain *this_du = head->first;
> +      if (i < from)
> +	continue;
>        fprintf (dump_file, "Register %s (%d):",
>  	       reg_names[head->regno], head->nregs);
>        while (this_du)

I know it's only a dumping function, but maybe this'd be a good excuse
to add:

#define FOR_EACH_VEC_ELT_FROM(T, V, I, P, FROM)		\
  for (I = (FROM); VEC_iterate (T, (V), (I), (P)); ++(I))

> +/* A structure recording information about each basic block.  It is saved
> +   and restored around basic block boundaries.  */
> +struct bb_rename_info

Probably worth saying here or elsewhere that the bb->aux field points
to this information and that (more importantly) the bb->aux is null
for blocks that could not be optimised, including the exit block.

> +/* Record in RI that the block corresponding to it has an incoming
> +   live value, described by CHAIN.  */
> +static void
> +set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
> +{
> +  int min_reg, max_reg, i;
> +  int incoming_nregs = ri->incoming[chain->regno].nregs;
> +  int nregs;
> +
> +  /* If we've recorded the same information before, everything is fine.  */
> +  if (incoming_nregs == chain->nregs)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "reg %d/%d already recorded\n",
> +		 chain->regno, chain->nregs);
> +      return;
> +    }
> +
> +  /* If we have no information for any of the involved registers, update
> +     the incoming array.  */
> +  nregs = chain->nregs;
> +  while (nregs-- > 0)
> +    if (ri->incoming[chain->regno + nregs].nregs != 0
> +	|| ri->incoming[chain->regno + nregs].unusable)
> +      break;
> +  if (nregs < 0)
> +    {
> +      nregs = chain->nregs;
> +      ri->incoming[chain->regno].nregs = nregs;
> +      while (nregs-- > 1)
> +	ri->incoming[chain->regno + nregs].nregs = -nregs;
> +      if (dump_file)
> +	fprintf (dump_file, "recorded reg %d/%d\n",
> +		 chain->regno, chain->nregs);
> +      return;
> +    }
> +
> +  /* There must be some kind of conflict.  Set the unusable for all
> +     overlapping registers.  */
> +  min_reg = chain->regno;
> +  if (incoming_nregs < 0)
> +    min_reg += incoming_nregs;
> +  max_reg = chain->regno + chain->nregs;
> +  for (i = min_reg; i < max_reg; i++)
> +    ri->incoming[i].unusable = true;

In the incoming_nregs < 0 case, we only need to set
ri->incoming[chain->regno + incoming_nregs] itself, right,
not the other registers between that and ri->incoming[chain->regno]?
If so, I think it'd be clearer to have:

  /* There must be some kind of conflict.  Prevent both the old and
     new ranges from being used.  */
  if (incoming_nregs < 0)
    ri->incoming[chain->regno + incoming_nregs].unusable = true;
  for (i = 0; i < chain->nregs; i++)
    ri->incoming[chain->regno + i].unusable = true;

When I first looked at the code, I was wondering why we changed every
register in (chain->regno + incoming_nregs, chain_regno), but none in
[chain->regno + chain->nregs, OLD_END).  Seems like we should do neither
(as in the above suggestion) or both.

> +  /* Process all basic blocks by using a worklist, adding unvisited successor
> +     blocks whenever we reach the end of one basic blocks.  This ensures that
> +     whenever possible, we only process a block after at least one of its
> +     predecessors, which provides a "seeding" effect to make the logic in
> +     set_incoming_from_chain and init_rename_info useful.  */

Wouldn't a reverse post-order (inverted_post_order_compute) allow even
more pre-opening (as well as being less code)?

Looked good to me otherwise.

Richard
diff mbox

Patch

Index: regrename.c
===================================================================
--- regrename.c	(revision 178065)
+++ regrename.c	(working copy)
@@ -47,18 +47,24 @@ 
 
      1. Local def/use chains are built: within each basic block, chains are
 	opened and closed; if a chain isn't closed at the end of the block,
-	it is dropped.
+	it is dropped.  We pre-open chains if we have already examined a
+	predecessor block and found chains live at the end which match
+	live registers at the start of the new block.
 
-     2. For each chain, the set of possible renaming registers is computed.
+     2. We try combine the local chains across basic block boundaries by
+        comparing chains that were open at the start or end of a block to
+	those in successor/predecessor blocks.
+
+     3. For each chain, the set of possible renaming registers is computed.
 	This takes into account the renaming of previously processed chains.
 	Optionally, a preferred class is computed for the renaming register.
 
-     3. The best renaming register is computed for the chain in the above set,
+     4. The best renaming register is computed for the chain in the above set,
 	using a round-robin allocation.  If a preferred class exists, then the
 	round-robin allocation is done within the class first, if possible.
 	The round-robin allocation of renaming registers itself is global.
 
-     4. If a renaming register has been found, it is substituted in the chain.
+     5. If a renaming register has been found, it is substituted in the chain.
 
   Targets can parameterize the pass by specifying a preferred class for the
   renaming register for a given (super)class of registers to be renamed.  */
@@ -75,8 +81,9 @@  struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -140,6 +147,7 @@  static struct obstack rename_obstack;
 static void do_replace (struct du_head *, int);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -151,9 +159,8 @@  static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -166,14 +173,34 @@  static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  du_head_p first_chain = VEC_index (du_head_p, id_to_chain, id);
+  du_head_p chain = first_chain;
+  while (chain->id != id)
+    {
+      id = chain->id;
+      chain = VEC_index (du_head_p, id_to_chain, id);
+    }
+  first_chain->id = id;
+  return chain;
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, head)
     {
       struct du_chain *this_du = head->first;
+      if (i < from)
+	continue;
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -215,7 +242,7 @@  mark_conflict (struct du_head *chains, u
    and record its occurrence in *LOC, which is being written to in INSN.
    This access requires a register of class CL.  */
 
-static void
+static du_head_p
 create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
 		  rtx insn, enum reg_class cl)
 {
@@ -224,7 +251,6 @@  create_new_chain (unsigned this_regno, u
   int nregs;
 
   head->next_chain = open_chains;
-  open_chains = head;
   head->regno = this_regno;
   head->nregs = this_nregs;
   head->need_caller_save_reg = 0;
@@ -264,7 +290,7 @@  create_new_chain (unsigned this_regno, u
   if (insn == NULL_RTX)
     {
       head->first = head->last = NULL;
-      return;
+      return head;
     }
 
   this_du = XOBNEW (&rename_obstack, struct du_chain);
@@ -274,6 +300,7 @@  create_new_chain (unsigned this_regno, u
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+  return head;
 }
 
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
@@ -287,8 +314,9 @@  merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -341,12 +369,15 @@  check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
+
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -358,11 +389,10 @@  rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -371,8 +401,6 @@  rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -488,14 +516,453 @@  rename_chains (du_head_p all_chains)
     }
 }
 
+/* A structure to record information for each hard register at the start of
+   a basic block.  */
+struct incoming_reg_info {
+  /* Holds the number of registers used in the chain that gave us information
+     about this register.  Zero means no information known yet, while a
+     negative value is used for something that is part of, but not the first
+     register in a multi-register value.  */
+  int nregs;
+  /* Set to true if we have accesses that conflict in the number of registers
+     used.  */
+  bool unusable;
+};
+
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.  */
+struct bb_rename_info
+{
+  /* The basic block corresponding to this structure.  */
+  basic_block bb;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  unsigned n_preds;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
+
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
+
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
+
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  /* Open chains based on information from (at least one) predecessor
+     block.  This gives us a chance later on to combine chains across
+     basic block boundaries.  Inconsistencies (in access sizes) will
+     be caught normally and dealt with conservatively by disabling the
+     chain for renaming, and there is no risk of losing optimization
+     opportunities by opening chains either: if we did not open the
+     chains, we'd have to track the live register as a hard reg, and
+     we'd be unable to rename it in any case.  */
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int min_reg, max_reg, i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Set the unusable for all
+     overlapping registers.  */
+  min_reg = chain->regno;
+  if (incoming_nregs < 0)
+    min_reg += incoming_nregs;
+  max_reg = chain->regno + chain->nregs;
+  for (i = min_reg; i < max_reg; i++)
+    ri->incoming[i].unusable = true;
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
+
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i, ri_index, n_processed;
+  basic_block bb;
+  bitmap_head processed_bbs;
+  VEC (basic_block, heap) *bb_queue;
+
+  /* Gather some information about the blocks in this function.  */
+  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
+  ri_index = 0;
+  FOR_EACH_BB (bb)
+    {
+      struct bb_rename_info *ri = rename_info + ri_index;
+      ri->bb = bb;
+      ri->n_preds = EDGE_COUNT (bb->preds);
+      bb->aux = ri;
+      ri_index++;
+    }
+
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  /* Process all basic blocks by using a worklist, adding unvisited successor
+     blocks whenever we reach the end of one basic blocks.  This ensures that
+     whenever possible, we only process a block after at least one of its
+     predecessors, which provides a "seeding" effect to make the logic in
+     set_incoming_from_chain and init_rename_info useful.  */
+
+  bb_queue = VEC_alloc (basic_block, heap, 0);
+
+  n_processed = 0;
+  bitmap_initialize (&processed_bbs, &bitmap_default_obstack);
+
+  VEC_safe_push (basic_block, heap, bb_queue, single_succ (ENTRY_BLOCK_PTR));
+
+  while (!VEC_empty (basic_block, bb_queue))
+    {
+      bool success;
+      edge e;
+      edge_iterator ei;
+      basic_block bb1 = VEC_pop (basic_block, bb_queue);
+      struct bb_rename_info *this_info = (struct bb_rename_info *)bb1->aux;
+      int old_length = VEC_length (du_head_p, id_to_chain);
+
+      if (dump_file)
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      bitmap_set_bit (&processed_bbs, bb1->index);
+      n_processed++;
+      init_rename_info (this_info, bb1);
+
+      success = build_def_use (bb1);
+      if (success)
+	{
+	  if (dump_file)
+	    dump_def_use_chain (old_length);
+	  bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	}
+
+      /* Add successor blocks to the worklist if necessary, and record
+	 data about our own open chains at the end of this block, which
+	 will be used to pre-open chains when processing the successors.  */
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  gcc_assert (dest_ri->n_preds > 0);
+	  dest_ri->n_preds--;
+	  if (dest_ri->n_preds == 0
+	      && !bitmap_bit_p (&processed_bbs, dest_ri->bb->index))
+	    VEC_safe_push (basic_block, heap, bb_queue, e->dest);
+	  if (success)
+	    for (chain = open_chains; chain; chain = chain->next_chain)
+	      set_incoming_from_chain (dest_ri, chain);
+	}
+
+      if (VEC_empty (basic_block, bb_queue)
+	  && n_processed < ri_index)
+	{
+	  basic_block best = NULL;
+	  FOR_EACH_BB (bb)
+	    {
+	      struct bb_rename_info *ri = (struct bb_rename_info *)bb->aux;
+	      if (!bitmap_bit_p (&processed_bbs, bb->index)
+		  && (best == 0
+		      || ri->n_preds != EDGE_COUNT (bb->preds)))
+		best = bb;
+	    }
+	  if (dump_file)
+	    fprintf (dump_file, "\npicking new start block %d\n", best->index);
+	  VEC_safe_push (basic_block, heap, bb_queue, best);
+	}
+    }
+  bitmap_clear (&processed_bbs);
+  VEC_free (basic_block, heap, bb_queue);
+
+  /* Now, combine the chains data we have gathered across basic block
+     boundaries.
+
+     For every basic block, there may be chains open at the start, or at the
+     end.  Rather than exclude them from renaming, we look for open chains
+     with matching registers at the other side of the CFG edge.
+
+     For a given chain using register R, open at the start of block B, we
+     must find an open chain using R on the other side of every edge leading
+     to B, if the register is live across this edge.  In the code below,
+     N_PREDS_USED counts the number of edges where the register is live, and
+     N_PREDS_JOINED counts those where we found an appropriate chain for
+     joining.
+
+     We perform the analysis for both incoming and outgoing edges, but we
+     only need to merge once (in the second part, after verifying outgoing
+     edges).  */
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d in edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
+
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
+
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file,
+				     "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file,
+				   "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+}
+
 static void
 do_replace (struct du_head *head, int reg)
 {
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -591,7 +1058,7 @@  scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -691,8 +1158,6 @@  scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1078,28 +1543,14 @@  record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1338,14 +1789,10 @@  build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  return true;
 }
 
 /* Perform register renaming on the current function.  */
@@ -1353,44 +1800,20 @@  build_def_use (basic_block bb)
 static unsigned int
 regrename_optimize (void)
 {
-  basic_block bb;
-  char *first_obj;
-
   df_set_flags (DF_LR_RUN_DCE);
   df_note_add_problem ();
   df_analyze ();
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
-  memset (tick, 0, sizeof tick);
-
   gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
 
-  FOR_EACH_BB (bb)
-    {
-      struct du_head *all_chains = 0;
-
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
-
-      if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
-
-      all_chains = build_def_use (bb);
-
-      if (dump_file)
-	dump_def_use_chain (all_chains);
-
-      rename_chains (all_chains);
+  regrename_analyze ();
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
-    }
+  rename_chains ();
 
+  free_chain_data ();
   obstack_free (&rename_obstack, NULL);
 
-  if (dump_file)
-    fputc ('\n', dump_file);
-
   return 0;
 }
 
Index: regs.h
===================================================================
--- regs.h	(revision 178030)
+++ regs.h	(working copy)
@@ -397,4 +397,48 @@  overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */