Patchwork [Ping,0/3] New macro PREFERRED_RENAME_CLASS

login
register
mail settings
Submitter Yao Qi
Date Nov. 5, 2010, 3:35 p.m.
Message ID <4CD42437.8060404@codesourcery.com>
Download mbox | patch
Permalink /patch/70265/
State New
Headers show

Comments

Yao Qi - Nov. 5, 2010, 3:35 p.m.
On 11/05/2010 04:55 PM, Eric Botcazou wrote:
>> Patch 1: does everything except adding target hook
>> preferred_rename_class .  It is a separate improvement to register-rename.
>> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg02197.html
>
> You didn't really inline the callback, did you?  If you want to keep the
> comparer function standalone, then remove the superfluous indirection.
>

Done.  Removed function pointer CMP, and use merge_sort_callback directly.

>
> @@ -51,6 +51,8 @@ struct du_head
>     struct du_head *next_chain;
>     /* The first and last elements of this chain.  */
>     struct du_chain *first, *last;
> +  /* The length of du_chain for non-debug insns.  */
> +  int length;
>     /* Describes the register being tracked.  */
>     unsigned regno, nregs;
>
> du_chain is a structure tag, so you need to elaborate.
>
>

Add one sentence in comment to explain it a little more, "The du_head 
linked list can be sorted by this, and register-rename can prefer 
register classes according to this order.".

>
> @@ -154,6 +156,141 @@ merge_overlapping_regs (HARD_REG_SET *pset, struct
> du_head *head)
>       }
>   }
>
> +/* Get the next LENGTH element in list from START.  If length of
> +   list from START is less than or equal to LENGTH, return the
> +   last element.  */
> +static struct du_head *
> +get_tail_du_head (struct du_head *start, int length)
> +{
> +  while (length--&&  start->next_chain != NULL)
> +      start = start->next_chain;
> +
> +  return start;
>
> Avoid TAIL and HEAD in the same function name.  GET_ELEMENT is clearer.  And
> LENGTH is a poor choice for the parameter name, use N instead.
>
> /* Return the Nth element in LIST.  If LIST contains less than N elements,
>     return the last one.  */
>
> static struct du_head *
> get_element (struct du_head *list, int n)
>
>

Fixed as you suggested.

>
> +/* Merge the first two sequences of CURRENT_LENGTH nodes in the
> +   linked list pointed by START_NODE.  Update START_NODE to point
> +   to the merged nodes, and return a pointer to the last merged
> +   node.   CMP is callback  for comparison.  */
> +
> +static struct du_head *
> +merge(struct du_head **start_node, int current_length,
> +      int (*cmp) (const struct du_head *, const struct du_head *))
>
> Why CURRENT_LENGTH?  Just use LENGTH.
>

Done.

> The first sentence is somewhat ambiguous as one might get the impression that
> the 2 sequences aren't already present in the list.
>
> /* Merge the first 2 sub-lists of LENGTH nodes contained in the linked list
>     pointed to by START_NODE.  Update...
>
> Document what happens when the linked list doesn't contain enough elements.
>
>

Done.

> +	  current_tail_node =
> +	    get_tail_du_head (current_sorted_node,
> +			      current_length - left_count);
>
> '=' on the next line
>
>

Done.

> +  return current_tail_node->next_chain != NULL ?
> +    current_tail_node : NULL;
>
> return (current_tail_node->next_chain ? current_tail_node : NULL);
>

Done.

>
> +/* Non-recursive merge sort to du_head list.  */
> +static void
> +sort_du_head (struct du_head **head,
> +	      int(*cmp)(const struct du_head *, const struct du_head *))
>
> /* Sort the linked list pointed to by HEAD.  The algorithm is a non-recursive
>     merge sort [that ...optional more detailed explanation...]
>
>

Done.  Add more detailed explanation in comments.

> +  do
> +    {
> +      struct du_head **segment = head;
> +
>
> Trailing spaces.
>

Which one is trailing space?

> +      while ((last_tail = merge(segment, current_length, cmp))
> +	     != NULL)
>
> Why breaking the line?
>

Fixed.

> +	{
> +	  segment =&last_tail->next_chain;
> +	}
>
> Superfluous curly braces.
>

Fixed.

>
> Are you sure that this revised implementation works as expected?  Won't it
> stop after the 1-length merge phase?
>

Ur, my first version is correct.  Revert it to my first version.

>
> @@ -265,13 +405,13 @@ regrename_optimize (void)
>
>   	  /* Now potential_regs is a reasonable approximation, let's
>   	     have a closer look at each register still in there.  */
> -	  for (new_reg = 0; new_reg<  FIRST_PSEUDO_REGISTER; new_reg++)
> +	  for (new_reg = FIRST_PSEUDO_REGISTER - 1; new_reg>= 0; new_reg--)
>
> What is this hunk for?  It isn't mentioned in the ChangeLog.
>

Yeah, we should explain it a little bit in comments.  I add this 
paragraph below in comments, and ChangeLog will be updated accordingly.
"Registers from high to low is iterated here, in order to 	     prefer 
low registers.  On some ports, low registers are 	     better than high 
registers in terms of final code size."
Eric Botcazou - Nov. 8, 2010, 12:34 p.m.
> Done.  Removed function pointer CMP, and use merge_sort_callback directly.

+/* Comparison function of merge sort.  Return true if A is less than
+   B, otherwise return false.  */
+static inline int 
+merge_sort_callback(const struct du_head *a, 
+		    const struct du_head *b)
+{
+  return a->length < b->length;
+}

It isn't really a callback anymore. :-)

There are trailing spaces after the "int" and the "*a,".  There are more of 
them throughout the patch.  Good editors have options to make them visible.

> Add one sentence in comment to explain it a little more, "The du_head
> linked list can be sorted by this, and register-rename can prefer
> register classes according to this order.".

Thanks.  Still "the length of du_chain" doesn't make much sense.  So I'd write
"The number of elements of this chain, excluding those corresponding to
 references of the register in debug insns."

> Fixed as you suggested.

Thanks.  Note that we generally have a blank line between the block comment of 
a function and the function itself.

+  /* Returns NULL if this pass of merge is done.  */
+  return (current_tail_node->next_chain ? current_tail_node : NULL);

No 's' at the end of "Return".

> Ur, my first version is correct.  Revert it to my first version.

Looks better indeed. :-)  No need to break a line if it doesn't overflow.

> Yeah, we should explain it a little bit in comments.  I add this
> paragraph below in comments, and ChangeLog will be updated accordingly.
> "Registers from high to low is iterated here, in order to 	     prefer
> low registers.  On some ports, low registers are 	     better than high
> registers in terms of final code size."

This can be different on other ports so you cannot change it unilaterally.

Why do you need this for the ARM?  Won't the hook already achieve this?

On the other hand, the hook seems overly aggressive: IIUC, on the ARM, it will 
prevent non-LO_REGS from being renaming registers for GENERAL_REGS.  Do we 
really want not to rename if all LO_REGS are already taken, instead of using 
an available non-LO_REGS reg?

Wouldn't it be better to add a more fine-grained hook, for example 

DEFHOOK
(preferred_renaming_class,
 "",
 reg_class_t,
 (unsigned int, reg_class_t),
 hook_regclasst_unsignedint_regclasst_null)

that would return the Nth preferred renaming (sub)class when passed an integer 
N (0 being the most preferred) and a class of registers?
Yao Qi - Nov. 8, 2010, 2:54 p.m.
On 11/08/2010 08:34 PM, Eric Botcazou wrote:
>> Done.  Removed function pointer CMP, and use merge_sort_callback directly.
> 
> +/* Comparison function of merge sort.  Return true if A is less than
> +   B, otherwise return false.  */
> +static inline int 
> +merge_sort_callback(const struct du_head *a, 
> +		    const struct du_head *b)
> +{
> +  return a->length < b->length;
> +}
> 
> It isn't really a callback anymore. :-)
> 
> There are trailing spaces after the "int" and the "*a,".  There are more of 
> them throughout the patch.  Good editors have options to make them visible.
> 

I noticed it when I installed whitespace.el in my emacs. :)  Thanks for
your suggestions.  I'll include my fixes as you pointed out in my next
patch.

>> Yeah, we should explain it a little bit in comments.  I add this
>> paragraph below in comments, and ChangeLog will be updated accordingly.
>> "Registers from high to low is iterated here, in order to 	     prefer
>> low registers.  On some ports, low registers are 	     better than high
>> registers in terms of final code size."
> 
> This can be different on other ports so you cannot change it unilaterally.
> 

Currently, we iterate registers from low to high, and resulting
"preferring" high registers in each iteration.  Generally speaking, low
registers are "better", at least not worse, than high register, in terms
of code size or other metrics.  Why can't we reverse the order of
iteration?  I don't see any ports do something specific to this
iteration order.  I run regression test on ARM, X86, X86_64, and don't
see any regressions if iteration order is reversed.  So I assume that it
is not harmful to reverse the iteration order to all ports, and useful
to some ports, not only to ARM.

> Why do you need this for the ARM?  Won't the hook already achieve this?
> 

The goal of this piece of work is to assign low registers to mostly used
registers, so that code size can be reduced.  There is similar feature
on X86_64, pointed out by HJ.  In order to achieve this, we need 1) a
target hook to prefer low register class, 2) iterate registers from high
to low.

> On the other hand, the hook seems overly aggressive: IIUC, on the ARM, it will 
> prevent non-LO_REGS from being renaming registers for GENERAL_REGS.  Do we 
> really want not to rename if all LO_REGS are already taken, instead of using 
> an available non-LO_REGS reg?

Yes, the situation you described is correct, but this situation is *not*
popular, because we've sorted du_head linked list, and assign mostly
used registers to low registers.  I can't give you a quantitative
measurements, but code size of some programs is reduced.

> Wouldn't it be better to add a more fine-grained hook, for example 
> 
> DEFHOOK
> (preferred_renaming_class,
>  "",
>  reg_class_t,
>  (unsigned int, reg_class_t),
>  hook_regclasst_unsignedint_regclasst_null)
> 
> that would return the Nth preferred renaming (sub)class when passed an integer 
> N (0 being the most preferred) and a class of registers?  

I thought of target hook like this before, but this can't solve the
issue of iteration order.  If different ports need different iteration
order, we have to provide a target hook for this.
Eric Botcazou - Nov. 9, 2010, 9:16 a.m.
> Currently, we iterate registers from low to high, and resulting
> "preferring" high registers in each iteration.  Generally speaking, low
> registers are "better", at least not worse, than high register, in terms
> of code size or other metrics.  Why can't we reverse the order of
> iteration?

Generally we avoid changing things if we cannot prove that there is a benefit 
in doing so.  The compiler supports many architectures and you never really 
know how a change behaves with each of them in particular.  Why would "low" 
registers be universally better than "high" registers?

> I don't see any ports do something specific to this iteration order.  I run
> regression test on ARM, X86, X86_64, and don't see any regressions if
> iteration order is reversed.  So I assume that it is not harmful to reverse
> the iteration order to all ports, and useful to some ports, not only to ARM.

Testing on a couple of architectures isn't really sufficient to conclude here 
in my opinion, so I wouldn't change this, unless specifically requested by 
the presence of the hook.

> The goal of this piece of work is to assign low registers to mostly used
> registers, so that code size can be reduced.  There is similar feature
> on X86_64, pointed out by HJ.  In order to achieve this, we need 1) a
> target hook to prefer low register class, 2) iterate registers from high
> to low.

OK, but AFAICS the target hook as currently written isn't a "preference" thing 
but an "exclusion" thing, i.e. all the non-LO_REGS will be excluded and not 
only disparaged for GENERAL_REGS.  So, once non-LO_REGS are excluded, why 
changing the iteration order?  Does the iteration order within the LO_REGS 
class matter as well for the ARM?

> Yes, the situation you described is correct, but this situation is *not*
> popular, because we've sorted du_head linked list, and assign mostly
> used registers to low registers.  I can't give you a quantitative
> measurements, but code size of some programs is reduced.

OK, on the ARM, but what about the 20 other architectures that may want to 
paramerize the pass too?

> I thought of target hook like this before, but this can't solve the
> issue of iteration order.  If different ports need different iteration
> order, we have to provide a target hook for this.

This can solve the iteration order like so: you compute a "representative" 
class for the renaming chain, and then you iterate over N, call the hook, and 
iterate over the contents of the class it returns.
Yao Qi - Nov. 9, 2010, 3:28 p.m.
On 11/09/2010 05:16 PM, Eric Botcazou wrote:
>> Currently, we iterate registers from low to high, and resulting
>> "preferring" high registers in each iteration.  Generally speaking, low
>> registers are "better", at least not worse, than high register, in terms
>> of code size or other metrics.  Why can't we reverse the order of
>> iteration?
> 
> Generally we avoid changing things if we cannot prove that there is a benefit 
> in doing so.  The compiler supports many architectures and you never really 
> know how a change behaves with each of them in particular.  Why would "low" 
> registers be universally better than "high" registers?
> 

Eric,
I can't prove that statement.

>> I don't see any ports do something specific to this iteration order.  I run
>> regression test on ARM, X86, X86_64, and don't see any regressions if
>> iteration order is reversed.  So I assume that it is not harmful to reverse
>> the iteration order to all ports, and useful to some ports, not only to ARM.
> 
> Testing on a couple of architectures isn't really sufficient to conclude here 
> in my opinion, so I wouldn't change this, unless specifically requested by 
> the presence of the hook.
> 

OK, I agree on this.

>> The goal of this piece of work is to assign low registers to mostly used
>> registers, so that code size can be reduced.  There is similar feature
>> on X86_64, pointed out by HJ.  In order to achieve this, we need 1) a
>> target hook to prefer low register class, 2) iterate registers from high
>> to low.
> 
> OK, but AFAICS the target hook as currently written isn't a "preference" thing 
> but an "exclusion" thing, i.e. all the non-LO_REGS will be excluded and not 
> only disparaged for GENERAL_REGS.  So, once non-LO_REGS are excluded, why 
> changing the iteration order?  Does the iteration order within the LO_REGS 
> class matter as well for the ARM?
> 

No, order doesn't matter within the LO_REGS class.

>> Yes, the situation you described is correct, but this situation is *not*
>> popular, because we've sorted du_head linked list, and assign mostly
>> used registers to low registers.  I can't give you a quantitative
>> measurements, but code size of some programs is reduced.
> 
> OK, on the ARM, but what about the 20 other architectures that may want to 
> paramerize the pass too?
> 

That is a good question, and I have no answer to it. :)

>> I thought of target hook like this before, but this can't solve the
>> issue of iteration order.  If different ports need different iteration
>> order, we have to provide a target hook for this.
> 
> This can solve the iteration order like so: you compute a "representative" 
> class for the renaming chain, and then you iterate over N, call the hook, and 
> iterate over the contents of the class it returns.
> 

I consider this patch again today, and here is a design like this,

  1.  Don't change iteration order,
  2.  Define new hook preferred_rename_register_p,
  DEFHOOK
  (preferred_renaming_register_p,
  "True if hardware register is what target prefers to rename",
  bool,
  (int),
  preferred_renaming_register_false)

  3.  Call this hook like this,
for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
{
  ...
  if (! tmp)
    {
      if (tick[best_new_reg] > tick[new_reg])
        {
          best_new_reg = new_reg;
          best_nregs = nregs;
	
+	  if (best_new_reg != reg /* Have better choice.  */
+             /* If this register is what target prefers to rename,
break the loop,  and not skip this register.  */
+	      && targetm.preferred_rename_register_p(best_new_reg))
+	        break;
	 }
    }
}

  4.  On ARM, preferred_renaming_register_p returns true if target is
thumb2 and register's class is LO_REGS.  Other ports can define their
own preference here.

This design has some advantages against last one,
  1.  More target-independent,
  2.  We prefer register rather than exclude register.

What do you think of this design?
Eric Botcazou - Nov. 11, 2010, 5:34 p.m.
> I consider this patch again today, and here is a design like this,
>
>   1.  Don't change iteration order,
>   2.  Define new hook preferred_rename_register_p,
>   DEFHOOK
>   (preferred_renaming_register_p,
>   "True if hardware register is what target prefers to rename",
>   bool,
>   (int),
>   preferred_renaming_register_false)
>
>   3.  Call this hook like this,
> for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
> {
>   ...
>   if (! tmp)
>     {
>       if (tick[best_new_reg] > tick[new_reg])
>         {
>           best_new_reg = new_reg;
>           best_nregs = nregs;
>
> +	  if (best_new_reg != reg /* Have better choice.  */
> +             /* If this register is what target prefers to rename,
> break the loop,  and not skip this register.  */
> +	      && targetm.preferred_rename_register_p(best_new_reg))
> +	        break;
> 	 }
>     }
> }
>
>   4.  On ARM, preferred_renaming_register_p returns true if target is
> thumb2 and register's class is LO_REGS.  Other ports can define their
> own preference here.
>
> This design has some advantages against last one,
>   1.  More target-independent,
>   2.  We prefer register rather than exclude register.
>
> What do you think of this design?

It's a progress, but the first approach of using register classes instead of 
individual registers seemed to be the right granularity here.

For each element in the chain, we have the class.  Can we compute a superset 
of all the classes in the chain?  If we cannot or if this is ALL_REGS, then 
we iterate as before.  If we can, we call the hook on the result and we first 
iterate on the contents of this result (a class); if we find a new reg, we 
stop, otherwise we iterate on the remaining registers.
Eric Botcazou - Nov. 11, 2010, 5:39 p.m.
> For each element in the chain, we have the class.  Can we compute a
> superset of all the classes in the chain?  If we cannot or if this is
> ALL_REGS, then we iterate as before.  If we can, we call the hook on the
> result and we first iterate on the contents of this result (a class); if we
> find a new reg, we stop, otherwise we iterate on the remaining registers.

"the remaining registers" are the registers not in the class.

Patch

gcc/
        * regrename.c (struct du_head): Add new element length.
        (sort_du_head, get_element, merge, merge_sort_callback):
        New functions of merge sort implementation to du_head list.
        (regrename_optimize): Sort du_head linked list by length.
	Iterate registers in a reversed order to prefer low registers.
        (create_new_chain):  Initialize length.
        (scan_rtx_reg): Increase length for non-debug insns.

diff --git a/gcc/regrename.c b/gcc/regrename.c
index 2535ab7..31745f8 100644
--- a/gcc/regrename.c
+++ b/gcc/regrename.c
@@ -51,6 +51,10 @@  struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
+  /* The length of du_chain for non-debug insns.  The du_head linked
+     list can be sorted by this, and register-rename can prefer
+     register classes according to this order.  */
+  int length;
   /* Describes the register being tracked.  */
   unsigned regno, nregs;
 
@@ -154,6 +158,151 @@  merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head)
     }
 }
 
+/* Return the Nth element in LIST.  If LIST contains less than N
+   elements, return the last one.  */
+static struct du_head *
+get_element (struct du_head *list, int n)
+{
+  while (n-- && list->next_chain != NULL)
+    list = list->next_chain;
+
+  return list;
+}
+
+/* Comparison function of merge sort.  Return true if A is less than
+   B, otherwise return false.  */
+static inline int 
+merge_sort_callback(const struct du_head *a, 
+		    const struct du_head *b)
+{
+  return a->length < b->length;
+}
+
+/* Merge the first 2 sub-lists of LENGTH nodes contained in the
+   linked list pointed to by START_NODE.  Update START_NODE to point 
+   to the merged nodes, and return a pointer to the last merged
+   node.  Return NULL if START_NODE doesn't contain enough
+   elements, or this pass of merge is done.  */
+static struct du_head *
+merge(struct du_head **start_node, int length)
+{
+  int i, left_count, right_count;
+  struct du_head *left, *right;
+  /* Current node of sort result.  */
+  struct du_head *current_sorted_node;
+  /* Tail node of sort, used to connect with next piece of list.  */
+  struct du_head *current_tail_node;
+
+  if (*start_node == NULL)
+    return NULL;
+
+  left = right = *start_node;
+  right_count = left_count = 0;
+
+  /* Step RIGHT along the list by LENGTH places.  */
+  for (i = 0; i < length; i++)
+    {
+      right = right->next_chain;
+      if (right == NULL)
+	  return NULL;
+    }
+
+  /* Initialize current_sorted_node.  */
+  if (merge_sort_callback (left, right))
+    {
+      ++right_count;
+      current_sorted_node = right;
+      *start_node = right;
+      right = right->next_chain;
+    }
+  else
+    {
+      ++left_count;
+      current_sorted_node = left;
+      left = left->next_chain;
+    }
+
+  while (1)
+    {
+      /* Choose LEFT or RIGHT to take the next element from.  If 
+	 either is empty, choose from the other one.  */
+      if (left_count == length || left == NULL)
+	{
+	  current_sorted_node->next_chain = right;
+	  current_tail_node 
+	    = get_element (current_sorted_node, 
+			   length - right_count);
+
+	  break;
+	}
+      else if (right_count == length || right == NULL)
+	{
+	  /* Save the head node of next piece of linked list.  */
+	  struct du_head *tmp = current_sorted_node->next_chain;
+
+	  current_sorted_node->next_chain = left;
+	  current_tail_node
+	    = get_element (current_sorted_node,
+			   length - left_count);
+	  /* Connect sorted list to next piece of list.  */
+	  current_tail_node->next_chain = tmp;
+	  break;
+	}
+      else
+	{
+	  /* Normal merge operations.  If both LEFT and RIGHT are 
+	     non-empty, compare the first element of each and choose 
+	     the lower one.  */
+	  if (merge_sort_callback (left, right))
+	    {
+	      right_count++;
+	      current_sorted_node->next_chain = right;
+	      right = right->next_chain;
+	    }
+	  else
+	    {
+	      left_count++;
+	      current_sorted_node->next_chain = left;
+	      left = left->next_chain;
+	    }
+	  current_sorted_node = current_sorted_node->next_chain;
+	}
+    }
+  /* Returns NULL if this pass of merge is done.  */
+  return (current_tail_node->next_chain ? current_tail_node : NULL);
+}
+
+/* Sort the linked list pointed to by HEAD.  The algorithm is a
+   non-recursive merge sort to linked list.  */
+static void
+sort_du_head (struct du_head **head)
+{
+  int current_length = 1;
+  struct du_head *last_tail;
+
+  /* In each pass, lists of size current_length is merged to 
+     lists of size 2xcurrent_length (Initially current_length 
+     is 1).  */
+  while (1)
+    {
+      last_tail = merge(head, current_length);
+      if (last_tail != NULL)
+	{
+	  do
+	    {
+	      last_tail = merge (&last_tail->next_chain, 
+				 current_length);
+	    }
+	  while (last_tail != NULL);
+	  current_length *= 2;
+	}
+      else
+	break;
+    }
+}
+
 /* Perform register renaming on the current function.  */
 
 static unsigned int
@@ -195,6 +344,8 @@  regrename_optimize (void)
       if (dump_file)
 	dump_def_use_chain (all_chains);
 
+      sort_du_head (&all_chains);
+
       CLEAR_HARD_REG_SET (unavailable);
       /* Don't clobber traceback for noreturn functions.  */
       if (frame_pointer_needed)
@@ -251,6 +402,7 @@  regrename_optimize (void)
 	      if (DEBUG_INSN_P (tmp->insn))
 		continue;
 	      n_uses++;
 	      IOR_COMPL_HARD_REG_SET (this_unavailable,
 				      reg_class_contents[tmp->cl]);
 	    }
@@ -264,14 +416,17 @@  regrename_optimize (void)
 	  merge_overlapping_regs (&this_unavailable, this_head);
 
 	  /* Now potential_regs is a reasonable approximation, let's
-	     have a closer look at each register still in there.  */
-	  for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
+	     have a closer look at each register still in there.
+	     Registers from high to low is iterated here, in order to
+	     prefer low registers.  On some ports, low registers are
+	     better than high registers in terms of final code size.  */
+	  for (new_reg = FIRST_PSEUDO_REGISTER - 1; new_reg >= 0; new_reg--)
 	    {
 	      enum machine_mode mode = GET_MODE (*this_head->first->loc);
 	      int nregs = hard_regno_nregs[new_reg][mode];
 
 	      for (i = nregs - 1; i >= 0; --i)
-	        if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
+		if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
 		    || fixed_regs[new_reg + i]
 		    || global_regs[new_reg + i]
 		    /* Can't use regs which aren't saved by the prologue.  */
@@ -527,6 +682,7 @@  create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
   head->need_caller_save_reg = 0;
   head->cannot_rename = 0;
   head->terminated = 0;
+  head->length = 0;
 
   VEC_safe_push (du_head_p, heap, id_to_chain, head);
   head->id = current_id++;
@@ -572,6 +728,8 @@  create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+
+  head->length = 1;
 }
 
 static void
@@ -661,6 +819,8 @@  scan_rtx_reg (rtx insn, rtx *loc, enum reg_class cl, enum scan_actions action,
 		head->last->next_use = this_du;
 	      head->last = this_du;
 
+	      if (!DEBUG_INSN_P (insn))
+		head->length++;
 	    }
 	  /* Avoid adding the same location in a DEBUG_INSN multiple times,
 	     which could happen with non-exact overlap.  */