Patchwork [1/3] Sort du_head list and restrict register class by PREFERRED_RENAME_CLASS

login
register
mail settings
Submitter Yao Qi
Date Oct. 26, 2010, 10:46 a.m.
Message ID <1288089965.2501.34.camel@yaolp>
Download mbox | patch
Permalink /patch/69222/
State New
Headers show

Comments

Yao Qi - Oct. 26, 2010, 10:46 a.m.
On Thu, 2010-10-21 at 10:51 +0200, Paolo Bonzini wrote:
> On 10/21/2010 08:50 AM, Yao Qi wrote:
> 

Paolo,
This is a updated patch 1, which does everything except adding
PREFERRED_RENAME_CLASS.  It is a separate improvement to
register-rename, and can be in first.

Regression tested on x86_64-unknow-linux-gnu.  No regression.  OK to
mainline?

Patch 2(adds PREFERRED_RENAME_CLASS to docs and code) and patch 3(adds
the macro to the ARM backend) will be done soon.

> > +/* Get the next LENGTH element in list from START.  If length of
> > +   list from START is less than LENGTH, return the last one.  */
> 
> Looks like it is "If the length of the list headed from START is _less 
> than or equal to_ LENGTH, return the last element".

Fixed.

> 
> > +static struct du_head *
> > +get_tail_du_head (struct du_head *start, int length)
> > +{
> > +  while (length--&&  start->next_chain != NULL)
> > +    {
> > +      start = start->next_chain;
> > +    }
> 
> Useless braces.
> 
Fixed.

> > +
> > +  return start;
> > +}
> > +
> > +
> > +/* Merge the linked list starting from START_NODE.  CMP is callback
> > +   for comparison.  */
> 
> This comments leaves some questions unanswered: merge with what?  What 
> is returned, and what is left in *START_NODE?
> 
> In other words, something like this should do:
> 
> /* Merge the first two sequences of CURRENT_LENGTH nodes in the linked
>     list pointed by START_NODE.  Update START_NODE to point to the
>     merged nodes, and return a pointer to the last merged node.  */
> 
Fixed.

> > +static struct du_head *
> > +merge(struct du_head **start_node, int current_length,
> > +      int(*cmp)(const struct du_head *, const struct du_head *))
> 
> Spaces required after function name and return type ("int (*cmp) (const 
> etc.").
> 
Fixed.

> > +{
> > +  int can_merge_p, i, left_count, right_count;
> > +  struct du_head *left, *right;
> > +  /* Current node of sort result.  */
> > +  struct du_head *current_sorted_node;
> > +  /* Tail node of sort, used to connect with next piece of list.  */
> > +  struct du_head *current_tail_node;
> > +
> > +  if (*start_node == NULL)
> > +    return NULL;
> > +
> > +  can_merge_p = 1;
> > +  left = right = *start_node;
> > +  right_count = left_count = 0;
> > +
> > +  for (i = 0; i<  current_length; i++)
> > +    {
> > +      right = right->next_chain;
> > +      if (right == NULL)
> > +	{
> > +	  can_merge_p = 0;
> 
> Just return NULL right away.
> 
Fixed.

> > +	  break;
> > +	}
> > +    }
> > +
> > +  if (!can_merge_p)
> > +    return NULL;
> 
> In sort_du_head:
> 
> > +   while (1)
> > +    {
> > +      last_tail = merge(head, current_length, cmp);
> > +      if (last_tail != NULL)
> > +	{
> > +	  do
> > +	    {
> > +	      last_tail = merge (&last_tail->next_chain,
> > +				 current_length, cmp);
> > +	    }
> > +	  while (last_tail != NULL);
> > +	  current_length *= 2;
> > +	}
> > +      else
> > +	break;
> > +    }
> 
> Very nice algorithm!
> 
> What about however something even simpler, like:
> 
>    do
>      {
>        segment = head;
>        segments = 1;
>        while ((last_tail = merge(segment, current_length, cmp)) != NULL)
>          {
>            segment = &last_tail->next_chain;
>            segments++;
>          }
>        current_length *= 2;
>      }
>    while (segments > 1);
> 
> (Untested).
> 

Yeah, it can be simpler, but the code above is not correct, because
outer do-while loop may be endless.  Mine is like this:
  do
    {
      struct du_head **segment = head;
      
      while ((last_tail = merge(segment, current_length, cmp)) 
            != NULL)
       {
         segment = &last_tail->next_chain;
       }
      current_length *= 2;
    }
  while (last_tail);


> > +/* Call back function of merge sort.  Return true if A is less than
> > +   B, otherwise return false.  */
> > +static int
> > +merge_sort_callback(const struct du_head *a,
> > +		    const struct du_head *b)
> > +{
> > +  return a->length<  b->length;
> > +}
> 
> Maybe this can be inlined?

inlined.

> 
> >   /* Perform register renaming on the current function.  */
> >
> >   static unsigned int
> > @@ -195,6 +340,8 @@ regrename_optimize (void)
> >         if (dump_file)
> >   	dump_def_use_chain (all_chains);
> >
> > +      sort_du_head (&all_chains, merge_sort_callback);
> > +
> >         CLEAR_HARD_REG_SET (unavailable);
> >         /* Don't clobber traceback for noreturn functions.  */
> >         if (frame_pointer_needed)
> > @@ -251,8 +398,11 @@ regrename_optimize (void)
> >   	      if (DEBUG_INSN_P (tmp->insn))
> >   		continue;
> >   	      n_uses++;
> > +#ifndef PREFERRED_RENAME_CLASS
> > +#define PREFERRED_RENAME_CLASS(CLASS) CLASS
> > +#endif
> 
> This goes in defaults.h.  I'm not sure about the performance 
> implications of making this a target hook.  Also, it looks like 
> everything except PREFERRED_RENAME_CLASS is actually a separate 
> improvement to rename-registers that could go in first.  So, I would 
> structure the series like this.:
> 
> - patch 1 does everything except adding PREFERRED_RENAME_CLASS
> 
> - patch 2 adds PREFERRED_RENAME_CLASS to docs and code
> 
> - patch 3 adds the macro to the ARM backend.
> 
> >   	      IOR_COMPL_HARD_REG_SET (this_unavailable,
> > -				      reg_class_contents[tmp->cl]);
> > +				      reg_class_contents[PREFERRED_RENAME_CLASS(tmp->cl)]);
> >   	    }
> >
> >   	  if (n_uses<  2)
> 
> 
> >   static bool
> >   gate_handle_regrename (void)
> >   {
> > -  return (optimize>  0&&  (flag_rename_registers));
> > +  return ((optimize>  0 || optimize_size)&&  (flag_rename_registers));
> >   }
> 
> Not needed, optimize_size should always be set.  (Sorry for the garbling 
> of spaces that my mailer did).

Done.

> 
> Thanks,
> 
> Paolo
Paolo Bonzini - Oct. 26, 2010, 10:54 a.m.
Looks good, but I cannot approve it.

There are only a couple of extra braces.  You don't need to resend it, a 
reviewer will assume you fix it before committing:

> +  for (i = 0; i<  current_length; i++)
> +    {
> +      right = right->next_chain;
> +      if (right == NULL)
> +	{
> +	  return NULL;
> +	}
> +    }

... and ...

> +      while ((last_tail = merge(segment, current_length, cmp))
> +	     != NULL)
> +	{
> +	  segment =&last_tail->next_chain;
> +	}

... here.

BTW the two PREFERRED_RENAME_CLASS patches can be merged into one, I think.

Paolo

Patch

gcc/
        * regrename.c (struct du_head): Add new element length.
        (sort_du_head, get_tail_du_head, merge, merge_sort_callback):
        New functions of merge sort implementation to du_head list.
        (regrename_optimize): Sort du_head linked list by length.
        (create_new_chain):  Initialize length.
        (scan_rtx_reg): Increase du_chain_length for non-debug insns.

diff --git a/gcc/regrename.c b/gcc/regrename.c
index 2535ab7..a560847 100644
--- a/gcc/regrename.c
+++ b/gcc/regrename.c
@@ -51,6 +51,8 @@  struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
+  /* The length of du_chain for non-debug insns.  */
+  int length;
   /* Describes the register being tracked.  */
   unsigned regno, nregs;
 
@@ -154,6 +156,141 @@  merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head)
     }
 }
 
+/* Get the next LENGTH element in list from START.  If length of
+   list from START is less than or equal to LENGTH, return the 
+   last element.  */
+static struct du_head *
+get_tail_du_head (struct du_head *start, int length)
+{
+  while (length-- && start->next_chain != NULL)
+      start = start->next_chain;
+
+  return start;
+}
+
+/* Merge the first two sequences of CURRENT_LENGTH nodes in the 
+   linked list pointed by START_NODE.  Update START_NODE to point 
+   to the merged nodes, and return a pointer to the last merged
+   node.   CMP is callback  for comparison.  */
+
+static struct du_head *
+merge(struct du_head **start_node, int current_length,
+      int (*cmp) (const struct du_head *, const struct du_head *))
+{
+  int i, left_count, right_count;
+  struct du_head *left, *right;
+  /* Current node of sort result.  */
+  struct du_head *current_sorted_node;
+  /* Tail node of sort, used to connect with next piece of list.  */
+  struct du_head *current_tail_node;
+
+  if (*start_node == NULL)
+    return NULL;
+
+  left = right = *start_node;
+  right_count = left_count = 0;
+
+  for (i = 0; i < current_length; i++)
+    {
+      right = right->next_chain;
+      if (right == NULL)
+	{
+	  return NULL;
+	}
+    }
+
+  /* Initialize current_sorted_node.  */
+  if (cmp (left, right))
+    {
+      ++right_count;
+      current_sorted_node = right;
+      *start_node = right;
+      right = right->next_chain;
+    }
+  else
+    {
+      ++left_count;
+      current_sorted_node = left;
+      left = left->next_chain;
+    }
+
+  while (1)
+    {
+      if (left_count == current_length || left == NULL)
+	{
+	  current_sorted_node->next_chain = right;
+	  current_tail_node = 
+	    get_tail_du_head (current_sorted_node, 
+			      current_length - right_count);
+
+	  break;
+	}
+      else if (right_count == current_length || right == NULL)
+	{
+	  /* Save the head node of next piece of linked list.  */
+	  struct du_head *tmp = current_sorted_node->next_chain;
+
+	  current_sorted_node->next_chain = left;
+	  current_tail_node = 
+	    get_tail_du_head (current_sorted_node,
+			      current_length - left_count);
+	  /* Connect sorted list to next piece of list.  */
+	  current_tail_node->next_chain = tmp;
+	  break;
+	}
+      else
+	{
+	  /* Normal merge operations.  */
+	  if (cmp (left, right))
+	    {
+	      right_count++;
+	      current_sorted_node->next_chain = right;
+	      right = right->next_chain;
+	    }
+	  else
+	    {
+	      left_count++;
+	      current_sorted_node->next_chain = left;
+	      left = left->next_chain;
+	    }
+	  current_sorted_node = current_sorted_node->next_chain;
+	}
+    }
+  return current_tail_node->next_chain != NULL ? 
+    current_tail_node : NULL;
+}
+
+
+/* Non-recursive merge sort to du_head list.  */
+static void
+sort_du_head (struct du_head **head,
+	      int(*cmp)(const struct du_head *, const struct du_head *))
+{
+  int current_length = 1;
+  struct du_head *last_tail;
+  do
+    {
+      struct du_head **segment = head;
+      
+      while ((last_tail = merge(segment, current_length, cmp)) 
+	     != NULL)
+	{
+	  segment = &last_tail->next_chain;
+	}
+      current_length *= 2;
+    }
+  while (last_tail);
+}
+
+/* Call back function of merge sort.  Return true if A is less than
+   B, otherwise return false.  */
+static inline int 
+merge_sort_callback(const struct du_head *a, 
+		    const struct du_head *b)
+{
+  return a->length < b->length;
+}
+
 /* Perform register renaming on the current function.  */
 
 static unsigned int
@@ -195,6 +332,8 @@  regrename_optimize (void)
       if (dump_file)
 	dump_def_use_chain (all_chains);
 
+      sort_du_head (&all_chains, merge_sort_callback);
+
       CLEAR_HARD_REG_SET (unavailable);
       /* Don't clobber traceback for noreturn functions.  */
       if (frame_pointer_needed)
@@ -251,6 +390,7 @@  regrename_optimize (void)
 	      if (DEBUG_INSN_P (tmp->insn))
 		continue;
 	      n_uses++;
+
 	      IOR_COMPL_HARD_REG_SET (this_unavailable,
 				      reg_class_contents[tmp->cl]);
 	    }
@@ -265,13 +405,13 @@  regrename_optimize (void)
 
 	  /* Now potential_regs is a reasonable approximation, let's
 	     have a closer look at each register still in there.  */
-	  for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
+	  for (new_reg = FIRST_PSEUDO_REGISTER - 1; new_reg >= 0; new_reg--)
 	    {
 	      enum machine_mode mode = GET_MODE (*this_head->first->loc);
 	      int nregs = hard_regno_nregs[new_reg][mode];
 
 	      for (i = nregs - 1; i >= 0; --i)
-	        if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
+		if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
 		    || fixed_regs[new_reg + i]
 		    || global_regs[new_reg + i]
 		    /* Can't use regs which aren't saved by the prologue.  */
@@ -527,6 +667,7 @@  create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
   head->need_caller_save_reg = 0;
   head->cannot_rename = 0;
   head->terminated = 0;
+  head->length = 0;
 
   VEC_safe_push (du_head_p, heap, id_to_chain, head);
   head->id = current_id++;
@@ -572,6 +713,8 @@  create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+
+  head->length = 1;
 }
 
 static void
@@ -661,6 +804,8 @@  scan_rtx_reg (rtx insn, rtx *loc, enum reg_class cl, enum scan_actions action,
 		head->last->next_use = this_du;
 	      head->last = this_du;
 
+	      if (!DEBUG_INSN_P (insn))
+		head->length++;
 	    }
 	  /* Avoid adding the same location in a DEBUG_INSN multiple times,
 	     which could happen with non-exact overlap.  */