Patchwork Speed up find_loc_in_1pdv (PR debug/41371)

login
register
mail settings
Submitter Alexandre Oliva
Date June 10, 2010, 5:07 p.m.
Message ID <oreigeolyd.fsf@livre.localdomain>
Download mbox | patch
Permalink /patch/55239/
State New
Headers show

Comments

Alexandre Oliva - June 10, 2010, 5:07 p.m.
On Jun 10, 2010, Richard Guenther <rguenther@suse.de> wrote:

> There's some odd spacing here:

> +      dv = dv_from_value (node->loc);
> +      rvar = (variable)        htab_find_with_hash (vars, dv, 
> dv_htab_hash (dv));

Thanks, fixed.

> Ok for trunk and the branch.

Here's the patch I installed, with an additional comment.

> Do you have updated timings for the testcases in the PR?

No, I haven't measured the timing difference.
Jakub Jelinek - June 10, 2010, 7:40 p.m.
On Thu, Jun 10, 2010 at 02:07:38PM -0300, Alexandre Oliva wrote:
> On Jun 10, 2010, Richard Guenther <rguenther@suse.de> wrote:
> 
> > There's some odd spacing here:
> 
> > +      dv = dv_from_value (node->loc);
> > +      rvar = (variable)        htab_find_with_hash (vars, dv, 
> > dv_htab_hash (dv));
> 
> Thanks, fixed.
> 
> > Ok for trunk and the branch.
> 
> Here's the patch I installed, with an additional comment.
> 
> > Do you have updated timings for the testcases in the PR?
> 
> No, I haven't measured the timing difference.

I've tried to measure the difference on --enable-checking=release
cc1/cc1plus, though the results vary a lot, but in general, except for one
of the runs, the new cc1/cc1plus was faster:

time ./cc1plus.160558 -quiet -O2 -g bug90.cc -o bug90.s1; time ./cc1plus.160559 -quiet -O2 -g bug90.cc -o bug90.s2
user	2m54.723s
user	2m41.328s
time ./cc1plus.160558 -quiet -O2 -g bug90.cc -o bug90.s1; time ./cc1plus.160559 -quiet -O2 -g bug90.cc -o bug90.s2
user	3m18.700s
user	2m30.857s
time ./cc1plus.160558 -quiet -O2 -g bug90.cc -o bug90.s1; time ./cc1plus.160559 -quiet -O2 -g bug90.cc -o bug90.s2
user	3m12.270s
user	2m39.853s
time ./cc1plus.160558 -quiet -m32 -quiet -g -Os -fomit-frame-pointer bug-611650_analysis.ii; time ./cc1plus.160559 -quiet -m32 -quiet -g -Os -fomit-frame-pointer bug-611650_analysis.ii
user	2m8.505s
user	1m58.971s
time ./cc1plus.160558 -quiet -m32 -quiet -g -Os -fomit-frame-pointer bug-611650_analysis.ii; time ./cc1plus.160559 -quiet -m32 -quiet -g -Os -fomit-frame-pointer bug-611650_analysis.ii
user	2m39.698s
user	2m5.826s
time ./cc1plus.160558 -quiet -m32 -quiet -g -Os -fomit-frame-pointer bug-611650_analysis.ii; time ./cc1plus.160559 -quiet -m32 -quiet -g -Os -fomit-frame-pointer bug-611650_analysis.ii
user	2m57.066s
user	1m55.971s
time ./cc1.160558 -m32 -fPIC -fno-strict-aliasing -g -O2 rh598310.i -quiet; time ./cc1.160559 -m32 -fPIC -fno-strict-aliasing -g -O2 rh598310.i -quiet 
user	0m56.140s
user	0m53.239s
time ./cc1.160558 -m32 -fPIC -fno-strict-aliasing -g -O2 rh598310.i -quiet; time ./cc1.160559 -m32 -fPIC -fno-strict-aliasing -g -O2 rh598310.i -quiet 
user	1m4.315s
user	1m9.517s
time ./cc1.160558 -m32 -fPIC -fno-strict-aliasing -g -O2 rh598310.i -quiet; time ./cc1.160559 -m32 -fPIC -fno-strict-aliasing -g -O2 rh598310.i -quiet 
user	0m56.003s
user	0m54.495s


	Jakub

Patch

for  gcc/ChangeLog
from  Alexandre Oliva  <aoliva@redhat.com>

	PR debug/41371
	* var-tracking.c (find_loc_in_1pdv): Remove recursion, only
	tail-recurse into canonical node.  Fast-forward over
	non-canonical VALUEs.

Index: gcc/var-tracking.c
===================================================================
--- gcc/var-tracking.c.orig	2010-06-10 08:15:13.000000000 -0300
+++ gcc/var-tracking.c	2010-06-10 09:26:18.000000000 -0300
@@ -2479,125 +2479,81 @@  dv_changed_p (decl_or_value dv)
 
 /* Return a location list node whose loc is rtx_equal to LOC, in the
    location list of a one-part variable or value VAR, or in that of
-   any values recursively mentioned in the location lists.  */
+   any values recursively mentioned in the location lists.  VARS must
+   be in star-canonical form.  */
 
 static location_chain
 find_loc_in_1pdv (rtx loc, variable var, htab_t vars)
 {
   location_chain node;
   enum rtx_code loc_code;
-  location_chain ret = NULL;
-  int unmark_self = 0;
-#ifdef ENABLE_CHECKING
-  static int mark_count;
-#endif
 
   if (!var)
-    return ret;
+    return NULL;
 
 #ifdef ENABLE_CHECKING
   gcc_assert (dv_onepart_p (var->dv));
 #endif
 
   if (!var->n_var_parts)
-    return ret;
+    return NULL;
 
 #ifdef ENABLE_CHECKING
   gcc_assert (var->var_part[0].offset == 0);
+  gcc_assert (loc != dv_as_opaque (var->dv));
 #endif
 
   loc_code = GET_CODE (loc);
   for (node = var->var_part[0].loc_chain; node; node = node->next)
     {
+      decl_or_value dv;
+      variable rvar;
+
       if (GET_CODE (node->loc) != loc_code)
 	{
 	  if (GET_CODE (node->loc) != VALUE)
 	    continue;
 	}
       else if (loc == node->loc)
-	{
-	  ret = node;
-	  break;
-	}
+	return node;
       else if (loc_code != VALUE)
 	{
 	  if (rtx_equal_p (loc, node->loc))
-	    {
-	      ret = node;
-	      break;
-	    }
+	    return node;
 	  continue;
 	}
-      if (!VALUE_RECURSED_INTO (node->loc))
-	{
-	  decl_or_value dv = dv_from_value (node->loc);
-	  variable rvar = (variable)
-	    htab_find_with_hash (vars, dv, dv_htab_hash (dv));
 
-	  if (rvar)
+      /* Since we're in star-canonical form, we don't need to visit
+	 non-canonical nodes: one-part variables and non-canonical
+	 values would only point back to the canonical node.  */
+      if (dv_is_value_p (var->dv)
+	  && !canon_value_cmp (node->loc, dv_as_value (var->dv)))
+	{
+	  /* Skip all subsequent VALUEs.  */
+	  while (node->next && GET_CODE (node->next->loc) == VALUE)
 	    {
-	      location_chain where;
-
-	      if (!unmark_self)
-		{
-		  if (dv_is_value_p (var->dv)
-		      && !VALUE_RECURSED_INTO (dv_as_value (var->dv)))
-		    {
-		      unmark_self = 1;
+	      node = node->next;
 #ifdef ENABLE_CHECKING
-		      mark_count++;
-#endif
-		      VALUE_RECURSED_INTO (dv_as_value (var->dv)) = true;
-		    }
-		  else
-		    unmark_self = -1;
-		}
-
-#ifdef ENABLE_CHECKING
-	      mark_count++;
-	      /* The recursion count is bounded because we're
-		 searching in a star-canonicalized set, i.e., each
-		 equivalence set of values is arranged so that the
-		 canonical value has all locations and equivalent
-		 values, whereas equivalent values only point back to
-		 the canonical.  So, if we start at the canonical
-		 value, we'll recurse at most into each sibling, so
-		 the recurse limit will be 2.  If we start at a
-		 non-canonical value, we'll recurse into the
-		 canonical, and from there to other siblings, so
-		 recurse limit will be 3.  If we start at a one-part
-		 variable, we add one level of recursion, but we don't
-		 count it.  */
-	      gcc_assert (mark_count <= 3);
-#endif
-	      VALUE_RECURSED_INTO (node->loc) = true;
-	      if ((where = find_loc_in_1pdv (loc, rvar, vars)))
-		{
-#ifdef ENABLE_CHECKING
-		  mark_count--;
-#endif
-		  VALUE_RECURSED_INTO (node->loc) = false;
-		  ret = where;
-		  break;
-		}
-	      VALUE_RECURSED_INTO (node->loc) = false;
-#ifdef ENABLE_CHECKING
-	      mark_count--;
+	      gcc_assert (!canon_value_cmp (node->loc,
+					    dv_as_value (var->dv)));
 #endif
+	      if (loc == node->loc)
+		return node;
 	    }
+	  continue;
 	}
-    }
 
-  if (unmark_self > 0)
-    {
-      VALUE_RECURSED_INTO (dv_as_value (var->dv)) = false;
 #ifdef ENABLE_CHECKING
-      mark_count--;
-      gcc_assert (mark_count == 0);
+      gcc_assert (node == var->var_part[0].loc_chain);
+      gcc_assert (!node->next);
 #endif
+
+      dv = dv_from_value (node->loc);
+      rvar = (variable) htab_find_with_hash (vars, dv, dv_htab_hash (dv));
+      return find_loc_in_1pdv (loc, rvar, vars);
     }
 
-  return ret;
+  return NULL;
 }
 
 /* Hash table iteration argument passed to variable_merge.  */
@@ -4031,6 +3987,12 @@  variable_post_merge_perm_vals (void **ps
   var = shared_hash_find (set->vars, dv);
   if (var)
     {
+      /* Although variable_post_merge_new_vals may have made decls
+	 non-star-canonical, values that pre-existed in canonical form
+	 remain canonical, and newly-created values reference a single
+	 REG, so they are canonical as well.  Since VAR has the
+	 location list for a VALUE, using find_loc_in_1pdv for it is
+	 fine, since VALUEs don't map back to DECLs.  */
       if (find_loc_in_1pdv (pnode->loc, var, shared_hash_htab (set->vars)))
 	return 1;
       val_reset (set, dv);