Patchwork speedup stack var conflicts (PR54146)

login
register
mail settings
Submitter Michael Matz
Date Aug. 3, 2012, 1:14 p.m.
Message ID <alpine.LNX.2.00.1208031508030.16619@wotan.suse.de>
Download mbox | patch
Permalink /patch/174990/
State New
Headers show

Comments

Michael Matz - Aug. 3, 2012, 1:14 p.m.
Hi,

as Steven noted in the bug report one reason of the slowness are the 
myriads of calls to bitmap_set_bit, resulting from the clique generation 
at the start of basic blocks.  Can be sped up by using bitmap_ior.  He 
implemented upper triangular form for the conflict bitmap, but I couldn't 
measure speed differences either way and the need to copy and change the 
work bitmap for each BB was bugging me, so I decided to not do that.

Additionally I changed the iteration order to be RPO, reducing the number 
of iterations until stabilizing (in the testcase for some routines from 8 
to 3).

Regstrapping on x86_64-linux in progress, okay for trunk?


Ciao,
Michael.
Richard Guenther - Aug. 3, 2012, 1:38 p.m.
On Fri, Aug 3, 2012 at 3:14 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> as Steven noted in the bug report one reason of the slowness are the
> myriads of calls to bitmap_set_bit, resulting from the clique generation
> at the start of basic blocks.  Can be sped up by using bitmap_ior.  He
> implemented upper triangular form for the conflict bitmap, but I couldn't
> measure speed differences either way and the need to copy and change the
> work bitmap for each BB was bugging me, so I decided to not do that.
>
> Additionally I changed the iteration order to be RPO, reducing the number
> of iterations until stabilizing (in the testcase for some routines from 8
> to 3).
>
> Regstrapping on x86_64-linux in progress, okay for trunk?

Ok.

Thanks,
Richard.

>
> Ciao,
> Michael.
> --
>         * cfgexpand.c (add_scope_conflicts_1): Use bitmap_ior_into.
>         (add_scope_conflicts): Iterate in RPO order.
>         (add_stack_protection_conflicts): Iterate over the other triangle.
>         (fini_vars_expansion): Clear stack_vars_sorted.
>
> Index: cfgexpand.c
> ===================================================================
> --- cfgexpand.c (revision 190077)
> +++ cfgexpand.c (working copy)
> @@ -429,10 +429,10 @@ add_scope_conflicts_1 (basic_block bb, b
>               unsigned i;
>               EXECUTE_IF_SET_IN_BITMAP (work, 0, i, bi)
>                 {
> -                 unsigned j;
> -                 bitmap_iterator bj;
> -                 EXECUTE_IF_SET_IN_BITMAP (work, i + 1, j, bj)
> -                   add_stack_var_conflict (i, j);
> +                 struct stack_var *a = &stack_vars[i];
> +                 if (!a->conflicts)
> +                   a->conflicts = BITMAP_ALLOC (NULL);
> +                 bitmap_ior_into (a->conflicts, work);
>                 }
>               visit = visit_conflict;
>             }
> @@ -450,6 +450,8 @@ add_scope_conflicts (void)
>    basic_block bb;
>    bool changed;
>    bitmap work = BITMAP_ALLOC (NULL);
> +  int *rpo;
> +  int n_bbs;
>
>    /* We approximate the live range of a stack variable by taking the first
>       mention of its name as starting point(s), and by the end-of-scope
> @@ -464,13 +466,19 @@ add_scope_conflicts (void)
>    FOR_ALL_BB (bb)
>      bb->aux = BITMAP_ALLOC (NULL);
>
> +  rpo = XNEWVEC (int, last_basic_block);
> +  n_bbs = pre_and_rev_post_order_compute (NULL, rpo, false);
> +
>    changed = true;
>    while (changed)
>      {
> +      int i;
>        changed = false;
> -      FOR_EACH_BB (bb)
> +      for (i = 0; i < n_bbs; i++)
>         {
> -         bitmap active = (bitmap)bb->aux;
> +         bitmap active;
> +         bb = BASIC_BLOCK (rpo[i]);
> +         active = (bitmap)bb->aux;
>           add_scope_conflicts_1 (bb, work, false);
>           if (bitmap_ior_into (active, work))
>             changed = true;
> @@ -480,6 +488,7 @@ add_scope_conflicts (void)
>    FOR_EACH_BB (bb)
>      add_scope_conflicts_1 (bb, work, true);
>
> +  free (rpo);
>    BITMAP_FREE (work);
>    FOR_ALL_BB (bb)
>      BITMAP_FREE (bb->aux);
> @@ -1344,7 +1353,7 @@ add_stack_protection_conflicts (void)
>    for (i = 0; i < n; ++i)
>      {
>        unsigned char ph_i = phase[i];
> -      for (j = 0; j < i; ++j)
> +      for (j = i + 1; j < n; ++j)
>         if (ph_i != phase[j])
>           add_stack_var_conflict (i, j);
>      }
> @@ -1393,6 +1402,7 @@ fini_vars_expansion (void)
>    XDELETEVEC (stack_vars);
>    XDELETEVEC (stack_vars_sorted);
>    stack_vars = NULL;
> +  stack_vars_sorted = NULL;
>    stack_vars_alloc = stack_vars_num = 0;
>    pointer_map_destroy (decl_to_stack_part);
>    decl_to_stack_part = NULL;

Patch

Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(revision 190077)
+++ cfgexpand.c	(working copy)
@@ -429,10 +429,10 @@  add_scope_conflicts_1 (basic_block bb, b
 	      unsigned i;
 	      EXECUTE_IF_SET_IN_BITMAP (work, 0, i, bi)
 		{
-		  unsigned j;
-		  bitmap_iterator bj;
-		  EXECUTE_IF_SET_IN_BITMAP (work, i + 1, j, bj)
-		    add_stack_var_conflict (i, j);
+		  struct stack_var *a = &stack_vars[i];
+		  if (!a->conflicts)
+		    a->conflicts = BITMAP_ALLOC (NULL);
+		  bitmap_ior_into (a->conflicts, work);
 		}
 	      visit = visit_conflict;
 	    }
@@ -450,6 +450,8 @@  add_scope_conflicts (void)
   basic_block bb;
   bool changed;
   bitmap work = BITMAP_ALLOC (NULL);
+  int *rpo;
+  int n_bbs;
 
   /* We approximate the live range of a stack variable by taking the first
      mention of its name as starting point(s), and by the end-of-scope
@@ -464,13 +466,19 @@  add_scope_conflicts (void)
   FOR_ALL_BB (bb)
     bb->aux = BITMAP_ALLOC (NULL);
 
+  rpo = XNEWVEC (int, last_basic_block);
+  n_bbs = pre_and_rev_post_order_compute (NULL, rpo, false);
+
   changed = true;
   while (changed)
     {
+      int i;
       changed = false;
-      FOR_EACH_BB (bb)
+      for (i = 0; i < n_bbs; i++)
 	{
-	  bitmap active = (bitmap)bb->aux;
+	  bitmap active;
+	  bb = BASIC_BLOCK (rpo[i]);
+	  active = (bitmap)bb->aux;
 	  add_scope_conflicts_1 (bb, work, false);
 	  if (bitmap_ior_into (active, work))
 	    changed = true;
@@ -480,6 +488,7 @@  add_scope_conflicts (void)
   FOR_EACH_BB (bb)
     add_scope_conflicts_1 (bb, work, true);
 
+  free (rpo);
   BITMAP_FREE (work);
   FOR_ALL_BB (bb)
     BITMAP_FREE (bb->aux);
@@ -1344,7 +1353,7 @@  add_stack_protection_conflicts (void)
   for (i = 0; i < n; ++i)
     {
       unsigned char ph_i = phase[i];
-      for (j = 0; j < i; ++j)
+      for (j = i + 1; j < n; ++j)
 	if (ph_i != phase[j])
 	  add_stack_var_conflict (i, j);
     }
@@ -1393,6 +1402,7 @@  fini_vars_expansion (void)
   XDELETEVEC (stack_vars);
   XDELETEVEC (stack_vars_sorted);
   stack_vars = NULL;
+  stack_vars_sorted = NULL;
   stack_vars_alloc = stack_vars_num = 0;
   pointer_map_destroy (decl_to_stack_part);
   decl_to_stack_part = NULL;