Patchwork Fix PR52976

login
register
mail settings
Submitter William J. Schmidt
Date April 14, 2012, 5:05 p.m.
Message ID <1334423102.19102.59.camel@gnopaine>
Download mbox | patch
Permalink /patch/152529/
State New
Headers show

Comments

William J. Schmidt - April 14, 2012, 5:05 p.m.
This patch corrects two errors in reassociating expressions with
repeated factors.  First, undistribution needs to recognize repeated
factors.  For now, repeated factors will be ineligible for this
optimization.  In the future, this can be improved.  Second, when a
__builtin_powi call is introduced, its target SSA name must be given a
rank higher than other operands in the operand list.  Otherwise, uses of
the call result may be introduced prior to the call.

Bootstrapped and regression tested on powerpc64-linux.  Confirmed that
cpu2000 and cpu2006 SPEC tests build cleanly.  OK for trunk?

Thanks,
Bill


2012-04-14  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/52976
	* tree-ssa-reassoc.c (add_to_ops_vec_max_rank): New function.
	(undistribute_ops_list): Ops with repeat counts aren't eligible for
	undistribution.
	(attempt_builtin_powi): Call add_to_ops_vec_max_rank.
Richard Guenther - April 16, 2012, 9:01 a.m.
On Sat, Apr 14, 2012 at 7:05 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This patch corrects two errors in reassociating expressions with
> repeated factors.  First, undistribution needs to recognize repeated
> factors.  For now, repeated factors will be ineligible for this
> optimization.  In the future, this can be improved.  Second, when a
> __builtin_powi call is introduced, its target SSA name must be given a
> rank higher than other operands in the operand list.  Otherwise, uses of
> the call result may be introduced prior to the call.
>
> Bootstrapped and regression tested on powerpc64-linux.  Confirmed that
> cpu2000 and cpu2006 SPEC tests build cleanly.  OK for trunk?

Ok, given it fixes quite some fallout.

But I wonder why the rank computation does not properly "work"
automagically in the powi case.

Also for undistribution it looks like this might introduce missed optimizations?
Thus, how hard would it be to teach it to properly handle ->count != 1?  ISTR
it does some counting itself.

Thanks,
Richard.

> Thanks,
> Bill
>
>
> 2012-04-14  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/52976
>        * tree-ssa-reassoc.c (add_to_ops_vec_max_rank): New function.
>        (undistribute_ops_list): Ops with repeat counts aren't eligible for
>        undistribution.
>        (attempt_builtin_powi): Call add_to_ops_vec_max_rank.
>
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c      (revision 186393)
> +++ gcc/tree-ssa-reassoc.c      (working copy)
> @@ -544,6 +544,28 @@ add_repeat_to_ops_vec (VEC(operand_entry_t, heap)
>   reassociate_stats.pows_encountered++;
>  }
>
> +/* Add an operand entry to *OPS for the tree operand OP, giving the
> +   new entry a larger rank than any other operand already in *OPS.  */
> +
> +static void
> +add_to_ops_vec_max_rank (VEC(operand_entry_t, heap) **ops, tree op)
> +{
> +  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
> +  operand_entry_t oe1;
> +  unsigned i;
> +  unsigned max_rank = 0;
> +
> +  FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
> +    if (oe1->rank > max_rank)
> +      max_rank = oe1->rank;
> +
> +  oe->op = op;
> +  oe->rank = max_rank + 1;
> +  oe->id = next_operand_entry_id++;
> +  oe->count = 1;
> +  VEC_safe_push (operand_entry_t, heap, *ops, oe);
> +}
> +
>  /* Return true if STMT is reassociable operation containing a binary
>    operation with tree code CODE, and is inside LOOP.  */
>
> @@ -1200,6 +1222,7 @@ undistribute_ops_list (enum tree_code opcode,
>       dcode = gimple_assign_rhs_code (oe1def);
>       if ((dcode != MULT_EXPR
>           && dcode != RDIV_EXPR)
> +         || oe1->count != 1
>          || !is_reassociable_op (oe1def, dcode, loop))
>        continue;
>
> @@ -1243,6 +1266,8 @@ undistribute_ops_list (enum tree_code opcode,
>          oecount c;
>          void **slot;
>          size_t idx;
> +         if (oe1->count != 1)
> +           continue;
>          c.oecode = oecode;
>          c.cnt = 1;
>          c.id = next_oecount_id++;
> @@ -1311,7 +1336,7 @@ undistribute_ops_list (enum tree_code opcode,
>
>          FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
>            {
> -             if (oe1->op == c->op)
> +             if (oe1->op == c->op && oe1->count == 1)
>                {
>                  SET_BIT (candidates2, i);
>                  ++nr_candidates2;
> @@ -3275,8 +3300,10 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
>        }
>
> -      /* Append the result of this iteration to the ops vector.  */
> -      add_to_ops_vec (ops, iter_result);
> +      /* Append the result of this iteration to the ops vector.
> +         Give it a rank higher than all other ranks in the ops vector
> +         so that all uses of it will be forced to come after it.  */
> +      add_to_ops_vec_max_rank (ops, iter_result);
>
>       /* Decrement the occurrence count of each element in the product
>         by the count found above, and remove this many copies of each
>
>
William J. Schmidt - April 16, 2012, 12:11 p.m.
On Mon, 2012-04-16 at 11:01 +0200, Richard Guenther wrote:
> On Sat, Apr 14, 2012 at 7:05 PM, William J. Schmidt
> <wschmidt@linux.vnet.ibm.com> wrote:
> > This patch corrects two errors in reassociating expressions with
> > repeated factors.  First, undistribution needs to recognize repeated
> > factors.  For now, repeated factors will be ineligible for this
> > optimization.  In the future, this can be improved.  Second, when a
> > __builtin_powi call is introduced, its target SSA name must be given a
> > rank higher than other operands in the operand list.  Otherwise, uses of
> > the call result may be introduced prior to the call.
> >
> > Bootstrapped and regression tested on powerpc64-linux.  Confirmed that
> > cpu2000 and cpu2006 SPEC tests build cleanly.  OK for trunk?
> 
> Ok, given it fixes quite some fallout.
> 
OK, thanks.

> But I wonder why the rank computation does not properly "work"
> automagically in the powi case.

The reassociator generally tries to replace expressions in place unless
the rank system tells it otherwise.  At the moment, __builtin_powi calls
are added right before the root of the reassociation chain (the last
multiply).  In the cases that failed, the natural rank of the call was
one greater than the rank of the repeated factors, and there were other
factors with higher rank than that.  So the call was in the middle of
the ranks but placement required it to have the highest rank.  Because
the call can't be further reassociated, it sort of ruins the flexibility
of the rank system's placement algorithm.

It would probably be better to insert the calls as early as necessary,
but no earlier, to properly order things while letting the rank system
do its job normally.  That would help reduce lifetimes of reassociated
values.  I didn't see an obvious way to do that with a quick fix; I'm
planning to think about it some more.

> 
> Also for undistribution it looks like this might introduce missed optimizations?
> Thus, how hard would it be to teach it to properly handle ->count != 1?  ISTR
> it does some counting itself.

I'm planning to work on that as well.  I looked at it enough over the
weekend to know it wasn't completely trivial, so I wanted to get the
problem papered over for now.  It shouldn't be too hard to get right.

Thanks,
Bill

> 
> Thanks,
> Richard.
> 
> > Thanks,
> > Bill
> >
> >
> > 2012-04-14  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> >
> >        PR tree-optimization/52976
> >        * tree-ssa-reassoc.c (add_to_ops_vec_max_rank): New function.
> >        (undistribute_ops_list): Ops with repeat counts aren't eligible for
> >        undistribution.
> >        (attempt_builtin_powi): Call add_to_ops_vec_max_rank.
> >
> >
> > Index: gcc/tree-ssa-reassoc.c
> > ===================================================================
> > --- gcc/tree-ssa-reassoc.c      (revision 186393)
> > +++ gcc/tree-ssa-reassoc.c      (working copy)
> > @@ -544,6 +544,28 @@ add_repeat_to_ops_vec (VEC(operand_entry_t, heap)
> >   reassociate_stats.pows_encountered++;
> >  }
> >
> > +/* Add an operand entry to *OPS for the tree operand OP, giving the
> > +   new entry a larger rank than any other operand already in *OPS.  */
> > +
> > +static void
> > +add_to_ops_vec_max_rank (VEC(operand_entry_t, heap) **ops, tree op)
> > +{
> > +  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
> > +  operand_entry_t oe1;
> > +  unsigned i;
> > +  unsigned max_rank = 0;
> > +
> > +  FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
> > +    if (oe1->rank > max_rank)
> > +      max_rank = oe1->rank;
> > +
> > +  oe->op = op;
> > +  oe->rank = max_rank + 1;
> > +  oe->id = next_operand_entry_id++;
> > +  oe->count = 1;
> > +  VEC_safe_push (operand_entry_t, heap, *ops, oe);
> > +}
> > +
> >  /* Return true if STMT is reassociable operation containing a binary
> >    operation with tree code CODE, and is inside LOOP.  */
> >
> > @@ -1200,6 +1222,7 @@ undistribute_ops_list (enum tree_code opcode,
> >       dcode = gimple_assign_rhs_code (oe1def);
> >       if ((dcode != MULT_EXPR
> >           && dcode != RDIV_EXPR)
> > +         || oe1->count != 1
> >          || !is_reassociable_op (oe1def, dcode, loop))
> >        continue;
> >
> > @@ -1243,6 +1266,8 @@ undistribute_ops_list (enum tree_code opcode,
> >          oecount c;
> >          void **slot;
> >          size_t idx;
> > +         if (oe1->count != 1)
> > +           continue;
> >          c.oecode = oecode;
> >          c.cnt = 1;
> >          c.id = next_oecount_id++;
> > @@ -1311,7 +1336,7 @@ undistribute_ops_list (enum tree_code opcode,
> >
> >          FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
> >            {
> > -             if (oe1->op == c->op)
> > +             if (oe1->op == c->op && oe1->count == 1)
> >                {
> >                  SET_BIT (candidates2, i);
> >                  ++nr_candidates2;
> > @@ -3275,8 +3300,10 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> >          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> >        }
> >
> > -      /* Append the result of this iteration to the ops vector.  */
> > -      add_to_ops_vec (ops, iter_result);
> > +      /* Append the result of this iteration to the ops vector.
> > +         Give it a rank higher than all other ranks in the ops vector
> > +         so that all uses of it will be forced to come after it.  */
> > +      add_to_ops_vec_max_rank (ops, iter_result);
> >
> >       /* Decrement the occurrence count of each element in the product
> >         by the count found above, and remove this many copies of each
> >
> >
>
H.J. Lu - May 3, 2012, 10:54 p.m.
On Sat, Apr 14, 2012 at 10:05 AM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This patch corrects two errors in reassociating expressions with
> repeated factors.  First, undistribution needs to recognize repeated
> factors.  For now, repeated factors will be ineligible for this
> optimization.  In the future, this can be improved.  Second, when a
> __builtin_powi call is introduced, its target SSA name must be given a
> rank higher than other operands in the operand list.  Otherwise, uses of
> the call result may be introduced prior to the call.
>
> Bootstrapped and regression tested on powerpc64-linux.  Confirmed that
> cpu2000 and cpu2006 SPEC tests build cleanly.  OK for trunk?
>
> Thanks,
> Bill
>
>
> 2012-04-14  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/52976
>        * tree-ssa-reassoc.c (add_to_ops_vec_max_rank): New function.
>        (undistribute_ops_list): Ops with repeat counts aren't eligible for
>        undistribution.
>        (attempt_builtin_powi): Call add_to_ops_vec_max_rank.
>
>

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53217

Patch

Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c	(revision 186393)
+++ gcc/tree-ssa-reassoc.c	(working copy)
@@ -544,6 +544,28 @@  add_repeat_to_ops_vec (VEC(operand_entry_t, heap)
   reassociate_stats.pows_encountered++;
 }
 
+/* Add an operand entry to *OPS for the tree operand OP, giving the
+   new entry a larger rank than any other operand already in *OPS.  */
+
+static void
+add_to_ops_vec_max_rank (VEC(operand_entry_t, heap) **ops, tree op)
+{
+  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
+  operand_entry_t oe1;
+  unsigned i;
+  unsigned max_rank = 0;
+
+  FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
+    if (oe1->rank > max_rank)
+      max_rank = oe1->rank;
+
+  oe->op = op;
+  oe->rank = max_rank + 1;
+  oe->id = next_operand_entry_id++;
+  oe->count = 1;
+  VEC_safe_push (operand_entry_t, heap, *ops, oe);
+}
+
 /* Return true if STMT is reassociable operation containing a binary
    operation with tree code CODE, and is inside LOOP.  */
 
@@ -1200,6 +1222,7 @@  undistribute_ops_list (enum tree_code opcode,
       dcode = gimple_assign_rhs_code (oe1def);
       if ((dcode != MULT_EXPR
 	   && dcode != RDIV_EXPR)
+	  || oe1->count != 1
 	  || !is_reassociable_op (oe1def, dcode, loop))
 	continue;
 
@@ -1243,6 +1266,8 @@  undistribute_ops_list (enum tree_code opcode,
 	  oecount c;
 	  void **slot;
 	  size_t idx;
+	  if (oe1->count != 1)
+	    continue;
 	  c.oecode = oecode;
 	  c.cnt = 1;
 	  c.id = next_oecount_id++;
@@ -1311,7 +1336,7 @@  undistribute_ops_list (enum tree_code opcode,
 
 	  FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
 	    {
-	      if (oe1->op == c->op)
+	      if (oe1->op == c->op && oe1->count == 1)
 		{
 		  SET_BIT (candidates2, i);
 		  ++nr_candidates2;
@@ -3275,8 +3300,10 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
 	}
 
-      /* Append the result of this iteration to the ops vector.  */
-      add_to_ops_vec (ops, iter_result);
+      /* Append the result of this iteration to the ops vector.
+         Give it a rank higher than all other ranks in the ops vector
+         so that all uses of it will be forced to come after it.  */
+      add_to_ops_vec_max_rank (ops, iter_result);
 
       /* Decrement the occurrence count of each element in the product
 	 by the count found above, and remove this many copies of each