Patchwork combine BIT_FIELD_REF and VEC_PERM_EXPR

login
register
mail settings
Submitter Marc Glisse
Date Sept. 1, 2012, 6:54 p.m.
Message ID <alpine.DEB.2.02.1209012034540.14982@stedding.saclay.inria.fr>
Download mbox | patch
Permalink /patch/181149/
State New
Headers show

Comments

Marc Glisse - Sept. 1, 2012, 6:54 p.m.
Hello,

this patch makes it so that instead of taking the k-th element of a 
shuffle of V, we directly take the k'-th element of V.

Note that I am *not* checking that the shuffle is only used once. There 
can be some circumstances where this optimization will make us miss other 
opportunities later, but restricting it to the single use case would make 
it much less useful.

This is also my first contact with BIT_FIELD_REF, I may have missed some 
properties of that tree.

bootstrap+testsuite ok.

2012-09-01  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree-ssa-forwprop.c (simplify_bitfield): New function.
 	(ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
 	* gcc.dg/tree-ssa/forwprop-21.c: New testcase.

(-21 because I have another patch pending review that adds a -20 testcase)
(simplify_bitfield appears before simplify_permutation to minimize 
conflicts with that same patch, I can put it back in order when I commit 
if you prefer)
Richard Guenther - Sept. 3, 2012, 12:40 p.m.
On Sat, Sep 1, 2012 at 8:54 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this patch makes it so that instead of taking the k-th element of a shuffle
> of V, we directly take the k'-th element of V.
>
> Note that I am *not* checking that the shuffle is only used once. There can
> be some circumstances where this optimization will make us miss other
> opportunities later, but restricting it to the single use case would make it
> much less useful.
>
> This is also my first contact with BIT_FIELD_REF, I may have missed some
> properties of that tree.
>
> bootstrap+testsuite ok.
>
> 2012-09-01  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree-ssa-forwprop.c (simplify_bitfield): New function.
>         (ssa_forward_propagate_and_combine): Call it.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/forwprop-21.c: New testcase.
>
> (-21 because I have another patch pending review that adds a -20 testcase)
> (simplify_bitfield appears before simplify_permutation to minimize conflicts
> with that same patch, I can put it back in order when I commit if you
> prefer)
>
> --
> Marc Glisse
> Index: testsuite/gcc.dg/tree-ssa/forwprop-21.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/forwprop-21.c     (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/forwprop-21.c     (revision 0)
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +typedef int v4si __attribute__ ((vector_size (4 * sizeof(int))));
> +
> +int
> +test (v4si *x, v4si *y)
> +{
> +  v4si m = { 2, 3, 6, 5 };
> +  v4si z = __builtin_shuffle (*x, *y, m);
> +  return z[2];
> +}
> +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-21.c
> ___________________________________________________________________
> Added: svn:eol-style
>    + native
> Added: svn:keywords
>    + Author Date Id Revision URL
>
> Index: tree-ssa-forwprop.c
> ===================================================================
> --- tree-ssa-forwprop.c (revision 190847)
> +++ tree-ssa-forwprop.c (working copy)
> @@ -2570,20 +2570,88 @@ combine_conversions (gimple_stmt_iterato
>               gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
>               update_stmt (stmt);
>               return remove_prop_source_from_use (op0) ? 2 : 1;
>             }
>         }
>      }
>
>    return 0;
>  }
>
> +/* Combine an element access with a shuffle.  Returns true if there were
> +   any changes made, else it returns false.  */
> +
> +static bool
> +simplify_bitfield (gimple_stmt_iterator *gsi)
> +{
> +  gimple stmt = gsi_stmt (*gsi);
> +  gimple def_stmt;
> +  tree op, op0, op1, op2;
> +  unsigned idx, n, size;
> +  enum tree_code code;
> +
> +  op = gimple_assign_rhs1 (stmt);
> +  gcc_checking_assert (TREE_CODE (op) == BIT_FIELD_REF);
> +
> +  op0 = TREE_OPERAND (op, 0);
> +  op1 = TREE_OPERAND (op, 1);
> +  op2 = TREE_OPERAND (op, 2);
> +
> +  if (TREE_CODE (TREE_TYPE (op0)) != VECTOR_TYPE)
> +    return false;
> +
> +  size = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (op0))));
> +  n = TREE_INT_CST_LOW (op1) / size;
> +  idx = TREE_INT_CST_LOW (op2) / size;
> +
> +  if (n != 1)
> +    return false;
> +
> +  if (TREE_CODE (op0) != SSA_NAME)
> +    return false;

Please do the early outs where you compute the arguments.  Thus, right
after getting op0 in this case or right after computing n for the n != 1 check.

I think you need to verify that the type of 'op' is actually the element type
of op0.  The BIT_FIELD_REF can happily access elements two and three
of { 1, 2, 3, 4 } as a long for example.  See the BIT_FIELD_REF foldings
in fold-const.c.

> +  def_stmt = SSA_NAME_DEF_STMT (op0);
> +  if (!def_stmt || !is_gimple_assign (def_stmt)
> +      || !can_propagate_from (def_stmt))
> +    return false;
> +
> +  code = gimple_assign_rhs_code (def_stmt);
> +
> +  if (code == VEC_PERM_EXPR)
> +    {
> +      tree p, m, index, tem;
> +      unsigned nelts;
> +      m = gimple_assign_rhs3 (def_stmt);
> +      if (TREE_CODE (m) != VECTOR_CST)
> +       return false;
> +      nelts = VECTOR_CST_NELTS (m);
> +      idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx));
> +      idx %= 2 * nelts;
> +      if (idx < nelts)
> +       {
> +         p = gimple_assign_rhs1 (def_stmt);
> +       }
> +      else
> +       {
> +         p = gimple_assign_rhs2 (def_stmt);
> +         idx -= nelts;
> +       }
> +      index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size);
> +      tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index);

This shouldn't simplify, so you can use build3 instead.  Please also add
handling of code == CONSTRUCTOR.

Thanks,
Richard.

> +      gimple_assign_set_rhs1 (stmt, tem);
> +      update_stmt (stmt);
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Determine whether applying the 2 permutations (mask1 then mask2)
>     gives back one of the input.  */
>
>  static int
>  is_combined_permutation_identity (tree mask1, tree mask2)
>  {
>    tree mask;
>    unsigned int nelts, i, j;
>    bool maybe_identity1 = true;
>    bool maybe_identity2 = true;
> @@ -2828,20 +2896,22 @@ ssa_forward_propagate_and_combine (void)
>                       cfg_changed = true;
>                     changed = did_something != 0;
>                   }
>                 else if (code == VEC_PERM_EXPR)
>                   {
>                     int did_something = simplify_permutation (&gsi);
>                     if (did_something == 2)
>                       cfg_changed = true;
>                     changed = did_something != 0;
>                   }
> +               else if (code == BIT_FIELD_REF)
> +                 changed = simplify_bitfield (&gsi);
>                 break;
>               }
>
>             case GIMPLE_SWITCH:
>               changed = simplify_gimple_switch (stmt);
>               break;
>
>             case GIMPLE_COND:
>               {
>                 int did_something;
>
Marc Glisse - Sept. 3, 2012, 1:39 p.m.
On Mon, 3 Sep 2012, Richard Guenther wrote:

> Please do the early outs where you compute the arguments.  Thus, right 
> after getting op0 in this case or right after computing n for the n != 1 
> check.

Ok.

> I think you need to verify that the type of 'op' is actually the element type
> of op0.  The BIT_FIELD_REF can happily access elements two and three
> of { 1, 2, 3, 4 } as a long for example.

Indeed I missed that.

> See the BIT_FIELD_REF foldings in fold-const.c.

That's what I was looking at (picked the same variable names size, idx, n) 
but I forgot that test :-(

>> +  if (code == VEC_PERM_EXPR)
>> +    {
>> +      tree p, m, index, tem;
>> +      unsigned nelts;
>> +      m = gimple_assign_rhs3 (def_stmt);
>> +      if (TREE_CODE (m) != VECTOR_CST)
>> +       return false;
>> +      nelts = VECTOR_CST_NELTS (m);
>> +      idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx));
>> +      idx %= 2 * nelts;
>> +      if (idx < nelts)
>> +       {
>> +         p = gimple_assign_rhs1 (def_stmt);
>> +       }
>> +      else
>> +       {
>> +         p = gimple_assign_rhs2 (def_stmt);
>> +         idx -= nelts;
>> +       }
>> +      index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size);
>> +      tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index);
>
> This shouldn't simplify, so you can use build3 instead.

I think that it is possible for p to be a VECTOR_CST, if the shuffle 
involves one constant and one non-constant vectors, no?

Now that I look at this line, I wonder if I am missing some unshare_expr 
for p and/or op1.

> Please also add handling of code == CONSTRUCTOR.

The cases I tried were already handled by fre1. I can add code for 
constructor, but I'll need to look for a testcase first. Can that go to a 
different patch?
Richard Guenther - Sept. 3, 2012, 3:16 p.m.
On Mon, Sep 3, 2012 at 3:39 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 3 Sep 2012, Richard Guenther wrote:
>
>> Please do the early outs where you compute the arguments.  Thus, right
>> after getting op0 in this case or right after computing n for the n != 1
>> check.
>
>
> Ok.
>
>> I think you need to verify that the type of 'op' is actually the element
>> type
>> of op0.  The BIT_FIELD_REF can happily access elements two and three
>> of { 1, 2, 3, 4 } as a long for example.
>
>
> Indeed I missed that.
>
>
>> See the BIT_FIELD_REF foldings in fold-const.c.
>
>
> That's what I was looking at (picked the same variable names size, idx, n)
> but I forgot that test :-(
>
>
>>> +  if (code == VEC_PERM_EXPR)
>>> +    {
>>> +      tree p, m, index, tem;
>>> +      unsigned nelts;
>>> +      m = gimple_assign_rhs3 (def_stmt);
>>> +      if (TREE_CODE (m) != VECTOR_CST)
>>> +       return false;
>>> +      nelts = VECTOR_CST_NELTS (m);
>>> +      idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx));
>>> +      idx %= 2 * nelts;
>>> +      if (idx < nelts)
>>> +       {
>>> +         p = gimple_assign_rhs1 (def_stmt);
>>> +       }
>>> +      else
>>> +       {
>>> +         p = gimple_assign_rhs2 (def_stmt);
>>> +         idx -= nelts;
>>> +       }
>>> +      index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size);
>>> +      tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index);
>>
>>
>> This shouldn't simplify, so you can use build3 instead.
>
>
> I think that it is possible for p to be a VECTOR_CST, if the shuffle
> involves one constant and one non-constant vectors, no?

Well, constant propagation should have handled it ...

If you use fold_build3 you need to check that the result is in expected form
(a is_gimple_invariant or an SSA_NAME).

> Now that I look at this line, I wonder if I am missing some unshare_expr for
> p and/or op1.

If either is a CONSTRUCTOR and its def stmt is not removed and it survives
into tem then yes ...

>> Please also add handling of code == CONSTRUCTOR.
>
>
> The cases I tried were already handled by fre1. I can add code for
> constructor, but I'll need to look for a testcase first. Can that go to a
> different patch?

Yes.

Thanks,
Richard.

> --
> Marc Glisse
Marc Glisse - Sept. 3, 2012, 4:12 p.m.
On Mon, 3 Sep 2012, Richard Guenther wrote:

>>>> +  if (code == VEC_PERM_EXPR)
>>>> +    {
>>>> +      tree p, m, index, tem;
>>>> +      unsigned nelts;
>>>> +      m = gimple_assign_rhs3 (def_stmt);
>>>> +      if (TREE_CODE (m) != VECTOR_CST)
>>>> +       return false;
>>>> +      nelts = VECTOR_CST_NELTS (m);
>>>> +      idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx));
>>>> +      idx %= 2 * nelts;
>>>> +      if (idx < nelts)
>>>> +       {
>>>> +         p = gimple_assign_rhs1 (def_stmt);
>>>> +       }
>>>> +      else
>>>> +       {
>>>> +         p = gimple_assign_rhs2 (def_stmt);
>>>> +         idx -= nelts;
>>>> +       }
>>>> +      index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size);
>>>> +      tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index);
>>>
>>>
>>> This shouldn't simplify, so you can use build3 instead.
>>
>>
>> I think that it is possible for p to be a VECTOR_CST, if the shuffle
>> involves one constant and one non-constant vectors, no?
>
> Well, constant propagation should have handled it ...

When it sees __builtin_shuffle(cst1,var,cst2)[cst3], CCP should basically 
do the same thing I am doing here, in the hope that the element will be 
part of cst1 instead of var? What if builtin_shuffle takes 2 constructors, 
one of which contains at least one constant? It looks easier to handle it 
here and let the next run of CCP notice the simplified expression. Or do 
you mean I should add the new function to CCP (or even fold) instead of 
forwprop? (wouldn't be the first time CCP does more than constant 
propagation)

> If you use fold_build3 you need to check that the result is in expected form
> (a is_gimple_invariant or an SSA_NAME).
>
>> Now that I look at this line, I wonder if I am missing some unshare_expr for
>> p and/or op1.
>
> If either is a CONSTRUCTOR and its def stmt is not removed and it survives
> into tem then yes ...

But the integer_cst doesn't need it. Ok, thanks.
Richard Guenther - Sept. 4, 2012, 9:38 a.m.
On Mon, Sep 3, 2012 at 6:12 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 3 Sep 2012, Richard Guenther wrote:
>
>>>>> +  if (code == VEC_PERM_EXPR)
>>>>> +    {
>>>>> +      tree p, m, index, tem;
>>>>> +      unsigned nelts;
>>>>> +      m = gimple_assign_rhs3 (def_stmt);
>>>>> +      if (TREE_CODE (m) != VECTOR_CST)
>>>>> +       return false;
>>>>> +      nelts = VECTOR_CST_NELTS (m);
>>>>> +      idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx));
>>>>> +      idx %= 2 * nelts;
>>>>> +      if (idx < nelts)
>>>>> +       {
>>>>> +         p = gimple_assign_rhs1 (def_stmt);
>>>>> +       }
>>>>> +      else
>>>>> +       {
>>>>> +         p = gimple_assign_rhs2 (def_stmt);
>>>>> +         idx -= nelts;
>>>>> +       }
>>>>> +      index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size);
>>>>> +      tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1,
>>>>> index);
>>>>
>>>>
>>>>
>>>> This shouldn't simplify, so you can use build3 instead.
>>>
>>>
>>>
>>> I think that it is possible for p to be a VECTOR_CST, if the shuffle
>>> involves one constant and one non-constant vectors, no?
>>
>>
>> Well, constant propagation should have handled it ...
>
>
> When it sees __builtin_shuffle(cst1,var,cst2)[cst3], CCP should basically do
> the same thing I am doing here, in the hope that the element will be part of
> cst1 instead of var?

Yes, if CCP sees

 vec_1 = VEC_PERM <cst1, var, cst2>;
scalar_2 = BIT_FIELD_REF <vec_1, ....cst3>;

then if vec_1 ends up being constant it should figure out that vec_1 is constant
and that scalar_2 is constant.  Of course if we have

 vec_1 = VEC_PERM <cst1, var1, var2>;

and var1/var2 are CONSTRUCTORS with some elements constants then
it won't be able to do that and forwprop should do it.  So I suppose handling
constants in forwprop is fine (but it would be nice to double-check if in
the first example CCP figures out that vec_1 and scalar_2 are constant).

> What if builtin_shuffle takes 2 constructors, one of
> which contains at least one constant? It looks easier to handle it here and
> let the next run of CCP notice the simplified expression. Or do you mean I
> should add the new function to CCP (or even fold) instead of forwprop?
> (wouldn't be the first time CCP does more than constant propagation)

CCP should only do lattice-based constant propagation, other cases need
to be handled in forwprop.

>> If you use fold_build3 you need to check that the result is in expected
>> form
>> (a is_gimple_invariant or an SSA_NAME).
>>
>>> Now that I look at this line, I wonder if I am missing some unshare_expr
>>> for
>>> p and/or op1.
>>
>>
>> If either is a CONSTRUCTOR and its def stmt is not removed and it survives
>> into tem then yes ...
>
>
> But the integer_cst doesn't need it. Ok, thanks.
>
> --
> Marc Glisse

Patch

Index: testsuite/gcc.dg/tree-ssa/forwprop-21.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-21.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-21.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+typedef int v4si __attribute__ ((vector_size (4 * sizeof(int))));
+
+int
+test (v4si *x, v4si *y)
+{
+  v4si m = { 2, 3, 6, 5 };
+  v4si z = __builtin_shuffle (*x, *y, m);
+  return z[2];
+}
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-21.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL

Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c	(revision 190847)
+++ tree-ssa-forwprop.c	(working copy)
@@ -2570,20 +2570,88 @@ combine_conversions (gimple_stmt_iterato
 	      gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
 	      update_stmt (stmt);
 	      return remove_prop_source_from_use (op0) ? 2 : 1;
 	    }
 	}
     }
 
   return 0;
 }
 
+/* Combine an element access with a shuffle.  Returns true if there were
+   any changes made, else it returns false.  */
+
+static bool
+simplify_bitfield (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op, op0, op1, op2;
+  unsigned idx, n, size;
+  enum tree_code code;
+
+  op = gimple_assign_rhs1 (stmt);
+  gcc_checking_assert (TREE_CODE (op) == BIT_FIELD_REF);
+
+  op0 = TREE_OPERAND (op, 0);
+  op1 = TREE_OPERAND (op, 1);
+  op2 = TREE_OPERAND (op, 2);
+
+  if (TREE_CODE (TREE_TYPE (op0)) != VECTOR_TYPE)
+    return false;
+
+  size = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (op0))));
+  n = TREE_INT_CST_LOW (op1) / size;
+  idx = TREE_INT_CST_LOW (op2) / size;
+
+  if (n != 1)
+    return false;
+
+  if (TREE_CODE (op0) != SSA_NAME)
+    return false;
+
+  def_stmt = SSA_NAME_DEF_STMT (op0);
+  if (!def_stmt || !is_gimple_assign (def_stmt)
+      || !can_propagate_from (def_stmt))
+    return false;
+
+  code = gimple_assign_rhs_code (def_stmt);
+
+  if (code == VEC_PERM_EXPR)
+    {
+      tree p, m, index, tem;
+      unsigned nelts;
+      m = gimple_assign_rhs3 (def_stmt);
+      if (TREE_CODE (m) != VECTOR_CST)
+	return false;
+      nelts = VECTOR_CST_NELTS (m);
+      idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx));
+      idx %= 2 * nelts;
+      if (idx < nelts)
+	{
+	  p = gimple_assign_rhs1 (def_stmt);
+	}
+      else
+	{
+	  p = gimple_assign_rhs2 (def_stmt);
+	  idx -= nelts;
+	}
+      index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size);
+      tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index);
+      gimple_assign_set_rhs1 (stmt, tem);
+      update_stmt (stmt);
+      return true;
+    }
+
+  return false;
+}
+
 /* Determine whether applying the 2 permutations (mask1 then mask2)
    gives back one of the input.  */
 
 static int
 is_combined_permutation_identity (tree mask1, tree mask2)
 {
   tree mask;
   unsigned int nelts, i, j;
   bool maybe_identity1 = true;
   bool maybe_identity2 = true;
@@ -2828,20 +2896,22 @@ ssa_forward_propagate_and_combine (void)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
 		else if (code == VEC_PERM_EXPR)
 		  {
 		    int did_something = simplify_permutation (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
+		else if (code == BIT_FIELD_REF)
+		  changed = simplify_bitfield (&gsi);
 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;