diff mbox

PR71752 - SLP: Maintain operand ordering when creating vec defs

Message ID D3D74E6A.11866%alan.hayward@arm.com
State New
Headers show

Commit Message

Alan Hayward Aug. 15, 2016, 9:48 a.m. UTC
The testcase pr71752.c was failing because the SLP code was generating an
SLP
vector using arguments from the SLP scalar stmts, but was using the wrong
argument number.

vect_get_slp_defs() is given a vector of operands. When calling down to
vect_get_constant_vectors it uses i as op_num - making the assumption that
the
first op in the vector refers to the first argument in the SLP scalar
statement, the second op refers to the second arg and so on.

However, previously in vectorizable_reduction, the call to
vect_get_vec_defs
destroyed this ordering by potentially only passing op1.

The solution is in vectorizable_reduction to create a vector of operands
equal
in size to the number of arguments in the SLP statements. We maintain the
argument ordering and if we don't require defs for that argument we instead
push NULL into the vector. In vect_get_slp_defs we need to handle cases
where
an op might be NULL.

Tested with a check run on X86 and AArch64.
Ok to commit?


Changelog:

gcc/
	* tree-vect-loop.c (vectorizable_reduction): Keep SLP operand ordering.
	* tree-vect-slp.c (vect_get_slp_defs): Handle null operands.

gcc/testsuite/
	* gcc.dg/vect/pr71752.c: New.



Thanks,
Alan.




 	 node or we need to create them (for invariants and constants).  We
 	 check if the LHS of the first stmt of the next child matches OPRND.
@@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,

       if (!vectorized_defs)
         {
-          if (i == 0)
+	  if (first_iteration)
             {
               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
               /* Number of vector stmts was calculated according to LHS in
@@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
       /* For reductions, we only need initial values.  */
       if (reduc_index != -1)
         return;
+
+      first_iteration = false;
     }
 }

Comments

Richard Biener Aug. 15, 2016, 11:17 a.m. UTC | #1
On Mon, Aug 15, 2016 at 11:48 AM, Alan Hayward <alan.hayward@arm.com> wrote:
> The testcase pr71752.c was failing because the SLP code was generating an
> SLP
> vector using arguments from the SLP scalar stmts, but was using the wrong
> argument number.
>
> vect_get_slp_defs() is given a vector of operands. When calling down to
> vect_get_constant_vectors it uses i as op_num - making the assumption that
> the
> first op in the vector refers to the first argument in the SLP scalar
> statement, the second op refers to the second arg and so on.
>
> However, previously in vectorizable_reduction, the call to
> vect_get_vec_defs
> destroyed this ordering by potentially only passing op1.
>
> The solution is in vectorizable_reduction to create a vector of operands
> equal
> in size to the number of arguments in the SLP statements. We maintain the
> argument ordering and if we don't require defs for that argument we instead
> push NULL into the vector. In vect_get_slp_defs we need to handle cases
> where
> an op might be NULL.
>
> Tested with a check run on X86 and AArch64.
> Ok to commit?
>

Ugh.  Note the logic in vect_get_slp_defs is incredibly fragile - I
think you can't
simply "skip" ops the way you do as you need to still increment child_index
accordingly for ignored ops.

Why not let the function compute defs for all ops?  That said, the
vectorizable_reduction
part certainly is fixing a bug (I think I've seen similar issues
elsewhere though).

Richard.

> Changelog:
>
> gcc/
>         * tree-vect-loop.c (vectorizable_reduction): Keep SLP operand ordering.
>         * tree-vect-slp.c (vect_get_slp_defs): Handle null operands.
>
> gcc/testsuite/
>         * gcc.dg/vect/pr71752.c: New.
>
>
>
> Thanks,
> Alan.
>
>
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c
> b/gcc/testsuite/gcc.dg/vect/pr71752.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445dff
> bf23f0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr71752.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +
> +unsigned int q4, yg;
> +
> +unsigned int
> +w6 (unsigned int z5, unsigned int jv)
> +{
> +  unsigned int *f2 = &jv;
> +
> +  while (*f2 < 21)
> +    {
> +      q4 -= jv;
> +      z5 -= jv;
> +      f2 = &yg;
> +      ++(*f2);
> +    }
> +  return z5;
> +}
> +
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index
> 2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a85
> 9ecd9b0 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -5364,7 +5364,7 @@ vectorizable_reduction (gimple *stmt,
> gimple_stmt_iterator *gsi,
>    auto_vec<tree> vect_defs;
>    auto_vec<gimple *> phis;
>    int vec_num;
> -  tree def0, def1, tem, op0, op1 = NULL_TREE;
> +  tree def0, def1, tem, op1 = NULL_TREE;
>    bool first_p = true;
>    tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
>    gimple *cond_expr_induction_def_stmt = NULL;
> @@ -5964,29 +5964,36 @@ vectorizable_reduction (gimple *stmt,
> gimple_stmt_iterator *gsi,
>        /* Handle uses.  */
>        if (j == 0)
>          {
> -          op0 = ops[!reduc_index];
> -          if (op_type == ternary_op)
> -            {
> -              if (reduc_index == 0)
> -                op1 = ops[2];
> -              else
> -                op1 = ops[1];
> -            }
> +         if (slp_node)
> +           {
> +             /* Get vec defs for all the operands except the reduction index,
> +               ensuring the ordering of the ops in the vector is kept.  */
> +             auto_vec<tree, 3> slp_ops;
> +             auto_vec<vec<tree>, 3> vec_defs;
>
> -          if (slp_node)
> -            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
> -                               slp_node, -1);
> +             slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]);
> +             slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]);
> +             if (op_type == ternary_op)
> +               slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]);
> +
> +             vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
> +
> +             vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1 : 0]);
> +             if (op_type == ternary_op)
> +               vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ? 1 : 2]);
> +           }
>            else
> -            {
> +           {
>                loop_vec_def0 = vect_get_vec_def_for_operand
> (ops[!reduc_index],
>                                                              stmt);
>                vec_oprnds0.quick_push (loop_vec_def0);
>                if (op_type == ternary_op)
>                 {
> +                op1 = (reduc_index == 0) ? ops[2] : ops[1];
>                   loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
>                   vec_oprnds1.quick_push (loop_vec_def1);
>                 }
> -            }
> +           }
>          }
>        else
>          {
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index
> fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050c8
> 3cc91fd 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -3200,10 +3200,19 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
> slp_node,
>    vec<tree> vec_defs;
>    tree oprnd;
>    bool vectorized_defs;
> +  bool first_iteration = true;
>
>    first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
>    FOR_EACH_VEC_ELT (ops, i, oprnd)
>      {
> +      if (oprnd == NULL)
> +       {
> +         vec_defs = vNULL;
> +         vec_defs.create (0);
> +         vec_oprnds->quick_push (vec_defs);
> +         continue;
> +       }
> +
>        /* For each operand we check if it has vectorized definitions in a
> child
>          node or we need to create them (for invariants and constants).  We
>          check if the LHS of the first stmt of the next child matches OPRND.
> @@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
>
>        if (!vectorized_defs)
>          {
> -          if (i == 0)
> +         if (first_iteration)
>              {
>                number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
>                /* Number of vector stmts was calculated according to LHS in
> @@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
>        /* For reductions, we only need initial values.  */
>        if (reduc_index != -1)
>          return;
> +
> +      first_iteration = false;
>      }
>  }
>
>
Alan Hayward Aug. 15, 2016, 2:16 p.m. UTC | #2
On 15/08/2016 12:17, "Richard Biener" <richard.guenther@gmail.com> wrote:

>On Mon, Aug 15, 2016 at 11:48 AM, Alan Hayward <alan.hayward@arm.com>
>wrote:
>> The testcase pr71752.c was failing because the SLP code was generating
>>an
>> SLP
>> vector using arguments from the SLP scalar stmts, but was using the
>>wrong
>> argument number.
>>
>> vect_get_slp_defs() is given a vector of operands. When calling down to
>> vect_get_constant_vectors it uses i as op_num - making the assumption
>>that
>> the
>> first op in the vector refers to the first argument in the SLP scalar
>> statement, the second op refers to the second arg and so on.
>>
>> However, previously in vectorizable_reduction, the call to
>> vect_get_vec_defs
>> destroyed this ordering by potentially only passing op1.
>>
>> The solution is in vectorizable_reduction to create a vector of operands
>> equal
>> in size to the number of arguments in the SLP statements. We maintain
>>the
>> argument ordering and if we don't require defs for that argument we
>>instead
>> push NULL into the vector. In vect_get_slp_defs we need to handle cases
>> where
>> an op might be NULL.
>>
>> Tested with a check run on X86 and AArch64.
>> Ok to commit?
>>
>
>Ugh.  Note the logic in vect_get_slp_defs is incredibly fragile - I
>think you can't
>simply "skip" ops the way you do as you need to still increment
>child_index
>accordingly for ignored ops.

Agreed, I should be maintaining the child_index.
Looking at the code, I need a valid oprnd for that code to work.

Given that the size of the ops vector is never more than 3, it probably
makes
sense to reset child_index each time and do a quick for loop through all
the
children.

>
>Why not let the function compute defs for all ops?  That said, the
>vectorizable_reduction
>part certainly is fixing a bug (I think I've seen similar issues
>elsewhere though).

If you let it compute defs for all ops then you can end up creating invalid
initial value SLP vectors which cause SSA failures (even though nothing
uses those
vectors).

In the test case, SLP is defined for _3. Op1 of this is q4_lsm.5_8, but
that is
a loop phi. Creating SLP vec refs for it results in vect_cst__67 and
vect_cst__68, which are clearly invalid:



<bb 4>:
  _58 = yg_lsm.7_23 + 1;
  _59 = _58 + 1;
  _60 = _58 + 2;
  vect_cst__61 = {yg_lsm.7_23, _58, _59, _60};
  vect_cst__62 = { 4, 4, 4, 4 };
  vect_cst__67 = {q4_lsm.5_8, z5_19, q4_lsm.5_8, z5_19};
  vect_cst__68 = {q4_lsm.5_8, z5_19, q4_lsm.5_8, z5_19};
  vect_cst__69 = {jv_9(D), jv_9(D), jv_9(D), jv_9(D)};
  vect_cst__70 = {jv_9(D), jv_9(D), jv_9(D), jv_9(D)};
  vect_cst__73 = {0, 0, 0, 0};
  vect_cst__74 = {q4_lsm.5_16, z5_10(D), 0, 0};
  vect_cst__91 = { 1, 1, 1, 1 };

  <bb 5>:
  # z5_19 = PHI <z5_10(D)(4), z5_13(10)>
  # q4_lsm.5_8 = PHI <q4_lsm.5_16(4), _3(10)>
  # yg_lsm.7_17 = PHI <yg_lsm.7_23(4), _5(10)>
  # vect_vec_iv_.14_63 = PHI <vect_cst__61(4), vect_vec_iv_.14_64(10)>
  # vect__3.15_65 = PHI <vect_cst__74(4), vect__3.15_71(10)>
  # vect__3.15_66 = PHI <vect_cst__73(4), vect__3.15_72(10)>
  # ivtmp_94 = PHI <0(4), ivtmp_95(10)>
  vect_vec_iv_.14_64 = vect_vec_iv_.14_63 + vect_cst__62;
  _3 = q4_lsm.5_8 - jv_9(D);
  vect__3.15_71 = vect__3.15_65 - vect_cst__70;
  vect__3.15_72 = vect__3.15_66 - vect_cst__69;
  z5_13 = z5_19 - jv_9(D);
  vect__5.17_92 = vect_vec_iv_.14_63 + vect_cst__91;
  _5 = yg_lsm.7_17 + 1;
  ivtmp_95 = ivtmp_94 + 1;
  if (ivtmp_95 < bnd.11_18)
    goto <bb 10>;
  else
    goto <bb 8>;




>
>Richard.
>
>> Changelog:
>>
>> gcc/
>>         * tree-vect-loop.c (vectorizable_reduction): Keep SLP operand
>>ordering.
>>         * tree-vect-slp.c (vect_get_slp_defs): Handle null operands.
>>
>> gcc/testsuite/
>>         * gcc.dg/vect/pr71752.c: New.
>>
>>
>>
>> Thanks,
>> Alan.
>>
>>
>>
>>
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c
>> b/gcc/testsuite/gcc.dg/vect/pr71752.c
>> new file mode 100644
>> index
>> 
>>0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445d
>>ff
>> bf23f0a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr71752.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +
>> +unsigned int q4, yg;
>> +
>> +unsigned int
>> +w6 (unsigned int z5, unsigned int jv)
>> +{
>> +  unsigned int *f2 = &jv;
>> +
>> +  while (*f2 < 21)
>> +    {
>> +      q4 -= jv;
>> +      z5 -= jv;
>> +      f2 = &yg;
>> +      ++(*f2);
>> +    }
>> +  return z5;
>> +}
>> +
>> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>> index
>> 
>>2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a
>>85
>> 9ecd9b0 100644
>> --- a/gcc/tree-vect-loop.c
>> +++ b/gcc/tree-vect-loop.c
>> @@ -5364,7 +5364,7 @@ vectorizable_reduction (gimple *stmt,
>> gimple_stmt_iterator *gsi,
>>    auto_vec<tree> vect_defs;
>>    auto_vec<gimple *> phis;
>>    int vec_num;
>> -  tree def0, def1, tem, op0, op1 = NULL_TREE;
>> +  tree def0, def1, tem, op1 = NULL_TREE;
>>    bool first_p = true;
>>    tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type =
>>NULL_TREE;
>>    gimple *cond_expr_induction_def_stmt = NULL;
>> @@ -5964,29 +5964,36 @@ vectorizable_reduction (gimple *stmt,
>> gimple_stmt_iterator *gsi,
>>        /* Handle uses.  */
>>        if (j == 0)
>>          {
>> -          op0 = ops[!reduc_index];
>> -          if (op_type == ternary_op)
>> -            {
>> -              if (reduc_index == 0)
>> -                op1 = ops[2];
>> -              else
>> -                op1 = ops[1];
>> -            }
>> +         if (slp_node)
>> +           {
>> +             /* Get vec defs for all the operands except the reduction
>>index,
>> +               ensuring the ordering of the ops in the vector is kept.
>> */
>> +             auto_vec<tree, 3> slp_ops;
>> +             auto_vec<vec<tree>, 3> vec_defs;
>>
>> -          if (slp_node)
>> -            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
>>&vec_oprnds1,
>> -                               slp_node, -1);
>> +             slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]);
>> +             slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]);
>> +             if (op_type == ternary_op)
>> +               slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]);
>> +
>> +             vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
>> +
>> +             vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1
>>: 0]);
>> +             if (op_type == ternary_op)
>> +               vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ?
>>1 : 2]);
>> +           }
>>            else
>> -            {
>> +           {
>>                loop_vec_def0 = vect_get_vec_def_for_operand
>> (ops[!reduc_index],
>>                                                              stmt);
>>                vec_oprnds0.quick_push (loop_vec_def0);
>>                if (op_type == ternary_op)
>>                 {
>> +                op1 = (reduc_index == 0) ? ops[2] : ops[1];
>>                   loop_vec_def1 = vect_get_vec_def_for_operand (op1,
>>stmt);
>>                   vec_oprnds1.quick_push (loop_vec_def1);
>>                 }
>> -            }
>> +           }
>>          }
>>        else
>>          {
>> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
>> index
>> 
>>fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050
>>c8
>> 3cc91fd 100644
>> --- a/gcc/tree-vect-slp.c
>> +++ b/gcc/tree-vect-slp.c
>> @@ -3200,10 +3200,19 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
>> slp_node,
>>    vec<tree> vec_defs;
>>    tree oprnd;
>>    bool vectorized_defs;
>> +  bool first_iteration = true;
>>
>>    first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
>>    FOR_EACH_VEC_ELT (ops, i, oprnd)
>>      {
>> +      if (oprnd == NULL)
>> +       {
>> +         vec_defs = vNULL;
>> +         vec_defs.create (0);
>> +         vec_oprnds->quick_push (vec_defs);
>> +         continue;
>> +       }
>> +
>>        /* For each operand we check if it has vectorized definitions in
>>a
>> child
>>          node or we need to create them (for invariants and constants).
>> We
>>          check if the LHS of the first stmt of the next child matches
>>OPRND.
>> @@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
>>slp_node,
>>
>>        if (!vectorized_defs)
>>          {
>> -          if (i == 0)
>> +         if (first_iteration)
>>              {
>>                number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS
>>(slp_node);
>>                /* Number of vector stmts was calculated according to
>>LHS in
>> @@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
>>slp_node,
>>        /* For reductions, we only need initial values.  */
>>        if (reduc_index != -1)
>>          return;
>> +
>> +      first_iteration = false;
>>      }
>>  }
>>
>>
>
Richard Biener Aug. 16, 2016, 8:33 a.m. UTC | #3
On Mon, Aug 15, 2016 at 4:16 PM, Alan Hayward <alan.hayward@arm.com> wrote:
>
>
> On 15/08/2016 12:17, "Richard Biener" <richard.guenther@gmail.com> wrote:
>
>>On Mon, Aug 15, 2016 at 11:48 AM, Alan Hayward <alan.hayward@arm.com>
>>wrote:
>>> The testcase pr71752.c was failing because the SLP code was generating
>>>an
>>> SLP
>>> vector using arguments from the SLP scalar stmts, but was using the
>>>wrong
>>> argument number.
>>>
>>> vect_get_slp_defs() is given a vector of operands. When calling down to
>>> vect_get_constant_vectors it uses i as op_num - making the assumption
>>>that
>>> the
>>> first op in the vector refers to the first argument in the SLP scalar
>>> statement, the second op refers to the second arg and so on.
>>>
>>> However, previously in vectorizable_reduction, the call to
>>> vect_get_vec_defs
>>> destroyed this ordering by potentially only passing op1.
>>>
>>> The solution is in vectorizable_reduction to create a vector of operands
>>> equal
>>> in size to the number of arguments in the SLP statements. We maintain
>>>the
>>> argument ordering and if we don't require defs for that argument we
>>>instead
>>> push NULL into the vector. In vect_get_slp_defs we need to handle cases
>>> where
>>> an op might be NULL.
>>>
>>> Tested with a check run on X86 and AArch64.
>>> Ok to commit?
>>>
>>
>>Ugh.  Note the logic in vect_get_slp_defs is incredibly fragile - I
>>think you can't
>>simply "skip" ops the way you do as you need to still increment
>>child_index
>>accordingly for ignored ops.
>
> Agreed, I should be maintaining the child_index.
> Looking at the code, I need a valid oprnd for that code to work.
>
> Given that the size of the ops vector is never more than 3, it probably
> makes
> sense to reset child_index each time and do a quick for loop through all
> the
> children.

Hmm, yes - that should work.  It should also remove the need to keep
operand order in sync?  So it might no longer require the
vectorizable_reduction change.

Richard.

>>
>>Why not let the function compute defs for all ops?  That said, the
>>vectorizable_reduction
>>part certainly is fixing a bug (I think I've seen similar issues
>>elsewhere though).
>
> If you let it compute defs for all ops then you can end up creating invalid
> initial value SLP vectors which cause SSA failures (even though nothing
> uses those
> vectors).
>
> In the test case, SLP is defined for _3. Op1 of this is q4_lsm.5_8, but
> that is
> a loop phi. Creating SLP vec refs for it results in vect_cst__67 and
> vect_cst__68, which are clearly invalid:
>
>
>
> <bb 4>:
>   _58 = yg_lsm.7_23 + 1;
>   _59 = _58 + 1;
>   _60 = _58 + 2;
>   vect_cst__61 = {yg_lsm.7_23, _58, _59, _60};
>   vect_cst__62 = { 4, 4, 4, 4 };
>   vect_cst__67 = {q4_lsm.5_8, z5_19, q4_lsm.5_8, z5_19};
>   vect_cst__68 = {q4_lsm.5_8, z5_19, q4_lsm.5_8, z5_19};
>   vect_cst__69 = {jv_9(D), jv_9(D), jv_9(D), jv_9(D)};
>   vect_cst__70 = {jv_9(D), jv_9(D), jv_9(D), jv_9(D)};
>   vect_cst__73 = {0, 0, 0, 0};
>   vect_cst__74 = {q4_lsm.5_16, z5_10(D), 0, 0};
>   vect_cst__91 = { 1, 1, 1, 1 };
>
>   <bb 5>:
>   # z5_19 = PHI <z5_10(D)(4), z5_13(10)>
>   # q4_lsm.5_8 = PHI <q4_lsm.5_16(4), _3(10)>
>   # yg_lsm.7_17 = PHI <yg_lsm.7_23(4), _5(10)>
>   # vect_vec_iv_.14_63 = PHI <vect_cst__61(4), vect_vec_iv_.14_64(10)>
>   # vect__3.15_65 = PHI <vect_cst__74(4), vect__3.15_71(10)>
>   # vect__3.15_66 = PHI <vect_cst__73(4), vect__3.15_72(10)>
>   # ivtmp_94 = PHI <0(4), ivtmp_95(10)>
>   vect_vec_iv_.14_64 = vect_vec_iv_.14_63 + vect_cst__62;
>   _3 = q4_lsm.5_8 - jv_9(D);
>   vect__3.15_71 = vect__3.15_65 - vect_cst__70;
>   vect__3.15_72 = vect__3.15_66 - vect_cst__69;
>   z5_13 = z5_19 - jv_9(D);
>   vect__5.17_92 = vect_vec_iv_.14_63 + vect_cst__91;
>   _5 = yg_lsm.7_17 + 1;
>   ivtmp_95 = ivtmp_94 + 1;
>   if (ivtmp_95 < bnd.11_18)
>     goto <bb 10>;
>   else
>     goto <bb 8>;
>
>
>
>
>>
>>Richard.
>>
>>> Changelog:
>>>
>>> gcc/
>>>         * tree-vect-loop.c (vectorizable_reduction): Keep SLP operand
>>>ordering.
>>>         * tree-vect-slp.c (vect_get_slp_defs): Handle null operands.
>>>
>>> gcc/testsuite/
>>>         * gcc.dg/vect/pr71752.c: New.
>>>
>>>
>>>
>>> Thanks,
>>> Alan.
>>>
>>>
>>>
>>>
>>> diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c
>>> b/gcc/testsuite/gcc.dg/vect/pr71752.c
>>> new file mode 100644
>>> index
>>>
>>>0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445d
>>>ff
>>> bf23f0a
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/vect/pr71752.c
>>> @@ -0,0 +1,19 @@
>>> +/* { dg-do compile } */
>>> +
>>> +unsigned int q4, yg;
>>> +
>>> +unsigned int
>>> +w6 (unsigned int z5, unsigned int jv)
>>> +{
>>> +  unsigned int *f2 = &jv;
>>> +
>>> +  while (*f2 < 21)
>>> +    {
>>> +      q4 -= jv;
>>> +      z5 -= jv;
>>> +      f2 = &yg;
>>> +      ++(*f2);
>>> +    }
>>> +  return z5;
>>> +}
>>> +
>>> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>>> index
>>>
>>>2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a
>>>85
>>> 9ecd9b0 100644
>>> --- a/gcc/tree-vect-loop.c
>>> +++ b/gcc/tree-vect-loop.c
>>> @@ -5364,7 +5364,7 @@ vectorizable_reduction (gimple *stmt,
>>> gimple_stmt_iterator *gsi,
>>>    auto_vec<tree> vect_defs;
>>>    auto_vec<gimple *> phis;
>>>    int vec_num;
>>> -  tree def0, def1, tem, op0, op1 = NULL_TREE;
>>> +  tree def0, def1, tem, op1 = NULL_TREE;
>>>    bool first_p = true;
>>>    tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type =
>>>NULL_TREE;
>>>    gimple *cond_expr_induction_def_stmt = NULL;
>>> @@ -5964,29 +5964,36 @@ vectorizable_reduction (gimple *stmt,
>>> gimple_stmt_iterator *gsi,
>>>        /* Handle uses.  */
>>>        if (j == 0)
>>>          {
>>> -          op0 = ops[!reduc_index];
>>> -          if (op_type == ternary_op)
>>> -            {
>>> -              if (reduc_index == 0)
>>> -                op1 = ops[2];
>>> -              else
>>> -                op1 = ops[1];
>>> -            }
>>> +         if (slp_node)
>>> +           {
>>> +             /* Get vec defs for all the operands except the reduction
>>>index,
>>> +               ensuring the ordering of the ops in the vector is kept.
>>> */
>>> +             auto_vec<tree, 3> slp_ops;
>>> +             auto_vec<vec<tree>, 3> vec_defs;
>>>
>>> -          if (slp_node)
>>> -            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
>>>&vec_oprnds1,
>>> -                               slp_node, -1);
>>> +             slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]);
>>> +             slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]);
>>> +             if (op_type == ternary_op)
>>> +               slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]);
>>> +
>>> +             vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
>>> +
>>> +             vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1
>>>: 0]);
>>> +             if (op_type == ternary_op)
>>> +               vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ?
>>>1 : 2]);
>>> +           }
>>>            else
>>> -            {
>>> +           {
>>>                loop_vec_def0 = vect_get_vec_def_for_operand
>>> (ops[!reduc_index],
>>>                                                              stmt);
>>>                vec_oprnds0.quick_push (loop_vec_def0);
>>>                if (op_type == ternary_op)
>>>                 {
>>> +                op1 = (reduc_index == 0) ? ops[2] : ops[1];
>>>                   loop_vec_def1 = vect_get_vec_def_for_operand (op1,
>>>stmt);
>>>                   vec_oprnds1.quick_push (loop_vec_def1);
>>>                 }
>>> -            }
>>> +           }
>>>          }
>>>        else
>>>          {
>>> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
>>> index
>>>
>>>fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050
>>>c8
>>> 3cc91fd 100644
>>> --- a/gcc/tree-vect-slp.c
>>> +++ b/gcc/tree-vect-slp.c
>>> @@ -3200,10 +3200,19 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
>>> slp_node,
>>>    vec<tree> vec_defs;
>>>    tree oprnd;
>>>    bool vectorized_defs;
>>> +  bool first_iteration = true;
>>>
>>>    first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
>>>    FOR_EACH_VEC_ELT (ops, i, oprnd)
>>>      {
>>> +      if (oprnd == NULL)
>>> +       {
>>> +         vec_defs = vNULL;
>>> +         vec_defs.create (0);
>>> +         vec_oprnds->quick_push (vec_defs);
>>> +         continue;
>>> +       }
>>> +
>>>        /* For each operand we check if it has vectorized definitions in
>>>a
>>> child
>>>          node or we need to create them (for invariants and constants).
>>> We
>>>          check if the LHS of the first stmt of the next child matches
>>>OPRND.
>>> @@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
>>>slp_node,
>>>
>>>        if (!vectorized_defs)
>>>          {
>>> -          if (i == 0)
>>> +         if (first_iteration)
>>>              {
>>>                number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS
>>>(slp_node);
>>>                /* Number of vector stmts was calculated according to
>>>LHS in
>>> @@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree
>>>slp_node,
>>>        /* For reductions, we only need initial values.  */
>>>        if (reduc_index != -1)
>>>          return;
>>> +
>>> +      first_iteration = false;
>>>      }
>>>  }
>>>
>>>
>>
>
>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c
b/gcc/testsuite/gcc.dg/vect/pr71752.c
new file mode 100644
index 
0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445dff
bf23f0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr71752.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+
+unsigned int q4, yg;
+
+unsigned int
+w6 (unsigned int z5, unsigned int jv)
+{
+  unsigned int *f2 = &jv;
+
+  while (*f2 < 21)
+    {
+      q4 -= jv;
+      z5 -= jv;
+      f2 = &yg;
+      ++(*f2);
+    }
+  return z5;
+}
+
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 
2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a85
9ecd9b0 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5364,7 +5364,7 @@  vectorizable_reduction (gimple *stmt,
gimple_stmt_iterator *gsi,
   auto_vec<tree> vect_defs;
   auto_vec<gimple *> phis;
   int vec_num;
-  tree def0, def1, tem, op0, op1 = NULL_TREE;
+  tree def0, def1, tem, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
   gimple *cond_expr_induction_def_stmt = NULL;
@@ -5964,29 +5964,36 @@  vectorizable_reduction (gimple *stmt,
gimple_stmt_iterator *gsi,
       /* Handle uses.  */
       if (j == 0)
         {
-          op0 = ops[!reduc_index];
-          if (op_type == ternary_op)
-            {
-              if (reduc_index == 0)
-                op1 = ops[2];
-              else
-                op1 = ops[1];
-            }
+	  if (slp_node)
+	    {
+	      /* Get vec defs for all the operands except the reduction index,
+		ensuring the ordering of the ops in the vector is kept.  */
+	      auto_vec<tree, 3> slp_ops;
+	      auto_vec<vec<tree>, 3> vec_defs;

-          if (slp_node)
-            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
-                               slp_node, -1);
+	      slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]);
+	      slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]);
+	      if (op_type == ternary_op)
+		slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]);
+
+	      vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
+
+	      vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1 : 0]);
+	      if (op_type == ternary_op)
+		vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ? 1 : 2]);
+	    }
           else
-            {
+	    {
               loop_vec_def0 = vect_get_vec_def_for_operand
(ops[!reduc_index],
                                                             stmt);
               vec_oprnds0.quick_push (loop_vec_def0);
               if (op_type == ternary_op)
                {
+		 op1 = (reduc_index == 0) ? ops[2] : ops[1];
                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
                  vec_oprnds1.quick_push (loop_vec_def1);
                }
-            }
+	    }
         }
       else
         {
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 
fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050c8
3cc91fd 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3200,10 +3200,19 @@  vect_get_slp_defs (vec<tree> ops, slp_tree
slp_node,
   vec<tree> vec_defs;
   tree oprnd;
   bool vectorized_defs;
+  bool first_iteration = true;

   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   FOR_EACH_VEC_ELT (ops, i, oprnd)
     {
+      if (oprnd == NULL)
+	{
+	  vec_defs = vNULL;
+	  vec_defs.create (0);
+	  vec_oprnds->quick_push (vec_defs);
+	  continue;
+	}
+
       /* For each operand we check if it has vectorized definitions in a
child