diff mbox series

Set bound/cmp/control for until wrap loop.

Message ID 20210830061535.146396-1-guojiufu@linux.ibm.com
State New
Headers show
Series Set bound/cmp/control for until wrap loop. | expand

Commit Message

Jiufu Guo Aug. 30, 2021, 6:15 a.m. UTC
Hi,

In patch r12-3136, niter->control, niter->bound and niter->cmp are
derived from number_of_iterations_lt.  While for 'until wrap condition',
the calculation in number_of_iterations_lt is not align the requirements
on the define of them and requirements in determine_exit_conditions.

This patch calculate niter->control, niter->bound and niter->cmp in
number_of_iterations_until_wrap.

The ICEs in the PR are pass with this patch.
Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
Is this ok for trunk?

BR.
Jiufu Guo

---
 gcc/tree-ssa-loop-niter.c              | 14 +++++++++++++-
 gcc/testsuite/gcc.dg/pr102087.c        | 25 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  4 +++-
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr102087.c

Comments

Jiufu Guo Aug. 30, 2021, 6:36 a.m. UTC | #1
On 2021-08-30 14:15, Jiufu Guo wrote:
> Hi,
> 
> In patch r12-3136, niter->control, niter->bound and niter->cmp are
> derived from number_of_iterations_lt.  While for 'until wrap 
> condition',
> the calculation in number_of_iterations_lt is not align the 
> requirements
> on the define of them and requirements in determine_exit_conditions.
> 
> This patch calculate niter->control, niter->bound and niter->cmp in
> number_of_iterations_until_wrap.
> 
> The ICEs in the PR are pass with this patch.
> Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
> Is this ok for trunk?
> 
> BR.
> Jiufu Guo
> 
Add ChangeLog:
gcc/ChangeLog:

2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>

         PR tree-optimization/102087
         * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
         Set bound/cmp/control for niter.

gcc/testsuite/ChangeLog:

2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>

         PR tree-optimization/102087
         * gcc.dg/vect/pr101145_3.c: Update tests.
         * gcc.dg/pr102087.c: New test.

> ---
>  gcc/tree-ssa-loop-niter.c              | 14 +++++++++++++-
>  gcc/testsuite/gcc.dg/pr102087.c        | 25 +++++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  4 +++-
>  3 files changed, 41 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
> 
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 7af92d1c893..747f04d3ce0 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap (class loop *,
> tree type, affine_iv *iv0,
>  				 affine_iv *iv1, class tree_niter_desc *niter)
>  {
>    tree niter_type = unsigned_type_for (type);
> -  tree step, num, assumptions, may_be_zero;
> +  tree step, num, assumptions, may_be_zero, span;
>    wide_int high, low, max, min;
> 
>    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, 
> iv0->base);
> @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap (class loop *,
> tree type, affine_iv *iv0,
>  	low = wi::to_wide (iv0->base);
>        else
>  	low = min;
> +
> +      niter->control = *iv1;
>      }
>    /* {base, -C} < n.  */
>    else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop 
> (iv1->step))
> @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap (class loop *,
> tree type, affine_iv *iv0,
>  	high = wi::to_wide (iv1->base);
>        else
>  	high = max;
> +
> +      niter->control = *iv0;
>      }
>    else
>      return false;
> @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap (class loop *,
> tree type, affine_iv *iv0,
>  				      niter->assumptions, assumptions);
> 
>    niter->control.no_overflow = false;
> +  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
> +				     niter->control.base, niter->control.step);
> +  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
> +		      fold_convert (niter_type, niter->control.step));
> +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
> +			      fold_convert (niter_type, niter->control.base));
> +  niter->bound = fold_convert (type, niter->bound);
> +  niter->cmp = NE_EXPR;
> 
>    return true;
>  }
> diff --git a/gcc/testsuite/gcc.dg/pr102087.c 
> b/gcc/testsuite/gcc.dg/pr102087.c
> new file mode 100644
> index 00000000000..ef1f9f5cba9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr102087.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +unsigned __attribute__ ((noinline))
> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (n < ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +volatile int a[1];
> +unsigned b;
> +int c;
> +
> +int
> +check ()
> +{
> +  int d;
> +  for (; b > 1; b++)
> +    for (c = 0; c < 2; c++)
> +      for (d = 0; d < 2; d++)
> +	a[0];
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> index 99289afec0b..40cb0240aaa 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> @@ -1,5 +1,6 @@
>  /* { dg-require-effective-target vect_int } */
>  /* { dg-options "-O3 -fdump-tree-vect-details" } */
> +
>  #define TYPE int *
>  #define MIN ((TYPE)0)
>  #define MAX ((TYPE)((long long)-1))
> @@ -10,4 +11,5 @@
> 
>  #include "pr101145.inc"
> 
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } 
> */
> +/* pointer size may not be vectorized, checking niter is ok. */
> +/* { dg-final { scan-tree-dump "Symbolic number of iterations is" 
> "vect" } } */
Richard Biener Aug. 30, 2021, 12:02 p.m. UTC | #2
On Mon, 30 Aug 2021, guojiufu wrote:

> On 2021-08-30 14:15, Jiufu Guo wrote:
> > Hi,
> > 
> > In patch r12-3136, niter->control, niter->bound and niter->cmp are
> > derived from number_of_iterations_lt.  While for 'until wrap condition',
> > the calculation in number_of_iterations_lt is not align the requirements
> > on the define of them and requirements in determine_exit_conditions.
> > 
> > This patch calculate niter->control, niter->bound and niter->cmp in
> > number_of_iterations_until_wrap.
> > 
> > The ICEs in the PR are pass with this patch.
> > Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
> > Is this ok for trunk?
> > 
> > BR.
> > Jiufu Guo
> > 
> Add ChangeLog:
> gcc/ChangeLog:
> 
> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
> 
>         PR tree-optimization/102087
>         * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
>         Set bound/cmp/control for niter.
> 
> gcc/testsuite/ChangeLog:
> 
> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
> 
>         PR tree-optimization/102087
>         * gcc.dg/vect/pr101145_3.c: Update tests.
>         * gcc.dg/pr102087.c: New test.
> 
> > ---
> >  gcc/tree-ssa-loop-niter.c              | 14 +++++++++++++-
> >  gcc/testsuite/gcc.dg/pr102087.c        | 25 +++++++++++++++++++++++++
> >  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  4 +++-
> >  3 files changed, 41 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
> > 
> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> > index 7af92d1c893..747f04d3ce0 100644
> > --- a/gcc/tree-ssa-loop-niter.c
> > +++ b/gcc/tree-ssa-loop-niter.c
> > @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap (class loop *,
> > tree type, affine_iv *iv0,
> >  				 affine_iv *iv1, class tree_niter_desc *niter)
> >  {
> >    tree niter_type = unsigned_type_for (type);
> > -  tree step, num, assumptions, may_be_zero;
> > +  tree step, num, assumptions, may_be_zero, span;
> >    wide_int high, low, max, min;
> > 
> >    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base,
> > iv0->base);
> > @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap (class loop *,
> > tree type, affine_iv *iv0,
> >   low = wi::to_wide (iv0->base);
> >         else
> > 	low = min;
> > +
> > +      niter->control = *iv1;
> >      }
> >    /* {base, -C} < n.  */
> >    else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop 
> > (iv1->step))
> > @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap (class loop *,
> > tree type, affine_iv *iv0,
> >   high = wi::to_wide (iv1->base);
> >         else
> > 	high = max;
> > +
> > +      niter->control = *iv0;
> >      }
> >    else
> >      return false;

it looks like the above two should already be in effect from the
caller (guarding with integer_nozerop)?

> > @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap (class loop *,
> > tree type, affine_iv *iv0,
> >            niter->assumptions, assumptions);
> > 
> >    niter->control.no_overflow = false;
> > +  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
> > +				     niter->control.base,
> > niter->control.step);

how do we know IVn - STEP doesn't already wrap?  A comment might be
good to explain you're turning the simplified exit condition into

   { IVbase - STEP, +, STEP } != niter * STEP + (IVbase - STEP)

which, when mathematically looking at it makes me wonder why there's
the seemingly redundant '- STEP' term?  Also is NE_EXPR really
correct since STEP might be not 1?  Only for non equality compares
the '- STEP' should matter?

Richard.

> > +  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
> > +		      fold_convert (niter_type, niter->control.step));
> > +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
> > +			      fold_convert (niter_type, niter->control.base));
> > +  niter->bound = fold_convert (type, niter->bound);
> > +  niter->cmp = NE_EXPR;
> > 
> >    return true;
> > }
> > diff --git a/gcc/testsuite/gcc.dg/pr102087.c
> > b/gcc/testsuite/gcc.dg/pr102087.c
> > new file mode 100644
> > index 00000000000..ef1f9f5cba9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr102087.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +
> > +unsigned __attribute__ ((noinline))
> > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> > +{
> > +  while (n < ++l)
> > +    *a++ = *b++ + 1;
> > +  return l;
> > +}
> > +
> > +volatile int a[1];
> > +unsigned b;
> > +int c;
> > +
> > +int
> > +check ()
> > +{
> > +  int d;
> > +  for (; b > 1; b++)
> > +    for (c = 0; c < 2; c++)
> > +      for (d = 0; d < 2; d++)
> > +	a[0];
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> > index 99289afec0b..40cb0240aaa 100644
> > --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> > @@ -1,5 +1,6 @@
> >  /* { dg-require-effective-target vect_int } */
> >  /* { dg-options "-O3 -fdump-tree-vect-details" } */
> > +
> >  #define TYPE int *
> >  #define MIN ((TYPE)0)
> >  #define MAX ((TYPE)((long long)-1))
> > @@ -10,4 +11,5 @@
> > 
> >  #include "pr101145.inc"
> > 
> > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> > +/* pointer size may not be vectorized, checking niter is ok. */
> > +/* { dg-final { scan-tree-dump "Symbolic number of iterations is" "vect" }
> > } */
>
Jiufu Guo Aug. 31, 2021, 3:12 a.m. UTC | #3
On 2021-08-30 20:02, Richard Biener wrote:
> On Mon, 30 Aug 2021, guojiufu wrote:
> 
>> On 2021-08-30 14:15, Jiufu Guo wrote:
>> > Hi,
>> >
>> > In patch r12-3136, niter->control, niter->bound and niter->cmp are
>> > derived from number_of_iterations_lt.  While for 'until wrap condition',
>> > the calculation in number_of_iterations_lt is not align the requirements
>> > on the define of them and requirements in determine_exit_conditions.
>> >
>> > This patch calculate niter->control, niter->bound and niter->cmp in
>> > number_of_iterations_until_wrap.
>> >
>> > The ICEs in the PR are pass with this patch.
>> > Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
>> > Is this ok for trunk?
>> >
>> > BR.
>> > Jiufu Guo
>> >
>> Add ChangeLog:
>> gcc/ChangeLog:
>> 
>> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
>> 
>>         PR tree-optimization/102087
>>         * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
>>         Set bound/cmp/control for niter.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
>> 
>>         PR tree-optimization/102087
>>         * gcc.dg/vect/pr101145_3.c: Update tests.
>>         * gcc.dg/pr102087.c: New test.
>> 
>> > ---
>> >  gcc/tree-ssa-loop-niter.c              | 14 +++++++++++++-
>> >  gcc/testsuite/gcc.dg/pr102087.c        | 25 +++++++++++++++++++++++++
>> >  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  4 +++-
>> >  3 files changed, 41 insertions(+), 2 deletions(-)
>> >  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
>> >
>> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> > index 7af92d1c893..747f04d3ce0 100644
>> > --- a/gcc/tree-ssa-loop-niter.c
>> > +++ b/gcc/tree-ssa-loop-niter.c
>> > @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap (class loop *,
>> > tree type, affine_iv *iv0,
>> >  				 affine_iv *iv1, class tree_niter_desc *niter)
>> >  {
>> >    tree niter_type = unsigned_type_for (type);
>> > -  tree step, num, assumptions, may_be_zero;
>> > +  tree step, num, assumptions, may_be_zero, span;
>> >    wide_int high, low, max, min;
>> >
>> >    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base,
>> > iv0->base);
>> > @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap (class loop *,
>> > tree type, affine_iv *iv0,
>> >   low = wi::to_wide (iv0->base);
>> >         else
>> > 	low = min;
>> > +
>> > +      niter->control = *iv1;
>> >      }
>> >    /* {base, -C} < n.  */
>> >    else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> > (iv1->step))
>> > @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap (class loop *,
>> > tree type, affine_iv *iv0,
>> >   high = wi::to_wide (iv1->base);
>> >         else
>> > 	high = max;
>> > +
>> > +      niter->control = *iv0;
>> >      }
>> >    else
>> >      return false;
> 
> it looks like the above two should already be in effect from the
> caller (guarding with integer_nozerop)?

I add them just because set these fields in one function.
Yes, they have been set in caller already,  I could remove them here.

> 
>> > @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap (class loop *,
>> > tree type, affine_iv *iv0,
>> >            niter->assumptions, assumptions);
>> >
>> >    niter->control.no_overflow = false;
>> > +  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
>> > +				     niter->control.base,
>> > niter->control.step);
> 
> how do we know IVn - STEP doesn't already wrap?

The last IV value is just cross the max/min value of the type
at the last iteration,  then IVn - STEP is the nearest value
to max(or min) and not wrap.

> A comment might be
> good to explain you're turning the simplified exit condition into
> 
>    { IVbase - STEP, +, STEP } != niter * STEP + (IVbase - STEP)
> 
> which, when mathematically looking at it makes me wonder why there's
> the seemingly redundant '- STEP' term?  Also is NE_EXPR really
> correct since STEP might be not 1?  Only for non equality compares
> the '- STEP' should matter?

I need to add comments for this.  This is a little tricky.
The last value of the original IV just cross max/min at most one STEP,
at there wrapping already happen.
Using "{IVbase, +, STEP} != niter * STEP + IVbase" is not wrong
in the aspect of exit condition.

But this would not work well with existing code:
like determine_exit_conditions, which will convert NE_EXP to
LT_EXPR/GT_EXPR.  And so, the '- STEP' is added to adjust the
IV.base and bound, with '- STEP' the bound will be the last value
just before wrap.

Thanks again for your review!

BR.
Jiufu

> 
> Richard.
> 
>> > +  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
>> > +		      fold_convert (niter_type, niter->control.step));
>> > +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
>> > +			      fold_convert (niter_type, niter->control.base));
>> > +  niter->bound = fold_convert (type, niter->bound);
>> > +  niter->cmp = NE_EXPR;
>> >
>> >    return true;
>> > }
>> > diff --git a/gcc/testsuite/gcc.dg/pr102087.c
>> > b/gcc/testsuite/gcc.dg/pr102087.c
>> > new file mode 100644
>> > index 00000000000..ef1f9f5cba9
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/pr102087.c
>> > @@ -0,0 +1,25 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O3" } */
>> > +
>> > +unsigned __attribute__ ((noinline))
>> > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
>> > +{
>> > +  while (n < ++l)
>> > +    *a++ = *b++ + 1;
>> > +  return l;
>> > +}
>> > +
>> > +volatile int a[1];
>> > +unsigned b;
>> > +int c;
>> > +
>> > +int
>> > +check ()
>> > +{
>> > +  int d;
>> > +  for (; b > 1; b++)
>> > +    for (c = 0; c < 2; c++)
>> > +      for (d = 0; d < 2; d++)
>> > +	a[0];
>> > +  return 0;
>> > +}
>> > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> > index 99289afec0b..40cb0240aaa 100644
>> > --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> > @@ -1,5 +1,6 @@
>> >  /* { dg-require-effective-target vect_int } */
>> >  /* { dg-options "-O3 -fdump-tree-vect-details" } */
>> > +
>> >  #define TYPE int *
>> >  #define MIN ((TYPE)0)
>> >  #define MAX ((TYPE)((long long)-1))
>> > @@ -10,4 +11,5 @@
>> >
>> >  #include "pr101145.inc"
>> >
>> > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>> > +/* pointer size may not be vectorized, checking niter is ok. */
>> > +/* { dg-final { scan-tree-dump "Symbolic number of iterations is" "vect" }
>> > } */
>>
Richard Biener Aug. 31, 2021, 1:37 p.m. UTC | #4
On Tue, 31 Aug 2021, guojiufu wrote:

> On 2021-08-30 20:02, Richard Biener wrote:
> > On Mon, 30 Aug 2021, guojiufu wrote:
> > 
> >> On 2021-08-30 14:15, Jiufu Guo wrote:
> >> > Hi,
> >> >
> >> > In patch r12-3136, niter->control, niter->bound and niter->cmp are
> >> > derived from number_of_iterations_lt.  While for 'until wrap condition',
> >> > the calculation in number_of_iterations_lt is not align the requirements
> >> > on the define of them and requirements in determine_exit_conditions.
> >> >
> >> > This patch calculate niter->control, niter->bound and niter->cmp in
> >> > number_of_iterations_until_wrap.
> >> >
> >> > The ICEs in the PR are pass with this patch.
> >> > Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
> >> > Is this ok for trunk?
> >> >
> >> > BR.
> >> > Jiufu Guo
> >> >
> >> Add ChangeLog:
> >> gcc/ChangeLog:
> >> 
> >> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
> >> 
> >>         PR tree-optimization/102087
> >>         * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
> >>         Set bound/cmp/control for niter.
> >> 
> >> gcc/testsuite/ChangeLog:
> >> 
> >> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
> >> 
> >>         PR tree-optimization/102087
> >>         * gcc.dg/vect/pr101145_3.c: Update tests.
> >>         * gcc.dg/pr102087.c: New test.
> >> 
> >> > ---
> >> >  gcc/tree-ssa-loop-niter.c              | 14 +++++++++++++-
> >> >  gcc/testsuite/gcc.dg/pr102087.c        | 25 +++++++++++++++++++++++++
> >> >  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  4 +++-
> >> >  3 files changed, 41 insertions(+), 2 deletions(-)
> >> >  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
> >> >
> >> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> > index 7af92d1c893..747f04d3ce0 100644
> >> > --- a/gcc/tree-ssa-loop-niter.c
> >> > +++ b/gcc/tree-ssa-loop-niter.c
> >> > @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap (class loop *,
> >> > tree type, affine_iv *iv0,
> >> >  				 affine_iv *iv1, class tree_niter_desc *niter)
> >> >  {
> >> >    tree niter_type = unsigned_type_for (type);
> >> > -  tree step, num, assumptions, may_be_zero;
> >> > +  tree step, num, assumptions, may_be_zero, span;
> >> >    wide_int high, low, max, min;
> >> >
> >> >    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base,
> >> > iv0->base);
> >> > @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap (class loop *,
> >> > tree type, affine_iv *iv0,
> >> >   low = wi::to_wide (iv0->base);
> >> >          else
> >> > 	low = min;
> >> > +
> >> > +      niter->control = *iv1;
> >> >      }
> >> >    /* {base, -C} < n.  */
> >> >    else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop
> >> > (iv1->step))
> >> > @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap (class loop *,
> >> > tree type, affine_iv *iv0,
> >> >   high = wi::to_wide (iv1->base);
> >> >          else
> >> > 	high = max;
> >> > +
> >> > +      niter->control = *iv0;
> >> >      }
> >> >    else
> >> >      return false;
> > 
> > it looks like the above two should already be in effect from the
> > caller (guarding with integer_nozerop)?
> 
> I add them just because set these fields in one function.
> Yes, they have been set in caller already,  I could remove them here.
> 
> > 
> >> > @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap (class loop *,
> >> > tree type, affine_iv *iv0,
> >> >            niter->assumptions, assumptions);
> >> >
> >> >    niter->control.no_overflow = false;
> >> > +  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
> >> > +				     niter->control.base,
> >> > niter->control.step);
> > 
> > how do we know IVn - STEP doesn't already wrap?
> 
> The last IV value is just cross the max/min value of the type
> at the last iteration,  then IVn - STEP is the nearest value
> to max(or min) and not wrap.
> 
> > A comment might be
> > good to explain you're turning the simplified exit condition into
> > 
> >    { IVbase - STEP, +, STEP } != niter * STEP + (IVbase - STEP)
> > 
> > which, when mathematically looking at it makes me wonder why there's
> > the seemingly redundant '- STEP' term?  Also is NE_EXPR really
> > correct since STEP might be not 1?  Only for non equality compares
> > the '- STEP' should matter?
> 
> I need to add comments for this.  This is a little tricky.
> The last value of the original IV just cross max/min at most one STEP,
> at there wrapping already happen.
> Using "{IVbase, +, STEP} != niter * STEP + IVbase" is not wrong
> in the aspect of exit condition.
> 
> But this would not work well with existing code:
> like determine_exit_conditions, which will convert NE_EXP to
> LT_EXPR/GT_EXPR.  And so, the '- STEP' is added to adjust the
> IV.base and bound, with '- STEP' the bound will be the last value
> just before wrap.

Hmm.  The control IV is documented as

  /* The simplified shape of the exit condition.  The loop exits if
     CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
     LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
     LE_EXPR and negative if CMP is GE_EXPR.  This information is used
     by loop unrolling.  */
  affine_iv control;

but determine_exit_conditions seems to assume the IV does not wrap?
In fact determine_exit_conditions seems to just build ->base CMP bound
where bound is the IV bound biased by #unroll * step - step.  So how
does biasing by step * 1 help?

Does the control IV wrap in our case?

Richard.

> Thanks again for your review!
> 
> BR.
> Jiufu
> 
> > 
> > Richard.
> > 
> >> > +  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
> >> > +		      fold_convert (niter_type, niter->control.step));
> >> > +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
> >> > +			      fold_convert (niter_type, niter->control.base));
> >> > +  niter->bound = fold_convert (type, niter->bound);
> >> > +  niter->cmp = NE_EXPR;
> >> >
> >> >    return true;
> >> > }
> >> > diff --git a/gcc/testsuite/gcc.dg/pr102087.c
> >> > b/gcc/testsuite/gcc.dg/pr102087.c
> >> > new file mode 100644
> >> > index 00000000000..ef1f9f5cba9
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/gcc.dg/pr102087.c
> >> > @@ -0,0 +1,25 @@
> >> > +/* { dg-do compile } */
> >> > +/* { dg-options "-O3" } */
> >> > +
> >> > +unsigned __attribute__ ((noinline))
> >> > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> >> > +{
> >> > +  while (n < ++l)
> >> > +    *a++ = *b++ + 1;
> >> > +  return l;
> >> > +}
> >> > +
> >> > +volatile int a[1];
> >> > +unsigned b;
> >> > +int c;
> >> > +
> >> > +int
> >> > +check ()
> >> > +{
> >> > +  int d;
> >> > +  for (; b > 1; b++)
> >> > +    for (c = 0; c < 2; c++)
> >> > +      for (d = 0; d < 2; d++)
> >> > +	a[0];
> >> > +  return 0;
> >> > +}
> >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> > index 99289afec0b..40cb0240aaa 100644
> >> > --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> > @@ -1,5 +1,6 @@
> >> >  /* { dg-require-effective-target vect_int } */
> >> >  /* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> > +
> >> >  #define TYPE int *
> >> >  #define MIN ((TYPE)0)
> >> >  #define MAX ((TYPE)((long long)-1))
> >> > @@ -10,4 +11,5 @@
> >> >
> >> >  #include "pr101145.inc"
> >> >
> >> > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } }
> >> > */
> >> > +/* pointer size may not be vectorized, checking niter is ok. */
> >> > +/* { dg-final { scan-tree-dump "Symbolic number of iterations is" "vect"
> >> > }
> >> > } */
> >> 
> 
>
Jiufu Guo Sept. 1, 2021, 3:30 a.m. UTC | #5
Richard Biener <rguenther@suse.de> writes:

> On Tue, 31 Aug 2021, guojiufu wrote:
>
>> On 2021-08-30 20:02, Richard Biener wrote:
>> > On Mon, 30 Aug 2021, guojiufu wrote:
>> > 
>> >> On 2021-08-30 14:15, Jiufu Guo wrote:
>> >> > Hi,
>> >> >
>> >> > In patch r12-3136, niter->control, niter->bound and 
>> >> > niter->cmp are
>> >> > derived from number_of_iterations_lt.  While for 'until 
>> >> > wrap condition',
>> >> > the calculation in number_of_iterations_lt is not align 
>> >> > the requirements
>> >> > on the define of them and requirements in 
>> >> > determine_exit_conditions.
>> >> >
>> >> > This patch calculate niter->control, niter->bound and 
>> >> > niter->cmp in
>> >> > number_of_iterations_until_wrap.
>> >> >
>> >> > The ICEs in the PR are pass with this patch.
>> >> > Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
>> >> > Is this ok for trunk?
>> >> >
>> >> > BR.
>> >> > Jiufu Guo
>> >> >
>> >> Add ChangeLog:
>> >> >  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
>> >> >
>> >> > diff --git a/gcc/tree-ssa-loop-niter.c 
>> >> > b/gcc/tree-ssa-loop-niter.c
>> >> > index 7af92d1c893..747f04d3ce0 100644
>> >> > --- a/gcc/tree-ssa-loop-niter.c
>> >> > +++ b/gcc/tree-ssa-loop-niter.c
>> >> > @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap 
>> >> > (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >  				 affine_iv *iv1, class 
>> >> >  tree_niter_desc *niter)
>> >> >  {
>> >> >    tree niter_type = unsigned_type_for (type);
>> >> > -  tree step, num, assumptions, may_be_zero;
>> >> > +  tree step, num, assumptions, may_be_zero, span;
>> >> >    wide_int high, low, max, min;
>> >> >
>> >> >    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, 
>> >> >    iv1->base,
>> >> > iv0->base);
>> >> > @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap 
>> >> > (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >   low = wi::to_wide (iv0->base);
>> >> >          else
>> >> > 	low = min;
>> >> > +
>> >> > +      niter->control = *iv1;
>> >> >      }
>> >> >    /* {base, -C} < n.  */
>> >> >    else if (tree_int_cst_sign_bit (iv0->step) && 
>> >> >    integer_zerop
>> >> > (iv1->step))
>> >> > @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap 
>> >> > (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >   high = wi::to_wide (iv1->base);
>> >> >          else
>> >> > 	high = max;
>> >> > +
>> >> > +      niter->control = *iv0;
>> >> >      }
>> >> >    else
>> >> >      return false;
>> > 
>> > it looks like the above two should already be in effect from 
>> > the
>> > caller (guarding with integer_nozerop)?
>> 
>> I add them just because set these fields in one function.
>> Yes, they have been set in caller already,  I could remove them 
>> here.
>> 
>> > 
>> >> > @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap 
>> >> > (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >            niter->assumptions, assumptions);
>> >> >
>> >> >    niter->control.no_overflow = false;
>> >> > +  niter->control.base = fold_build2 (MINUS_EXPR, 
>> >> > niter_type,
>> >> > +				     niter->control.base,
>> >> > niter->control.step);
>> > 
>> > how do we know IVn - STEP doesn't already wrap?
>> 
>> The last IV value is just cross the max/min value of the type
>> at the last iteration,  then IVn - STEP is the nearest value
>> to max(or min) and not wrap.
>> 
>> > A comment might be
>> > good to explain you're turning the simplified exit condition 
>> > into
>> > 
>> >    { IVbase - STEP, +, STEP } != niter * STEP + (IVbase - 
>> >    STEP)
>> > 
>> > which, when mathematically looking at it makes me wonder why 
>> > there's
>> > the seemingly redundant '- STEP' term?  Also is NE_EXPR 
>> > really
>> > correct since STEP might be not 1?  Only for non equality 
>> > compares
>> > the '- STEP' should matter?
>> 
>> I need to add comments for this.  This is a little tricky.
>> The last value of the original IV just cross max/min at most 
>> one STEP,
>> at there wrapping already happen.
>> Using "{IVbase, +, STEP} != niter * STEP + IVbase" is not wrong
>> in the aspect of exit condition.
>> 
>> But this would not work well with existing code:
>> like determine_exit_conditions, which will convert NE_EXP to
>> LT_EXPR/GT_EXPR.  And so, the '- STEP' is added to adjust the
>> IV.base and bound, with '- STEP' the bound will be the last 
>> value
>> just before wrap.
>
> Hmm.  The control IV is documented as
>
>   /* The simplified shape of the exit condition.  The loop exits 
>   if
>      CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
>      LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP 
>      is
>      LE_EXPR and negative if CMP is GE_EXPR.  This information 
>      is used
>      by loop unrolling.  */
>   affine_iv control;
>
> but determine_exit_conditions seems to assume the IV does not 
> wrap?

Strictly speaking , I would say yes,  determine_exit_conditions 
assume
IV does not wrap: there is code:

  if (cmp == LT_EXPR)
    assum = fold_build2 (GE_EXPR, boolean_type_node,
			 bound,
			 fold_build2 (PLUS_EXPR, type, min, 
			 delta));
  else
     xxxx
     
This means if 'bound' is the value after wrap, the 'assum' with be 
false.
This is also the reason that we may need to biase 'bound' and 
'base' by
'step * 1'.  Because, in our case like "while(n<l++)",
if we set 'bound' as 'iv.base + niter * step', the value of 
'bound' will
be '0(zero)' which crosses max of the type one STEP.
So, we may transform the exit condition to
"{IVbase - STEP, +, STEP } != niter * STEP + (IVbase - STEP)"
And then new control IV and new bound does not wrap.

> In fact determine_exit_conditions seems to just build ->base CMP 
> bound
> where bound is the IV bound biased by #unroll * step - step.  So 
> how
> does biasing by step * 1 help?

determine_exit_conditions adjust bound by #unroll * step - step 
for more
check, like transform to "base cmp new-bound"; for this checking, 
'bound'
is ok with wrapped value, but for the 'assum' as above (assum =
bound >= min + #unroll * step - step), biasing "step * 1" would be 
fine.

>
> Does the control IV wrap in our case?

I think, this is a key question, which may affect if it make sense 
for
above transform.  Thanks!
Let me explain my understanding: for original exit condition (like 
n<l++),
the final value of the IV(i), it is already cross max/min value of 
the type.
Or say, for the value of IV(i) "after exit loop", wraps if 
strictly
speaking.
While, for all the values of IV(i) inside the loop, they are in 
valid
range of the type;  we may treat it as 'no wrap' for some usage 
like unroll.

Thanks for review and future comments and sugguestions!

BR,
Jiufu

>
> Richard.
>
>> Thanks again for your review!
>> 
>> BR.
>> Jiufu
>> 
>> > 
>> > Richard.
>> > 
>> >> > +  span = fold_build2 (MULT_EXPR, niter_type, 
>> >> > niter->niter,
>> >> > +		      fold_convert (niter_type, 
>> >> > niter->control.step));
>> >> > +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, 
>> >> > span,
>> >> > +			      fold_convert (niter_type, 
>> >> > niter->control.base));
>> >> > +  niter->bound = fold_convert (type, niter->bound);
>> >> > +  niter->cmp = NE_EXPR;
>> >> >
>> >> >    return true;
>> >> > }
>> >> > diff --git a/gcc/testsuite/gcc.dg/pr102087.c
>> >> > b/gcc/testsuite/gcc.dg/pr102087.c
>> >> > new file mode 100644
>> >> > index 00000000000..ef1f9f5cba9
>> >> > --- /dev/null
>> >> > +++ b/gcc/testsuite/gcc.dg/pr102087.c
>> >> > @@ -0,0 +1,25 @@
>> >> > +/* { dg-do compile } */
>> >> > +/* { dg-options "-O3" } */
>> >> > +
>> >> > +unsigned __attribute__ ((noinline))
>> >> > +foo (int *__restrict__ a, int *__restrict__ b, unsigned 
>> >> > l, unsigned n)
>> >> > +{
>> >> > +  while (n < ++l)
>> >> > +    *a++ = *b++ + 1;
>> >> > +  return l;
>> >> > +}
>> >> > +
>> >> > +volatile int a[1];
>> >> > +unsigned b;
>> >> > +int c;
>> >> > +
>> >> > +int
>> >> > +check ()
>> >> > +{
>> >> > +  int d;
>> >> > +  for (; b > 1; b++)
>> >> > +    for (c = 0; c < 2; c++)
>> >> > +      for (d = 0; d < 2; d++)
>> >> > +	a[0];
>> >> > +  return 0;
>> >> > +}
>> >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > index 99289afec0b..40cb0240aaa 100644
>> >> > --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > @@ -1,5 +1,6 @@
>> >> >  /* { dg-require-effective-target vect_int } */
>> >> >  /* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> > +
>> >> >  #define TYPE int *
>> >> >  #define MIN ((TYPE)0)
>> >> >  #define MAX ((TYPE)((long long)-1))
>> >> > @@ -10,4 +11,5 @@
>> >> >
>> >> >  #include "pr101145.inc"
>> >> >
>> >> > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 
>> >> > 2 "vect" } }
>> >> > */
>> >> > +/* pointer size may not be vectorized, checking niter is 
>> >> > ok. */
>> >> > +/* { dg-final { scan-tree-dump "Symbolic number of 
>> >> > iterations is" "vect"
>> >> > }
>> >> > } */
>> >> 
>> 
>>
Jiufu Guo Sept. 1, 2021, 5:22 a.m. UTC | #6
在 2021/9/1 上午11:30, Jiufu Guo via Gcc-patches 写道:
> Richard Biener <rguenther@suse.de> writes:
>
>> On Tue, 31 Aug 2021, guojiufu wrote:
>>
>>> On 2021-08-30 20:02, Richard Biener wrote:
>>> > On Mon, 30 Aug 2021, guojiufu wrote:
>>> > >> On 2021-08-30 14:15, Jiufu Guo wrote:
>>> >> > Hi,
>>> >> >
>>> >> > In patch r12-3136, niter->control, niter->bound and >> > 
>>> niter->cmp are
>>> >> > derived from number_of_iterations_lt.  While for 'until >> > 
>>> wrap condition',
>>> >> > the calculation in number_of_iterations_lt is not align >> > 
>>> the requirements
>>> >> > on the define of them and requirements in >> > 
>>> determine_exit_conditions.
>>> >> >
>>> >> > This patch calculate niter->control, niter->bound and >> > 
>>> niter->cmp in
>>> >> > number_of_iterations_until_wrap.
>>> >> >
>>> >> > The ICEs in the PR are pass with this patch.
>>> >> > Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
>>> >> > Is this ok for trunk?
>>> >> >
>>> >> > BR.
>>> >> > Jiufu Guo
>>> >> >
>>> >> Add ChangeLog:
>>> >> >  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
>>> >> >
>>> >> > diff --git a/gcc/tree-ssa-loop-niter.c >> > 
>>> b/gcc/tree-ssa-loop-niter.c
>>> >> > index 7af92d1c893..747f04d3ce0 100644
>>> >> > --- a/gcc/tree-ssa-loop-niter.c
>>> >> > +++ b/gcc/tree-ssa-loop-niter.c
>>> >> > @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap >> > 
>>> (class loop *,
>>> >> > tree type, affine_iv *iv0,
>>> >> >                   affine_iv *iv1, class >> >  tree_niter_desc 
>>> *niter)
>>> >> >  {
>>> >> >    tree niter_type = unsigned_type_for (type);
>>> >> > -  tree step, num, assumptions, may_be_zero;
>>> >> > +  tree step, num, assumptions, may_be_zero, span;
>>> >> >    wide_int high, low, max, min;
>>> >> >
>>> >> >    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, >> 
>>> >    iv1->base,
>>> >> > iv0->base);
>>> >> > @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap >> > 
>>> (class loop *,
>>> >> > tree type, affine_iv *iv0,
>>> >> >   low = wi::to_wide (iv0->base);
>>> >> >          else
>>> >> >     low = min;
>>> >> > +
>>> >> > +      niter->control = *iv1;
>>> >> >      }
>>> >> >    /* {base, -C} < n.  */
>>> >> >    else if (tree_int_cst_sign_bit (iv0->step) && >> >    
>>> integer_zerop
>>> >> > (iv1->step))
>>> >> > @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap >> > 
>>> (class loop *,
>>> >> > tree type, affine_iv *iv0,
>>> >> >   high = wi::to_wide (iv1->base);
>>> >> >          else
>>> >> >     high = max;
>>> >> > +
>>> >> > +      niter->control = *iv0;
>>> >> >      }
>>> >> >    else
>>> >> >      return false;
>>> > > it looks like the above two should already be in effect from > the
>>> > caller (guarding with integer_nozerop)?
>>>
>>> I add them just because set these fields in one function.
>>> Yes, they have been set in caller already,  I could remove them here.
>>>
>>> > >> > @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap >> > 
>>> (class loop *,
>>> >> > tree type, affine_iv *iv0,
>>> >> >            niter->assumptions, assumptions);
>>> >> >
>>> >> >    niter->control.no_overflow = false;
>>> >> > +  niter->control.base = fold_build2 (MINUS_EXPR, >> > niter_type,
>>> >> > +                     niter->control.base,
>>> >> > niter->control.step);
>>> > > how do we know IVn - STEP doesn't already wrap?
>>>
>>> The last IV value is just cross the max/min value of the type
>>> at the last iteration,  then IVn - STEP is the nearest value
>>> to max(or min) and not wrap.
>>>
>>> > A comment might be
>>> > good to explain you're turning the simplified exit condition > into
>>> > >    { IVbase - STEP, +, STEP } != niter * STEP + (IVbase - >    
>>> STEP)
>>> > > which, when mathematically looking at it makes me wonder why > 
>>> there's
>>> > the seemingly redundant '- STEP' term?  Also is NE_EXPR > really
>>> > correct since STEP might be not 1?  Only for non equality > compares
>>> > the '- STEP' should matter?
>>>
>>> I need to add comments for this.  This is a little tricky.
>>> The last value of the original IV just cross max/min at most one STEP,
>>> at there wrapping already happen.
>>> Using "{IVbase, +, STEP} != niter * STEP + IVbase" is not wrong
>>> in the aspect of exit condition.
>>>
>>> But this would not work well with existing code:
>>> like determine_exit_conditions, which will convert NE_EXP to
>>> LT_EXPR/GT_EXPR.  And so, the '- STEP' is added to adjust the
>>> IV.base and bound, with '- STEP' the bound will be the last value
>>> just before wrap.
>>
>> Hmm.  The control IV is documented as
>>
>>   /* The simplified shape of the exit condition.  The loop exits   if
>>      CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
>>      LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP      is
>>      LE_EXPR and negative if CMP is GE_EXPR.  This information      
>> is used
>>      by loop unrolling.  */
>>   affine_iv control;
>>
>> but determine_exit_conditions seems to assume the IV does not wrap?
>
> Strictly speaking , I would say yes,  determine_exit_conditions assume
> IV does not wrap: there is code:
>
>  if (cmp == LT_EXPR)
>    assum = fold_build2 (GE_EXPR, boolean_type_node,
>              bound,
>              fold_build2 (PLUS_EXPR, type, min, delta));
>  else
>     xxxx
>     This means if 'bound' is the value after wrap, the 'assum' with be 
> false.
> This is also the reason that we may need to biase 'bound' and 'base' by
> 'step * 1'.  Because, in our case like "while(n<l++)",
> if we set 'bound' as 'iv.base + niter * step', the value of 'bound' will
> be '0(zero)' which crosses max of the type one STEP.
> So, we may transform the exit condition to
> "{IVbase - STEP, +, STEP } != niter * STEP + (IVbase - STEP)"
> And then new control IV and new bound does not wrap.
>
>> In fact determine_exit_conditions seems to just build ->base CMP bound
>> where bound is the IV bound biased by #unroll * step - step.  So how
>> does biasing by step * 1 help?
>
> determine_exit_conditions adjust bound by #unroll * step - step for more
> check, like transform to "base cmp new-bound"; for this checking, 'bound'
> is ok with wrapped value, but for the 'assum' as above (assum =
> bound >= min + #unroll * step - step), biasing "step * 1" would be fine.
>
>>
>> Does the control IV wrap in our case?
>
> I think, this is a key question, which may affect if it make sense for
> above transform.  Thanks!
> Let me explain my understanding: for original exit condition (like 
> n<l++),
> the final value of the IV(i), it is already cross max/min value of the 
> type.
> Or say, for the value of IV(i) "after exit loop", wraps if strictly
> speaking.
> While, for all the values of IV(i) inside the loop, they are in valid
> range of the type;  we may treat it as 'no wrap' for some usage like 
> unroll.

Correction: for all the "other" values of IV(i) inside the loop are not 
wrap.

But the issue is the 'final' value of IV(i) may be used in the last 
iteration.

So, I'm also wondering if we may need to keep it as:

"{IVbase, +, STEP } != niter * STEP + IVbase.

>
> Thanks for review and future comments and suggestions!

BTW, I notice the lines seems not well wrap-line, sorry.

>
> BR,
> Jiufu
>
>>
>> Richard.
>>
>>> Thanks again for your review!
>>>
>>> BR.
>>> Jiufu
>>>
>>> > > Richard.
>>> > >> > +  span = fold_build2 (MULT_EXPR, niter_type, >> > niter->niter,
>>> >> > +              fold_convert (niter_type, >> > 
>>> niter->control.step));
>>> >> > +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, >> > span,
>>> >> > +                  fold_convert (niter_type, >> > 
>>> niter->control.base));
>>> >> > +  niter->bound = fold_convert (type, niter->bound);
>>> >> > +  niter->cmp = NE_EXPR;
>>> >> >
>>> >> >    return true;
>>> >> > }
>>> >> > diff --git a/gcc/testsuite/gcc.dg/pr102087.c
>>> >> > b/gcc/testsuite/gcc.dg/pr102087.c
>>> >> > new file mode 100644
>>> >> > index 00000000000..ef1f9f5cba9
>>> >> > --- /dev/null
>>> >> > +++ b/gcc/testsuite/gcc.dg/pr102087.c
>>> >> > @@ -0,0 +1,25 @@
>>> >> > +/* { dg-do compile } */
>>> >> > +/* { dg-options "-O3" } */
>>> >> > +
>>> >> > +unsigned __attribute__ ((noinline))
>>> >> > +foo (int *__restrict__ a, int *__restrict__ b, unsigned >> > 
>>> l, unsigned n)
>>> >> > +{
>>> >> > +  while (n < ++l)
>>> >> > +    *a++ = *b++ + 1;
>>> >> > +  return l;
>>> >> > +}
>>> >> > +
>>> >> > +volatile int a[1];
>>> >> > +unsigned b;
>>> >> > +int c;
>>> >> > +
>>> >> > +int
>>> >> > +check ()
>>> >> > +{
>>> >> > +  int d;
>>> >> > +  for (; b > 1; b++)
>>> >> > +    for (c = 0; c < 2; c++)
>>> >> > +      for (d = 0; d < 2; d++)
>>> >> > +    a[0];
>>> >> > +  return 0;
>>> >> > +}
>>> >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>> >> > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>> >> > index 99289afec0b..40cb0240aaa 100644
>>> >> > --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>> >> > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>> >> > @@ -1,5 +1,6 @@
>>> >> >  /* { dg-require-effective-target vect_int } */
>>> >> >  /* { dg-options "-O3 -fdump-tree-vect-details" } */
>>> >> > +
>>> >> >  #define TYPE int *
>>> >> >  #define MIN ((TYPE)0)
>>> >> >  #define MAX ((TYPE)((long long)-1))
>>> >> > @@ -10,4 +11,5 @@
>>> >> >
>>> >> >  #include "pr101145.inc"
>>> >> >
>>> >> > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" >> > 
>>> 2 "vect" } }
>>> >> > */
>>> >> > +/* pointer size may not be vectorized, checking niter is >> > 
>>> ok. */
>>> >> > +/* { dg-final { scan-tree-dump "Symbolic number of >> > 
>>> iterations is" "vect"
>>> >> > }
>>> >> > } */
>>> >>
>>>
Jiufu Guo Sept. 2, 2021, 9:13 a.m. UTC | #7
Richard Biener <rguenther@suse.de> writes:

> On Tue, 31 Aug 2021, guojiufu wrote:
>
>> On 2021-08-30 20:02, Richard Biener wrote:
>> > On Mon, 30 Aug 2021, guojiufu wrote:
>> > 
>> >> On 2021-08-30 14:15, Jiufu Guo wrote:
>> >> > Hi,
>> >> >
>> >> > In patch r12-3136, niter->control, niter->bound and niter->cmp are
>> >> > derived from number_of_iterations_lt.  While for 'until wrap condition',
>> >> > the calculation in number_of_iterations_lt is not align the requirements
>> >> > on the define of them and requirements in determine_exit_conditions.
>> >> >
>> >> > This patch calculate niter->control, niter->bound and niter->cmp in
>> >> > number_of_iterations_until_wrap.
>> >> >
>> >> > The ICEs in the PR are pass with this patch.
>> >> > Bootstrap and reg-tests pass on ppc64/ppc64le and x86.
>> >> > Is this ok for trunk?
>> >> >
>> >> > BR.
>> >> > Jiufu Guo
>> >> >
>> >> Add ChangeLog:
>> >> gcc/ChangeLog:
>> >> 
>> >> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
>> >> 
>> >>         PR tree-optimization/102087
>> >>         * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
>> >>         Set bound/cmp/control for niter.
>> >> 
>> >> gcc/testsuite/ChangeLog:
>> >> 
>> >> 2021-08-30  Jiufu Guo  <guojiufu@linux.ibm.com>
>> >> 
>> >>         PR tree-optimization/102087
>> >>         * gcc.dg/vect/pr101145_3.c: Update tests.
>> >>         * gcc.dg/pr102087.c: New test.
>> >> 
>> >> > ---
>> >> >  gcc/tree-ssa-loop-niter.c              | 14 +++++++++++++-
>> >> >  gcc/testsuite/gcc.dg/pr102087.c        | 25 +++++++++++++++++++++++++
>> >> >  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  4 +++-
>> >> >  3 files changed, 41 insertions(+), 2 deletions(-)
>> >> >  create mode 100644 gcc/testsuite/gcc.dg/pr102087.c
>> >> >
>> >> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> >> > index 7af92d1c893..747f04d3ce0 100644
>> >> > --- a/gcc/tree-ssa-loop-niter.c
>> >> > +++ b/gcc/tree-ssa-loop-niter.c
>> >> > @@ -1482,7 +1482,7 @@ number_of_iterations_until_wrap (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >  				 affine_iv *iv1, class tree_niter_desc *niter)
>> >> >  {
>> >> >    tree niter_type = unsigned_type_for (type);
>> >> > -  tree step, num, assumptions, may_be_zero;
>> >> > +  tree step, num, assumptions, may_be_zero, span;
>> >> >    wide_int high, low, max, min;
>> >> >
>> >> >    may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base,
>> >> > iv0->base);
>> >> > @@ -1513,6 +1513,8 @@ number_of_iterations_until_wrap (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >   low = wi::to_wide (iv0->base);
>> >> >          else
>> >> > 	low = min;
>> >> > +
>> >> > +      niter->control = *iv1;
>> >> >      }
>> >> >    /* {base, -C} < n.  */
>> >> >    else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> >> > (iv1->step))
>> >> > @@ -1533,6 +1535,8 @@ number_of_iterations_until_wrap (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >   high = wi::to_wide (iv1->base);
>> >> >          else
>> >> > 	high = max;
>> >> > +
>> >> > +      niter->control = *iv0;
>> >> >      }
>> >> >    else
>> >> >      return false;
>> > 
>> > it looks like the above two should already be in effect from the
>> > caller (guarding with integer_nozerop)?
>> 
>> I add them just because set these fields in one function.
>> Yes, they have been set in caller already,  I could remove them here.
>> 
>> > 
>> >> > @@ -1556,6 +1560,14 @@ number_of_iterations_until_wrap (class loop *,
>> >> > tree type, affine_iv *iv0,
>> >> >            niter->assumptions, assumptions);
>> >> >
>> >> >    niter->control.no_overflow = false;
>> >> > +  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
>> >> > +				     niter->control.base,
>> >> > niter->control.step);
>> > 
>> > how do we know IVn - STEP doesn't already wrap?
>> 
>> The last IV value is just cross the max/min value of the type
>> at the last iteration,  then IVn - STEP is the nearest value
>> to max(or min) and not wrap.
>> 
>> > A comment might be
>> > good to explain you're turning the simplified exit condition into
>> > 
>> >    { IVbase - STEP, +, STEP } != niter * STEP + (IVbase - STEP)
>> > 
>> > which, when mathematically looking at it makes me wonder why there's
>> > the seemingly redundant '- STEP' term?  Also is NE_EXPR really
>> > correct since STEP might be not 1?  Only for non equality compares
I may miss the question in previous mail.  If STEP is not 1, NE_EXPR
Would be still correct, because the niter is an integer, and the then
after 'niter' iterations, the value should meet 'base + niter * STEP'.

BR,
Jiufu.
>> > the '- STEP' should matter?
>> 
>> I need to add comments for this.  This is a little tricky.
>> The last value of the original IV just cross max/min at most one STEP,
>> at there wrapping already happen.
>> Using "{IVbase, +, STEP} != niter * STEP + IVbase" is not wrong
>> in the aspect of exit condition.
>> 
>> But this would not work well with existing code:
>> like determine_exit_conditions, which will convert NE_EXP to
>> LT_EXPR/GT_EXPR.  And so, the '- STEP' is added to adjust the
>> IV.base and bound, with '- STEP' the bound will be the last value
>> just before wrap.
>
> Hmm.  The control IV is documented as
>
>   /* The simplified shape of the exit condition.  The loop exits if
>      CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
>      LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
>      LE_EXPR and negative if CMP is GE_EXPR.  This information is used
>      by loop unrolling.  */
>   affine_iv control;
>
> but determine_exit_conditions seems to assume the IV does not wrap?
> In fact determine_exit_conditions seems to just build ->base CMP bound
> where bound is the IV bound biased by #unroll * step - step.  So how
> does biasing by step * 1 help?
>
> Does the control IV wrap in our case?
>
> Richard.
>
>> Thanks again for your review!
>> 
>> BR.
>> Jiufu
>> 
>> > 
>> > Richard.
>> > 
>> >> > +  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
>> >> > +		      fold_convert (niter_type, niter->control.step));
>> >> > +  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
>> >> > +			      fold_convert (niter_type, niter->control.base));
>> >> > +  niter->bound = fold_convert (type, niter->bound);
>> >> > +  niter->cmp = NE_EXPR;
>> >> >
>> >> >    return true;
>> >> > }
>> >> > diff --git a/gcc/testsuite/gcc.dg/pr102087.c
>> >> > b/gcc/testsuite/gcc.dg/pr102087.c
>> >> > new file mode 100644
>> >> > index 00000000000..ef1f9f5cba9
>> >> > --- /dev/null
>> >> > +++ b/gcc/testsuite/gcc.dg/pr102087.c
>> >> > @@ -0,0 +1,25 @@
>> >> > +/* { dg-do compile } */
>> >> > +/* { dg-options "-O3" } */
>> >> > +
>> >> > +unsigned __attribute__ ((noinline))
>> >> > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
>> >> > +{
>> >> > +  while (n < ++l)
>> >> > +    *a++ = *b++ + 1;
>> >> > +  return l;
>> >> > +}
>> >> > +
>> >> > +volatile int a[1];
>> >> > +unsigned b;
>> >> > +int c;
>> >> > +
>> >> > +int
>> >> > +check ()
>> >> > +{
>> >> > +  int d;
>> >> > +  for (; b > 1; b++)
>> >> > +    for (c = 0; c < 2; c++)
>> >> > +      for (d = 0; d < 2; d++)
>> >> > +	a[0];
>> >> > +  return 0;
>> >> > +}
>> >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > index 99289afec0b..40cb0240aaa 100644
>> >> > --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> > @@ -1,5 +1,6 @@
>> >> >  /* { dg-require-effective-target vect_int } */
>> >> >  /* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> > +
>> >> >  #define TYPE int *
>> >> >  #define MIN ((TYPE)0)
>> >> >  #define MAX ((TYPE)((long long)-1))
>> >> > @@ -10,4 +11,5 @@
>> >> >
>> >> >  #include "pr101145.inc"
>> >> >
>> >> > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } }
>> >> > */
>> >> > +/* pointer size may not be vectorized, checking niter is ok. */
>> >> > +/* { dg-final { scan-tree-dump "Symbolic number of iterations is" "vect"
>> >> > }
>> >> > } */
>> >> 
>> 
>>
diff mbox series

Patch

diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 7af92d1c893..747f04d3ce0 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1482,7 +1482,7 @@  number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
 				 affine_iv *iv1, class tree_niter_desc *niter)
 {
   tree niter_type = unsigned_type_for (type);
-  tree step, num, assumptions, may_be_zero;
+  tree step, num, assumptions, may_be_zero, span;
   wide_int high, low, max, min;
 
   may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
@@ -1513,6 +1513,8 @@  number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
 	low = wi::to_wide (iv0->base);
       else
 	low = min;
+
+      niter->control = *iv1;
     }
   /* {base, -C} < n.  */
   else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
@@ -1533,6 +1535,8 @@  number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
 	high = wi::to_wide (iv1->base);
       else
 	high = max;
+
+      niter->control = *iv0;
     }
   else
     return false;
@@ -1556,6 +1560,14 @@  number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
 				      niter->assumptions, assumptions);
 
   niter->control.no_overflow = false;
+  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
+				     niter->control.base, niter->control.step);
+  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
+		      fold_convert (niter_type, niter->control.step));
+  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
+			      fold_convert (niter_type, niter->control.base));
+  niter->bound = fold_convert (type, niter->bound);
+  niter->cmp = NE_EXPR;
 
   return true;
 }
diff --git a/gcc/testsuite/gcc.dg/pr102087.c b/gcc/testsuite/gcc.dg/pr102087.c
new file mode 100644
index 00000000000..ef1f9f5cba9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102087.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+volatile int a[1];
+unsigned b;
+int c;
+
+int
+check ()
+{
+  int d;
+  for (; b > 1; b++)
+    for (c = 0; c < 2; c++)
+      for (d = 0; d < 2; d++)
+	a[0];
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
index 99289afec0b..40cb0240aaa 100644
--- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
@@ -1,5 +1,6 @@ 
 /* { dg-require-effective-target vect_int } */
 /* { dg-options "-O3 -fdump-tree-vect-details" } */
+
 #define TYPE int *
 #define MIN ((TYPE)0)
 #define MAX ((TYPE)((long long)-1))
@@ -10,4 +11,5 @@ 
 
 #include "pr101145.inc"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* pointer size may not be vectorized, checking niter is ok. */
+/* { dg-final { scan-tree-dump "Symbolic number of iterations is" "vect" } } */