diff mbox

Fixup nb_iterations_upper_bound adjustment for vectorized loops

Message ID 20160428132619.GA6099@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich April 28, 2016, 1:26 p.m. UTC
On 27 Apr 16:05, Richard Biener wrote:
> >>
> >> I'd like to see testcases covering the corner-cases - have them have
> >> upper bound estimates by adjusting known array sizes and also cover
> >> the case of peeling for gaps.
> >
> > OK, I'll make more tests.
> > Thanks,
> > Ilya
> >
> >>
> >> Richard.
> >>

Could you please look at the new tests?  I added one simple case with a
known array size and similar tests with peeling for gaps, with and
without a vector iteration peeled.

Checked new tests with RUNTESTFLAGS="vect.exp=vect-nb-iter-ub-* --target_board=unix{-m32,}"
on x86_64-pc-linux-gnu.  OK for trunk?

Thanks,
Ilya
--
gcc/

2016-04-28  Ilya Enkovich  <ilya.enkovich@intel.com>

	* tree-vect-loop.c (vect_transform_loop): Fix
	nb_iterations_upper_bound computation for vectorized loop.

gcc/testsuite/

2016-04-28  Ilya Enkovich  <ilya.enkovich@intel.com>

	* gcc.target/i386/vect-unpack-2.c (avx512bw_test): Avoid
	optimization of vector loop.
	* gcc.target/i386/vect-unpack-3.c: New test.
	* gcc.dg/vect/vect-nb-iter-ub-1.c: New test.
	* gcc.dg/vect/vect-nb-iter-ub-2.c: New test.
	* gcc.dg/vect/vect-nb-iter-ub-3.c: New test.

Comments

Richard Biener April 28, 2016, 1:59 p.m. UTC | #1
On Thu, Apr 28, 2016 at 3:26 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> On 27 Apr 16:05, Richard Biener wrote:
>> >>
>> >> I'd like to see testcases covering the corner-cases - have them have
>> >> upper bound estimates by adjusting known array sizes and also cover
>> >> the case of peeling for gaps.
>> >
>> > OK, I'll make more tests.
>> > Thanks,
>> > Ilya
>> >
>> >>
>> >> Richard.
>> >>
>
> Could you please look at new tests?  I added one simple case with
> known array size and similar tests with a peeling for gaps w/ and
> w/o vector iteration peeled.
>
> Checked new tests with RUNTESTFLAGS="vect.exp=vect-nb-iter-ub-* --target_board=unix{-m32,}
> on x86_64-pc-linux-gnu.  OK for trunk?

Can you make the new testcases runtime ones, thus checking that the
vectorized outcome is ok (so we don't forget any trailing iterations)?

Ok with that change.

Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2016-04-28  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * tree-vect-loop.c (vect_transform_loop): Fix
>         nb_iterations_upper_bound computation for vectorized loop.
>
> gcc/testsuite/
>
> 2016-04-28  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * gcc.target/i386/vect-unpack-2.c (avx512bw_test): Avoid
>         optimization of vector loop.
>         * gcc.target/i386/vect-unpack-3.c: New test.
>         * gcc.dg/vect/vect-nb-iter-ub-1.c: New test.
>         * gcc.dg/vect/vect-nb-iter-ub-2.c: New test.
>         * gcc.dg/vect/vect-nb-iter-ub-3.c: New test.
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c
> new file mode 100644
> index 0000000..b7504a8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target { i?86-*-* x86_64-*-* } } } */
> +
> +int ii[127];
> +char cc[127];
> +
> +void
> +foo (int s)
> +{
> +  int i;
> +   for (i = 0; i < s; i++)
> +     ii[i] = (int) cc[i];
> +}
> +
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* x86_64-*-* } } } } */
> +/* { dg-final { scan-tree-dump "loop turned into non-loop; it never loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
> new file mode 100644
> index 0000000..5332636
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target { i?86-*-* x86_64-*-* } } } */
> +
> +int ii[128];
> +char cc[256];
> +
> +void
> +foo (int s)
> +{
> +  int i;
> +   for (i = 0; i < s; i++)
> +     ii[i] = (int) cc[i*2];
> +}
> +
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* x86_64-*-* } } } } */
> +/* { dg-final { scan-tree-dump "loop turned into non-loop; it never loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
> new file mode 100644
> index 0000000..5610f6a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target { i?86-*-* x86_64-*-* } } } */
> +
> +int ii[130];
> +char cc[258];
> +
> +void
> +foo (int s)
> +{
> +  int i;
> +   for (i = 0; i < s; i++)
> +     ii[i] = (int) cc[i*2];
> +}
> +
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* x86_64-*-* } } } } */
> +/* { dg-final { scan-tree-dump-not "loop turned into non-loop; it never loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
> index 4825248..51c518e 100644
> --- a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
> +++ b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
> @@ -6,19 +6,22 @@
>
>  #define N 120
>  signed int yy[10000];
> +signed char zz[10000];
>
>  void
> -__attribute__ ((noinline)) foo (signed char s)
> +__attribute__ ((noinline,noclone)) foo (int s)
>  {
> -   signed char i;
> +   int i;
>     for (i = 0; i < s; i++)
> -     yy[i] = (signed int) i;
> +     yy[i] = zz[i];
>  }
>
>  void
>  avx512bw_test ()
>  {
>    signed char i;
> +  for (i = 0; i < N; i++)
> +    zz[i] = i;
>    foo (N);
>    for (i = 0; i < N; i++)
>      if ( (signed int)i != yy [i] )
> diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-3.c b/gcc/testsuite/gcc.target/i386/vect-unpack-3.c
> new file mode 100644
> index 0000000..eb8a93e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vect-unpack-3.c
> @@ -0,0 +1,29 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fdump-tree-vect-details -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
> +/* { dg-require-effective-target avx512bw } */
> +
> +#include "avx512bw-check.h"
> +
> +#define N 120
> +signed int yy[10000];
> +
> +void
> +__attribute__ ((noinline)) foo (signed char s)
> +{
> +   signed char i;
> +   for (i = 0; i < s; i++)
> +     yy[i] = (signed int) i;
> +}
> +
> +void
> +avx512bw_test ()
> +{
> +  signed char i;
> +  foo (N);
> +  for (i = 0; i < N; i++)
> +    if ( (signed int)i != yy [i] )
> +      abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-assembler-not "vpmovsxbw\[ \\t\]+\[^\n\]*%zmm" } } */
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index d813b86..da98211 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -6921,11 +6921,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
>    /* Reduce loop iterations by the vectorization factor.  */
>    scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
>                       expected_iterations / vectorization_factor);
> -  loop->nb_iterations_upper_bound
> -    = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
>    if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
>        && loop->nb_iterations_upper_bound != 0)
>      loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
> +  loop->nb_iterations_upper_bound
> +    = wi::udiv_floor (loop->nb_iterations_upper_bound + 1,
> +                     vectorization_factor) - 1;
> +
>    if (loop->any_estimate)
>      {
>        loop->nb_iterations_estimate
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c
new file mode 100644
index 0000000..b7504a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target { i?86-*-* x86_64-*-* } } } */
+
+int ii[127];
+char cc[127];
+
+void
+foo (int s)
+{
+  int i;
+   for (i = 0; i < s; i++)
+     ii[i] = (int) cc[i];
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "loop turned into non-loop; it never loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
new file mode 100644
index 0000000..5332636
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target { i?86-*-* x86_64-*-* } } } */
+
+int ii[128];
+char cc[256];
+
+void
+foo (int s)
+{
+  int i;
+   for (i = 0; i < s; i++)
+     ii[i] = (int) cc[i*2];
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "loop turned into non-loop; it never loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
new file mode 100644
index 0000000..5610f6a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target { i?86-*-* x86_64-*-* } } } */
+
+int ii[130];
+char cc[258];
+
+void
+foo (int s)
+{
+  int i;
+   for (i = 0; i < s; i++)
+     ii[i] = (int) cc[i*2];
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "loop turned into non-loop; it never loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
index 4825248..51c518e 100644
--- a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
+++ b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
@@ -6,19 +6,22 @@ 
 
 #define N 120
 signed int yy[10000];
+signed char zz[10000];
 
 void
-__attribute__ ((noinline)) foo (signed char s)
+__attribute__ ((noinline,noclone)) foo (int s)
 {
-   signed char i;
+   int i;
    for (i = 0; i < s; i++)
-     yy[i] = (signed int) i;
+     yy[i] = zz[i];
 }
 
 void
 avx512bw_test ()
 {
   signed char i;
+  for (i = 0; i < N; i++)
+    zz[i] = i;
   foo (N);
   for (i = 0; i < N; i++)
     if ( (signed int)i != yy [i] )
diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-3.c b/gcc/testsuite/gcc.target/i386/vect-unpack-3.c
new file mode 100644
index 0000000..eb8a93e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-unpack-3.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-vect-details -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+#define N 120
+signed int yy[10000];
+
+void
+__attribute__ ((noinline)) foo (signed char s)
+{
+   signed char i;
+   for (i = 0; i < s; i++)
+     yy[i] = (signed int) i;
+}
+
+void
+avx512bw_test ()
+{
+  signed char i;
+  foo (N);
+  for (i = 0; i < N; i++)
+    if ( (signed int)i != yy [i] )
+      abort ();
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-assembler-not "vpmovsxbw\[ \\t\]+\[^\n\]*%zmm" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d813b86..da98211 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6921,11 +6921,13 @@  vect_transform_loop (loop_vec_info loop_vinfo)
   /* Reduce loop iterations by the vectorization factor.  */
   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
 		      expected_iterations / vectorization_factor);
-  loop->nb_iterations_upper_bound
-    = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
       && loop->nb_iterations_upper_bound != 0)
     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
+  loop->nb_iterations_upper_bound
+    = wi::udiv_floor (loop->nb_iterations_upper_bound + 1,
+		      vectorization_factor) - 1;
+
   if (loop->any_estimate)
     {
       loop->nb_iterations_estimate