diff mbox series

middle-end: update vector loop upper bounds when early break vect [PR113734]

Message ID patch-18262-tamar@arm.com
State New
Headers show
Series middle-end: update vector loop upper bounds when early break vect [PR113734] | expand

Commit Message

Tamar Christina Feb. 13, 2024, 10:41 a.m. UTC
Hi All,

When doing early break vectorization we should treat the final iteration as
possibly being partial.  This is so that when we calculate the vector loop upper
bounds we take into account that the final iteration could have done some work.

The attached testcase shows that if we don't, then cunroll may unroll the loop,
and if the upper bound is wrong we lose a vector iteration.

This is similar to how we adjust the scalar loop bounds for the PEELED case.

Bootstrapped and regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/113734
	* tree-vect-loop.cc (vect_transform_loop): Treat the final iteration of
	an early break loop as partial.

gcc/testsuite/ChangeLog:

	PR tree-optimization/113734
	* gcc.dg/vect/vect-early-break_117-pr113734.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c
new file mode 100644
index 0000000000000000000000000000000000000000..36ae09483dfd426f977a3d92cf24a78d76de6961




--
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c
new file mode 100644
index 0000000000000000000000000000000000000000..36ae09483dfd426f977a3d92cf24a78d76de6961
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c
@@ -0,0 +1,37 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+#define N 306
+#define NEEDLE 136
+
+int table[N];
+
+__attribute__ ((noipa))
+int foo (int i, unsigned short parse_tables_n)
+{
+  parse_tables_n >>= 9;
+  parse_tables_n += 11;
+  while (i < N && parse_tables_n--)
+    table[i++] = 0;
+
+  return table[NEEDLE];
+}
+
+int main ()
+{
+  check_vect ();
+
+  for (int j = 0; j < N; j++)
+    table[j] = -1;
+
+  if (foo (0, 0xFFFF) != 0)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 854e9d78bc71721e6559a6bc5dff78c813603a78..0b1656fef2fed83f30295846c382ad9fb318454a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -12171,7 +12171,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   /* True if the final iteration might not handle a full vector's
      worth of scalar iterations.  */
   bool final_iter_may_be_partial
-    = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo);
+    = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+      || LOOP_VINFO_EARLY_BREAKS (loop_vinfo);
   /* The minimum number of iterations performed by the epilogue.  This
      is 1 when peeling for gaps because we always need a final scalar
      iteration.  */

Comments

Richard Biener Feb. 13, 2024, 10:48 a.m. UTC | #1
On Tue, 13 Feb 2024, Tamar Christina wrote:

> Hi All,
> 
> When doing early break vectorization we should treat the final iteration as
> possibly being partial.  This so that when we calculate the vector loop upper
> bounds we take into account that final iteration could have done some work.
> 
> The attached testcase shows that if we don't then cunroll may unroll the loop an
> if the upper bound is wrong we lose a vector iteration.
> 
> This is similar to how we adjust the scalar loop bounds for the PEELED case.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/113734
> 	* tree-vect-loop.cc (vect_transform_loop): Treat the final iteration of
> 	an early break loop as partial.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/113734
> 	* gcc.dg/vect/vect-early-break_117-pr113734.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..36ae09483dfd426f977a3d92cf24a78d76de6961
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c
> @@ -0,0 +1,37 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break_hw } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include "tree-vect.h"
> +
> +#define N 306
> +#define NEEDLE 136
> +
> +int table[N];
> +
> +__attribute__ ((noipa))
> +int foo (int i, unsigned short parse_tables_n)
> +{
> +  parse_tables_n >>= 9;
> +  parse_tables_n += 11;
> +  while (i < N && parse_tables_n--)
> +    table[i++] = 0;
> +
> +  return table[NEEDLE];
> +}
> +
> +int main ()
> +{
> +  check_vect ();
> +
> +  for (int j = 0; j < N; j++)
> +    table[j] = -1;
> +
> +  if (foo (0, 0xFFFF) != 0)
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 854e9d78bc71721e6559a6bc5dff78c813603a78..0b1656fef2fed83f30295846c382ad9fb318454a 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -12171,7 +12171,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
>    /* True if the final iteration might not handle a full vector's
>       worth of scalar iterations.  */
>    bool final_iter_may_be_partial
> -    = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo);
> +    = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
> +      || LOOP_VINFO_EARLY_BREAKS (loop_vinfo);
>    /* The minimum number of iterations performed by the epilogue.  This
>       is 1 when peeling for gaps because we always need a final scalar
>       iteration.  */
> 
> 
> 
> 
>
diff mbox series

Patch

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_117-pr113734.c
@@ -0,0 +1,37 @@ 
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+#define N 306
+#define NEEDLE 136
+
+int table[N];
+
+__attribute__ ((noipa))
+int foo (int i, unsigned short parse_tables_n)
+{
+  parse_tables_n >>= 9;
+  parse_tables_n += 11;
+  while (i < N && parse_tables_n--)
+    table[i++] = 0;
+
+  return table[NEEDLE];
+}
+
+int main ()
+{
+  check_vect ();
+
+  for (int j = 0; j < N; j++)
+    table[j] = -1;
+
+  if (foo (0, 0xFFFF) != 0)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 854e9d78bc71721e6559a6bc5dff78c813603a78..0b1656fef2fed83f30295846c382ad9fb318454a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -12171,7 +12171,8 @@  vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   /* True if the final iteration might not handle a full vector's
      worth of scalar iterations.  */
   bool final_iter_may_be_partial
-    = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo);
+    = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+      || LOOP_VINFO_EARLY_BREAKS (loop_vinfo);
   /* The minimum number of iterations performed by the epilogue.  This
      is 1 when peeling for gaps because we always need a final scalar
      iteration.  */