From 15c6f6b6bc396f53474ea380f506a7f74d7a05af Mon Sep 17 00:00:00 2001
From: Sandra Loosemore <sandra@codesourcery.com>
Date: Tue, 13 Sep 2022 23:50:27 +0000
Subject: [PATCH] OpenMP: Enable vectorization in all OpenMP loops
This patch marks all OpenMP worksharing loops (not just those that use
the simd construct), as well as taskloop and OpenACC loops, as candidates
for vectorization when -ftree-loop-optimize is active and loop
vectorization is not explicitly disabled with -fno-tree-loop-vectorize.
gcc/ChangeLog:
* omp-expand.cc (maybe_auto_vectorize_loop): New.
(expand_omp_for_generic): Call it.
(expand_omp_for_static_nochunk): Likewise.
(expand_omp_for_static_chunk): Likewise.
(expand_omp_taskloop_for_inner): Likewise.
(expand_oacc_for): Likewise.
gcc/testsuite/ChangeLog:
* c-c++-common/gomp/vectorize-1.c: New.
* c-c++-common/gomp/vectorize-2.c: New.
* c-c++-common/gomp/vectorize-3.c: New.
* c-c++-common/gomp/vectorize-s.c: New.
* gcc.dg/gomp/pr46032-2.c: Compile with -fno-tree-loop-vectorize.
* gcc.dg/gomp/pr46032-3.c: Likewise.
---
gcc/omp-expand.cc | 23 ++++++++++++++
gcc/testsuite/c-c++-common/gomp/vectorize-1.c | 31 +++++++++++++++++++
gcc/testsuite/c-c++-common/gomp/vectorize-2.c | 31 +++++++++++++++++++
gcc/testsuite/c-c++-common/gomp/vectorize-3.c | 31 +++++++++++++++++++
gcc/testsuite/c-c++-common/gomp/vectorize-s.c | 31 +++++++++++++++++++
gcc/testsuite/gcc.dg/gomp/pr46032-2.c | 2 +-
gcc/testsuite/gcc.dg/gomp/pr46032-3.c | 2 +-
7 files changed, 149 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-1.c
create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-2.c
create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-3.c
create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-s.c
@@ -3711,6 +3711,22 @@ expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
return cont_bb;
}
+/* Helper function for various subroutines of expand_omp_for.
+ Mark LOOP as a forced-vectorization candidate when -ftree-loop-optimize is
+ on and -ftree-loop-vectorize is either on or was not set explicitly. */
+static void
+maybe_auto_vectorize_loop (class loop *loop)
+{
+ if ((flag_tree_loop_vectorize
+ || !OPTION_SET_P (flag_tree_loop_vectorize))
+ && flag_tree_loop_optimize)
+ {
+ loop->force_vectorize = true;
+ cfun->has_force_vectorize_loops = true;
+ }
+}
+
+
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with any schedule. Given parameters:
@@ -4650,6 +4666,7 @@ expand_omp_for_generic (struct omp_region *region,
new_loop->header = l0_bb;
new_loop->latch = l2_bb;
add_loop (new_loop, outer_loop);
+ maybe_auto_vectorize_loop (new_loop);
/* Allocate a loop structure for the original loop unless we already
had one. */
@@ -4660,6 +4677,7 @@ expand_omp_for_generic (struct omp_region *region,
orig_loop->header = l1_bb;
/* The loop may have multiple latches. */
add_loop (orig_loop, new_loop);
+ maybe_auto_vectorize_loop (orig_loop);
}
}
}
@@ -5551,6 +5569,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
if (collapse_bb == NULL)
loop->latch = cont_bb;
add_loop (loop, body_bb->loop_father);
+ maybe_auto_vectorize_loop (loop);
}
}
@@ -6268,6 +6287,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
trip_loop->header = iter_part_bb;
trip_loop->latch = trip_update_bb;
add_loop (trip_loop, iter_part_bb->loop_father);
+ maybe_auto_vectorize_loop (trip_loop);
if (loop != entry_bb->loop_father)
{
@@ -6285,6 +6305,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
if (collapse_bb == NULL)
loop->latch = cont_bb;
add_loop (loop, trip_loop);
+ maybe_auto_vectorize_loop (loop);
}
}
}
@@ -7439,6 +7460,7 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
if (collapse_bb == NULL)
loop->latch = cont_bb;
add_loop (loop, body_bb->loop_father);
+ maybe_auto_vectorize_loop (loop);
}
}
@@ -8006,6 +8028,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
inner_loop->header = elem_body_bb;
inner_loop->latch = elem_cont_bb;
add_loop (inner_loop, body_loop);
+ maybe_auto_vectorize_loop (inner_loop);
}
}
}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O1 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP worksharing loops are vectorized at -O1, both without
+ (f1) and with (f2) the "simd" specifier. */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
+
+int f2 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for simd
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP worksharing loops are vectorized at -O2, both without
+ (f1) and with (f2) the "simd" specifier. */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
+
+int f2 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for simd
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP worksharing loops are vectorized at -O3, both without
+ (f1) and with (f2) the "simd" specifier. */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
+
+int f2 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for simd
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -Os -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP worksharing loops are vectorized at -Os, both without
+ (f1) and with (f2) the "simd" specifier. */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
+
+int f2 (void)
+{
+ int a1[32], a2[32], a3[32];
+ init (32, a1);
+ init (32, a2);
+ #pragma omp for simd
+ for (int i = 0; i < 32; i++)
+ a3[i] = a1[i] + a2[i];
+ return g (4, a3);
+}
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
#define N 2
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
#define N 2
--
2.31.1