diff mbox

[gomp4,committed] Handle double reduction in oacc kernels pass group

Message ID 55B73B57.5090606@mentor.com
State New
Headers show

Commit Message

Tom de Vries July 28, 2015, 8:20 a.m. UTC
Hi,

this patch adds a test-case with a double reduction in an oacc kernels 
region.

In order to get it in the proper shape for parloops to deal with, I 
needed to repeat the pass_lim/pass_copy_prop sequence.

Bootstrapped and reg-tested on x86_64.

Committed to gomp-4_0-branch.

Thanks,
- Tom
diff mbox

Patch

Handle double reduction in oacc kernels pass group

2015-07-28  Tom de Vries  <tom@codesourcery.com>

	* passes.def: Repeat pass_lim and pass_copy_prop in oacc kernels pass
	group.

	* c-c++-common/goacc/kernels-double-reduction.c: New test.
---
 gcc/passes.def                                     |  2 ++
 .../c-c++-common/goacc/kernels-double-reduction.c  | 37 ++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c

diff --git a/gcc/passes.def b/gcc/passes.def
index ae91ed1..e31e39f 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -96,6 +96,8 @@  along with GCC; see the file COPYING3.  If not see
 	      NEXT_PASS (pass_tree_loop_init);
 	      NEXT_PASS (pass_lim);
 	      NEXT_PASS (pass_copy_prop);
+	      NEXT_PASS (pass_lim);
+	      NEXT_PASS (pass_copy_prop);
 	      NEXT_PASS (pass_scev_cprop);
       	      NEXT_PASS (pass_parallelize_loops_oacc_kernels);
 	      NEXT_PASS (pass_expand_omp_ssa);
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
new file mode 100644
index 0000000..81467a9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -0,0 +1,37 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N 500
+
+unsigned int a[N][N];
+
+void  __attribute__((noinline,noclone))
+foo (void)  /* Sum every a[i][j] into sum inside an acc kernels region.  */
+{
+  int i, j;
+  unsigned int sum = 1;  /* Accumulator; starts at 1, not 0.  */
+
+#pragma acc kernels copyin (a[0:N]) copy (sum)
+  {
+    for (i = 0; i < N; ++i)
+      for (j = 0; j < N; ++j)
+	sum += a[i][j];  /* Both loop levels reduce into sum.  */
+  }
+
+  if (sum != 5001)  /* NOTE(review): a is never written here; confirm 5001.  */
+    abort ();
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
-- 
1.9.1