diff mbox

[gomp4] parallel reduction nested inside data regions

Message ID 55F331DD.4020004@mentor.com
State New
Headers show

Commit Message

Cesar Philippidis Sept. 11, 2015, 7:56 p.m. UTC
This patch corrects the way that build_outer_var_ref deals with data
mappings in acc parallel and kernels when they are nested in some other
construct (e.g. acc data). This issue can be reproduced with an acc
parallel reduction nested inside an acc data region.

I've applied this fix to gomp-4_0-branch.

Cesar
diff mbox

Patch

2015-09-11  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
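	* omp-low.c (build_outer_var_ref): Check is_oacc_parallel before
	checking ctx->outer, so that an acc parallel nested inside another
	construct uses VAR directly.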

	gcc/testsuite/
	* c-c++-common/goacc/parallel-reduction.c: Enclose the parallel
	reduction inside an acc data region.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: Enclose
	one parallel reduction inside a data region.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 09adea8..ba37372 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1240,6 +1240,8 @@  build_outer_var_ref (tree var, omp_context *ctx)
       if (x == NULL_TREE)
 	x = var;
     }
+  else if (is_oacc_parallel (ctx))
+    x = var;
   else if (ctx->outer)
     {
       /* OpenACC may have multiple outer contexts (one per loop).  */
@@ -1256,7 +1258,7 @@  build_outer_var_ref (tree var, omp_context *ctx)
       else
 	x = lookup_decl (var, ctx->outer);
     }
-  else if (is_reference (var) || is_oacc_parallel (ctx)
+  else if (is_reference (var)
 	   || extract_oacc_routine_gwv (current_function_decl) != 0)
     /* This can happen with orphaned constructs.  If var is reference, it is
        possible it is shared and as such valid.  */
diff --git a/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c b/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c
index debed55..d7cc947 100644
--- a/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c
+++ b/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c
@@ -2,11 +2,15 @@  int
 main ()
 {
   int sum = 0;
+  int dummy = 0;
 
-#pragma acc parallel num_gangs (10) copy (sum) reduction (+:sum)
+#pragma acc data copy (dummy)
   {
-    int v = 5;
-    sum += 10 + v;
+#pragma acc parallel num_gangs (10) copy (sum) reduction (+:sum)
+    {
+      int v = 5;
+      sum += 10 + v;
+    }
   }
 
   return sum;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c
index 381d5b6..d328f46 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c
@@ -10,10 +10,14 @@  main ()
 {
   int s1 = 0, s2 = 0;
   int i;
+  int dummy = 0;
 
-#pragma acc parallel num_gangs (N) reduction (+:s1)
+#pragma acc data copy (dummy)
   {
-    s1++;
+#pragma acc parallel num_gangs (N) reduction (+:s1)
+    {
+      s1++;
+    }
   }
 
   if (acc_get_device_type () != acc_device_nvidia)