Enable firstprivate OpenACC reductions
2018-01-31 Cesar Philippidis <cesar@codesourcery.com>
gcc/
* gimplify.c (omp_add_variable): Allow certain OpenACC reduction
variables to remain firstprivate.
gcc/testsuite/
* c-c++-common/goacc/reduction-8.c: New test.
@@ -6740,9 +6740,16 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
else
splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
- /* For reductions clauses in OpenACC loop directives, by default create a
- copy clause on the enclosing parallel construct for carrying back the
- results. */
+ /* For OpenACC loop directives, when a reduction is immediately
+ enclosed within an acc parallel or kernels construct, it must
+ have an implied copy data mapping. E.g.
+
+ #pragma acc parallel
+ {
+ #pragma acc loop reduction (+:sum)
+
+ a copy clause for sum should be added on the enclosing parallel
+ construct for carrying back the results. */
if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
{
struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
@@ -6758,8 +6765,11 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
vector = true;
}
- /* Set new copy map as 'private' if sure we're not gang-partitioning. */
- bool map_private;
+ /* Reduction data maps need to be marked as private for worker
+ and vector loops, in order to ensure that value of the
+ reduction carried back to the host. Set new copy map as
+ 'private' if sure we're not gang-partitioning. */
+ bool map_private, update_data_map = false;
if (gang)
map_private = false;
@@ -6768,6 +6778,10 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
else
map_private = oacc_privatize_reduction (ctx->outer_context);
+ if (ctx->outer_context
+ && ctx->outer_context->region_type == ORT_ACC_PARALLEL)
+ update_data_map = true;
+
while (outer_ctx)
{
n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
@@ -6784,7 +6798,8 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
gcc_assert (!(n->value & GOVD_FIRSTPRIVATE)
&& (n->value & GOVD_MAP));
}
- else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
+ else if (update_data_map
+ && outer_ctx->region_type == ORT_ACC_PARALLEL)
{
/* Remove firstprivate and make it a copy map. */
n->value &= ~GOVD_FIRSTPRIVATE;
@@ -6796,7 +6811,8 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
n->value |= GOVD_MAP_PRIVATE;
}
}
- else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
+ else if (update_data_map
+ && outer_ctx->region_type == ORT_ACC_PARALLEL)
{
unsigned f = GOVD_MAP | GOVD_SEEN;
new file mode 100644
@@ -0,0 +1,94 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+#define n 1000
+
+int
+main(void)
+{
+ int i, j;
+ int result, array[n];
+
+#pragma acc parallel loop reduction (+:result)
+ for (i = 0; i < n; i++)
+ result ++;
+
+#pragma acc parallel
+#pragma acc loop reduction (+:result)
+ for (i = 0; i < n; i++)
+ result ++;
+
+#pragma acc parallel
+#pragma acc loop
+ for (i = 0; i < n; i++)
+ {
+ result = i;
+
+#pragma acc loop reduction(+:result)
+ for (j = 0; j < n; j++)
+ result ++;
+
+ array[i] = result;
+ }
+
+#pragma acc parallel
+#pragma acc loop
+ for (i = 0; i < n; i++)
+ {
+ result = i;
+
+#pragma acc loop worker vector reduction(+:result)
+ for (j = 0; j < n; j++)
+ result ++;
+
+ array[i] = result;
+ }
+
+#pragma acc parallel
+#pragma acc loop // { dg-warning "insufficient partitioning" }
+ for (i = 0; i < n; i++)
+ {
+ result = i;
+
+#pragma acc loop gang reduction(+:result)
+ for (j = 0; j < n; j++)
+ result ++;
+
+ array[i] = result;
+ }
+
+#pragma acc parallel copy(result)
+#pragma acc loop // { dg-warning "insufficient partitioning" }
+ for (i = 0; i < n; i++)
+ {
+ result = i;
+
+#pragma acc loop gang reduction(+:result)
+ for (j = 0; j < n; j++)
+ result ++;
+
+ array[i] = result;
+ }
+
+#pragma acc kernels
+#pragma acc loop
+ for (i = 0; i < n; i++)
+ {
+ result = i;
+
+#pragma acc loop reduction(+:result)
+ for (j = 0; j < n; j++)
+ result ++;
+
+ array[i] = result;
+ }
+
+ return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "reduction..:result. map.tofrom:result .len: 4.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "oacc_parallel map.tofrom:result .len: 4.." 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. firstprivate.result." 3 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:result .len: 4.. map.tofrom:array .len: 4000.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. map.force_tofrom:result .len: 4.." 1 "gimple" } } */
+