From e52253bcc0916d9a7c7ba4bbe7501ae1ded3b8a8 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian@codesourcery.com>
Date: Fri, 9 Aug 2019 13:01:33 -0700
Subject: [PATCH] Wait at end of OpenACC asynchronous kernels regions
In OpenACC 'kernels' decomposition, we're improperly nesting synchronous and
asynchronous data and compute regions, giving rise to data races when the
asynchronicity is actually executed, as is visible in at least on test case
with GCN offloading.
The proper fix is to correctly use the asynchronous interfaces, making the
currently synchronous data regions fully asynchronous (see also
<https://gcc.gnu.org/PR97390> "[OpenACC] 'async' clause on 'data' construct",
which is to share the same implementation), but that's for later; for now add
some more synchronization.
gcc/
* omp-oacc-kernels-decompose.cc (add_wait): New function, split out
of...
(add_async_clauses_and_wait): ...here. Call new outlined function.
(decompose_kernels_region_body): Add wait at the end of
explicitly-asynchronous kernels regions.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c: Remove GCN
offloading execution XFAIL.
Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
---
gcc/omp-oacc-kernels-decompose.cc | 31 ++++++++++++++-----
.../libgomp.oacc-c-c++-common/f-asyncwait-1.c | 1 -
2 files changed, 24 insertions(+), 8 deletions(-)
@@ -878,6 +878,18 @@ maybe_build_inner_data_region (location_t loc, gimple *body,
return body;
}
+static void
+add_wait (location_t loc, gimple_seq *region_body)
+{
+ /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */
+ tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
+ tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
+ gimple *wait_call = gimple_build_call (wait_fn, 2,
+ sync_arg, integer_zero_node);
+ gimple_set_location (wait_call, loc);
+ gimple_seq_add_stmt (region_body, wait_call);
+}
+
/* Helper function of decompose_kernels_region_body. The statements in
REGION_BODY are expected to be decomposed parts; add an 'async' clause to
each. Also add a 'wait' directive at the end of the sequence. */
@@ -900,13 +912,7 @@ add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
target_clauses);
}
- /* A '#pragma acc wait' is just a call 'GOACC_wait (acc_async_sync, 0)'. */
- tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
- tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
- gimple *wait_call = gimple_build_call (wait_fn, 2,
- sync_arg, integer_zero_node);
- gimple_set_location (wait_call, loc);
- gimple_seq_add_stmt (region_body, wait_call);
+ add_wait (loc, region_body);
}
/* Auxiliary analysis of the body of a kernels region, to determine for each
@@ -1352,6 +1358,17 @@ decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
a wait directive at the end. */
if (async_clause == NULL)
add_async_clauses_and_wait (loc, ®ion_body);
+ else
+ /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
+ region, then target memory will get unmapped at the point the data
+ region ends, even if the inner asynchronous parallels have not yet
+ completed. For kernels marked "async", we might want to use "enter data
+ async(...)" and "exit data async(...)" instead, or asynchronous data
+ regions (see also <https://gcc.gnu.org/PR97390>
+ "[OpenACC] 'async' clause on 'data' construct",
+ which is to share the same implementation).
+ For now, insert a (synchronous) wait at the end of the block. */
+ add_wait (loc, ®ion_body);
tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
gimple *body = gimple_build_bind (kernels_locals, region_body,
@@ -3,7 +3,6 @@
/* Based on '../libgomp.oacc-fortran/asyncwait-1.f90'. */
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
-/* { dg-xfail-run-if TODO { openacc_radeon_accel_selected } } */
/* { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
--
2.34.1