[08/13] Fix host-to-device copies from rodata for AMD GCN
diff mbox series

Message ID acd47fa134521477cdeb40df0045ba35f4bc8379.1573849744.git.julian@codesourcery.com
State New
Headers show
Series
  • AMD GCN worker partitioning support
Related show

Commit Message

Julian Brown Nov. 15, 2019, 9:44 p.m. UTC
It appears that the hsa_memory_copy API routine
has problems copying from read-only data: in the
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-8.c test, a "const"
variable cannot be successfully copied to the target. I think the problem
is with read-only page mappings (in the HSA runtime). Luckily (?), if
the copy fails, the API call fails with a HSA_STATUS_ERROR return code,
which we can detect and use to trigger a workaround.

I've also added return-code checks to several other uses of the
hsa_memory_copy API routine, in an attempt to avoid silent runtime
failures. (A previous fix for a similar problem, removed before upstream
submission, does not appear to apply to this exact situation.)

OK?

Julian

ChangeLog

	libgomp/
	* plugin/plugin-gcn.c (hsa_memory_copy_wrapper): New.
	(copy_data, GOMP_OFFLOAD_host2dev): Use above function.
	(GOMP_OFFLOAD_dev2host, GOMP_OFFLOAD_dev2dev): Check hsa_memory_copy
	return code.
---
 libgomp/plugin/plugin-gcn.c | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

Comments

Andrew Stubbs Nov. 18, 2019, 11 a.m. UTC | #1
On 15/11/2019 21:44, Julian Brown wrote:
> +static void
> +hsa_memory_copy_wrapper (void *dst, const void *src, size_t len)
> +{
> +  hsa_status_t status = hsa_fns.hsa_memory_copy_fn (dst, src, len);
> +
> +  if (status == HSA_STATUS_SUCCESS)
> +    return;
> +
> +  /* It appears that the copy fails if the source data is in a read-only page.
> +     We can't detect that easily, so try copying the data to a temporary buffer
> +     and doing the copy again if we got an error above.  */
> +
> +  void *src_copy = malloc (len);
> +  memcpy (src_copy, src, len);
> +  status = hsa_fns.hsa_memory_copy_fn (dst, (const void *) src_copy, len);
> +  free (src_copy);
> +  if (status != HSA_STATUS_SUCCESS)
> +    GOMP_PLUGIN_error ("memory copy failed");
> +}

I'd like a GCN_DEBUG (or GCN_WARNING?) in the fallback case, so that we 
can see if it does that very often. If so I'd think we should report it 
as a bug to ROCm.

Otherwise the patch is fine by me.

Andrew

Patch
diff mbox series

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 7b95a4cef8f..eb016e3fcd6 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -2940,6 +2940,29 @@  maybe_init_omp_async (struct agent_info *agent)
       = GOMP_OFFLOAD_openacc_async_construct (agent->device_id);
 }
 
+/* A wrapper that works around an issue in the HSA runtime with host-to-device
+   copies from read-only pages.  */
+
+static void
+hsa_memory_copy_wrapper (void *dst, const void *src, size_t len)
+{
+  hsa_status_t status = hsa_fns.hsa_memory_copy_fn (dst, src, len);
+
+  if (status == HSA_STATUS_SUCCESS)
+    return;
+
+  /* It appears that the copy fails if the source data is in a read-only page.
+     We can't detect that easily, so try copying the data to a temporary buffer
+     and doing the copy again if we got an error above.  */
+
+  void *src_copy = malloc (len);
+  memcpy (src_copy, src, len);
+  status = hsa_fns.hsa_memory_copy_fn (dst, (const void *) src_copy, len);
+  free (src_copy);
+  if (status != HSA_STATUS_SUCCESS)
+    GOMP_PLUGIN_error ("memory copy failed");
+}
+
 /* Copy data to or from a device.  This is intended for use as an async
    callback event.  */
 
@@ -2950,7 +2973,7 @@  copy_data (void *data_)
   GCN_DEBUG ("Async thread %d:%d: Copying %zu bytes from (%p) to (%p)\n",
 	     data->aq->agent->device_id, data->aq->id, data->len, data->src,
 	     data->dst);
-  hsa_fns.hsa_memory_copy_fn (data->dst, data->src, data->len);
+  hsa_memory_copy_wrapper (data->dst, data->src, data->len);
   if (data->free_src)
     free ((void *) data->src);
   free (data);
@@ -3643,7 +3666,9 @@  GOMP_OFFLOAD_dev2host (int device, void *dst, const void *src, size_t n)
 {
   GCN_DEBUG ("Copying %zu bytes from device %d (%p) to host (%p)\n", n, device,
 	     src, dst);
-  hsa_fns.hsa_memory_copy_fn (dst, src, n);
+  hsa_status_t status = hsa_fns.hsa_memory_copy_fn (dst, src, n);
+  if (status != HSA_STATUS_SUCCESS)
+    GOMP_PLUGIN_error ("memory copy failed");
   return true;
 }
 
@@ -3654,7 +3679,7 @@  GOMP_OFFLOAD_host2dev (int device, void *dst, const void *src, size_t n)
 {
   GCN_DEBUG ("Copying %zu bytes from host (%p) to device %d (%p)\n", n, src,
 	     device, dst);
-  hsa_fns.hsa_memory_copy_fn (dst, src, n);
+  hsa_memory_copy_wrapper (dst, src, n);
   return true;
 }
 
@@ -3675,7 +3700,9 @@  GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n)
 
   GCN_DEBUG ("Copying %zu bytes from device %d (%p) to device %d (%p)\n", n,
 	     device, src, device, dst);
-  hsa_fns.hsa_memory_copy_fn (dst, src, n);
+  hsa_status_t status = hsa_fns.hsa_memory_copy_fn (dst, src, n);
+  if (status != HSA_STATUS_SUCCESS)
+    GOMP_PLUGIN_error ("memory copy failed");
   return true;
 }