@@ -1127,9 +1127,9 @@ failure:
return false;
}
-/* Part of the libgomp plugin interface. Run a kernel on a device N and pass
- the it an array of pointers in VARS as a parameter. The kernel is
- identified by FN_PTR which must point to a kernel_info structure. */
+/* Part of the libgomp plugin interface. Run a kernel on device N and pass it
+ an array of pointers in VARS as a parameter. The kernel is identified by
+ FN_PTR which must point to a kernel_info structure. */
void
GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void** args)
@@ -1237,13 +1237,62 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void** args)
GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
}
+/* Information to be passed to a thread running a kernel asynchronously.
+   Filled in by GOMP_OFFLOAD_async_run from its own parameters and released
+   by run_kernel_asynchronously after the members have been copied out.  */
+
+struct async_run_info
+{
+ int device; /* Device number, forwarded to GOMP_OFFLOAD_run.  */
+ void *tgt_fn; /* Kernel to run, forwarded as FN_PTR of GOMP_OFFLOAD_run.  */
+ void *tgt_vars; /* Forwarded as VARS of GOMP_OFFLOAD_run.  */
+ void **args; /* Forwarded as ARGS of GOMP_OFFLOAD_run.  */
+ void *async_data; /* Given to GOMP_PLUGIN_target_task_completion.  */
+};
+
+/* Start routine of the thread created by GOMP_OFFLOAD_async_run.
+   THREAD_ARG points to a struct async_run_info which this routine takes
+   over: its contents are copied to the stack and the heap object is freed
+   before the kernel is run.  Once GOMP_OFFLOAD_run has returned,
+   GOMP_PLUGIN_target_task_completion is called with the stored ASYNC_DATA.
+   Always returns NULL.  */
+
+static void *
+run_kernel_asynchronously (void *thread_arg)
+{
+  struct async_run_info job = *(struct async_run_info *) thread_arg;
+
+  /* Everything needed now lives in JOB, so the heap copy can be released
+     before the kernel is launched.  */
+  free (thread_arg);
+
+  GOMP_OFFLOAD_run (job.device, job.tgt_fn, job.tgt_vars, job.args);
+  GOMP_PLUGIN_target_task_completion (job.async_data);
+  return NULL;
+}
+
+/* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
+   does, but asynchronously, and call GOMP_PLUGIN_target_task_completion with
+   ASYNC_DATA when it has finished.  DEVICE, TGT_FN, TGT_VARS and ARGS are
+   handed unchanged to GOMP_OFFLOAD_run on a newly created thread, which is
+   then detached.  Failure to create or to detach that thread is reported
+   through GOMP_PLUGIN_fatal.  */
+
void
GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
void **args, void *async_data)
{
- /* FIXME: Implement. */
- GOMP_PLUGIN_fatal ("Support for HSA does not yet implement asynchronous "
- "execution. ");
+  pthread_t pt;
+  struct async_run_info *info;
+  int err;
+
+  HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
+
+  /* The request is packaged on the heap because the new thread reads it
+     after this function may already have returned.  */
+  info = GOMP_PLUGIN_malloc (sizeof (*info));
+  info->device = device;
+  info->tgt_fn = tgt_fn;
+  info->tgt_vars = tgt_vars;
+  info->args = args;
+  info->async_data = async_data;
+
+  /* From here on INFO belongs to run_kernel_asynchronously, which frees
+     it.  */
+  err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
+  if (err != 0)
+    GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
+                       strerror (err));
+
+  /* Nobody joins the thread, so detach it.  */
+  err = pthread_detach (pt);
+  if (err != 0)
+    GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
+                       "asynchronously: %s", strerror (err));
}
/* Deinitialize all information associated with MODULE and kernels within
@@ -1527,7 +1527,8 @@ GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
@@ -1557,7 +1558,8 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
@@ -1608,7 +1610,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
}
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
struct gomp_thread *thr = gomp_thread ();
@@ -1730,7 +1733,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
}
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
struct gomp_thread *thr = gomp_thread ();
@@ -1861,7 +1865,8 @@ omp_target_alloc (size_t size, int device_num)
if (devicep == NULL)
return NULL;
- if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return malloc (size);
gomp_mutex_lock (&devicep->lock);
@@ -1889,7 +1894,8 @@ omp_target_free (void *device_ptr, int device_num)
if (devicep == NULL)
return;
- if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
{
free (device_ptr);
return;
@@ -1946,7 +1952,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
if (dst_devicep == NULL)
return EINVAL;
- if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
dst_devicep = NULL;
}
if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
@@ -1958,7 +1965,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
if (src_devicep == NULL)
return EINVAL;
- if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
src_devicep = NULL;
}
if (src_devicep == NULL && dst_devicep == NULL)
@@ -2088,7 +2096,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
if (dst_devicep == NULL)
return EINVAL;
- if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
dst_devicep = NULL;
}
if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
@@ -2100,7 +2109,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
if (src_devicep == NULL)
return EINVAL;
- if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
src_devicep = NULL;
}
@@ -652,6 +652,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
ttask->devicep = devicep;
ttask->fn = fn;
ttask->mapnum = mapnum;
+ ttask->args = args;
memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));