@@ -978,8 +978,8 @@ event_add (enum ptx_event_type type, CUe
void
nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
- size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers,
- int vector_length, int async, void *targ_mem_desc)
+ size_t *sizes, unsigned short *kinds, int num_gangs,
+ int num_workers, int vector_length, int async, void *targ_mem_desc)
{
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
CUfunction function;
@@ -1137,7 +1137,6 @@ nvptx_host2dev (void *d, const void *h,
CUresult r;
CUdeviceptr pb;
size_t ps;
- struct nvptx_thread *nvthd = nvptx_thread ();
if (!s)
return 0;
@@ -1162,7 +1161,8 @@ nvptx_host2dev (void *d, const void *h,
GOMP_PLUGIN_fatal ("invalid size");
#ifndef DISABLE_ASYNC
- if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+ struct nvptx_thread *nvthd = nvptx_thread ();
+ if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
{
CUevent *e;
@@ -1202,7 +1202,6 @@ nvptx_dev2host (void *h, const void *d,
CUresult r;
CUdeviceptr pb;
size_t ps;
- struct nvptx_thread *nvthd = nvptx_thread ();
if (!s)
return 0;
@@ -1227,7 +1226,8 @@ nvptx_dev2host (void *h, const void *d,
GOMP_PLUGIN_fatal ("invalid size");
#ifndef DISABLE_ASYNC
- if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+ struct nvptx_thread *nvthd = nvptx_thread ();
+ if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
{
CUevent *e;
@@ -1559,7 +1559,8 @@ GOMP_OFFLOAD_get_name (void)
unsigned int
GOMP_OFFLOAD_get_caps (void)
{
- return GOMP_OFFLOAD_CAP_OPENACC_200;
+ return GOMP_OFFLOAD_CAP_OPENACC_200
+ | GOMP_OFFLOAD_CAP_OPENMP_400; /* Both capability flags are OR-ed into the returned mask.  */
}
int
@@ -1759,7 +1760,7 @@ GOMP_OFFLOAD_openacc_parallel (void (*fn
void *targ_mem_desc)
{
nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs,
- num_workers, vector_length, async, targ_mem_desc);
+ num_workers, vector_length, async, targ_mem_desc);
}
void
@@ -1889,3 +1890,27 @@ GOMP_OFFLOAD_openacc_set_cuda_stream (in
{
return nvptx_set_cuda_stream (async, stream);
}
+
+void
+GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars)
+{ /* Launch TGT_FN's kernel for device ORD, then wait on the CUDA context.  */
+ CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn; /* TGT_FN is treated as a targ_fn_descriptor.  */
+ CUresult r;
+ struct ptx_device *ptx_dev = ptx_devices[ord]; /* ORD indexes ptx_devices; it is not range-checked here.  */
+ const char *maybe_abort_msg = "(perhaps abort was called)";
+ void *args = &tgt_vars; /* One-entry kernelParams array: the entry points at TGT_VARS.  */
+
+ r = cuLaunchKernel (function,
+ 1, 1, 1, /* Grid dimensions (x, y, z).  */
+ 1, 1, 1, /* Block dimensions (x, y, z).  */
+ 0, ptx_dev->null_stream->stream, &args, 0); /* No dynamic shared memory; the device's null stream; no extra options.  */
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+
+ r = cuCtxSynchronize (); /* Wait for the work queued in the current context.  */
+ if (r == CUDA_ERROR_LAUNCH_FAILED) /* This code alone gets the abort hint appended.  */
+ GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
+ maybe_abort_msg);
+ else if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
+}