diff mbox series

[committed,libgomp,nvptx] Fall back to cuLinkAddData/cuLinkCreate if _v2 not found

Message ID 20180808143635.GA22569@delia
State New
Headers show
Series [committed,libgomp,nvptx] Fall back to cuLinkAddData/cuLinkCreate if _v2 not found | expand

Commit Message

Tom de Vries Aug. 8, 2018, 2:36 p.m. UTC
On Tue, Aug 07, 2018 at 06:52:59AM -0700, Cesar Philippidis wrote:

> I spotted an error
> with the patch; I realized that the cuda.h that ships with libgomp
> emulates CUDA version 8.0. That led to problems using cuLinkAddData,
> because that function gets remapped to cuLinkAddData_v2 in CUDA 6.5 and
> newer.
> 

Right. [ I found that the problem is already mentioned here:
( https://gcc.gnu.org/ml/gcc-patches/2017-01/msg01670.html ). ]

This patch should fix it.

Committed.

Thanks,
- Tom

[libgomp, nvptx] Fall back to cuLinkAddData/cuLinkCreate if _v2 not found

CUDA driver API functions cuLinkAddData and cuLinkCreate are available starting
with version 5.5.  In version 6.5, they are remapped onto _v2 versions.

The dlopen interface of the libgomp nvptx plugin uses the _v2 versions, so it
won't work with a cuda driver with driver api version lower than 6.5.

This patch fixes the problem by testing for the presence of the _v2 versions,
and falling back to the original versions in case of absence of the _v2
versions.

Built on x86_64 with nvptx accelerator and reg-tested libgomp, both with and
without --without-cuda-driver.

2018-08-08  Tom de Vries  <tdevries@suse.de>

	* plugin/cuda-lib.def (cuLinkAddData_v2, cuLinkCreate_v2): Declare using
	CUDA_ONE_CALL_MAYBE_NULL.
	* plugin/plugin-nvptx.c (cuLinkAddData, cuLinkCreate): Undef and declare.
	(cuLinkAddData_v2, cuLinkCreate_v2): Declare.
	(link_ptx): Fall back to cuLinkAddData/cuLinkCreate if the _v2 versions
	are not found.

---
 libgomp/plugin/cuda-lib.def   |  2 ++
 libgomp/plugin/plugin-nvptx.c | 28 ++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index 6365cdbfcbe4..29028b504a05 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -19,8 +19,10 @@  CUDA_ONE_CALL_MAYBE_NULL (cuGetErrorString)
 CUDA_ONE_CALL (cuInit)
 CUDA_ONE_CALL (cuLaunchKernel)
 CUDA_ONE_CALL (cuLinkAddData)
+CUDA_ONE_CALL_MAYBE_NULL (cuLinkAddData_v2)
 CUDA_ONE_CALL (cuLinkComplete)
 CUDA_ONE_CALL (cuLinkCreate)
+CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2)
 CUDA_ONE_CALL (cuLinkDestroy)
 CUDA_ONE_CALL (cuMemAlloc)
 CUDA_ONE_CALL (cuMemAllocHost)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index b549b7740039..6799a264976d 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -54,6 +54,18 @@  extern CUresult cuGetErrorString (CUresult, const char **);
 #define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 82
 #endif
 
+#if CUDA_VERSION >= 6050
+#undef cuLinkCreate
+#undef cuLinkAddData
+CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t,
+			const char *, unsigned, CUjit_option *, void **);
+CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *);
+#else
+CUresult cuLinkAddData_v2 (CUlinkState, CUjitInputType, void *, size_t,
+			   const char *, unsigned, CUjit_option *, void **);
+CUresult cuLinkCreate_v2 (unsigned, CUjit_option *, void **, CUlinkState *);
+#endif
+
 #define DO_PRAGMA(x) _Pragma (#x)
 
 #if PLUGIN_NVPTX_DYNAMIC
@@ -938,16 +950,24 @@  link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
       nopts++;
     }
 
-  CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
+  if (CUDA_CALL_EXISTS (cuLinkCreate_v2))
+    CUDA_CALL (cuLinkCreate_v2, nopts, opts, optvals, &linkstate);
+  else
+    CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
 
   for (; num_objs--; ptx_objs++)
     {
       /* cuLinkAddData's 'data' argument erroneously omits the const
 	 qualifier.  */
       GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
-      r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
-			     (char *) ptx_objs->code, ptx_objs->size,
-			     0, 0, 0, 0);
+      if (CUDA_CALL_EXISTS (cuLinkAddData_v2))
+	r = CUDA_CALL_NOCHECK (cuLinkAddData_v2, linkstate, CU_JIT_INPUT_PTX,
+			       (char *) ptx_objs->code, ptx_objs->size,
+			       0, 0, 0, 0);
+      else
+	r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
+			       (char *) ptx_objs->code, ptx_objs->size,
+			       0, 0, 0, 0);
       if (r != CUDA_SUCCESS)
 	{
 	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);