diff mbox

Allow building GCC with PTX offloading even without CUDA being installed (gcc and nvptx-tools patches)

Message ID 87ziikwjp8.fsf@hertz.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge Jan. 21, 2017, 2:50 p.m. UTC
Hi!

On Fri, 13 Jan 2017 19:11:23 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> This is something that has been discussed already during the last Cauldron.
> Especially for distributions it is undesirable to need to have proprietary
> CUDA libraries and headers installed when building GCC.

ACK.

> These two patches allow building GCC without CUDA around in a way that later
> on can offload to PTX if libcuda.so.1 is installed

Thanks!

I'd like to have some additional changes done; see the attached patch,
and also some further comments below.

> In order to configure gcc to load libcuda.so.1 dynamically,
> one has to either configure it --without-cuda-driver, or without
> --with-cuda-driver=/--with-cuda-driver-lib=/--with-cuda-driver-include=
> options if cuda.h and -lcuda aren't found in the default locations.

Would be good to have that documented ;-) -- done.

> The nvptx-tools change

(I'll get to that later.)

> --- libgomp/plugin/configfrag.ac.jj	2017-01-13 12:07:56.000000000 +0100
> +++ libgomp/plugin/configfrag.ac	2017-01-13 17:33:26.608240936 +0100
> @@ -58,10 +58,12 @@ AC_ARG_WITH(cuda-driver-include,
>  AC_ARG_WITH(cuda-driver-lib,
>  	[AS_HELP_STRING([--with-cuda-driver-lib=PATH],
>  		[specify directory for the installed CUDA driver library])])
> -if test "x$with_cuda_driver" != x; then
> -  CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
> -  CUDA_DRIVER_LIB=$with_cuda_driver/lib
> -fi
> +case "x$with_cuda_driver" in
> +  x | xno) ;;
> +  *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
> +     CUDA_DRIVER_LIB=$with_cuda_driver/lib
> +     ;;
> +esac

I (obviously) agree with your intended (?) "--without-cuda-driver"
semantics, but I think a "--with-cuda-driver" option should actually mean
that the system's/installed CUDA driver package *must* be used (and
similar for other "--with-cuda-driver*" options); and I also added
"--with-cuda-driver=check" to allow overriding earlier such options (that
is, restore the default "check" behavior).

I say 'intended (?) "--without-cuda-driver" semantics', because with your
current patch/code, if I got that right, if one specifies
"--without-cuda-driver" but actually does have a CUDA driver system
installation available, then the nvptx libgomp plugin will still link
against that one, instead of "dlopen"ing it.  So I changed that
accordingly.

> +PLUGIN_NVPTX_DYNAMIC=0

I find the name "PLUGIN_NVPTX_DYNAMIC" a bit misleading, as this isn't
about the nvptx plugin being "dynamic" but rather it's about its usage of
the CUDA driver library.  Thus renamed to "CUDA_DRIVER_DYNAMIC".

> @@ -167,9 +170,17 @@ if test x"$enable_offload_targets" != x;
>  	LIBS=$PLUGIN_NVPTX_save_LIBS
>  	case $PLUGIN_NVPTX in
>  	  nvptx*)
> -	    PLUGIN_NVPTX=0
> -	    AC_MSG_ERROR([CUDA driver package required for nvptx support])
> -	    ;;
> +	    if test "x$CUDA_DRIVER_INCLUDE" = x \
> +	       && test "x$CUDA_DRIVER_LIB" = x; then
> +	      PLUGIN_NVPTX=1
> +	      PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
> +	      PLUGIN_NVPTX_LIBS='-ldl'
> +	      PLUGIN_NVPTX_DYNAMIC=1
> +	    else
> +	      PLUGIN_NVPTX=0
> +	      AC_MSG_ERROR([CUDA driver package required for nvptx support])
> +	    fi
> +	  ;;
>  	esac

I reworked that logic to accommodate for the additional
"--with-cuda-driver" usage.

> --- libgomp/plugin/plugin-nvptx.c.jj	2017-01-13 12:07:56.000000000 +0100
> +++ libgomp/plugin/plugin-nvptx.c	2017-01-13 18:00:39.693284346 +0100

> +/* -1 if init_cuda_lib has not been called yet, false
> +   if it has been and failed, true if it has been and succeeded.  */
> +static char cuda_lib_inited = -1;

Don't we actually have to worry here about multiple threads running into
this in parallel -- thus need locking (or atomic accesses?) when
accessing "cuda_lib_inited"?

> +/* Dynamically load the CUDA runtime library and initialize function

Not "CUDA runtime" but actually "CUDA driver" -- changed.

> +   pointers, return false if unsuccessful, true if successful.  */
> +static bool
> +init_cuda_lib (void)
> +{
> +  if (cuda_lib_inited != -1)
> +    return cuda_lib_inited;
> +  const char *cuda_runtime_lib = "libcuda.so.1";
> +  void *h = dlopen (cuda_runtime_lib, RTLD_LAZY);
> +  cuda_lib_inited = false;
> +  if (h == NULL)
> +    return false;

I'd like some GOMP_PLUGIN_debug output for this and the following "return
false" cases -- added.

> +# undef CUDA_ONE_CALL
> +# define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call)
> +# define CUDA_ONE_CALL_1(call) \
> +  cuda_lib.call = dlsym (h, #call);	\
> +  if (cuda_lib.call == NULL)		\
> +    return false;
> +  CUDA_CALLS
> +  cuda_lib_inited = true;
> +  return true;
>  }

> --- libgomp/plugin/cuda/cuda.h.jj	2017-01-13 15:58:00.966544147 +0100
> +++ libgomp/plugin/cuda/cuda.h	2017-01-13 17:02:47.355817896 +0100

> +#define CUDA_VERSION 8000

Does that make it compatible to CUDA 8.0 (and later) only?  (Not yet
checked.)

(Have not reviewed this new file any further.)


Currently testing the following patch; OK for trunk?

commit 4ef19c27a9567df03f82282b8ae6608c5d88472d
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Sat Jan 21 15:25:44 2017 +0100

    libgomp: Additional "--with-cuda-driver" changes
    
            gcc/
            * doc/install.texi: Document "--with-cuda-driver" and related
            options.
            libgomp/
            * plugin/plugin-nvptx.c (init_cuda_lib): Add GOMP_PLUGIN_debug
            calls.
            * plugin/configfrag.ac: Document "--with-cuda-driver" and related
            options.  Handle "--with-cuda-driver", "--with-cuda-driver=check",
            and "--without-cuda-driver" options.
            (PLUGIN_NVPTX_DYNAMIC): Rename to...
            (CUDA_DRIVER_DYNAMIC): ... this.  Adjust all users.
            * config.h.in: Regenerate.
            * configure: Likewise.
---
 gcc/doc/install.texi          |  23 +++++++
 libgomp/config.h.in           |   8 +--
 libgomp/configure             | 146 ++++++++++++++++++++++++++++++------------
 libgomp/plugin/configfrag.ac  | 139 +++++++++++++++++++++++++++-------------
 libgomp/plugin/plugin-nvptx.c |  32 +++++----
 5 files changed, 248 insertions(+), 100 deletions(-)



Grüße
 Thomas

Comments

Jakub Jelinek Jan. 21, 2017, 3:28 p.m. UTC | #1
On Sat, Jan 21, 2017 at 03:50:43PM +0100, Thomas Schwinge wrote:
> > In order to configure gcc to load libcuda.so.1 dynamically,
> > one has to either configure it --without-cuda-driver, or without
> > --with-cuda-driver=/--with-cuda-driver-lib=/--with-cuda-driver-include=
> > options if cuda.h and -lcuda aren't found in the default locations.
> 
> Would be good to have that documented ;-) -- done.

Thanks.

> I (obviously) agree with your intended (?) "--without-cuda-driver"
> semantics, but I think a "--with-cuda-driver" option should actually mean
> that the system's/installed CUDA driver package *must* be used (and
> similar for other "--with-cuda-driver*" options); and I also added
> "--with-cuda-driver=check" to allow overriding earlier such options (that
> is, restore the default "check" behavior).
> 
> I say 'intended (?) "--without-cuda-driver" semantics', because with your
> current patch/code, if I got that right, if one specifies
> "--without-cuda-driver" but actually does have a CUDA driver system
> installation available, then the nvptx libgomp plugin will still link
> against that one, instead of "dlopen"ing it.  So I changed that
> accordingly.

Agreed.

> > +PLUGIN_NVPTX_DYNAMIC=0
> 
> I find the name "PLUGIN_NVPTX_DYNAMIC" a bit misleading, as this isn't
> about the nvptx plugin being "dynamic" but rather it's about its usage of
> the CUDA driver library.  Thus renamed to "CUDA_DRIVER_DYNAMIC".

Ack.

> > --- libgomp/plugin/plugin-nvptx.c.jj	2017-01-13 12:07:56.000000000 +0100
> > +++ libgomp/plugin/plugin-nvptx.c	2017-01-13 18:00:39.693284346 +0100
> 
> > +/* -1 if init_cuda_lib has not been called yet, false
> > +   if it has been and failed, true if it has been and succeeded.  */
> > +static char cuda_lib_inited = -1;
> 
> Don't we actually have to worry here about multiple threads running into
> this in parallel -- thus need locking (or atomic accesses?) when
> accessing "cuda_lib_inited"?

I thought it is only accessed when a lock is held, but I could be wrong.
Also, please see my question about why we ever call cuInit in nvptx_init
(whether nvptx_get_num_devices doesn't have to be called first).

> > +/* Dynamically load the CUDA runtime library and initialize function
> 
> Not "CUDA runtime" but actually "CUDA driver" -- changed.

Ok.

> I'd like some GOMP_PLUGIN_debug output for this and the following "return
> false" cases -- added.

Ok.

> > --- libgomp/plugin/cuda/cuda.h.jj	2017-01-13 15:58:00.966544147 +0100
> > +++ libgomp/plugin/cuda/cuda.h	2017-01-13 17:02:47.355817896 +0100
> 
> > +#define CUDA_VERSION 8000
> 
> Does that make it compatible to CUDA 8.0 (and later) only?  (Not yet
> checked.)

The only reason for that is
#if CUDA_VERSION < 7000
  /* Specified in documentation and present in library from at least
     5.5.  Not declared in header file prior to 7.0.  */
  extern CUresult cuGetErrorString (CUresult, const char **);
#endif
I wanted to make it clear that cuGetErrorString prototype is provided.

I must say I don't know enough about ABI and API incompatibilities between
different CUDA versions, I presume functions with defines like:
#define cuLinkCreate cuLinkCreate_v2
at some point weren't using the _v2 suffixes, but have no idea if they had
different arguments or what.  Perhaps that would be supportable by having
some fallback if for those dlsym fails or something.

> @@ -48,26 +49,44 @@ AC_SUBST(CUDA_DRIVER_LIB)
>  CUDA_DRIVER_CPPFLAGS=
>  CUDA_DRIVER_LDFLAGS=
>  AC_ARG_WITH(cuda-driver,
> +	[AS_HELP_STRING([--without-cuda-driver],
> +		[do not use the system's CUDA driver package])])
> +AC_ARG_WITH(cuda-driver,
> +	[AS_HELP_STRING([--with-cuda-driver=check],
> +		[use the system's CUDA driver package, if usable [default]])])
> +AC_ARG_WITH(cuda-driver,
> +	[AS_HELP_STRING([--with-cuda-driver],
> +		[use the system's CUDA driver package])])
> +AC_ARG_WITH(cuda-driver,
>  	[AS_HELP_STRING([--with-cuda-driver=PATH],
> -		[specify prefix directory for installed CUDA driver package.
> -		 Equivalent to --with-cuda-driver-include=PATH/include
> -		 plus --with-cuda-driver-lib=PATH/lib])])
> +		[use installed CUDA driver package, and specify prefix
> +		directory.  Equivalent to
> +		--with-cuda-driver-include=PATH/include plus
> +		--with-cuda-driver-lib=PATH/lib])],
> +	[],
> +	[with_cuda_driver=check])

I admit my autoconf knowledge is limited, but it looks certainly strange
to have several AC_ARG_WITH for the same option.  Shouldn't we use
one AC_ARG_WITH(cuda-driver,
with multiple AS_HELP_STRING inside of its second argument?

>  AC_ARG_WITH(cuda-driver-include,
>  	[AS_HELP_STRING([--with-cuda-driver-include=PATH],
> -		[specify directory for installed CUDA driver include files])])
> +		[use installed CUDA driver package, and specify directory for
> +		include files])])
>  AC_ARG_WITH(cuda-driver-lib,
>  	[AS_HELP_STRING([--with-cuda-driver-lib=PATH],
> -		[specify directory for the installed CUDA driver library])])
> +		[use installed CUDA driver package, and specify directory for
> +		libraries])])
>  case "x$with_cuda_driver" in
> -  x | xno) ;;
> -  *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
> -     CUDA_DRIVER_LIB=$with_cuda_driver/lib
> -     ;;
> +  xcheck | xno | xyes)
> +    ;;
> +  *)
> +    CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
> +    CUDA_DRIVER_LIB=$with_cuda_driver/lib
> +    ;;
>  esac
>  if test "x$with_cuda_driver_include" != x; then
> +  CUDA_DRIVER_DYNAMIC=0
>    CUDA_DRIVER_INCLUDE=$with_cuda_driver_include
>  fi
>  if test "x$with_cuda_driver_lib" != x; then
> +  CUDA_DRIVER_DYNAMIC=0
>    CUDA_DRIVER_LIB=$with_cuda_driver_lib
>  fi
>  if test "x$CUDA_DRIVER_INCLUDE" != x; then
> @@ -76,12 +95,22 @@ fi
>  if test "x$CUDA_DRIVER_LIB" != x; then
>    CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
>  fi
> +case "x$with_cuda_driver" in
> +  xcheck)
> +    CUDA_DRIVER_DYNAMIC=check
> +    ;;
> +  xno)
> +    CUDA_DRIVER_DYNAMIC=1
> +    ;;
> +  xyes | *)
> +    CUDA_DRIVER_DYNAMIC=0
> +    ;;
> +esac

Why two separate case constructs?  Can't you do what you do in the second
in the first instead of that
> +  xcheck | xno | xyes)
> +    ;;
and just add CUDA_DRIVER_DYNAMIC=0 also to the *) case?
> +	case $CUDA_DRIVER_DYNAMIC in
> +	  1)
> +	    PLUGIN_NVPTX=1
> +	    ;;
> +	  check | 0)

Wouldn't it be far simpler to just use
	PLUGIN_NVPTX=1
	if test $CUDA_DRIVER_DYNAMIC != 1; then

> +	    # Determine whether the system's CUDA driver package is usable.
> +	    PLUGIN_NVPTX=0
> +
> +	    # Tentatively point to the system's CUDA driver package.
> +	    PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
> +	    PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
> +	    PLUGIN_NVPTX_LIBS=-lcuda
> +
> +	    PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
> +	    CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
> +	    PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
> +	    LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
> +	    PLUGIN_NVPTX_save_LIBS=$LIBS
> +	    LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
> +	    AC_LINK_IFELSE(
> +	      [AC_LANG_PROGRAM(
> +		[#include "cuda.h"],
> +		  [CUresult r = cuCtxPushCurrent (NULL);])],
> +	      [PLUGIN_NVPTX=1])
> +	    CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
> +	    LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
> +	    LIBS=$PLUGIN_NVPTX_save_LIBS

	fi

> +	    ;;
> +	  *)
> +	    AC_MSG_ERROR([internal error])
> +	    ;;
> +	esac

and drop the above?

> +	case $CUDA_DRIVER_DYNAMIC:$PLUGIN_NVPTX in
> +	  check:0)
> +	    CUDA_DRIVER_DYNAMIC=1
> +	    PLUGIN_NVPTX=1
> +	    ;;
> +	  check:1)
> +	    CUDA_DRIVER_DYNAMIC=0
> +	    ;;
> +	  0:1 | 1:1)
> +	    ;;
> +	  0:0)
> +	    AC_MSG_ERROR([CUDA driver package not usable])
> +	    ;;
> +	  *)
> +	    AC_MSG_ERROR([internal error])
> +	    ;;
>  	esac

This is fine.

> +	if test $CUDA_DRIVER_DYNAMIC = 1; then

Here you use very similar test rather than case (just an argument
why IMHO case is unnecessary).  But not a big deal for me.

> +  const char *cuda_driver_lib = "libcuda.so.1";
> +  void *h = dlopen (cuda_driver_lib, RTLD_LAZY);

Note the HSAIL plugin uses secure_getenv and allows an env var to override
the location of the runtime library.  Dunno if we don't want to do that too,
but in any case, it can be done incrementally.

Otherwise LGTM (and thanks for testing it and patch).

	Jakub
diff mbox

Patch

diff --git gcc/doc/install.texi gcc/doc/install.texi
index cccf812..769bdc5 100644
--- gcc/doc/install.texi
+++ gcc/doc/install.texi
@@ -2061,6 +2061,29 @@  If @samp{hsa} is specified as one of the targets, the compiler will be
 built with support for HSA GPU accelerators.  Because the same
 compiler will emit the accelerator code, no path should be specified.
 
+@item --without-cuda-driver
+@itemx --with-cuda-driver=check
+@itemx --with-cuda-driver
+@itemx --with-cuda-driver=@var{pathname}
+@itemx --with-cuda-driver-include=@var{pathname}
+@itemx --with-cuda-driver-lib=@var{pathname}
+
+If you configure GCC for nvptx offloading, @code{libgomp}'s nvptx
+plugin requires to use the CUDA driver package.  The default is to
+link against the system's installation, if usable.  If that is not
+available, or if @option{--without-cuda-driver} has been specified,
+the plugin will instead @code{dlopen} the CUDA driver library at
+run-time.  With the exception of @option{--with-cuda-driver=check}
+which overrides any earlier options and restores the default behavior,
+all other usage of @option{--with-cuda-driver=@var{pathname}},
+@option{--with-cuda-driver-include=@var{pathname}}, or
+@option{--with-cuda-driver-lib=@var{pathname}} sets the include and
+library paths accordingly, and causes the build to stop if the CUDA
+driver package in these locations is not usable.  The
+@option{--with-cuda-driver=@var{pathname}} option is a shorthand for
+@option{--with-cuda-driver-lib=@var{pathname}/lib} and
+@option{--with-cuda-driver-include=@var{pathname}/include}.
+
 @item --with-hsa-runtime=@var{pathname}
 @itemx --with-hsa-runtime-include=@var{pathname}
 @itemx --with-hsa-runtime-lib=@var{pathname}
diff --git libgomp/plugin/configfrag.ac libgomp/plugin/configfrag.ac
index c4a9279..c18e118 100644
--- libgomp/plugin/configfrag.ac
+++ libgomp/plugin/configfrag.ac
@@ -41,6 +41,7 @@  AC_CHECK_FUNCS_ONCE(secure_getenv __secure_getenv getuid geteuid getgid getegid)
 
 
 # Look for the CUDA driver package.
+CUDA_DRIVER_DYNAMIC=invalid
 CUDA_DRIVER_INCLUDE=
 CUDA_DRIVER_LIB=
 AC_SUBST(CUDA_DRIVER_INCLUDE)
@@ -48,26 +49,44 @@  AC_SUBST(CUDA_DRIVER_LIB)
 CUDA_DRIVER_CPPFLAGS=
 CUDA_DRIVER_LDFLAGS=
 AC_ARG_WITH(cuda-driver,
+	[AS_HELP_STRING([--without-cuda-driver],
+		[do not use the system's CUDA driver package])])
+AC_ARG_WITH(cuda-driver,
+	[AS_HELP_STRING([--with-cuda-driver=check],
+		[use the system's CUDA driver package, if usable [default]])])
+AC_ARG_WITH(cuda-driver,
+	[AS_HELP_STRING([--with-cuda-driver],
+		[use the system's CUDA driver package])])
+AC_ARG_WITH(cuda-driver,
 	[AS_HELP_STRING([--with-cuda-driver=PATH],
-		[specify prefix directory for installed CUDA driver package.
-		 Equivalent to --with-cuda-driver-include=PATH/include
-		 plus --with-cuda-driver-lib=PATH/lib])])
+		[use installed CUDA driver package, and specify prefix
+		directory.  Equivalent to
+		--with-cuda-driver-include=PATH/include plus
+		--with-cuda-driver-lib=PATH/lib])],
+	[],
+	[with_cuda_driver=check])
 AC_ARG_WITH(cuda-driver-include,
 	[AS_HELP_STRING([--with-cuda-driver-include=PATH],
-		[specify directory for installed CUDA driver include files])])
+		[use installed CUDA driver package, and specify directory for
+		include files])])
 AC_ARG_WITH(cuda-driver-lib,
 	[AS_HELP_STRING([--with-cuda-driver-lib=PATH],
-		[specify directory for the installed CUDA driver library])])
+		[use installed CUDA driver package, and specify directory for
+		libraries])])
 case "x$with_cuda_driver" in
-  x | xno) ;;
-  *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
-     CUDA_DRIVER_LIB=$with_cuda_driver/lib
-     ;;
+  xcheck | xno | xyes)
+    ;;
+  *)
+    CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+    CUDA_DRIVER_LIB=$with_cuda_driver/lib
+    ;;
 esac
 if test "x$with_cuda_driver_include" != x; then
+  CUDA_DRIVER_DYNAMIC=0
   CUDA_DRIVER_INCLUDE=$with_cuda_driver_include
 fi
 if test "x$with_cuda_driver_lib" != x; then
+  CUDA_DRIVER_DYNAMIC=0
   CUDA_DRIVER_LIB=$with_cuda_driver_lib
 fi
 if test "x$CUDA_DRIVER_INCLUDE" != x; then
@@ -76,12 +95,22 @@  fi
 if test "x$CUDA_DRIVER_LIB" != x; then
   CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
 fi
+case "x$with_cuda_driver" in
+  xcheck)
+    CUDA_DRIVER_DYNAMIC=check
+    ;;
+  xno)
+    CUDA_DRIVER_DYNAMIC=1
+    ;;
+  xyes | *)
+    CUDA_DRIVER_DYNAMIC=0
+    ;;
+esac
 
 PLUGIN_NVPTX=0
 PLUGIN_NVPTX_CPPFLAGS=
 PLUGIN_NVPTX_LDFLAGS=
 PLUGIN_NVPTX_LIBS=
-PLUGIN_NVPTX_DYNAMIC=0
 AC_SUBST(PLUGIN_NVPTX)
 AC_SUBST(PLUGIN_NVPTX_CPPFLAGS)
 AC_SUBST(PLUGIN_NVPTX_LDFLAGS)
@@ -149,39 +178,63 @@  if test x"$enable_offload_targets" != x; then
 	;;
       nvptx*)
         tgt_name=nvptx
-	PLUGIN_NVPTX=$tgt
-	PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
-	PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
-	PLUGIN_NVPTX_LIBS='-lcuda'
 
-	PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
-	CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
-	PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
-	LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
-	PLUGIN_NVPTX_save_LIBS=$LIBS
-	LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
-	AC_LINK_IFELSE(
-	  [AC_LANG_PROGRAM(
-	    [#include "cuda.h"],
-	      [CUresult r = cuCtxPushCurrent (NULL);])],
-	  [PLUGIN_NVPTX=1])
-	CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
-	LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
-	LIBS=$PLUGIN_NVPTX_save_LIBS
-	case $PLUGIN_NVPTX in
-	  nvptx*)
-	    if test "x$CUDA_DRIVER_INCLUDE" = x \
-	       && test "x$CUDA_DRIVER_LIB" = x; then
-	      PLUGIN_NVPTX=1
-	      PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
-	      PLUGIN_NVPTX_LIBS='-ldl'
-	      PLUGIN_NVPTX_DYNAMIC=1
-	    else
-	      PLUGIN_NVPTX=0
-	      AC_MSG_ERROR([CUDA driver package required for nvptx support])
-	    fi
-	  ;;
+	case $CUDA_DRIVER_DYNAMIC in
+	  1)
+	    PLUGIN_NVPTX=1
+	    ;;
+	  check | 0)
+	    # Determine whether the system's CUDA driver package is usable.
+	    PLUGIN_NVPTX=0
+
+	    # Tentatively point to the system's CUDA driver package.
+	    PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+	    PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+	    PLUGIN_NVPTX_LIBS=-lcuda
+
+	    PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+	    CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+	    PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+	    LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+	    PLUGIN_NVPTX_save_LIBS=$LIBS
+	    LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+	    AC_LINK_IFELSE(
+	      [AC_LANG_PROGRAM(
+		[#include "cuda.h"],
+		  [CUresult r = cuCtxPushCurrent (NULL);])],
+	      [PLUGIN_NVPTX=1])
+	    CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+	    LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+	    LIBS=$PLUGIN_NVPTX_save_LIBS
+	    ;;
+	  *)
+	    AC_MSG_ERROR([internal error])
+	    ;;
+	esac
+
+	case $CUDA_DRIVER_DYNAMIC:$PLUGIN_NVPTX in
+	  check:0)
+	    CUDA_DRIVER_DYNAMIC=1
+	    PLUGIN_NVPTX=1
+	    ;;
+	  check:1)
+	    CUDA_DRIVER_DYNAMIC=0
+	    ;;
+	  0:1 | 1:1)
+	    ;;
+	  0:0)
+	    AC_MSG_ERROR([CUDA driver package not usable])
+	    ;;
+	  *)
+	    AC_MSG_ERROR([internal error])
+	    ;;
 	esac
+	if test $CUDA_DRIVER_DYNAMIC = 1; then
+	  # Point to the "dynamic" files.
+	  PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
+	  PLUGIN_NVPTX_LDFLAGS=
+	  PLUGIN_NVPTX_LIBS=-ldl
+	fi
 	;;
       hsa*)
 	case "${target}" in
@@ -252,8 +305,8 @@  AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
 AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1])
 AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
   [Define to 1 if the NVIDIA plugin is built, 0 if not.])
-AC_DEFINE_UNQUOTED([PLUGIN_NVPTX_DYNAMIC], [$PLUGIN_NVPTX_DYNAMIC],
-  [Define to 1 if the NVIDIA plugin should dlopen libcuda.so.1, 0 if it should be linked against it.])
+AC_DEFINE_UNQUOTED([CUDA_DRIVER_DYNAMIC], [$CUDA_DRIVER_DYNAMIC],
+  [Define to 1 to dlopen the CUDA driver library, to 0 if linking against it.])
 AM_CONDITIONAL([PLUGIN_HSA], [test $PLUGIN_HSA = 1])
 AC_DEFINE_UNQUOTED([PLUGIN_HSA], [$PLUGIN_HSA],
   [Define to 1 if the HSA plugin is built, 0 if not.])
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index 4144218..e236af6 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -48,7 +48,7 @@ 
 #include <assert.h>
 #include <errno.h>
 
-#if PLUGIN_NVPTX_DYNAMIC
+#if CUDA_DRIVER_DYNAMIC
 # include <dlfcn.h>
 
 # define CUDA_CALLS \
@@ -103,40 +103,48 @@  CUDA_ONE_CALL (cuStreamWaitEvent)
 struct cuda_lib_s {
   CUDA_CALLS
 } cuda_lib;
+# undef CUDA_ONE_CALL
 
 /* -1 if init_cuda_lib has not been called yet, false
    if it has been and failed, true if it has been and succeeded.  */
 static char cuda_lib_inited = -1;
 
-/* Dynamically load the CUDA runtime library and initialize function
+/* Dynamically load the CUDA driver library and initialize function
    pointers, return false if unsuccessful, true if successful.  */
 static bool
 init_cuda_lib (void)
 {
+  GOMP_PLUGIN_debug (0, "%s; initially: cuda_lib_inited=%hhd\n",
+		     __FUNCTION__, cuda_lib_inited);
+
   if (cuda_lib_inited != -1)
     return cuda_lib_inited;
-  const char *cuda_runtime_lib = "libcuda.so.1";
-  void *h = dlopen (cuda_runtime_lib, RTLD_LAZY);
+  const char *cuda_driver_lib = "libcuda.so.1";
+  void *h = dlopen (cuda_driver_lib, RTLD_LAZY);
   cuda_lib_inited = false;
   if (h == NULL)
-    return false;
-# undef CUDA_ONE_CALL
+    goto dl_fail;
 # define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call)
 # define CUDA_ONE_CALL_1(call) \
   cuda_lib.call = dlsym (h, #call);	\
   if (cuda_lib.call == NULL)		\
-    return false;
+    goto dl_fail;
   CUDA_CALLS
-  cuda_lib_inited = true;
-  return true;
-}
 # undef CUDA_ONE_CALL
 # undef CUDA_ONE_CALL_1
+  cuda_lib_inited = true;
+  return true;
+
+ dl_fail:
+  GOMP_PLUGIN_debug (0, "  while loading %s: %s\n",
+		     cuda_driver_lib, dlerror ());
+  return false;
+}
 # define CUDA_CALL_PREFIX cuda_lib.
-#else
+#else /* CUDA_DRIVER_DYNAMIC */
 # define CUDA_CALL_PREFIX
 # define init_cuda_lib() true
-#endif
+#endif /* CUDA_DRIVER_DYNAMIC */
 
 /* Convenience macros for the frequently used CUDA library call and
    error handling sequence as well as CUDA library calls that