diff mbox

[3/4] Handle GOMP_OPENACC_NVPTX_JIT=-O[0-4] in libgomp nvptx plugin

Message ID 5b55fc1c-2e52-ea57-787d-e56e5fe1c06d@mentor.com
State New
Headers show

Commit Message

Tom de Vries June 26, 2017, 11:48 a.m. UTC
[ reposting with proper subject ]

On 06/26/2017 01:42 PM, Tom de Vries wrote:
> On 06/26/2017 01:24 PM, Tom de Vries wrote:
>> Hi,
>>
>> I've written a patch series to facilitate debugging libgomp openacc 
>> testcase failures on the nvptx accelerator.
>>
>>
>> When running an openacc test-case on an nvptx accelerator, the 
>> following happens:
>> - the plugin obtains the ptx assembly for the acceleration kernels
>> - it calls the cuda jit to compile and link the ptx into a module
>> - it loads the module
>> - it starts an acceleration kernel
>>
>> The patch series adds these environment variables:
>> - GOMP_OPENACC_NVPTX_SAVE_TEMPS: a means to save the resulting module
>>    such that it can be investigated using nvdisasm and cuobjdump.
>> - GOMP_OPENACC_NVPTX_DISASM: a means to see the resulting module in
>>    the debug output, by writing it into a file and calling nvdisasm on
>>    it
>> - GOMP_OPENACC_NVPTX_JIT: a means to set parameters of the
>>    compilation/linking process, currently supporting:
>>    * -O[0-4], mapping onto CU_JIT_OPTIMIZATION_LEVEL
>>    * -ori, mapping onto CU_JIT_NEW_SM3X_OPT
>>
>>
>> The patch series consists of these patches:
>>
>> 3. Handle GOMP_OPENACC_NVPTX_JIT=-O[0-4] in libgomp nvptx plugin
> 
> This patch adds handling of GOMP_OPENACC_NVPTX_JIT=-O[0-4].

Thanks,
- Tom
diff mbox

Patch

Handle GOMP_OPENACC_NVPTX_JIT=-O[0-4] in libgomp nvptx plugin

2017-06-26  Tom de Vries  <tom@codesourcery.com>

	* plugin/cuda/cuda.h (enum CUjit_option): Add CU_JIT_OPTIMIZATION_LEVEL.
	* plugin/plugin-nvptx.c (process_GOMP_OPENACC_NVPTX_JIT): New function.
	(link_ptx): Add CU_JIT_OPTIMIZATION_LEVEL to opts.

---
 libgomp/plugin/cuda/cuda.h    |  1 +
 libgomp/plugin/plugin-nvptx.c | 44 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/libgomp/plugin/cuda/cuda.h b/libgomp/plugin/cuda/cuda.h
index 25d5d19..75dfe3d 100644
--- a/libgomp/plugin/cuda/cuda.h
+++ b/libgomp/plugin/cuda/cuda.h
@@ -88,6 +88,7 @@  typedef enum {
   CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
   CU_JIT_ERROR_LOG_BUFFER = 5,
   CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
+  CU_JIT_OPTIMIZATION_LEVEL = 7,
   CU_JIT_LOG_VERBOSE = 12
 } CUjit_option;
 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index df1bfdd..3cd5557 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -977,12 +977,43 @@  debug_linkout (void *linkout, size_t linkoutsize)
     }
 }
 
+static void
+process_GOMP_OPENACC_NVPTX_JIT (intptr_t *gomp_openacc_nvptx_o)
+{
+  const char *var_name = "GOMP_OPENACC_NVPTX_JIT";
+  const char *env_var = getenv (var_name);
+  notify_var (var_name, env_var);
+
+  *gomp_openacc_nvptx_o = 4;
+  if (env_var == NULL)
+    return;
+
+  const char *c = env_var;
+  while (*c != '\0')
+    {
+      while (*c == ' ')
+	c++;
+
+      if (c[0] == '-' && c[1] == 'O'
+	  && '0' <= c[2] && c[2] <= '4'
+	  && (c[3] == '\0' || c[3] == ' '))
+	{
+	  *gomp_openacc_nvptx_o = c[2] - '0';
+	  c += 3;
+	  continue;
+	}
+
+      GOMP_PLUGIN_error ("Error parsing %s", var_name);
+      break;
+    }
+}
+
 static bool
 link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
 	  unsigned num_objs)
 {
-  CUjit_option opts[6];
-  void *optvals[6];
+  CUjit_option opts[7];
+  void *optvals[7];
   float elapsed = 0.0;
   char elog[1024];
   char ilog[16384];
@@ -1009,7 +1040,14 @@  link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
   opts[5] = CU_JIT_LOG_VERBOSE;
   optvals[5] = (void *) 1;
 
-  CUDA_CALL (cuLinkCreate, 6, opts, optvals, &linkstate);
+  static intptr_t gomp_openacc_nvptx_o = -1;
+  if (gomp_openacc_nvptx_o == -1)
+    process_GOMP_OPENACC_NVPTX_JIT (&gomp_openacc_nvptx_o);
+
+  opts[6] = CU_JIT_OPTIMIZATION_LEVEL;
+  optvals[6] = (void *) gomp_openacc_nvptx_o;
+
+  CUDA_CALL (cuLinkCreate, 7, opts, optvals, &linkstate);
 
   for (; num_objs--; ptx_objs++)
     {