diff mbox

ptx offload data format

Message ID 560C5697.2040107@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Sept. 30, 2015, 9:39 p.m. UTC
I've merged this patch to trunk.  It changes the PTX offload data format to be 
an array of pointers to strings, preparing the way for the static linking patch 
that Thomas is working on.

For the moment, we retain the automatic linking in of the support functions 
during PTX JITing.  Some of the changes to link_ptx were done by Bernd a while back.

No change to the PTX ABI version number, as that just got incremented last week 
with the launch API change -- it's in a state of flux right now.

nathan
diff mbox

Patch

2015-09-30  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* config/nvptx/mkoffload.c (process): Change offload data format.

2015-09-30  Nathan Sidwell  <nathan@codesourcery.com>
	    Bernd Schmidt <bernds@codesourcery.com>

	libgomp/
	* plugin/plugin-nvptx.c (targ_fn_launch): Use GOMP_DIM_MAX.
	(struct targ_ptx_obj): New.
	(nvptx_tdata): Move earlier, change data format.
	(link_ptx): Take targ_ptx_obj ptr and count.  Allow multiple
	objects.
	(GOMP_OFFLOAD_load_image): Adjust.

Index: gcc/config/nvptx/mkoffload.c
===================================================================
--- gcc/config/nvptx/mkoffload.c	(revision 228242)
+++ gcc/config/nvptx/mkoffload.c	(working copy)
@@ -844,39 +844,53 @@  process (FILE *in, FILE *out)
   Token *tok = tokenize (input);
   const char *comma;
   id_map const *id;
+  unsigned obj_count = 0;
+  unsigned ix;
 
   do
     tok = parse_file (tok);
   while (tok->kind);
 
-  fprintf (out, "static const char ptx_code[] = \n");
+  fprintf (out, "static const char ptx_code_%u[] = \n", obj_count++);
   write_stmts (out, rev_stmts (decls));
   write_stmts (out, rev_stmts (vars));
   write_stmts (out, rev_stmts (fns));
   fprintf (out, ";\n\n");
 
+  /* Dump out array of pointers to ptx object strings.  */
+  fprintf (out, "static const struct ptx_obj {\n"
+	   "  const char *code;\n"
+	   "  __SIZE_TYPE__ size;\n"
+	   "} ptx_objs[] = {");
+  for (comma = "", ix = 0; ix != obj_count; comma = ",", ix++)
+    fprintf (out, "%s\n\t{ptx_code_%u, sizeof (ptx_code_%u)}", comma, ix, ix);
+  fprintf (out, "\n};\n\n");
+
+  /* Dump out variable idents.  */
   fprintf (out, "static const char *const var_mappings[] = {");
   for (comma = "", id = var_ids; id; comma = ",", id = id->next)
     fprintf (out, "%s\n\t%s", comma, id->ptx_name);
   fprintf (out, "\n};\n\n");
 
+  /* Dump out function idents.  */
   fprintf (out, "static const struct nvptx_fn {\n"
 	   "  const char *name;\n"
-	   "  unsigned short dim[3];\n"
-	   "} func_mappings[] = {\n");
+	   "  unsigned short dim[%d];\n"
+	   "} func_mappings[] = {\n", GOMP_DIM_MAX);
   for (comma = "", id = func_ids; id; comma = ",", id = id->next)
     fprintf (out, "%s\n\t{%s}", comma, id->ptx_name);
   fprintf (out, "\n};\n\n");
 
   fprintf (out,
 	   "static const struct nvptx_tdata {\n"
-	   "  const char *ptx_src;\n"
+	   "  const struct ptx_obj *ptx_objs;\n"
+	   "  unsigned ptx_num;\n"
 	   "  const char *const *var_names;\n"
-	   "  __SIZE_TYPE__ var_num;\n"
+	   "  unsigned var_num;\n"
 	   "  const struct nvptx_fn *fn_names;\n"
-	   "  __SIZE_TYPE__ fn_num;\n"
+	   "  unsigned fn_num;\n"
 	   "} target_data = {\n"
-	   "  ptx_code,\n"
+	   "  ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n"
 	   "  var_mappings,"
 	   "  sizeof (var_mappings) / sizeof (var_mappings[0]),\n"
 	   "  func_mappings,"
Index: libgomp/plugin/plugin-nvptx.c
===================================================================
--- libgomp/plugin/plugin-nvptx.c	(revision 228265)
+++ libgomp/plugin/plugin-nvptx.c	(working copy)
@@ -224,9 +224,31 @@  map_push (struct ptx_stream *s, int asyn
 struct targ_fn_launch
 {
   const char *fn;
-  unsigned short dim[3];
+  unsigned short dim[GOMP_DIM_MAX];
 };
 
+/* Target PTX object information.  */
+
+struct targ_ptx_obj
+{
+  const char *code;
+  size_t size;
+};
+
+/* Target data image information.  */
+
+typedef struct nvptx_tdata
+{
+  const struct targ_ptx_obj *ptx_objs;
+  unsigned ptx_num;
+
+  const char *const *var_names;
+  unsigned var_num;
+
+  const struct targ_fn_launch *fn_descs;
+  unsigned fn_num;
+} nvptx_tdata_t;
+
 /* Descriptor of a loaded function.  */
 
 struct targ_fn_descriptor
@@ -688,7 +710,8 @@  nvptx_get_num_devices (void)
 
 
 static void
-link_ptx (CUmodule *module, const char *ptx_code)
+link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
+	  unsigned num_objs)
 {
   CUjit_option opts[7];
   void *optvals[7];
@@ -702,8 +725,6 @@  link_ptx (CUmodule *module, const char *
   void *linkout;
   size_t linkoutsize __attribute__ ((unused));
 
-  GOMP_PLUGIN_debug (0, "attempting to load:\n---\n%s\n---\n", ptx_code);
-
   opts[0] = CU_JIT_WALL_TIME;
   optvals[0] = &elapsed;
 
@@ -758,25 +779,37 @@  link_ptx (CUmodule *module, const char *
 			 cuda_error (r));
     }
 
-  /* cuLinkAddData's 'data' argument erroneously omits the const qualifier.  */
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, (char *)ptx_code,
-              strlen (ptx_code) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
+  for (; num_objs--; ptx_objs++)
     {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+      /* cuLinkAddData's 'data' argument erroneously omits the const
+	 qualifier.  */
+      GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
+      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, (char*)ptx_objs->code,
+			 ptx_objs->size, 0, 0, 0, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s",
+			     cuda_error (r));
+	}
     }
 
+  GOMP_PLUGIN_debug (0, "Linking\n");
   r = cuLinkComplete (linkstate, &linkout, &linkoutsize);
-  if (r != CUDA_SUCCESS)
-    GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
 
   GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
   GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);
 
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
+
   r = cuModuleLoadData (module, linkout);
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));
+
+  r = cuLinkDestroy (linkstate);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkDestory error: %s", cuda_error (r));
 }
 
 static void
@@ -1502,19 +1535,6 @@  GOMP_OFFLOAD_fini_device (int n)
   pthread_mutex_unlock (&ptx_dev_lock);
 }
 
-/* Data emitted by mkoffload.  */
-
-typedef struct nvptx_tdata
-{
-  const char *ptx_src;
-
-  const char *const *var_names;
-  size_t var_num;
-
-  const struct targ_fn_launch *fn_descs;
-  size_t fn_num;
-} nvptx_tdata_t;
-
 /* Return the libgomp version number we're compatible with.  There is
    no requirement for cross-version compatibility.  */
 
@@ -1553,7 +1573,7 @@  GOMP_OFFLOAD_load_image (int ord, unsign
   
   nvptx_attach_host_thread_to_device (ord);
 
-  link_ptx (&module, img_header->ptx_src);
+  link_ptx (&module, img_header->ptx_objs, img_header->ptx_num);
 
   /* The mkoffload utility emits a struct of pointers/integers at the
      start of each offload image.  The array of kernel names and the