diff mbox

[gomp4] PTX target format

Message ID 55B95A80.2070008@acm.org
State New
Headers show

Commit Message

Nathan Sidwell July 29, 2015, 10:58 p.m. UTC
I've committed this to the gomp4 branch.  It changes the PTX target data format
from a single string array with embedded NULs to an array of pointer & size
pairs, each referring to a separate string for one object file.  This avoids
the use of strlen when loading onto the PTX device.

I'm not incrementing the PTX version number, as it was just incremented for the
launch API change.

nathan
2015-07-29  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* config/nvptx/mkoffload.c (process): Reimplement emission of ptx
	objects to set of arrays.

	libgomp/
	* plugin/plugin-nvptx.c (struct targ_ptx_obj): New.
	(struct nvptx_tdata): Move earlier, adjust.
	(link_ptx): Take targ_ptx_obj array and adjust.
	(GOMP_OFFLOAD_load_image_ver): Adjust link_ptx call.
diff mbox

Patch

Index: libgomp/plugin/plugin-nvptx.c
===================================================================
--- libgomp/plugin/plugin-nvptx.c	(revision 226371)
+++ libgomp/plugin/plugin-nvptx.c	(working copy)
@@ -290,6 +290,28 @@  struct targ_fn_launch
   unsigned short dim[GOMP_DIM_MAX];
 };
 
+/* Target PTX object information.  */
+
+struct targ_ptx_obj
+{
+  const char *code;
+  size_t size;
+};
+
+/* Target data image information.  */
+
+typedef struct nvptx_tdata
+{
+  const struct targ_ptx_obj *ptx_objs;
+  unsigned ptx_num;
+
+  const char *const *var_names;
+  unsigned var_num;
+
+  const struct targ_fn_launch *fn_descs;
+  unsigned fn_num;
+} nvptx_tdata_t;
+
 /* Descriptor of a loaded function.  */
 
 struct targ_fn_descriptor
@@ -824,7 +846,8 @@  nvptx_get_num_devices (void)
 
 
 static void
-link_ptx (CUmodule *module, char const *ptx_code, size_t length)
+link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
+	  unsigned num_objs)
 {
   CUjit_option opts[7];
   void *optvals[7];
@@ -838,8 +861,6 @@  link_ptx (CUmodule *module, char const *
   void *linkout;
   size_t linkoutsize __attribute__ ((unused));
 
-  GOMP_PLUGIN_debug (0, "attempting to load:\n---\n%s\n---\n", ptx_code);
-
   opts[0] = CU_JIT_WALL_TIME;
   optvals[0] = &elapsed;
 
@@ -865,25 +886,22 @@  link_ptx (CUmodule *module, char const *
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
 
-  size_t off = 0;
-  while (off < length)
+  for (; num_objs--; ptx_objs++)
     {
-      int l = strlen (ptx_code + off);
       /* cuLinkAddData's 'data' argument erroneously omits the const
 	 qualifier.  */
-      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, (char*)ptx_code + off, l + 1,
-			 0, 0, 0, 0);
+      GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
+      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, (char*)ptx_objs->code,
+			 ptx_objs->size, 0, 0, 0, 0);
       if (r != CUDA_SUCCESS)
 	{
 	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s",
+			     cuda_error (r));
 	}
-
-      off += l;
-      while (off < length && ptx_code[off] == '\0')
-	off++;
     }
 
+  GOMP_PLUGIN_debug (0, "Linking\n");
   r = cuLinkComplete (linkstate, &linkout, &linkoutsize);
 
   GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
@@ -1619,18 +1637,6 @@  GOMP_OFFLOAD_fini_device (int n)
   pthread_mutex_unlock (&ptx_dev_lock);
 }
 
-typedef struct nvptx_tdata
-{
-  const char *ptx_src;
-  size_t ptx_len;
-
-  const char *const *var_names;
-  size_t var_num;
-
-  const struct targ_fn_launch *fn_descs;
-  size_t fn_num;
-} nvptx_tdata_t;
-
 /* Return the libgomp version number we're compatible with.  There is
    no requirement for cross-version compatibility.  */
 
@@ -1670,7 +1676,7 @@  GOMP_OFFLOAD_load_image_ver (unsigned ve
   
   nvptx_attach_host_thread_to_device (ord);
 
-  link_ptx (&module, img_header->ptx_src, img_header->ptx_len);
+  link_ptx (&module, img_header->ptx_objs, img_header->ptx_num);
 
   /* The mkoffload utility emits a struct of pointers/integers at the
      start of each offload image.  The array of kernel names and the
Index: gcc/config/nvptx/mkoffload.c
===================================================================
--- gcc/config/nvptx/mkoffload.c	(revision 226371)
+++ gcc/config/nvptx/mkoffload.c	(working copy)
@@ -229,51 +229,68 @@  process (FILE *in, FILE *out)
   const char *input = read_file (in, &len);
   const char *comma;
   id_map const *id;
+  unsigned obj_count = 0;
+  size_t i;
 
-  fprintf (out, "static const char ptx_code[] = \n \"");
-  for (size_t i = 0; i < len; i++)
+  /* Dump out char arrays for each PTX object file.  These are
+     terminated by a NUL.  */
+  for (i = 0; i != len;)
     {
-      char c = input[i];
-      bool nl = false;
-      switch (c)
+      char c;
+      
+      fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
+      while ((c = input[i++]))
 	{
-	case '\0':
-	  putc ('\\', out);
-	  c = '0';
-	  break;
-	case '\r':
-	  continue;
-	case '\n':
-	  putc ('\\', out);
-	  c = 'n';
-	  nl = true;
-	  break;
-	case '"':
-	case '\\':
-	  putc ('\\', out);
-	  break;
-
-	case '/':
-	  if (strncmp (input + i, "//:VAR_MAP ", 11) == 0)
-	    record_id (input + i + 11, &vars_tail);
-	  if (strncmp (input + i, "//:FUNC_MAP ", 12) == 0)
-	    record_id (input + i + 12, &funcs_tail);
-	  break;
-
-	default:
-	  break;
+	  switch (c)
+	    {
+	    case '\r':
+	      continue;
+	    case '\n':
+	      fprintf (out, "\\n\"\n\t\"");
+	      /* Look for mappings on subsequent lines.  */
+	      while (strncmp (input + i, "//:", 3) == 0)
+		{
+		  i += 3;
+
+		  if (strncmp (input + i, "VAR_MAP ", 8) == 0)
+		    record_id (input + i + 8, &vars_tail);
+		  else if (strncmp (input + i, "FUNC_MAP ", 9) == 0)
+		    record_id (input + i + 9, &funcs_tail);
+		  else
+		    abort ();
+		  /* Skip to next line. */
+		  while (input[i++] != '\n')
+		    continue;
+		}
+	      continue;
+	    case '"':
+	    case '\\':
+	      putc ('\\', out);
+	      break;
+	    default:
+	      break;
+	    }
+	  putc (c, out);
 	}
-      putc (c, out);
-      if (nl)
-	fputs ("\"\n\t\"", out);
+      fprintf (out, "\";\n\n");
     }
-  fprintf (out, "\";\n\n");
 
+  /* Dump out array of pointers to ptx object strings.  */
+  fprintf (out, "static const struct ptx_obj {\n"
+	   "  const char *code;\n"
+	   "  __SIZE_TYPE__ size;\n"
+	   "} ptx_objs[] = {");
+  for (comma = "", i = 0; i != obj_count; comma = ",", i++)
+    fprintf (out, "%s\n\t{ptx_code_%u, sizeof (ptx_code_%u)}", comma, i, i);
+  fprintf (out, "\n};\n\n");
+
+  /* Dump out variable idents.  */
   fprintf (out, "static const char *const var_mappings[] = {");
   for (comma = "", id = var_ids; id; comma = ",", id = id->next)
     fprintf (out, "%s\n\t%s", comma, id->ptx_name);
   fprintf (out, "\n};\n\n");
 
+  /* Dump out function idents.  */
   fprintf (out, "static const struct nvptx_fn {\n"
 	   "  const char *name;\n"
 	   "  unsigned short dim[%d];\n"
@@ -284,14 +301,14 @@  process (FILE *in, FILE *out)
 
   fprintf (out,
 	   "static const struct nvptx_tdata {\n"
-	   "  const char *ptx_src;\n"
-	   "  __SIZE_TYPE__ ptx_len;\n"
+	   "  const struct ptx_obj *ptx_objs;\n"
+	   "  unsigned ptx_num;\n"
 	   "  const char *const *var_names;\n"
-	   "  __SIZE_TYPE__ var_num;\n"
+	   "  unsigned var_num;\n"
 	   "  const struct nvptx_fn *fn_names;\n"
-	   "  __SIZE_TYPE__ fn_num;\n"
+	   "  unsigned fn_num;\n"
 	   "} target_data = {\n"
-	   "  ptx_code, sizeof (ptx_code),\n"
+	   "  ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n"
 	   "  var_mappings,"
 	   "  sizeof (var_mappings) / sizeof (var_mappings[0]),\n"
 	   "  func_mappings,"