diff mbox

[gomp4] PTX launch dimensions

Message ID 55BF5774.50502@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Aug. 3, 2015, 11:58 a.m. UTC
I've committed this to gomp4.  The PTX backend can now examine the OpenACC
function attribute to determine launch dimensions and figure out whether
vector-single or worker-single neutering is needed.

nathan
diff mbox

Patch

2015-08-03  Nathan Sidwell  <nathan@codesourcery.com>

	* config/nvptx/nvptx.c (nvptx_reorg): Check get_oacc_fn_attrib for
	launch dimensions and only do parallel processing when present.
	Check dimensions to determine neutering requirements.
	(nvptx_record_offload_symbol): Launch dimension attribute must be
	present on offloaded functions.

Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 226485)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -2980,13 +2980,42 @@  nvptx_reorg (void)
     if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
       regno_reg_rtx[i] = const0_rtx;
 
-  parallel *pars = nvptx_discover_pars (&bb_insn_map);
-
-  nvptx_process_pars (pars);
-  nvptx_neuter_pars (pars, (GOMP_DIM_MASK (GOMP_DIM_VECTOR)
-			    | GOMP_DIM_MASK (GOMP_DIM_WORKER)), 0);
-
-  delete pars;
+  /* Determine launch dimensions of the function.  If it is not an
+     offloaded function  (i.e. this is a regular compiler), the
+     function has no neutering.  */
+  tree attr = get_oacc_fn_attrib (current_function_decl);
+  if (attr)
+    {
+      unsigned mask = 0;
+      tree dims = TREE_VALUE (attr);
+      unsigned ix;
+
+      for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+	{
+	  unsigned HOST_WIDE_INT dim = 0;
+
+	  if (dims)
+	    {
+	      tree cst = TREE_VALUE (dims);
+
+	      dim = TREE_INT_CST_LOW (cst);
+	      dims = TREE_CHAIN (dims);
+	    }
+	  if (dim != 1)
+	    mask |= GOMP_DIM_MASK (ix);
+	}
+      /* If there is worker neutering, there must be vector
+	 neutering.  Otherwise the hardware will fail.  This really
+	 should be dealt with earlier because it indicates faulty
+	 logic in determining launch dimensions.  */
+      if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
+	mask |= GOMP_DIM_MASK (GOMP_DIM_VECTOR);
+
+      parallel *pars = nvptx_discover_pars (&bb_insn_map);
+      nvptx_process_pars (pars);
+      nvptx_neuter_pars (pars, mask, 0);
+      delete pars;
+    }
 
   nvptx_reorg_subreg ();
   
@@ -3073,32 +3102,25 @@  nvptx_record_offload_symbol (tree decl)
     case FUNCTION_DECL:
       {
 	tree attr = get_oacc_fn_attrib (decl);
-	tree dims = NULL_TREE;
+	tree dims = TREE_VALUE (attr);
 	unsigned ix;
 	
-	if (attr)
-	  dims = TREE_VALUE (attr);
 	fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
 		 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
 
-	for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+	for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
 	  {
-	    unsigned HOST_WIDE_INT dim = 0;
-	    if (dims)
-	      {
-		tree cst = TREE_VALUE (dims);
-
-		/* When device_type support is added an ealier pass
-		   should have massaged the attribute to be
-		   ptx-specific.  */
-		gcc_assert (TREE_CODE (cst) == INTEGER_CST);
-
-		dim = TREE_INT_CST_LOW (cst);
-		dims = TREE_CHAIN (dims);
-	      }
+	    tree cst = TREE_VALUE (dims);
+
+	    /* When device_type support is added an earlier pass
+	       should have massaged the attribute to be
+	       ptx-specific.  */
+	    gcc_assert (TREE_CODE (cst) == INTEGER_CST);
+
+	    unsigned HOST_WIDE_INT dim = TREE_INT_CST_LOW (cst);
 	    fprintf (asm_out_file, ", " HOST_WIDE_INT_PRINT_HEX, dim);
 	  }
-	
+
 	fprintf (asm_out_file, "\n");
       }
       break;