diff mbox

[gomp4] kernels offload fns

Message ID 56829657.9060106@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Dec. 29, 2015, 2:19 p.m. UTC
In developing a non-unity default partition mechanism I discovered there was no 
mechanism to reliably determine whether an offload was for a kernels region or 
not.  The tree-ssa pass uses a heuristic that is sufficient for its needs, but 
is not very clear.

This patch adjusts set_oacc_fn_attrib to accept a 'kernels' parameter, which it 
encodes in the TREE_PUBLIC flag of the attribute values.  I add an 
oacc_fn_attrib_kernels_p predicate and use it where needed.

(The defaulting mechanism needs to reliably distinguish kernels from parallel 
offload regions.)

nathan
diff mbox

Patch

2015-12-29  Nathan Sidwell  <nathan@acm.org>

	* omp-low.c (set_oacc_fn_attrib): Add IS_KERNEL arg, encode on
	TREE_PUBLIC.
	(oacc_fn_attrib_kernels_p): New.
	(oacc_fn_attrib_level): New.
	(expand_omp_target): Pass kernels_p to set_oacc_fn_attrib.
	(oacc_validate_dims): Add LEVEL arg, don't return it.
	(new_oacc_loop_routine): Use oacc_fn_attrib_level, not
	oacc_validate_dims.
	(execute_oacc_device_lower): Use oacc_fn_attrib_level, validate
	dimensions after discovering loops.  Add more dump info.
	* omp-low.h (set_oacc_fn_attrib): Add IS_KERNEL arg.
	(oacc_fn_attrib_kernels_p): Declare.
	* tree-parloops.c (create_parallel_loop): Adjust
	set_oacc_fn_attrib call.
	* tree-ssa-loop.c (gate_oacc_kernels): Use oacc_fn_attrib_kernels_p.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 231992)
+++ gcc/omp-low.c	(working copy)
@@ -12625,10 +12625,11 @@  replace_oacc_fn_attrib (tree fn, tree di
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
+   true, if these are for a kernels region offload function.  */
 
 void
-set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
+set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -12653,6 +12654,9 @@  set_oacc_fn_attrib (tree fn, tree clause
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
+      /* Note kernelness with TREE_PUBLIC.  */
+      if (is_kernel)
+	TREE_PUBLIC (attr) = 1;
     }
 
   replace_oacc_fn_attrib (fn, attr);
@@ -12721,6 +12725,36 @@  get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
+/* Return true if this oacc fn attrib is for a kernels offload
+   region.  We use the TREE_PUBLIC flag of each dimension -- only
+   need to check the first one.  */
+
+bool
+oacc_fn_attrib_kernels_p (tree attr)
+{
+  return TREE_PUBLIC (TREE_VALUE (attr));
+}
+
+/* Return level at which oacc routine may spawn a partitioned loop, or
+   -1 if it is not a routine (i.e. is an offload fn).  */
+
+int
+oacc_fn_attrib_level (tree attr)
+{
+  tree pos = TREE_VALUE (attr);
+
+  if (!TREE_PURPOSE (pos))
+    return -1;
+  
+  int ix = 0;
+  for (ix = 0; ix != GOMP_DIM_MAX;
+       ix++, pos = TREE_CHAIN (pos))
+    if (!integer_zerop (TREE_PURPOSE (pos)))
+      break;
+
+  return ix;
+}
+
 /* Extract an oacc execution dimension from FN.  FN must be an
    offloaded function or routine that has already had its execution
    dimensions lowered to the target-specific values.  */
@@ -13045,6 +13079,7 @@  expand_omp_target (struct omp_region *re
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
+  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -13064,8 +13099,10 @@  expand_omp_target (struct omp_region *re
       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
-    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
+      oacc_kernels_p = true;
+      /* FALLTHROUGH */
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
     case GF_OMP_TARGET_KIND_OACC_DATA:
@@ -13247,7 +13284,7 @@  expand_omp_target (struct omp_region *re
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
-	set_oacc_fn_attrib (child_fn, clauses, &args);
+	set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
 	tagging = true;
       }
       /* FALLTHRU */
@@ -19259,17 +19296,17 @@  oacc_xform_loop (gcall *call)
 }
 
 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
-   raw attribute.  DIMS is an array of dimensions, which is returned.
-   Returns the function level dimensionality --  the level at which an
-   offload routine wishes to partition a loop.  */
+   raw attribute.  DIMS is an array of dimensions, which is filled in.
+   LEVEL is the partitioning level of a routine, or -1 for an offload
+   region itself.  */
 
-static int
-oacc_validate_dims (tree fn, tree attrs, int *dims)
+static void
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
 {
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  int fn_level = -1;
+  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -19278,21 +19315,12 @@  oacc_validate_dims (tree fn, tree attrs,
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
     {
       purpose[ix] = TREE_PURPOSE (pos);
-
-      if (purpose[ix])
-	{
-	  if (integer_zerop (purpose[ix]))
-	    fn_level = ix + 1;
-	  else if (fn_level < 0)
-	    fn_level = ix;
-	}
-
       tree val = TREE_VALUE (pos);
       dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
       pos = TREE_CHAIN (pos);
     }
 
-  bool changed = targetm.goacc.validate_dims (fn, dims, fn_level);
+  bool changed = targetm.goacc.validate_dims (fn, dims, level);
 
   /* Default anything left to 1.  */
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
@@ -19307,13 +19335,15 @@  oacc_validate_dims (tree fn, tree attrs,
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	pos = tree_cons (purpose[ix],
-			 build_int_cst (integer_type_node, dims[ix]),
-			 pos);
+	{
+	  pos = tree_cons (purpose[ix],
+			   build_int_cst (integer_type_node, dims[ix]),
+			   pos);
+	  if (is_kernel)
+	    TREE_PUBLIC (pos) = 1;
+	}
       replace_oacc_fn_attrib (fn, pos);
     }
-
-  return fn_level;
 }
 
 /* Create an empty OpenACC loop structure at LOC.  */
@@ -19385,7 +19415,7 @@  new_oacc_loop_routine (oacc_loop *parent
 {
   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
   int dims[GOMP_DIM_MAX];
-  int level = oacc_validate_dims (decl, attrs, dims);
+  int level = oacc_fn_attrib_level (attrs);
 
   gcc_assert (level >= 0);
 
@@ -20015,13 +20045,30 @@  execute_oacc_device_lower ()
       return TODO_discard_function;
     }
 
-  int dims[GOMP_DIM_MAX];
-  int fn_level = oacc_validate_dims (current_function_decl, attr, dims);
-
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
+  int fn_level = oacc_fn_attrib_level (attr);
+
+  if (dump_file)
+    fprintf (dump_file, oacc_fn_attrib_kernels_p (attr)
+	     ? "Function is kernels offload\n"
+	     : fn_level < 0 ? "Function is parallel offload\n"
+	     : "Function is routine level %d\n", fn_level);
+
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   oacc_loop_partition (loops, outer_mask);
+
+  int dims[GOMP_DIM_MAX];
+  oacc_validate_dims (current_function_decl, attr, dims, fn_level);
+
+  if (dump_file)
+    {
+      const char *comma = "Compute dimensions [";
+      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
+	fprintf (dump_file, "%s%d", comma, dims[ix]);
+      fprintf (dump_file, "]\n");
+    }
+
   oacc_loop_process (loops);
   if (dump_file)
     {
Index: gcc/omp-low.h
===================================================================
--- gcc/omp-low.h	(revision 231992)
+++ gcc/omp-low.h	(working copy)
@@ -33,7 +33,8 @@  extern tree omp_member_access_dummy_var
 extern void replace_oacc_fn_attrib (tree, tree);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
+extern bool oacc_fn_attrib_kernels_p (tree);
+extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
Index: gcc/tree-parloops.c
===================================================================
--- gcc/tree-parloops.c	(revision 231992)
+++ gcc/tree-parloops.c	(working copy)
@@ -2054,7 +2054,7 @@  create_parallel_loop (struct loop *loop,
       tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
       OMP_CLAUSE_NUM_GANGS_EXPR (clause)
 	= build_int_cst (integer_type_node, n_threads);
-      set_oacc_fn_attrib (cfun->decl, clause, NULL);
+      set_oacc_fn_attrib (cfun->decl, clause, true, NULL);
     }
 
   /* Initialize NEW_DATA.  */
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c	(revision 231992)
+++ gcc/tree-ssa-loop.c	(working copy)
@@ -154,12 +154,7 @@  gate_oacc_kernels (function *fn)
   tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
   if (oacc_function_attr == NULL_TREE)
     return false;
-
-  tree val = TREE_VALUE (oacc_function_attr);
-  while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE)
-    val = TREE_CHAIN (val);
-
-  if (val != NULL_TREE)
+  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
     return false;
 
   struct loop *loop;