diff mbox

[2/5] omp-low: implement SIMT privatization, part 1

Message ID 1490197595-31938-3-git-send-email-amonakov@ispras.ru
State New
Headers show

Commit Message

Alexander Monakov March 22, 2017, 3:46 p.m. UTC
This patch adjusts privatization in OpenMP SIMD loops lowered for SIMT targets.
At lowering time, private variables receive the "omp simt private" attribute,
are listed in the argument list of the GOMP_SIMT_ENTER function call, and get a
clobbering assignment just prior to the GOMP_SIMT_EXIT function call.

The following patch will implement the second step: privatized variables are
converted to fields of a struct allocated by a call to GOMP_SIMT_ENTER_ALLOC.
This function is similar to __builtin_alloca_with_align, except that it
obtains per-SIMT-lane storage and implicitly performs target-specific actions;
on NVPTX that means a transition to per-lane softstacks and inverting the
uniform-simt mask.


	* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
	(expand_GOMP_SIMT_ENTER_ALLOC): New.
	(expand_GOMP_SIMT_EXIT): New.
	* internal-fn.def (GOMP_SIMT_ENTER): New internal function.
	(GOMP_SIMT_ENTER_ALLOC): Ditto.
	(GOMP_SIMT_EXIT): Ditto.
	* target-insns.def (omp_simt_enter): New insn.
	(omp_simt_exit): Ditto.
	* omp-low.c (struct omplow_simd_context): New fields simt_eargs,
	simt_dlist.
	(lower_rec_simd_input_clauses): Implement SIMT privatization.
	(lower_rec_input_clauses): Likewise.
	(lower_lastprivate_clauses): Handle SIMT privatization.

---
 gcc/internal-fn.c    |  42 ++++++++++++++++
 gcc/internal-fn.def  |   3 ++
 gcc/omp-low.c        | 133 +++++++++++++++++++++++++++++++++++++--------------
 gcc/target-insns.def |   2 +
 4 files changed, 143 insertions(+), 37 deletions(-)

Comments

Jakub Jelinek March 23, 2017, 10:31 a.m. UTC | #1
On Wed, Mar 22, 2017 at 06:46:32PM +0300, Alexander Monakov wrote:
> This patch adjusts privatization in OpenMP SIMD loops lowered for SIMT targets.
> At lowering time, private variables receive the "omp simt private" attribute,
> are listed in the argument list of the GOMP_SIMT_ENTER function call, and get a
> clobbering assignment just prior to the GOMP_SIMT_EXIT function call.
> 
> The following patch will implement the second step: privatized variables are
> converted to fields of a struct allocated by a call to GOMP_SIMT_ENTER_ALLOC.
> This function is similar to __builtin_alloca_with_align, except that it
> obtains per-SIMT-lane storage and implicitly performs target-specific actions;
> on NVPTX that means a transition to per-lane softstacks and inverting the
> uniform-simt mask.

Ok for trunk (if all the other patches are acked).

> 	* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
>         (expand_GOMP_SIMT_ENTER_ALLOC): New.
>         (expand_GOMP_SIMT_EXIT): New.
>         * internal-fn.def (GOMP_SIMT_ENTER): New internal function.
>         (GOMP_SIMT_ENTER_ALLOC): Ditto.
>         (GOMP_SIMT_EXIT): Ditto.
>         * target-insns.def (omp_simt_enter): New insn.
>         (omp_simt_exit): Ditto.
>         * omp-low.c (struct omplow_simd_context): New fields simt_eargs,
>         simt_dlist.
>         (lower_rec_simd_input_clauses): Implement SIMT privatization.
>         (lower_rec_input_clauses): Likewise.
>         (lower_lastprivate_clauses): Handle SIMT privatization.

	Jakub
diff mbox

Patch

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index df7b930..75fe027 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -166,6 +166,48 @@  expand_GOMP_USE_SIMT (internal_fn, gcall *)
   gcc_unreachable ();
 }
 
+/* This should get expanded in omp_device_lower pass.  */
+
+static void
+expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
+/* Allocate per-lane storage and begin non-uniform execution region.  */
+
+static void
+expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
+{
+  rtx target;
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs)
+    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  else
+    target = gen_reg_rtx (Pmode);
+  rtx size = expand_normal (gimple_call_arg (stmt, 0));
+  rtx align = expand_normal (gimple_call_arg (stmt, 1));
+  struct expand_operand ops[3];
+  create_output_operand (&ops[0], target, Pmode);
+  create_input_operand (&ops[1], size, Pmode);
+  create_input_operand (&ops[2], align, Pmode);
+  gcc_assert (targetm.have_omp_simt_enter ());
+  expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
+}
+
+/* Deallocate per-lane storage and leave non-uniform execution region.  */
+
+static void
+expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
+{
+  gcc_checking_assert (!gimple_call_lhs (stmt));
+  rtx arg = expand_normal (gimple_call_arg (stmt, 0));
+  struct expand_operand ops[1];
+  create_input_operand (&ops[0], arg, Pmode);
+  gcc_assert (targetm.have_omp_simt_exit ());
+  expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
+}
+
 /* Lane index on SIMT targets: thread index in the warp on NVPTX.  On targets
    without SIMT execution this should be expanded in omp_device_lower pass.  */
 
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 2ba69c9..e162d81 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -142,6 +142,9 @@  DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
 DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
 
 DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index c2c69cb..4199668 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3457,6 +3457,8 @@  omp_clause_aligned_alignment (tree clause)
 struct omplow_simd_context {
   tree idx;
   tree lane;
+  vec<tree, va_heap> simt_eargs;
+  gimple_seq simt_dlist;
   int max_vf;
   bool is_simt;
 };
@@ -3492,18 +3494,39 @@  lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
   if (sctx->max_vf == 1)
     return false;
 
-  tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
-  tree avar = create_tmp_var_raw (atype);
-  if (TREE_ADDRESSABLE (new_var))
-    TREE_ADDRESSABLE (avar) = 1;
-  DECL_ATTRIBUTES (avar)
-    = tree_cons (get_identifier ("omp simd array"), NULL,
-		 DECL_ATTRIBUTES (avar));
-  gimple_add_tmp_var (avar);
-  ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
-		 NULL_TREE, NULL_TREE);
-  lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
-		 NULL_TREE, NULL_TREE);
+  if (sctx->is_simt)
+    {
+      if (is_gimple_reg (new_var))
+	{
+	  ivar = lvar = new_var;
+	  return true;
+	}
+      tree type = TREE_TYPE (new_var), ptype = build_pointer_type (type);
+      ivar = lvar = create_tmp_var (type);
+      TREE_ADDRESSABLE (ivar) = 1;
+      DECL_ATTRIBUTES (ivar) = tree_cons (get_identifier ("omp simt private"),
+					  NULL, DECL_ATTRIBUTES (ivar));
+      sctx->simt_eargs.safe_push (build1 (ADDR_EXPR, ptype, ivar));
+      tree clobber = build_constructor (type, NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      gimple *g = gimple_build_assign (ivar, clobber);
+      gimple_seq_add_stmt (&sctx->simt_dlist, g);
+    }
+  else
+    {
+      tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
+      tree avar = create_tmp_var_raw (atype);
+      if (TREE_ADDRESSABLE (new_var))
+	TREE_ADDRESSABLE (avar) = 1;
+      DECL_ATTRIBUTES (avar)
+	= tree_cons (get_identifier ("omp simd array"), NULL,
+		     DECL_ATTRIBUTES (avar));
+      gimple_add_tmp_var (avar);
+      ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+		     NULL_TREE, NULL_TREE);
+      lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
+		     NULL_TREE, NULL_TREE);
+    }
   if (DECL_P (new_var))
     {
       SET_DECL_VALUE_EXPR (new_var, lvar);
@@ -3547,8 +3570,8 @@  lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
   bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
 		  && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
   omplow_simd_context sctx = omplow_simd_context ();
-  tree simt_lane = NULL_TREE;
-  tree ivar = NULL_TREE, lvar = NULL_TREE;
+  tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
+  tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
   gimple_seq llist[3] = { };
 
   copyin_seq = NULL;
@@ -3581,6 +3604,10 @@  lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	  continue;
 	}
 
+  /* Add a placeholder for simduid.  */
+  if (sctx.is_simt && sctx.max_vf != 1)
+    sctx.simt_eargs.safe_push (NULL_TREE);
+
   /* Do all the fixed sized types in the first pass, and the variable sized
      types in the second pass.  This makes sure that the scalar arguments to
      the variable sized types are processed before we use them in the
@@ -4468,21 +4495,43 @@  lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	}
     }
 
-  if (sctx.lane)
+  if (sctx.max_vf == 1)
+    sctx.is_simt = false;
+
+  if (sctx.lane || sctx.is_simt)
     {
-      tree uid = create_tmp_var (ptr_type_node, "simduid");
+      uid = create_tmp_var (ptr_type_node, "simduid");
       /* Don't want uninit warnings on simduid, it is always uninitialized,
 	 but we use it not for the value, but for the DECL_UID only.  */
       TREE_NO_WARNING (uid) = 1;
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
+      OMP_CLAUSE__SIMDUID__DECL (c) = uid;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
+      gimple_omp_for_set_clauses (ctx->stmt, c);
+    }
+  /* Emit GOMP_SIMT_ENTER () to enter non-uniform execution and allocate
+     privatized data.  Initialize pointers to privatized instances.  */
+  if (sctx.is_simt)
+    {
+      sctx.simt_eargs[0] = uid;
+      gimple *g
+	= gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, sctx.simt_eargs);
+      gimple_call_set_lhs (g, uid);
+      gimple_seq_add_stmt (ilist, g);
+      sctx.simt_eargs.release ();
+
+      simtrec = create_tmp_var (ptr_type_node, ".omp_simt");
+      g = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 1, uid);
+      gimple_call_set_lhs (g, simtrec);
+      gimple_seq_add_stmt (ilist, g);
+    }
+  if (sctx.lane)
+    {
       gimple *g
 	= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
       gimple_call_set_lhs (g, sctx.lane);
       gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
       gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
-      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
-      OMP_CLAUSE__SIMDUID__DECL (c) = uid;
-      OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
-      gimple_omp_for_set_clauses (ctx->stmt, c);
       g = gimple_build_assign (sctx.lane, INTEGER_CST,
 			       build_int_cst (unsigned_type_node, 0));
       gimple_seq_add_stmt (ilist, g);
@@ -4545,6 +4594,13 @@  lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	    gimple_seq_add_stmt (seq, gimple_build_label (end));
 	  }
     }
+  if (sctx.is_simt)
+    {
+      gimple_seq_add_seq (dlist, sctx.simt_dlist);
+      gimple *g
+	= gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, simtrec);
+      gimple_seq_add_stmt (dlist, g);
+    }
 
   /* The copyin sequence is not to be executed by the main thread, since
      that would result in self-copies.  Perhaps not visible to scalars,
@@ -4715,7 +4771,8 @@  lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
 	    {
 	      tree val = DECL_VALUE_EXPR (new_var);
-	      if (TREE_CODE (val) == ARRAY_REF
+	      if (!maybe_simt
+		  && TREE_CODE (val) == ARRAY_REF
 		  && VAR_P (TREE_OPERAND (val, 0))
 		  && lookup_attribute ("omp simd array",
 				       DECL_ATTRIBUTES (TREE_OPERAND (val,
@@ -4734,24 +4791,26 @@  lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 		  new_var = build4 (ARRAY_REF, TREE_TYPE (val),
 				    TREE_OPERAND (val, 0), lastlane,
 				    NULL_TREE, NULL_TREE);
-		  if (maybe_simt)
+		}
+	      else if (maybe_simt
+		       && VAR_P (val)
+		       && lookup_attribute ("omp simt private",
+					    DECL_ATTRIBUTES (val)))
+		{
+		  if (simtlast == NULL)
 		    {
-		      gcall *g;
-		      if (simtlast == NULL)
-			{
-			  simtlast = create_tmp_var (unsigned_type_node);
-			  g = gimple_build_call_internal
-			    (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
-			  gimple_call_set_lhs (g, simtlast);
-			  gimple_seq_add_stmt (stmt_list, g);
-			}
-		      x = build_call_expr_internal_loc
-			(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
-			 TREE_TYPE (new_var), 2, new_var, simtlast);
-		      new_var = unshare_expr (new_var);
-		      gimplify_assign (new_var, x, stmt_list);
-		      new_var = unshare_expr (new_var);
+		      simtlast = create_tmp_var (unsigned_type_node);
+		      gcall *g = gimple_build_call_internal
+			(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
+		      gimple_call_set_lhs (g, simtlast);
+		      gimple_seq_add_stmt (stmt_list, g);
 		    }
+		  x = build_call_expr_internal_loc
+		    (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
+		     TREE_TYPE (val), 2, val, simtlast);
+		  new_var = unshare_expr (new_var);
+		  gimplify_assign (new_var, x, stmt_list);
+		  new_var = unshare_expr (new_var);
 		}
 	    }
 
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index 2968c87..fb92f72 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -68,6 +68,8 @@  DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
 DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
 DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
 DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))