@@ -166,6 +166,48 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
gcc_unreachable ();
}
+/* Expander for IFN_GOMP_SIMT_ENTER. This should get expanded in omp_device_lower pass, so the call is not expected to still exist when this function would run. */
+
+static void
+expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
+{
+ gcc_unreachable (); /* Both parameters are unnamed and unused. */
+}
+
+/* Allocate per-lane storage and begin non-uniform execution region. Expands STMT through the omp_simt_enter target insn: operand 0 receives the result, operands 1 and 2 are call arguments 0 and 1 (named size and align here), all in Pmode. */
+
+static void
+expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
+{
+ rtx target;
+ tree lhs = gimple_call_lhs (stmt);
+ if (lhs)
+ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ else
+ target = gen_reg_rtx (Pmode); /* No lhs: the insn still needs an output operand, so hand it a fresh Pmode register. */
+ rtx size = expand_normal (gimple_call_arg (stmt, 0));
+ rtx align = expand_normal (gimple_call_arg (stmt, 1));
+ struct expand_operand ops[3];
+ create_output_operand (&ops[0], target, Pmode);
+ create_input_operand (&ops[1], size, Pmode);
+ create_input_operand (&ops[2], align, Pmode);
+ gcc_assert (targetm.have_omp_simt_enter ()); /* The target is required to provide the omp_simt_enter pattern. */
+ expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
+}
+
+/* Deallocate per-lane storage and leave non-uniform execution region. Expands STMT through the omp_simt_exit target insn, whose single Pmode input operand is call argument 0. */
+
+static void
+expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
+{
+ gcc_checking_assert (!gimple_call_lhs (stmt)); /* The call must not have a result. */
+ rtx arg = expand_normal (gimple_call_arg (stmt, 0));
+ struct expand_operand ops[1];
+ create_input_operand (&ops[0], arg, Pmode);
+ gcc_assert (targetm.have_omp_simt_exit ()); /* The target is required to provide the omp_simt_exit pattern. */
+ expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
+}
+
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
without SIMT execution this should be expanded in omp_device_lower pass. */
@@ -142,6 +142,9 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL) /* Unlike the neighbouring GOMP_SIMT_* entries, these three new functions are not marked ECF_NOVOPS. */
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL) /* Expanded through the omp_simt_enter target insn. */
+DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL) /* Expanded through the omp_simt_exit target insn. */
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
@@ -3457,6 +3457,8 @@ omp_clause_aligned_alignment (tree clause)
struct omplow_simd_context {
tree idx;
tree lane;
+ vec<tree, va_heap> simt_eargs; /* Arguments gathered for the IFN_GOMP_SIMT_ENTER call: slot 0 is reserved for simduid, the rest are addresses of SIMT-private copies. */
+ gimple_seq simt_dlist; /* Clobbers of the SIMT-private copies, appended to the destructor list ahead of the IFN_GOMP_SIMT_EXIT call. */
int max_vf;
bool is_simt;
};
@@ -3492,18 +3494,39 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
if (sctx->max_vf == 1)
return false;
- tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
- tree avar = create_tmp_var_raw (atype);
- if (TREE_ADDRESSABLE (new_var))
- TREE_ADDRESSABLE (avar) = 1;
- DECL_ATTRIBUTES (avar)
- = tree_cons (get_identifier ("omp simd array"), NULL,
- DECL_ATTRIBUTES (avar));
- gimple_add_tmp_var (avar);
- ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
- NULL_TREE, NULL_TREE);
- lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
- NULL_TREE, NULL_TREE);
+ if (sctx->is_simt) /* SIMT path: one scalar copy serves as both ivar and lvar, instead of an array indexed by idx or lane. */
+ {
+ if (is_gimple_reg (new_var)) /* Register-type variables are used as-is: no temporary is created and no address is recorded. */
+ {
+ ivar = lvar = new_var;
+ return true;
+ }
+ tree type = TREE_TYPE (new_var), ptype = build_pointer_type (type);
+ ivar = lvar = create_tmp_var (type);
+ TREE_ADDRESSABLE (ivar) = 1; /* Its address is taken a few lines below. */
+ DECL_ATTRIBUTES (ivar) = tree_cons (get_identifier ("omp simt private"),
+ NULL, DECL_ATTRIBUTES (ivar)); /* Tag the copy; lower_lastprivate_clauses looks this attribute up. */
+ sctx->simt_eargs.safe_push (build1 (ADDR_EXPR, ptype, ivar)); /* Queue the address of the copy as an argument of the IFN_GOMP_SIMT_ENTER call. */
+ tree clobber = build_constructor (type, NULL);
+ TREE_THIS_VOLATILE (clobber) = 1; /* An empty volatile constructor: assigning it marks the copy as clobbered. */
+ gimple *g = gimple_build_assign (ivar, clobber);
+ gimple_seq_add_stmt (&sctx->simt_dlist, g); /* Deferred: simt_dlist is emitted into the destructor list later. */
+ }
+ else /* Non-SIMT path: an "omp simd array" of max_vf elements, with ivar indexed by idx and lvar indexed by lane. */
+ {
+ tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
+ tree avar = create_tmp_var_raw (atype);
+ if (TREE_ADDRESSABLE (new_var))
+ TREE_ADDRESSABLE (avar) = 1;
+ DECL_ATTRIBUTES (avar)
+ = tree_cons (get_identifier ("omp simd array"), NULL,
+ DECL_ATTRIBUTES (avar));
+ gimple_add_tmp_var (avar);
+ ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+ NULL_TREE, NULL_TREE);
+ lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
+ NULL_TREE, NULL_TREE);
+ }
if (DECL_P (new_var))
{
SET_DECL_VALUE_EXPR (new_var, lvar);
@@ -3547,8 +3570,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
omplow_simd_context sctx = omplow_simd_context ();
- tree simt_lane = NULL_TREE;
- tree ivar = NULL_TREE, lvar = NULL_TREE;
+ tree simt_lane = NULL_TREE, simtrec = NULL_TREE; /* simtrec: result of IFN_GOMP_SIMT_ENTER_ALLOC, later passed to IFN_GOMP_SIMT_EXIT. */
+ tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE; /* uid: the "simduid" temporary, declared here so that several later blocks can share it. */
gimple_seq llist[3] = { };
copyin_seq = NULL;
@@ -3581,6 +3604,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
continue;
}
+ /* Add a placeholder for simduid. It occupies slot 0 of the IFN_GOMP_SIMT_ENTER argument vector and is overwritten with the real decl once simduid has been created. */
+ if (sctx.is_simt && sctx.max_vf != 1)
+ sctx.simt_eargs.safe_push (NULL_TREE);
+
/* Do all the fixed sized types in the first pass, and the variable sized
types in the second pass. This makes sure that the scalar arguments to
the variable sized types are processed before we use them in the
@@ -4468,21 +4495,43 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
}
}
- if (sctx.lane)
+ if (sctx.max_vf == 1)
+ sctx.is_simt = false; /* With max_vf of 1 all SIMT-specific lowering below is skipped. NOTE(review): simt_eargs is released only inside the is_simt block further down; confirm nothing was pushed on this path. */
+
+ if (sctx.lane || sctx.is_simt) /* simduid is needed for SIMT lowering as well, not only when a lane variable exists. */
{
- tree uid = create_tmp_var (ptr_type_node, "simduid");
+ uid = create_tmp_var (ptr_type_node, "simduid"); /* Assigns the function-scope uid so the SIMT and lane code below can both use it. */
/* Don't want uninit warnings on simduid, it is always uninitialized,
but we use it not for the value, but for the DECL_UID only. */
TREE_NO_WARNING (uid) = 1;
+ c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_); /* Build a _simduid_ clause carrying uid ... */
+ OMP_CLAUSE__SIMDUID__DECL (c) = uid;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt); /* ... and prepend it to the clause chain of the loop. */
+ gimple_omp_for_set_clauses (ctx->stmt, c);
+ }
+ /* Emit GOMP_SIMT_ENTER () to enter non-uniform execution and allocate
+ privatized data. Initialize pointers to privatized instances. */
+ if (sctx.is_simt)
+ {
+ sctx.simt_eargs[0] = uid; /* Fill the slot that was reserved for simduid. */
+ gimple *g
+ = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, sctx.simt_eargs);
+ gimple_call_set_lhs (g, uid); /* uid is both the first argument and the result of the call. */
+ gimple_seq_add_stmt (ilist, g);
+ sctx.simt_eargs.release (); /* The heap vector is not needed once the call has been built. */
+
+ simtrec = create_tmp_var (ptr_type_node, ".omp_simt"); /* Generic pointer temporary, same type node as simduid. */
+ g = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 1, uid); /* NOTE(review): built with one argument, while the RTL expander reads two; presumably rewritten before expansion, confirm. */
+ gimple_call_set_lhs (g, simtrec); /* simtrec is later handed to IFN_GOMP_SIMT_EXIT. */
+ gimple_seq_add_stmt (ilist, g);
+ }
+ if (sctx.lane)
+ {
gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
gimple_call_set_lhs (g, sctx.lane);
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
- c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
- OMP_CLAUSE__SIMDUID__DECL (c) = uid;
- OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
- gimple_omp_for_set_clauses (ctx->stmt, c);
g = gimple_build_assign (sctx.lane, INTEGER_CST,
build_int_cst (unsigned_type_node, 0));
gimple_seq_add_stmt (ilist, g);
@@ -4545,6 +4594,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_stmt (seq, gimple_build_label (end));
}
}
+ if (sctx.is_simt)
+ {
+ gimple_seq_add_seq (dlist, sctx.simt_dlist); /* First the queued clobbers of the SIMT-private copies ... */
+ gimple *g
+ = gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, simtrec);
+ gimple_seq_add_stmt (dlist, g); /* ... then the exit call, which takes the value returned by IFN_GOMP_SIMT_ENTER_ALLOC. */
+ }
/* The copyin sequence is not to be executed by the main thread, since
that would result in self-copies. Perhaps not visible to scalars,
@@ -4715,7 +4771,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
{
tree val = DECL_VALUE_EXPR (new_var);
- if (TREE_CODE (val) == ARRAY_REF
+ if (!maybe_simt
+ && TREE_CODE (val) == ARRAY_REF
&& VAR_P (TREE_OPERAND (val, 0))
&& lookup_attribute ("omp simd array",
DECL_ATTRIBUTES (TREE_OPERAND (val,
@@ -4734,24 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
TREE_OPERAND (val, 0), lastlane,
NULL_TREE, NULL_TREE);
- if (maybe_simt)
+ }
+ else if (maybe_simt
+ && VAR_P (val)
+ && lookup_attribute ("omp simt private",
+ DECL_ATTRIBUTES (val))) /* The value expression is a copy tagged "omp simt private" by SIMT privatization. */
+ {
+ if (simtlast == NULL) /* Build the IFN_GOMP_SIMT_LAST_LANE call on first use only. */
{
- gcall *g;
- if (simtlast == NULL)
- {
- simtlast = create_tmp_var (unsigned_type_node);
- g = gimple_build_call_internal
- (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
- gimple_call_set_lhs (g, simtlast);
- gimple_seq_add_stmt (stmt_list, g);
- }
- x = build_call_expr_internal_loc
- (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
- TREE_TYPE (new_var), 2, new_var, simtlast);
- new_var = unshare_expr (new_var);
- gimplify_assign (new_var, x, stmt_list);
- new_var = unshare_expr (new_var);
+ simtlast = create_tmp_var (unsigned_type_node);
+ gcall *g = gimple_build_call_internal
+ (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
+ gimple_call_set_lhs (g, simtlast);
+ gimple_seq_add_stmt (stmt_list, g);
}
+ x = build_call_expr_internal_loc
+ (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
+ TREE_TYPE (val), 2, val, simtlast); /* Presumably fetches val as held by lane simtlast; confirm against IFN_GOMP_SIMT_XCHG_IDX. */
+ new_var = unshare_expr (new_var);
+ gimplify_assign (new_var, x, stmt_list); /* Store the exchanged value into new_var. */
+ new_var = unshare_expr (new_var); /* Fresh unshared copy for whatever uses new_var next. */
}
}
@@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2)) /* Three operands, as built by expand_GOMP_SIMT_ENTER_ALLOC: one output, then two inputs. */
+DEF_TARGET_INSN (omp_simt_exit, (rtx x0)) /* One operand, as built by expand_GOMP_SIMT_EXIT: a single input. */
DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))