@@ -166,6 +166,40 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
gcc_unreachable ();
}
+/* Allocate per-lane storage and begin non-uniform execution region.

+   Expands IFN_GOMP_SIMT_ENTER (size, align) into the target's
+   omp_simt_enter insn; the insn's output is the per-lane storage
+   pointer, stored into the call's lhs when one is present.  */
+
+static void
+expand_GOMP_SIMT_ENTER (internal_fn, gcall *stmt)
+{
+  rtx target;
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs)
+    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  else
+    target = gen_reg_rtx (Pmode);
+  rtx size = expand_normal (gimple_call_arg (stmt, 0));
+  rtx align = expand_normal (gimple_call_arg (stmt, 1));
+  struct expand_operand ops[3];
+  create_output_operand (&ops[0], target, Pmode);
+  create_input_operand (&ops[1], size, Pmode);
+  create_input_operand (&ops[2], align, Pmode);
+  gcc_assert (targetm.have_omp_simt_enter ());
+  expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
+  /* The expander may place the result in a register other than TARGET;
+     copy it back so the lhs receives the value, matching the other SIMT
+     expanders in this file.  */
+  if (!rtx_equal_p (target, ops[0].value))
+    emit_move_insn (target, ops[0].value);
+}
+
+/* Deallocate per-lane storage and leave non-uniform execution region.

+   Expands IFN_GOMP_SIMT_EXIT (ptr) into the target's omp_simt_exit
+   insn.  The call produces no value (the lowering never sets a lhs);
+   its sole argument is presumably the per-lane storage pointer
+   returned earlier by GOMP_SIMT_ENTER -- TODO confirm against the
+   omp-low.c emitter.  */
+
+static void
+expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
+{
+  /* This IFN is statement-like; a lhs would be a lowering bug.  */
+  gcc_checking_assert (!gimple_call_lhs (stmt));
+  /* Pass the pointer argument through to the target insn in Pmode.  */
+  rtx arg = expand_normal (gimple_call_arg (stmt, 0));
+  struct expand_operand ops[1];
+  create_input_operand (&ops[0], arg, Pmode);
+  /* Reaching expansion without target support is a hard error.  */
+  gcc_assert (targetm.have_omp_simt_exit ());
+  expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
+}
+
/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
without SIMT execution this should be expanded in omp_device_lower pass. */
@@ -142,6 +142,8 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
@@ -3452,6 +3452,8 @@ omp_clause_aligned_alignment (tree clause)
struct omplow_simd_context {
tree idx;
tree lane;
+  /* SIMT privatization: pointer variable (".omp_simt") holding the address
+     of the per-lane record returned by the GOMP_SIMT_ENTER call.  */
+  tree simtrec;
+  /* Statements that initialize the per-field "omp simt ref" pointers into
+     that record; spliced ahead of the clause init list once the record
+     type has been laid out.  */
+  gimple_seq simt_ilist;
int max_vf;
bool is_simt;
};
@@ -3488,18 +3490,48 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
if (max_vf == 1)
return false;
- tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
- tree avar = create_tmp_var_raw (atype);
- if (TREE_ADDRESSABLE (new_var))
- TREE_ADDRESSABLE (avar) = 1;
- DECL_ATTRIBUTES (avar)
- = tree_cons (get_identifier ("omp simd array"), NULL,
- DECL_ATTRIBUTES (avar));
- gimple_add_tmp_var (avar);
- ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
- NULL_TREE, NULL_TREE);
- lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
- NULL_TREE, NULL_TREE);
+ if (sctx->is_simt)
+ {
+ if (is_gimple_reg (new_var))
+ {
+ ivar = lvar = new_var;
+ return true;
+ }
+ tree field = build_decl (DECL_SOURCE_LOCATION (new_var), FIELD_DECL,
+ DECL_NAME (new_var), TREE_TYPE (new_var));
+ SET_DECL_ALIGN (field, DECL_ALIGN (new_var));
+ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (new_var);
+ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (new_var);
+ tree rectype = TREE_TYPE (TREE_TYPE (sctx->simtrec));
+ insert_field_into_struct (rectype, field);
+
+ tree ptr = create_tmp_var (build_pointer_type (TREE_TYPE (new_var)));
+ DECL_ATTRIBUTES (ptr)
+ = tree_cons (get_identifier ("omp simt ref"), NULL,
+ DECL_ATTRIBUTES (ptr));
+ ivar = lvar = build1 (INDIRECT_REF, TREE_TYPE (new_var), ptr);
+
+ tree t = build1 (INDIRECT_REF, rectype, sctx->simtrec);
+ t = omp_build_component_ref (t, field);
+ t = build1 (ADDR_EXPR, TREE_TYPE (ptr), t);
+ gimple *g = gimple_build_assign (ptr, t);
+ gimple_seq_add_stmt (&sctx->simt_ilist, g);
+ }
+ else
+ {
+ tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
+ tree avar = create_tmp_var_raw (atype);
+ if (TREE_ADDRESSABLE (new_var))
+ TREE_ADDRESSABLE (avar) = 1;
+ DECL_ATTRIBUTES (avar)
+ = tree_cons (get_identifier ("omp simd array"), NULL,
+ DECL_ATTRIBUTES (avar));
+ gimple_add_tmp_var (avar);
+ ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+ NULL_TREE, NULL_TREE);
+ lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
+ NULL_TREE, NULL_TREE);
+ }
if (DECL_P (new_var))
{
SET_DECL_VALUE_EXPR (new_var, lvar);
@@ -3577,6 +3609,16 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
continue;
}
+ /* Prepare the structure used for SIMT privatization. */
+ if (sctx.is_simt && sctx.max_vf != 1)
+ {
+ tree type = lang_hooks.types.make_type (RECORD_TYPE);
+ TYPE_ARTIFICIAL (type) = TYPE_NAMELESS (type) = 1;
+ TREE_ADDRESSABLE (type) = 1;
+ type = build_pointer_type (type);
+ sctx.simtrec = create_tmp_var (type, ".omp_simt");
+ }
+
/* Do all the fixed sized types in the first pass, and the variable sized
types in the second pass. This makes sure that the scalar arguments to
the variable sized types are processed before we use them in the
@@ -4464,6 +4506,26 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
}
}
+ /* Emit GOMP_SIMT_ENTER () to enter non-uniform execution and allocate
+ privatized data. Initialize pointers to privatized instances. */
+ if (sctx.is_simt && sctx.max_vf != 1)
+ {
+ tree rectype = TREE_TYPE (TREE_TYPE (sctx.simtrec));
+ layout_type (rectype);
+ tree size = TYPE_SIZE_UNIT (rectype);
+ tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
+ gimple *g
+ = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER, 2, size, align);
+ gimple_call_set_lhs (g, sctx.simtrec);
+ gimple_seq seq = NULL;
+ gimple_seq_add_stmt (&seq, g);
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start (sctx.simt_ilist); !gsi_end_p (gsi); gsi_next (&gsi))
+ gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
+ gimple_seq_add_seq (&seq, sctx.simt_ilist);
+ gimple_seq_add_seq (&seq, *ilist);
+ *ilist = seq;
+ }
if (sctx.lane)
{
tree uid = create_tmp_var (ptr_type_node, "simduid");
@@ -4548,6 +4610,17 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_stmt (seq, gimple_build_label (end));
}
}
+ if (sctx.is_simt && sctx.max_vf != 1)
+ {
+ tree rectype = TREE_TYPE (TREE_TYPE (sctx.simtrec));
+ tree clobber = build_constructor (rectype, NULL);
+ TREE_THIS_VOLATILE (clobber) = 1;
+ gimplify_assign (build1 (INDIRECT_REF, rectype, sctx.simtrec), clobber,
+ dlist);
+ gimple *g
+ = gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, sctx.simtrec);
+ gimple_seq_add_stmt (dlist, g);
+ }
/* The copyin sequence is not to be executed by the main thread, since
that would result in self-copies. Perhaps not visible to scalars,
@@ -4718,7 +4791,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
{
tree val = DECL_VALUE_EXPR (new_var);
- if (TREE_CODE (val) == ARRAY_REF
+ if (!maybe_simt
+ && TREE_CODE (val) == ARRAY_REF
&& VAR_P (TREE_OPERAND (val, 0))
&& lookup_attribute ("omp simd array",
DECL_ATTRIBUTES (TREE_OPERAND (val,
@@ -4737,24 +4811,28 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
TREE_OPERAND (val, 0), lastlane,
NULL_TREE, NULL_TREE);
- if (maybe_simt)
+ }
+ else if (maybe_simt
+ && TREE_CODE (val) == INDIRECT_REF
+ && VAR_P (TREE_OPERAND (val, 0))
+ && lookup_attribute ("omp simt ref",
+ DECL_ATTRIBUTES (TREE_OPERAND (val,
+ 0))))
+ {
+ if (simtlast == NULL)
{
- gcall *g;
- if (simtlast == NULL)
- {
- simtlast = create_tmp_var (unsigned_type_node);
- g = gimple_build_call_internal
- (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
- gimple_call_set_lhs (g, simtlast);
- gimple_seq_add_stmt (stmt_list, g);
- }
- x = build_call_expr_internal_loc
- (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
- TREE_TYPE (new_var), 2, new_var, simtlast);
- new_var = unshare_expr (new_var);
- gimplify_assign (new_var, x, stmt_list);
- new_var = unshare_expr (new_var);
+ simtlast = create_tmp_var (unsigned_type_node);
+ gcall *g = gimple_build_call_internal
+ (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
+ gimple_call_set_lhs (g, simtlast);
+ gimple_seq_add_stmt (stmt_list, g);
}
+ x = build_call_expr_internal_loc
+ (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
+ TREE_TYPE (val), 2, val, simtlast);
+ new_var = unshare_expr (new_var);
+ gimplify_assign (new_var, x, stmt_list);
+ new_var = unshare_expr (new_var);
}
}
@@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))