2016-04-06 Cesar Philippidis <cesar@codesourcery.com>
PR lto/70289
gcc/
* gimplify.c (gimplify_adjust_acc_parallel_reductions): New function.
(gimplify_omp_workshare): Call it. Add new data clauses for acc
parallel reductions as needed.
* omp-low.c (is_oacc_parallel_reduction): New function.
(scan_sharing_clauses): Use it to prevent installing local variables
for those used in acc parallel reductions.
(lower_rec_input_clauses): Remove dead code.
(lower_oacc_reductions): Add support for reference reductions.
(lower_reduction_clauses): Remove dead code.
(lower_omp_target): Don't remap variables appearing in acc parallel
reductions.
* tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.
gcc/testsuite/
* c-c++-common/goacc/reduction-5.c: New test.
* c-c++-common/goacc/reduction-promotions.c: New test.
* gfortran.dg/goacc/reduction-3.f95: New test.
* gfortran.dg/goacc/reduction-promotions.f90: New test.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Increase
test coverage.
* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test.
* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Increase test
coverage.
* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
* testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
* testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
* testsuite/libgomp.oacc-fortran/reduction-1.f90: Increase test
coverage.
* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.
@@ -9484,6 +9484,123 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
OMP_TARGET_CLAUSES (target) = c;
}
+/* OpenACC parallel reductions need a present_or_copy clause to ensure
+ that the original variable used in the reduction gets updated on
+ the host. This function scans *CLAUSES for reductions and adjusts
+ the existing data clauses in place. An incompatible data clause is
+ reported as a warning and promoted to present_or_copy; a private or
+ firstprivate reduction is reported as an error. Returns a chain of new
+ present_or_copy clauses for reductions lacking a data clause. */
+
+static tree
+gimplify_adjust_acc_parallel_reductions (tree *clauses)
+{
+ tree c, list = NULL_TREE;
+ hash_set<tree> *reduction_decls, *pointer_decls;
+ reduction_decls = new hash_set<tree>;
+ pointer_decls = new hash_set<tree>;
+
+ /* Scan 1: Record every reduction decl, plus those of pointer type. */
+ for (c = *clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ {
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+ {
+ reduction_decls->add (OMP_CLAUSE_DECL (c));
+ if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c))))
+ pointer_decls->add (OMP_CLAUSE_DECL (c));
+ }
+ }
+
+ if (reduction_decls->elements () == 0)
+ goto cleanup; /* No reductions: nothing to adjust. */
+
+ /* Scan 2: Adjust the data clause for each reduction. */
+ for (c = *clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ {
+ int kind = -1; /* Stays -1 for non-map clauses. */
+ tree decl;
+
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_MAP:
+ kind = OMP_CLAUSE_MAP_KIND (c); /* FALLTHRU */
+ case OMP_CLAUSE_PRIVATE:
+ case OMP_CLAUSE_FIRSTPRIVATE:
+ decl = OMP_CLAUSE_DECL (c);
+
+ if (!DECL_P (decl))
+ decl = TREE_OPERAND (decl, 0); /* Use the expression's first operand. */
+ gcc_assert (DECL_P (decl));
+
+ /* Reference variables always have a GOMP_MAP_POINTER. Mark
+ that clause as IN_REDUCTION, and ignore it. */
+ if (POINTER_TYPE_P (TREE_TYPE (decl))
+ && kind == GOMP_MAP_POINTER
+ && pointer_decls->contains (decl))
+ {
+ OMP_CLAUSE_MAP_IN_REDUCTION (c) = 1;
+ break;
+ }
+
+ if (!reduction_decls->contains (decl))
+ break;
+
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
+ {
+ if (!pointer_decls->contains (decl))
+ OMP_CLAUSE_MAP_IN_REDUCTION(c) = 1;
+
+ if (!((kind & GOMP_MAP_TOFROM) == GOMP_MAP_TOFROM
+ || kind == GOMP_MAP_FORCE_PRESENT))
+ {
+ warning_at (OMP_CLAUSE_LOCATION (c), 0, "incompatible data "
+ "clause with reduction on %qE; promoting to "
+ "present_or_copy", DECL_NAME (decl));
+
+ OMP_CLAUSE_CODE (c) = OMP_CLAUSE_MAP; /* No-op: C is already a MAP clause here. */
+ OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM);
+ }
+ reduction_decls->remove (decl); /* Has a data clause now; skip it in scan 3. */
+ break;
+ }
+
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
+ {
+ error_at (OMP_CLAUSE_LOCATION (c), "invalid private reduction "
+ "on %qE", DECL_NAME (decl));
+ reduction_decls->remove (decl); /* Already diagnosed; add no clause for it. */
+ } /* FALLTHRU */
+ default:;
+ }
+ }
+
+ if (reduction_decls->elements () == 0)
+ goto cleanup;
+
+ /* Scan 3: Add a present_or_copy clause for any reduction variable which
+ doesn't have a data clause already. New clauses are prepended to LIST. */
+ for (hash_set<tree>::iterator iter = reduction_decls->begin ();
+ iter != reduction_decls->end (); ++iter)
+ {
+ tree decl = *iter;
+
+ tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_MAP);
+ OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_TOFROM);
+ OMP_CLAUSE_DECL (nc) = decl;
+ if (!POINTER_TYPE_P (TREE_TYPE (decl)))
+ OMP_CLAUSE_MAP_IN_REDUCTION (nc) = 1;
+ TREE_CHAIN (nc) = list;
+ list = nc;
+ }
+
+ cleanup:
+ delete reduction_decls;
+ delete pointer_decls;
+
+ return list;
+}
+
/* Gimplify the gross structure of several OMP constructs. */
static void
@@ -9491,6 +9608,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
{
tree expr = *expr_p;
gimple *stmt;
+ tree acc_reductions = NULL_TREE;
gimple_seq body = NULL;
enum omp_region_type ort;
@@ -9508,6 +9626,8 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
break;
case OACC_PARALLEL:
ort = ORT_ACC_PARALLEL;
+ acc_reductions
+ = gimplify_adjust_acc_parallel_reductions (&OMP_CLAUSES (expr));
break;
case OACC_DATA:
ort = ORT_ACC_DATA;
@@ -9606,6 +9726,48 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
gimplify_seq_add_stmt (pre_p, stmt);
*expr_p = NULL_TREE;
+
+ /* Finalize any parallel acc reductions. */
+ if (acc_reductions)
+ {
+ tree c, nc, t;
+ tree clauses = NULL_TREE;
+
+ c = nc = acc_reductions;
+
+ while (c)
+ {
+ nc = OMP_CLAUSE_CHAIN (c);
+ OMP_CLAUSE_CHAIN (c) = NULL_TREE;
+ lang_hooks.decls.omp_finish_clause (c, pre_p);
+
+ /* Find the last data clause introduced by omp_finish_clause,
+ marking any pointer data maps as IN_REDUCTION. */
+ for (t = c; t; t = TREE_CHAIN (t))
+ {
+ if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
+ OMP_CLAUSE_MAP_IN_REDUCTION (t) = 1;
+
+ if (TREE_CHAIN (t) == NULL_TREE)
+ break;
+ }
+
+ /* Update the chain of clauses. */
+ TREE_CHAIN (t) = clauses;
+ clauses = c;
+
+ c = nc;
+ }
+
+ /* Update the list of clauses in the gimple stmt. */
+ for (t = gimple_omp_target_clauses (stmt); OMP_CLAUSE_CHAIN (t);
+ t = OMP_CLAUSE_CHAIN (t))
+ ;
+
+ OMP_CLAUSE_CHAIN (t) = clauses;
+ }
+
+ return;
}
/* Gimplify the gross structure of OpenACC enter/exit data, update, and OpenMP
@@ -2122,7 +2122,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
else
install_var_field (decl, true, 3, ctx,
base_pointers_restrict);
- if (is_gimple_omp_offloaded (ctx->stmt))
+ if (is_gimple_omp_offloaded (ctx->stmt)
+ && !OMP_CLAUSE_MAP_IN_REDUCTION (c))
install_var_local (decl, ctx);
}
}
@@ -4837,7 +4838,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_assign (ptr, x, ilist);
}
}
- else if (is_reference (var) && !is_oacc_parallel (ctx))
+ else if (is_reference (var))
{
/* For references that are being privatized for Fortran,
allocate new backing storage for the new pointer
@@ -5573,7 +5574,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
tree orig = OMP_CLAUSE_DECL (c);
tree var = maybe_lookup_decl (orig, ctx);
tree ref_to_res = NULL_TREE;
- tree incoming, outgoing;
+ tree incoming, outgoing, v1, v2, v3;
+ bool is_private = false;
enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
if (rcode == MINUS_EXPR)
@@ -5586,7 +5588,6 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
if (!var)
var = orig;
- gcc_assert (!is_reference (var));
incoming = outgoing = var;
@@ -5622,22 +5623,38 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
&& orig == OMP_CLAUSE_DECL (cls))
- goto has_outer_reduction;
+ {
+ incoming = outgoing = lookup_decl (orig, probe);
+ goto has_outer_reduction;
+ }
+ else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
+ || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
+ && orig == OMP_CLAUSE_DECL (cls))
+ {
+ is_private = true;
+ goto do_lookup;
+ }
}
do_lookup:
/* This is the outermost construct with this reduction,
see if there's a mapping for it. */
if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
- && maybe_lookup_field (orig, outer))
+ && maybe_lookup_field (orig, outer) && !is_private)
{
ref_to_res = build_receiver_ref (orig, false, outer);
if (is_reference (orig))
ref_to_res = build_simple_mem_ref (ref_to_res);
+ tree type = TREE_TYPE (var);
+ if (POINTER_TYPE_P (type))
+ type = TREE_TYPE (type);
+
outgoing = var;
- incoming = omp_reduction_init_op (loc, rcode, TREE_TYPE (var));
+ incoming = omp_reduction_init_op (loc, rcode, type);
}
+ else if (ctx->outer)
+ incoming = outgoing = lookup_decl (orig, ctx->outer);
else
incoming = outgoing = orig;
@@ -5647,6 +5664,37 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
if (!ref_to_res)
ref_to_res = integer_zero_node;
+ if (is_reference (orig))
+ {
+ tree type = TREE_TYPE (var);
+ const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
+
+ if (!inner)
+ {
+ tree x = create_tmp_var (TREE_TYPE (type), id);
+ gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
+ }
+
+ v1 = create_tmp_var (type, id);
+ v2 = create_tmp_var (type, id);
+ v3 = create_tmp_var (type, id);
+
+ gimplify_assign (v1, var, fork_seq);
+ gimplify_assign (v2, var, fork_seq);
+ gimplify_assign (v3, var, fork_seq);
+
+ var = build_simple_mem_ref (var);
+ v1 = build_simple_mem_ref (v1);
+ v2 = build_simple_mem_ref (v2);
+ v3 = build_simple_mem_ref (v3);
+ outgoing = build_simple_mem_ref (outgoing);
+
+ if (TREE_CODE (incoming) != INTEGER_CST)
+ incoming = build_simple_mem_ref (incoming);
+ }
+ else
+ v1 = v2 = v3 = var;
+
/* Determine position in reduction buffer, which may be used
by target. */
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
@@ -5676,20 +5724,20 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, init_code,
unshare_expr (ref_to_res),
- var, level, op, off);
+ v1, level, op, off);
tree fini_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, fini_code,
unshare_expr (ref_to_res),
- var, level, op, off);
+ v2, level, op, off);
tree teardown_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, teardown_code,
- ref_to_res, var, level, op, off);
+ ref_to_res, v3, level, op, off);
- gimplify_assign (var, setup_call, &before_fork);
- gimplify_assign (var, init_call, &after_fork);
- gimplify_assign (var, fini_call, &before_join);
+ gimplify_assign (v1, setup_call, &before_fork);
+ gimplify_assign (v2, init_call, &after_fork);
+ gimplify_assign (v3, fini_call, &before_join);
gimplify_assign (outgoing, teardown_call, &after_join);
}
@@ -5931,9 +5979,6 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
}
}
- if (is_gimple_omp_oacc (ctx->stmt))
- return;
-
stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START),
0);
gimple_seq_add_stmt (stmt_seqp, stmt);
@@ -15820,7 +15865,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (!maybe_lookup_field (var, ctx))
continue;
- if (offloaded)
+ /* Don't remap oacc parallel reduction variables, because the
+ intermediate result must be local to each gang. */
+ if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+ && OMP_CLAUSE_MAP_IN_REDUCTION(c)))
{
x = build_receiver_ref (var, true, ctx);
tree new_var = lookup_decl (var, ctx);
@@ -1532,6 +1532,9 @@ extern void protected_set_expr_location (tree, location_t);
treatment if OMP_CLAUSE_SIZE is zero. */
#define OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION(NODE) \
TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
+/* Nonzero if this map clause is for an OpenACC parallel reduction variable. */
+#define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \
+ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
#define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)