@@ -644,6 +644,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_INT_FILEPTR_I
BT_INT, BT_FILEPTR, BT_INT, BT_CONST_STRING, BT_VALIST_ARG)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
+DEF_FUNCTION_TYPE_4 (BT_FN_UINT_OMPFN_PTR_UINT_UINT,
+ BT_UINT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE,
@@ -315,6 +315,9 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_DOACROSS
BT_FN_VOID_ULL_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL, "GOMP_parallel",
BT_FN_VOID_OMPFN_PTR_UINT_UINT, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_REDUCTIONS,
+ "GOMP_parallel_reductions",
+ BT_FN_UINT_OMPFN_PTR_UINT_UINT, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK, "GOMP_task",
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
ATTR_NOTHROW_LIST)
@@ -1097,8 +1097,9 @@ scan_sharing_clauses (tree clauses, omp_
&& (TREE_CODE (TREE_TYPE (TREE_TYPE (t)))
== POINTER_TYPE)))))
&& !is_variable_sized (t)
- && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION
- || !is_task_ctx (ctx)))
+ && (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
+ || (!OMP_CLAUSE_REDUCTION_TASK (c)
+ && !is_task_ctx (ctx))))
{
by_ref = use_pointer_for_field (t, NULL);
if (is_task_ctx (ctx)
@@ -1113,7 +1114,10 @@ scan_sharing_clauses (tree clauses, omp_
}
break;
}
- if (is_task_ctx (ctx))
+ if (is_task_ctx (ctx)
+ || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_TASK (c)
+ && is_parallel_ctx (ctx)))
{
/* Global variables don't need to be copied,
the receiver side will use them directly. */
@@ -1851,6 +1855,23 @@ scan_omp_parallel (gimple_stmt_iterator
if (gimple_omp_parallel_combined_p (stmt))
add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_FOR, stmt, outer_ctx);
+ for (tree c = omp_find_clause (gimple_omp_parallel_clauses (stmt),
+ OMP_CLAUSE_REDUCTION);
+ c; c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE_REDUCTION))
+ if (OMP_CLAUSE_REDUCTION_TASK (c))
+ {
+ tree type = build_pointer_type (pointer_sized_int_node);
+ tree temp = create_tmp_var (type);
+ tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_);
+ if (outer_ctx)
+ insert_decl_map (&outer_ctx->cb, temp, temp);
+ OMP_CLAUSE_DECL (c) = temp;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_parallel_clauses (stmt);
+ gimple_omp_parallel_set_clauses (stmt, c);
+ break;
+ }
+ else if (OMP_CLAUSE_CHAIN (c) == NULL_TREE)
+ break;
ctx = new_omp_context (stmt, outer_ctx);
taskreg_contexts.safe_push (ctx);
@@ -2029,8 +2050,31 @@ finish_taskreg_scan (omp_context *ctx)
}
}
- if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL
- || gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
+ if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
+ {
+ tree clauses = gimple_omp_parallel_clauses (ctx->stmt);
+ tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+ if (c)
+ {
+ /* Move the _reductemp_ clause first. GOMP_parallel_reductions
+ expects to find it at the start of data. */
+ tree f = lookup_field (OMP_CLAUSE_DECL (c), ctx);
+ tree *p = &TYPE_FIELDS (ctx->record_type);
+ while (*p)
+ if (*p == f)
+ {
+ *p = DECL_CHAIN (*p);
+ break;
+ }
+ else
+ p = &DECL_CHAIN (*p);
+ DECL_CHAIN (f) = TYPE_FIELDS (ctx->record_type);
+ TYPE_FIELDS (ctx->record_type) = f;
+ }
+ layout_type (ctx->record_type);
+ fixup_child_record_type (ctx);
+ }
+ else if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
{
layout_type (ctx->record_type);
fixup_child_record_type (ctx);
@@ -3846,9 +3890,8 @@ lower_rec_input_clauses (tree clauses, g
break;
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
- if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
- reduction_omp_orig_ref = true;
- if (is_task_ctx (ctx) /* || OMP_CLAUSE_REDUCTION_TASK (c) */)
+ if (is_task_ctx (ctx)
+ || (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx)))
{
task_reduction_p = true;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
@@ -3877,6 +3920,8 @@ lower_rec_input_clauses (tree clauses, g
}
}
}
+ else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
+ reduction_omp_orig_ref = true;
break;
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__REDUCTEMP_:
@@ -4215,15 +4260,19 @@ lower_rec_input_clauses (tree clauses, g
tree end = create_artificial_label (UNKNOWN_LOCATION);
if (cond)
{
- tree condv = create_tmp_var (boolean_type_node);
- gimple *g
- = gimple_build_assign (condv, build_simple_mem_ref (cond));
- gimple_seq_add_stmt (ilist, g);
- tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
- g = gimple_build_cond (NE_EXPR, condv,
- boolean_false_node, end, lab1);
- gimple_seq_add_stmt (ilist, g);
- gimple_seq_add_stmt (ilist, gimple_build_label (lab1));
+ gimple *g;
+ if (!is_parallel_ctx (ctx))
+ {
+ tree condv = create_tmp_var (boolean_type_node);
+ g = gimple_build_assign (condv,
+ build_simple_mem_ref (cond));
+ gimple_seq_add_stmt (ilist, g);
+ tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (NE_EXPR, condv,
+ boolean_false_node, end, lab1);
+ gimple_seq_add_stmt (ilist, g);
+ gimple_seq_add_stmt (ilist, gimple_build_label (lab1));
+ }
g = gimple_build_assign (build_simple_mem_ref (cond),
boolean_true_node);
gimple_seq_add_stmt (ilist, g);
@@ -4920,17 +4969,23 @@ lower_rec_input_clauses (tree clauses, g
tree lab2 = NULL_TREE;
if (cond)
{
- tree condv = create_tmp_var (boolean_type_node);
- gimple *g
- = gimple_build_assign (condv,
- build_simple_mem_ref (cond));
- gimple_seq_add_stmt (ilist, g);
- tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
- lab2 = create_artificial_label (UNKNOWN_LOCATION);
- g = gimple_build_cond (NE_EXPR, condv,
- boolean_false_node, lab2, lab1);
- gimple_seq_add_stmt (ilist, g);
- gimple_seq_add_stmt (ilist, gimple_build_label (lab1));
+ gimple *g;
+ if (!is_parallel_ctx (ctx))
+ {
+ tree condv = create_tmp_var (boolean_type_node);
+ tree m = build_simple_mem_ref (cond);
+ g = gimple_build_assign (condv, m);
+ gimple_seq_add_stmt (ilist, g);
+ tree lab1
+ = create_artificial_label (UNKNOWN_LOCATION);
+ lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (NE_EXPR, condv,
+ boolean_false_node,
+ lab2, lab1);
+ gimple_seq_add_stmt (ilist, g);
+ gimple_seq_add_stmt (ilist,
+ gimple_build_label (lab1));
+ }
g = gimple_build_assign (build_simple_mem_ref (cond),
boolean_true_node);
gimple_seq_add_stmt (ilist, g);
@@ -4958,7 +5013,8 @@ lower_rec_input_clauses (tree clauses, g
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
if (cond)
{
- gimple_seq_add_stmt (ilist, gimple_build_label (lab2));
+ if (lab2)
+ gimple_seq_add_stmt (ilist, gimple_build_label (lab2));
break;
}
goto do_dtor;
@@ -4972,6 +5028,7 @@ lower_rec_input_clauses (tree clauses, g
if (cond)
{
gimple *g;
+ tree lab2 = NULL_TREE;
/* GOMP_taskgroup_reduction_register memsets the whole
array to zero. If the initializer is zero, we don't
need to initialize it again, just mark it as ever
@@ -4986,21 +5043,28 @@ lower_rec_input_clauses (tree clauses, g
/* Otherwise, emit
if (!cond) { cond = true; new_var = x; } */
- tree condv = create_tmp_var (boolean_type_node);
- g = gimple_build_assign (condv,
- build_simple_mem_ref (cond));
- gimple_seq_add_stmt (ilist, g);
- tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
- tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
- g = gimple_build_cond (NE_EXPR, condv,
- boolean_false_node, lab2, lab1);
- gimple_seq_add_stmt (ilist, g);
- gimple_seq_add_stmt (ilist, gimple_build_label (lab1));
+ if (!is_parallel_ctx (ctx))
+ {
+ tree condv = create_tmp_var (boolean_type_node);
+ tree m = build_simple_mem_ref (cond);
+ g = gimple_build_assign (condv, m);
+ gimple_seq_add_stmt (ilist, g);
+ tree lab1
+ = create_artificial_label (UNKNOWN_LOCATION);
+ lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (NE_EXPR, condv,
+ boolean_false_node,
+ lab2, lab1);
+ gimple_seq_add_stmt (ilist, g);
+ gimple_seq_add_stmt (ilist,
+ gimple_build_label (lab1));
+ }
g = gimple_build_assign (build_simple_mem_ref (cond),
boolean_true_node);
gimple_seq_add_stmt (ilist, g);
gimplify_assign (new_var, x, ilist);
- gimple_seq_add_stmt (ilist, gimple_build_label (lab2));
+ if (lab2)
+ gimple_seq_add_stmt (ilist, gimple_build_label (lab2));
break;
}
@@ -5696,7 +5760,9 @@ lower_reduction_clauses (tree clauses, g
/* First see if there is exactly one reduction clause. Use OMP_ATOMIC
update in that case, otherwise use a lock. */
for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && (!OMP_CLAUSE_REDUCTION_TASK (c)
+ || !is_parallel_ctx (ctx)))
{
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
|| TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
@@ -5717,7 +5783,9 @@ lower_reduction_clauses (tree clauses, g
enum tree_code code;
location_t clause_loc = OMP_CLAUSE_LOCATION (c);
- if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
+ || (OMP_CLAUSE_REDUCTION_TASK (c)
+ && is_parallel_ctx (ctx)))
continue;
enum omp_clause_code ccode = OMP_CLAUSE_REDUCTION;
@@ -6012,6 +6080,8 @@ lower_send_clauses (tree clauses, gimple
case OMP_CLAUSE_REDUCTION:
if (is_task_ctx (ctx))
continue;
+ if (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx))
+ continue;
break;
case OMP_CLAUSE_SHARED:
if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
@@ -6939,14 +7009,26 @@ lower_omp_task_reductions (omp_context *
reduce and destruct it. */
tree idx = create_tmp_var (size_type_node);
gimple_seq_add_stmt (end, gimple_build_assign (idx, size_zero_node));
- t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
- tree num_thr = create_tmp_var (integer_type_node);
- gimple *g = gimple_build_call (t, 0);
- gimple_call_set_lhs (g, num_thr);
- gimple_seq_add_stmt (end, g);
tree num_thr_sz = create_tmp_var (size_type_node);
- g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr);
- gimple_seq_add_stmt (end, g);
+ gimple *g;
+ if (code != OMP_PARALLEL)
+ {
+ t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
+ tree num_thr = create_tmp_var (integer_type_node);
+ g = gimple_build_call (t, 0);
+ gimple_call_set_lhs (g, num_thr);
+ gimple_seq_add_stmt (end, g);
+ g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr);
+ gimple_seq_add_stmt (end, g);
+ }
+ else
+ {
+ tree c = omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt),
+ OMP_CLAUSE__REDUCTEMP_);
+ t = fold_convert (pointer_sized_int_node, OMP_CLAUSE_DECL (c));
+ t = fold_convert (size_type_node, t);
+ gimplify_assign (num_thr_sz, t, end);
+ }
t = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (2),
NULL_TREE, NULL_TREE);
tree data = create_tmp_var (pointer_sized_int_node);
@@ -7035,7 +7117,11 @@ lower_omp_task_reductions (omp_context *
tree bfield = DECL_CHAIN (field);
tree cond;
- if (TREE_TYPE (ptr) == ptr_type_node)
+ if (code == OMP_PARALLEL)
+ /* In parallel all threads unconditionally initialize all their
+ task reduction private variables. */
+ cond = boolean_true_node;
+ else if (TREE_TYPE (ptr) == ptr_type_node)
{
cond = build2 (POINTER_PLUS_EXPR, ptr_type_node, ptr,
unshare_expr (byte_position (bfield)));
@@ -7185,7 +7271,7 @@ lower_omp_task_reductions (omp_context *
else
SET_DECL_VALUE_EXPR (d, new_var);
DECL_HAS_VALUE_EXPR_P (d) = 1;
- lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx->outer);
+ lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
if (oldv)
SET_DECL_VALUE_EXPR (d, oldv);
else
@@ -7223,9 +7309,9 @@ lower_omp_task_reductions (omp_context *
g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
gimple_seq_add_stmt (start, g);
}
- else if (code == OMP_TASKLOOP)
+ else if (code == OMP_TASKLOOP || code == OMP_PARALLEL)
{
- tree c = omp_find_clause (gimple_omp_task_clauses (ctx->stmt),
+ tree c = omp_find_clause (gimple_omp_taskreg_clauses (ctx->stmt),
OMP_CLAUSE__REDUCTEMP_);
t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (c)),
build_fold_addr_expr (avar));
@@ -8602,18 +8688,25 @@ lower_omp_taskreg (gimple_stmt_iterator
if (ctx->srecord_type)
create_task_copyfn (as_a <gomp_task *> (stmt), ctx);
- gimple_seq taskloop_ilist = NULL;
- gimple_seq taskloop_olist = NULL;
- if (is_task_ctx (ctx) && gimple_omp_task_taskloop_p (ctx->stmt))
+ gimple_seq tskred_ilist = NULL;
+ gimple_seq tskred_olist = NULL;
+ if ((is_task_ctx (ctx)
+ && gimple_omp_task_taskloop_p (ctx->stmt)
+ && omp_find_clause (gimple_omp_task_clauses (ctx->stmt),
+ OMP_CLAUSE_REDUCTION))
+ || (is_parallel_ctx (ctx)
+ && omp_find_clause (gimple_omp_parallel_clauses (stmt),
+ OMP_CLAUSE__REDUCTEMP_)))
{
if (dep_bind == NULL)
{
push_gimplify_context ();
dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
}
- lower_omp_task_reductions (ctx, OMP_TASKLOOP,
- gimple_omp_task_clauses (ctx->stmt),
- &taskloop_ilist, &taskloop_olist);
+ lower_omp_task_reductions (ctx, is_task_ctx (ctx) ? OMP_TASKLOOP
+ : OMP_PARALLEL,
+ gimple_omp_taskreg_clauses (ctx->stmt),
+ &tskred_ilist, &tskred_olist);
}
push_gimplify_context ();
@@ -8711,9 +8805,9 @@ lower_omp_taskreg (gimple_stmt_iterator
if (dep_bind)
{
gimple_bind_add_seq (dep_bind, dep_ilist);
- gimple_bind_add_seq (dep_bind, taskloop_ilist);
+ gimple_bind_add_seq (dep_bind, tskred_ilist);
gimple_bind_add_stmt (dep_bind, bind);
- gimple_bind_add_seq (dep_bind, taskloop_olist);
+ gimple_bind_add_seq (dep_bind, tskred_olist);
gimple_bind_add_seq (dep_bind, dep_olist);
pop_gimplify_context (dep_bind);
}
@@ -174,6 +174,8 @@ workshare_safe_to_combine_p (basic_block
return true;
gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
+ if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
+ return false;
omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
@@ -310,6 +312,13 @@ determine_parallel_type (struct omp_regi
ws_entry_bb = region->inner->entry;
ws_exit_bb = region->inner->exit;
+ /* Give up for task reductions on the parallel, while it is implementable,
+ adding another big set of APIs or slowing down the normal paths is
+ not acceptable. */
+ tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
+ if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
+ return;
+
if (single_succ (par_entry_bb) == ws_entry_bb
&& single_succ (ws_exit_bb) == par_exit_bb
&& workshare_safe_to_combine_p (ws_entry_bb)
@@ -559,7 +568,10 @@ expand_parallel_call (struct omp_region
/* Determine what flavor of GOMP_parallel we will be
emitting. */
start_ix = BUILT_IN_GOMP_PARALLEL;
- if (is_combined_parallel (region))
+ tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+ if (rtmp)
+ start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
+ else if (is_combined_parallel (region))
{
switch (region->inner->type)
{
@@ -716,6 +728,13 @@ expand_parallel_call (struct omp_region
t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
builtin_decl_explicit (start_ix), args);
+ if (rtmp)
+ {
+ tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
+ t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
+ fold_convert (type,
+ fold_convert (pointer_sized_int_node, t)));
+ }
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
@@ -150,6 +150,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZ
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
+DEF_FUNCTION_TYPE_4 (BT_FN_UINT_OMPFN_PTR_UINT_UINT,
+ BT_UINT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE,
@@ -810,6 +810,8 @@ extern bool gomp_create_target_task (str
size_t *, unsigned short *, unsigned int,
void **, void **,
enum gomp_target_task_state);
+extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *,
+ unsigned);
static void inline
gomp_finish_task (struct gomp_task *task)
@@ -822,7 +824,8 @@ gomp_finish_task (struct gomp_task *task
extern struct gomp_team *gomp_new_team (unsigned);
extern void gomp_team_start (void (*) (void *), void *, unsigned,
- unsigned, struct gomp_team *);
+ unsigned, struct gomp_team *,
+ struct gomp_taskgroup *);
extern void gomp_team_end (void);
extern void gomp_free_thread (void *);
extern int gomp_pause_host (void);
@@ -236,6 +236,8 @@ extern void GOMP_doacross_ull_wait (unsi
extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
extern void GOMP_parallel_end (void);
extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);
+extern unsigned GOMP_parallel_reductions (void (*) (void *), void *, unsigned,
+ unsigned);
extern bool GOMP_cancel (int, bool);
extern bool GOMP_cancellation_point (int);
@@ -316,11 +316,12 @@ GOMP_4.5 {
GOMP_5.0 {
global:
- GOMP_taskwait_depend;
- GOMP_teams_reg;
+ GOMP_parallel_reductions;
GOMP_taskgroup_reduction_register;
GOMP_taskgroup_reduction_unregister;
GOMP_task_reduction_remap;
+ GOMP_taskwait_depend;
+ GOMP_teams_reg;
} GOMP_4.5;
OACC_2.0 {
@@ -563,7 +563,7 @@ gomp_parallel_loop_start (void (*fn) (vo
num_threads = gomp_resolve_num_threads (num_threads, 0);
team = gomp_new_team (num_threads);
gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
- gomp_team_start (fn, data, num_threads, flags, team);
+ gomp_team_start (fn, data, num_threads, flags, team, NULL);
}
void
@@ -123,7 +123,8 @@ void
GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
{
num_threads = gomp_resolve_num_threads (num_threads, 0);
- gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads));
+ gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
+ NULL);
}
void
@@ -161,14 +162,33 @@ GOMP_parallel_end (void)
ialias (GOMP_parallel_end)
void
-GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags)
+GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
+ unsigned int flags)
{
num_threads = gomp_resolve_num_threads (num_threads, 0);
- gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
+ gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
+ NULL);
fn (data);
ialias_call (GOMP_parallel_end) ();
}
+unsigned
+GOMP_parallel_reductions (void (*fn) (void *), void *data,
+ unsigned num_threads, unsigned int flags)
+{
+ struct gomp_taskgroup *taskgroup;
+ num_threads = gomp_resolve_num_threads (num_threads, 0);
+ uintptr_t *rdata = *(uintptr_t **)data;
+ taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
+ gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
+ taskgroup);
+ fn (data);
+ ialias_call (GOMP_parallel_end) ();
+ gomp_sem_destroy (&taskgroup->taskgroup_sem);
+ free (taskgroup);
+ return num_threads;
+}
+
bool
GOMP_cancellation_point (int which)
{
@@ -140,7 +140,7 @@ GOMP_parallel_sections_start (void (*fn)
num_threads = gomp_resolve_num_threads (num_threads, count);
team = gomp_new_team (num_threads);
gomp_sections_init (&team->work_shares[0], count);
- gomp_team_start (fn, data, num_threads, 0, team);
+ gomp_team_start (fn, data, num_threads, 0, team, NULL);
}
ialias_redirect (GOMP_parallel_end)
@@ -154,7 +154,7 @@ GOMP_parallel_sections (void (*fn) (void
num_threads = gomp_resolve_num_threads (num_threads, count);
team = gomp_new_team (num_threads);
gomp_sections_init (&team->work_shares[0], count);
- gomp_team_start (fn, data, num_threads, flags, team);
+ gomp_team_start (fn, data, num_threads, flags, team, NULL);
fn (data);
GOMP_parallel_end ();
}
@@ -1763,13 +1763,27 @@ GOMP_taskyield (void)
/* Nothing at the moment. */
}
+static inline struct gomp_taskgroup *
+gomp_taskgroup_init (struct gomp_taskgroup *prev)
+{
+ struct gomp_taskgroup *taskgroup
+ = gomp_malloc (sizeof (struct gomp_taskgroup));
+ taskgroup->prev = prev;
+ priority_queue_init (&taskgroup->taskgroup_queue);
+ taskgroup->in_taskgroup_wait = false;
+ taskgroup->reductions = prev ? prev->reductions : NULL;
+ taskgroup->cancelled = false;
+ taskgroup->num_children = 0;
+ gomp_sem_init (&taskgroup->taskgroup_sem, 0);
+ return taskgroup;
+}
+
void
GOMP_taskgroup_start (void)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
struct gomp_task *task = thr->task;
- struct gomp_taskgroup *taskgroup, *prev;
/* If team is NULL, all tasks are executed as
GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
@@ -1777,16 +1791,7 @@ GOMP_taskgroup_start (void)
by the time GOMP_taskgroup_end is called. */
if (team == NULL)
return;
- taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
- prev = task->taskgroup;
- taskgroup->prev = prev;
- priority_queue_init (&taskgroup->taskgroup_queue);
- taskgroup->in_taskgroup_wait = false;
- taskgroup->reductions = prev ? prev->reductions : NULL;
- taskgroup->cancelled = false;
- taskgroup->num_children = 0;
- gomp_sem_init (&taskgroup->taskgroup_sem, 0);
- task->taskgroup = taskgroup;
+ task->taskgroup = gomp_taskgroup_init (task->taskgroup);
}
void
@@ -1951,63 +1956,12 @@ GOMP_taskgroup_end (void)
free (taskgroup);
}
-/* The format of data is:
- data[0] cnt
- data[1] size
- data[2] alignment (on output array pointer)
- data[3] allocator (-1 if malloc allocator)
- data[4] next pointer
- data[5] used internally (htab pointer)
- data[6] used internally (end of array)
- cnt times
- ent[0] address
- ent[1] offset
- ent[2] used internally (pointer to data[0]). */
-
-void
-GOMP_taskgroup_reduction_register (uintptr_t *data)
+static inline void
+gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads)
{
- struct gomp_thread *thr = gomp_thread ();
- struct gomp_team *team = thr->ts.team;
- struct gomp_task *task;
- if (__builtin_expect (team == NULL, 0))
- {
- /* The task reduction code needs a team and task, so for
- orphaned taskgroups just create the implicit team. */
- struct gomp_task_icv *icv;
- team = gomp_new_team (1);
- task = thr->task;
- icv = task ? &task->icv : &gomp_global_icv;
- team->prev_ts = thr->ts;
- thr->ts.team = team;
- thr->ts.team_id = 0;
- thr->ts.work_share = &team->work_shares[0];
- thr->ts.last_work_share = NULL;
-#ifdef HAVE_SYNC_BUILTINS
- thr->ts.single_count = 0;
-#endif
- thr->ts.static_trip = 0;
- thr->task = &team->implicit_task[0];
- gomp_init_task (thr->task, NULL, icv);
- if (task)
- {
- thr->task = task;
- gomp_end_task ();
- free (task);
- thr->task = &team->implicit_task[0];
- }
-#ifdef LIBGOMP_USE_PTHREADS
- else
- pthread_setspecific (gomp_thread_destructor, thr);
-#endif
- GOMP_taskgroup_start ();
- }
- unsigned nthreads = team->nthreads;
size_t total_cnt = 0;
- uintptr_t *d = data, *old;
+ uintptr_t *d = data;
struct htab *old_htab = NULL, *new_htab;
- task = thr->task;
- old = task->taskgroup->reductions;
do
{
size_t sz = d[1] * nthreads;
@@ -2072,6 +2026,62 @@ GOMP_taskgroup_reduction_register (uintp
}
while (1);
d[5] = (uintptr_t) new_htab;
+}
+
+/* The format of data is:
+ data[0] cnt
+ data[1] size
+ data[2] alignment (on output array pointer)
+ data[3] allocator (-1 if malloc allocator)
+ data[4] next pointer
+ data[5] used internally (htab pointer)
+ data[6] used internally (end of array)
+ cnt times
+ ent[0] address
+ ent[1] offset
+ ent[2] used internally (pointer to data[0]). */
+
+void
+GOMP_taskgroup_reduction_register (uintptr_t *data)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task *task;
+ if (__builtin_expect (team == NULL, 0))
+ {
+ /* The task reduction code needs a team and task, so for
+ orphaned taskgroups just create the implicit team. */
+ struct gomp_task_icv *icv;
+ team = gomp_new_team (1);
+ task = thr->task;
+ icv = task ? &task->icv : &gomp_global_icv;
+ team->prev_ts = thr->ts;
+ thr->ts.team = team;
+ thr->ts.team_id = 0;
+ thr->ts.work_share = &team->work_shares[0];
+ thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+ thr->ts.single_count = 0;
+#endif
+ thr->ts.static_trip = 0;
+ thr->task = &team->implicit_task[0];
+ gomp_init_task (thr->task, NULL, icv);
+ if (task)
+ {
+ thr->task = task;
+ gomp_end_task ();
+ free (task);
+ thr->task = &team->implicit_task[0];
+ }
+#ifdef LIBGOMP_USE_PTHREADS
+ else
+ pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+ GOMP_taskgroup_start ();
+ }
+ unsigned nthreads = team->nthreads;
+ task = thr->task;
+ gomp_reduction_register (data, task->taskgroup->reductions, nthreads);
task->taskgroup->reductions = data;
}
@@ -2087,6 +2097,7 @@ GOMP_taskgroup_reduction_unregister (uin
}
while (d && !d[5]);
}
+ialias (GOMP_taskgroup_reduction_unregister)
/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
original list item or address of previously remapped original list
@@ -2160,6 +2171,15 @@ GOMP_task_reduction_remap (size_t cnt, s
}
}
+struct gomp_taskgroup *
+gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
+{
+ struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
+ gomp_reduction_register (data, NULL, nthreads);
+ taskgroup->reductions = data;
+ return taskgroup;
+}
+
int
omp_in_final (void)
{
@@ -302,7 +302,8 @@ gomp_free_thread (void *arg __attribute_
#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
- unsigned flags, struct gomp_team *team)
+ unsigned flags, struct gomp_team *team,
+ struct gomp_taskgroup *taskgroup)
{
struct gomp_thread_start_data *start_data;
struct gomp_thread *thr, *nthr;
@@ -364,6 +365,7 @@ gomp_team_start (void (*fn) (void *), vo
&& thr->ts.level < gomp_bind_var_list_len)
bind_var = gomp_bind_var_list[thr->ts.level];
gomp_init_task (thr->task, task, icv);
+ thr->task->taskgroup = taskgroup;
team->implicit_task[0].icv.nthreads_var = nthreads_var;
team->implicit_task[0].icv.bind_var = bind_var;
@@ -638,6 +640,7 @@ gomp_team_start (void (*fn) (void *), vo
gomp_init_task (nthr->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
team->implicit_task[i].icv.bind_var = bind_var;
+ nthr->task->taskgroup = taskgroup;
nthr->fn = fn;
nthr->data = data;
team->ordered_release[i] = &nthr->release;
@@ -823,6 +826,7 @@ gomp_team_start (void (*fn) (void *), vo
gomp_init_task (start_data->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
team->implicit_task[i].icv.bind_var = bind_var;
+ start_data->task->taskgroup = taskgroup;
start_data->thread_pool = pool;
start_data->nested = nested;
@@ -116,7 +116,8 @@ gomp_thread_start (struct gomp_thread_po
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
- unsigned flags, struct gomp_team *team)
+ unsigned flags, struct gomp_team *team,
+ struct gomp_taskgroup *taskgroup)
{
struct gomp_thread *thr, *nthr;
struct gomp_task *task;
@@ -147,6 +148,7 @@ gomp_team_start (void (*fn) (void *), vo
nthreads_var = icv->nthreads_var;
gomp_init_task (thr->task, task, icv);
team->implicit_task[0].icv.nthreads_var = nthreads_var;
+ team->implicit_task[0].taskgroup = taskgroup;
if (nthreads == 1)
return;
@@ -166,6 +168,7 @@ gomp_team_start (void (*fn) (void *), vo
nthr->task = &team->implicit_task[i];
gomp_init_task (nthr->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ team->implicit_task[i].taskgroup = taskgroup;
nthr->fn = fn;
nthr->data = data;
team->ordered_release[i] = &nthr->release;
@@ -0,0 +1,123 @@
+#include <omp.h>
+#include <stdlib.h>
+
+struct S { unsigned long int s, t; };
+
+void
+rbar (struct S *p, struct S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+rbaz (struct S *o, struct S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \
+ initializer (omp_priv = { 0, 3 })
+#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \
+ initializer (rbar (&omp_priv, &omp_orig))
+
+struct S g = { 0, 7 };
+struct S h = { 1, 5 };
+
+int
+foo (int *a, int *b)
+{
+ int x = 0;
+ #pragma omp taskloop reduction (+:x) in_reduction (+:b[0])
+ for (int i = 0; i < 64; i++)
+ {
+ x += a[i];
+ *b += a[i] * 2;
+ }
+ return x;
+}
+
+unsigned long long int
+bar (int *a, unsigned long long int *b)
+{
+ unsigned long long int x = 1;
+ #pragma omp taskloop reduction (*:x) in_reduction (*:b[0])
+ for (int i = 0; i < 64; i++)
+ {
+ #pragma omp task in_reduction (*:x)
+ x *= a[i];
+ #pragma omp task in_reduction (*:b[0])
+ *b *= (3 - a[i]);
+ }
+ return x;
+}
+
+void
+baz (int i, int *a, int *c)
+{
+ #pragma omp task in_reduction (*:h) in_reduction (+:g)
+ {
+ g.s += 7 * a[i];
+ h.s *= (3 - c[i]);
+ if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9))
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ int i, j, a[64], b = 0, c[64];
+ unsigned long long int d = 1, e;
+ int r = 0, t;
+ struct S m = { 0, 7 };
+ struct S n = { 1, 5 };
+ for (i = 0; i < 64; i++)
+ {
+ a[i] = 2 * i;
+ c[i] = 1 + ((i % 3) != 1);
+ }
+ #pragma omp parallel reduction (task, +:b) reduction(+:r) \
+ reduction(task,*:d) reduction (task, +: g, m) \
+ reduction (task, *: h, n) shared(t)
+ {
+ #pragma omp master
+ {
+ j = foo (a, &b);
+ t = omp_get_num_threads ();
+ }
+ r++;
+ #pragma omp single nowait
+ e = bar (c, &d);
+ #pragma omp master
+ #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n)
+ for (i = 0; i < 64; ++i)
+ {
+ g.s += 3 * a[i];
+ h.s *= (3 - c[i]);
+ m.s += 4 * a[i];
+ n.s *= c[i];
+ if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)
+ || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9))
+ abort ();
+ baz (i, a, c);
+ }
+ }
+ if (n.s != (1ULL << 43) || n.t != 5)
+ abort ();
+ if (j != 63 * 64 || b != 63 * 64 * 2)
+ abort ();
+ if (e != (1ULL << 43) || d != (1ULL << 21))
+ abort ();
+ if (g.s != 63 * 64 * 10 || g.t != 7)
+ abort ();
+ if (h.s != (1ULL << 42) || h.t != 5)
+ abort ();
+ if (m.s != 63 * 64 * 4 || m.t != 7)
+ abort ();
+ return 0;
+}
@@ -0,0 +1,216 @@
+#ifdef __cplusplus
+extern "C"
+#endif
+void abort (void);
+
+int a[2];
+long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 };
+int e[3] = { 5, 0, 5 };
+int f[5] = { 6, 7, 0, 0, 9 };
+int g[4] = { 1, 0, 0, 2 };
+int h[3] = { 0, 1, 4 };
+int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+long long *s;
+long long (*t)[2];
+
+void
+foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2])
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0] += 7;
+ a[1] += 17;
+ b[2] *= 2;
+ b[4] *= 2;
+ c[0] += 6;
+ d[1] *= 2;
+ e[1] += 19;
+ f[2] += 21;
+ f[3] += 23;
+ g[1] += 25;
+ g[2] += 27;
+ h[0] += 29;
+ k[1][0] += 31;
+ k[2][1] += 33;
+ m[1] += 19;
+ r[2] += 21;
+ r[3] += 23;
+ o[1] += 25;
+ o[2] += 27;
+ p[0] += 29;
+ q[1][0] += 31;
+ q[2][1] += 33;
+ s[1] *= 2;
+ t[2][0] *= 2;
+ t[3][1] *= 2;
+ }
+}
+
+void
+test (int n)
+{
+ int c[2] = { 0, 0 };
+ int p[3] = { 0, 1, 4 };
+ int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+ long long ss[4] = { 5, 1, 1, 6 };
+ long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+ long long int d[] = { 1, 1 };
+ int m[3] = { 5, 0, 5 };
+ int r[5] = { 6, 7, 0, 0, 9 };
+ int o[4] = { 1, 0, 0, 2 };
+ s = ss;
+ t = tt;
+ #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+ reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task, *: t[2:2][:], s[1:n + 1]) num_threads(4)
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < 4; i++)
+ {
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ int j;
+ a[0] += 2;
+ a[1] += 3;
+ b[2] *= 2;
+ f[3] += 8;
+ g[1] += 9;
+ g[2] += 10;
+ h[0] += 11;
+ k[1][1] += 13;
+ k[2][1] += 15;
+ m[1] += 16;
+ r[2] += 8;
+ s[1] *= 2;
+ t[2][1] *= 2;
+ t[3][1] *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1] += 6;
+ r[2] += 7;
+ q[1][0] += 17;
+ q[2][0] += 19;
+ a[0] += 4;
+ a[1] += 5;
+ b[3] *= 2;
+ b[4] *= 2;
+ f[3] += 18;
+ g[1] += 29;
+ g[2] += 18;
+ h[0] += 19;
+ s[2] *= 2;
+ t[2][0] *= 2;
+ t[3][0] *= 2;
+ foo (n, c, d, m, r, o, p, q);
+ r[3] += 18;
+ o[1] += 29;
+ o[2] += 18;
+ p[0] += 19;
+ c[0] += 4;
+ c[1] += 5;
+ d[0] *= 2;
+ e[1] += 6;
+ f[2] += 7;
+ k[1][0] += 17;
+ k[2][0] += 19;
+ }
+ r[3] += 8;
+ o[1] += 9;
+ o[2] += 10;
+ p[0] += 11;
+ q[1][1] += 13;
+ q[2][1] += 15;
+ b[3] *= 2;
+ c[0] += 4;
+ c[1] += 9;
+ d[0] *= 2;
+ e[1] += 16;
+ f[2] += 8;
+ }
+ }
+ }
+ if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0] != 9 || b[1] != 11
+ || b[2] != 1LL << (16 + 4)
+ || b[3] != 1LL << (8 + 4)
+ || b[4] != 1LL << (16 + 8)
+ || b[5] != 13 || b[6] != 15
+ || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1] != 5 * 8 + 9 * 4
+ || d[0] != 1LL << (8 + 4)
+ || d[1] != 1LL << 16
+ || e[0] != 5
+ || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2] != 5
+ || f[0] != 6
+ || f[1] != 7
+ || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4] != 9
+ || g[0] != 1
+ || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3] != 2
+ || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1] != 1 || h[2] != 4
+ || k[0][0] != 5 || k[0][1] != 6
+ || k[1][0] != 31 * 16 + 17 * 8
+ || k[1][1] != 13 * 4
+ || k[2][0] != 19 * 8
+ || k[2][1] != 33 * 16 + 15 * 4
+ || k[3][0] != 7 || k[3][1] != 8
+ || m[0] != 5
+ || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2] != 5
+ || o[0] != 1
+ || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3] != 2
+ || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1] != 1 || p[2] != 4
+ || q[0][0] != 5 || q[0][1] != 6
+ || q[1][0] != 31 * 16 + 17 * 8
+ || q[1][1] != 13 * 4
+ || q[2][0] != 19 * 8
+ || q[2][1] != 33 * 16 + 15 * 4
+ || q[3][0] != 7 || q[3][1] != 8
+ || r[0] != 6
+ || r[1] != 7
+ || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4] != 9
+ || ss[0] != 5
+ || ss[1] != 1LL << (16 + 4)
+ || ss[2] != 1LL << 8
+ || ss[3] != 6
+ || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12
+ || tt[2][0] != 1LL << (16 + 8)
+ || tt[2][1] != 1LL << 4
+ || tt[3][0] != 1LL << 8
+ || tt[3][1] != 1LL << (16 + 4)
+ || tt[4][0] != 13 || tt[4][1] != 14)
+ abort ();
+}
+
+int
+main ()
+{
+ test (1);
+ return 0;
+}
@@ -0,0 +1,86 @@
+typedef __SIZE_TYPE__ size_t;
+extern void abort (void);
+
+void
+bar (int *a, int *b, int *c, int (*d)[2], int (*e)[4], size_t n, int f[1][n], int g[1][n * 2])
+{
+ #pragma omp task in_reduction (*: a[:n], b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n])
+ {
+ a[0] *= 12;
+ a[1] *= 13;
+ b[3] *= 14;
+ b[4] *= 15;
+ c[n] *= 16;
+ c[n + 1] *= 17;
+ d[0][0] *= 18;
+ d[0][1] *= 19;
+ e[0][1] *= 20;
+ e[0][2] *= 21;
+ f[0][0] *= 22;
+ f[0][1] *= 23;
+ g[0][1] *= 24;
+ g[0][2] *= 25;
+ }
+}
+
+void
+baz (size_t n, void *x, void *y, int f[1][n], int g[1][n * 2])
+{
+ int a[n], b[n + 3], c[2 * n];
+ int (*d)[n] = (int (*)[n]) x;
+ int (*e)[n * 2] = (int (*)[n * 2]) y;
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = 1;
+ b[i + 3] = 1;
+ c[i + n] = 1;
+ d[0][i] = 1;
+ e[0][i + 1] = 1;
+ f[0][i] = 1;
+ g[0][i + 1] = 1;
+ }
+ #pragma omp parallel num_threads(2) firstprivate (n) \
+ reduction (task, *: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n])
+ {
+ #pragma omp master
+ bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, n, f, g);
+ #pragma omp master
+ #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n])
+ {
+ a[0] *= 2;
+ a[1] *= 3;
+ b[3] *= 4;
+ b[4] *= 5;
+ c[n] *= 6;
+ c[n + 1] *= 7;
+ d[0][0] *= 8;
+ d[0][1] *= 9;
+ e[0][1] *= 10;
+ e[0][2] *= 11;
+ f[0][0] *= 12;
+ f[0][1] *= 13;
+ g[0][1] *= 14;
+ g[0][2] *= 15;
+ }
+ n = 0;
+ }
+ if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75)
+ abort ();
+ if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171)
+ abort ();
+ if (e[0][1] != 200 || e[0][2] != 231 || f[0][0] != 264 || f[0][1] != 299)
+ abort ();
+ if (g[0][1] != 336 || g[0][2] != 375)
+ abort ();
+}
+
+int
+main ()
+{
+ int d[1][2], e[1][4], f[1][2], g[1][4];
+ volatile int two;
+ two = 2;
+ baz (two, (void *) d, (void *) e, f, g);
+ return 0;
+}
@@ -0,0 +1,70 @@
+extern "C" void abort ();
+
+int as;
+int &a = as;
+long int bs = 1;
+long int &b = bs;
+
+template <typename T, typename U>
+void
+foo (T &c, U &d)
+{
+ T i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (*: d) in_reduction (+: c) \
+ in_reduction (+: a) in_reduction (*: b)
+ {
+ a += 7;
+ b *= 2;
+ c += 9;
+ d *= 3;
+ }
+}
+
+template <typename T, typename U>
+void
+bar ()
+{
+ T cs = 0;
+ T &c = cs;
+ U ds = 1;
+ #pragma omp parallel if (0)
+ {
+ U &d = ds;
+ #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d)
+ {
+ T i;
+ #pragma omp for
+ for (i = 0; i < 4; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+ {
+ T j;
+ a += 7;
+ b *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+ {
+ a += 7;
+ b *= 2;
+ c += 9;
+ d *= 3;
+ foo (c, d);
+ }
+ c += 9;
+ d *= 3;
+ }
+ }
+#define THREEP4 (3LL * 3LL * 3LL * 3LL)
+ if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+ * THREEP4))
+ abort ();
+ }
+ if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+ abort ();
+}
+
+int
+main ()
+{
+ bar<int, long long int> ();
+}
@@ -0,0 +1,128 @@
+#include <omp.h>
+#include <stdlib.h>
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S a = { 0, 7 };
+S b (1, 5);
+
+void
+foo ()
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (*: b) in_reduction (+: a)
+ {
+ a.s += 7;
+ b.s *= 2;
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9))
+ abort ();
+ }
+}
+
+void
+test ()
+{
+ S c = { 0, 7 };
+ int t;
+ #pragma omp parallel num_threads (1)
+ {
+ S d (1, 5);
+ int r = 0;
+ #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) \
+ reduction (+: r)
+ {
+ int i;
+ #pragma omp master
+ t = omp_get_num_threads ();
+ r++;
+ a.s += 3;
+ c.s += 4;
+ #pragma omp for
+ for (i = 0; i < 4; i++)
+ #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c)
+ {
+ int j;
+ a.s += 7;
+ b.s *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a) in_reduction (*: b) \
+ in_reduction (+: c) in_reduction (*: d)
+ {
+ a.s += 7;
+ b.s *= 2;
+ c.s += 9;
+ d.s *= 3;
+ foo ();
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+ c.s += 9;
+ d.s *= 3;
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+ }
+#define THREEP4 (3L * 3L * 3L * 3L)
+ if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5 || r != t)
+ abort ();
+ }
+ if (a.s != 28 * 7 + 3 * t || a.t != 7 || b.s != (1L << 28) || b.t != 5
+ || c.s != 12 * 9 + 4 * t || c.t != 7)
+ abort ();
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ test ();
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+}
@@ -0,0 +1,125 @@
+extern "C" void abort ();
+
+struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long long int x, int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as = { 0LL, 7 };
+S &a = as;
+S bs (1LL, 5);
+S &b = bs;
+
+void
+foo (S &c, S &d)
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a)
+ {
+ a.s += 7;
+ b.s *= 2;
+ c.s += 9;
+ d.s *= 3;
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+}
+
+void
+test ()
+{
+ S cs = { 0LL, 7 };
+ S &c = cs;
+ S ds (1LL, 5);
+ #pragma omp parallel if (0)
+ {
+ S &d = ds;
+ #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d)
+ {
+ #pragma omp for
+ for (int i = 0; i < 4; i++)
+ #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c)
+ {
+ int j;
+ a.s += 7;
+ b.s *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a) in_reduction (*: b) \
+ in_reduction (+: c) in_reduction (*: d)
+ {
+ a.s += 7;
+ b.s *= 2;
+ c.s += 9;
+ d.s *= 3;
+ foo (c, d);
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+ c.s += 9;
+ d.s *= 3;
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+ }
+#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL)
+ if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5)
+ abort ();
+ }
+ if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5
+ || c.s != 28 * 9 || c.t != 7)
+ abort ();
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ test ();
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+}
@@ -0,0 +1,237 @@
+extern "C" void abort ();
+
+int as[2];
+int (&a)[2] = as;
+long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 };
+long long int (&b)[7] = bs;
+int es[3] = { 5, 0, 5 };
+int (&e)[3] = es;
+int fs[5] = { 6, 7, 0, 0, 9 };
+int (&f)[5] = fs;
+int gs[4] = { 1, 0, 0, 2 };
+int (&g)[4] = gs;
+int hs[3] = { 0, 1, 4 };
+int (&h)[3] = hs;
+int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+int (&k)[4][2] = ks;
+long long *ss;
+long long *&s = ss;
+long long (*ts)[2];
+long long (*&t)[2] = ts;
+
+template <typename T>
+void
+foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2])
+{
+ T i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0] += 7;
+ a[1] += 17;
+ b[2] *= 2;
+ b[4] *= 2;
+ c[0] += 6;
+ d[1] *= 2;
+ e[1] += 19;
+ f[2] += 21;
+ f[3] += 23;
+ g[1] += 25;
+ g[2] += 27;
+ h[0] += 29;
+ k[1][0] += 31;
+ k[2][1] += 33;
+ m[1] += 19;
+ r[2] += 21;
+ r[3] += 23;
+ o[1] += 25;
+ o[2] += 27;
+ p[0] += 29;
+ q[1][0] += 31;
+ q[2][1] += 33;
+ s[1] *= 2;
+ t[2][0] *= 2;
+ t[3][1] *= 2;
+ }
+}
+
+template <typename T>
+void
+test (T &n)
+{
+ T cs[2] = { 0, 0 };
+ T (&c)[2] = cs;
+ T ps[3] = { 0, 1, 4 };
+ T (&p)[3] = ps;
+ T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+ T (&q)[4][2] = qs;
+ long long sb[4] = { 5, 1, 1, 6 };
+ long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+ T ms[3] = { 5, 0, 5 };
+ T os[4] = { 1, 0, 0, 2 };
+ s = sb;
+ t = tb;
+ #pragma omp parallel if (0)
+ {
+ long long int ds[] = { 1, 1 };
+ long long int (&d)[2] = ds;
+ T (&m)[3] = ms;
+ T rs[5] = { 6, 7, 0, 0, 9 };
+ T (&r)[5] = rs;
+ T (&o)[4] = os;
+ #pragma omp parallel reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \
+ reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task,*: t[2:2][:], s[1:n + 1])
+ {
+ #pragma omp for
+ for (int i = 0; i < 4; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ T j;
+ a[0] += 2;
+ a[1] += 3;
+ b[2] *= 2;
+ f[3] += 8;
+ g[1] += 9;
+ g[2] += 10;
+ h[0] += 11;
+ k[1][1] += 13;
+ k[2][1] += 15;
+ m[1] += 16;
+ r[2] += 8;
+ s[1] *= 2;
+ t[2][1] *= 2;
+ t[3][1] *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1] += 6;
+ r[2] += 7;
+ q[1][0] += 17;
+ q[2][0] += 19;
+ a[0] += 4;
+ a[1] += 5;
+ b[3] *= 2;
+ b[4] *= 2;
+ f[3] += 18;
+ g[1] += 29;
+ g[2] += 18;
+ h[0] += 19;
+ s[2] *= 2;
+ t[2][0] *= 2;
+ t[3][0] *= 2;
+ T *cp = c;
+ long long int *dp = d;
+ T *rp = r;
+ T *pp = p;
+ foo (n, cp, dp, m, rp, o, pp, q);
+ r[3] += 18;
+ o[1] += 29;
+ o[2] += 18;
+ p[0] += 19;
+ c[0] += 4;
+ c[1] += 5;
+ d[0] *= 2;
+ e[1] += 6;
+ f[2] += 7;
+ k[1][0] += 17;
+ k[2][0] += 19;
+ }
+ r[3] += 8;
+ o[1] += 9;
+ o[2] += 10;
+ p[0] += 11;
+ q[1][1] += 13;
+ q[2][1] += 15;
+ b[3] *= 2;
+ c[0] += 4;
+ c[1] += 9;
+ d[0] *= 2;
+ e[1] += 16;
+ f[2] += 8;
+ }
+ }
+ if (d[0] != 1LL << (8 + 4)
+ || d[1] != 1LL << 16
+ || m[0] != 5
+ || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2] != 5
+ || r[0] != 6
+ || r[1] != 7
+ || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4] != 9
+ || o[0] != 1
+ || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3] != 2)
+ abort ();
+ }
+ if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0] != 9 || b[1] != 11
+ || b[2] != 1LL << (16 + 4)
+ || b[3] != 1LL << (8 + 4)
+ || b[4] != 1LL << (16 + 8)
+ || b[5] != 13 || b[6] != 15
+ || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1] != 5 * 8 + 9 * 4
+ || e[0] != 5
+ || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2] != 5
+ || f[0] != 6
+ || f[1] != 7
+ || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4] != 9
+ || g[0] != 1
+ || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3] != 2
+ || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1] != 1 || h[2] != 4
+ || k[0][0] != 5 || k[0][1] != 6
+ || k[1][0] != 31 * 16 + 17 * 8
+ || k[1][1] != 13 * 4
+ || k[2][0] != 19 * 8
+ || k[2][1] != 33 * 16 + 15 * 4
+ || k[3][0] != 7 || k[3][1] != 8
+ || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1] != 1 || p[2] != 4
+ || q[0][0] != 5 || q[0][1] != 6
+ || q[1][0] != 31 * 16 + 17 * 8
+ || q[1][1] != 13 * 4
+ || q[2][0] != 19 * 8
+ || q[2][1] != 33 * 16 + 15 * 4
+ || q[3][0] != 7 || q[3][1] != 8
+ || sb[0] != 5
+ || sb[1] != 1LL << (16 + 4)
+ || sb[2] != 1LL << 8
+ || sb[3] != 6
+ || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+ || tb[2][0] != 1LL << (16 + 8)
+ || tb[2][1] != 1LL << 4
+ || tb[3][0] != 1LL << 8
+ || tb[3][1] != 1LL << (16 + 4)
+ || tb[4][0] != 13 || tb[4][1] != 14)
+ abort ();
+}
+
+int
+main ()
+{
+ int n = 1;
+ test (n);
+ return 0;
+}
@@ -0,0 +1,321 @@
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S a[2] = { { 0, 7 }, { 0, 7 } };
+S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S *s;
+S (*t)[2];
+
+template <int N>
+void
+foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0].s += 7;
+ a[1].s += 17;
+ b[2].s *= 2;
+ b[4].s *= 2;
+ c[0].s += 6;
+ d[1].s *= 2;
+ e[1].s += 19;
+ f[2].s += 21;
+ f[3].s += 23;
+ g[1].s += 25;
+ g[2].s += 27;
+ h[0].s += 29;
+ k[1][0].s += 31;
+ k[2][1].s += 33;
+ m[1].s += 19;
+ r[2].s += 21;
+ r[3].s += 23;
+ o[1].s += 25;
+ o[2].s += 27;
+ p[0].s += 29;
+ q[1][0].s += 31;
+ q[2][1].s += 33;
+ s[1].s *= 2;
+ t[2][0].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ }
+}
+
+template <int N>
+void
+test (int n)
+{
+ S c[2] = { { 0, 7 }, { 0, 7 } };
+ S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+ S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+ S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+ S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+ s = ss;
+ t = tt;
+ #pragma omp parallel num_threads (1) if (0)
+ {
+ S d[] = { { 1, 5 }, { 1, 5 } };
+ S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+ S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+ S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+ #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+ reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task, *: t[2:2][:], s[1:n + 1])
+ {
+ #pragma omp for
+ for (int i = 0; i < 4; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ int j;
+ a[0].s += 2;
+ a[1].s += 3;
+ b[2].s *= 2;
+ f[3].s += 8;
+ g[1].s += 9;
+ g[2].s += 10;
+ h[0].s += 11;
+ k[1][1].s += 13;
+ k[2][1].s += 15;
+ m[1].s += 16;
+ r[2].s += 8;
+ s[1].s *= 2;
+ t[2][1].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1].s += 6;
+ r[2].s += 7;
+ q[1][0].s += 17;
+ q[2][0].s += 19;
+ a[0].s += 4;
+ a[1].s += 5;
+ b[3].s *= 2;
+ b[4].s *= 2;
+ f[3].s += 18;
+ g[1].s += 29;
+ g[2].s += 18;
+ h[0].s += 19;
+ s[2].s *= 2;
+ t[2][0].s *= 2;
+ t[3][0].s *= 2;
+ foo<N> (n, c, d, m, r, o, p, q);
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ r[3].s += 18;
+ o[1].s += 29;
+ o[2].s += 18;
+ p[0].s += 19;
+ c[0].s += 4;
+ c[1].s += 5;
+ d[0].s *= 2;
+ e[1].s += 6;
+ f[2].s += 7;
+ k[1][0].s += 17;
+ k[2][0].s += 19;
+ }
+ r[3].s += 8;
+ o[1].s += 9;
+ o[2].s += 10;
+ p[0].s += 11;
+ q[1][1].s += 13;
+ q[2][1].s += 15;
+ b[3].s *= 2;
+ c[0].s += 4;
+ c[1].s += 9;
+ d[0].s *= 2;
+ e[1].s += 16;
+ f[2].s += 8;
+ }
+ }
+ if (d[0].s != 1LL << (8 + 4)
+ || d[1].s != 1LL << 16
+ || m[0].s != 5
+ || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2].s != 5
+ || r[0].s != 6
+ || r[1].s != 7
+ || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4].s != 9
+ || o[0].s != 1
+ || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3].s != 2)
+ abort ();
+ if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+ || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+ || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+ || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5)
+ abort ();
+ }
+ if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0].s != 9 || b[1].s != 11
+ || b[2].s != 1LL << (16 + 4)
+ || b[3].s != 1LL << (8 + 4)
+ || b[4].s != 1LL << (16 + 8)
+ || b[5].s != 13 || b[6].s != 15
+ || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1].s != 5 * 8 + 9 * 4
+ || e[0].s != 5
+ || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2].s != 5
+ || f[0].s != 6
+ || f[1].s != 7
+ || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4].s != 9
+ || g[0].s != 1
+ || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3].s != 2
+ || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1].s != 1 || h[2].s != 4
+ || k[0][0].s != 5 || k[0][1].s != 6
+ || k[1][0].s != 31 * 16 + 17 * 8
+ || k[1][1].s != 13 * 4
+ || k[2][0].s != 19 * 8
+ || k[2][1].s != 33 * 16 + 15 * 4
+ || k[3][0].s != 7 || k[3][1].s != 8
+ || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1].s != 1 || p[2].s != 4
+ || q[0][0].s != 5 || q[0][1].s != 6
+ || q[1][0].s != 31 * 16 + 17 * 8
+ || q[1][1].s != 13 * 4
+ || q[2][0].s != 19 * 8
+ || q[2][1].s != 33 * 16 + 15 * 4
+ || q[3][0].s != 7 || q[3][1].s != 8
+ || ss[0].s != 5
+ || ss[1].s != 1LL << (16 + 4)
+ || ss[2].s != 1LL << 8
+ || ss[3].s != 6
+ || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12
+ || tt[2][0].s != 1LL << (16 + 8)
+ || tt[2][1].s != 1LL << 4
+ || tt[3][0].s != 1LL << 8
+ || tt[3][1].s != 1LL << (16 + 4)
+ || tt[4][0].s != 13 || tt[4][1].s != 14)
+ abort ();
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ test<0> (1);
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+ return 0;
+}
@@ -0,0 +1,342 @@
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as[2] = { { 0, 7 }, { 0, 7 } };
+S (&a)[2] = as;
+S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S (&b)[7] = bs;
+S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S (&e)[3] = es;
+S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S (&f)[5] = fs;
+S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S (&g)[4] = gs;
+S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S (&h)[3] = hs;
+S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S (&k)[4][2] = ks;
+S *ss;
+S *&s = ss;
+S (*ts)[2];
+S (*&t)[2] = ts;
+
+template <typename S, typename T>
+void
+foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
+{
+ T i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0].s += 7;
+ a[1].s += 17;
+ b[2].s *= 2;
+ b[4].s *= 2;
+ c[0].s += 6;
+ d[1].s *= 2;
+ e[1].s += 19;
+ f[2].s += 21;
+ f[3].s += 23;
+ g[1].s += 25;
+ g[2].s += 27;
+ h[0].s += 29;
+ k[1][0].s += 31;
+ k[2][1].s += 33;
+ m[1].s += 19;
+ r[2].s += 21;
+ r[3].s += 23;
+ o[1].s += 25;
+ o[2].s += 27;
+ p[0].s += 29;
+ q[1][0].s += 31;
+ q[2][1].s += 33;
+ s[1].s *= 2;
+ t[2][0].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ }
+}
+
+template <typename S, typename T>
+void
+test (T &n)
+{
+ S cs[2] = { { 0, 7 }, { 0, 7 } };
+ S (&c)[2] = cs;
+ S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+ S (&p)[3] = ps;
+ S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+ S (&q)[4][2] = qs;
+ S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+ S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+ S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+ S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+ s = sb;
+ t = tb;
+ #pragma omp parallel if (0)
+ {
+ S ds[] = { { 1, 5 }, { 1, 5 } };
+ S (&d)[2] = ds;
+ S (&m)[3] = ms;
+ S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+ S (&r)[5] = rs;
+ S (&o)[4] = os;
+ #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+ reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task, *: t[2:2][:], s[1:n + 1])
+ {
+ #pragma omp for
+ for (T i = 0; i < 4; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ T j;
+ a[0].s += 2;
+ a[1].s += 3;
+ b[2].s *= 2;
+ f[3].s += 8;
+ g[1].s += 9;
+ g[2].s += 10;
+ h[0].s += 11;
+ k[1][1].s += 13;
+ k[2][1].s += 15;
+ m[1].s += 16;
+ r[2].s += 8;
+ s[1].s *= 2;
+ t[2][1].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1].s += 6;
+ r[2].s += 7;
+ q[1][0].s += 17;
+ q[2][0].s += 19;
+ a[0].s += 4;
+ a[1].s += 5;
+ b[3].s *= 2;
+ b[4].s *= 2;
+ f[3].s += 18;
+ g[1].s += 29;
+ g[2].s += 18;
+ h[0].s += 19;
+ s[2].s *= 2;
+ t[2][0].s *= 2;
+ t[3][0].s *= 2;
+ S *cp = c;
+ S *dp = d;
+ S *rp = r;
+ S *pp = p;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ foo (n, cp, dp, m, rp, o, pp, q);
+ r[3].s += 18;
+ o[1].s += 29;
+ o[2].s += 18;
+ p[0].s += 19;
+ c[0].s += 4;
+ c[1].s += 5;
+ d[0].s *= 2;
+ e[1].s += 6;
+ f[2].s += 7;
+ k[1][0].s += 17;
+ k[2][0].s += 19;
+ }
+ r[3].s += 8;
+ o[1].s += 9;
+ o[2].s += 10;
+ p[0].s += 11;
+ q[1][1].s += 13;
+ q[2][1].s += 15;
+ b[3].s *= 2;
+ c[0].s += 4;
+ c[1].s += 9;
+ d[0].s *= 2;
+ e[1].s += 16;
+ f[2].s += 8;
+ }
+ }
+ if (d[0].s != 1LL << (8 + 4)
+ || d[1].s != 1LL << 16
+ || m[0].s != 5
+ || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2].s != 5
+ || r[0].s != 6
+ || r[1].s != 7
+ || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4].s != 9
+ || o[0].s != 1
+ || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3].s != 2)
+ abort ();
+ if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+ || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+ || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+ || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5)
+ abort ();
+ }
+ if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0].s != 9 || b[1].s != 11
+ || b[2].s != 1LL << (16 + 4)
+ || b[3].s != 1LL << (8 + 4)
+ || b[4].s != 1LL << (16 + 8)
+ || b[5].s != 13 || b[6].s != 15
+ || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1].s != 5 * 8 + 9 * 4
+ || e[0].s != 5
+ || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2].s != 5
+ || f[0].s != 6
+ || f[1].s != 7
+ || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4].s != 9
+ || g[0].s != 1
+ || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3].s != 2
+ || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1].s != 1 || h[2].s != 4
+ || k[0][0].s != 5 || k[0][1].s != 6
+ || k[1][0].s != 31 * 16 + 17 * 8
+ || k[1][1].s != 13 * 4
+ || k[2][0].s != 19 * 8
+ || k[2][1].s != 33 * 16 + 15 * 4
+ || k[3][0].s != 7 || k[3][1].s != 8
+ || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1].s != 1 || p[2].s != 4
+ || q[0][0].s != 5 || q[0][1].s != 6
+ || q[1][0].s != 31 * 16 + 17 * 8
+ || q[1][1].s != 13 * 4
+ || q[2][0].s != 19 * 8
+ || q[2][1].s != 33 * 16 + 15 * 4
+ || q[3][0].s != 7 || q[3][1].s != 8
+ || sb[0].s != 5
+ || sb[1].s != 1LL << (16 + 4)
+ || sb[2].s != 1LL << 8
+ || sb[3].s != 6
+ || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12
+ || tb[2][0].s != 1LL << (16 + 8)
+ || tb[2][1].s != 1LL << 4
+ || tb[3][0].s != 1LL << 8
+ || tb[3][1].s != 1LL << (16 + 4)
+ || tb[4][0].s != 13 || tb[4][1].s != 14)
+ abort ();
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ int n = 1;
+ test<S, int> (n);
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+ return 0;
+}