[gomp5] Add support for reduction clause task modifier on parallel

Message ID	20181023134103.GJ11625@tucnak
State	New
Headers	show Return-Path: <gcc-patches-return-488132-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:reply-to:mime-version:content-type; q=dns; s=default; b=ZNErYiRAsc548P1/Cq06frV1pftC2hm8g2hzmS/OILs LJZxDO+SDvPt+3Dc4jYqptpDICaEbXHWxOlizkSuOQgb5lBVhfLyIgFV2hbYMQF/ uVDDO7bg1n0tx8vZ96yWlY2YRrbrBOv4diVzzTcddgQDc02Va+1VaSRYsZ00CjyE = Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Date: Tue, 23 Oct 2018 15:41:03 +0200 From: Jakub Jelinek <jakub@redhat.com> To: gcc-patches@gcc.gnu.org Subject: [gomp5] Add support for reduction clause task modifier on parallel Message-ID: <20181023134103.GJ11625@tucnak> Reply-To: Jakub Jelinek <jakub@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.9.2 (2017-12-15)
Series	[gomp5] Add support for reduction clause task modifier on parallel | expand [gomp5] Add support for reduction clause task modifier on parallel

--- gcc/builtin-types.def.jj 2018-09-25 14:30:29.038766626 +0200 +++ gcc/builtin-types.def 2018-10-22 13:02:46.904919759 +0200 @@ -644,6 +644,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_INT_FILEPTR_I BT_INT, BT_FILEPTR, BT_INT, BT_CONST_STRING, BT_VALIST_ARG) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) +DEF_FUNCTION_TYPE_4 (BT_FN_UINT_OMPFN_PTR_UINT_UINT, + BT_UINT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR, BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE, --- gcc/omp-builtins.def.jj 2018-09-25 14:32:54.671315163 +0200 +++ gcc/omp-builtins.def 2018-10-22 13:02:15.955438340 +0200 @@ -315,6 +315,9 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_DOACROSS BT_FN_VOID_ULL_VAR, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL, "GOMP_parallel", BT_FN_VOID_OMPFN_PTR_UINT_UINT, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_REDUCTIONS, + "GOMP_parallel_reductions", + BT_FN_UINT_OMPFN_PTR_UINT_UINT, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK, "GOMP_task", BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, ATTR_NOTHROW_LIST) --- gcc/omp-low.c.jj 2018-10-16 11:05:17.353749518 +0200 +++ gcc/omp-low.c 2018-10-23 14:39:18.057581035 +0200 @@ -1097,8 +1097,9 @@ scan_sharing_clauses (tree clauses, omp_ && (TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == POINTER_TYPE))))) && !is_variable_sized (t) - && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION - || !is_task_ctx (ctx))) + && (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION + || (!OMP_CLAUSE_REDUCTION_TASK (c) + && !is_task_ctx (ctx)))) { by_ref = use_pointer_for_field (t, NULL); if (is_task_ctx (ctx) @@ -1113,7 +1114,10 @@ scan_sharing_clauses (tree clauses, omp_ } break; } - if (is_task_ctx (ctx)) + if (is_task_ctx (ctx) + || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_TASK (c) + && is_parallel_ctx (ctx))) { 
/* Global variables don't need to be copied, the receiver side will use them directly. */ @@ -1851,6 +1855,23 @@ scan_omp_parallel (gimple_stmt_iterator if (gimple_omp_parallel_combined_p (stmt)) add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_FOR, stmt, outer_ctx); + for (tree c = omp_find_clause (gimple_omp_parallel_clauses (stmt), + OMP_CLAUSE_REDUCTION); + c; c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE_REDUCTION)) + if (OMP_CLAUSE_REDUCTION_TASK (c)) + { + tree type = build_pointer_type (pointer_sized_int_node); + tree temp = create_tmp_var (type); + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_); + if (outer_ctx) + insert_decl_map (&outer_ctx->cb, temp, temp); + OMP_CLAUSE_DECL (c) = temp; + OMP_CLAUSE_CHAIN (c) = gimple_omp_parallel_clauses (stmt); + gimple_omp_parallel_set_clauses (stmt, c); + break; + } + else if (OMP_CLAUSE_CHAIN (c) == NULL_TREE) + break; ctx = new_omp_context (stmt, outer_ctx); taskreg_contexts.safe_push (ctx); @@ -2029,8 +2050,31 @@ finish_taskreg_scan (omp_context *ctx) } } - if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL - || gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS) + if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL) + { + tree clauses = gimple_omp_parallel_clauses (ctx->stmt); + tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); + if (c) + { + /* Move the _reductemp_ clause first. GOMP_parallel_reductions + expects to find it at the start of data. 
*/ + tree f = lookup_field (OMP_CLAUSE_DECL (c), ctx); + tree *p = &TYPE_FIELDS (ctx->record_type); + while (*p) + if (*p == f) + { + *p = DECL_CHAIN (*p); + break; + } + else + p = &DECL_CHAIN (*p); + DECL_CHAIN (f) = TYPE_FIELDS (ctx->record_type); + TYPE_FIELDS (ctx->record_type) = f; + } + layout_type (ctx->record_type); + fixup_child_record_type (ctx); + } + else if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS) { layout_type (ctx->record_type); fixup_child_record_type (ctx); @@ -3846,9 +3890,8 @@ lower_rec_input_clauses (tree clauses, g break; case OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_IN_REDUCTION: - if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) - reduction_omp_orig_ref = true; - if (is_task_ctx (ctx) /* || OMP_CLAUSE_REDUCTION_TASK (c) */) + if (is_task_ctx (ctx) + || (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx))) { task_reduction_p = true; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) @@ -3877,6 +3920,8 @@ lower_rec_input_clauses (tree clauses, g } } } + else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) + reduction_omp_orig_ref = true; break; case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__REDUCTEMP_: @@ -4215,15 +4260,19 @@ lower_rec_input_clauses (tree clauses, g tree end = create_artificial_label (UNKNOWN_LOCATION); if (cond) { - tree condv = create_tmp_var (boolean_type_node); - gimple *g - = gimple_build_assign (condv, build_simple_mem_ref (cond)); - gimple_seq_add_stmt (ilist, g); - tree lab1 = create_artificial_label (UNKNOWN_LOCATION); - g = gimple_build_cond (NE_EXPR, condv, - boolean_false_node, end, lab1); - gimple_seq_add_stmt (ilist, g); - gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + gimple *g; + if (!is_parallel_ctx (ctx)) + { + tree condv = create_tmp_var (boolean_type_node); + g = gimple_build_assign (condv, + build_simple_mem_ref (cond)); + gimple_seq_add_stmt (ilist, g); + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, condv, + boolean_false_node, end, lab1); + 
gimple_seq_add_stmt (ilist, g); + gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + } g = gimple_build_assign (build_simple_mem_ref (cond), boolean_true_node); gimple_seq_add_stmt (ilist, g); @@ -4920,17 +4969,23 @@ lower_rec_input_clauses (tree clauses, g tree lab2 = NULL_TREE; if (cond) { - tree condv = create_tmp_var (boolean_type_node); - gimple *g - = gimple_build_assign (condv, - build_simple_mem_ref (cond)); - gimple_seq_add_stmt (ilist, g); - tree lab1 = create_artificial_label (UNKNOWN_LOCATION); - lab2 = create_artificial_label (UNKNOWN_LOCATION); - g = gimple_build_cond (NE_EXPR, condv, - boolean_false_node, lab2, lab1); - gimple_seq_add_stmt (ilist, g); - gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + gimple *g; + if (!is_parallel_ctx (ctx)) + { + tree condv = create_tmp_var (boolean_type_node); + tree m = build_simple_mem_ref (cond); + g = gimple_build_assign (condv, m); + gimple_seq_add_stmt (ilist, g); + tree lab1 + = create_artificial_label (UNKNOWN_LOCATION); + lab2 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, condv, + boolean_false_node, + lab2, lab1); + gimple_seq_add_stmt (ilist, g); + gimple_seq_add_stmt (ilist, + gimple_build_label (lab1)); + } g = gimple_build_assign (build_simple_mem_ref (cond), boolean_true_node); gimple_seq_add_stmt (ilist, g); @@ -4958,7 +5013,8 @@ lower_rec_input_clauses (tree clauses, g DECL_HAS_VALUE_EXPR_P (placeholder) = 0; if (cond) { - gimple_seq_add_stmt (ilist, gimple_build_label (lab2)); + if (lab2) + gimple_seq_add_stmt (ilist, gimple_build_label (lab2)); break; } goto do_dtor; @@ -4972,6 +5028,7 @@ lower_rec_input_clauses (tree clauses, g if (cond) { gimple *g; + tree lab2 = NULL_TREE; /* GOMP_taskgroup_reduction_register memsets the whole array to zero. 
If the initializer is zero, we don't need to initialize it again, just mark it as ever @@ -4986,21 +5043,28 @@ lower_rec_input_clauses (tree clauses, g /* Otherwise, emit if (!cond) { cond = true; new_var = x; } */ - tree condv = create_tmp_var (boolean_type_node); - g = gimple_build_assign (condv, - build_simple_mem_ref (cond)); - gimple_seq_add_stmt (ilist, g); - tree lab1 = create_artificial_label (UNKNOWN_LOCATION); - tree lab2 = create_artificial_label (UNKNOWN_LOCATION); - g = gimple_build_cond (NE_EXPR, condv, - boolean_false_node, lab2, lab1); - gimple_seq_add_stmt (ilist, g); - gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + if (!is_parallel_ctx (ctx)) + { + tree condv = create_tmp_var (boolean_type_node); + tree m = build_simple_mem_ref (cond); + g = gimple_build_assign (condv, m); + gimple_seq_add_stmt (ilist, g); + tree lab1 + = create_artificial_label (UNKNOWN_LOCATION); + lab2 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, condv, + boolean_false_node, + lab2, lab1); + gimple_seq_add_stmt (ilist, g); + gimple_seq_add_stmt (ilist, + gimple_build_label (lab1)); + } g = gimple_build_assign (build_simple_mem_ref (cond), boolean_true_node); gimple_seq_add_stmt (ilist, g); gimplify_assign (new_var, x, ilist); - gimple_seq_add_stmt (ilist, gimple_build_label (lab2)); + if (lab2) + gimple_seq_add_stmt (ilist, gimple_build_label (lab2)); break; } @@ -5696,7 +5760,9 @@ lower_reduction_clauses (tree clauses, g /* First see if there is exactly one reduction clause. Use OMP_ATOMIC update in that case, otherwise use a lock. 
*/ for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && (!OMP_CLAUSE_REDUCTION_TASK (c) + || !is_parallel_ctx (ctx))) { if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF) @@ -5717,7 +5783,9 @@ lower_reduction_clauses (tree clauses, g enum tree_code code; location_t clause_loc = OMP_CLAUSE_LOCATION (c); - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION + || (OMP_CLAUSE_REDUCTION_TASK (c) + && is_parallel_ctx (ctx))) continue; enum omp_clause_code ccode = OMP_CLAUSE_REDUCTION; @@ -6012,6 +6080,8 @@ lower_send_clauses (tree clauses, gimple case OMP_CLAUSE_REDUCTION: if (is_task_ctx (ctx)) continue; + if (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx)) + continue; break; case OMP_CLAUSE_SHARED: if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) @@ -6939,14 +7009,26 @@ lower_omp_task_reductions (omp_context * reduce and destruct it. 
*/ tree idx = create_tmp_var (size_type_node); gimple_seq_add_stmt (end, gimple_build_assign (idx, size_zero_node)); - t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - tree num_thr = create_tmp_var (integer_type_node); - gimple *g = gimple_build_call (t, 0); - gimple_call_set_lhs (g, num_thr); - gimple_seq_add_stmt (end, g); tree num_thr_sz = create_tmp_var (size_type_node); - g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr); - gimple_seq_add_stmt (end, g); + gimple *g; + if (code != OMP_PARALLEL) + { + t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + tree num_thr = create_tmp_var (integer_type_node); + g = gimple_build_call (t, 0); + gimple_call_set_lhs (g, num_thr); + gimple_seq_add_stmt (end, g); + g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr); + gimple_seq_add_stmt (end, g); + } + else + { + tree c = omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), + OMP_CLAUSE__REDUCTEMP_); + t = fold_convert (pointer_sized_int_node, OMP_CLAUSE_DECL (c)); + t = fold_convert (size_type_node, t); + gimplify_assign (num_thr_sz, t, end); + } t = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (2), NULL_TREE, NULL_TREE); tree data = create_tmp_var (pointer_sized_int_node); @@ -7035,7 +7117,11 @@ lower_omp_task_reductions (omp_context * tree bfield = DECL_CHAIN (field); tree cond; - if (TREE_TYPE (ptr) == ptr_type_node) + if (code == OMP_PARALLEL) + /* In parallel all threads unconditionally initialize all their + task reduction private variables. 
*/ + cond = boolean_true_node; + else if (TREE_TYPE (ptr) == ptr_type_node) { cond = build2 (POINTER_PLUS_EXPR, ptr_type_node, ptr, unshare_expr (byte_position (bfield))); @@ -7185,7 +7271,7 @@ lower_omp_task_reductions (omp_context * else SET_DECL_VALUE_EXPR (d, new_var); DECL_HAS_VALUE_EXPR_P (d) = 1; - lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx->outer); + lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx); if (oldv) SET_DECL_VALUE_EXPR (d, oldv); else @@ -7223,9 +7309,9 @@ lower_omp_task_reductions (omp_context * g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); gimple_seq_add_stmt (start, g); } - else if (code == OMP_TASKLOOP) + else if (code == OMP_TASKLOOP || code == OMP_PARALLEL) { - tree c = omp_find_clause (gimple_omp_task_clauses (ctx->stmt), + tree c = omp_find_clause (gimple_omp_taskreg_clauses (ctx->stmt), OMP_CLAUSE__REDUCTEMP_); t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (c)), build_fold_addr_expr (avar)); @@ -8602,18 +8688,25 @@ lower_omp_taskreg (gimple_stmt_iterator if (ctx->srecord_type) create_task_copyfn (as_a <gomp_task *> (stmt), ctx); - gimple_seq taskloop_ilist = NULL; - gimple_seq taskloop_olist = NULL; - if (is_task_ctx (ctx) && gimple_omp_task_taskloop_p (ctx->stmt)) + gimple_seq tskred_ilist = NULL; + gimple_seq tskred_olist = NULL; + if ((is_task_ctx (ctx) + && gimple_omp_task_taskloop_p (ctx->stmt) + && omp_find_clause (gimple_omp_task_clauses (ctx->stmt), + OMP_CLAUSE_REDUCTION)) + || (is_parallel_ctx (ctx) + && omp_find_clause (gimple_omp_parallel_clauses (stmt), + OMP_CLAUSE__REDUCTEMP_))) { if (dep_bind == NULL) { push_gimplify_context (); dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); } - lower_omp_task_reductions (ctx, OMP_TASKLOOP, - gimple_omp_task_clauses (ctx->stmt), - &taskloop_ilist, &taskloop_olist); + lower_omp_task_reductions (ctx, is_task_ctx (ctx) ? 
OMP_TASKLOOP + : OMP_PARALLEL, + gimple_omp_taskreg_clauses (ctx->stmt), + &tskred_ilist, &tskred_olist); } push_gimplify_context (); @@ -8711,9 +8805,9 @@ lower_omp_taskreg (gimple_stmt_iterator if (dep_bind) { gimple_bind_add_seq (dep_bind, dep_ilist); - gimple_bind_add_seq (dep_bind, taskloop_ilist); + gimple_bind_add_seq (dep_bind, tskred_ilist); gimple_bind_add_stmt (dep_bind, bind); - gimple_bind_add_seq (dep_bind, taskloop_olist); + gimple_bind_add_seq (dep_bind, tskred_olist); gimple_bind_add_seq (dep_bind, dep_olist); pop_gimplify_context (dep_bind); } --- gcc/omp-expand.c.jj 2018-10-11 18:58:39.526603068 +0200 +++ gcc/omp-expand.c 2018-10-22 14:45:44.239718861 +0200 @@ -174,6 +174,8 @@ workshare_safe_to_combine_p (basic_block return true; gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); + if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR) + return false; omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); @@ -310,6 +312,13 @@ determine_parallel_type (struct omp_regi ws_entry_bb = region->inner->entry; ws_exit_bb = region->inner->exit; + /* Give up for task reductions on the parallel, while it is implementable, + adding another big set of APIs or slowing down the normal paths is + not acceptable. */ + tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb)); + if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_)) + return; + if (single_succ (par_entry_bb) == ws_entry_bb && single_succ (ws_exit_bb) == par_exit_bb && workshare_safe_to_combine_p (ws_entry_bb) @@ -559,7 +568,10 @@ expand_parallel_call (struct omp_region /* Determine what flavor of GOMP_parallel we will be emitting. 
*/ start_ix = BUILT_IN_GOMP_PARALLEL; - if (is_combined_parallel (region)) + tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); + if (rtmp) + start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS; + else if (is_combined_parallel (region)) { switch (region->inner->type) { @@ -716,6 +728,13 @@ expand_parallel_call (struct omp_region t = build_call_expr_loc_vec (UNKNOWN_LOCATION, builtin_decl_explicit (start_ix), args); + if (rtmp) + { + tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp)); + t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp), + fold_convert (type, + fold_convert (pointer_sized_int_node, t))); + } force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); --- gcc/fortran/types.def.jj 2018-09-25 14:33:30.349714581 +0200 +++ gcc/fortran/types.def 2018-10-22 13:03:21.342342728 +0200 @@ -150,6 +150,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) +DEF_FUNCTION_TYPE_4 (BT_FN_UINT_OMPFN_PTR_UINT_UINT, + BT_UINT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR, BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE, --- libgomp/libgomp.h.jj 2018-09-21 17:58:36.625533500 +0200 +++ libgomp/libgomp.h 2018-10-19 14:59:11.376046168 +0200 @@ -810,6 +810,8 @@ extern bool gomp_create_target_task (str size_t *, unsigned short *, unsigned int, void **, void **, enum gomp_target_task_state); +extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *, + unsigned); static void inline gomp_finish_task (struct gomp_task *task) @@ -822,7 +824,8 @@ gomp_finish_task (struct gomp_task *task extern struct gomp_team *gomp_new_team (unsigned); extern void gomp_team_start (void (*) (void *), void *, unsigned, - unsigned, struct gomp_team *); + unsigned, struct gomp_team *, + struct gomp_taskgroup *); extern void gomp_team_end (void); 
extern void gomp_free_thread (void *); extern int gomp_pause_host (void); --- libgomp/libgomp_g.h.jj 2018-10-16 14:58:29.181537535 +0200 +++ libgomp/libgomp_g.h 2018-10-22 12:57:46.591951749 +0200 @@ -236,6 +236,8 @@ extern void GOMP_doacross_ull_wait (unsi extern void GOMP_parallel_start (void (*) (void *), void *, unsigned); extern void GOMP_parallel_end (void); extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned); +extern unsigned GOMP_parallel_reductions (void (*) (void *), void *, unsigned, + unsigned); extern bool GOMP_cancel (int, bool); extern bool GOMP_cancellation_point (int); --- libgomp/libgomp.map.jj 2018-09-19 14:28:05.356205102 +0200 +++ libgomp/libgomp.map 2018-10-19 15:50:48.601485600 +0200 @@ -316,11 +316,12 @@ GOMP_4.5 { GOMP_5.0 { global: - GOMP_taskwait_depend; - GOMP_teams_reg; + GOMP_parallel_reductions; GOMP_taskgroup_reduction_register; GOMP_taskgroup_reduction_unregister; GOMP_task_reduction_remap; + GOMP_taskwait_depend; + GOMP_teams_reg; } GOMP_4.5; OACC_2.0 { --- libgomp/loop.c.jj 2018-04-30 13:20:48.939859265 +0200 +++ libgomp/loop.c 2018-10-19 15:07:17.238963796 +0200 @@ -563,7 +563,7 @@ gomp_parallel_loop_start (void (*fn) (vo num_threads = gomp_resolve_num_threads (num_threads, 0); team = gomp_new_team (num_threads); gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size); - gomp_team_start (fn, data, num_threads, flags, team); + gomp_team_start (fn, data, num_threads, flags, team, NULL); } void --- libgomp/parallel.c.jj 2018-04-30 13:21:07.365866671 +0200 +++ libgomp/parallel.c 2018-10-22 12:58:06.905611379 +0200 @@ -123,7 +123,8 @@ void GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads) { num_threads = gomp_resolve_num_threads (num_threads, 0); - gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads)); + gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads), + NULL); } void @@ -161,14 +162,33 @@ GOMP_parallel_end (void) ialias 
(GOMP_parallel_end) void -GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags) +GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, + unsigned int flags) { num_threads = gomp_resolve_num_threads (num_threads, 0); - gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads)); + gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads), + NULL); fn (data); ialias_call (GOMP_parallel_end) (); } +unsigned +GOMP_parallel_reductions (void (*fn) (void *), void *data, + unsigned num_threads, unsigned int flags) +{ + struct gomp_taskgroup *taskgroup; + num_threads = gomp_resolve_num_threads (num_threads, 0); + uintptr_t *rdata = *(uintptr_t **)data; + taskgroup = gomp_parallel_reduction_register (rdata, num_threads); + gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads), + taskgroup); + fn (data); + ialias_call (GOMP_parallel_end) (); + gomp_sem_destroy (&taskgroup->taskgroup_sem); + free (taskgroup); + return num_threads; +} + bool GOMP_cancellation_point (int which) { --- libgomp/sections.c.jj 2018-04-30 13:21:04.644865579 +0200 +++ libgomp/sections.c 2018-10-19 15:24:54.376308340 +0200 @@ -140,7 +140,7 @@ GOMP_parallel_sections_start (void (*fn) num_threads = gomp_resolve_num_threads (num_threads, count); team = gomp_new_team (num_threads); gomp_sections_init (&team->work_shares[0], count); - gomp_team_start (fn, data, num_threads, 0, team); + gomp_team_start (fn, data, num_threads, 0, team, NULL); } ialias_redirect (GOMP_parallel_end) @@ -154,7 +154,7 @@ GOMP_parallel_sections (void (*fn) (void num_threads = gomp_resolve_num_threads (num_threads, count); team = gomp_new_team (num_threads); gomp_sections_init (&team->work_shares[0], count); - gomp_team_start (fn, data, num_threads, flags, team); + gomp_team_start (fn, data, num_threads, flags, team, NULL); fn (data); GOMP_parallel_end (); } --- libgomp/task.c.jj 2018-10-10 19:24:00.989961920 +0200 +++ 
libgomp/task.c 2018-10-19 16:15:32.898831504 +0200 @@ -1763,13 +1763,27 @@ GOMP_taskyield (void) /* Nothing at the moment. */ } +static inline struct gomp_taskgroup * +gomp_taskgroup_init (struct gomp_taskgroup *prev) +{ + struct gomp_taskgroup *taskgroup + = gomp_malloc (sizeof (struct gomp_taskgroup)); + taskgroup->prev = prev; + priority_queue_init (&taskgroup->taskgroup_queue); + taskgroup->in_taskgroup_wait = false; + taskgroup->reductions = prev ? prev->reductions : NULL; + taskgroup->cancelled = false; + taskgroup->num_children = 0; + gomp_sem_init (&taskgroup->taskgroup_sem, 0); + return taskgroup; +} + void GOMP_taskgroup_start (void) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; struct gomp_task *task = thr->task; - struct gomp_taskgroup *taskgroup, *prev; /* If team is NULL, all tasks are executed as GOMP_TASK_UNDEFERRED tasks and thus all children tasks of @@ -1777,16 +1791,7 @@ GOMP_taskgroup_start (void) by the time GOMP_taskgroup_end is called. */ if (team == NULL) return; - taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); - prev = task->taskgroup; - taskgroup->prev = prev; - priority_queue_init (&taskgroup->taskgroup_queue); - taskgroup->in_taskgroup_wait = false; - taskgroup->reductions = prev ? prev->reductions : NULL; - taskgroup->cancelled = false; - taskgroup->num_children = 0; - gomp_sem_init (&taskgroup->taskgroup_sem, 0); - task->taskgroup = taskgroup; + task->taskgroup = gomp_taskgroup_init (task->taskgroup); } void @@ -1951,63 +1956,12 @@ GOMP_taskgroup_end (void) free (taskgroup); } -/* The format of data is: - data[0] cnt - data[1] size - data[2] alignment (on output array pointer) - data[3] allocator (-1 if malloc allocator) - data[4] next pointer - data[5] used internally (htab pointer) - data[6] used internally (end of array) - cnt times - ent[0] address - ent[1] offset - ent[2] used internally (pointer to data[0]). 
*/ - -void -GOMP_taskgroup_reduction_register (uintptr_t *data) +static inline void +gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads) { - struct gomp_thread *thr = gomp_thread (); - struct gomp_team *team = thr->ts.team; - struct gomp_task *task; - if (__builtin_expect (team == NULL, 0)) - { - /* The task reduction code needs a team and task, so for - orphaned taskgroups just create the implicit team. */ - struct gomp_task_icv *icv; - team = gomp_new_team (1); - task = thr->task; - icv = task ? &task->icv : &gomp_global_icv; - team->prev_ts = thr->ts; - thr->ts.team = team; - thr->ts.team_id = 0; - thr->ts.work_share = &team->work_shares[0]; - thr->ts.last_work_share = NULL; -#ifdef HAVE_SYNC_BUILTINS - thr->ts.single_count = 0; -#endif - thr->ts.static_trip = 0; - thr->task = &team->implicit_task[0]; - gomp_init_task (thr->task, NULL, icv); - if (task) - { - thr->task = task; - gomp_end_task (); - free (task); - thr->task = &team->implicit_task[0]; - } -#ifdef LIBGOMP_USE_PTHREADS - else - pthread_setspecific (gomp_thread_destructor, thr); -#endif - GOMP_taskgroup_start (); - } - unsigned nthreads = team->nthreads; size_t total_cnt = 0; - uintptr_t *d = data, *old; + uintptr_t *d = data; struct htab *old_htab = NULL, *new_htab; - task = thr->task; - old = task->taskgroup->reductions; do { size_t sz = d[1] * nthreads; @@ -2072,6 +2026,62 @@ GOMP_taskgroup_reduction_register (uintp } while (1); d[5] = (uintptr_t) new_htab; +} + +/* The format of data is: + data[0] cnt + data[1] size + data[2] alignment (on output array pointer) + data[3] allocator (-1 if malloc allocator) + data[4] next pointer + data[5] used internally (htab pointer) + data[6] used internally (end of array) + cnt times + ent[0] address + ent[1] offset + ent[2] used internally (pointer to data[0]). 
*/ + +void +GOMP_taskgroup_reduction_register (uintptr_t *data) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task; + if (__builtin_expect (team == NULL, 0)) + { + /* The task reduction code needs a team and task, so for + orphaned taskgroups just create the implicit team. */ + struct gomp_task_icv *icv; + team = gomp_new_team (1); + task = thr->task; + icv = task ? &task->icv : &gomp_global_icv; + team->prev_ts = thr->ts; + thr->ts.team = team; + thr->ts.team_id = 0; + thr->ts.work_share = &team->work_shares[0]; + thr->ts.last_work_share = NULL; +#ifdef HAVE_SYNC_BUILTINS + thr->ts.single_count = 0; +#endif + thr->ts.static_trip = 0; + thr->task = &team->implicit_task[0]; + gomp_init_task (thr->task, NULL, icv); + if (task) + { + thr->task = task; + gomp_end_task (); + free (task); + thr->task = &team->implicit_task[0]; + } +#ifdef LIBGOMP_USE_PTHREADS + else + pthread_setspecific (gomp_thread_destructor, thr); +#endif + GOMP_taskgroup_start (); + } + unsigned nthreads = team->nthreads; + task = thr->task; + gomp_reduction_register (data, task->taskgroup->reductions, nthreads); task->taskgroup->reductions = data; } @@ -2087,6 +2097,7 @@ GOMP_taskgroup_reduction_unregister (uin } while (d && !d[5]); } +ialias (GOMP_taskgroup_reduction_unregister) /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the original list item or address of previously remapped original list @@ -2160,6 +2171,15 @@ GOMP_task_reduction_remap (size_t cnt, s } } +struct gomp_taskgroup * +gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) +{ + struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); + gomp_reduction_register (data, NULL, nthreads); + taskgroup->reductions = data; + return taskgroup; +} + int omp_in_final (void) { --- libgomp/team.c.jj 2018-10-08 19:05:58.135130888 +0200 +++ libgomp/team.c 2018-10-19 15:06:09.109097148 +0200 @@ -302,7 +302,8 @@ gomp_free_thread (void *arg 
__attribute_ #ifdef LIBGOMP_USE_PTHREADS void gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, - unsigned flags, struct gomp_team *team) + unsigned flags, struct gomp_team *team, + struct gomp_taskgroup *taskgroup) { struct gomp_thread_start_data *start_data; struct gomp_thread *thr, *nthr; @@ -364,6 +365,7 @@ gomp_team_start (void (*fn) (void *), vo && thr->ts.level < gomp_bind_var_list_len) bind_var = gomp_bind_var_list[thr->ts.level]; gomp_init_task (thr->task, task, icv); + thr->task->taskgroup = taskgroup; team->implicit_task[0].icv.nthreads_var = nthreads_var; team->implicit_task[0].icv.bind_var = bind_var; @@ -638,6 +640,7 @@ gomp_team_start (void (*fn) (void *), vo gomp_init_task (nthr->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; team->implicit_task[i].icv.bind_var = bind_var; + nthr->task->taskgroup = taskgroup; nthr->fn = fn; nthr->data = data; team->ordered_release[i] = &nthr->release; @@ -823,6 +826,7 @@ gomp_team_start (void (*fn) (void *), vo gomp_init_task (start_data->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; team->implicit_task[i].icv.bind_var = bind_var; + start_data->task->taskgroup = taskgroup; start_data->thread_pool = pool; start_data->nested = nested; --- libgomp/config/nvptx/team.c.jj 2018-07-27 12:58:39.425317985 +0200 +++ libgomp/config/nvptx/team.c 2018-10-19 15:27:28.510747309 +0200 @@ -116,7 +116,8 @@ gomp_thread_start (struct gomp_thread_po void gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, - unsigned flags, struct gomp_team *team) + unsigned flags, struct gomp_team *team, + struct gomp_taskgroup *taskgroup) { struct gomp_thread *thr, *nthr; struct gomp_task *task; @@ -147,6 +148,7 @@ gomp_team_start (void (*fn) (void *), vo nthreads_var = icv->nthreads_var; gomp_init_task (thr->task, task, icv); team->implicit_task[0].icv.nthreads_var = nthreads_var; + team->implicit_task[0].taskgroup = taskgroup; if (nthreads == 1) return; @@ 
-166,6 +168,7 @@ gomp_team_start (void (*fn) (void *), vo nthr->task = &team->implicit_task[i]; gomp_init_task (nthr->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; + team->implicit_task[i].taskgroup = taskgroup; nthr->fn = fn; nthr->data = data; team->ordered_release[i] = &nthr->release; --- libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c.jj 2018-10-22 15:11:27.032039935 +0200 +++ libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c 2018-10-23 12:00:44.702719131 +0200 @@ -0,0 +1,123 @@ +#include <omp.h> +#include <stdlib.h> + +struct S { unsigned long int s, t; }; + +void +rbar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \ + initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +struct S g = { 0, 7 }; +struct S h = { 1, 5 }; + +int +foo (int *a, int *b) +{ + int x = 0; + #pragma omp taskloop reduction (+:x) in_reduction (+:b[0]) + for (int i = 0; i < 64; i++) + { + x += a[i]; + *b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int *a, unsigned long long int *b) +{ + unsigned long long int x = 1; + #pragma omp taskloop reduction (*:x) in_reduction (*:b[0]) + for (int i = 0; i < 64; i++) + { + #pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp task in_reduction (*:b[0]) + *b *= (3 - a[i]); + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +int +main () +{ + int i, j, a[64], b = 0, c[64]; + unsigned long long int d = 1, e; + int r = 0, t; + struct S m = { 0, 7 }; + struct S n = { 1, 5 }; 
+ for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel reduction (task, +:b) reduction(+:r) \ + reduction(task,*:d) reduction (task, +: g, m) \ + reduction (task, *: h, n) shared(t) + { + #pragma omp master + { + j = foo (a, &b); + t = omp_get_num_threads (); + } + r++; + #pragma omp single nowait + e = bar (c, &d); + #pragma omp master + #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n) + for (i = 0; i < 64; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + if (j != 63 * 64 || b != 63 * 64 * 2) + abort (); + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort (); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (m.s != 63 * 64 * 4 || m.t != 7) + abort (); + return 0; +} --- libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c.jj 2018-10-22 15:27:58.357494701 +0200 +++ libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c 2018-10-23 12:09:46.101643735 +0200 @@ -0,0 +1,216 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + 
b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + s = ss; + t = tt; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) num_threads(4) + { + int i; + #pragma omp for + for (i = 0; i < 4; i++) + { + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] 
*= 2; + t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] != 
1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} --- libgomp/testsuite/libgomp.c/task-reduction-2.c.jj 2018-10-23 12:47:15.036017405 +0200 +++ libgomp/testsuite/libgomp.c/task-reduction-2.c 2018-10-23 14:17:16.715619755 +0200 @@ -0,0 +1,86 @@ +typedef __SIZE_TYPE__ size_t; +extern void abort (void); + +void +bar (int *a, int *b, int *c, int (*d)[2], int (*e)[4], size_t n, int f[1][n], int g[1][n * 2]) +{ + #pragma omp task in_reduction (*: a[:n], b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 12; + a[1] *= 13; + b[3] *= 14; + b[4] *= 15; + c[n] *= 16; + c[n + 1] *= 17; + d[0][0] *= 18; + d[0][1] *= 19; + e[0][1] *= 20; + e[0][2] *= 21; + f[0][0] *= 22; + f[0][1] *= 23; + g[0][1] *= 24; + g[0][2] *= 25; + } +} + +void +baz (size_t n, void *x, void *y, int f[1][n], int g[1][n * 2]) +{ + int a[n], b[n + 3], c[2 * n]; + int (*d)[n] = (int (*)[n]) x; + int (*e)[n * 2] = (int (*)[n * 2]) y; + int i; + for (i = 0; i < n; i++) + { + a[i] = 1; + b[i + 3] = 1; + c[i + n] = 1; + d[0][i] = 1; + e[0][i + 1] = 1; + f[0][i] = 1; + g[0][i + 1] = 1; + } + #pragma omp parallel num_threads(2) firstprivate (n) \ + reduction (task, *: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + #pragma omp master + bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, n, f, g); + #pragma omp master + #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 2; + a[1] *= 3; + b[3] *= 4; + b[4] *= 5; + c[n] *= 6; + c[n + 1] *= 7; + d[0][0] *= 8; + d[0][1] *= 9; + e[0][1] *= 10; + e[0][2] *= 11; + f[0][0] *= 12; + f[0][1] *= 13; + g[0][1] *= 14; + g[0][2] *= 15; + } + n = 0; + } + if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75) + abort (); + if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171) + abort (); + if (e[0][1] != 200 || e[0][2] != 231 || 
f[0][0] != 264 || f[0][1] != 299) + abort (); + if (g[0][1] != 336 || g[0][2] != 375) + abort (); +} + +int +main () +{ + int d[1][2], e[1][4], f[1][2], g[1][4]; + volatile int two; + two = 2; + baz (two, (void *) d, (void *) e, f, g); + return 0; +} --- libgomp/testsuite/libgomp.c++/task-reduction-8.C.jj 2018-10-23 12:50:03.941193045 +0200 +++ libgomp/testsuite/libgomp.c++/task-reduction-8.C 2018-10-23 13:30:47.923296583 +0200 @@ -0,0 +1,70 @@ +extern "C" void abort (); + +int as; +int &a = as; +long int bs = 1; +long int &b = bs; + +template <typename T, typename U> +void +foo (T &c, U &d) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: d) in_reduction (+: c) \ + in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + } +} + +template <typename T, typename U> +void +bar () +{ + T cs = 0; + T &c = cs; + U ds = 1; + #pragma omp parallel if (0) + { + U &d = ds; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) + { + T i; + #pragma omp for + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + T j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (c, d); + } + c += 9; + d *= 3; + } + } +#define THREEP4 (3LL * 3LL * 3LL * 3LL) + if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 + * THREEP4)) + abort (); + } + if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9) + abort (); +} + +int +main () +{ + bar<int, long long int> (); +} --- libgomp/testsuite/libgomp.c++/task-reduction-9.C.jj 2018-10-23 13:03:23.654821918 +0200 +++ libgomp/testsuite/libgomp.c++/task-reduction-9.C 2018-10-23 13:16:34.994575846 +0200 @@ -0,0 +1,128 @@ +#include <omp.h> +#include <stdlib.h> + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma 
omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a = { 0, 7 }; +S b (1, 5); + +void +foo () +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: b) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)) + abort (); + } +} + +void +test () +{ + S c = { 0, 7 }; + int t; + #pragma omp parallel num_threads (1) + { + S d (1, 5); + int r = 0; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) \ + reduction (+: r) + { + int i; + #pragma omp master + t = omp_get_num_threads (); + r++; + a.s += 3; + c.s += 4; + #pragma omp for + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5 || r != t) + abort (); + } + if (a.s != 28 * 7 + 3 * t || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 12 * 9 + 4 
* t || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} --- libgomp/testsuite/libgomp.c++/task-reduction-10.C.jj 2018-10-23 13:17:02.753111130 +0200 +++ libgomp/testsuite/libgomp.c++/task-reduction-10.C 2018-10-23 14:20:58.239936096 +0200 @@ -0,0 +1,125 @@ +extern "C" void abort (); + +struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as = { 0LL, 7 }; +S &a = as; +S bs (1LL, 5); +S &b = bs; + +void +foo (S &c, S &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } +} + +void +test () +{ + S cs = { 0LL, 7 }; + S &c = cs; + S ds (1LL, 5); + #pragma omp parallel if (0) + { + S &d = ds; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) + { + #pragma omp for + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) 
in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (c, d); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL) + if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 28 * 9 || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} --- libgomp/testsuite/libgomp.c++/task-reduction-11.C.jj 2018-10-23 13:24:50.401282019 +0200 +++ libgomp/testsuite/libgomp.c++/task-reduction-11.C 2018-10-23 14:21:24.766494993 +0200 @@ -0,0 +1,237 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; + +template <typename T> +void +foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] 
+= 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +template <typename T> +void +test (T &n) +{ + T cs[2] = { 0, 0 }; + T (&c)[2] = cs; + T ps[3] = { 0, 1, 4 }; + T (&p)[3] = ps; + T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + T (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + T ms[3] = { 5, 0, 5 }; + T os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + #pragma omp parallel if (0) + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + T (&m)[3] = ms; + T rs[5] = { 6, 7, 0, 0, 9 }; + T (&r)[5] = rs; + T (&o)[4] = os; + #pragma omp parallel reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \ + reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task,*: t[2:2][:], s[1:n + 1]) + { + #pragma omp for + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; 
+ a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + T *cp = c; + long long int *dp = d; + T *rp = r; + T *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || 
q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} + +int +main () +{ + int n = 1; + test (n); + return 0; +} --- libgomp/testsuite/libgomp.c++/task-reduction-12.C.jj 2018-10-23 14:42:45.736106078 +0200 +++ libgomp/testsuite/libgomp.c++/task-reduction-12.C 2018-10-23 14:44:39.489202716 +0200 @@ -0,0 +1,321 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +template <int N> +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + 
#pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +template <int N> +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma 
omp parallel num_threads (1) if (0) + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) + { + #pragma omp for + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 
* n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo<N> (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || 
o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 
8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test<0> (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} --- libgomp/testsuite/libgomp.c++/task-reduction-13.C.jj 2018-10-23 14:44:51.759997393 +0200 +++ libgomp/testsuite/libgomp.c++/task-reduction-13.C 2018-10-23 14:48:52.418970379 +0200 @@ -0,0 +1,342 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as[2] = { { 0, 7 }, { 0, 7 } }; +S (&a)[2] = as; +S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S (&b)[7] = bs; +S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S (&e)[3] = es; +S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S (&f)[5] = fs; +S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S (&g)[4] = gs; +S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S (&h)[3] = hs; +S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S (&k)[4][2] = ks; +S *ss; +S *&s = ss; +S (*ts)[2]; +S (*&t)[2] = ts; + +template <typename S, typename T> +void +foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S 
(&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +template <typename S, typename T> +void +test (T &n) +{ + S cs[2] = { { 0, 7 }, { 0, 7 } }; + S (&c)[2] = cs; + S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S (&p)[3] = ps; + S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S (&q)[4][2] = qs; + S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, 
{ 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + s = sb; + t = tb; + #pragma omp parallel if (0) + { + S ds[] = { { 1, 5 }, { 1, 5 } }; + S (&d)[2] = ds; + S (&m)[3] = ms; + S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S (&r)[5] = rs; + S (&o)[4] = os; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) + { + #pragma omp for + for (T i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T 
z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + S *cp = c; + S *dp = d; + S *rp = r; + S *pp = p; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + foo (n, cp, dp, m, rp, o, pp, q); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || 
m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (T z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || sb[0].s != 5 + || sb[1].s != 1LL << (16 + 
4) + || sb[2].s != 1LL << 8 + || sb[3].s != 6 + || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12 + || tb[2][0].s != 1LL << (16 + 8) + || tb[2][1].s != 1LL << 4 + || tb[3][0].s != 1LL << 8 + || tb[3][1].s != 1LL << (16 + 4) + || tb[4][0].s != 13 || tb[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + int n = 1; + test<S, int> (n); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +}

[gomp5] Add support for reduction clause task modifier on parallel

Commit Message

Patch