Message ID | 538E538E.9010304@codesourcery.com |
---|---|
State | New |
Headers | show |
Hi Cesar! > This patch, which is derived from Ilmir Usmanov's work posted here > <https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00027.html>, implements > the loop directive in openacc. The original patch is mostly intact, Thank you! I looked through the patch and found that you also added middle-end part. I don't know a lot about middle-end, so, probably, Thomas could review the part. However, there is a regression in middle-end. About front-ends, especially fortran front-end: > I did disable support for do concurrent loops since openacc > 2.0a supports fortran up to fortran 2003. As I can see, you didn't remove helper code for DO CONCURRENT loops transformation (see below). > @@ -12217,8 +12221,8 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code, > for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl)) > if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE) > { > - gcc_assert (code != OACC_LOOP); > - collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl)); > + //gcc_assert (code != OACC_LOOP); I suppose you forgot to remove this comment. > +++ b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 > @@ -0,0 +1,16 @@ > +! { dg-do compile } > +! { dg-additional-options "-fdump-tree-original -std=f2008" } > + > +PROGRAM test > + IMPLICIT NONE > + INTEGER :: a(64), b(64), c(64), i, j, k > + ! Must be replaced by three loops. > + !$acc loop > + DO CONCURRENT (i=1:64, j=1:64, k=1:64, i==j .and. j==k) This test is obsolete. I think you should remove this testcase since you are not supporting DO CONCURRENT loops. > +++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 For this test you should update tree-pretty-print.c (I forgot this): @@ -675,13 +675,13 @@ dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags) case OMP_CLAUSE_WORKER: pp_string (buffer, "worker("); - dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false); + dump_generic_node (buffer, OMP_CLAUSE_WORKER_EXPR (clause), spc, flags, false); pp_character(buffer, ')'); break; case OMP_CLAUSE_VECTOR: pp_string (buffer, "vector("); - dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false); + dump_generic_node (buffer, OMP_CLAUSE_VECTOR_EXPR (clause), spc, flags, false); pp_character(buffer, ')'); break; > +/* Recursively generate conditional expressions. */ > +static tree > +gfc_trans_oacc_loop_generate_mask_conds (gfc_code *code, int collapse) > +{ > + if (collapse > 1) > + return gfc_trans_oacc_loop_generate_mask_conds (code->block->next, > + collapse - 1); > + else > + return gfc_trans_omp_code (code->block->next, true); > +} > +static tree > +gfc_trans_oacc_loop (gfc_code *code, stmtblock_t *pblock, > + gfc_omp_clauses *loop_clauses) > +{ > + /* DO CONCURRENT specific vars. */ > + int nforloops = 0; > + int current_for = 0; > + > + if (collapse <= 0) > + collapse = 1; > + > + code = code->block->next; > + > + if (code->op == EXEC_DO_CONCURRENT) > + gfc_error ("!$ACC LOOP directive is unsupported on DO CONCURRENT %L", > + &code->loc); > + > + gcc_assert (code->op == EXEC_DO); > + > + if (pblock == NULL) > + { > + gfc_start_block (&block); > + pblock = █ > + } > + > + /* Calculate number of required for loops. */ > + old_code = code; > + for (i = 0; i < collapse; i++) > + { > + if (code->op == EXEC_DO) > + nforloops++; > + else > + gcc_unreachable (); > + code = code->block->next; > + } > + code = old_code; > + > + /* Set the number of required for loops for collapse. */ > + /* FIXME: this is probably correct, but OMP_CLAUSE_COLLAPSE isn't supported > + yet. */ > + loop_clauses->collapse = nforloops; > + > + omp_clauses = gfc_trans_omp_clauses (pblock, loop_clauses, code->loc); > + > + init = make_tree_vec (nforloops); > + cond = make_tree_vec (nforloops); > + incr = make_tree_vec (nforloops); > + > + for (i = 0; i < collapse; i++) > + { > + if (code->op == EXEC_DO) > + gfc_trans_oacc_loop_generate_for (pblock, &se, code->ext.iterator->var, > + code->ext.iterator->start, > + code->ext.iterator->end, > + code->ext.iterator->step, > + current_for++, &init, &cond, &incr, > + &inits); > + else > + gcc_unreachable (); > + if (i + 1 < collapse) > + code = code->block->next; > + } > + > + if (pblock != &block) > + { > + pushlevel (); > + gfc_start_block (&block); > + } This is complicated for simple DO loops. I think the following will be enough (see gfc_trans_omp_do). > code = code->block->next; > + if (code->op == EXEC_DO_CONCURRENT) > + gfc_error ("!$ACC LOOP directive is unsupported on DO CONCURRENT %L", > + &code->loc); > gcc_assert (code->op == EXEC_DO); > > init = make_tree_vec (collapse); > cond = make_tree_vec (collapse); > incr = make_tree_vec (collapse); > > if (pblock == NULL) > { > gfc_start_block (&block); > pblock = █ > } > @@ -1817,13 +1818,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > case OMP_CLAUSE_PRIVATE: > case OMP_CLAUSE_FIRSTPRIVATE: > case OMP_CLAUSE_REDUCTION: > - if (is_gimple_omp_oacc_specifically (ctx->stmt)) > - { > - sorry ("clause not supported yet"); > - break; > - } This change produces regression on parallel-tree.f95 testcase: ICE.
Hi Cesar! On Tue, 3 Jun 2014 16:00:30 -0700, Cesar Philippidis <cesar@codesourcery.com> wrote: > in order to make > the patch yield more interesting results, I've also enabled the private > clause. Is this patch ok for the gomp-4_0-branch? > gcc/ > * c/c-parser.c (c_parser_oacc_all_clauses): Update handling for Note that gcc/c/ as well as gcc/fortran/ have separate ChangeLog* files. > OMP_CLAUSE_COLLAPSE and OMP_CLAUSE_PRIVATE. Only for OMP_CLAUSE_COLLAPSE, not OMP_CLAUSE_PRIVATE. > (c_parser_oacc_kernels): Likewise. OACC_LOOP_CLAUSE_MASK, not c_parser_oacc_kernels. > --- a/gcc/c/c-parser.c > +++ b/gcc/c/c-parser.c > @@ -11228,6 +11228,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, > > switch (c_kind) > { > + case PRAGMA_OMP_CLAUSE_COLLAPSE: Won't this need additional work? It seems that for combined directives (kernels loop, parallel loop), we currently don't (or, don't correctly) parse the clauses, and support in clause splitting (c-family/c-omp:c_omp_split_clauses) is also (generally) missing, I think? Anyway, this is a separate change from your Fortran loop support, so should (ideally) be a separate patch/commit. (Also, I'm not sure to which extent we're at all currently handling combined directives in gimplification and lowering?) > + clauses = c_parser_omp_clause_collapse (parser, clauses); > + c_name = "collapse"; > + break; Update the comment on c_parser_omp_clause_collapse to state that it's for OpenACC, too. > @@ -12217,8 +12221,8 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code, > for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl)) > if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE) > { > - gcc_assert (code != OACC_LOOP); > - collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl)); > + //gcc_assert (code != OACC_LOOP); > + collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl)); > } As Ilmir noted, remove the gcc_assert -- assuming you have some confidence that the following code (including gimplification and lowering) matches the OpenACC semantics for collapse != 1. > --- a/gcc/gimple.h > +++ b/gcc/gimple.h > @@ -5809,15 +5809,25 @@ is_gimple_omp (const_gimple stmt) > need any special handling for OpenACC. */ > > static inline bool > -is_gimple_omp_oacc_specifically (const_gimple stmt) > +is_gimple_omp_oacc_specifically (const_gimple stmt, > + enum omp_clause_code code = OMP_CLAUSE_ERROR) > { > gcc_assert (is_gimple_omp (stmt)); > switch (gimple_code (stmt)) > { > case GIMPLE_OACC_KERNELS: > case GIMPLE_OACC_PARALLEL: > - return true; > + switch (code) > + { > + case OMP_CLAUSE_COLLAPSE: > + case OMP_CLAUSE_PRIVATE: > + return false; > + default: > + return true; > + } > case GIMPLE_OMP_FOR: > + if (code == OMP_CLAUSE_COLLAPSE || code == OMP_CLAUSE_PRIVATE) > + return false; > switch (gimple_omp_for_kind (stmt)) > { > case GF_OMP_FOR_KIND_OACC_LOOP: Hmm, why do we need this? > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -1534,7 +1534,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > switch (OMP_CLAUSE_CODE (c)) > { > case OMP_CLAUSE_PRIVATE: > - gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt)); > + gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt, > + OMP_CLAUSE_CODE (c))); I'd say, in these "guarded" code paths, if you have confidence that they're now correct for OpenACC, that is, a clause such as OMP_CLAUSE_PRIVATE is "interpreted" correctly for OpenACC (it has the same semantics as as for OpenMP), then you should simply remove the assert completely (or, if applicable, move the case OMP_CLAUSE_PRIVATE or the surrounding cases so that OMP_CLAUSE_PRIVATE is no longer covered by the assert). For example, do it like this: case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: - case OMP_CLAUSE_COLLAPSE: case OMP_CLAUSE_UNTIED: case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE_SAFELEN: gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt)); + /* FALLTHRU */ + case OMP_CLAUSE_COLLAPSE: break; With these things addressed/verified, the OMP_CLAUSE_COLLAPSE changes are good to commit, thanks! > @@ -1762,7 +1763,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > } > } > break; > - > case OMP_CLAUSE_NOWAIT: > case OMP_CLAUSE_ORDERED: > case OMP_CLAUSE_COLLAPSE: To ease my life ;-) as a branch maintainer, please don't introduce such divergence from the GCC trunk code. > @@ -1817,13 +1818,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > case OMP_CLAUSE_PRIVATE: > case OMP_CLAUSE_FIRSTPRIVATE: > case OMP_CLAUSE_REDUCTION: > - if (is_gimple_omp_oacc_specifically (ctx->stmt)) > - { > - sorry ("clause not supported yet"); > - break; > - } Above that block is OMP_CLAUSE_LASTPRIVATE, which should have (should get added) an assert for !OpenACC, and even though we're adding the OpenACC private, firstprivate, and reduction clauses, we're not there yet; the OpenACC private and firstprivate ones do differ from the OpenMP ones; I have a WIP patch. (And, unless I'm confused, there even is a difference in OpenACC depending on whether the private clause is attached to parallel or loop directive... Wonder how that is to work with the combined parallel loop directive?) Ilmir says you're then getting an ICE instead of this sorry message; in this case it's probably indeed better to keep the sorry message for the respective unsupported clauses. > @@ -1896,6 +1893,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > sorry ("clause not supported yet"); > break; > } > + break; > case OMP_CLAUSE_COPYPRIVATE: > case OMP_CLAUSE_COPYIN: > case OMP_CLAUSE_DEFAULT: No need for that break, I think? So, if this helps you to make progress, I'm OK for you to commit the preliminary support for OMP_CLAUSE_PRIVATE, and I'll then revisit this clause/code in the near future, for the correct OpenACC semantics. Review of the Fortran changes I'll defer to someone who knows this code (thanks already, Ilmir!); only one small comment: > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 > +[...] > \ No newline at end of file Please add that one. ;-) Grüße, Thomas
On Wed, Jun 4, 2014 at 2:00 AM, Cesar Philippidis <cesar@codesourcery.com> wrote: > One item on my to do list is adding support for subarrays in openacc in > fortran. So far I've got Ilmir's patch > <https://gcc.gnu.org/ml/gcc-patches/2014-05/msg01832.html> to work with > some local arrays, but not with allocatable arrays. I saw some chatter > on IRC this morning regarding array pointers and allocatable arrays. I'm > curious about how aliasing is going to be detected. Is that going to be > handled inside libgomp or by the compiler? Eg, consider a subroutine > which takes in to allocatable arrays as parameters, a and b. What > happens when a == b? We don't want to have two different copies of > whatever a and b point to on the target. Fortran does not allow aliasing of dummy arguments, so a compiler is allowed to optimize assuming aliasing does not occur. The exception is dummy arguments with the POINTER attribute, those can alias with other variables having the POINTER or TARGET attributes. So an ALLOCATABLE variable can not alias with any other variable, unless it has the TARGET attribute. > > Thanks, > Cesar
Janne Blomqvist wrote: > Fortran does not allow aliasing of dummy arguments, That's not quite true: It permits aliasing variables (also without TARGET or POINTER attribute) – but if you modify one, you may no longer access the other, unless they do have the POINTER or TARGET attribute. (See below for the formal description.) > so a compiler is allowed to optimize assuming aliasing does not occur. > The exception is dummy arguments with the POINTER attribute, those can > alias with other variables having the POINTER or TARGET attributes. So > an ALLOCATABLE variable can not alias with any other variable, unless > it has the TARGET attribute. Well, two variables with TARGET attribute are also permitted to alias. Tobias PR: Now the same as above, but using a quote from Fortran 2008: "12.5.2.13 Restrictions on entities associated with dummy arguments While an entity is associated with a dummy argument, the following restrictions hold. (1) Action that affects the allocation status of the entity or a subobject thereof shall be taken through the dummy argument. (2) If the allocation status of the entity or a subobject thereof is affected through the dummy argument, then at any time during the invocation and execution of the procedure, either before or after the allocation or deallocation, it shall be referenced only through the dummy argument. (3) Action that affects the value of the entity or any subobject of it shall be taken only through the dummy argument unless (a) the dummy argument has the POINTER attribute or (b) the dummy argument has the TARGET attribute, the dummy argument does not have INTENT(IN), the dummy argument is a scalar object or an assumed-shape array without the CONTIGUOUS attribute, and the actual argument is a target other than an array section with a vector subscript. (4) If the value of the entity or any subobject of it is affected through the dummy argument, then at any time during the invocation and execution of the procedure, either before or after the definition, it may be referenced only through that dummy argument unless (a) the dummy argument has the POINTER attribute or (b) the dummy argument has the TARGET attribute, the dummy argument does not have INTENT(IN), the dummy argument is a scalar object or an assumed-shape array without the CONTIGUOUS attribute, and the actual argument is a target other than an array section with a vector subscript."
2014-06-03 Ilmir Usmanov <i.usmanov@samsung.com> Cesar Philippidis <cesar@codesourcery.com> gcc/ * c/c-parser.c (c_parser_oacc_all_clauses): Update handling for OMP_CLAUSE_COLLAPSE and OMP_CLAUSE_PRIVATE. (c_parser_oacc_kernels): Likewise. (c_parser_omp_for_loop): Likewise. * gimple.h (is_gimple_omp_oacc_specifically): Likewise. * omp-low.c (scan_sharing_clauses): Likewise. * fortran/trans-openmp.c (gfc_trans_oacc_loop): New function. (gfc_trans_oacc_combined_directive): Call it. (gfc_trans_oacc_directive): Likewise. (gfc_trans_oacc_loop_generate_for): New helper function. (gfc_trans_oacc_loop_generate_mask_conds): Likewise. gcc/testsuite/ * c-c++-common/goacc/collapse-1.c: New test. * gfortran.dg/goacc/loop-4.f95: Likewise. * gfortran.dg/goacc/loop-tree.f95: Likewise. diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index e20348e..7b3f52c 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -11228,6 +11228,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, switch (c_kind) { + case PRAGMA_OMP_CLAUSE_COLLAPSE: + clauses = c_parser_omp_clause_collapse (parser, clauses); + c_name = "collapse"; + break; case PRAGMA_OMP_CLAUSE_COPY: clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "copy"; @@ -11648,7 +11652,7 @@ c_parser_oacc_kernels (location_t loc, c_parser *parser, char *p_name) */ #define OACC_LOOP_CLAUSE_MASK \ - (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NONE) + (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COLLAPSE) static tree c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name) @@ -12217,8 +12221,8 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code, for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl)) if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE) { - gcc_assert (code != OACC_LOOP); - collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl)); + //gcc_assert (code != OACC_LOOP); + collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl)); } gcc_assert (collapse >= 1); diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 08f6faa..2f0d498 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -1856,11 +1856,237 @@ typedef struct dovar_init_d { tree init; } dovar_init; +/* Helper function to generate a single for loop. */ +static void +gfc_trans_oacc_loop_generate_for (stmtblock_t *pblock, gfc_se *se, + gfc_expr *var_expr, gfc_expr *start_expr, + gfc_expr *end_expr, gfc_expr *step_expr, + int i, tree *init, tree *cond, tree *incr, + vec<dovar_init>* inits) +{ + int simple = 0; + tree dovar, from, to, step, type, tmp, count = NULL_TREE; + + /* Evaluate all the expressions. */ + gfc_init_se (se, NULL); + gfc_conv_expr_lhs (se, var_expr); + gfc_add_block_to_block (pblock, &se->pre); + dovar = se->expr; + type = TREE_TYPE (dovar); + gcc_assert (TREE_CODE (type) == INTEGER_TYPE); + + gfc_init_se (se, NULL); + gfc_conv_expr_val (se, start_expr); + gfc_add_block_to_block (pblock, &se->pre); + from = gfc_evaluate_now (se->expr, pblock); + + gfc_init_se (se, NULL); + gfc_conv_expr_val (se, end_expr); + gfc_add_block_to_block (pblock, &se->pre); + to = gfc_evaluate_now (se->expr, pblock); + + gfc_init_se (se, NULL); + gfc_conv_expr_val (se, step_expr); + gfc_add_block_to_block (pblock, &se->pre); + step = gfc_evaluate_now (se->expr, pblock); + + /* Special case simple loops. */ + if (TREE_CODE (dovar) == VAR_DECL) + { + if (integer_onep (step)) + simple = 1; + else if (tree_int_cst_equal (step, integer_minus_one_node)) + simple = -1; + } + + /* Loop body. */ + if (simple) + { + TREE_VEC_ELT (*init, i) = build2_v (MODIFY_EXPR, dovar, from); + /* The condition should not be folded. */ + TREE_VEC_ELT (*cond, i) = build2_loc (input_location, simple > 0 + ? LE_EXPR : GE_EXPR, + boolean_type_node, dovar, to); + TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, PLUS_EXPR, + type, dovar, step); + TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, + MODIFY_EXPR, + type, dovar, + TREE_VEC_ELT (*incr, i)); + } + else + { + /* STEP is not 1 or -1. Use: + for (count = 0; count < (to + step - from) / step; count++) + { + dovar = from + count * step; + body; + cycle_label:; + } */ + tmp = fold_build2_loc (input_location, MINUS_EXPR, type, step, from); + tmp = fold_build2_loc (input_location, PLUS_EXPR, type, to, tmp); + tmp = fold_build2_loc (input_location, TRUNC_DIV_EXPR, type, tmp, + step); + tmp = gfc_evaluate_now (tmp, pblock); + count = gfc_create_var (type, "count"); + TREE_VEC_ELT (*init, i) = build2_v (MODIFY_EXPR, count, + build_int_cst (type, 0)); + /* The condition should not be folded. */ + TREE_VEC_ELT (*cond, i) = build2_loc (input_location, LT_EXPR, + boolean_type_node, + count, tmp); + TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, PLUS_EXPR, + type, count, + build_int_cst (type, 1)); + TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, + MODIFY_EXPR, type, count, + TREE_VEC_ELT (*incr, i)); + + /* Initialize DOVAR. */ + tmp = fold_build2_loc (input_location, MULT_EXPR, type, count, step); + tmp = fold_build2_loc (input_location, PLUS_EXPR, type, from, tmp); + dovar_init e = {dovar, tmp}; + inits->safe_push (e); + } +} + +/* Recursively generate conditional expressions. */ +static tree +gfc_trans_oacc_loop_generate_mask_conds (gfc_code *code, int collapse) +{ + if (collapse > 1) + return gfc_trans_oacc_loop_generate_mask_conds (code->block->next, + collapse - 1); + else + return gfc_trans_omp_code (code->block->next, true); +} + +static tree +gfc_trans_oacc_loop (gfc_code *code, stmtblock_t *pblock, + gfc_omp_clauses *loop_clauses) +{ + gfc_se se; + tree init, cond, incr, stmt, cycle_label, tmp, omp_clauses; + stmtblock_t block; + stmtblock_t body; + gfc_omp_clauses *clauses = code->ext.omp_clauses; + int i, collapse = clauses->collapse; + vec<dovar_init> inits = vNULL; + dovar_init *di; + unsigned ix; + gfc_code *old_code; + + /* DO CONCURRENT specific vars. */ + int nforloops = 0; + int current_for = 0; + + if (collapse <= 0) + collapse = 1; + + code = code->block->next; + + if (code->op == EXEC_DO_CONCURRENT) + gfc_error ("!$ACC LOOP directive is unsupported on DO CONCURRENT %L", + &code->loc); + + gcc_assert (code->op == EXEC_DO); + + if (pblock == NULL) + { + gfc_start_block (&block); + pblock = █ + } + + /* Calculate number of required for loops. */ + old_code = code; + for (i = 0; i < collapse; i++) + { + if (code->op == EXEC_DO) + nforloops++; + else + gcc_unreachable (); + code = code->block->next; + } + code = old_code; + + /* Set the number of required for loops for collapse. */ + /* FIXME: this is probably correct, but OMP_CLAUSE_COLLAPSE isn't supported + yet. */ + loop_clauses->collapse = nforloops; + + omp_clauses = gfc_trans_omp_clauses (pblock, loop_clauses, code->loc); + + init = make_tree_vec (nforloops); + cond = make_tree_vec (nforloops); + incr = make_tree_vec (nforloops); + + for (i = 0; i < collapse; i++) + { + if (code->op == EXEC_DO) + gfc_trans_oacc_loop_generate_for (pblock, &se, code->ext.iterator->var, + code->ext.iterator->start, + code->ext.iterator->end, + code->ext.iterator->step, + current_for++, &init, &cond, &incr, + &inits); + else + gcc_unreachable (); + if (i + 1 < collapse) + code = code->block->next; + } + + if (pblock != &block) + { + pushlevel (); + gfc_start_block (&block); + } + + gfc_start_block (&body); + + /* Generate complicated dovars. */ + FOR_EACH_VEC_ELT (inits, ix, di) + gfc_add_modify (&body, di->var, di->init); + inits.release (); + + /* Cycle statement is implemented with a goto. Exit statement must not be + present for this loop. */ + cycle_label = gfc_build_label_decl (NULL_TREE); + + /* Put these labels where they can be found later. */ + + code->cycle_label = cycle_label; + code->exit_label = NULL_TREE; + + /* Main loop body. */ + tmp = gfc_trans_oacc_loop_generate_mask_conds (old_code, collapse); + gfc_add_expr_to_block (&body, tmp); + + /* Label for cycle statements (if needed). */ + if (TREE_USED (cycle_label)) + { + tmp = build1_v (LABEL_EXPR, cycle_label); + gfc_add_expr_to_block (&body, tmp); + } + + /* End of loop body. */ + stmt = make_node (OACC_LOOP); + + TREE_TYPE (stmt) = void_type_node; + OMP_FOR_BODY (stmt) = gfc_finish_block (&body); + OMP_FOR_CLAUSES (stmt) = omp_clauses; + OMP_FOR_INIT (stmt) = init; + OMP_FOR_COND (stmt) = cond; + OMP_FOR_INCR (stmt) = incr; + gfc_add_expr_to_block (&block, stmt); + + return gfc_finish_block (&block); +} + /* parallel loop and kernels loop. */ static tree gfc_trans_oacc_combined_directive (gfc_code *code) { - stmtblock_t block; + stmtblock_t block, *pblock = NULL; gfc_omp_clauses construct_clauses, loop_clauses; tree stmt, oacc_clauses = NULL_TREE; enum tree_code construct_code; @@ -1899,11 +2125,21 @@ gfc_trans_oacc_combined_directive (gfc_code *code) oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses, code->loc); } - - gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc); - stmt = gfc_trans_omp_code (code->block->next, true); + if (!loop_clauses.seq) + pblock = █ + else + pushlevel (); + stmt = gfc_trans_oacc_loop (code, pblock, &loop_clauses); + if (TREE_CODE (stmt) != BIND_EXPR) + stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); + else + poplevel (0, 0); stmt = build2_loc (input_location, construct_code, void_type_node, stmt, oacc_clauses); + if (code->op == EXEC_OACC_KERNELS_LOOP) + OACC_KERNELS_COMBINED (stmt) = 1; + else + OACC_PARALLEL_COMBINED (stmt) = 1; gfc_add_expr_to_block (&block, stmt); return gfc_finish_block (&block); } @@ -2763,8 +2999,7 @@ gfc_trans_oacc_directive (gfc_code *code) case EXEC_OACC_HOST_DATA: return gfc_trans_oacc_construct (code); case EXEC_OACC_LOOP: - gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc); - return NULL_TREE; + return gfc_trans_oacc_loop (code, NULL, code->ext.omp_clauses); case EXEC_OACC_UPDATE: case EXEC_OACC_WAIT: case EXEC_OACC_CACHE: diff --git a/gcc/gimple.h b/gcc/gimple.h index 60b4896..13486ca 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -5809,15 +5809,25 @@ is_gimple_omp (const_gimple stmt) need any special handling for OpenACC. */ static inline bool -is_gimple_omp_oacc_specifically (const_gimple stmt) +is_gimple_omp_oacc_specifically (const_gimple stmt, + enum omp_clause_code code = OMP_CLAUSE_ERROR) { gcc_assert (is_gimple_omp (stmt)); switch (gimple_code (stmt)) { case GIMPLE_OACC_KERNELS: case GIMPLE_OACC_PARALLEL: - return true; + switch (code) + { + case OMP_CLAUSE_COLLAPSE: + case OMP_CLAUSE_PRIVATE: + return false; + default: + return true; + } case GIMPLE_OMP_FOR: + if (code == OMP_CLAUSE_COLLAPSE || code == OMP_CLAUSE_PRIVATE) + return false; switch (gimple_omp_for_kind (stmt)) { case GF_OMP_FOR_KIND_OACC_LOOP: diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 3e282c0..2d53db2 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1534,7 +1534,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) switch (OMP_CLAUSE_CODE (c)) { case OMP_CLAUSE_PRIVATE: - gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt)); + gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt, + OMP_CLAUSE_CODE (c))); decl = OMP_CLAUSE_DECL (c); if (OMP_CLAUSE_PRIVATE_OUTER_REF (c)) goto do_private; @@ -1762,7 +1763,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) } } break; - case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: case OMP_CLAUSE_COLLAPSE: @@ -1770,7 +1770,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE_SAFELEN: - gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt)); + gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt, + OMP_CLAUSE_CODE (c))); break; case OMP_CLAUSE_ALIGNED: @@ -1817,13 +1818,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_PRIVATE: case OMP_CLAUSE_FIRSTPRIVATE: case OMP_CLAUSE_REDUCTION: - if (is_gimple_omp_oacc_specifically (ctx->stmt)) - { - sorry ("clause not supported yet"); - break; - } case OMP_CLAUSE_LINEAR: - gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt)); + gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt, + OMP_CLAUSE_CODE (c))); decl = OMP_CLAUSE_DECL (c); if (is_variable_sized (decl)) install_var_local (decl, ctx); @@ -1896,6 +1893,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) sorry ("clause not supported yet"); break; } + break; case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_DEFAULT: @@ -1918,7 +1916,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: - gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt)); + gcc_assert (!is_gimple_omp_oacc_specifically (ctx->stmt, + OMP_CLAUSE_CODE (c))); case OMP_CLAUSE_NUM_GANGS: case OMP_CLAUSE_NUM_WORKERS: case OMP_CLAUSE_VECTOR_LENGTH: diff --git a/gcc/testsuite/c-c++-common/goacc/collapse-1.c b/gcc/testsuite/c-c++-common/goacc/collapse-1.c new file mode 100644 index 0000000..1321301 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/collapse-1.c @@ -0,0 +1,16 @@ +void +foo (void) +{ + int i, j; +#pragma acc parallel +#pragma acc loop collapse(1) + for (i = 0; i < 10; i++) + ; + +#pragma acc parallel +#pragma acc loop collapse(2) + for (i = 0; i < 10; i++) + for (j = 0; j < 10; j++) + ; + +} diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 new file mode 100644 index 0000000..eba20af --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 @@ -0,0 +1,16 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original -std=f2008" } + +PROGRAM test + IMPLICIT NONE + INTEGER :: a(64), b(64), c(64), i, j, k + ! Must be replaced by three loops. + !$acc loop + DO CONCURRENT (i=1:64, j=1:64, k=1:64, i==j .and. j==k) + a(i) = b(j) + c(k) = b(j) + END DO +END PROGRAM test +! { dg-prune-output "sorry, unimplemented: Clause not supported yet" } +! { dg-final { scan-tree-dump-times "collapse\\(3\\)" 1 "original" } } +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 new file mode 100644 index 0000000..ec1fb1f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 @@ -0,0 +1,50 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original -std=f2008" } + +! test for tree-dump-original and spaces-commas + +program test + implicit none + integer :: i, j, k, m, sum + REAL :: a(64), b(64), c(64) + + !$acc kernels + !$acc loop seq collapse(2) + DO i = 1,10 + DO j = 1,10 + ENDDO + ENDDO + + !$acc loop independent gang (3) + DO i = 1,10 + !$acc loop worker(3) ! { dg-error "work-sharing region may not be closely nested inside of work-sharing, critical, ordered, master or explicit task region" } + DO j = 1,10 + !$acc loop vector(5) + DO k = 1,10 + ENDDO + ENDDO + ENDDO + !$acc end kernels + + sum = 0 + !$acc parallel + !$acc loop private(m) reduction(+:sum) + DO i = 1,10 + sum = sum + 1 + ENDDO + !$acc end parallel + +end program test +! { dg-prune-output "sorry, unimplemented: Clause not supported yet" } +! { dg-final { scan-tree-dump-times "pragma acc loop" 5 "original" } } + +! { dg-final { scan-tree-dump-times "ordered" 1 "original" } } +! { dg-final { scan-tree-dump-times "collapse\\(2\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "independent" 1 "original" } } +! { dg-final { scan-tree-dump-times "gang\\(3\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "worker\\(3\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "vector\\(5\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } } +! { dg-final { cleanup-tree-dump "original" } } \ No newline at end of file