>From 50c0eef6f0a48fa05ab5de8924376a75fb23aca6 Mon Sep 17 00:00:00 2001
From: Ilmir Usmanov <i.usmanov@samsung.com>
Date: Sat, 22 Mar 2014 18:54:17 +0400
Subject: [PATCH] Transform OpenACC loop directive to GENERIC
---
* gcc/fortran/trans-openmp.c (gfc_trans_oacc_loop): New function.
(gfc_trans_oacc_combined_directive): Call it.
(gfc_trans_oacc_directive): Likewise.
(gfc_trans_oacc_loop_generate_for): New helper function.
(gfc_trans_oacc_loop_generate_mask_conds): Likewise.
* gcc/tree-pretty-print (dump_omp_clause): Fix WORKER and VECTOR.
* gcc/omp-low.c (scan_sharing_clauses): Reject OpenACC loop clauses.
gcc/testsuite/gfortran.dg/goacc/
* loop-tree.f95: New test.
* loop-4.f95: Likewise.
@@ -1571,11 +1571,304 @@ typedef struct dovar_init_d {
tree init;
} dovar_init;
+/* Helper function to generate a single for loop. */
+static void
+gfc_trans_oacc_loop_generate_for (stmtblock_t *pblock, gfc_se *se,
+ gfc_expr *var_expr, gfc_expr *start_expr,
+ gfc_expr *end_expr, gfc_expr *step_expr,
+ int i, tree *init, tree *cond, tree *incr,
+ vec<dovar_init>* inits)
+{
+ int simple = 0;
+ tree dovar, from, to, step, type, tmp, count = NULL_TREE;
+
+ /* Evaluate all the expressions. */
+ gfc_init_se (se, NULL);
+ gfc_conv_expr_lhs (se, var_expr);
+ gfc_add_block_to_block (pblock, &se->pre);
+ dovar = se->expr;
+ type = TREE_TYPE (dovar);
+ gcc_assert (TREE_CODE (type) == INTEGER_TYPE);
+
+ gfc_init_se (se, NULL);
+ gfc_conv_expr_val (se, start_expr);
+ gfc_add_block_to_block (pblock, &se->pre);
+ from = gfc_evaluate_now (se->expr, pblock);
+
+ gfc_init_se (se, NULL);
+ gfc_conv_expr_val (se, end_expr);
+ gfc_add_block_to_block (pblock, &se->pre);
+ to = gfc_evaluate_now (se->expr, pblock);
+
+ gfc_init_se (se, NULL);
+ gfc_conv_expr_val (se, step_expr);
+ gfc_add_block_to_block (pblock, &se->pre);
+ step = gfc_evaluate_now (se->expr, pblock);
+
+ /* Special case simple loops. */
+ if (TREE_CODE (dovar) == VAR_DECL)
+ {
+ if (integer_onep (step))
+ simple = 1;
+ else if (tree_int_cst_equal (step, integer_minus_one_node))
+ simple = -1;
+ }
+
+ /* Loop body. */
+ if (simple)
+ {
+ TREE_VEC_ELT (*init, i) = build2_v (MODIFY_EXPR, dovar, from);
+ /* The condition should not be folded. */
+ TREE_VEC_ELT (*cond, i) = build2_loc (input_location, simple > 0
+ ? LE_EXPR : GE_EXPR,
+ boolean_type_node, dovar, to);
+ TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, PLUS_EXPR,
+ type, dovar, step);
+ TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location,
+ MODIFY_EXPR,
+ type, dovar,
+ TREE_VEC_ELT (*incr, i));
+ }
+ else
+ {
+ /* STEP is not 1 or -1. Use:
+ for (count = 0; count < (to + step - from) / step; count++)
+ {
+ dovar = from + count * step;
+ body;
+ cycle_label:;
+ } */
+ tmp = fold_build2_loc (input_location, MINUS_EXPR, type, step, from);
+ tmp = fold_build2_loc (input_location, PLUS_EXPR, type, to, tmp);
+ tmp = fold_build2_loc (input_location, TRUNC_DIV_EXPR, type, tmp,
+ step);
+ tmp = gfc_evaluate_now (tmp, pblock);
+ count = gfc_create_var (type, "count");
+ TREE_VEC_ELT (*init, i) = build2_v (MODIFY_EXPR, count,
+ build_int_cst (type, 0));
+ /* The condition should not be folded. */
+ TREE_VEC_ELT (*cond, i) = build2_loc (input_location, LT_EXPR,
+ boolean_type_node,
+ count, tmp);
+ TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location, PLUS_EXPR,
+ type, count,
+ build_int_cst (type, 1));
+ TREE_VEC_ELT (*incr, i) = fold_build2_loc (input_location,
+ MODIFY_EXPR, type, count,
+ TREE_VEC_ELT (*incr, i));
+
+ /* Initialize DOVAR. */
+ tmp = fold_build2_loc (input_location, MULT_EXPR, type, count, step);
+ tmp = fold_build2_loc (input_location, PLUS_EXPR, type, from, tmp);
+ dovar_init e = {dovar, tmp};
+ inits->safe_push (e);
+ }
+}
+
+/* Recursively generate conditional expressions. */
+static tree
+gfc_trans_oacc_loop_generate_mask_conds (gfc_code *code, int collapse)
+{
+ if (code->op == EXEC_DO_CONCURRENT && code->expr1)
+ {
+ gfc_se if_se;
+ locus saved_loc;
+ location_t loc;
+ tree stmt, body;
+
+ /* Initialize a statement builder for each block. Puts in NULL_TREEs. */
+ gfc_init_se (&if_se, NULL);
+ gfc_start_block (&if_se.pre);
+
+ /* Calculate the IF condition expression. */
+ if (code->expr1->where.lb)
+ {
+ gfc_save_backend_locus (&saved_loc);
+ gfc_set_backend_locus (&code->expr1->where);
+ }
+ gfc_conv_expr_val (&if_se, code->expr1);
+ if (code->expr1->where.lb)
+ gfc_restore_backend_locus (&saved_loc);
+
+ /* Generate or translate body. */
+ if (collapse > 1)
+ body = gfc_trans_oacc_loop_generate_mask_conds (code->block->next,
+ collapse - 1);
+ else
+ body = gfc_trans_omp_code (code->block->next, true);
+
+ /* Generate conditional expression. */
+ loc = code->expr1->where.lb ? code->expr1->where.lb->location
+ : input_location;
+ stmt = fold_build3_loc (loc, COND_EXPR, void_type_node, if_se.expr, body,
+ build_empty_stmt (input_location));
+
+ gfc_add_expr_to_block (&if_se.pre, stmt);
+ return gfc_finish_block (&if_se.pre);
+ }
+ else if (collapse > 1)
+ return gfc_trans_oacc_loop_generate_mask_conds (code->block->next,
+ collapse - 1);
+ else
+ return gfc_trans_omp_code (code->block->next, true);
+}
+
+/* Unlike OpenMP's one, OpenACC implementation supports DO CONCURRENT loops.
+ For each dovar in DO CONCURRENT loop it generates single for loop.
+ All generated for loops must be perfectly nested (and collapsed later).
+ Hence, unlike gfc_trans_do_concurrent, one need to generate mask checks
+ inside of the deepest for loop.
+
+ For example, if we have loop like
+
+ !$ACC LOOP
+ DO CONCURRENT (i=1:64:2,j=1:64:2,k=1:64:2,i==j.and.j==k)
+ body
+ ENDDO
+
+ The result must be like
+
+ #pragma acc loop collapse(3)
+ for(count.0=0; count.0<32; count.0=count.0+1)
+ for(count.1=0; count.1<32; count.1=count.1+1)
+ for(count.2=0; count.2<32; count.2=count.2+1)
+ {
+ i = count.0 * 2 + 1;
+ j = count.1 * 2 + 1;
+ k = count.2 * 2 + 1;
+ if (i==j && j==k)
+ body;
+ cycle_label:;
+ }
+ */
+static tree
+gfc_trans_oacc_loop (gfc_code *code, stmtblock_t *pblock,
+ gfc_omp_clauses *loop_clauses)
+{
+ gfc_se se;
+ tree init, cond, incr, stmt, cycle_label, tmp, omp_clauses;
+ stmtblock_t block;
+ stmtblock_t body;
+ gfc_omp_clauses *clauses = code->ext.omp_clauses;
+ int i, collapse = clauses->collapse;
+ vec<dovar_init> inits = vNULL;
+ dovar_init *di;
+ unsigned ix;
+ gfc_code *old_code;
+
+ /* DO CONCURRENT specific vars. */
+ gfc_forall_iterator *fai;
+ int nforloops = 0;
+ int current_for = 0;
+
+ if (collapse <= 0)
+ collapse = 1;
+
+ code = code->block->next;
+ gcc_assert (code->op == EXEC_DO || code->op == EXEC_DO_CONCURRENT);
+
+ if (pblock == NULL)
+ {
+ gfc_start_block (&block);
+ pblock = █
+ }
+
+ /* Calculate number of required for loops. */
+ old_code = code;
+ for (i = 0; i < collapse; i++)
+ {
+ if (code->op == EXEC_DO)
+ nforloops++;
+ else if (code->op == EXEC_DO_CONCURRENT)
+ for (fai = code->ext.forall_iterator; fai; fai = fai->next)
+ nforloops++;
+ else
+ gcc_unreachable ();
+ code = code->block->next;
+ }
+ code = old_code;
+
+ /* Set the number of required for loops for collapse. */
+ loop_clauses->collapse = nforloops;
+
+ omp_clauses = gfc_trans_omp_clauses (pblock, loop_clauses, code->loc);
+
+ init = make_tree_vec (nforloops);
+ cond = make_tree_vec (nforloops);
+ incr = make_tree_vec (nforloops);
+
+ for (i = 0; i < collapse; i++)
+ {
+ if (code->op == EXEC_DO)
+ gfc_trans_oacc_loop_generate_for (pblock, &se, code->ext.iterator->var,
+ code->ext.iterator->start,
+ code->ext.iterator->end,
+ code->ext.iterator->step,
+ current_for++, &init, &cond, &incr,
+ &inits);
+ else if (code->op == EXEC_DO_CONCURRENT)
+ for (fai = code->ext.forall_iterator; fai; fai = fai->next)
+ gfc_trans_oacc_loop_generate_for (pblock, &se, fai->var, fai->start,
+ fai->end, fai->stride, current_for++,
+ &init, &cond, &incr, &inits);
+ else
+ gcc_unreachable ();
+ if (i + 1 < collapse)
+ code = code->block->next;
+ }
+
+ if (pblock != &block)
+ {
+ pushlevel ();
+ gfc_start_block (&block);
+ }
+
+ gfc_start_block (&body);
+
+ /* Generate complicated dovars. */
+ FOR_EACH_VEC_ELT (inits, ix, di)
+ gfc_add_modify (&body, di->var, di->init);
+ inits.release ();
+
+ /* Cycle statement is implemented with a goto. Exit statement must not be
+ present for this loop. */
+ cycle_label = gfc_build_label_decl (NULL_TREE);
+
+ /* Put these labels where they can be found later. */
+
+ code->cycle_label = cycle_label;
+ code->exit_label = NULL_TREE;
+
+ /* Main loop body. */
+ tmp = gfc_trans_oacc_loop_generate_mask_conds (old_code, collapse);
+ gfc_add_expr_to_block (&body, tmp);
+
+ /* Label for cycle statements (if needed). */
+ if (TREE_USED (cycle_label))
+ {
+ tmp = build1_v (LABEL_EXPR, cycle_label);
+ gfc_add_expr_to_block (&body, tmp);
+ }
+
+ /* End of loop body. */
+ stmt = make_node (OACC_LOOP);
+
+ TREE_TYPE (stmt) = void_type_node;
+ OMP_FOR_BODY (stmt) = gfc_finish_block (&body);
+ OMP_FOR_CLAUSES (stmt) = omp_clauses;
+ OMP_FOR_INIT (stmt) = init;
+ OMP_FOR_COND (stmt) = cond;
+ OMP_FOR_INCR (stmt) = incr;
+ gfc_add_expr_to_block (&block, stmt);
+
+ return gfc_finish_block (&block);
+}
+
/* parallel loop and kernels loop. */
static tree
gfc_trans_oacc_combined_directive (gfc_code *code)
{
- stmtblock_t block;
+ stmtblock_t block, *pblock = NULL;
gfc_omp_clauses construct_clauses, loop_clauses;
tree stmt, oacc_clauses = NULL_TREE;
enum tree_code construct_code;
@@ -1614,11 +1907,21 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
code->loc);
}
-
- gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
- stmt = gfc_trans_omp_code (code->block->next, true);
+ if (!loop_clauses.seq)
+ pblock = █
+ else
+ pushlevel ();
+ stmt = gfc_trans_oacc_loop (code, pblock, &loop_clauses);
+ if (TREE_CODE (stmt) != BIND_EXPR)
+ stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+ else
+ poplevel (0, 0);
stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
oacc_clauses);
+ if (code->op == EXEC_OACC_KERNELS_LOOP)
+ OACC_KERNELS_COMBINED (stmt) = 1;
+ else
+ OACC_PARALLEL_COMBINED (stmt) = 1;
gfc_add_expr_to_block (&block, stmt);
return gfc_finish_block (&block);
}
@@ -2258,8 +2561,7 @@ gfc_trans_oacc_directive (gfc_code *code)
case EXEC_OACC_HOST_DATA:
return gfc_trans_oacc_construct (code);
case EXEC_OACC_LOOP:
- gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
- return NULL_TREE;
+ return gfc_trans_oacc_loop (code, NULL, code->ext.omp_clauses);
case EXEC_OACC_UPDATE:
case EXEC_OACC_WAIT:
case EXEC_OACC_CACHE:
@@ -1557,7 +1557,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_REDUCTION:
if (is_gimple_omp_oacc_specifically (ctx->stmt))
{
- sorry ("clause not supported yet");
+ sorry ("Clause not supported yet");
break;
}
case OMP_CLAUSE_LINEAR:
@@ -1613,7 +1613,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_IF:
if (is_gimple_omp_oacc_specifically (ctx->stmt))
{
- sorry ("clause not supported yet");
+ sorry ("Clause not supported yet");
break;
}
case OMP_CLAUSE_FINAL:
@@ -1739,9 +1739,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
}
break;
- case OMP_CLAUSE_NOWAIT:
case OMP_CLAUSE_ORDERED:
case OMP_CLAUSE_COLLAPSE:
+ if (is_gimple_omp_oacc_specifically (ctx->stmt))
+ {
+ sorry ("Clause not supported yet");
+ break;
+ }
+ case OMP_CLAUSE_NOWAIT:
case OMP_CLAUSE_UNTIED:
case OMP_CLAUSE_MERGEABLE:
case OMP_CLAUSE_PROC_BIND:
@@ -1795,7 +1800,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_REDUCTION:
if (is_gimple_omp_oacc_specifically (ctx->stmt))
{
- sorry ("clause not supported yet");
+ sorry ("Clause not supported yet");
break;
}
case OMP_CLAUSE_LINEAR:
@@ -1864,9 +1869,11 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
break;
case OMP_CLAUSE_IF:
+ case OMP_CLAUSE_ORDERED:
+ case OMP_CLAUSE_COLLAPSE:
if (is_gimple_omp_oacc_specifically (ctx->stmt))
{
- sorry ("clause not supported yet");
+ sorry ("Clause not supported yet");
break;
}
case OMP_CLAUSE_COPYPRIVATE:
@@ -1879,8 +1886,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_SCHEDULE:
case OMP_CLAUSE_DIST_SCHEDULE:
case OMP_CLAUSE_NOWAIT:
- case OMP_CLAUSE_ORDERED:
- case OMP_CLAUSE_COLLAPSE:
case OMP_CLAUSE_UNTIED:
case OMP_CLAUSE_FINAL:
case OMP_CLAUSE_MERGEABLE:
new file mode 100644
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original -std=f2008" }
+
+PROGRAM test
+ IMPLICIT NONE
+ INTEGER :: a(64), b(64), c(64), i, j, k
+ ! Must be replaced by three loops.
+ !$acc loop
+ DO CONCURRENT (i=1:64, j=1:64, k=1:64, i==j .and. j==k)
+ a(i) = b(j)
+ c(k) = b(j)
+ END DO
+END PROGRAM test
+! { dg-prune-output "sorry, unimplemented: Clause not supported yet" }
+! { dg-final { scan-tree-dump-times "collapse\\(3\\)" 1 "original" } }
+! { dg-final { cleanup-tree-dump "original" } }
new file mode 100644
@@ -0,0 +1,50 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original -std=f2008" }
+
+! test for tree-dump-original and spaces-commas
+
+program test
+ implicit none
+ integer :: i, j, k, m, sum
+ REAL :: a(64), b(64), c(64)
+
+ !$acc kernels
+ !$acc loop seq collapse(2)
+ DO i = 1,10
+ DO j = 1,10
+ ENDDO
+ ENDDO
+
+ !$acc loop independent gang (3)
+ DO i = 1,10
+ !$acc loop worker(3) ! { dg-error "work-sharing region may not be closely nested inside of work-sharing, critical, ordered, master or explicit task region" }
+ DO j = 1,10
+ !$acc loop vector(5)
+ DO k = 1,10
+ ENDDO
+ ENDDO
+ ENDDO
+ !$acc end kernels
+
+ sum = 0
+ !$acc parallel
+ !$acc loop private(m) reduction(+:sum)
+ DO i = 1,10
+ sum = sum + 1
+ ENDDO
+ !$acc end parallel
+
+end program test
+! { dg-prune-output "sorry, unimplemented: Clause not supported yet" }
+! { dg-final { scan-tree-dump-times "pragma acc loop" 5 "original" } }
+
+! { dg-final { scan-tree-dump-times "ordered" 1 "original" } }
+! { dg-final { scan-tree-dump-times "collapse\\(2\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "independent" 1 "original" } }
+! { dg-final { scan-tree-dump-times "gang\\(3\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "worker\\(3\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "vector\\(5\\)" 1 "original" } }
+
+! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } }
+! { dg-final { cleanup-tree-dump "original" } }
\ No newline at end of file
@@ -674,13 +674,15 @@ dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags)
case OMP_CLAUSE_WORKER:
pp_string (buffer, "worker(");
- dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false);
+ dump_generic_node (buffer, OMP_CLAUSE_WORKER_EXPR (clause), spc, flags,
+ false);
pp_character(buffer, ')');
break;
case OMP_CLAUSE_VECTOR:
pp_string (buffer, "vector(");
- dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false);
+ dump_generic_node (buffer, OMP_CLAUSE_VECTOR_EXPR (clause), spc, flags,
+ false);
pp_character(buffer, ')');
break;
--
1.8.3.2