diff mbox

[Fortran,gomp4] : Transform OpenACC loop directive

Message ID 53299ABC.6010407@samsung.com
State New
Headers show

Commit Message

Ilmir Usmanov March 19, 2014, 1:25 p.m. UTC
Hi Tobias!

This patch implements transformation of OpenACC loop directive from 
Fortran AST to GENERIC.

Successfully bootstrapped and tested with no new regressions on 
x86_64-unknown-linux-gnu.

OK for gomp4 branch?

Comments

Tobias Burnus March 19, 2014, 8:16 p.m. UTC | #1
Hi Illmir,

Ilmir Usmanov:
> This patch implements transformation of OpenACC loop directive from 
> Fortran AST to GENERIC.

If I followed correctly, with this patch the Fortran FE implementation 
of OpenACC is complete, except for:

* !$acc cache() - parsing supported, but then aborting with a 
not-implemented error
* OpenACC 2.0a additions.

Am I right?

> Successfully bootstrapped and tested with no new regressions on 
> x86_64-unknown-linux-gnu.
> OK for gomp4 branch?

I leave the review of gcc/tree-pretty-print.c part (looks good to me) to 
Thomas, who might have also a comment to the Fortran part.

For a DO loop, the code looks okay.


For DO CONCURRENT, it is not. I think we should really consider to 
reject DO CONCURRENT with a "not permitted"; it is currently not 
explicitly supported by OpenACC; I think we can still worry about it, 
when it will be explicitly added to OpenACC. Otherwise, see 
gfc_trans_do_concurrent for how to handle the do concurrent loops.

Issues with DO CONCURRENT:

* You use "code->ext.iterator->var" - that's fine with DO but not with 
DO CONCURRENT, which uses a "code->ext.forall_iterator"

* Do concurrent also handles multiple variables in a single statement, 
such as:

integer :: i, j, b(3,5)
DO CONCURRENT(i=1:3, j=1:5:2)
   b(i, j) = -42
END DO
end

* And do concurrent also supports masks:

logical :: my_mask(3)
integer :: i, b(3)
b(i) = [5, 5, 2]
my_mask = [.true., .false., .true.]
do concurrent (i=1:3, b(i) == 5 .and. my_mask(i))
   b(i) = -42
end do
end

Tobias
diff mbox

Patch

From de2dd5ba0c48500e8e9084bd46cbfac2f21352fe Mon Sep 17 00:00:00 2001
From: Ilmir Usmanov <i.usmanov@samsung.com>
Date: Wed, 19 Mar 2014 15:12:36 +0400
Subject: [PATCH] Transform OpenACC loop directive from fortran AST to GENERIC

---
	* gcc/fortran/trans-openmp.c (gfc_trans_oacc_loop): New function.
	(gfc_trans_oacc_combined_directive): Call it.
	(gfc_trans_oacc_directive): Likewise.
	* gcc/tree-pretty-print (dump_omp_clause): Fix WORKER and VECTOR.
	* gcc/testsuite/gfortran.dg/goacc/loop-tree.f95: New test.

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 29364f4..cb7c970 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1571,11 +1571,181 @@  typedef struct dovar_init_d {
   tree init;
 } dovar_init;
 
+
+static tree
+gfc_trans_oacc_loop (gfc_code *code, stmtblock_t *pblock,
+		     gfc_omp_clauses *loop_clauses)
+{
+  gfc_se se;
+  tree dovar, stmt, from, to, step, type, init, cond, incr;
+  tree count = NULL_TREE, cycle_label, tmp, omp_clauses;
+  stmtblock_t block;
+  stmtblock_t body;
+  gfc_omp_clauses *clauses = code->ext.omp_clauses;
+  int i, collapse = clauses->collapse;
+  vec<dovar_init> inits = vNULL;
+  dovar_init *di;
+  unsigned ix;
+
+  if (collapse <= 0)
+    collapse = 1;
+
+  code = code->block->next;
+  gcc_assert (code->op == EXEC_DO || code->op == EXEC_DO_CONCURRENT);
+
+  init = make_tree_vec (collapse);
+  cond = make_tree_vec (collapse);
+  incr = make_tree_vec (collapse);
+
+  if (pblock == NULL)
+    {
+      gfc_start_block (&block);
+      pblock = &block;
+    }
+
+  omp_clauses = gfc_trans_omp_clauses (pblock, loop_clauses, code->loc);
+
+  for (i = 0; i < collapse; i++)
+    {
+      int simple = 0;
+
+      /* Evaluate all the expressions in the iterator.  */
+      gfc_init_se (&se, NULL);
+      gfc_conv_expr_lhs (&se, code->ext.iterator->var);
+      gfc_add_block_to_block (pblock, &se.pre);
+      dovar = se.expr;
+      type = TREE_TYPE (dovar);
+      gcc_assert (TREE_CODE (type) == INTEGER_TYPE);
+
+      gfc_init_se (&se, NULL);
+      gfc_conv_expr_val (&se, code->ext.iterator->start);
+      gfc_add_block_to_block (pblock, &se.pre);
+      from = gfc_evaluate_now (se.expr, pblock);
+
+      gfc_init_se (&se, NULL);
+      gfc_conv_expr_val (&se, code->ext.iterator->end);
+      gfc_add_block_to_block (pblock, &se.pre);
+      to = gfc_evaluate_now (se.expr, pblock);
+
+      gfc_init_se (&se, NULL);
+      gfc_conv_expr_val (&se, code->ext.iterator->step);
+      gfc_add_block_to_block (pblock, &se.pre);
+      step = gfc_evaluate_now (se.expr, pblock);
+
+      /* Special case simple loops.  */
+      if (TREE_CODE (dovar) == VAR_DECL)
+	{
+	  if (integer_onep (step))
+	    simple = 1;
+	  else if (tree_int_cst_equal (step, integer_minus_one_node))
+	    simple = -1;
+	}
+
+      /* Loop body.  */
+      if (simple)
+	{
+	  TREE_VEC_ELT (init, i) = build2_v (MODIFY_EXPR, dovar, from);
+	  /* The condition should not be folded.  */
+	  TREE_VEC_ELT (cond, i) = build2_loc (input_location, simple > 0
+					       ? LE_EXPR : GE_EXPR,
+					       boolean_type_node, dovar, to);
+	  TREE_VEC_ELT (incr, i) = fold_build2_loc (input_location, PLUS_EXPR,
+						    type, dovar, step);
+	  TREE_VEC_ELT (incr, i) = fold_build2_loc (input_location,
+						    MODIFY_EXPR,
+						    type, dovar,
+						    TREE_VEC_ELT (incr, i));
+	}
+      else
+	{
+	  /* STEP is not 1 or -1.  Use:
+	     for (count = 0; count < (to + step - from) / step; count++)
+	       {
+		 dovar = from + count * step;
+		 body;
+	       cycle_label:;
+	       }  */
+	  tmp = fold_build2_loc (input_location, MINUS_EXPR, type, step, from);
+	  tmp = fold_build2_loc (input_location, PLUS_EXPR, type, to, tmp);
+	  tmp = fold_build2_loc (input_location, TRUNC_DIV_EXPR, type, tmp,
+				 step);
+	  tmp = gfc_evaluate_now (tmp, pblock);
+	  count = gfc_create_var (type, "count");
+	  TREE_VEC_ELT (init, i) = build2_v (MODIFY_EXPR, count,
+					     build_int_cst (type, 0));
+	  /* The condition should not be folded.  */
+	  TREE_VEC_ELT (cond, i) = build2_loc (input_location, LT_EXPR,
+					       boolean_type_node,
+					       count, tmp);
+	  TREE_VEC_ELT (incr, i) = fold_build2_loc (input_location, PLUS_EXPR,
+						    type, count,
+						    build_int_cst (type, 1));
+	  TREE_VEC_ELT (incr, i) = fold_build2_loc (input_location,
+						    MODIFY_EXPR, type, count,
+						    TREE_VEC_ELT (incr, i));
+
+	  /* Initialize DOVAR.  */
+	  tmp = fold_build2_loc (input_location, MULT_EXPR, type, count, step);
+	  tmp = fold_build2_loc (input_location, PLUS_EXPR, type, from, tmp);
+	  dovar_init e = {dovar, tmp};
+	  inits.safe_push (e);
+	}
+
+      if (i + 1 < collapse)
+	code = code->block->next;
+    }
+
+  if (pblock != &block)
+    {
+      pushlevel ();
+      gfc_start_block (&block);
+    }
+
+  gfc_start_block (&body);
+
+  FOR_EACH_VEC_ELT (inits, ix, di)
+    gfc_add_modify (&body, di->var, di->init);
+  inits.release ();
+
+  /* Cycle statement is implemented with a goto.  Exit statement must not be
+     present for this loop.  */
+  cycle_label = gfc_build_label_decl (NULL_TREE);
+
+  /* Put these labels where they can be found later.  */
+
+  code->cycle_label = cycle_label;
+  code->exit_label = NULL_TREE;
+
+  /* Main loop body.  */
+  tmp = gfc_trans_omp_code (code->block->next, true);
+  gfc_add_expr_to_block (&body, tmp);
+
+  /* Label for cycle statements (if needed).  */
+  if (TREE_USED (cycle_label))
+    {
+      tmp = build1_v (LABEL_EXPR, cycle_label);
+      gfc_add_expr_to_block (&body, tmp);
+    }
+
+  /* End of loop body.  */
+  stmt = make_node (OACC_LOOP);
+
+  TREE_TYPE (stmt) = void_type_node;
+  OMP_FOR_BODY (stmt) = gfc_finish_block (&body);
+  OMP_FOR_CLAUSES (stmt) = omp_clauses;
+  OMP_FOR_INIT (stmt) = init;
+  OMP_FOR_COND (stmt) = cond;
+  OMP_FOR_INCR (stmt) = incr;
+  gfc_add_expr_to_block (&block, stmt);
+
+  return gfc_finish_block (&block);
+}
+
 /* parallel loop and kernels loop. */
 static tree
 gfc_trans_oacc_combined_directive (gfc_code *code)
 {
-  stmtblock_t block;
+  stmtblock_t block, *pblock = NULL;
   gfc_omp_clauses construct_clauses, loop_clauses;
   tree stmt, oacc_clauses = NULL_TREE;
   enum tree_code construct_code;
@@ -1614,11 +1784,21 @@  gfc_trans_oacc_combined_directive (gfc_code *code)
       oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
 					    code->loc);
     }
-    
-  gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
-  stmt = gfc_trans_omp_code (code->block->next, true);
+  if (!loop_clauses.seq)
+    pblock = &block;
+  else
+    pushlevel ();
+  stmt = gfc_trans_oacc_loop (code, pblock, &loop_clauses);
+  if (TREE_CODE (stmt) != BIND_EXPR)
+    stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+  else
+    poplevel (0, 0);
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
+  if (code->op == EXEC_OACC_KERNELS_LOOP)
+    OACC_KERNELS_COMBINED (stmt) = 1;
+  else
+    OACC_PARALLEL_COMBINED (stmt) = 1;
   gfc_add_expr_to_block (&block, stmt);
   return gfc_finish_block (&block);
 }
@@ -2258,8 +2438,7 @@  gfc_trans_oacc_directive (gfc_code *code)
     case EXEC_OACC_HOST_DATA:
       return gfc_trans_oacc_construct (code);
     case EXEC_OACC_LOOP:
-      gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
-      return NULL_TREE;
+      return gfc_trans_oacc_loop (code, NULL, code->ext.omp_clauses);
     case EXEC_OACC_UPDATE:
     case EXEC_OACC_WAIT:
     case EXEC_OACC_CACHE:
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95
new file mode 100644
index 0000000..ed0edce
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95
@@ -0,0 +1,49 @@ 
+! { dg-do compile } 
+! { dg-additional-options "-fdump-tree-original" } 
+
+! test for tree-dump-original and spaces-commas
+
+program test
+  implicit none
+  integer :: i, j, k, m, sum
+
+  !$acc kernels 
+  !$acc loop seq collapse(2)
+  DO i = 1,10
+    DO j = 1,10
+    ENDDO
+  ENDDO
+
+  !$acc loop independent gang (3)
+  DO i = 1,10
+    !$acc loop worker(3)
+    DO j = 1,10
+      !$acc loop vector(5)
+      DO k = 1,10
+      ENDDO
+    ENDDO
+  ENDDO
+  !$acc end kernels
+
+  sum = 0
+  !$acc parallel
+  !$acc loop private(m) reduction(+:sum)
+  DO i = 1,10
+    sum = sum + 1
+  ENDDO
+  !$acc end parallel
+
+end program test
+! { dg-excess-errors "unimplemented" }
+! { dg-final { scan-tree-dump-times "pragma acc loop" 5 "original" } } 
+
+! { dg-final { scan-tree-dump-times "ordered" 1 "original" } }
+! { dg-final { scan-tree-dump-times "collapse\\(2\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "independent" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "gang\\(3\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "worker\\(3\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "vector\\(5\\)" 1 "original" } } 
+
+! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } } 
+! { dg-final { cleanup-tree-dump "original" } } 
\ No newline at end of file
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 49e5f6c..d30b3c2 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -674,13 +674,15 @@  dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags)
 
     case OMP_CLAUSE_WORKER:
       pp_string (buffer, "worker(");
-      dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false);
+      dump_generic_node (buffer, OMP_CLAUSE_WORKER_EXPR (clause), spc, flags, 
+			 false);
       pp_character(buffer, ')');
       break;
 
     case OMP_CLAUSE_VECTOR:
       pp_string (buffer, "vector(");
-      dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false);
+      dump_generic_node (buffer, OMP_CLAUSE_VECTOR_EXPR (clause), spc, flags,
+			 false);
       pp_character(buffer, ')');
       break;
 
-- 
1.8.3.2