diff mbox

[gomp4] arrays inside modules

Message ID 5695AD6B.5060802@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis Jan. 13, 2016, 1:50 a.m. UTC
I've applied this patch to gomp-4_0-branch which fixes an ICE when an
array declared inside a module is used inside an offloaded acc region.
Bad things happen when you try to use sym->backend_decl when it wasn't
defined.

This patch was part of an optimization that I implemented in gomp4 in an
attempt to move all of the non-array descriptor array variables into the
offloaded region. Applications, such as cloverleaf, sometimes have a lot
of small offloaded regions, and treating those supplementary array
variables as firstprivate caused a measurable I/O overhead.

Cesar
diff mbox

Patch

2015-10-13  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/fortran/
	* trans-array.c (gfc_trans_array_bounds): Add an INIT_VLA argument
	to control whether VLAs should be initialized.  Don't mark this
	function as static.
	(gfc_trans_auto_array_allocation): Update call to
	gfc_trans_array_bounds.
	(gfc_trans_g77_array): Likewise.
	* trans-array.h: Declare gfc_trans_array_bounds.
	* trans-openmp.c (gfc_scan_nodesc_arrays): New function.
	(gfc_privatize_nodesc_arrays_1): New function.
	(gfc_privatize_nodesc_arrays): New function.
	(gfc_init_nodesc_arrays): New function.
	(gfc_trans_oacc_construct): Initialize any internal variables for
	arrays without array descriptors inside the offloaded parallel and
	kernels region.
	(gfc_trans_oacc_combined_directive): Likewise.

	gcc/testsuite/
	* gfortran.dg/goacc/default_none.f95: New test.

diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index a6b761b..86f983a 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -5709,9 +5709,9 @@  gfc_trans_array_cobounds (tree type, stmtblock_t * pblock,
 /* Generate code to evaluate non-constant array bounds.  Sets *poffset and
    returns the size (in elements) of the array.  */
 
-static tree
+tree
 gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
-                        stmtblock_t * pblock)
+                        stmtblock_t * pblock, bool init_vla)
 {
   gfc_array_spec *as;
   tree size;
@@ -5788,7 +5788,9 @@  gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
     }
 
   gfc_trans_array_cobounds (type, pblock, sym);
-  gfc_trans_vla_type_sizes (sym, pblock);
+
+  if (init_vla)
+    gfc_trans_vla_type_sizes (sym, pblock);
 
   *poffset = offset;
   return size;
@@ -5852,7 +5854,7 @@  gfc_trans_auto_array_allocation (tree decl, gfc_symbol * sym,
       && !INTEGER_CST_P (sym->ts.u.cl->backend_decl))
     gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
-  size = gfc_trans_array_bounds (type, sym, &offset, &init);
+  size = gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Don't actually allocate space for Cray Pointees.  */
   if (sym->attr.cray_pointee)
@@ -5947,7 +5949,7 @@  gfc_trans_g77_array (gfc_symbol * sym, gfc_wrapped_block * block)
     gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
   /* Evaluate the bounds of the array.  */
-  gfc_trans_array_bounds (type, sym, &offset, &init);
+  gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Set the offset.  */
   if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h
index 52f1c9a..8dbafb9 100644
--- a/gcc/fortran/trans-array.h
+++ b/gcc/fortran/trans-array.h
@@ -44,6 +44,8 @@  void gfc_trans_g77_array (gfc_symbol *, gfc_wrapped_block *);
 /* Generate code to deallocate an array, if it is allocated.  */
 tree gfc_trans_dealloc_allocated (tree, bool, gfc_expr *);
 
+tree gfc_trans_array_bounds (tree, gfc_symbol *, tree *, stmtblock_t *, bool);
+
 tree gfc_full_array_size (stmtblock_t *, tree, int);
 
 tree gfc_duplicate_allocatable (tree, tree, tree, int, tree);
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 8c1e897..f2e9803 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -39,6 +39,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "arith.h"
 #include "omp-low.h"
 #include "gomp-constants.h"
+#include "hash-set.h"
+#include "tree-iterator.h"
 
 int ompws_flags;
 
@@ -2716,22 +2718,157 @@  gfc_trans_omp_code (gfc_code *code, bool force_empty)
   return stmt;
 }
 
+void gfc_debug_expr (gfc_expr *);
+
+/* Add any array that does not have an array descriptor to the hash_set
+   pointed to by DATA.  */
+
+static int
+gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
+		void *data)
+{
+  hash_set<gfc_symbol *> *arrays = (hash_set<gfc_symbol *> *)data;
+
+  if ((*e)->expr_type == EXPR_VARIABLE)
+    {
+      gfc_symbol *sym = (*e)->symtree->n.sym;
+
+      if (sym->attr.dimension && gfc_is_nodesc_array (sym))
+	arrays->add (sym);
+    }
+
+  return 0;
+}
+
+/* Build a set of internal array variables (lbound, ubound, stride, etc.)
+   that need privatization.  */
+
+static tree
+gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data)
+{
+  hash_set<tree> *decls = (hash_set<tree> *)data;
+
+  if (TREE_CODE (*tp) == MODIFY_EXPR)
+    {
+      tree lhs = TREE_OPERAND (*tp, 0);
+      if (DECL_P (lhs))
+	decls->add (lhs);
+    }
+
+  if (IS_TYPE_OR_DECL_P (*tp))
+    *walk_subtrees = false;
+
+  return NULL;
+}
+
+/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK.  Append private
+   clauses for those arrays in CLAUSES.  */
+
+static tree
+gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
+			     stmtblock_t *block, tree clauses)
+{
+  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
+  hash_set<tree> *private_decls = new hash_set<tree>;
+
+  for (; its != array_set->end (); ++its)
+    {
+      gfc_symbol *sym = *its;
+      tree parm = sym->backend_decl;
+      tree type = TREE_TYPE (parm);
+      tree offset, tmp;
+
+      /* Evaluate the bounds of the array.  */
+      gfc_trans_array_bounds (type, sym, &offset, block, false);
+
+      /* Set the offset.  */
+      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
+	gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
+
+      /* Set the pointer itself if we aren't using the parameter
+	 directly.  */
+      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
+	  && GFC_DECL_SAVED_DESCRIPTOR (parm))
+	{
+	  tmp = convert (TREE_TYPE (parm),
+			 GFC_DECL_SAVED_DESCRIPTOR (parm));
+	  gfc_add_modify (block, parm, tmp);
+	}
+    }
+
+  /* Add private clauses for any variables that are used by
+     gfc_trans_array_bounds.  */
+  walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1,
+				private_decls);
+
+  hash_set<tree>::iterator itt = private_decls->begin ();
+
+  for (; itt != private_decls->end (); ++itt)
+    {
+      tree nc = build_omp_clause (input_location, OMP_CLAUSE_PRIVATE);
+      OMP_CLAUSE_DECL (nc) = *itt;
+      OMP_CLAUSE_CHAIN (nc) = clauses;
+      clauses = nc;
+    }
+
+  delete private_decls;
+
+  return clauses;
+}
+
+/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain
+   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Place the initialization
+   sequences in CODE.  Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any
+   arrays which were initialized.  */
+
+static hash_set<gfc_symbol *> *
+gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
+			bool scan_nodesc_arrays)
+{
+  hash_set<gfc_symbol *> *array_set = NULL;
+
+  if (!scan_nodesc_arrays)
+    return NULL;
+
+  array_set = new hash_set<gfc_symbol *>;
+  gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays,
+		   array_set);
+
+  if (array_set->elements ())
+    {
+      gfc_start_block (inner);
+      pushlevel ();
+      *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses);
+    }
+  else
+    {
+      delete array_set;
+      array_set = NULL;
+    }
+
+  return array_set;
+}
+
 /* Trans OpenACC directives. */
 /* parallel, kernels, data and host_data. */
 static tree
 gfc_trans_oacc_construct (gfc_code *code)
 {
-  stmtblock_t block;
+  stmtblock_t block, inner;
   tree stmt, oacc_clauses;
   enum tree_code construct_code;
+  bool scan_nodesc_arrays = false;
+  hash_set<gfc_symbol *> *array_set = NULL;
 
   switch (code->op)
     {
       case EXEC_OACC_PARALLEL:
 	construct_code = OACC_PARALLEL;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_KERNELS:
 	construct_code = OACC_KERNELS;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_DATA:
 	construct_code = OACC_DATA;
@@ -2746,10 +2883,25 @@  gfc_trans_oacc_construct (gfc_code *code)
   gfc_start_block (&block);
   oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
 					code->loc);
+
+  array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code,
+				      scan_nodesc_arrays);
+
   stmt = gfc_trans_omp_code (code->block->next, true);
+
+  if (array_set && array_set->elements ())
+    {
+      gfc_add_expr_to_block (&inner, stmt);
+      stmt = gfc_finish_block (&inner);
+      stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      delete array_set;
+    }
+
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
+
   gfc_add_expr_to_block (&block, stmt);
+
   return gfc_finish_block (&block);
 }
 
@@ -3483,18 +3635,22 @@  gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
 static tree
 gfc_trans_oacc_combined_directive (gfc_code *code)
 {
-  stmtblock_t block, *pblock = NULL;
+  stmtblock_t block, inner, *pblock = NULL;
   gfc_omp_clauses construct_clauses, loop_clauses;
   tree stmt, oacc_clauses = NULL_TREE;
   enum tree_code construct_code;
+  bool scan_nodesc_arrays = false;
+  hash_set<gfc_symbol *> *array_set = NULL;
 
   switch (code->op)
     {
       case EXEC_OACC_PARALLEL_LOOP:
 	construct_code = OACC_PARALLEL;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_KERNELS_LOOP:
 	construct_code = OACC_KERNELS;
+	scan_nodesc_arrays = true;
 	break;
       default:
 	gcc_unreachable ();
@@ -3526,18 +3682,35 @@  gfc_trans_oacc_combined_directive (gfc_code *code)
       oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
 					    code->loc);
     }
+
+  array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code,
+				      scan_nodesc_arrays);
+
   if (!loop_clauses.seq)
-    pblock = &block;
+    pblock = (array_set && array_set->elements ()) ? &inner : &block;
   else
     pushlevel ();
   stmt = gfc_trans_omp_do (code, EXEC_OACC_LOOP, pblock, &loop_clauses, NULL);
+
+  if (array_set && array_set->elements ())
+    gfc_add_expr_to_block (&inner, stmt);
+
   if (TREE_CODE (stmt) != BIND_EXPR)
     stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
   else
     poplevel (0, 0);
+
+  if (array_set && array_set->elements ())
+    {
+      stmt = gfc_finish_block (&inner);
+      stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      delete array_set;
+    }
+
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
   gfc_add_expr_to_block (&block, stmt);
+
   return gfc_finish_block (&block);
 }
 
diff --git a/gcc/testsuite/gfortran.dg/goacc/default_none.f95 b/gcc/testsuite/gfortran.dg/goacc/default_none.f95
new file mode 100644
index 0000000..5ce66ae
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/default_none.f95
@@ -0,0 +1,59 @@ 
+! Ensure that the internal array variables, offset, lbound, etc., don't
+! trigger errors with default(none).
+
+! { dg-do compile }
+
+program main
+  implicit none
+  integer i
+  integer,parameter :: n = 100
+  integer,allocatable :: a1(:), a2(:,:)
+
+  allocate (a1 (n))
+  allocate (a2 (-n:n,-n:n))
+  a1 (:) = -1
+
+  !$acc parallel loop default(none) copy (a1(1:n))
+  do i = 1,n
+     a1(i) = i
+  end do
+  !$acc end parallel loop
+
+  call foo (a1)
+  call bar (a1, n)
+  call foobar (a2,n)
+
+contains
+
+  subroutine foo (da1)
+    integer :: da1(n)
+
+    !$acc parallel loop default(none) copy (da1(1:n))
+    do i = 1,n
+       da1(i) = i*2
+    end do
+    !$acc end parallel loop
+  end subroutine foo
+end program main
+
+subroutine bar (da2,n)
+  integer :: n, da2(n)
+  integer i
+
+  !$acc parallel loop default(none) copy (da2(1:n)) firstprivate(n)
+  do i = 1,n
+     da2(i) = i*3
+  end do
+  !$acc end parallel loop
+end subroutine bar
+
+subroutine foobar (da3,n)
+  integer :: n, da3(-n:n,-n:n)
+  integer i
+
+  !$acc parallel loop default(none) copy (da3(-n:n,-n:n)) firstprivate(n)
+  do i = 1,n
+     da3(i,0) = i*3
+  end do
+  !$acc end parallel loop
+end subroutine foobar