2017-04-05 Cesar Philippidis <cesar@codesourcery.com>
gcc/fortran/
* gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE,
OMP_MAP_DECLARE_DEALLOCATE.
* openmp.c (gfc_match_oacc_declare): Add support for OMP_MAP_ALLOC and
OMP_MAP_TO, as those match the OpenACC 2.5 semantics.
* trans-array.c (gfc_array_allocate): Call
gfc_trans_oacc_declare_allocate for decls with oacc_decalre_create
attributes set.
(gfc_array_deallocate): Likewise.
* trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC
declared create, copyin and deviceptr clauses.
(add_clause): Don't duplicate OpenACC declare clauses.
(find_module_oacc_declare_clauses): Relax oacc_declare_create to
OMP_MAP_ALLOC, and oacc_declare_copying to OMP_MAP_TO. This matches
the OpenACC 2.5 semantics.
* trans-openmp.c (gfc_trans_omp_clauses_1): Handle
OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}.
(gfc_trans_oacc_declare_allocate): New function.
* trans-stmt.h: Declare gfc_trans_oacc_declare_allocate.
gcc/
* omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare
create, copyin and deviceptr to have local lifetimes.
(lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}.
* tree-pretty-print.c (dump_omp_clause): Likewise.
gcc/testsuite/
* gfortran.dg/goacc/declare-allocatable-1.f90: New test.
include/
* gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE,
GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}.
libgomp/
* libgomp.h: Declare gomp_acc_declare_allocate.
* oacc-mem.c (gomp_acc_declare_allocate): New function.
* oacc-parallel.c (GOACC_enter_exit_data): Handle
GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}.
* testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test.
@@ -1156,7 +1156,9 @@ enum gfc_omp_map_op
OMP_MAP_RELEASE,
OMP_MAP_ALWAYS_TO,
OMP_MAP_ALWAYS_FROM,
- OMP_MAP_ALWAYS_TOFROM
+ OMP_MAP_ALWAYS_TOFROM,
+ OMP_MAP_DECLARE_ALLOCATE,
+ OMP_MAP_DECLARE_DEALLOCATE
};
enum gfc_omp_linear_op
@@ -2233,10 +2233,12 @@ gfc_match_oacc_declare (void)
switch (n->u.map_op)
{
case OMP_MAP_FORCE_ALLOC:
+ case OMP_MAP_ALLOC:
s->attr.oacc_declare_create = 1;
break;
case OMP_MAP_FORCE_TO:
+ case OMP_MAP_TO:
s->attr.oacc_declare_copyin = 1;
break;
@@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see
#include "trans-types.h"
#include "trans-array.h"
#include "trans-const.h"
+#include "trans-stmt.h"
#include "dependency.h"
static bool gfc_get_array_constructor_size (mpz_t *, gfc_constructor_base);
@@ -5394,6 +5395,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
gfc_expr **upper;
gfc_ref *ref, *prev_ref = NULL;
bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false;
+ bool oacc_declare = false;
ref = expr->ref;
@@ -5408,6 +5410,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
allocatable = expr->symtree->n.sym->attr.allocatable;
coarray = expr->symtree->n.sym->attr.codimension;
dimension = expr->symtree->n.sym->attr.dimension;
+ oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create;
}
else
{
@@ -5540,7 +5543,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
/* Update the array descriptors. */
if (dimension)
- gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset);
+ {
+ gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset);
+
+ if (oacc_declare)
+ gfc_trans_oacc_declare_allocate (&set_descriptor_block, expr, true);
+ }
set_descriptor = gfc_finish_block (&set_descriptor_block);
if (status != NULL_TREE)
@@ -5581,6 +5589,7 @@ gfc_array_deallocate (tree descriptor, tree pstat, tree errmsg, tree errlen,
tree tmp;
stmtblock_t block;
bool coarray = gfc_is_coarray (expr);
+ gfc_symbol *sym = expr->symtree->n.sym;
gfc_start_block (&block);
@@ -5588,6 +5597,9 @@ gfc_array_deallocate (tree descriptor, tree pstat, tree errmsg, tree errlen,
var = gfc_conv_descriptor_data_get (descriptor);
STRIP_NOPS (var);
+ if (!coarray && sym->attr.oacc_declare_create)
+ gfc_trans_oacc_declare_allocate (&block, expr, false);
+
/* Parameter is the address of the data component. */
tmp = gfc_deallocate_with_status (coarray ? descriptor : var, pstat, errmsg,
errlen, label_finish, false, expr, coarray);
@@ -1324,10 +1324,10 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list)
}
if (sym_attr.omp_declare_target
-#if 0 /* TODO */
|| sym_attr.oacc_declare_create
|| sym_attr.oacc_declare_copyin
|| sym_attr.oacc_declare_deviceptr
+#if 0 /* TODO */
|| sym_attr.oacc_declare_device_resident
#endif
)
@@ -5932,13 +5932,17 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op)
{
gfc_omp_namelist *n;
+ if (!module_oacc_clauses)
+ module_oacc_clauses = gfc_get_omp_clauses ();
+
+ for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next)
+ if (n->sym->backend_decl == sym->backend_decl)
+ return;
+
n = gfc_get_omp_namelist ();
n->sym = sym;
n->u.map_op = map_op;
- if (!module_oacc_clauses)
- module_oacc_clauses = gfc_get_omp_clauses ();
-
if (module_oacc_clauses->lists[OMP_LIST_MAP])
n->next = module_oacc_clauses->lists[OMP_LIST_MAP];
@@ -5954,10 +5958,10 @@ find_module_oacc_declare_clauses (gfc_symbol *sym)
gfc_omp_map_op map_op;
if (sym->attr.oacc_declare_create)
- map_op = OMP_MAP_FORCE_ALLOC;
+ map_op = OMP_MAP_ALLOC;
if (sym->attr.oacc_declare_copyin)
- map_op = OMP_MAP_FORCE_TO;
+ map_op = OMP_MAP_TO;
if (sym->attr.oacc_declare_deviceptr)
map_op = OMP_MAP_FORCE_DEVICEPTR;
@@ -2360,6 +2360,12 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
case OMP_MAP_FORCE_DEVICEPTR:
OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR);
break;
+ case OMP_MAP_DECLARE_ALLOCATE:
+ OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_ALLOCATE);
+ break;
+ case OMP_MAP_DECLARE_DEALLOCATE:
+ OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_DEALLOCATE);
+ break;
default:
gcc_unreachable ();
}
@@ -5369,6 +5375,41 @@ gfc_trans_oacc_declare (gfc_code *code)
return gfc_finish_block (&block);
}
+/* Create an OpenACC enter or exit data construct for an OpenACC declared
+ variable that has been allocated or deallocated. */
+
+tree
+gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr,
+ bool allocate)
+{
+ gfc_omp_clauses *clauses = gfc_get_omp_clauses ();
+ gfc_omp_namelist *p = gfc_get_omp_namelist ();
+ tree oacc_clauses, stmt;
+ enum tree_code construct_code;
+
+ p->sym = expr->symtree->n.sym;
+ p->where = expr->where;
+
+ if (allocate)
+ {
+ p->u.map_op = OMP_MAP_DECLARE_ALLOCATE;
+ construct_code = OACC_ENTER_DATA;
+ }
+ else
+ {
+ p->u.map_op = OMP_MAP_DECLARE_DEALLOCATE;
+ construct_code = OACC_EXIT_DATA;
+ }
+ clauses->lists[OMP_LIST_MAP] = p;
+
+ oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where);
+ stmt = build1_loc (input_location, construct_code, void_type_node,
+ oacc_clauses);
+ gfc_add_expr_to_block (block, stmt);
+
+ return stmt;
+}
+
tree
gfc_trans_oacc_directive (gfc_code *code)
{
@@ -67,6 +67,7 @@ tree gfc_trans_omp_directive (gfc_code *);
void gfc_trans_omp_declare_simd (gfc_namespace *);
tree gfc_trans_oacc_directive (gfc_code *);
tree gfc_trans_oacc_declare (gfc_namespace *);
+tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, bool);
/* trans-io.c */
tree gfc_trans_open (gfc_code *);
@@ -2265,7 +2265,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
&& is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
&& varpool_node::get_create (decl)->offloadable
&& !lookup_attribute ("omp declare target link",
- DECL_ATTRIBUTES (decl)))
+ DECL_ATTRIBUTES (decl))
+ && !(is_gimple_omp_oacc (ctx->stmt)))
break;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
&& OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER)
@@ -16740,6 +16741,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
case GOMP_MAP_FORCE_PRESENT:
case GOMP_MAP_FORCE_DEVICEPTR:
case GOMP_MAP_DEVICE_RESIDENT:
+ case GOMP_MAP_DECLARE_ALLOCATE:
+ case GOMP_MAP_DECLARE_DEALLOCATE:
case GOMP_MAP_DYNAMIC_ARRAY_TO:
case GOMP_MAP_DYNAMIC_ARRAY_FROM:
case GOMP_MAP_DYNAMIC_ARRAY_TOFROM:
new file mode 100644
@@ -0,0 +1,25 @@
+! Verify that OpenACC declared allocatable arrays have implicit
+! OpenACC enter and exit pragmas at the time of allocation and
+! deallocation.
+
+! { dg-additional-options "-fdump-tree-original" }
+
+program allocate
+ implicit none
+ integer, allocatable :: a(:)
+ integer, parameter :: n = 100
+ integer i
+ !$acc declare create(a)
+
+ allocate (a(n))
+
+ !$acc parallel loop copyout(a)
+ do i = 1, n
+ a(i) = i
+ end do
+
+ deallocate (a)
+end program allocate
+
+! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 1 "gimple" } }
@@ -764,6 +764,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags)
case GOMP_MAP_DYNAMIC_ARRAY_FORCE_PRESENT:
pp_string (pp, "force_present,dynamic_array");
break;
+ case GOMP_MAP_DECLARE_ALLOCATE:
+ pp_string (pp, "declare_allocate");
+ break;
+ case GOMP_MAP_DECLARE_DEALLOCATE:
+ pp_string (pp, "declare_deallocate");
+ break;
default:
gcc_unreachable ();
}
@@ -41,6 +41,7 @@
#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3)
#define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4)
#define GOMP_MAP_FLAG_SPECIAL_3 (1 << 5)
+#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6)
#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \
| GOMP_MAP_FLAG_SPECIAL_0)
/* Flag to force a specific behavior (or else, trigger a run-time error). */
@@ -149,6 +150,12 @@ enum gomp_map_kind
| GOMP_MAP_FORCE_ALLOC),
GOMP_MAP_DYNAMIC_ARRAY_FORCE_PRESENT = (GOMP_MAP_DYNAMIC_ARRAY
| GOMP_MAP_FORCE_PRESENT),
+ /* Mapping kinds for allocatable arrays. */
+ GOMP_MAP_DECLARE = (GOMP_MAP_FLAG_SPECIAL_4),
+ GOMP_MAP_DECLARE_ALLOCATE = (GOMP_MAP_DECLARE
+ | GOMP_MAP_FORCE_TO),
+ GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_DECLARE
+ | GOMP_MAP_FORCE_FROM),
/* Internal to GCC, not used in libgomp. */
/* Do not map, but pointer assign a pointer instead. */
GOMP_MAP_FIRSTPRIVATE_POINTER = (GOMP_MAP_LAST | 1),
@@ -974,6 +974,8 @@ enum gomp_map_vars_kind
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
extern void gomp_acc_remove_pointer (void *, bool, int, int);
+extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
+ unsigned short *);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
@@ -704,6 +704,34 @@ acc_update_self_async (void *h, size_t s, int async)
}
void
+gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds)
+{
+ gomp_debug (0, " %s: processing\n", __FUNCTION__);
+
+ if (allocate)
+ {
+ assert (mapnum == 3);
+
+ /* Allocate memory for the array data. */
+ uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]);
+
+ /* Update the PSET. */
+ acc_update_device (hostaddrs[1], sizes[1]);
+ void *pset = acc_deviceptr (hostaddrs[1]);
+ acc_memcpy_to_device (pset, &data, sizeof (uintptr_t));
+ }
+ else
+ {
+ /* Deallocate memory for the array data. */
+ void *data = acc_deviceptr (hostaddrs[0]);
+ acc_free (data);
+ }
+
+ gomp_debug (0, " %s: end\n", __FUNCTION__);
+}
+
+void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
void *kinds)
{
@@ -484,14 +484,16 @@ GOACC_enter_exit_data (int device, size_t mapnum,
|| kind == GOMP_MAP_FORCE_PRESENT
|| kind == GOMP_MAP_FORCE_TO
|| kind == GOMP_MAP_TO
- || kind == GOMP_MAP_ALLOC)
+ || kind == GOMP_MAP_ALLOC
+ || kind == GOMP_MAP_DECLARE_ALLOCATE)
{
data_enter = true;
break;
}
if (kind == GOMP_MAP_DELETE
- || kind == GOMP_MAP_FORCE_FROM)
+ || kind == GOMP_MAP_FORCE_FROM
+ || kind == GOMP_MAP_DECLARE_DEALLOCATE)
break;
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -540,7 +542,10 @@ GOACC_enter_exit_data (int device, size_t mapnum,
}
else
{
- if (!acc_is_present (hostaddrs[i], sizes[i]))
+ if (kind == GOMP_MAP_DECLARE_ALLOCATE)
+ gomp_acc_declare_allocate (true, pointer, &hostaddrs[i],
+ &sizes[i], &kinds[i]);
+ else if (!acc_is_present (hostaddrs[i], sizes[i]))
{
gomp_acc_insert_pointer (pointer, &hostaddrs[i],
&sizes[i], &kinds[i]);
@@ -579,7 +584,10 @@ GOACC_enter_exit_data (int device, size_t mapnum,
}
else
{
- if (acc_is_present (hostaddrs[i], sizes[i]))
+ if (kind == GOMP_MAP_DECLARE_DEALLOCATE)
+ gomp_acc_declare_allocate (false, pointer, &hostaddrs[i],
+ &sizes[i], &kinds[i]);
+ else if (acc_is_present (hostaddrs[i], sizes[i]))
{
gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
== GOMP_MAP_FORCE_FROM, async,
new file mode 100644
@@ -0,0 +1,211 @@
+! Test declare create with allocatable arrays.
+
+! { dg-do run }
+
+module vars
+ implicit none
+ integer, parameter :: n = 100
+ real*8, allocatable :: b(:)
+ !$acc declare create (b)
+end module vars
+
+program test
+ use vars
+ use openacc
+ implicit none
+ real*8 :: a
+ integer :: i
+
+ interface
+ subroutine sub1
+ !$acc routine gang
+ end subroutine sub1
+
+ subroutine sub2
+ end subroutine sub2
+
+ real*8 function fun1 (ix)
+ integer ix
+ !$acc routine seq
+ end function fun1
+
+ real*8 function fun2 (ix)
+ integer ix
+ !$acc routine seq
+ end function fun2
+ end interface
+
+ if (allocated (b)) call abort
+
+ ! Test local usage of an allocated declared array.
+
+ allocate (b(n))
+
+ if (.not.allocated (b)) call abort
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ a = 2.0
+
+ !$acc parallel loop
+ do i = 1, n
+ b(i) = i * a
+ end do
+
+ if (.not.acc_is_present (b)) call abort
+
+ !$acc update host(b)
+
+ do i = 1, n
+ if (b(i) /= i*a) call abort
+ end do
+
+ deallocate (b)
+
+ ! Test the usage of an allocated declared array inside an acc
+ ! routine subroutine.
+
+ allocate (b(n))
+
+ if (.not.allocated (b)) call abort
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ !$acc parallel
+ call sub1
+ !$acc end parallel
+
+ if (.not.acc_is_present (b)) call abort
+
+ !$acc update host(b)
+
+ do i = 1, n
+ if (b(i) /= i*2) call abort
+ end do
+
+ deallocate (b)
+
+ ! Test the usage of an allocated declared array inside a host
+ ! subroutine.
+
+ call sub2
+
+ if (.not.acc_is_present (b)) call abort
+
+ !$acc update host(b)
+
+ do i = 1, n
+ if (b(i) /= 1.0) call abort
+ end do
+
+ deallocate (b)
+
+ if (allocated (b)) call abort
+
+ ! Test the usage of an allocated declared array inside an acc
+ ! routine function.
+
+ allocate (b(n))
+
+ if (.not.allocated (b)) call abort
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ !$acc parallel loop
+ do i = 1, n
+ b(i) = 1.0
+ end do
+
+ !$acc parallel loop
+ do i = 1, n
+ b(i) = fun1 (i)
+ end do
+
+ if (.not.acc_is_present (b)) call abort
+
+ !$acc update host(b)
+
+ do i = 1, n
+ if (b(i) /= i) call abort
+ end do
+
+ deallocate (b)
+
+ ! Test the usage of an allocated declared array inside a host
+ ! function.
+
+ allocate (b(n))
+
+ if (.not.allocated (b)) call abort
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ !$acc parallel loop
+ do i = 1, n
+ b(i) = 1.0
+ end do
+
+ !$acc update host(b)
+
+ do i = 1, n
+ b(i) = fun2 (i)
+ end do
+
+ if (.not.acc_is_present (b)) call abort
+
+ do i = 1, n
+ if (b(i) /= i*i) call abort
+ end do
+
+ deallocate (b)
+end program test
+
+! Set each element in array 'b' at index i to i*2.
+
+subroutine sub1 ! { dg-warning "region is worker partitioned" }
+ use vars
+ implicit none
+ integer i
+ !$acc routine gang
+
+ !$acc loop
+ do i = 1, n
+ b(i) = i*2
+ end do
+end subroutine sub1
+
+! Allocate array 'b', and set it to all 1.0.
+
+subroutine sub2
+ use vars
+ use openacc
+ implicit none
+ integer i
+
+ allocate (b(n))
+
+ if (.not.allocated (b)) call abort
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ !$acc parallel loop
+ do i = 1, n
+ b(i) = 1.0
+ end do
+end subroutine sub2
+
+! Return b(i) * i;
+
+real*8 function fun1 (i)
+ use vars
+ implicit none
+ integer i
+ !$acc routine seq
+
+ fun1 = b(i) * i
+end function fun1
+
+! Return b(i) * i * i;
+
+real*8 function fun2 (i)
+ use vars
+ implicit none
+ integer i
+
+ fun2 = b(i) * i * i
+end function fun2