diff mbox

[openacc] use firstprivate pointers for subarrays in c and c++

Message ID 5744A893.3040204@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis May 24, 2016, 7:16 p.m. UTC
On 05/23/2016 11:09 PM, Jakub Jelinek wrote:
> On Mon, May 23, 2016 at 07:31:53PM -0700, Cesar Philippidis wrote:
>> @@ -12559,7 +12560,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>>  	  t = OMP_CLAUSE_DECL (c);
>>  	  if (TREE_CODE (t) == TREE_LIST)
>>  	    {
>> -	      if (handle_omp_array_sections (c, ort & C_ORT_OMP))
>> +	      if (handle_omp_array_sections (c, ort & (C_ORT_OMP | C_ORT_ACC)))
>>  		{
>>  		  remove = true;
>>  		  break;
> 
> You haven't touched the /c/ handle_omp_array_sections{,_1}.  As I said, I believe
> you can just drop the is_omp argument altogether (unlike C++), or, pass for
> consistency ort itself there as well.  But I bet the argument will be
> unused.

OK, I removed is_omp. I only had to guard one call to
handle_omp_array_sections from c_finish_omp_clauses because OpenACC
doesn't support array reductions.

Is this OK for trunk?

Cesar

Comments

Jakub Jelinek May 24, 2016, 7:22 p.m. UTC | #1
On Tue, May 24, 2016 at 12:16:35PM -0700, Cesar Philippidis wrote:
> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -11939,8 +11939,7 @@ c_finish_omp_cancellation_point (location_t loc, tree clauses)
>  
>  static tree
>  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
> -			     bool &maybe_zero_len, unsigned int &first_non_one,
> -			     bool is_omp)
> +			     bool &maybe_zero_len, unsigned int &first_non_one)
>  {
>    tree ret, low_bound, length, type;
>    if (TREE_CODE (t) != TREE_LIST)
> @@ -11949,7 +11948,6 @@ handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
>  	return error_mark_node;
>        ret = t;
>        if (TREE_CODE (t) == COMPONENT_REF
> -	  && is_omp

Sorry, I've missed this one.  The patch is ok if you add on top of the
current patch ort argument to c-typeck.c (handle_omp_array_sections{,_1})
and use here && ort == C_ORT_OMP like in the C++ FE.

	Jakub
Thomas Schwinge May 25, 2016, 4:33 p.m. UTC | #2
Hi!

A few more comments on the patch, as committed in r236678, also for
Chung-Lin and Tom.

The ChangeLos are missing references to GCC PRs, so these now should be
updated manually.  For example, your changes relate to PR70688 "bogus
OpenACC data clause errors involving reductions", and some of the
gcc/c/c-parser.c:c_finish_omp_clauses and
gcc/cp/parser.c:finish_omp_clauses changes (for OpenACC: "data clauses"
instead of "map clauses") and corresponding
gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c etc. test
suite updates relate to <http://gcc.gnu.org/PR65095> "Adapt OpenMP
diagnostic messages for OpenACC" (but that still is to remain open until
addressed in full).  Don't know if there are any other related PRs?

> --- gcc/c/c-parser.c
> +++ gcc/c/c-parser.c
> @@ -13602,6 +13602,7 @@ c_parser_oacc_declare (c_parser *parser)
>  
>        switch (OMP_CLAUSE_MAP_KIND (t))
>  	{
> +	case GOMP_MAP_FIRSTPRIVATE_POINTER:
>  	case GOMP_MAP_FORCE_ALLOC:
>  	case GOMP_MAP_FORCE_TO:
>  	case GOMP_MAP_FORCE_DEVICEPTR:
| 	case GOMP_MAP_DEVICE_RESIDENT:
| 	  break;
| 
| 	case GOMP_MAP_POINTER:
| 	  /* Generated by c_finish_omp_clauses from array sections;
| 	     avoid spurious diagnostics.  */
| 	  break;

Is "case GOMP_MAP_FIRSTPRIVATE_POINTER" meant to replace the "case
GOMP_MAP_POINTER"?  If yes, then please remove that one (does that become
gcc_unreachable?), and update/move the comment, or if not, please update
the comment, too.  ;-)

> --- gcc/c/c-typeck.c
> +++ gcc/c/c-typeck.c

>  /* Handle array sections for clause C.  */
>  
>  static bool
> -handle_omp_array_sections (tree c, bool is_omp)
> +handle_omp_array_sections (tree c, enum c_omp_region_type ort)
>  {
>    [...]
> @@ -12427,7 +12427,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>  	      && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
>  	return false;
>        gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR);
> -      if (is_omp)
> +      if (ort == C_ORT_OMP || ort == C_ORT_ACC)
>  	switch (OMP_CLAUSE_MAP_KIND (c))
>  	  {
>  	  case GOMP_MAP_ALLOC:
| 	  case GOMP_MAP_TO:
| 	  case GOMP_MAP_FROM:
| 	  case GOMP_MAP_TOFROM:
| 	  case GOMP_MAP_ALWAYS_TO:
| 	  case GOMP_MAP_ALWAYS_FROM:
| 	  case GOMP_MAP_ALWAYS_TOFROM:
| 	  case GOMP_MAP_RELEASE:
| 	  case GOMP_MAP_DELETE:
| 	    OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1;
| 	    break;
| 	  default:
>  	    break;
>  	  }

Why doesn't that apply also to the other (OpenACC) map kinds?  Comparing
to the full list in include/gomp-constants.h:enum gomp_map_kind, there
are several missing here.

>        tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
> -      if (!is_omp)
> +      if (ort != C_ORT_OMP && ort != C_ORT_ACC)
>  	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
>        else if (TREE_CODE (t) == COMPONENT_REF)
>  	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);

> --- gcc/cp/parser.c
> +++ gcc/cp/parser.c
> @@ -35214,6 +35214,7 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok)
>        gcc_assert (OMP_CLAUSE_CODE (t) == OMP_CLAUSE_MAP);
>        switch (OMP_CLAUSE_MAP_KIND (t))
>  	{
> +	case GOMP_MAP_FIRSTPRIVATE_POINTER:
>  	case GOMP_MAP_FORCE_ALLOC:
>  	case GOMP_MAP_FORCE_TO:
>  	case GOMP_MAP_FORCE_DEVICEPTR:

Likewise to my gcc/c/c-parser.c comments.

> --- gcc/cp/semantics.c
> +++ gcc/cp/semantics.c

>  /* Handle array sections for clause C.  */
>  
>  static bool
> -handle_omp_array_sections (tree c, bool is_omp)
> +handle_omp_array_sections (tree c, enum c_omp_region_type ort)
>  {
>    [...]
> @@ -4988,7 +4989,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>  	      || (TREE_CODE (t) == COMPONENT_REF
>  		  && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
>  	    return false;
> -	  if (is_omp)
> +	  if (ort == C_ORT_OMP || ort == C_ORT_ACC)
>  	    switch (OMP_CLAUSE_MAP_KIND (c))
>  	      {
>  	      case GOMP_MAP_ALLOC:

Likewise to my gcc/c/c-typeck.c comments.

> @@ -5007,7 +5008,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>  	      }
>  	  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
>  				      OMP_CLAUSE_MAP);
> -	  if (!is_omp)
> +	  if ((ort & C_ORT_OMP_DECLARE_SIMD) != C_ORT_OMP && ort != C_ORT_ACC)
>  	    OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
>  	  else if (TREE_CODE (t) == COMPONENT_REF)
>  	    OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);

Shouldn't that simply be "ort != C_ORT_OMP && ort != C_ORT_ACC"?

> @@ -6054,7 +6070,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>  	    omp_note_field_privatization (t, OMP_CLAUSE_DECL (c));
>  	  else
>  	    t = OMP_CLAUSE_DECL (c);
> -	  if (t == current_class_ptr)
> +	  if (ort != C_ORT_ACC && t == current_class_ptr)
>  	    {
>  	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
>  		     " clauses");

;-) Hmm, reminds me of the unresolved task to support the C++ "this"
pointer in OpenACC...  Anyway, in GCC trunk, we're not allowing "this"
usage, I think, so I suppose this should stay as-is?  (Possibly with an
OpenACC-specific error message.)

> @@ -6681,7 +6701,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>  		     omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
>  	      remove = true;
>  	    }
> -	  else if (t == current_class_ptr)
> +	  else if (ort != C_ORT_ACC && t == current_class_ptr)
>  	    {
>  	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
>  		     " clauses");

Likewise.

> --- gcc/gimplify.c
> +++ gcc/gimplify.c
> @@ -6280,6 +6280,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code)
>  		        error ("variable %qE declared in enclosing "
>  			       "%<host_data%> region", DECL_NAME (decl));
>  		      nflags |= GOVD_MAP;
> +		      if (octx->region_type == ORT_ACC_DATA
> +			  && (n2->value & GOVD_MAP_0LEN_ARRAY))
> +			nflags |= GOVD_MAP_0LEN_ARRAY;
>  		      goto found_outer;
>  		    }
>  		}

Later on, everyone will have a hard time to understand that logic, so
please add comments for such special handling.  Why is ORT_ACC_DATA being
handled differently from the OpenMP target data construct, for example?

> @@ -6855,9 +6858,14 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
>  	    {
>  	    case OMP_TARGET:
>  	      break;
> +	    case OACC_DATA:
> +	      if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
> +		break;

Likewise.

Also add a "/* FALLTHRU */" comment here.

>  	    case OMP_TARGET_DATA:
>  	    case OMP_TARGET_ENTER_DATA:
>  	    case OMP_TARGET_EXIT_DATA:
> +	    case OACC_ENTER_DATA:
> +	    case OACC_EXIT_DATA:
>  	    case OACC_HOST_DATA:
>  	      if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
>  		  || (OMP_CLAUSE_MAP_KIND (c)
| 		      == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
| 		/* For target {,enter ,exit }data only the array slice is
| 		   mapped, but not the pointer to it.  */
| 		remove = true;
| 	      break;
| 	    default:
| 	      break;
| 	    }

By the way, why is this not relevant for the OpenACC update and OpenMP
target update directives, OACC_UPDATE and OMP_TARGET_UPDATE?  Is it
because theses only update existing mappings but don't create new ones?

> @@ -7311,6 +7319,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
>  		    omp_notice_variable (outer_ctx, t, true);
>  		}
>  	    }
> +	  if (code == OACC_DATA
> +	      && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
> +	      && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
> +	    flags |= GOVD_MAP_0LEN_ARRAY;

Again, please add a comment to such special handling.

>  	  omp_add_variable (ctx, decl, flags);
>  	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
>  	      && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> @@ -7569,6 +7581,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
>  	  gcc_unreachable ();
>  	}
>  
> +      if (code == OACC_DATA
> +	  && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
> +	  && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
> +	remove = true;
>        if (remove)
>  	*list_p = OMP_CLAUSE_CHAIN (c);
>        else

Likewise.

> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
> @@ -17,5 +17,5 @@ foo (void)
>  /* Only the omp_data_i related loads should be annotated with
>     non-base 0 cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
>  
> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
> @@ -19,5 +19,5 @@ foo (void)
>  /* Only the omp_data_i related loads should be annotated with
>     non-base 0 cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
>  
> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
> @@ -15,5 +15,5 @@ foo (int *a)
>  
>  /* Only the omp_data_i related loads should be annotated with cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 4 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */

You once explained to me that "the new firstprivate subarray pointer
changes sometimes results in fewer data clauses".  Tom CCed for your
information, also for the following ones:

> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
> @@ -7,7 +7,7 @@ extern void *acc_copyin (void *, size_t);
>  void
>  foo (int *a, size_t n)
>  {
> -  int *p = (int *)acc_copyin (&a, n);
> +  int *p = (int *)acc_copyin (a, n);

ACK (I think).

>  #pragma acc kernels deviceptr (p) pcopy(a[0:n])
>    {
> @@ -18,5 +18,5 @@ foo (int *a, size_t n)
>  
>  /* Only the omp_data_i related loads should be annotated with cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */

Probably as above and/or related to the acc_copyin change?

> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
> @@ -31,6 +31,5 @@ foo (void)
>    free (c);
>  }
>  
> -/* { dg-final { scan-tree-dump-times "(?n)= 0;$" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)= 1;$" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)= \\*a" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)= 0;$" 1 "optimized" { target c } } } */
> +/* { dg-final { scan-tree-dump-times "(?n)= 1;$" 1 "optimized" { target c }  } } */

But that one looks strange to me.  Are we still testing what we're meant
to be testing?  Why is C++ different from C?  Needs a comment, please.

> --- /dev/null
> +++ gcc/testsuite/g++.dg/goacc/data-1.C

As you're duplicating most of the content (first using C++ reference
types, and then templated), please cross-reference that file with the
original gcc/testsuite/c-c++-common/goacc/data-2.c file, and vice verse.

> @@ -0,0 +1,39 @@
> +void
> +foo (int &a, int (&b)[100], int &n)
> +{
> +#pragma acc enter data copyin (a, b) async wait
> +#pragma acc enter data create (b[20:30]) async wait
> +#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause before '\\\(' token" } */
> +#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' before '\\\(' token" } */
> +#pragma acc exit data delete (a) if (0)
> +#pragma acc exit data copyout (b) if (a)
> +#pragma acc exit data delete (b)
> +#pragma acc enter /* { dg-error "expected 'data' in" } */
> +#pragma acc exit /* { dg-error "expected 'data' in" } */
> +#pragma acc enter data /* { dg-error "has no data movement clause" } */
> +#pragma acc exit data /* { dg-error "has no data movement clause" } */
> +#pragma acc enter Data /* { dg-error "invalid pragma before" } */
> +#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
> +}
> +
> +template<typename T>
> +void
> +foo (T &a, T (&b)[100], T &n)
> +{
> +#pragma acc enter data copyin (a, b) async wait
> +#pragma acc enter data create (b[20:30]) async wait
> +#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause before '\\\(' token" } */
> +#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' before '\\\(' token" } */
> +#pragma acc exit data delete (a) if (0)
> +#pragma acc exit data copyout (b) if (a)
> +#pragma acc exit data delete (b)
> +#pragma acc enter /* { dg-error "expected 'data' in" } */
> +#pragma acc exit /* { dg-error "expected 'data' in" } */
> +#pragma acc enter data /* { dg-error "has no data movement clause" } */
> +#pragma acc exit data /* { dg-error "has no data movement clause" } */
> +#pragma acc enter Data /* { dg-error "invalid pragma before" } */
> +#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
> +}
> +
> +/* { dg-error "has no data movement clause" "" { target *-*-* } 6 } */
> +/* { dg-error "has no data movement clause" "" { target *-*-* } 25 } */

I prefer if these dg-error directives are placed in the lines following
the ones they relate to (so in line 7 and line 27 in this case).

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
> @@ -1,6 +1,4 @@
>  /* { dg-do run { target openacc_nvidia_accel_selected } } */
> -/* <http://news.gmane.org/find-root.php?message_id=%3C87pp0aaksc.fsf%40kepler.schwinge.homeip.net%3E>.
> -   { dg-xfail-run-if "TODO" { *-*-* } } */
>  /* { dg-additional-options "-lcuda" } */
>  
>  #include <openacc.h>

Chung-Lin CCed, because his "[PATCH, libgomp] Rewire OpenACC async",
<http://news.gmane.org/find-root.php?message_id=%3Cd37ca1c5-c8ed-5464-9660-7269f1460615%40codesourcery.com%3E>
is also meant to resolves this XFAIL.  Is that just a coincidence?

> copy from libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> copy to libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
> @@ -1,12 +1,18 @@
> +/* This test is similar to data-2.c, but it uses acc_* library functions
> +   to move data.  */
> [...]

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> @@ -1,3 +1,5 @@
> +/* Test 'acc enter/exit data' regions.  */
> +

Should also note here that its content is duplicated in data-2-lib.c.

> @@ -25,7 +27,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
> -#pragma acc parallel async wait
> +#pragma acc parallel present (a[0:N], b[0:N]) async wait
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];

I don't understand why we're adding all these "present" clauses instead
of relying on the standard/implicit "present_or_copy" behavior?
(... which is what users would be doing, I think?)  Same question applies
to data-2-lib.c, too.

> @@ -49,7 +51,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1)
> -#pragma acc parallel async (1)
> +#pragma acc parallel present (a[0:N], b[0:N])  async (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];
> @@ -76,17 +78,17 @@ main (int argc, char **argv)
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (N) async (1)
>  
> -#pragma acc parallel async (1) wait (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = (a[i] * a[i] * a[i]) / a[i];
>  
> -#pragma acc parallel async (2) wait (1)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
>  
> -#pragma acc parallel async (3) wait (1)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
> @@ -120,26 +122,27 @@ main (int argc, char **argv)
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1)
>  
> -#pragma acc parallel async (1) wait (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
>  
> -#pragma acc parallel async (2) wait (1)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
>  
> -#pragma acc parallel async (3) wait (1)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
>  
> -#pragma acc parallel wait (1) async (4)
> +#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
> +  wait (1) async (4)
>    for (int ii = 0; ii < N; ii++)
>      e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
>  
> -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
> +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
> +  copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
>  #pragma acc wait (1)

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c

> @@ -25,7 +27,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
> -#pragma acc parallel async wait
> +#pragma acc parallel present (a[0:N], b[0:N]) async wait
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];

Likewise ("present" clauses).

> @@ -49,7 +51,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc update device (a[0:N], b[0:N]) async (1)
> -#pragma acc parallel async (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];
> @@ -78,17 +80,17 @@ main (int argc, char **argv)
>  #pragma acc update device (b[0:N]) async (2)
>  #pragma acc enter data copyin (c[0:N], d[0:N]) async (3)
>  
> -#pragma acc parallel async (1) wait (1,2)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1,2)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = (a[i] * a[i] * a[i]) / a[i];
>  
> -#pragma acc parallel async (2) wait (1,3)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1,3)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
>  
> -#pragma acc parallel async (3) wait (1,3)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1,3)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
> @@ -123,27 +125,28 @@ main (int argc, char **argv)
>  #pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1)
>  #pragma acc enter data copyin (e[0:N]) async (5)
>  
> -#pragma acc parallel async (1) wait (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
>  
> -#pragma acc parallel async (2) wait (1)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
>  
> -#pragma acc parallel async (3) wait (1)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
>  
> -#pragma acc parallel wait (1,5) async (4)
> +#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
> +  wait (1,5) async (4)
>    for (int ii = 0; ii < N; ii++)
>      e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
>  
> -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
> +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
> +  copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
>  #pragma acc exit data delete (N)
>  #pragma acc wait (1)

> --- /dev/null
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/enter_exit-lib.c
> @@ -0,0 +1,70 @@
> +/* Verify enter/exit data interoperablilty between pragmas and
> +   acc library calls.  */
> +
> +/* { dg-do run } */
> +
> +#include <stdlib.h>
> +#include <assert.h>
> +#include <openacc.h>
> +
> +int
> +main ()
> +{
> +  int *p = (int *)malloc (sizeof (int));
> +
> +  /* Test 1: pragma input, library output.  */
> +  
> +#pragma acc enter data copyin (p[0:1])
> +
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 1;
> +  }
> +
> +  acc_copyout (p, sizeof (int));
> +
> +  assert (p[0] == 1);
> +  
> +  /* Test 2: library input, pragma output.  */
> +
> +  acc_copyin (p, sizeof (int));
> +
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 2;
> +  }
> +
> +#pragma acc exit data copyout (p[0:1])
> +  
> +  assert (p[0] == 2);
> +
> +  /* Test 3: library input, library output.  */
> +
> +  acc_copyin (p, sizeof (int));
> +
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 3;
> +  }
> +
> +  acc_copyout (p, sizeof (int));
> +  
> +  assert (p[0] == 3);
> +
> +  /* Test 4: pragma input, pragma output.  */
> +
> +#pragma acc enter data copyin (p[0:1])
> +  
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 3;

Meant to use "4" here?

> +  }
> +
> +#pragma acc exit data copyout (p[0:1])
> +  
> +  assert (p[0] == 3);
> +  
> +  free (p);
> +
> +  return 0;
> +}

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
> @@ -1,4 +1,6 @@
> -/* { dg-do run } */
> +/* Check acc_is_present and acc_delete.  */

Thanks for all these summary comments that you've added!


Grüße
 Thomas
Cesar Philippidis June 1, 2016, 5:33 p.m. UTC | #3
On 05/25/2016 09:33 AM, Thomas Schwinge wrote:

>> --- gcc/c/c-parser.c
>> +++ gcc/c/c-parser.c
>> @@ -13602,6 +13602,7 @@ c_parser_oacc_declare (c_parser *parser)
>>  
>>        switch (OMP_CLAUSE_MAP_KIND (t))
>>  	{
>> +	case GOMP_MAP_FIRSTPRIVATE_POINTER:
>>  	case GOMP_MAP_FORCE_ALLOC:
>>  	case GOMP_MAP_FORCE_TO:
>>  	case GOMP_MAP_FORCE_DEVICEPTR:
> | 	case GOMP_MAP_DEVICE_RESIDENT:
> | 	  break;
> | 
> | 	case GOMP_MAP_POINTER:
> | 	  /* Generated by c_finish_omp_clauses from array sections;
> | 	     avoid spurious diagnostics.  */
> | 	  break;
> 
> Is "case GOMP_MAP_FIRSTPRIVATE_POINTER" meant to replace the "case
> GOMP_MAP_POINTER"?  If yes, then please remove that one (does that become
> gcc_unreachable?), and update/move the comment, or if not, please update
> the comment, too.  ;-)

This can be pruned out. I'm preparing a follow up patch with it's removal.

>> --- gcc/c/c-typeck.c
>> +++ gcc/c/c-typeck.c
> 
>>  /* Handle array sections for clause C.  */
>>  
>>  static bool
>> -handle_omp_array_sections (tree c, bool is_omp)
>> +handle_omp_array_sections (tree c, enum c_omp_region_type ort)
>>  {
>>    [...]
>> @@ -12427,7 +12427,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>>  	      && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
>>  	return false;
>>        gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR);
>> -      if (is_omp)
>> +      if (ort == C_ORT_OMP || ort == C_ORT_ACC)
>>  	switch (OMP_CLAUSE_MAP_KIND (c))
>>  	  {
>>  	  case GOMP_MAP_ALLOC:
> | 	  case GOMP_MAP_TO:
> | 	  case GOMP_MAP_FROM:
> | 	  case GOMP_MAP_TOFROM:
> | 	  case GOMP_MAP_ALWAYS_TO:
> | 	  case GOMP_MAP_ALWAYS_FROM:
> | 	  case GOMP_MAP_ALWAYS_TOFROM:
> | 	  case GOMP_MAP_RELEASE:
> | 	  case GOMP_MAP_DELETE:
> | 	    OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1;
> | 	    break;
> | 	  default:
>>  	    break;
>>  	  }
> 
> Why doesn't that apply also to the other (OpenACC) map kinds?  Comparing
> to the full list in include/gomp-constants.h:enum gomp_map_kind, there
> are several missing here.

It does look like there are situations where OpenACC can take
zero-length arrays, e.g. when the length argument is a variable. This
will be fixed in my follow up patch.

>>        tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
>> -      if (!is_omp)
>> +      if (ort != C_ORT_OMP && ort != C_ORT_ACC)
>>  	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
>>        else if (TREE_CODE (t) == COMPONENT_REF)
>>  	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);
> 
>> --- gcc/cp/parser.c
>> +++ gcc/cp/parser.c
>> @@ -35214,6 +35214,7 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok)
>>        gcc_assert (OMP_CLAUSE_CODE (t) == OMP_CLAUSE_MAP);
>>        switch (OMP_CLAUSE_MAP_KIND (t))
>>  	{
>> +	case GOMP_MAP_FIRSTPRIVATE_POINTER:
>>  	case GOMP_MAP_FORCE_ALLOC:
>>  	case GOMP_MAP_FORCE_TO:
>>  	case GOMP_MAP_FORCE_DEVICEPTR:
> 
> Likewise to my gcc/c/c-parser.c comments.
> 
>> --- gcc/cp/semantics.c
>> +++ gcc/cp/semantics.c
> 
>>  /* Handle array sections for clause C.  */
>>  
>>  static bool
>> -handle_omp_array_sections (tree c, bool is_omp)
>> +handle_omp_array_sections (tree c, enum c_omp_region_type ort)
>>  {
>>    [...]
>> @@ -4988,7 +4989,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>>  	      || (TREE_CODE (t) == COMPONENT_REF
>>  		  && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
>>  	    return false;
>> -	  if (is_omp)
>> +	  if (ort == C_ORT_OMP || ort == C_ORT_ACC)
>>  	    switch (OMP_CLAUSE_MAP_KIND (c))
>>  	      {
>>  	      case GOMP_MAP_ALLOC:
> 
> Likewise to my gcc/c/c-typeck.c comments.
> 
>> @@ -5007,7 +5008,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>>  	      }
>>  	  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
>>  				      OMP_CLAUSE_MAP);
>> -	  if (!is_omp)
>> +	  if ((ort & C_ORT_OMP_DECLARE_SIMD) != C_ORT_OMP && ort != C_ORT_ACC)
>>  	    OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
>>  	  else if (TREE_CODE (t) == COMPONENT_REF)
>>  	    OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);
> 
> Shouldn't that simply be "ort != C_ORT_OMP && ort != C_ORT_ACC"?

No, because then that wouldn't cover omp declare simd.

>> @@ -6054,7 +6070,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>>  	    omp_note_field_privatization (t, OMP_CLAUSE_DECL (c));
>>  	  else
>>  	    t = OMP_CLAUSE_DECL (c);
>> -	  if (t == current_class_ptr)
>> +	  if (ort != C_ORT_ACC && t == current_class_ptr)
>>  	    {
>>  	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
>>  		     " clauses");
> 
> ;-) Hmm, reminds me of the unresolved task to support the C++ "this"
> pointer in OpenACC...  Anyway, in GCC trunk, we're not allowing "this"
> usage, I think, so I suppose this should stay as-is?  (Possibly with an
> OpenACC-specific error message.)

What do you want to do about c++'s 'this' in OpenACC? It looks like
gomp4 partially supports it. Maybe we should wait to get clarification
from the technical committee?

>> @@ -6681,7 +6701,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>>  		     omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
>>  	      remove = true;
>>  	    }
>> -	  else if (t == current_class_ptr)
>> +	  else if (ort != C_ORT_ACC && t == current_class_ptr)
>>  	    {
>>  	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
>>  		     " clauses");
> 
> Likewise.
> 
>> --- gcc/gimplify.c
>> +++ gcc/gimplify.c
>> @@ -6280,6 +6280,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code)
>>  		        error ("variable %qE declared in enclosing "
>>  			       "%<host_data%> region", DECL_NAME (decl));
>>  		      nflags |= GOVD_MAP;
>> +		      if (octx->region_type == ORT_ACC_DATA
>> +			  && (n2->value & GOVD_MAP_0LEN_ARRAY))
>> +			nflags |= GOVD_MAP_0LEN_ARRAY;
>>  		      goto found_outer;
>>  		    }
>>  		}
> 
> Later on, everyone will have a hard time to understand that logic, so
> please add comments for such special handling.  Why is ORT_ACC_DATA being
> handled differently from the OpenMP target data construct, for example?

It's because pointers in OpenACC have two separate meanings depending on
context, whereas in OpenMP they only have one meaning. In OpenACC, a
pointer by itself, e.g., copy(ptr), is supposed to be treated like
scalar. However, a pointer to a subarray, e.g. copy(ptr[0:10]) is
supposed to treated as firstprivate so that only the data gets the data
clause.

These gimplifier changes were necessary to get proper implicit data
clauses for parallel and kernels regions nested inside data regions. E.g.

  #pragma acc data copy(ptr[0:10])
  {
    #pragma acc parallel
    {
      ... ptr[x] = ...
    }
  }

Without these changes, ptr would be treated like a scalar.

>> @@ -6855,9 +6858,14 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
>>  	    {
>>  	    case OMP_TARGET:
>>  	      break;
>> +	    case OACC_DATA:
>> +	      if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
>> +		break;
> 
> Likewise.
> 
> Also add a "/* FALLTHRU */" comment here.
> 
>>  	    case OMP_TARGET_DATA:
>>  	    case OMP_TARGET_ENTER_DATA:
>>  	    case OMP_TARGET_EXIT_DATA:
>> +	    case OACC_ENTER_DATA:
>> +	    case OACC_EXIT_DATA:
>>  	    case OACC_HOST_DATA:
>>  	      if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
>>  		  || (OMP_CLAUSE_MAP_KIND (c)
> | 		      == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
> | 		/* For target {,enter ,exit }data only the array slice is
> | 		   mapped, but not the pointer to it.  */
> | 		remove = true;
> | 	      break;
> | 	    default:
> | 	      break;
> | 	    }
> 
> By the way, why is this not relevant for the OpenACC update and OpenMP
> target update directives, OACC_UPDATE and OMP_TARGET_UPDATE?  Is it
> because theses only update existing mappings but don't create new ones?

I suppose they can be added here, but lower_omp_target already ignores
GOMP_MAP_FIRSTPRIVATE_POINTER for non-offloaded and data_region regions.

Cesar
Jakub Jelinek June 1, 2016, 5:52 p.m. UTC | #4
On Wed, Jun 01, 2016 at 10:33:32AM -0700, Cesar Philippidis wrote:
> >>  	switch (OMP_CLAUSE_MAP_KIND (c))
> >>  	  {
> >>  	  case GOMP_MAP_ALLOC:
> > | 	  case GOMP_MAP_TO:
> > | 	  case GOMP_MAP_FROM:
> > | 	  case GOMP_MAP_TOFROM:
> > | 	  case GOMP_MAP_ALWAYS_TO:
> > | 	  case GOMP_MAP_ALWAYS_FROM:
> > | 	  case GOMP_MAP_ALWAYS_TOFROM:
> > | 	  case GOMP_MAP_RELEASE:
> > | 	  case GOMP_MAP_DELETE:
> > | 	    OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1;
> > | 	    break;
> > | 	  default:
> >>  	    break;
> >>  	  }
> > 
> > Why doesn't that apply also to the other (OpenACC) map kinds?  Comparing
> > to the full list in include/gomp-constants.h:enum gomp_map_kind, there
> > are several missing here.
> 
> It does look like there are situations where OpenACC can take
> zero-length arrays, e.g. when the length argument is a variable. This
> will be fixed in my follow up patch.

The question is if you need/want the OpenMP 4.5 mandated handling of zero length
array sections for those, where mapping of zero length array section (unlike
e.g. GNU extension zero length object) doesn't actually map it if not
already mapped, but sets the corresponding pointer to NULL.  If already
mapped, it increments refcount. 

> >> @@ -6054,7 +6070,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
> >>  	    omp_note_field_privatization (t, OMP_CLAUSE_DECL (c));
> >>  	  else
> >>  	    t = OMP_CLAUSE_DECL (c);
> >> -	  if (t == current_class_ptr)
> >> +	  if (ort != C_ORT_ACC && t == current_class_ptr)
> >>  	    {
> >>  	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
> >>  		     " clauses");
> > 
> > ;-) Hmm, reminds me of the unresolved task to support the C++ "this"
> > pointer in OpenACC...  Anyway, in GCC trunk, we're not allowing "this"
> > usage, I think, so I suppose this should stay as-is?  (Possibly with an
> > OpenACC-specific error message.)
> 
> What do you want to do about c++'s 'this' in OpenACC? It looks like
> gomp4 partially supports it. Maybe we should wait to get clarification
> from the technical committee?

In OpenMP 4.5 it is only allowed in declare simd clauses, but that might
change in OpenMP 5.0.

	Jakub
diff mbox

Patch

2016-05-24  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/c
	* c-parser.c (c_parser_oacc_declare): Add support for
	GOMP_MAP_FIRSTPRIVATE_POINTER.
	* c-typeck.c (handle_omp_array_sections_1): Remove is_omp argument.
	(handle_omp_array_sections): Likewise.
	(c_finish_omp_clauses): Add specific errors and warning messages for
	OpenACC.  Use firsrtprivate pointers for OpenACC subarrays.  Update
	calls to handle_omp_array_sections.

	gcc/cp/
	* parser.c (cp_parser_oacc_declare): Add support for
	GOMP_MAP_FIRSTPRIVATE_POINTER.
	* semantics.c (handle_omp_array_sections_1): Replace bool is_omp
	argument with enum c_omp_region_type ort.  Don't privatize OpenACC
	non-static members.
	(handle_omp_array_sections): Replace bool is_omp argument with enum
	c_omp_region_type ort.  Update call to handle_omp_array_sections_1.
	(finish_omp_clauses): Add specific errors and warning messages for
	OpenACC.  Use firsrtprivate pointers for OpenACC subarrays.  Update
	call to handle_omp_array_sections.

	gcc/
	* gimplify.c (omp_notice_variable): Use zero-length arrays for data
	pointers inside OACC_DATA regions.
	(gimplify_scan_omp_clauses): Prune firstprivate clause associated
	with OACC_DATA, OACC_ENTER_DATA and OACC_EXIT data regions.
	(gimplify_adjust_omp_clauses): Fix typo in comment.

	gcc/testsuite/
	* c-c++-common/goacc/data-clause-duplicate-1.c: Adjust test.
	* c-c++-common/goacc/deviceptr-1.c: Likewise.
	* c-c++-common/goacc/kernels-alias-3.c: Likewise.
	* c-c++-common/goacc/kernels-alias-4.c: Likewise.
	* c-c++-common/goacc/kernels-alias-5.c: Likewise.
	* c-c++-common/goacc/kernels-alias-8.c: Likewise.
	* c-c++-common/goacc/kernels-alias-ipa-pta-3.c: Likewise.
	* c-c++-common/goacc/pcopy.c: Likewise.
	* c-c++-common/goacc/pcopyin.c: Likewise.
	* c-c++-common/goacc/pcopyout.c: Likewise.
	* c-c++-common/goacc/pcreate.c: Likewise.
	* c-c++-common/goacc/pr70688.c: New test.
	* c-c++-common/goacc/present-1.c: Adjust test.
	* c-c++-common/goacc/reduction-5.c: Likewise.
	* g++.dg/goacc/data-1.C: New test.

	libgomp/
	* oacc-mem.c (acc_malloc): Update handling of shared-memory targets.
	(acc_free): Likewise.
	(acc_memcpy_to_device): Likewise.
	(acc_memcpy_from_device): Likewise.
	(acc_deviceptr): Likewise.
	(acc_hostptr): Likewise.
	(acc_is_present): Likewise.
	(acc_map_data): Likewise.
	(acc_unmap_data): Likewise.
	(present_create_copy): Likewise.
	(delete_copyout): Likewise.
	(update_dev_host): Likewise.
	* testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Remove xfail.
	* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/data-2.c: Adjust test.
	* testsuite/libgomp.oacc-c-c++-common/data-3.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/enter_exit-lib.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/lib-13.c: Adjust test so that
	it only runs on nvptx targets.
	* testsuite/libgomp.oacc-c-c++-common/lib-14.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-15.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-16.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-17.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-18.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-20.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-21.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-22.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-24.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-25.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-28.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-29.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-30.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-34.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-42.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-43.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-44.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-47.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-48.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-52.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-53.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-54.c: Likewise.


diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 1bc5eed..1cf4fb4 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -13602,6 +13602,7 @@  c_parser_oacc_declare (c_parser *parser)
 
       switch (OMP_CLAUSE_MAP_KIND (t))
 	{
+	case GOMP_MAP_FIRSTPRIVATE_POINTER:
 	case GOMP_MAP_FORCE_ALLOC:
 	case GOMP_MAP_FORCE_TO:
 	case GOMP_MAP_FORCE_DEVICEPTR:
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 74bad2a..33858bb 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -11939,8 +11939,7 @@  c_finish_omp_cancellation_point (location_t loc, tree clauses)
 
 static tree
 handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
-			     bool &maybe_zero_len, unsigned int &first_non_one,
-			     bool is_omp)
+			     bool &maybe_zero_len, unsigned int &first_non_one)
 {
   tree ret, low_bound, length, type;
   if (TREE_CODE (t) != TREE_LIST)
@@ -11949,7 +11948,6 @@  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
 	return error_mark_node;
       ret = t;
       if (TREE_CODE (t) == COMPONENT_REF
-	  && is_omp
 	  && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
 	      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TO
 	      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM))
@@ -11996,7 +11994,7 @@  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
     }
 
   ret = handle_omp_array_sections_1 (c, TREE_CHAIN (t), types,
-				     maybe_zero_len, first_non_one, is_omp);
+				     maybe_zero_len, first_non_one);
   if (ret == error_mark_node || ret == NULL_TREE)
     return ret;
 
@@ -12227,14 +12225,13 @@  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
 /* Handle array sections for clause C.  */
 
 static bool
-handle_omp_array_sections (tree c, bool is_omp)
+handle_omp_array_sections (tree c)
 {
   bool maybe_zero_len = false;
   unsigned int first_non_one = 0;
   auto_vec<tree, 10> types;
   tree first = handle_omp_array_sections_1 (c, OMP_CLAUSE_DECL (c), types,
-					    maybe_zero_len, first_non_one,
-					    is_omp);
+					    maybe_zero_len, first_non_one);
   if (first == error_mark_node)
     return true;
   if (first == NULL_TREE)
@@ -12427,27 +12424,24 @@  handle_omp_array_sections (tree c, bool is_omp)
 	      && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
 	return false;
       gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR);
-      if (is_omp)
-	switch (OMP_CLAUSE_MAP_KIND (c))
-	  {
-	  case GOMP_MAP_ALLOC:
-	  case GOMP_MAP_TO:
-	  case GOMP_MAP_FROM:
-	  case GOMP_MAP_TOFROM:
-	  case GOMP_MAP_ALWAYS_TO:
-	  case GOMP_MAP_ALWAYS_FROM:
-	  case GOMP_MAP_ALWAYS_TOFROM:
-	  case GOMP_MAP_RELEASE:
-	  case GOMP_MAP_DELETE:
-	    OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1;
-	    break;
-	  default:
-	    break;
-	  }
+      switch (OMP_CLAUSE_MAP_KIND (c))
+	{
+	case GOMP_MAP_ALLOC:
+	case GOMP_MAP_TO:
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_TOFROM:
+	case GOMP_MAP_ALWAYS_TO:
+	case GOMP_MAP_ALWAYS_FROM:
+	case GOMP_MAP_ALWAYS_TOFROM:
+	case GOMP_MAP_RELEASE:
+	case GOMP_MAP_DELETE:
+	  OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1;
+	  break;
+	default:
+	  break;
+	}
       tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
-      if (!is_omp)
-	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
-      else if (TREE_CODE (t) == COMPONENT_REF)
+      if (TREE_CODE (t) == COMPONENT_REF)
 	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);
       else
 	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FIRSTPRIVATE_POINTER);
@@ -12520,7 +12514,7 @@  tree
 c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 {
   bitmap_head generic_head, firstprivate_head, lastprivate_head;
-  bitmap_head aligned_head, map_head, map_field_head;
+  bitmap_head aligned_head, map_head, map_field_head, oacc_reduction_head;
   tree c, t, type, *pc;
   tree simdlen = NULL_TREE, safelen = NULL_TREE;
   bool branch_seen = false;
@@ -12537,6 +12531,7 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
   bitmap_initialize (&aligned_head, &bitmap_default_obstack);
   bitmap_initialize (&map_head, &bitmap_default_obstack);
   bitmap_initialize (&map_field_head, &bitmap_default_obstack);
+  bitmap_initialize (&oacc_reduction_head, &bitmap_default_obstack);
 
   for (pc = &clauses, c = clauses; c ; c = *pc)
     {
@@ -12560,7 +12555,7 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  t = OMP_CLAUSE_DECL (c);
 	  if (TREE_CODE (t) == TREE_LIST)
 	    {
-	      if (handle_omp_array_sections (c, ort & C_ORT_OMP))
+	      if ((ort & C_ORT_OMP) && handle_omp_array_sections (c))
 		{
 		  remove = true;
 		  break;
@@ -12874,6 +12869,17 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 			omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
 	      remove = true;
 	    }
+	  else if (ort == C_ORT_ACC
+		   && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+	    {
+	      if (bitmap_bit_p (&oacc_reduction_head, DECL_UID (t)))
+		{
+		  error ("%qD appears more than once in reduction clauses", t);
+		  remove = true;
+		}
+	      else
+		bitmap_set_bit (&oacc_reduction_head, DECL_UID (t));
+	    }
 	  else if (bitmap_bit_p (&generic_head, DECL_UID (t))
 		   || bitmap_bit_p (&firstprivate_head, DECL_UID (t))
 		   || bitmap_bit_p (&lastprivate_head, DECL_UID (t)))
@@ -12885,7 +12891,10 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
 		   && bitmap_bit_p (&map_head, DECL_UID (t)))
 	    {
-	      error ("%qD appears both in data and map clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
+	      else
+		error ("%qD appears both in data and map clauses", t);
 	      remove = true;
 	    }
 	  else
@@ -12911,7 +12920,10 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    }
 	  else if (bitmap_bit_p (&map_head, DECL_UID (t)))
 	    {
-	      error ("%qD appears both in data and map clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
+	      else
+		error ("%qD appears both in data and map clauses", t);
 	      remove = true;
 	    }
 	  else
@@ -13004,7 +13016,7 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    }
 	  if (TREE_CODE (t) == TREE_LIST)
 	    {
-	      if (handle_omp_array_sections (c, ort & C_ORT_OMP))
+	      if (handle_omp_array_sections (c))
 		remove = true;
 	      break;
 	    }
@@ -13027,7 +13039,7 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  t = OMP_CLAUSE_DECL (c);
 	  if (TREE_CODE (t) == TREE_LIST)
 	    {
-	      if (handle_omp_array_sections (c, ort & C_ORT_OMP))
+	      if (handle_omp_array_sections (c))
 		remove = true;
 	      else
 		{
@@ -13054,6 +13066,9 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 			  if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
 			    error ("%qD appears more than once in motion"
 				   " clauses", t);
+			  else if (ort == C_ORT_ACC)
+			    error ("%qD appears more than once in data"
+				   " clauses", t);
 			  else
 			    error ("%qD appears more than once in map"
 				   " clauses", t);
@@ -13155,7 +13170,10 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 		}
 	      else if (bitmap_bit_p (&map_head, DECL_UID (t)))
 		{
-		  error ("%qD appears both in data and map clauses", t);
+		  if (ort == C_ORT_ACC)
+		    error ("%qD appears more than once in data clauses", t);
+		  else
+		    error ("%qD appears both in data and map clauses", t);
 		  remove = true;
 		}
 	      else
@@ -13165,6 +13183,8 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    {
 	      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
 		error ("%qD appears more than once in motion clauses", t);
+	      else if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
 	      else
 		error ("%qD appears more than once in map clauses", t);
 	      remove = true;
@@ -13172,7 +13192,10 @@  c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  else if (bitmap_bit_p (&generic_head, DECL_UID (t))
 		   || bitmap_bit_p (&firstprivate_head, DECL_UID (t)))
 	    {
-	      error ("%qD appears both in data and map clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
+	      else
+		error ("%qD appears both in data and map clauses", t);
 	      remove = true;
 	    }
 	  else
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index a68a510..d21230f 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -35214,6 +35214,7 @@  cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok)
       gcc_assert (OMP_CLAUSE_CODE (t) == OMP_CLAUSE_MAP);
       switch (OMP_CLAUSE_MAP_KIND (t))
 	{
+	case GOMP_MAP_FIRSTPRIVATE_POINTER:
 	case GOMP_MAP_FORCE_ALLOC:
 	case GOMP_MAP_FORCE_TO:
 	case GOMP_MAP_FORCE_DEVICEPTR:
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 06dee5a..ad65a1b 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -4472,7 +4472,7 @@  omp_privatize_field (tree t, bool shared)
 static tree
 handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
 			     bool &maybe_zero_len, unsigned int &first_non_one,
-			     bool is_omp)
+			     enum c_omp_region_type ort)
 {
   tree ret, low_bound, length, type;
   if (TREE_CODE (t) != TREE_LIST)
@@ -4484,7 +4484,7 @@  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
 	t = TREE_OPERAND (t, 0);
       ret = t;
       if (TREE_CODE (t) == COMPONENT_REF
-	  && is_omp
+	  && ort == C_ORT_OMP
 	  && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
 	      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TO
 	      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM)
@@ -4545,11 +4545,12 @@  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
       return ret;
     }
 
-  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+  if (ort == C_ORT_OMP
+      && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
       && TREE_CODE (TREE_CHAIN (t)) == FIELD_DECL)
     TREE_CHAIN (t) = omp_privatize_field (TREE_CHAIN (t), false);
   ret = handle_omp_array_sections_1 (c, TREE_CHAIN (t), types,
-				     maybe_zero_len, first_non_one, is_omp);
+				     maybe_zero_len, first_non_one, ort);
   if (ret == error_mark_node || ret == NULL_TREE)
     return ret;
 
@@ -4792,14 +4793,14 @@  handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
 /* Handle array sections for clause C.  */
 
 static bool
-handle_omp_array_sections (tree c, bool is_omp)
+handle_omp_array_sections (tree c, enum c_omp_region_type ort)
 {
   bool maybe_zero_len = false;
   unsigned int first_non_one = 0;
   auto_vec<tree, 10> types;
   tree first = handle_omp_array_sections_1 (c, OMP_CLAUSE_DECL (c), types,
 					    maybe_zero_len, first_non_one,
-					    is_omp);
+					    ort);
   if (first == error_mark_node)
     return true;
   if (first == NULL_TREE)
@@ -4988,7 +4989,7 @@  handle_omp_array_sections (tree c, bool is_omp)
 	      || (TREE_CODE (t) == COMPONENT_REF
 		  && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
 	    return false;
-	  if (is_omp)
+	  if (ort == C_ORT_OMP || ort == C_ORT_ACC)
 	    switch (OMP_CLAUSE_MAP_KIND (c))
 	      {
 	      case GOMP_MAP_ALLOC:
@@ -5007,7 +5008,7 @@  handle_omp_array_sections (tree c, bool is_omp)
 	      }
 	  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
 				      OMP_CLAUSE_MAP);
-	  if (!is_omp)
+	  if ((ort & C_ORT_OMP_DECLARE_SIMD) != C_ORT_OMP && ort != C_ORT_ACC)
 	    OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
 	  else if (TREE_CODE (t) == COMPONENT_REF)
 	    OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);
@@ -5774,7 +5775,7 @@  tree
 finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 {
   bitmap_head generic_head, firstprivate_head, lastprivate_head;
-  bitmap_head aligned_head, map_head, map_field_head;
+  bitmap_head aligned_head, map_head, map_field_head, oacc_reduction_head;
   tree c, t, *pc;
   tree safelen = NULL_TREE;
   bool branch_seen = false;
@@ -5788,6 +5789,7 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
   bitmap_initialize (&aligned_head, &bitmap_default_obstack);
   bitmap_initialize (&map_head, &bitmap_default_obstack);
   bitmap_initialize (&map_field_head, &bitmap_default_obstack);
+  bitmap_initialize (&oacc_reduction_head, &bitmap_default_obstack);
 
   for (pc = &clauses, c = clauses; c ; c = *pc)
     {
@@ -5807,8 +5809,7 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  t = OMP_CLAUSE_DECL (c);
 	  if (TREE_CODE (t) == TREE_LIST)
 	    {
-	      if (handle_omp_array_sections (c, ((ort & C_ORT_OMP_DECLARE_SIMD)
-						 == C_ORT_OMP)))
+	      if (handle_omp_array_sections (c, ort))
 		{
 		  remove = true;
 		  break;
@@ -6018,6 +6019,17 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 		       omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
 	      remove = true;
 	    }
+	  else if (ort == C_ORT_ACC
+		   && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+	    {
+	      if (bitmap_bit_p (&oacc_reduction_head, DECL_UID (t)))
+		{
+		  error ("%qD appears more than once in reduction clauses", t);
+		  remove = true;
+		}
+	      else
+		bitmap_set_bit (&oacc_reduction_head, DECL_UID (t));
+	    }
 	  else if (bitmap_bit_p (&generic_head, DECL_UID (t))
 		   || bitmap_bit_p (&firstprivate_head, DECL_UID (t))
 		   || bitmap_bit_p (&lastprivate_head, DECL_UID (t)))
@@ -6028,7 +6040,10 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
 		   && bitmap_bit_p (&map_head, DECL_UID (t)))
 	    {
-	      error ("%qD appears both in data and map clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
+	      else
+		error ("%qD appears both in data and map clauses", t);
 	      remove = true;
 	    }
 	  else
@@ -6038,7 +6053,8 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	handle_field_decl:
 	  if (!remove
 	      && TREE_CODE (t) == FIELD_DECL
-	      && t == OMP_CLAUSE_DECL (c))
+	      && t == OMP_CLAUSE_DECL (c)
+	      && ort != C_ORT_ACC)
 	    {
 	      OMP_CLAUSE_DECL (c)
 		= omp_privatize_field (t, (OMP_CLAUSE_CODE (c)
@@ -6054,7 +6070,7 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    omp_note_field_privatization (t, OMP_CLAUSE_DECL (c));
 	  else
 	    t = OMP_CLAUSE_DECL (c);
-	  if (t == current_class_ptr)
+	  if (ort != C_ORT_ACC && t == current_class_ptr)
 	    {
 	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
 		     " clauses");
@@ -6081,7 +6097,10 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    }
 	  else if (bitmap_bit_p (&map_head, DECL_UID (t)))
 	    {
-	      error ("%qD appears both in data and map clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
+	      else
+		error ("%qD appears both in data and map clauses", t);
 	      remove = true;
 	    }
 	  else
@@ -6529,8 +6548,7 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    }
 	  if (TREE_CODE (t) == TREE_LIST)
 	    {
-	      if (handle_omp_array_sections (c, ((ort & C_ORT_OMP_DECLARE_SIMD)
-						 == C_ORT_OMP)))
+	      if (handle_omp_array_sections (c, ort))
 		remove = true;
 	      break;
 	    }
@@ -6564,8 +6582,7 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  t = OMP_CLAUSE_DECL (c);
 	  if (TREE_CODE (t) == TREE_LIST)
 	    {
-	      if (handle_omp_array_sections (c, ((ort & C_ORT_OMP_DECLARE_SIMD)
-						 == C_ORT_OMP)))
+	      if (handle_omp_array_sections (c, ort))
 		remove = true;
 	      else
 		{
@@ -6594,6 +6611,9 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 			  if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
 			    error ("%qD appears more than once in motion"
 				   " clauses", t);
+			  else if (ort == C_ORT_ACC)
+			    error ("%qD appears more than once in data"
+				   " clauses", t);
 			  else
 			    error ("%qD appears more than once in map"
 				   " clauses", t);
@@ -6681,7 +6701,7 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 		     omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
 	      remove = true;
 	    }
-	  else if (t == current_class_ptr)
+	  else if (ort != C_ORT_ACC && t == current_class_ptr)
 	    {
 	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
 		     " clauses");
@@ -6730,7 +6750,10 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 		}
 	      else if (bitmap_bit_p (&map_head, DECL_UID (t)))
 		{
-		  error ("%qD appears both in data and map clauses", t);
+		  if (ort == C_ORT_ACC)
+		    error ("%qD appears more than once in data clauses", t);
+		  else
+		    error ("%qD appears both in data and map clauses", t);
 		  remove = true;
 		}
 	      else
@@ -6740,6 +6763,8 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	    {
 	      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
 		error ("%qD appears more than once in motion clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
 	      else
 		error ("%qD appears more than once in map clauses", t);
 	      remove = true;
@@ -6747,7 +6772,10 @@  finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	  else if (bitmap_bit_p (&generic_head, DECL_UID (t))
 		   || bitmap_bit_p (&firstprivate_head, DECL_UID (t)))
 	    {
-	      error ("%qD appears both in data and map clauses", t);
+	      if (ort == C_ORT_ACC)
+		error ("%qD appears more than once in data clauses", t);
+	      else
+		error ("%qD appears both in data and map clauses", t);
 	      remove = true;
 	    }
 	  else
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index e702bc4..5cffdc1 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -6279,6 +6279,9 @@  omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code)
 		        error ("variable %qE declared in enclosing "
 			       "%<host_data%> region", DECL_NAME (decl));
 		      nflags |= GOVD_MAP;
+		      if (octx->region_type == ORT_ACC_DATA
+			  && (n2->value & GOVD_MAP_0LEN_ARRAY))
+			nflags |= GOVD_MAP_0LEN_ARRAY;
 		      goto found_outer;
 		    }
 		}
@@ -6854,9 +6857,14 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	    {
 	    case OMP_TARGET:
 	      break;
+	    case OACC_DATA:
+	      if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
+		break;
 	    case OMP_TARGET_DATA:
 	    case OMP_TARGET_ENTER_DATA:
 	    case OMP_TARGET_EXIT_DATA:
+	    case OACC_ENTER_DATA:
+	    case OACC_EXIT_DATA:
 	    case OACC_HOST_DATA:
 	      if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
 		  || (OMP_CLAUSE_MAP_KIND (c)
@@ -7310,6 +7318,10 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		    omp_notice_variable (outer_ctx, t, true);
 		}
 	    }
+	  if (code == OACC_DATA
+	      && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+	      && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
+	    flags |= GOVD_MAP_0LEN_ARRAY;
 	  omp_add_variable (ctx, decl, flags);
 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
 	      && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
@@ -7568,6 +7580,10 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	  gcc_unreachable ();
 	}
 
+      if (code == OACC_DATA
+	  && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+	  && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
+	remove = true;
       if (remove)
 	*list_p = OMP_CLAUSE_CHAIN (c);
       else
@@ -8028,7 +8044,7 @@  gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	      break;
 	    }
 	  decl = OMP_CLAUSE_DECL (c);
-	  /* Data clasues associated with acc parallel reductions must be
+	  /* Data clauses associated with acc parallel reductions must be
 	     compatible with present_or_copy.  Warn and adjust the clause
 	     if that is not the case.  */
 	  if (ctx->region_type == ORT_ACC_PARALLEL)
diff --git a/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c b/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c
index 7a1cf68..6245beb 100644
--- a/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c
@@ -2,12 +2,12 @@  void
 fun (void)
 {
   float *fp;
-#pragma acc parallel copy(fp[0:2],fp[0:2]) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc parallel copy(fp[0:2],fp[0:2]) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
-#pragma acc kernels present_or_copyin(fp[3]) present_or_copyout(fp[7:4]) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc kernels present_or_copyin(fp[3]) present_or_copyout(fp[7:4]) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
-#pragma acc data create(fp[:10]) deviceptr(fp) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc data create(fp[:10]) deviceptr(fp) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
-#pragma acc data create(fp) present(fp) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc data create(fp) present(fp) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c b/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c
index 08ddb10..3aa0e8a 100644
--- a/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c
@@ -47,7 +47,7 @@  fun2 (void)
   /* { dg-error "'u' undeclared" "u undeclared" { target *-*-* } 46 } */
   /* { dg-error "'fun2' is not a variable" "fun2 not a variable" { target *-*-* } 46 } */
   /* { dg-error "'i' is not a pointer variable" "i not a pointer variable" { target *-*-* } 46 } */
-  /* { dg-error "'fp' appears more than once in map clauses" "fp more than once" { target *-*-* } 46 } */
+  /* { dg-error "'fp' appears more than once in data clauses" "fp more than once" { target *-*-* } 46 } */
   ;
 }
 
@@ -55,11 +55,11 @@  void
 fun3 (void)
 {
   float *fp;
-#pragma acc data deviceptr(fp,fp) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc data deviceptr(fp,fp) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
-#pragma acc parallel deviceptr(fp) deviceptr(fp) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc parallel deviceptr(fp) deviceptr(fp) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
-#pragma acc kernels copy(fp) deviceptr(fp) /* { dg-error "'fp' appears more than once in map clauses" } */
+#pragma acc kernels copy(fp) deviceptr(fp) /* { dg-error "'fp' appears more than once in data clauses" } */
   ;
 }
 
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c b/gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
index 6989c1c..2934f12 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
@@ -17,5 +17,5 @@  foo (void)
 /* Only the omp_data_i related loads should be annotated with
    non-base 0 cliques.  */
 /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
-/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
 
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c b/gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
index d41802c..f6ee5b5 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
@@ -19,5 +19,5 @@  foo (void)
 /* Only the omp_data_i related loads should be annotated with
    non-base 0 cliques.  */
 /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
-/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
 
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c b/gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
index 6fefe183..74425fb 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
@@ -15,5 +15,5 @@  foo (int *a)
 
 /* Only the omp_data_i related loads should be annotated with cliques.  */
 /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
-/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 4 "ealias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
 
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c b/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
index 3b91acd..69200cc 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
@@ -7,7 +7,7 @@  extern void *acc_copyin (void *, size_t);
 void
 foo (int *a, size_t n)
 {
-  int *p = (int *)acc_copyin (&a, n);
+  int *p = (int *)acc_copyin (a, n);
 
 #pragma acc kernels deviceptr (p) pcopy(a[0:n])
   {
@@ -18,5 +18,5 @@  foo (int *a, size_t n)
 
 /* Only the omp_data_i related loads should be annotated with cliques.  */
 /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
-/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
 
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c b/gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
index 1eb56eb..1ea0e73 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
@@ -31,6 +31,5 @@  foo (void)
   free (c);
 }
 
-/* { dg-final { scan-tree-dump-times "(?n)= 0;$" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)= 1;$" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)= \\*a" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n)= 0;$" 1 "optimized" { target c } } } */
+/* { dg-final { scan-tree-dump-times "(?n)= 1;$" 1 "optimized" { target c }  } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/pcopy.c b/gcc/testsuite/c-c++-common/goacc/pcopy.c
index 02c4383..0e0aad5 100644
--- a/gcc/testsuite/c-c++-common/goacc/pcopy.c
+++ b/gcc/testsuite/c-c++-common/goacc/pcopy.c
@@ -7,4 +7,4 @@  f (char *cp)
   ;
 }
 
-/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(tofrom:\\*\\(cp \\+ 3\\) \\\[len: 5]\\) map\\(alloc:cp \\\[pointer assign, bias: 3]\\)" 1 "original" } } */
+/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(tofrom:\\*\\(cp \\+ 3\\) \\\[len: 5]\\) map\\(firstprivate:cp \\\[pointer assign, bias: 3]\\)" 1 "original" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/pcopyin.c b/gcc/testsuite/c-c++-common/goacc/pcopyin.c
index 10911fc..3085251 100644
--- a/gcc/testsuite/c-c++-common/goacc/pcopyin.c
+++ b/gcc/testsuite/c-c++-common/goacc/pcopyin.c
@@ -7,4 +7,4 @@  f (char *cp)
   ;
 }
 
-/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(to:\\*\\(cp \\+ 4\\) \\\[len: 6]\\) map\\(alloc:cp \\\[pointer assign, bias: 4]\\)" 1 "original" } } */
+/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(to:\\*\\(cp \\+ 4\\) \\\[len: 6]\\) map\\(firstprivate:cp \\\[pointer assign, bias: 4]\\)" 1 "original" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/pcopyout.c b/gcc/testsuite/c-c++-common/goacc/pcopyout.c
index 703ac2f..47c454c 100644
--- a/gcc/testsuite/c-c++-common/goacc/pcopyout.c
+++ b/gcc/testsuite/c-c++-common/goacc/pcopyout.c
@@ -7,4 +7,4 @@  f (char *cp)
   ;
 }
 
-/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(from:\\*\\(cp \\+ 5\\) \\\[len: 7]\\) map\\(alloc:cp \\\[pointer assign, bias: 5]\\)" 1 "original" } } */
+/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(from:\\*\\(cp \\+ 5\\) \\\[len: 7]\\) map\\(firstprivate:cp \\\[pointer assign, bias: 5]\\)" 1 "original" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/pcreate.c b/gcc/testsuite/c-c++-common/goacc/pcreate.c
index 00bf155..a403e5a 100644
--- a/gcc/testsuite/c-c++-common/goacc/pcreate.c
+++ b/gcc/testsuite/c-c++-common/goacc/pcreate.c
@@ -7,4 +7,4 @@  f (char *cp)
   ;
 }
 
-/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(alloc:\\*\\(cp \\+ 6\\) \\\[len: 8]\\) map\\(alloc:cp \\\[pointer assign, bias: 6]\\)" 1 "original" } } */
+/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(alloc:\\*\\(cp \\+ 6\\) \\\[len: 8]\\) map\\(firstprivate:cp \\\[pointer assign, bias: 6]\\)" 1 "original" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/pr70688.c b/gcc/testsuite/c-c++-common/goacc/pr70688.c
new file mode 100644
index 0000000..5a23665
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/pr70688.c
@@ -0,0 +1,48 @@ 
+const int n = 100;
+
+int
+private_reduction ()
+{
+  int i, r;
+
+  #pragma acc parallel
+  #pragma acc loop private (r) reduction (+:r)
+  for (i = 0; i < 100; i++)
+    r += 10;
+
+  return r;
+}
+
+int
+parallel_reduction ()
+{
+  int sum = 0;
+  int dummy = 0;
+
+#pragma acc data copy (dummy)
+  {
+#pragma acc parallel num_gangs (10) copy (sum) reduction (+:sum)
+    {
+      int v = 5;
+      sum += 10 + v;
+    }
+  }
+
+  return sum;
+}
+
+int
+main ()
+{
+  int i, s = 0;
+
+#pragma acc parallel num_gangs (10) copy (s) reduction (+:s)
+  for (i = 0; i < n; i++)
+    s += i+1;
+
+#pragma acc parallel num_gangs (10) reduction (+:s) copy (s)
+  for (i = 0; i < n; i++)
+    s += i+1;
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/present-1.c b/gcc/testsuite/c-c++-common/goacc/present-1.c
index 7537948..51362b2 100644
--- a/gcc/testsuite/c-c++-common/goacc/present-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/present-1.c
@@ -7,4 +7,4 @@  f (char *cp)
   ;
 }
 
-/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(force_present:\\*\\(cp \\+ 7\\) \\\[len: 9]\\) map\\(alloc:cp \\\[pointer assign, bias: 7]\\)" 1 "original" } } */
+/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(force_present:\\*\\(cp \\+ 7\\) \\\[len: 9]\\) map\\(firstprivate:cp \\\[pointer assign, bias: 7]\\)" 1 "original" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-5.c b/gcc/testsuite/c-c++-common/goacc/reduction-5.c
index 74daad3..dfdbab9 100644
--- a/gcc/testsuite/c-c++-common/goacc/reduction-5.c
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-5.c
@@ -7,9 +7,9 @@  main(void)
 {
   int v1;
 
-#pragma acc parallel reduction(+:v1) private(v1) /* { dg-error "appears more than once in data clauses" } */
+#pragma acc parallel reduction(+:v1) private(v1) /* { dg-error "invalid private reduction" } */
   ;
-#pragma acc parallel reduction(+:v1) firstprivate(v1) /* { dg-error "appears more than once in data clauses" } */
+#pragma acc parallel reduction(+:v1) firstprivate(v1) /* { dg-error "invalid private reduction" } */
   ;
 
   return 0;
diff --git a/gcc/testsuite/g++.dg/goacc/data-1.C b/gcc/testsuite/g++.dg/goacc/data-1.C
new file mode 100644
index 0000000..54676dc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/goacc/data-1.C
@@ -0,0 +1,39 @@ 
+void
+foo (int &a, int (&b)[100], int &n)
+{
+#pragma acc enter data copyin (a, b) async wait
+#pragma acc enter data create (b[20:30]) async wait
+#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause before '\\\(' token" } */
+#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' before '\\\(' token" } */
+#pragma acc exit data delete (a) if (0)
+#pragma acc exit data copyout (b) if (a)
+#pragma acc exit data delete (b)
+#pragma acc enter /* { dg-error "expected 'data' in" } */
+#pragma acc exit /* { dg-error "expected 'data' in" } */
+#pragma acc enter data /* { dg-error "has no data movement clause" } */
+#pragma acc exit data /* { dg-error "has no data movement clause" } */
+#pragma acc enter Data /* { dg-error "invalid pragma before" } */
+#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
+}
+
+template<typename T>
+void
+foo (T &a, T (&b)[100], T &n)
+{
+#pragma acc enter data copyin (a, b) async wait
+#pragma acc enter data create (b[20:30]) async wait
+#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause before '\\\(' token" } */
+#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' before '\\\(' token" } */
+#pragma acc exit data delete (a) if (0)
+#pragma acc exit data copyout (b) if (a)
+#pragma acc exit data delete (b)
+#pragma acc enter /* { dg-error "expected 'data' in" } */
+#pragma acc exit /* { dg-error "expected 'data' in" } */
+#pragma acc enter data /* { dg-error "has no data movement clause" } */
+#pragma acc exit data /* { dg-error "has no data movement clause" } */
+#pragma acc enter Data /* { dg-error "invalid pragma before" } */
+#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
+}
+
+/* { dg-error "has no data movement clause" "" { target *-*-* } 6 } */
+/* { dg-error "has no data movement clause" "" { target *-*-* } 25 } */
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index ce1905c..665e208 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -32,6 +32,7 @@ 
 #include "gomp-constants.h"
 #include "oacc-int.h"
 #include <stdint.h>
+#include <string.h>
 #include <assert.h>
 
 /* Return block containing [H->S), or NULL if not contained.  The device lock
@@ -104,6 +105,9 @@  acc_malloc (size_t s)
 
   assert (thr->dev);
 
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return malloc (s);
+
   return thr->dev->alloc_func (thr->dev->target_id, s);
 }
 
@@ -124,6 +128,9 @@  acc_free (void *d)
 
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return free (d);
+
   gomp_mutex_lock (&acc_dev->lock);
 
   /* We don't have to call lazy open here, as the ptr value must have
@@ -154,6 +161,12 @@  acc_memcpy_to_device (void *d, void *h, size_t s)
 
   assert (thr && thr->dev);
 
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    {
+      memmove (d, h, s);
+      return;
+    }
+
   thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
 }
 
@@ -166,6 +179,12 @@  acc_memcpy_from_device (void *h, void *d, size_t s)
 
   assert (thr && thr->dev);
 
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    {
+      memmove (h, d, s);
+      return;
+    }
+
   thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
 }
 
@@ -184,6 +203,9 @@  acc_deviceptr (void *h)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *dev = thr->dev;
 
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return h;
+
   gomp_mutex_lock (&dev->lock);
 
   n = lookup_host (dev, h, 1);
@@ -218,6 +240,9 @@  acc_hostptr (void *d)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return d;
+
   gomp_mutex_lock (&acc_dev->lock);
 
   n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
@@ -252,6 +277,9 @@  acc_is_present (void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return h != NULL;
+
   gomp_mutex_lock (&acc_dev->lock);
 
   n = lookup_host (acc_dev, h, s);
@@ -271,7 +299,7 @@  acc_is_present (void *h, size_t s)
 void
 acc_map_data (void *h, void *d, size_t s)
 {
-  struct target_mem_desc *tgt;
+  struct target_mem_desc *tgt = NULL;
   size_t mapnum = 1;
   void *hostaddrs = h;
   void *devaddrs = d;
@@ -287,9 +315,6 @@  acc_map_data (void *h, void *d, size_t s)
     {
       if (d != h)
         gomp_fatal ("cannot map data on shared-memory system");
-
-      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
-			   GOMP_MAP_VARS_OPENACC);
     }
   else
     {
@@ -335,6 +360,10 @@  acc_unmap_data (void *h)
 
   /* No need to call lazy open, as the address must have been mapped.  */
 
+  /* This is a no-op on shared-memory targets.  */
+  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
   size_t host_size;
 
   gomp_mutex_lock (&acc_dev->lock);
@@ -405,6 +434,9 @@  present_create_copy (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return h;
+
   gomp_mutex_lock (&acc_dev->lock);
 
   n = lookup_host (acc_dev, h, s);
@@ -496,6 +528,9 @@  delete_copyout (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
   gomp_mutex_lock (&acc_dev->lock);
 
   n = lookup_host (acc_dev, h, s);
@@ -553,6 +588,9 @@  update_dev_host (int is_dev, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
   gomp_mutex_lock (&acc_dev->lock);
 
   n = lookup_host (acc_dev, h, s);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
index f3b490a..d478ce2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
@@ -1,6 +1,4 @@ 
 /* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* <http://news.gmane.org/find-root.php?message_id=%3C87pp0aaksc.fsf%40kepler.schwinge.homeip.net%3E>.
-   { dg-xfail-run-if "TODO" { *-*-* } } */
 /* { dg-additional-options "-lcuda" } */
 
 #include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
new file mode 100644
index 0000000..e1aa2c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
@@ -0,0 +1,185 @@ 
+/* This test is similar to data-2.c, but it uses acc_* library functions
+   to move data.  */
+
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  int N = 128; //1024 * 1024;
+  float *a, *b, *c, *d, *e;
+  void *d_a, *d_b, *d_c, *d_d;
+  int i;
+  int nbytes;
+
+  nbytes = N * sizeof (float);
+
+  a = (float *) malloc (nbytes);
+  b = (float *) malloc (nbytes);
+  c = (float *) malloc (nbytes);
+  d = (float *) malloc (nbytes);
+  e = (float *) malloc (nbytes);
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 3.0;
+      b[i] = 0.0;
+    }
+
+  d_a = acc_copyin (a, nbytes);
+  d_b = acc_copyin (b, nbytes);
+  acc_copyin (&N, sizeof (int));
+  
+#pragma acc parallel present (a[0:N], b[0:N], N) async wait
+#pragma acc loop
+  for (i = 0; i < N; i++)
+    b[i] = a[i];
+
+  acc_wait_all ();
+
+  acc_memcpy_from_device (a, d_a, nbytes);
+  acc_memcpy_from_device (b, d_b, nbytes);
+
+  for (i = 0; i < N; i++)
+    {
+      assert (a[i] == 3.0);
+      assert (b[i] == 3.0);
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 2.0;
+      b[i] = 0.0;
+    }
+
+  acc_update_device (a, nbytes);
+  acc_update_device (b, nbytes);
+  
+#pragma acc parallel present (a[0:N], b[0:N], N)  async (1)
+#pragma acc loop
+  for (i = 0; i < N; i++)
+    b[i] = a[i];
+
+  acc_memcpy_from_device (a, d_a, nbytes);
+  acc_memcpy_from_device (b, d_b, nbytes);
+  
+  for (i = 0; i < N; i++)
+    {
+      assert (a[i] == 2.0);
+      assert (b[i] == 2.0);
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 3.0;
+      b[i] = 0.0;
+      c[i] = 0.0;
+      d[i] = 0.0;
+    }
+
+  acc_update_device (a, nbytes);
+  acc_update_device (b, nbytes);
+  d_c = acc_copyin (c, nbytes);
+  d_d = acc_copyin (d, nbytes);
+
+#pragma acc parallel present (a[0:N], b[0:N], N) async (1)
+#pragma acc loop
+  for (i = 0; i < N; i++)
+    b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc parallel present (a[0:N], c[0:N], N) async (2)
+#pragma acc loop
+  for (i = 0; i < N; i++)
+    c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
+
+#pragma acc parallel present (a[0:N], d[0:N], N) async (3)
+#pragma acc loop
+  for (i = 0; i < N; i++)
+    d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+  acc_wait_all ();
+  
+  acc_memcpy_from_device (a, d_a, nbytes);
+  acc_memcpy_from_device (b, d_b, nbytes);
+  acc_memcpy_from_device (c, d_c, nbytes);
+  acc_memcpy_from_device (d, d_d, nbytes);
+  
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != 3.0)
+	abort ();
+
+      if (b[i] != 9.0)
+	abort ();
+
+      if (c[i] != 4.0)
+	abort ();
+
+      if (d[i] != 1.0)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 2.0;
+      b[i] = 0.0;
+      c[i] = 0.0;
+      d[i] = 0.0;
+      e[i] = 0.0;
+    }
+
+  acc_update_device (a, nbytes);
+  acc_update_device (b, nbytes);
+  acc_update_device (c, nbytes);
+  acc_update_device (d, nbytes);
+  acc_copyin (e, nbytes);
+
+#pragma acc parallel present (a[0:N], b[0:N], N) async (1)
+  for (int ii = 0; ii < N; ii++)
+    b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+
+#pragma acc parallel present (a[0:N], c[0:N], N) async (2)
+  for (int ii = 0; ii < N; ii++)
+    c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+
+#pragma acc parallel present (a[0:N], d[0:N], N) async (3)
+  for (int ii = 0; ii < N; ii++)
+    d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+
+#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N) \
+  async (4)
+  for (int ii = 0; ii < N; ii++)
+    e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+
+  acc_wait_all ();
+  acc_copyout (a, nbytes);
+  acc_copyout (b, nbytes);
+  acc_copyout (c, nbytes); 
+  acc_copyout (d, nbytes);
+  acc_copyout (e, nbytes);
+  acc_delete (&N, sizeof (int));
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != 2.0)
+	abort ();
+
+      if (b[i] != 4.0)
+	abort ();
+
+      if (c[i] != 4.0)
+	abort ();
+
+      if (d[i] != 1.0)
+	abort ();
+
+      if (e[i] != 11.0)
+	abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
index f867a66..c1c0825 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
@@ -1,3 +1,5 @@ 
+/* Test 'acc enter/exit data' regions.  */
+
 /* { dg-do run } */
 
 #include <stdlib.h>
@@ -25,7 +27,7 @@  main (int argc, char **argv)
     }
 
 #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
-#pragma acc parallel async wait
+#pragma acc parallel present (a[0:N], b[0:N]) async wait
 #pragma acc loop
   for (i = 0; i < N; i++)
     b[i] = a[i];
@@ -49,7 +51,7 @@  main (int argc, char **argv)
     }
 
 #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1)
-#pragma acc parallel async (1)
+#pragma acc parallel present (a[0:N], b[0:N])  async (1)
 #pragma acc loop
   for (i = 0; i < N; i++)
     b[i] = a[i];
@@ -76,17 +78,17 @@  main (int argc, char **argv)
 
 #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (N) async (1)
 
-#pragma acc parallel async (1) wait (1)
+#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
 #pragma acc loop
   for (i = 0; i < N; i++)
     b[i] = (a[i] * a[i] * a[i]) / a[i];
 
-#pragma acc parallel async (2) wait (1)
+#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
 #pragma acc loop
   for (i = 0; i < N; i++)
     c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
 
-#pragma acc parallel async (3) wait (1)
+#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
 #pragma acc loop
   for (i = 0; i < N; i++)
     d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
@@ -120,26 +122,27 @@  main (int argc, char **argv)
 
 #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1)
 
-#pragma acc parallel async (1) wait (1)
+#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
   for (int ii = 0; ii < N; ii++)
     b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
 
-#pragma acc parallel async (2) wait (1)
+#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
   for (int ii = 0; ii < N; ii++)
     c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
 
-#pragma acc parallel async (3) wait (1)
+#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
   for (int ii = 0; ii < N; ii++)
     d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
 
-#pragma acc parallel wait (1) async (4)
+#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
+  wait (1) async (4)
   for (int ii = 0; ii < N; ii++)
     e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
 
-#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
+  copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
 #pragma acc wait (1)
 
-
   for (i = 0; i < N; i++)
     {
       if (a[i] != 2.0)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
index 747109f..0bf706a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
@@ -1,3 +1,5 @@ 
+/* Test 'acc enter/exit data' regions with 'acc update'.  */
+
 /* { dg-do run } */
 
 #include <stdlib.h>
@@ -25,7 +27,7 @@  main (int argc, char **argv)
     }
 
 #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
-#pragma acc parallel async wait
+#pragma acc parallel present (a[0:N], b[0:N]) async wait
 #pragma acc loop
   for (i = 0; i < N; i++)
     b[i] = a[i];
@@ -49,7 +51,7 @@  main (int argc, char **argv)
     }
 
 #pragma acc update device (a[0:N], b[0:N]) async (1)
-#pragma acc parallel async (1)
+#pragma acc parallel present (a[0:N], b[0:N]) async (1)
 #pragma acc loop
   for (i = 0; i < N; i++)
     b[i] = a[i];
@@ -78,17 +80,17 @@  main (int argc, char **argv)
 #pragma acc update device (b[0:N]) async (2)
 #pragma acc enter data copyin (c[0:N], d[0:N]) async (3)
 
-#pragma acc parallel async (1) wait (1,2)
+#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1,2)
 #pragma acc loop
   for (i = 0; i < N; i++)
     b[i] = (a[i] * a[i] * a[i]) / a[i];
 
-#pragma acc parallel async (2) wait (1,3)
+#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1,3)
 #pragma acc loop
   for (i = 0; i < N; i++)
     c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
 
-#pragma acc parallel async (3) wait (1,3)
+#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1,3)
 #pragma acc loop
   for (i = 0; i < N; i++)
     d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
@@ -123,27 +125,28 @@  main (int argc, char **argv)
 #pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1)
 #pragma acc enter data copyin (e[0:N]) async (5)
 
-#pragma acc parallel async (1) wait (1)
+#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
   for (int ii = 0; ii < N; ii++)
     b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
 
-#pragma acc parallel async (2) wait (1)
+#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
   for (int ii = 0; ii < N; ii++)
     c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
 
-#pragma acc parallel async (3) wait (1)
+#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
   for (int ii = 0; ii < N; ii++)
     d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
 
-#pragma acc parallel wait (1,5) async (4)
+#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
+  wait (1,5) async (4)
   for (int ii = 0; ii < N; ii++)
     e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
 
-#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
+  copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
 #pragma acc exit data delete (N)
 #pragma acc wait (1)
 
-
   for (i = 0; i < N; i++)
     {
       if (a[i] != 2.0)
@@ -162,5 +165,11 @@  main (int argc, char **argv)
 	abort ();
     }
 
+  free (a);
+  free (b);
+  free (c);
+  free (d);
+  free (e);
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/enter_exit-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/enter_exit-lib.c
new file mode 100644
index 0000000..b5b37b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/enter_exit-lib.c
@@ -0,0 +1,70 @@ 
+/* Verify enter/exit data interoperablilty between pragmas and
+   acc library calls.  */
+
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+int
+main ()
+{
+  int *p = (int *)malloc (sizeof (int));
+
+  /* Test 1: pragma input, library output.  */
+  
+#pragma acc enter data copyin (p[0:1])
+
+#pragma acc parallel present (p[0:1]) num_gangs (1)
+  {
+    p[0] = 1;
+  }
+
+  acc_copyout (p, sizeof (int));
+
+  assert (p[0] == 1);
+  
+  /* Test 2: library input, pragma output.  */
+
+  acc_copyin (p, sizeof (int));
+
+#pragma acc parallel present (p[0:1]) num_gangs (1)
+  {
+    p[0] = 2;
+  }
+
+#pragma acc exit data copyout (p[0:1])
+  
+  assert (p[0] == 2);
+
+  /* Test 3: library input, library output.  */
+
+  acc_copyin (p, sizeof (int));
+
+#pragma acc parallel present (p[0:1]) num_gangs (1)
+  {
+    p[0] = 3;
+  }
+
+  acc_copyout (p, sizeof (int));
+  
+  assert (p[0] == 3);
+
+  /* Test 4: pragma input, pragma output.  */
+
+#pragma acc enter data copyin (p[0:1])
+  
+#pragma acc parallel present (p[0:1]) num_gangs (1)
+  {
+    p[0] = 3;
+  }
+
+#pragma acc exit data copyout (p[0:1])
+  
+  assert (p[0] == 3);
+  
+  free (p);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
index 7098ef3..d665533 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Check acc_is_present and acc_delete.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdlib.h>
 #include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c
index a9632f7..ee21257 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Check acc_is_present.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdlib.h>
 #include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c
index 4f6a731..50c1701 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Check acc_is_present and acc_copyout.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdlib.h>
 #include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c
index 28e4e5c..c81a78d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Test if duplicate data mappings with acc_copy_in.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c
index 7d1767e..a3487e8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c
@@ -1,4 +1,7 @@ 
-/* { dg-do run } */
+/* Check acc_copyout failure with acc_device_nvidia.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c
index 160b33c..b686cc9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Verify that acc_delete unregisters data mappings on the device.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c
index 4f8e14c..25ceb3a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_copyin and acc_copyout on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c
index d908700..b170f81 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_copyin and acc_copyout on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c
index a6c0197..65ff440 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_copyin and acc_copyout on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c
index 2339dd6..fd4dc59 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_copyin and acc_copyout on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c
index d7de8e3..09e2817 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_create, acc_is_present and acc_delete.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdlib.h>
 #include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c
index bb709d3..5f00ccb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_create and acc_delete on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c
index 9304daa..7a96ab2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_delete with a NULL address on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c
index 92e3858..318a060 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_delete with size zero on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c
index e81627d..9bc9ecc 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise an invalid partial acc_delete on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c
index 031c731..a24916d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise an invalid acc_present_or_create on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c
index de5d1c1..30b90d4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_update_device on unmapped data on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c
index 0d593f0..5db2912 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_update_device with a NULL data address on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c
index e98ecc4..8bbf016 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_update_device with size zero data on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c
index f26fc33..c214042 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_update_self with a NULL data mapping on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <string.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c
index 253ce59..afa137f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_update_self with a size zero data mapping on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <string.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c
index cfbb077..25c70c2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_map_data with a NULL data mapping on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c
index 5de376d..a8ee7df 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_map_data with a NULL data mapping on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c
index 3e621c3..fc221f4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c
@@ -1,4 +1,6 @@ 
-/* { dg-do run } */
+/* Exercise acc_map_data with data size of zero on nvidia targets.  */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
 
 #include <stdio.h>
 #include <stdlib.h>