diff mbox

[OpenACC,0/7] host_data construct

Message ID 20151118124747.30a2ec5d@octopus
State New
Headers show

Commit Message

Julian Brown Nov. 18, 2015, 12:47 p.m. UTC
On Thu, 12 Nov 2015 11:16:21 +0000
Julian Brown <julian@codesourcery.com> wrote:

> Here's a version of the patch which (hopefully) brings OpenACC on par
> with OpenMP with respect to use_device/use_device_ptr variables. The
> implementation is essentially the same now for OpenACC as for OpenMP
> (i.e. using mapping structures): so for now, only array or pointer
> variables can be used as use_device variables. The included tests have
> been adjusted accordingly.

Here's a rebased version of the patch, since the previous version no
longer applies cleanly. Re-tested OK (libgomp tests). ChangeLog as
before. (Ping.)

Julian

Comments

Jakub Jelinek Nov. 19, 2015, 1:13 p.m. UTC | #1
On Wed, Nov 18, 2015 at 12:47:47PM +0000, Julian Brown wrote:

The FE/gimplifier part is okay, but I really don't like the
omp-low.c changes, mostly the *lookup_decl_in_outer_ctx* changes.
If I count well, we have right now 27 maybe_lookup_decl_in_outer_ctx
callers and 7 lookup_decl_in_outer_ctx callers, you want to change
behavior of 1 maybe_lookup_decl_in_outer_ctx and 1
lookup_decl_in_outer_ctx.  Why exactly those 2 and not the others?
What are the exact rules (what does the standard say about it)?
I'd expect that all phases (scan_sharing_clauses, lower_omp* and
expand_omp*) should agree on the same behavior, otherwise I can't see how it
can work properly.  And, if you want to change just a couple of spots,
I'd strongly prefer to add new functions with this weirdo behavior, rather
than tweaking the original function.

> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -390,8 +390,8 @@ scan_omp_op (tree *tp, omp_context *ctx)
>  }
>  
>  static void lower_omp (gimple_seq *, omp_context *);
> -static tree lookup_decl_in_outer_ctx (tree, omp_context *);
> -static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
> +static tree lookup_decl_in_outer_ctx (tree, omp_context *, bool = false);
> +static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *, bool = false);
>  
>  /* Find an OMP clause of type KIND within CLAUSES.  */
>  
> @@ -1935,6 +1935,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
>  	  install_var_local (decl, ctx);
>  	  break;
>  
> +	case OMP_CLAUSE_USE_DEVICE:
>  	case OMP_CLAUSE_USE_DEVICE_PTR:
>  	  decl = OMP_CLAUSE_DECL (c);
>  	  if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
> @@ -2137,7 +2138,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
>  	  break;
>  
>  	case OMP_CLAUSE_DEVICE_RESIDENT:
> -	case OMP_CLAUSE_USE_DEVICE:
>  	case OMP_CLAUSE__CACHE_:
>  	  sorry ("Clause not supported yet");
>  	  break;
> @@ -2288,6 +2288,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
>  	case OMP_CLAUSE_SIMD:
>  	case OMP_CLAUSE_NOGROUP:
>  	case OMP_CLAUSE_DEFAULTMAP:
> +	case OMP_CLAUSE_USE_DEVICE:
>  	case OMP_CLAUSE_USE_DEVICE_PTR:
>  	case OMP_CLAUSE__CILK_FOR_COUNT_:
>  	case OMP_CLAUSE_ASYNC:
> @@ -2305,7 +2306,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
>  	  break;
>  
>  	case OMP_CLAUSE_DEVICE_RESIDENT:
> -	case OMP_CLAUSE_USE_DEVICE:
>  	case OMP_CLAUSE__CACHE_:
>  	  sorry ("Clause not supported yet");
>  	  break;
> @@ -3608,6 +3608,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
>  	    case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break;
>  	    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
>  	      stmt_name = "enter/exit data"; break;
> +	    case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
> +	      break;
>  	    default: gcc_unreachable ();
>  	    }
>  	  switch (gimple_omp_target_kind (ctx->stmt))
> @@ -3619,6 +3621,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
>  	    case GF_OMP_TARGET_KIND_OACC_KERNELS:
>  	      ctx_stmt_name = "kernels"; break;
>  	    case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
> +	    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
> +	      ctx_stmt_name = "host_data"; break;
>  	    default: gcc_unreachable ();
>  	    }
>  
> @@ -3941,13 +3945,22 @@ maybe_lookup_ctx (gimple *stmt)
>      parallelism happens only rarely.  */
>  
>  static tree
> -lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
> +lookup_decl_in_outer_ctx (tree decl, omp_context *ctx,
> +			  bool skip_hostdata)
>  {
>    tree t;
>    omp_context *up;
>  
>    for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
> -    t = maybe_lookup_decl (decl, up);
> +    {
> +      if (skip_hostdata
> +	  && gimple_code (up->stmt) == GIMPLE_OMP_TARGET
> +	  && gimple_omp_target_kind (up->stmt)
> +	     == GF_OMP_TARGET_KIND_OACC_HOST_DATA)
> +	continue;
> +
> +      t = maybe_lookup_decl (decl, up);
> +    }
>  
>    gcc_assert (!ctx->is_nested || t || is_global_var (decl));
>  
> @@ -3959,13 +3972,22 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
>     in outer contexts.  */
>  
>  static tree
> -maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
> +maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx,
> +				bool skip_hostdata)
>  {
>    tree t = NULL;
>    omp_context *up;
>  
>    for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
> -    t = maybe_lookup_decl (decl, up);
> +    {
> +      if (skip_hostdata
> +	  && gimple_code (up->stmt) == GIMPLE_OMP_TARGET
> +	  && gimple_omp_target_kind (up->stmt)
> +	     == GF_OMP_TARGET_KIND_OACC_HOST_DATA)
> +	continue;
> +
> +      t = maybe_lookup_decl (decl, up);
> +    }
>  
>    return t ? t : decl;
>  }
> @@ -12499,6 +12521,7 @@ expand_omp_target (struct omp_region *region)
>        break;
>      case GF_OMP_TARGET_KIND_DATA:
>      case GF_OMP_TARGET_KIND_OACC_DATA:
> +    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
>        data_region = true;
>        break;
>      default:
> @@ -12742,6 +12765,9 @@ expand_omp_target (struct omp_region *region)
>      case GF_OMP_TARGET_KIND_OACC_DECLARE:
>        start_ix = BUILT_IN_GOACC_DECLARE;
>        break;
> +    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
> +      start_ix = BUILT_IN_GOACC_HOST_DATA;
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -12866,6 +12892,7 @@ expand_omp_target (struct omp_region *region)
>      case BUILT_IN_GOACC_DATA_START:
>      case BUILT_IN_GOACC_DECLARE:
>      case BUILT_IN_GOMP_TARGET_DATA:
> +    case BUILT_IN_GOACC_HOST_DATA:
>        break;
>      case BUILT_IN_GOMP_TARGET:
>      case BUILT_IN_GOMP_TARGET_UPDATE:
> @@ -13173,6 +13200,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
>  		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
>  		case GF_OMP_TARGET_KIND_OACC_KERNELS:
>  		case GF_OMP_TARGET_KIND_OACC_DATA:
> +		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
>  		  break;
>  		case GF_OMP_TARGET_KIND_UPDATE:
>  		case GF_OMP_TARGET_KIND_ENTER_DATA:
> @@ -14972,6 +15000,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>        break;
>      case GF_OMP_TARGET_KIND_DATA:
>      case GF_OMP_TARGET_KIND_OACC_DATA:
> +    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
>        data_region = true;
>        break;
>      default:
> @@ -15079,7 +15108,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  	  {
>  	    if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
>  	      {
> -		if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))
> +		if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx,
> +								   true))
>  		    && varpool_node::get_create (var)->offloadable)
>  		  continue;
>  
> @@ -15178,6 +15208,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  	  }
>  	break;
>  
> +      case OMP_CLAUSE_USE_DEVICE:
>        case OMP_CLAUSE_USE_DEVICE_PTR:
>        case OMP_CLAUSE_IS_DEVICE_PTR:
>  	var = OMP_CLAUSE_DECL (c);
> @@ -15316,7 +15347,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  	      talign = DECL_ALIGN_UNIT (ovar);
>  	    if (nc)
>  	      {
> -		var = lookup_decl_in_outer_ctx (ovar, ctx);
> +		var = lookup_decl_in_outer_ctx (ovar, ctx, true);
>  		x = build_sender_ref (ovar, ctx);
>  
>  		if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
> @@ -15563,12 +15594,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  				    build_int_cstu (tkind_type, tkind));
>  	    break;
>  
> +	  case OMP_CLAUSE_USE_DEVICE:
>  	  case OMP_CLAUSE_USE_DEVICE_PTR:
>  	  case OMP_CLAUSE_IS_DEVICE_PTR:
>  	    ovar = OMP_CLAUSE_DECL (c);
>  	    var = lookup_decl_in_outer_ctx (ovar, ctx);
>  	    x = build_sender_ref (ovar, ctx);
> -	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
> +	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR
> +		|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE)
>  	      tkind = GOMP_MAP_USE_DEVICE_PTR;
>  	    else
>  	      tkind = GOMP_MAP_FIRSTPRIVATE_INT;
> @@ -15771,10 +15804,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>  				     gimple_build_assign (new_var, x));
>  	      }
>  	    break;
> +	  case OMP_CLAUSE_USE_DEVICE:
>  	  case OMP_CLAUSE_USE_DEVICE_PTR:
>  	  case OMP_CLAUSE_IS_DEVICE_PTR:
>  	    var = OMP_CLAUSE_DECL (c);
> -	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
> +	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR
> +		|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE)
>  	      x = build_sender_ref (var, ctx);
>  	    else
>  	      x = build_receiver_ref (var, false, ctx);
> @@ -16761,6 +16796,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region,
>  	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
>  	case GF_OMP_TARGET_KIND_OACC_KERNELS:
>  	case GF_OMP_TARGET_KIND_OACC_DATA:
> +	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
>  	  break;
>  	case GF_OMP_TARGET_KIND_UPDATE:
>  	case GF_OMP_TARGET_KIND_ENTER_DATA:

	Jakub
Julian Brown Nov. 19, 2015, 2:26 p.m. UTC | #2
On Thu, 19 Nov 2015 14:13:45 +0100
Jakub Jelinek <jakub@redhat.com> wrote:

> On Wed, Nov 18, 2015 at 12:47:47PM +0000, Julian Brown wrote:
> 
> The FE/gimplifier part is okay, but I really don't like the
> omp-low.c changes, mostly the *lookup_decl_in_outer_ctx* changes.
> If I count well, we have right now 27 maybe_lookup_decl_in_outer_ctx
> callers and 7 lookup_decl_in_outer_ctx callers, you want to change
> behavior of 1 maybe_lookup_decl_in_outer_ctx and 1
> lookup_decl_in_outer_ctx.  Why exactly those 2 and not the others?

The not-very-good reason is that those are the merely the places that
allowed the supplied examples to work, and I'm wary of changing other
code that I don't understand very well.

> What are the exact rules (what does the standard say about it)?
> I'd expect that all phases (scan_sharing_clauses, lower_omp* and
> expand_omp*) should agree on the same behavior, otherwise I can't see
> how it can work properly.

OK, thanks -- as to what the standard says, it's so ill-specified in
this area that nothing can be learned about the behaviour of offloaded
regions within host_data constructs, and my question about that on the
technical mailing list is still unanswered (actually Nathan suggested
in private mail that the conservative thing to do would be to disallow
offloaded regions entirely within host_data constructs, so maybe that's
the way to go).

OpenMP 4.5 seems to *not* specify the skipping-over behaviour for
use_device_ptr variables (p105, lines 20-23):

"The is_device_ptr clause is used to indicate that a list item is a
device pointer already in the device data environment and that it
should be used directly. Support for device pointers created outside
of OpenMP, specifically outside of the omp_target_alloc routine and the
use_device_ptr clause, is implementation defined."

That suggests that use_device_ptr is a valid way to create device
pointers for use in enclosed target regions: the behaviour I assumed
was wrong for OpenACC. So I think my guess at the "most-obvious"
behaviour was probably misguided anyway.

It's maybe even more complicated. Consider the example:

char x[1024];

#pragma acc enter data copyin(x)

#pragma acc host_data use_device(x)
{
  target_primitive(x);
  #pragma acc parallel present(x)    [1]
  {
    x[5] = 0;                        [2]
  }
}

Here, the "present" clause marked [1] will fail (because 'x' is a
target pointer now). If it's omitted, the array access [2] will cause an
implicit present_or_copy to be used for the 'x' pointer (which again
will fail, because now 'x' points to target data). Maybe what we
actually need is,

#pragma acc host_data use_device(x)
{
  target_primitive(x);
  #pragma acc parallel deviceptr(x)
  {
    ...
  }
}

with the deviceptr(x) clause magically substituted in the parallel
construct, but I'm struggling to see how we could justify doing that
when that behaviour's not mentioned in the spec at all.

Aha, so: maybe manually using deviceptr(x) is implicitly mandatory in
this situation, and missing it out should be an error? That suddenly
seems to make most sense. I'll see about fixing the patch to do that.

Julian
Jakub Jelinek Nov. 19, 2015, 3:57 p.m. UTC | #3
On Thu, Nov 19, 2015 at 02:26:50PM +0000, Julian Brown wrote:
> OK, thanks -- as to what the standard says, it's so ill-specified in
> this area that nothing can be learned about the behaviour of offloaded
> regions within host_data constructs, and my question about that on the
> technical mailing list is still unanswered (actually Nathan suggested
> in private mail that the conservative thing to do would be to disallow
> offloaded regions entirely within host_data constructs, so maybe that's
> the way to go).
> 
> OpenMP 4.5 seems to *not* specify the skipping-over behaviour for
> use_device_ptr variables (p105, lines 20-23):
> 
> "The is_device_ptr clause is used to indicate that a list item is a
> device pointer already in the device data environment and that it
> should be used directly. Support for device pointers created outside
> of OpenMP, specifically outside of the omp_target_alloc routine and the
> use_device_ptr clause, is implementation defined."
> 
> That suggests that use_device_ptr is a valid way to create device
> pointers for use in enclosed target regions: the behaviour I assumed
> was wrong for OpenACC. So I think my guess at the "most-obvious"
> behaviour was probably misguided anyway.

use_device_ptr kind of privatizes the variable, the private variable being
the device pointer corresponding to the host pointer outside of the target
data with use_device_ptr clause.

And, if you want to use that device pointer in a target region, it should be
on the is_device_ptr clause on the target construct.  See e.g.
libgomp.c/target-18.c testcase.
  int a[4];
...
  #pragma omp target data map(to:a)
  #pragma omp target data use_device_ptr(a) map(from:err)
  #pragma omp target is_device_ptr(a) private(i) map(from:err)
  {
    err = 0;
    for (i = 0; i < 4; i++)
      if (a[i] != 23 + i)
        err = 1;
  }
The implementation has this way a choice how to implement device pointers
(what use_device_ptr gives you, or say omp_target_alloc returns)
- either (GCC's choice at least for the XeonPhi and hopefully PTX, HSA does
not care, as it shares address space) implement them as host pointer
encoding the bits the target device wants to use, or some kind of
descriptor.  In the former case, is_device_ptr is essentially a
firstprivate, you bitwise copy the device pointer from the host to target
device, where you can dereference it etc.  In the descriptor case you'd
do some transformation of the host side representation of the device pointer
to the device side.

> 
> It's maybe even more complicated. Consider the example:
> 
> char x[1024];
> 
> #pragma acc enter data copyin(x)
> 
> #pragma acc host_data use_device(x)
> {
>   target_primitive(x);
>   #pragma acc parallel present(x)    [1]
>   {
>     x[5] = 0;                        [2]
>   }
> }

If it is unclear, I think disallowing acc {parallel,kernels} inside of
acc host_data might be too big hammer, but perhaps just erroring out
or warning during gimplification that if you (explicitly or implicitly)
try to map a var that is in use_device clause in some outer context,
it is either wrong, unsupported or will not do what users think?

I will double check on omp-lang, but supposedly we could for OpenMP
warn in similar cases (use_device_ptr clause instead of use_device),
except when it is passed to is_device_ptr clause, because I think the
behavior is just unspecified otherwise.
> 
> Here, the "present" clause marked [1] will fail (because 'x' is a
> target pointer now). If it's omitted, the array access [2] will cause an
> implicit present_or_copy to be used for the 'x' pointer (which again
> will fail, because now 'x' points to target data). Maybe what we
> actually need is,
> 
> #pragma acc host_data use_device(x)
> {
>   target_primitive(x);
>   #pragma acc parallel deviceptr(x)
>   {
>     ...
>   }
> }
> 
> with the deviceptr(x) clause magically substituted in the parallel
> construct, but I'm struggling to see how we could justify doing that
> when that behaviour's not mentioned in the spec at all.

Is deviceptr as above meant to work?  That is the OpenACC counterpart
of is_device_ptr, right?  If yes, then I'd suggest just warning if you
try to implicitly or explicitly map something use_device in outer contexts,
and just make sure you don't ICE on the cases where you warn.
If the standard does not say what it means, then it is unspecified
behavior...

	Jakub
diff mbox

Patch

commit 0201a5927c380da65d6400afad4a0e277fb85786
Author: Julian Brown <julian@codesourcery.com>
Date:   Mon Nov 2 06:31:47 2015 -0800

    OpenACC host_data support using mapping regions.

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 12c3e75..56cf697 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1251,6 +1251,7 @@  static const struct omp_pragma_def oacc_pragmas[] = {
   { "declare", PRAGMA_OACC_DECLARE },
   { "enter", PRAGMA_OACC_ENTER_DATA },
   { "exit", PRAGMA_OACC_EXIT_DATA },
+  { "host_data", PRAGMA_OACC_HOST_DATA },
   { "kernels", PRAGMA_OACC_KERNELS },
   { "loop", PRAGMA_OACC_LOOP },
   { "parallel", PRAGMA_OACC_PARALLEL },
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index 999ac67..dd246b9 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -33,6 +33,7 @@  enum pragma_kind {
   PRAGMA_OACC_DECLARE,
   PRAGMA_OACC_ENTER_DATA,
   PRAGMA_OACC_EXIT_DATA,
+  PRAGMA_OACC_HOST_DATA,
   PRAGMA_OACC_KERNELS,
   PRAGMA_OACC_LOOP,
   PRAGMA_OACC_PARALLEL,
@@ -167,6 +168,7 @@  enum pragma_omp_clause {
   PRAGMA_OACC_CLAUSE_SELF,
   PRAGMA_OACC_CLAUSE_SEQ,
   PRAGMA_OACC_CLAUSE_TILE,
+  PRAGMA_OACC_CLAUSE_USE_DEVICE,
   PRAGMA_OACC_CLAUSE_VECTOR,
   PRAGMA_OACC_CLAUSE_VECTOR_LENGTH,
   PRAGMA_OACC_CLAUSE_WAIT,
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7b10764..0a5c8bb 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -10267,6 +10267,8 @@  c_parser_omp_clause_name (c_parser *parser)
 	    result = PRAGMA_OMP_CLAUSE_UNTIED;
 	  else if (!strcmp ("use_device_ptr", p))
 	    result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR;
+	  else if (!strcmp ("use_device", p))
+	    result = PRAGMA_OACC_CLAUSE_USE_DEVICE;
 	  break;
 	case 'v':
 	  if (!strcmp ("vector", p))
@@ -11619,6 +11621,15 @@  c_parser_oacc_clause_tile (c_parser *parser, tree list)
   return c;
 }
 
+/* OpenACC 2.0:
+   use_device ( variable-list ) */
+
+static tree
+c_parser_oacc_clause_use_device (c_parser *parser, tree list)
+{
+  return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list);
+}
+
 /* OpenACC:
    wait ( int-expr-list ) */
 
@@ -12928,6 +12939,10 @@  c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask,
 	  clauses = c_parser_oacc_data_clause (parser, c_kind, clauses);
 	  c_name = "self";
 	  break;
+	case PRAGMA_OACC_CLAUSE_USE_DEVICE:
+	  clauses = c_parser_oacc_clause_use_device (parser, clauses);
+	  c_name = "use_device";
+	  break;
 	case PRAGMA_OACC_CLAUSE_SEQ:
 	  clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ,
 						clauses);
@@ -13577,6 +13592,29 @@  c_parser_oacc_enter_exit_data (c_parser *parser, bool enter)
 
 
 /* OpenACC 2.0:
+   # pragma acc host_data oacc-data-clause[optseq] new-line
+     structured-block
+*/
+
+#define OACC_HOST_DATA_CLAUSE_MASK					\
+	( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) )
+
+static tree
+c_parser_oacc_host_data (location_t loc, c_parser *parser)
+{
+  tree stmt, clauses, block;
+
+  clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK,
+				       "#pragma acc host_data");
+
+  block = c_begin_omp_parallel ();
+  add_stmt (c_parser_omp_structured_block (parser));
+  stmt = c_finish_oacc_host_data (loc, clauses, block);
+  return stmt;
+}
+
+
+/* OpenACC 2.0:
 
    # pragma acc loop oacc-loop-clause[optseq] new-line
      structured-block
@@ -16884,6 +16922,9 @@  c_parser_omp_construct (c_parser *parser)
     case PRAGMA_OACC_DATA:
       stmt = c_parser_oacc_data (loc, parser);
       break;
+    case PRAGMA_OACC_HOST_DATA:
+      stmt = c_parser_oacc_host_data (loc, parser);
+      break;
     case PRAGMA_OACC_KERNELS:
     case PRAGMA_OACC_PARALLEL:
       strcpy (p_name, "#pragma acc");
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 6bc216a..848131e 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -653,6 +653,7 @@  extern tree c_finish_goto_ptr (location_t, tree);
 extern tree c_expr_to_decl (tree, bool *, bool *);
 extern tree c_finish_omp_construct (location_t, enum tree_code, tree, tree);
 extern tree c_finish_oacc_data (location_t, tree, tree);
+extern tree c_finish_oacc_host_data (location_t, tree, tree);
 extern tree c_begin_omp_parallel (void);
 extern tree c_finish_omp_parallel (location_t, tree, tree);
 extern tree c_begin_omp_task (void);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index c18c307..837775b 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -11597,6 +11597,25 @@  c_finish_oacc_data (location_t loc, tree clauses, tree block)
   return add_stmt (stmt);
 }
 
+/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound
+   statement.  LOC is the location of the OACC_HOST_DATA.  */
+
+tree
+c_finish_oacc_host_data (location_t loc, tree clauses, tree block)
+{
+  tree stmt;
+
+  block = c_end_compound_stmt (loc, block, true);
+
+  stmt = make_node (OACC_HOST_DATA);
+  TREE_TYPE (stmt) = void_type_node;
+  OACC_HOST_DATA_CLAUSES (stmt) = clauses;
+  OACC_HOST_DATA_BODY (stmt) = block;
+  SET_EXPR_LOCATION (stmt, loc);
+
+  return add_stmt (stmt);
+}
+
 /* Like c_begin_compound_stmt, except force the retention of the BLOCK.  */
 
 tree
@@ -13040,6 +13059,7 @@  c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd)
 	  bitmap_set_bit (&map_head, DECL_UID (t));
 	  goto check_dup_generic;
 
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_IS_DEVICE_PTR:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	  t = OMP_CLAUSE_DECL (c);
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 160bf1e..2300220 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6349,6 +6349,7 @@  extern void finish_omp_threadprivate		(tree);
 extern tree begin_omp_structured_block		(void);
 extern tree finish_omp_structured_block		(tree);
 extern tree finish_oacc_data			(tree, tree);
+extern tree finish_oacc_host_data		(tree, tree);
 extern tree finish_omp_construct		(enum tree_code, tree, tree);
 extern tree begin_omp_parallel			(void);
 extern tree finish_omp_parallel			(tree, tree);
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 0e1116b..462aef7 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -29230,6 +29230,8 @@  cp_parser_omp_clause_name (cp_parser *parser)
 	    result = PRAGMA_OMP_CLAUSE_UNTIED;
 	  else if (!strcmp ("use_device_ptr", p))
 	    result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR;
+	  else if (!strcmp ("use_device", p))
+	    result = PRAGMA_OACC_CLAUSE_USE_DEVICE;
 	  break;
 	case 'v':
 	  if (!strcmp ("vector", p))
@@ -31596,6 +31598,11 @@  cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask,
 	  clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses);
 	  c_name = "self";
 	  break;
+	case PRAGMA_OACC_CLAUSE_USE_DEVICE:
+	  clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE,
+					    clauses);
+	  c_name = "use_device";
+	  break;
 	case PRAGMA_OACC_CLAUSE_SEQ:
 	  clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ,
 						 clauses, here);
@@ -34507,6 +34514,30 @@  cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok)
   return stmt;
 }
 
+#define OACC_HOST_DATA_CLAUSE_MASK					\
+  ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) )
+
+/* OpenACC 2.0:
+  # pragma acc host_data <clauses> new-line
+  structured-block  */
+
+static tree
+cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok)
+{
+  tree stmt, clauses, block;
+  unsigned int save;
+
+  clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK,
+					"#pragma acc host_data", pragma_tok);
+
+  block = begin_omp_parallel ();
+  save = cp_parser_begin_omp_structured_block (parser);
+  cp_parser_statement (parser, NULL_TREE, false, NULL);
+  cp_parser_end_omp_structured_block (parser, save);
+  stmt = finish_oacc_host_data (clauses, block);
+  return stmt;
+}
+
 /* OpenACC 2.0:
    # pragma acc declare oacc-data-clause[optseq] new-line
 */
@@ -35926,6 +35957,9 @@  cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok)
     case PRAGMA_OACC_EXIT_DATA:
       stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false);
       break;
+    case PRAGMA_OACC_HOST_DATA:
+      stmt = cp_parser_oacc_host_data (parser, pragma_tok);
+      break;
     case PRAGMA_OACC_KERNELS:
     case PRAGMA_OACC_PARALLEL:
       strcpy (p_name, "#pragma acc");
@@ -36504,6 +36538,7 @@  cp_parser_pragma (cp_parser *parser, enum pragma_context context)
     case PRAGMA_OACC_DATA:
     case PRAGMA_OACC_ENTER_DATA:
     case PRAGMA_OACC_EXIT_DATA:
+    case PRAGMA_OACC_HOST_DATA:
     case PRAGMA_OACC_KERNELS:
     case PRAGMA_OACC_PARALLEL:
     case PRAGMA_OACC_LOOP:
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index e7e5d8e..3bb6184 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6835,6 +6835,7 @@  finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd)
 	    }
 	  break;
 
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_IS_DEVICE_PTR:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	  field_ok = allow_fields;
@@ -7390,6 +7391,24 @@  finish_oacc_data (tree clauses, tree block)
   return add_stmt (stmt);
 }
 
+/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound
+   statement.  */
+
+tree
+finish_oacc_host_data (tree clauses, tree block)
+{
+  tree stmt;
+
+  block = finish_omp_structured_block (block);
+
+  stmt = make_node (OACC_HOST_DATA);
+  TREE_TYPE (stmt) = void_type_node;
+  OACC_HOST_DATA_CLAUSES (stmt) = clauses;
+  OACC_HOST_DATA_BODY (stmt) = block;
+
+  return add_stmt (stmt);
+}
+
 /* Generate OMP construct CODE, with BODY and CLAUSES as its compound
    statement.  */
 
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 7764201..f1abf5c 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -1356,6 +1356,9 @@  dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs,
     case GF_OMP_TARGET_KIND_OACC_DECLARE:
       kind = " oacc_declare";
       break;
+    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+      kind = " oacc_host_data";
+      break;
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 6eb22de..3e9fb2e 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -171,6 +171,7 @@  enum gf_mask {
     GF_OMP_TARGET_KIND_OACC_UPDATE = 8,
     GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9,
     GF_OMP_TARGET_KIND_OACC_DECLARE = 10,
+    GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11,
 
     /* True on an GIMPLE_OMP_RETURN statement if the return does not require
        a thread synchronization via some sort of barrier.  The exact barrier
@@ -6003,6 +6004,7 @@  is_gimple_omp_oacc (const gimple *stmt)
 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
 	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
+	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
 	  return true;
 	default:
 	  return false;
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index a3ed378..cedc485 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -6414,6 +6414,7 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
       case OMP_TARGET_DATA:
       case OMP_TARGET_ENTER_DATA:
       case OMP_TARGET_EXIT_DATA:
+      case OACC_HOST_DATA:
 	ctx->target_firstprivatize_array_bases = true;
       default:
 	break;
@@ -6679,6 +6680,7 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	    case OMP_TARGET_DATA:
 	    case OMP_TARGET_ENTER_DATA:
 	    case OMP_TARGET_EXIT_DATA:
+	    case OACC_HOST_DATA:
 	      if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
 		  || (OMP_CLAUSE_MAP_KIND (c)
 		      == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
@@ -7088,6 +7090,7 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	    }
 	  goto do_notice;
 
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	  flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT;
 	  goto do_add;
@@ -7323,7 +7326,6 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
-	case OMP_CLAUSE_USE_DEVICE:
 	  remove = true;
 	  break;
 
@@ -9196,6 +9198,9 @@  gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
     case OMP_TEAMS:
       ort = OMP_TEAMS_COMBINED (expr) ? ORT_COMBINED_TEAMS : ORT_TEAMS;
       break;
+    case OACC_HOST_DATA:
+      ort = ORT_TARGET_DATA;
+      break;
     default:
       gcc_unreachable ();
     }
@@ -9217,6 +9222,7 @@  gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
 	  switch (TREE_CODE (expr))
 	    {
 	    case OACC_DATA:
+	    case OACC_HOST_DATA:
 	      end_ix = BUILT_IN_GOACC_DATA_END;
 	      break;
 	    case OMP_TARGET_DATA:
@@ -9248,6 +9254,10 @@  gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
       stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS,
 				      OMP_CLAUSES (expr));
       break;
+    case OACC_HOST_DATA:
+      stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_HOST_DATA,
+				      OMP_CLAUSES (expr));
+      break;
     case OACC_PARALLEL:
       stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL,
 				      OMP_CLAUSES (expr));
@@ -10357,16 +10367,12 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  ret = GS_ALL_DONE;
 	  break;
 
-	case OACC_HOST_DATA:
-	  sorry ("directive not yet implemented");
-	  ret = GS_ALL_DONE;
-	  break;
-
 	case OACC_DECLARE:
 	  gimplify_oacc_declare (expr_p, pre_p);
 	  ret = GS_ALL_DONE;
 	  break;
 
+	case OACC_HOST_DATA:
 	case OACC_DATA:
 	case OACC_KERNELS:
 	case OACC_PARALLEL:
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index d540dab..35f5014 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -47,6 +47,8 @@  DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
 		   BT_FN_VOID_INT_INT_VAR,
 		   ATTR_NOTHROW_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data",
+		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
 
 DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
 			    BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 830db75..756ea5a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -390,8 +390,8 @@  scan_omp_op (tree *tp, omp_context *ctx)
 }
 
 static void lower_omp (gimple_seq *, omp_context *);
-static tree lookup_decl_in_outer_ctx (tree, omp_context *);
-static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
+static tree lookup_decl_in_outer_ctx (tree, omp_context *, bool = false);
+static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *, bool = false);
 
 /* Find an OMP clause of type KIND within CLAUSES.  */
 
@@ -1935,6 +1935,7 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	  install_var_local (decl, ctx);
 	  break;
 
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	  decl = OMP_CLAUSE_DECL (c);
 	  if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
@@ -2137,7 +2138,6 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
-	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE__CACHE_:
 	  sorry ("Clause not supported yet");
 	  break;
@@ -2288,6 +2288,7 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_NOGROUP:
 	case OMP_CLAUSE_DEFAULTMAP:
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	case OMP_CLAUSE__CILK_FOR_COUNT_:
 	case OMP_CLAUSE_ASYNC:
@@ -2305,7 +2306,6 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
-	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE__CACHE_:
 	  sorry ("Clause not supported yet");
 	  break;
@@ -3608,6 +3608,8 @@  check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
 	    case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break;
 	    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
 	      stmt_name = "enter/exit data"; break;
+	    case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
+	      break;
 	    default: gcc_unreachable ();
 	    }
 	  switch (gimple_omp_target_kind (ctx->stmt))
@@ -3619,6 +3621,8 @@  check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
 	    case GF_OMP_TARGET_KIND_OACC_KERNELS:
 	      ctx_stmt_name = "kernels"; break;
 	    case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
+	    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+	      ctx_stmt_name = "host_data"; break;
 	    default: gcc_unreachable ();
 	    }
 
@@ -3941,13 +3945,22 @@  maybe_lookup_ctx (gimple *stmt)
     parallelism happens only rarely.  */
 
 static tree
-lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
+lookup_decl_in_outer_ctx (tree decl, omp_context *ctx,
+			  bool skip_hostdata)
 {
   tree t;
   omp_context *up;
 
   for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
-    t = maybe_lookup_decl (decl, up);
+    {
+      if (skip_hostdata
+	  && gimple_code (up->stmt) == GIMPLE_OMP_TARGET
+	  && gimple_omp_target_kind (up->stmt)
+	     == GF_OMP_TARGET_KIND_OACC_HOST_DATA)
+	continue;
+
+      t = maybe_lookup_decl (decl, up);
+    }
 
   gcc_assert (!ctx->is_nested || t || is_global_var (decl));
 
@@ -3959,13 +3972,22 @@  lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
    in outer contexts.  */
 
 static tree
-maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
+maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx,
+				bool skip_hostdata)
 {
   tree t = NULL;
   omp_context *up;
 
   for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
-    t = maybe_lookup_decl (decl, up);
+    {
+      if (skip_hostdata
+	  && gimple_code (up->stmt) == GIMPLE_OMP_TARGET
+	  && gimple_omp_target_kind (up->stmt)
+	     == GF_OMP_TARGET_KIND_OACC_HOST_DATA)
+	continue;
+
+      t = maybe_lookup_decl (decl, up);
+    }
 
   return t ? t : decl;
 }
@@ -12499,6 +12521,7 @@  expand_omp_target (struct omp_region *region)
       break;
     case GF_OMP_TARGET_KIND_DATA:
     case GF_OMP_TARGET_KIND_OACC_DATA:
+    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
       data_region = true;
       break;
     default:
@@ -12742,6 +12765,9 @@  expand_omp_target (struct omp_region *region)
     case GF_OMP_TARGET_KIND_OACC_DECLARE:
       start_ix = BUILT_IN_GOACC_DECLARE;
       break;
+    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+      start_ix = BUILT_IN_GOACC_HOST_DATA;
+      break;
     default:
       gcc_unreachable ();
     }
@@ -12866,6 +12892,7 @@  expand_omp_target (struct omp_region *region)
     case BUILT_IN_GOACC_DATA_START:
     case BUILT_IN_GOACC_DECLARE:
     case BUILT_IN_GOMP_TARGET_DATA:
+    case BUILT_IN_GOACC_HOST_DATA:
       break;
     case BUILT_IN_GOMP_TARGET:
     case BUILT_IN_GOMP_TARGET_UPDATE:
@@ -13173,6 +13200,7 @@  build_omp_regions_1 (basic_block bb, struct omp_region *parent,
 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
 		case GF_OMP_TARGET_KIND_OACC_DATA:
+		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
 		  break;
 		case GF_OMP_TARGET_KIND_UPDATE:
 		case GF_OMP_TARGET_KIND_ENTER_DATA:
@@ -14972,6 +15000,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       break;
     case GF_OMP_TARGET_KIND_DATA:
     case GF_OMP_TARGET_KIND_OACC_DATA:
+    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
       data_region = true;
       break;
     default:
@@ -15079,7 +15108,8 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	  {
 	    if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
 	      {
-		if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))
+		if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx,
+								   true))
 		    && varpool_node::get_create (var)->offloadable)
 		  continue;
 
@@ -15178,6 +15208,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	  }
 	break;
 
+      case OMP_CLAUSE_USE_DEVICE:
       case OMP_CLAUSE_USE_DEVICE_PTR:
       case OMP_CLAUSE_IS_DEVICE_PTR:
 	var = OMP_CLAUSE_DECL (c);
@@ -15316,7 +15347,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	      talign = DECL_ALIGN_UNIT (ovar);
 	    if (nc)
 	      {
-		var = lookup_decl_in_outer_ctx (ovar, ctx);
+		var = lookup_decl_in_outer_ctx (ovar, ctx, true);
 		x = build_sender_ref (ovar, ctx);
 
 		if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
@@ -15563,12 +15594,14 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 				    build_int_cstu (tkind_type, tkind));
 	    break;
 
+	  case OMP_CLAUSE_USE_DEVICE:
 	  case OMP_CLAUSE_USE_DEVICE_PTR:
 	  case OMP_CLAUSE_IS_DEVICE_PTR:
 	    ovar = OMP_CLAUSE_DECL (c);
 	    var = lookup_decl_in_outer_ctx (ovar, ctx);
 	    x = build_sender_ref (ovar, ctx);
-	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
+	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR
+		|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE)
 	      tkind = GOMP_MAP_USE_DEVICE_PTR;
 	    else
 	      tkind = GOMP_MAP_FIRSTPRIVATE_INT;
@@ -15771,10 +15804,12 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 				     gimple_build_assign (new_var, x));
 	      }
 	    break;
+	  case OMP_CLAUSE_USE_DEVICE:
 	  case OMP_CLAUSE_USE_DEVICE_PTR:
 	  case OMP_CLAUSE_IS_DEVICE_PTR:
 	    var = OMP_CLAUSE_DECL (c);
-	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
+	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR
+		|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE)
 	      x = build_sender_ref (var, ctx);
 	    else
 	      x = build_receiver_ref (var, false, ctx);
@@ -16761,6 +16796,7 @@  make_gimple_omp_edges (basic_block bb, struct omp_region **region,
 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
 	case GF_OMP_TARGET_KIND_OACC_DATA:
+	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
 	  break;
 	case GF_OMP_TARGET_KIND_UPDATE:
 	case GF_OMP_TARGET_KIND_ENTER_DATA:
diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c
index 1f6311c..7579cb6 100644
--- a/gcc/tree-nested.c
+++ b/gcc/tree-nested.c
@@ -1072,6 +1072,7 @@  convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SHARED:
 	case OMP_CLAUSE_TO_DECLARE:
 	case OMP_CLAUSE_LINK:
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	case OMP_CLAUSE_IS_DEVICE_PTR:
 	do_decl_clause:
@@ -1719,6 +1720,7 @@  convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SHARED:
 	case OMP_CLAUSE_TO_DECLARE:
 	case OMP_CLAUSE_LINK:
+	case OMP_CLAUSE_USE_DEVICE:
 	case OMP_CLAUSE_USE_DEVICE_PTR:
 	case OMP_CLAUSE_IS_DEVICE_PTR:
 	do_decl_clause:
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 4d42c42..ea9344d 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -394,6 +394,7 @@  GOACC_2.0.1 {
   global:
 	GOACC_declare;
 	GOACC_parallel_keyed;
+	GOACC_host_data;
 } GOACC_2.0;
 
 GOMP_PLUGIN_1.0 {
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index a80ede4..db7cab3 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -490,6 +490,46 @@  GOACC_wait (int async, int num_waits, ...)
     goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
 }
 
+void
+GOACC_host_data (int device, size_t mapnum,
+		 void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+  struct target_mem_desc *tgt;
+
+#ifdef HAVE_INTTYPES_H
+  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
+	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
+#else
+  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
+	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
+#endif
+
+  goacc_lazy_initialize ();
+
+  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
+
+  /* Host fallback or 'do nothing'.  */
+  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+      || host_fallback)
+    {
+      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+			   GOMP_MAP_VARS_OPENACC);
+      tgt->prev = thr->mapped_data;
+      thr->mapped_data = tgt;
+
+      return;
+    }
+
+  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
+  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
+		       GOMP_MAP_VARS_OPENACC);
+  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
+  tgt->prev = thr->mapped_data;
+  thr->mapped_data = tgt;
+}
+
 int
 GOACC_get_num_threads (void)
 {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
new file mode 100644
index 0000000..8dc7c2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
@@ -0,0 +1,118 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+
+void
+saxpy_host (int n, float a, float *x, float *y)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    y[i] = y[i] + a * x[i];
+}
+
+#pragma acc routine
+void
+saxpy_target (int n, float a, float *x, float *y)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    y[i] = y[i] + a * x[i];
+}
+
+int
+main(int argc, char **argv)
+{
+#define N 8
+  int i;
+  float x_ref[N], y_ref[N];
+  float x[N], y[N];
+  cublasHandle_t h;
+  float a = 2.0;
+
+#pragma acc data copyin (x[0:N]) copy (y[0:N])
+  {
+    float *xp, *yp;
+#pragma acc host_data use_device (x, y)
+    {
+#pragma acc parallel pcopy (xp, yp)
+      {
+        xp = x;
+	yp = y;
+      }
+    }
+
+    if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y))
+      abort ();
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      x[i] = x_ref[i] = 4.0 + i;
+      y[i] = y_ref[i] = 3.0;
+    }
+
+  saxpy_host (N, a, x_ref, y_ref);
+
+  cublasCreate (&h);
+
+#pragma acc data copyin (x[0:N]) copy (y[0:N])
+  {
+#pragma acc host_data use_device (x, y)
+    {
+      cublasSaxpy (h, N, &a, x, 1, y, 1);
+    }
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (y[i] != y_ref[i])
+        abort ();
+    }
+
+#pragma acc data create (x[0:N]) copyout (y[0:N])
+  {
+#pragma acc kernels
+    for (i = 0; i < N; i++)
+      y[i] = 3.0;
+
+#pragma acc host_data use_device (x, y)
+    {
+      cublasSaxpy (h, N, &a, x, 1, y, 1);
+    }
+  }
+
+  cublasDestroy (h);
+
+  for (i = 0; i < N; i++)
+    {
+      if (y[i] != y_ref[i])
+        abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    y[i] = 3.0;
+
+#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
+  {
+#pragma acc host_data use_device (x, y)
+    {
+#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
+      saxpy_target (N, a, x, y);
+    }
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (y[i] != y_ref[i])
+        abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c
new file mode 100644
index 0000000..614f143
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+char *global_in_host;
+
+void foo (char *in)
+{
+  if (!acc_is_present (global_in_host, sizeof (*global_in_host))
+      || in != acc_deviceptr (global_in_host))
+    abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+  char mydata[1024];
+
+  global_in_host = mydata;
+
+#pragma acc data copyin(mydata)
+  {
+#pragma acc host_data use_device (mydata)
+    {
+      foo (mydata);
+    }
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c
new file mode 100644
index 0000000..942a01d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 1024
+
+int main (int argc, char* argv[])
+{
+  int x[N];
+
+#pragma acc data copyin (x[0:N])
+  {
+    int *xp;
+#pragma acc host_data use_device (x)
+    {
+#pragma acc parallel present (x) copyout (xp)
+      {
+        xp = x;
+      }
+    }
+
+    if (xp != acc_deviceptr (x))
+      abort ();
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c
new file mode 100644
index 0000000..f53fc90
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 1024
+
+int main (int argc, char* argv[])
+{
+  int x[N], *xp2;
+
+#pragma acc data copyin (x[0:N])
+  {
+    int *xp;
+#pragma acc host_data use_device (x)
+    {
+#pragma acc data present (x)
+      {
+        xp = x;
+      }
+      xp2 = x;
+    }
+
+    if (xp != acc_deviceptr (x) || xp2 != xp)
+      abort ();
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c
new file mode 100644
index 0000000..82c84a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c
@@ -0,0 +1,38 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 1024
+
+int main (int argc, char* argv[])
+{
+  int x[N], y[N], *yp;
+
+  yp = y + 1;
+
+#pragma acc data copyin (x[0:N])
+  {
+    int *xp, *yp2;
+#pragma acc host_data use_device (x)
+    {
+#pragma acc data present (x) copyin (y)
+      {
+#pragma acc host_data use_device (yp)
+	{
+	  xp = x;
+	  yp2 = yp;
+	}
+
+        if (yp2 != acc_deviceptr (yp))
+	  abort ();
+      }
+    }
+
+    if (xp != acc_deviceptr (x))
+      abort ();
+
+  }
+
+  return 0;
+}