[6/8,og9] Make OpenACC function-parameter explosion optional
diff mbox series

Message ID d1a4d60a3bb42efaba6ac417af4116f053d32f93.1564776081.git.julian@codesourcery.com
State New
Headers show
Series
  • AMD GCN offloading support
Related show

Commit Message

Julian Brown Aug. 2, 2019, 8:12 p.m. UTC
This patch adjusts Cesar's implementation of function-argument flattening,
posted (for the og7 branch) at the link below, so that it only affects NVPTX:

https://gcc.gnu.org/ml/gcc-patches/2017-12/msg01456.html

Changes made are as follows (briefly):

  * The GOACC_parallel_keyed_v2 libgomp entry point has been removed, in
    favour of using a launch tag (GOMP_LAUNCH_ARGS_EXPLODED) to indicate
    that an offload function should be launched with flattened-out
    arguments (rather than passing all arguments in an array).

  * A new target hook (TARGET_GOACC_EXPLODE_ARGS) has been introduced.  This
    must be implemented in the *host* (not offload) compiler, and returns
    TRUE if offload kernels should be called with flattened-out arguments.

The patch also contains the configury bits to disable building of libffi
for the AMD GCN target, as is required for the build to complete.

Julian

ChangeLog

2019-07-31  Julian Brown  <julian@codesourcery.com>

	* configure.ac (amdgcn*-*-*): Add target-libffi to noconfigdirs for AMD
	GCN.
	* configure: Regenerated.

	gcc/
	* builtin-types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR):
	Remove.
	* config/i386/i386.c (ix86_goacc_explode_args): New.
	(TARGET_GOACC_EXPLODE_ARGS): Define, using above function.
	* doc/tm.texi: Regenerated.
	* doc/tm.texi.in: Add TARGET_GOACC_EXPLODE_ARGS hook.
	* fortran/types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR):
	Remove.
	* omp-builtins.def (GOACC_parallel_keyed_v2): Remove.
	* omp-expand.c (expand_omp_target): Use explode_args target hook.
	Use GOMP_LAUNCH_ARGS_EXPLODED launch tag.
	* omp-low.c (build_receiver_ref, install_parm_decl,
	create_omp_child_function, scan_omp_target, lower_omp_target): Use
	explode_args target hook.
	* target.def (explode_args): New target hook.
	* tree-ssa-structalias.c: Include target.h and gomp-constants.h.
	(find_func_aliases_for_builtin_call): Conditionalise disabling of pass
	for OpenACC parallel regions based on explode_args target hook.  Remove
	'params' from BUILT_IN_GOACC_PARALLEL arguments.
	(find_func_clobbers): Likewise.
	(ipa_pta_execute): Update for removed 'params' argument.

	include/
	* gomp-constants.h (GOMP_LAUNCH_ARGS_EXPLODED): Define.

	libgomp/
	* libgomp.map (GOACC_2.0.GOMP_4_BRANCH): Remove GOACC_parallel_keyed_v2.
	* libgomp_g.h (GOACC_parallel_keyed_v2): Remove prototype.
	* oacc-parallel.c (GOACC_parallel_keyed_internal): Rename to...
	(GOACC_parallel_keyed): ...this.  Handle GOMP_LAUNCH_ARGS_EXPLODED
	launch tag.  Remove previous wrapper functions.
	(GOACC_parallel_keyed_v2): Remove.
---
 ChangeLog.openacc          |  6 +++
 configure                  |  3 ++
 configure.ac               |  3 ++
 gcc/ChangeLog.openacc      | 24 ++++++++++
 gcc/builtin-types.def      |  4 --
 gcc/config/i386/i386.c     | 32 +++++++++++++
 gcc/doc/tm.texi            |  5 ++
 gcc/doc/tm.texi.in         |  2 +
 gcc/fortran/types.def      |  4 --
 gcc/omp-builtins.def       |  4 +-
 gcc/omp-expand.c           | 18 ++++----
 gcc/omp-low.c              | 28 +++++++-----
 gcc/target.def             |  7 +++
 gcc/tree-ssa-structalias.c | 52 +++++++++++++++------
 include/ChangeLog.openacc  |  4 ++
 include/gomp-constants.h   |  1 +
 libgomp/ChangeLog.openacc  |  9 ++++
 libgomp/libgomp.map        |  1 -
 libgomp/libgomp_g.h        |  2 -
 libgomp/oacc-parallel.c    | 93 ++++++++++++++++----------------------
 20 files changed, 201 insertions(+), 101 deletions(-)

Patch
diff mbox series

diff --git a/ChangeLog.openacc b/ChangeLog.openacc
index 1b54affbe80..156a9b9a798 100644
--- a/ChangeLog.openacc
+++ b/ChangeLog.openacc
@@ -1,3 +1,9 @@ 
+2019-07-31  Julian Brown  <julian@codesourcery.com>
+
+	* configure.ac (amdgcn*-*-*): Add target-libffi to noconfigdirs for AMD
+	GCN.
+	* configure: Regenerated.
+
 2018-12-20  Maciej W. Rozycki  <macro@codesourcery.com>
 
 	* Makefile.def (lang_env_dependencies): Disable `cxx' dependency
diff --git a/configure b/configure
index 033929b0ab8..ef00d1f5249 100755
--- a/configure
+++ b/configure
@@ -3466,6 +3466,9 @@  case "${target}" in
   alpha*-*-*vms*)
     noconfigdirs="$noconfigdirs target-libffi"
     ;;
+  amdgcn*-*-*)
+    noconfigdirs="$noconfigdirs target-libffi"
+    ;;
   arm*-*-freebsd*)
     noconfigdirs="$noconfigdirs target-libffi"
     ;;
diff --git a/configure.ac b/configure.ac
index de361880ba7..5184b82f300 100644
--- a/configure.ac
+++ b/configure.ac
@@ -748,6 +748,9 @@  case "${target}" in
   alpha*-*-*vms*)
     noconfigdirs="$noconfigdirs target-libffi"
     ;;
+  amdgcn*-*-*)
+    noconfigdirs="$noconfigdirs target-libffi"
+    ;;
   arm*-*-freebsd*)
     noconfigdirs="$noconfigdirs target-libffi"
     ;;
diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc
index 4a806549d50..0caa1cd1401 100644
--- a/gcc/ChangeLog.openacc
+++ b/gcc/ChangeLog.openacc
@@ -1,3 +1,27 @@ 
+2019-07-31  Julian Brown  <julian@codesourcery.com>
+
+	* builtin-types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR):
+	Remove.
+	* config/i386/i386.c (ix86_goacc_explode_args): New.
+	(TARGET_GOACC_EXPLODE_ARGS): Define, using above function.
+	* doc/tm.texi: Regenerated.
+	* doc/tm.texi.in: Add TARGET_GOACC_EXPLODE_ARGS hook.
+	* fortran/types.def (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR):
+	Remove.
+	* omp-builtins.def (GOACC_parallel_keyed_v2): Remove.
+	* omp-expand.c (expand_omp_target): Use explode_args target hook.
+	Use GOMP_LAUNCH_ARGS_EXPLODED launch tag.
+	* omp-low.c (build_receiver_ref, install_parm_decl,
+	create_omp_child_function, scan_omp_target, lower_omp_target): Use
+	explode_args target hook.
+	* target.def (explode_args): New target hook.
+	* tree-ssa-structalias.c: Include target.h and gomp-constants.h.
+	(find_func_aliases_for_builtin_call): Conditionalise disabling of pass
+	for OpenACC parallel regions based on explode_args target hook.  Remove
+	'params' from BUILT_IN_GOACC_PARALLEL arguments.
+	(find_func_clobbers): Likewise.
+	(ipa_pta_execute): Update for removed 'params' argument.
+
 2019-07-31  Julian Brown  <julian@codesourcery.com>
 	    Andrew Stubbs  <ams@codesourcery.com>
 
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index 9ee86b4957e..e5c9e063c48 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -826,10 +826,6 @@  DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
 			 BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
 			 BT_PTR, BT_PTR, BT_PTR)
 
-DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
-			 BT_VOID, BT_INT, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
-			 BT_PTR, BT_PTR, BT_PTR)
-
 DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
 			 BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
 			 BT_PTR, BT_INT, BT_INT)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8a1ffd3769f..bd319e3d1b5 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -51206,6 +51206,35 @@  ix86_push_rounding (poly_int64 bytes)
   return ROUND_UP (bytes, UNITS_PER_WORD);
 }
 
+/* Return TRUE if offloaded OpenACC target-code regions should have their
+   parameters passed as separate function arguments, rather than in an array.
+   This can be a performance win on some (NVidia) GPUs.  */
+
+bool
+ix86_goacc_explode_args (void)
+{
+#ifdef OFFLOAD_TARGETS
+  const char *offload_targets = OFFLOAD_TARGETS;
+  if (strstr (offload_targets, "nvptx"))
+    {
+      if (strchr (offload_targets, ','))
+	{
+	  static bool warned_ptx_args = false;
+	  if (!warned_ptx_args)
+	    {
+	      warning (0, "NVidia PTX parameter-passing optimization disabled "
+		       "with multiple offload targets");
+	      warned_ptx_args = true;
+	    }
+	  return false;
+	}
+      return true;
+    }
+
+#endif
+  return false;
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -51981,6 +52010,9 @@  ix86_run_selftests (void)
 #define TARGET_GET_MULTILIB_ABI_NAME \
   ix86_get_multilib_abi_name
 
+#undef TARGET_GOACC_EXPLODE_ARGS
+#define TARGET_GOACC_EXPLODE_ARGS ix86_goacc_explode_args
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9f6bf8d190c..9b88498eb95 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6162,6 +6162,11 @@  memories.  A return value of NULL indicates that the target does not
 handle this VAR_DECL, and normal RTL expanding is resumed.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_GOACC_EXPLODE_ARGS (void)
+Define this hook to TRUE if arguments to offload regions should be
+exploded, i.e. passed as true arguments rather than in an argument array.
+@end deftypefn
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index a3ec9702ac8..c9c4341a35f 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4210,6 +4210,8 @@  address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_GOACC_EXPAND_ACCEL_VAR
 
+@hook TARGET_GOACC_EXPLODE_ARGS
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index 5c976338ff6..b96e292fc81 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -272,7 +272,3 @@  DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
 DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
 			  BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
 			  BT_PTR, BT_PTR, BT_PTR)
-
-DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
-			  BT_VOID, BT_INT, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
-			  BT_PTR, BT_PTR, BT_PTR)
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 99cb8fa336b..9961c287494 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -38,8 +38,8 @@  DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end",
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
 		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
 		   ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed_v2",
-		   BT_FN_VOID_INT_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
+		   BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
 		   ATTR_NOTHROW_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
 		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index c93348037a3..1e15edf9593 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -7306,11 +7306,11 @@  expand_omp_target (struct omp_region *region)
   gomp_target *entry_stmt;
   gimple *stmt;
   edge e;
-  bool offloaded, data_region, oacc_parallel;
+  bool offloaded, data_region, oacc_explode_args;
 
   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
   new_bb = region->entry;
-  oacc_parallel = false;
+  oacc_explode_args = false;
 
   offloaded = is_gimple_omp_offloaded (entry_stmt);
   switch (gimple_omp_target_kind (entry_stmt))
@@ -7319,7 +7319,8 @@  expand_omp_target (struct omp_region *region)
     case GF_OMP_TARGET_KIND_OACC_SERIAL:
     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
-      oacc_parallel = true;
+      if (targetm.goacc.explode_args ())
+	oacc_explode_args = true;
       gcc_fallthrough ();
     case GF_OMP_TARGET_KIND_REGION:
     case GF_OMP_TARGET_KIND_UPDATE:
@@ -7406,7 +7407,7 @@  expand_omp_target (struct omp_region *region)
 	 .OMP_DATA_I may have been converted into a different local
 	 variable.  In which case, we need to keep the assignment.  */
       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
-      if (data_arg && !oacc_parallel)
+      if (data_arg && !oacc_explode_args)
 	{
 	  basic_block entry_succ_bb = single_succ (entry_bb);
 	  gimple_stmt_iterator gsi;
@@ -7772,11 +7773,6 @@  expand_omp_target (struct omp_region *region)
     }
   else
     args.quick_push (device);
-  if (start_ix == BUILT_IN_GOACC_PARALLEL)
-    {
-      tree use_params = oacc_parallel ? integer_one_node : integer_zero_node;
-      args.quick_push (use_params);
-    }
   if (offloaded)
     args.quick_push (build_fold_addr_expr (child_fn));
   args.quick_push (t1);
@@ -7885,6 +7881,10 @@  expand_omp_target (struct omp_region *region)
 				    unsigned_type_node, len);
 	    args[t_wait_idx] = len;
 	  }
+
+	if (tagging && oacc_explode_args)
+	  args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ARGS_EXPLODED,
+					    NULL_TREE, 0));
       }
       break;
     default:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 97c00217d9f..fe911599142 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -562,7 +562,7 @@  build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
 {
   tree x, field = lookup_field (var, ctx);
 
-  if (is_oacc_parallel_or_serial (ctx))
+  if (is_oacc_parallel_or_serial (ctx) && targetm.goacc.explode_args ())
     x = lookup_parm (var, ctx);
   else
     {
@@ -716,7 +716,7 @@  build_sender_ref (tree var, omp_context *ctx)
 static void
 install_parm_decl (tree var, tree type, omp_context *ctx)
 {
-  if (!is_oacc_parallel_or_serial (ctx))
+  if (!is_oacc_parallel_or_serial (ctx) || !targetm.goacc.explode_args ())
     return;
 
   splay_tree_key key = (splay_tree_key) var;
@@ -1932,7 +1932,7 @@  create_omp_child_function (omp_context *ctx, bool task_copy,
   if (task_copy)
     type = build_function_type_list (void_type_node, ptr_type_node,
 				     ptr_type_node, NULL_TREE);
-  else if (is_oacc_parallel_or_serial (ctx))
+  else if (is_oacc_parallel_or_serial (ctx) && targetm.goacc.explode_args ())
     {
       tree *arg_types = (tree *) alloca (sizeof (tree) * map_cnt);
       for (unsigned int i = 0; i < map_cnt; i++)
@@ -2012,7 +2012,7 @@  create_omp_child_function (omp_context *ctx, bool task_copy,
   DECL_CONTEXT (t) = decl;
   DECL_RESULT (decl) = t;
 
-  if (!is_oacc_parallel_or_serial (ctx))
+  if (!is_oacc_parallel_or_serial (ctx) || !targetm.goacc.explode_args ())
     {
       tree data_name = get_identifier (".omp_data_i");
       t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
@@ -2947,7 +2947,7 @@  scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
   bool base_pointers_restrict = false;
   if (offloaded)
     {
-      if (!is_oacc_parallel_or_serial (ctx))
+      if (!is_oacc_parallel_or_serial (ctx) || !targetm.goacc.explode_args ())
 	{
 	  create_omp_child_function (ctx, false);
 	  gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
@@ -9826,6 +9826,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   location_t loc = gimple_location (stmt);
   bool offloaded, data_region;
   unsigned int map_cnt = 0, init_cnt = 0;
+  bool oacc_explode_args = targetm.goacc.explode_args ();
 
   offloaded = is_gimple_omp_offloaded (stmt);
   switch (gimple_omp_target_kind (stmt))
@@ -9883,7 +9884,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   /* Determine init_cnt to finish initialize ctx.  */
 
-  if (is_oacc_parallel_or_serial (ctx))
+  if (is_oacc_parallel_or_serial (ctx) && oacc_explode_args)
     {
       for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
 	switch (OMP_CLAUSE_CODE (c))
@@ -10215,7 +10216,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   if (offloaded)
     {
-      if (is_oacc_parallel_or_serial (ctx))
+      if (is_oacc_parallel_or_serial (ctx) && oacc_explode_args)
 	gcc_assert (init_cnt == map_cnt);
       target_nesting_level++;
       lower_omp (&tgt_body, ctx);
@@ -10459,7 +10460,8 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	    if (s == NULL_TREE && is_gimple_omp_oacc (ctx->stmt))
 	      s = integer_one_node;
 	    s = fold_convert (size_type_node, s);
-	    decl_args = append_decl_arg (ovar, decl_args, ctx);
+	    if (oacc_explode_args)
+	      decl_args = append_decl_arg (ovar, decl_args, ctx);
 	    purpose = size_int (map_idx++);
 	    CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
 	    if (TREE_CODE (s) != INTEGER_CST)
@@ -10601,7 +10603,8 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	    else
 	      s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
 	    s = fold_convert (size_type_node, s);
-	    decl_args = append_decl_arg (ovar, decl_args, ctx);
+	    if (oacc_explode_args)
+	      decl_args = append_decl_arg (ovar, decl_args, ctx);
 	    purpose = size_int (map_idx++);
 	    CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
 	    if (TREE_CODE (s) != INTEGER_CST)
@@ -10674,7 +10677,8 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 				       gimple_build_label (opt_arg_label));
 	      }
 	    s = size_int (0);
-	    decl_args = append_decl_arg (ovar, decl_args, ctx);
+	    if (oacc_explode_args)
+	      decl_args = append_decl_arg (ovar, decl_args, ctx);
 	    purpose = size_int (map_idx++);
 	    CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
 	    gcc_checking_assert (tkind
@@ -10687,7 +10691,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	  }
 
       gcc_assert (map_idx == map_cnt);
-      if (is_oacc_parallel_or_serial (ctx))
+      if (is_oacc_parallel_or_serial (ctx) && oacc_explode_args)
 	DECL_ARGUMENTS (child_fn) = nreverse (decl_args);
 
       DECL_INITIAL (TREE_VEC_ELT (t, 1))
@@ -10727,7 +10731,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
     {
       t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
       /* fixup_child_record_type might have changed receiver_decl's type.  */
-      if (!is_oacc_parallel_or_serial (ctx))
+      if (!is_oacc_parallel_or_serial (ctx) || !oacc_explode_args)
 	{
 	  t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
 	  gimple_seq_add_stmt (&new_body,
diff --git a/gcc/target.def b/gcc/target.def
index 294af6cb1d6..d26b888a485 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1729,6 +1729,13 @@  handle this VAR_DECL, and normal RTL expanding is resumed.",
 rtx, (tree var),
 NULL)
 
+DEFHOOK
+(explode_args,
+"Define this hook to TRUE if arguments to offload regions should be\n\
+exploded, i.e. passed as true arguments rather than in an argument array.",
+bool, (void),
+hook_bool_void_false)
+
 HOOK_VECTOR_END (goacc)
 
 /* Functions relating to vectorization.  */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 2f29c39565e..ebdf31e241e 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -43,6 +43,8 @@ 
 #include "stringpool.h"
 #include "attribs.h"
 #include "tree-ssa.h"
+#include "target.h"
+#include "gomp-constants.h"
 
 /* The idea behind this analyzer is to generate set constraints from the
    program, then solve the resulting constraints in order to generate the
@@ -4692,10 +4694,10 @@  find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
       case BUILT_IN_GOMP_PARALLEL:
       case BUILT_IN_GOACC_PARALLEL:
 	{
-	  bool oacc_parallel = false;
 	  if (in_ipa_mode)
 	    {
 	      unsigned int fnpos, argpos;
+	      bool oacc_exploded_parallel = false;
 	      switch (DECL_FUNCTION_CODE (fndecl))
 		{
 		case BUILT_IN_GOMP_PARALLEL:
@@ -4706,16 +4708,28 @@  find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
 		case BUILT_IN_GOACC_PARALLEL:
 		  /* __builtin_GOACC_parallel (flags_m, fn, mapnum, hostaddrs,
 					       sizes, kinds, ...).  */
-		  fnpos = 2;
-		  argpos = 4;
-		  oacc_parallel = gimple_call_arg (t, 1) == integer_one_node;
+		  fnpos = 1;
+		  argpos = 3;
+		  if (targetm.goacc.explode_args ())
+		    for (int i = 6; i < gimple_call_num_args (t); i++)
+		      {
+		        tree arg = gimple_call_arg (t, i);
+			if (TREE_CODE (arg) == INTEGER_CST
+			    && (tree_to_shwi (arg)
+				== GOMP_LAUNCH_PACK (GOMP_LAUNCH_ARGS_EXPLODED,
+						     0, 0)))
+			  {
+			    oacc_exploded_parallel = true;
+			    break;
+			  }
+		      }
 		  break;
 		default:
 		  gcc_unreachable ();
 		}
 
-	      if (oacc_parallel)
-		break;
+	      if (oacc_exploded_parallel)
+	        break;
 
 	      tree fnarg = gimple_call_arg (t, fnpos);
 	      gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR);
@@ -5258,7 +5272,7 @@  find_func_clobbers (struct function *fn, gimple *origt)
 	      unsigned int fnpos, argpos;
 	      unsigned int implicit_use_args[2];
 	      unsigned int num_implicit_use_args = 0;
-	      bool oacc_parallel = false;
+	      bool oacc_exploded_parallel = false;
 	      switch (DECL_FUNCTION_CODE (decl))
 		{
 		case BUILT_IN_GOMP_PARALLEL:
@@ -5269,17 +5283,29 @@  find_func_clobbers (struct function *fn, gimple *origt)
 		case BUILT_IN_GOACC_PARALLEL:
 		  /* __builtin_GOACC_parallel (flags_m, fn, mapnum, hostaddrs,
 					       sizes, kinds, ...).  */
-		  fnpos = 2;
-		  argpos = 4;
+		  fnpos = 1;
+		  argpos = 3;
+		  implicit_use_args[num_implicit_use_args++] = 4;
 		  implicit_use_args[num_implicit_use_args++] = 5;
-		  implicit_use_args[num_implicit_use_args++] = 6;
-		  oacc_parallel = gimple_call_arg (t, 1) == integer_one_node;
+		  if (targetm.goacc.explode_args ())
+		    for (int i = 6; i < gimple_call_num_args (t); i++)
+		      {
+		        tree arg = gimple_call_arg (t, i);
+			if (TREE_CODE (arg) == INTEGER_CST
+			    && (tree_to_shwi (arg)
+				== GOMP_LAUNCH_PACK (GOMP_LAUNCH_ARGS_EXPLODED,
+						     0, 0)))
+			  {
+			    oacc_exploded_parallel = true;
+			    break;
+			  }
+		      }
 		  break;
 		default:
 		  gcc_unreachable ();
 		}
 
-	      if (oacc_parallel)
+	      if (oacc_exploded_parallel)
 		break;
 
 	      tree fnarg = gimple_call_arg (t, fnpos);
@@ -8216,7 +8242,7 @@  ipa_pta_execute (void)
 		if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL))
 		  called_decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
 		else if (gimple_call_builtin_p (stmt, BUILT_IN_GOACC_PARALLEL))
-		  called_decl = TREE_OPERAND (gimple_call_arg (stmt, 2), 0);
+		  called_decl = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
 
 		if (called_decl != NULL_TREE
 		    && !fndecl_maybe_in_other_partition (called_decl))
diff --git a/include/ChangeLog.openacc b/include/ChangeLog.openacc
index e0584385f43..2400ca54394 100644
--- a/include/ChangeLog.openacc
+++ b/include/ChangeLog.openacc
@@ -1,3 +1,7 @@ 
+2019-07-31  Julian Brown  <julian@codesourcery.com>
+
+	* gomp-constants.h (GOMP_LAUNCH_ARGS_EXPLODED): Define.
+
 2019-07-10  Julian Brown  <julian@codesourcery.com>
 
 	* gomp-constants.h (gomp_map_kind): Add GOMP_MAP_ATTACH_DETACH.
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 0e15cfb303e..aae074596f4 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -293,6 +293,7 @@  enum gomp_map_kind
 #define GOMP_LAUNCH_DIM		1  /* Launch dimensions, op = mask */
 #define GOMP_LAUNCH_ASYNC	2  /* Async, op = cst val if not MAX  */
 #define GOMP_LAUNCH_WAIT	3  /* Waits, op = num waits.  */
+#define GOMP_LAUNCH_ARGS_EXPLODED 4 /* Exploded args, op ignored.  */
 #define GOMP_LAUNCH_CODE_SHIFT	28
 #define GOMP_LAUNCH_DEVICE_SHIFT 16
 #define GOMP_LAUNCH_OP_SHIFT 0
diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc
index e6c81d4f43f..c03f8714408 100644
--- a/libgomp/ChangeLog.openacc
+++ b/libgomp/ChangeLog.openacc
@@ -1,3 +1,12 @@ 
+2019-07-31  Julian Brown  <julian@codesourcery.com>
+
+	* libgomp.map (GOACC_2.0.GOMP_4_BRANCH): Remove GOACC_parallel_keyed_v2.
+	* libgomp_g.h (GOACC_parallel_keyed_v2): Remove prototype.
+	* oacc-parallel.c (GOACC_parallel_keyed_internal): Rename to...
+	(GOACC_parallel_keyed): ...this.  Handle GOMP_LAUNCH_ARGS_EXPLODED
+	launch tag.  Remove previous wrapper functions.
+	(GOACC_parallel_keyed_v2): Remove.
+
 2019-07-31  Julian Brown  <julian@codesourcery.com>
 	    Andrew Stubbs  <ams@codesourcery.com>
 
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 02596b0e265..0118761d4bf 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -522,7 +522,6 @@  GOACC_2.0.1 {
 GOACC_2.0.GOMP_4_BRANCH {
   global:
 	GOMP_set_offload_targets;
-	GOACC_parallel_keyed_v2;
 } GOACC_2.0.1;
 
 
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index 410c6f4e14a..4bf61d59003 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -362,8 +362,6 @@  extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
 
 extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
 				  void **, size_t *, unsigned short *, ...);
-extern void GOACC_parallel_keyed_v2 (int, int, void (*) (void *), size_t,
-				  void **, size_t *, unsigned short *, ...);
 extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *,
 			    unsigned short *, int, int, int, int, int, ...);
 extern void GOACC_data_start (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 6b089763e9a..1bd0775f226 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -174,13 +174,14 @@  goacc_call_host_fn (void (*fn) (void *), size_t mapnum, void **hostaddrs,
    blocks to be copied to/from the device.  Varadic arguments are
    keyed optional parameters terminated with a zero.  */
 
-static void
-GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
-			       size_t mapnum, void **hostaddrs, size_t *sizes,
-			       unsigned short *kinds, va_list *ap)
+void
+GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
+		      void **hostaddrs, size_t *sizes, unsigned short *kinds,
+		      ...)
 {
   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
 
+  va_list ap;
   struct goacc_thread *thr;
   struct gomp_device_descr *acc_dev;
   struct target_mem_desc *tgt;
@@ -192,6 +193,7 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
   int async = GOMP_ASYNC_SYNC;
   unsigned dims[GOMP_DIM_MAX];
   unsigned tag;
+  bool args_exploded = false;
 
 #ifdef HAVE_INTTYPES_H
   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
@@ -259,31 +261,14 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
 
   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
 
-  /* Host fallback if "if" clause is false or if the current device is set to
-     the host.  */
-  if (flags & GOACC_FLAG_HOST_FALLBACK)
-    {
-      prof_info.device_type = acc_device_host;
-      api_info.device_type = prof_info.device_type;
-      goacc_save_and_set_bind (acc_device_host);
-      goacc_call_host_fn (fn, mapnum, hostaddrs, params);
-      goacc_restore_bind ();
-      goto out_prof;
-    }
-  else if (acc_device_type (acc_dev->type) == acc_device_host)
-    {
-      goacc_call_host_fn (fn, mapnum, hostaddrs, params);
-      goto out_prof;
-    }
-  else if (profiling_p)
-    api_info.device_api = acc_device_api_cuda;
-
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
     dims[i] = 0;
 
+  va_start (ap, kinds);
+
   /* TODO: This will need amending when device_type is implemented.  */
-  while ((tag = va_arg (*ap, unsigned)) != 0)
+  while ((tag = va_arg (ap, unsigned)) != 0)
     {
       if (GOMP_LAUNCH_DEVICE (tag))
 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
@@ -297,7 +282,7 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
 
 	    for (i = 0; i != GOMP_DIM_MAX; i++)
 	      if (mask & GOMP_DIM_MASK (i))
-		dims[i] = va_arg (*ap, unsigned);
+		dims[i] = va_arg (ap, unsigned);
 	  }
 	  break;
 
@@ -307,7 +292,7 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
 	    async = GOMP_LAUNCH_OP (tag);
 
 	    if (async == GOMP_LAUNCH_OP_MAX)
-	      async = va_arg (*ap, unsigned);
+	      async = va_arg (ap, unsigned);
 
 	    if (profiling_p)
 	      {
@@ -321,16 +306,40 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
 	case GOMP_LAUNCH_WAIT:
 	  {
 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
-	    goacc_wait (async, num_waits, ap);
+	    goacc_wait (async, num_waits, &ap);
 	    break;
 	  }
 
+	case GOMP_LAUNCH_ARGS_EXPLODED:
+	  args_exploded = true;
+	  break;
+
 	default:
 	  gomp_fatal ("unrecognized offload code '%d',"
 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
 	}
     }
-  
+  va_end (ap);
+
+  /* Host fallback if "if" clause is false or if the current device is set to
+     the host.  */
+  if (flags & GOACC_FLAG_HOST_FALLBACK)
+    {
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
+      goacc_save_and_set_bind (acc_device_host);
+      goacc_call_host_fn (fn, mapnum, hostaddrs, args_exploded);
+      goacc_restore_bind ();
+      goto out_prof;
+    }
+  else if (acc_device_type (acc_dev->type) == acc_device_host)
+    {
+      goacc_call_host_fn (fn, mapnum, hostaddrs, args_exploded);
+      goto out_prof;
+    }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
+
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -392,7 +401,7 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
 
   if (aq == NULL)
     {
-      if (params)
+      if (args_exploded)
 	acc_dev->openacc.exec_params_func (tgt_fn, mapnum, hostaddrs, devaddrs,
 					   dims, tgt);
       else
@@ -401,7 +410,7 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
     }
   else
     {
-      if (params)
+      if (args_exploded)
 	acc_dev->openacc.async.exec_params_func (tgt_fn, mapnum, hostaddrs,
 						 devaddrs, dims, tgt, aq);
       else
@@ -452,30 +461,6 @@  GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
     }
 }
 
-void
-GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
-		      size_t mapnum, void **hostaddrs, size_t *sizes,
-		      unsigned short *kinds, ...)
-{
-  va_list ap;
-  va_start (ap, kinds);
-  GOACC_parallel_keyed_internal (flags_m, 0, fn, mapnum, hostaddrs, sizes,
-				 kinds, &ap);
-  va_end (ap);
-}
-
-void
-GOACC_parallel_keyed_v2 (int flags_m, int args, void (*fn) (void *),
-			 size_t mapnum, void **hostaddrs, size_t *sizes,
-			 unsigned short *kinds, ...)
-{
-  va_list ap;
-  va_start (ap, kinds);
-  GOACC_parallel_keyed_internal (flags_m, args, fn, mapnum, hostaddrs, sizes,
-				 kinds, &ap);
-  va_end (ap);
-}
-
 /* Legacy entry point, only provide host execution.  */
 
 void