diff mbox

[gomp4] Redesign oacc_parallel launch API

Message ID 55B9497C.9080201@acm.org
State New
Headers show

Commit Message

Nathan Sidwell July 29, 2015, 9:45 p.m. UTC
On 07/29/15 08:24, Nathan Sidwell wrote:
> On 07/29/15 05:22, Thomas Schwinge wrote:

>> Likewise for the other torture testing flags.
>
>
> Investigating ...  (I've seen those failures be intermittent)

Interestingly the fails go away with an unoptimized libgomp.  I've observed 
something vaguely like that before.  The observed failure mode was getting stuck 
inside the driver library opening the device.  Which is very strange.


Anyway, I've committed the attached to gomp4 branch, which separates the ASYNC 
and WAIT tags, for a slightly better interface.  It doesn't fixup the failure 
thought.  Still thinking about that.

nathan

Comments

Nathan Sidwell July 30, 2015, 2:41 p.m. UTC | #1
On 07/29/15 17:45, Nathan Sidwell wrote:
> On 07/29/15 08:24, Nathan Sidwell wrote:
>> On 07/29/15 05:22, Thomas Schwinge wrote:
>
>>> Likewise for the other torture testing flags.
>>
>>
>> Investigating ...  (I've seen those failures be intermittent)
>
> Interestingly the fails go away with an unoptimized libgomp.  I've observed
> something vaguely like that before.  The observed failure mode was getting stuck
> inside the driver library opening the device.  Which is very strange.
>

I am no longer observing the failures.
Thomas Schwinge July 30, 2015, 2:44 p.m. UTC | #2
Hi Nathan!

On Wed, 29 Jul 2015 17:45:32 -0400, Nathan Sidwell <nathan@acm.org> wrote:
> On 07/29/15 08:24, Nathan Sidwell wrote:
> > On 07/29/15 05:22, Thomas Schwinge wrote:
> 
> >> Likewise for the other torture testing flags.
> >
> >
> > Investigating ...  (I've seen those failures be intermittent)

(For me, they were persistent, within the handful of testsuite runs I'd
done.)

> Interestingly the fails go away with an unoptimized libgomp.  I've observed 
> something vaguely like that before.  The observed failure mode was getting stuck 
> inside the driver library opening the device.  Which is very strange.

Uh...  :-/

> Anyway, I've committed the attached to gomp4 branch, which separates the ASYNC 
> and WAIT tags, for a slightly better interface.

Thanks!

At first, I saw a number of spurious FAILs and timeouts with different
test cases each time I ran the testsuite, which disappeared once I
manually triggered a full rebuild -- is there some build dependency
missing somewhere...

> It doesn't fixup the failure 
> thought.  Still thinking about that.

Hmm, for me the failures now seem to be done; again, within the handful
of testsuite runs I've done.


Grüße,
 Thomas
diff mbox

Patch

2015-07-29  Nathan Sidwell  <nathan@codesourcery.com>

	include/
	* gomp-constants.h (GOMP_LAUNCH_ASYNC_WAIT): Replace with ...
	(GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): ... these.
	(GOMP_LAUNCH_OP_MAX): New.

	libgomp/
	* plugin/plugin-nvptx.c (nvptx_wait): Add debug print.
	* oacc-parallel.c (GOACC_parallel_keyed): Process separate ASYNC
	and WAIT tags.
	(GOACC_parallel): Adjust forwarding.

	gcc/
	* omp-low.c (expand_omp_target): Emit separate ASYNC and WAIT tags.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 226346)
+++ gcc/omp-low.c	(working copy)
@@ -9875,23 +9875,35 @@  expand_omp_target (struct omp_region *re
 				      integer_type_node,
 				      build_int_cst (integer_type_node,
 						     GOMP_ASYNC_SYNC));
-	if (t_async && !tagging)
+	if (tagging && t_async)
 	  {
-	    args.safe_push (t_async);
-	    t_async = NULL_TREE;
+	    unsigned HOST_WIDE_INT i_async;
+
+	    if (TREE_CODE (t_async) == INTEGER_CST)
+	      {
+		/* See if we can pack the async arg in to the tag's
+		   operand.  */
+		i_async = TREE_INT_CST_LOW (t_async);
+
+		if (i_async < GOMP_LAUNCH_OP_MAX)
+		  t_async = NULL_TREE;
+	      }
+	    if (t_async)
+	      i_async = GOMP_LAUNCH_OP_MAX;
+	    args.safe_push (oacc_launch_pack
+			    (GOMP_LAUNCH_ASYNC, NULL_TREE, i_async));
 	  }
+	if (t_async)
+	  args.safe_push (t_async);
 
 	/* Save the argument index, and... */
 	unsigned t_wait_idx = args.length ();
 	unsigned num_waits = 0;
 	c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
-	if (!tagging || c || t_async)
+	if (!tagging || c)
 	  /* ... push a placeholder.  */
 	  args.safe_push (integer_zero_node);
 
-	if (tagging && t_async)
-	  args.safe_push (t_async);
-	
 	for (; c; c = OMP_CLAUSE_CHAIN (c))
 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
 	    {
@@ -9901,13 +9913,13 @@  expand_omp_target (struct omp_region *re
 	      num_waits++;
 	    }
 
-	if (!tagging || num_waits || t_async)
+	if (!tagging || num_waits)
 	  {
 	    tree len;
 
 	    /* Now that we know the number, update the placeholder.  */
 	    if (tagging)
-	      len = oacc_launch_pack (GOMP_LAUNCH_ASYNC_WAIT,
+	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT,
 				      NULL_TREE, num_waits);
 	    else
 	      len = build_int_cst (integer_type_node, num_waits);
Index: libgomp/oacc-parallel.c
===================================================================
--- libgomp/oacc-parallel.c	(revision 226346)
+++ libgomp/oacc-parallel.c	(working copy)
@@ -268,11 +268,20 @@  GOACC_parallel_keyed (int device, void (
 	  }
 	  break;
 
-	case GOMP_LAUNCH_ASYNC_WAIT:
+	case GOMP_LAUNCH_ASYNC:
+	  {
+	    /* Small constant values are encoded in the operand.  */
+	    async = GOMP_LAUNCH_OP (tag);
+
+	    if (async == GOMP_LAUNCH_OP_MAX)
+	      async = va_arg (ap, unsigned);
+	    break;
+	  }
+
+	case GOMP_LAUNCH_WAIT:
 	  {
 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
 
-	    async = va_arg (ap, unsigned);
 	    if (num_waits)
 	      goacc_wait (async, num_waits, &ap);
 	    break;
@@ -357,8 +366,9 @@  GOACC_parallel (int device, void (*fn) (
 			GOMP_LAUNCH_PACK (GOMP_LAUNCH_DIM, 0,
 					  GOMP_DIM_MASK (GOMP_DIM_MAX) - 1),
 			num_gangs, num_workers, vector_length,
-			GOMP_LAUNCH_PACK (GOMP_LAUNCH_ASYNC_WAIT,
-					  0, num_waits),
+			GOMP_LAUNCH_PACK (GOMP_LAUNCH_ASYNC, 0,
+					  GOMP_LAUNCH_OP_MAX), async,
+			GOMP_LAUNCH_PACK (GOMP_LAUNCH_WAIT, 0, num_waits),
 			async, waits[0], waits[1], waits[2], waits[3],
 			waits[4], waits[5], waits[6], waits[7], waits[8]);
 }
Index: libgomp/plugin/plugin-nvptx.c
===================================================================
--- libgomp/plugin/plugin-nvptx.c	(revision 226346)
+++ libgomp/plugin/plugin-nvptx.c	(working copy)
@@ -1346,6 +1346,8 @@  nvptx_wait (int async)
   if (!s)
     GOMP_PLUGIN_fatal ("unknown async %d", async);
 
+  GOMP_PLUGIN_debug (0, "  %s: waiting on async=%d\n", __FUNCTION__, async);
+
   r = cuStreamSynchronize (s->stream);
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
Index: include/gomp-constants.h
===================================================================
--- include/gomp-constants.h	(revision 226346)
+++ include/gomp-constants.h	(working copy)
@@ -131,7 +131,8 @@  enum gomp_map_kind
 /* Varadic launch arguments.  */
 #define GOMP_LAUNCH_END 	0  /* End of args, no dev or op */
 #define GOMP_LAUNCH_DIM		1  /* Launch dimensions, op = mask */
-#define GOMP_LAUNCH_ASYNC_WAIT	2  /* Async & Waits, op = num waits.  */
+#define GOMP_LAUNCH_ASYNC	2  /* Async, op = cst val if not MAX  */
+#define GOMP_LAUNCH_WAIT	3  /* Waits, op = num waits.  */
 #define GOMP_LAUNCH_CODE_SHIFT	28
 #define GOMP_LAUNCH_DEVICE_SHIFT 16
 #define GOMP_LAUNCH_OP_SHIFT 0
@@ -142,6 +143,7 @@  enum gomp_map_kind
 #define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf)
 #define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff)
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
+#define GOMP_LAUNCH_OP_MAX 0xffff
 
 /* Versions of libgomp and device-specific plugins.  */
 #define GOMP_VERSION	0