diff mbox

[gomp4.1] Support #pragma omp target {enter,exit} data

Message ID 20150729190652.GA44830@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Verbin July 29, 2015, 7:06 p.m. UTC
On Mon, Jul 06, 2015 at 22:42:10 +0200, Jakub Jelinek wrote:
> As has been clarified on omp-lang, we actually shouldn't be mapping or
> unmapping the pointer and/or reference, only the array slice itself, except
> in target construct (and even for that it is changing from mapping to
> private + pointer assignment).

I've updated this patch.  make check-target-libgomp passed.


libgomp/
	* target.c (gomp_map_vars_existing): Fix target address for 'always to'
	array sections.
	(gomp_unmap_vars): Decrement k->refcount when it is 1 and
	k->async_refcount is 0.
	(gomp_offload_image_to_device): Set tgt's refcount to infinity.
	(gomp_exit_data): New static function.
	(GOMP_target_enter_exit_data): Support mapping/unmapping.
	* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
	sections.
	* testsuite/libgomp.c/target-20.c: New test.




  -- Ilya

Comments

Jakub Jelinek July 30, 2015, 8:12 a.m. UTC | #1
On Wed, Jul 29, 2015 at 10:06:52PM +0300, Ilya Verbin wrote:
> @@ -1241,6 +1245,62 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
>    gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
>  }
>  
> +static void
> +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
> +		void **hostaddrs, size_t *sizes, unsigned short *kinds)
> +{
> +  const int typemask = 0xff;
> +  size_t i;
> +  gomp_mutex_lock (&devicep->lock);
> +  for (i = 0; i < mapnum; i++)
> +    {
> +      struct splay_tree_key_s cur_node;
> +      unsigned char kind = kinds[i] & typemask;
> +      switch (kind)
> +	{
> +	case GOMP_MAP_FROM:
> +	case GOMP_MAP_ALWAYS_FROM:
> +	case GOMP_MAP_DELETE:
> +	case GOMP_MAP_RELEASE:

Please handle here GOMP_MAP_ZERO_LEN_ARRAY_SECTION too.
It should use gomp_map_lookup (while all others splay_tree_lookup),
otherwise it is the same as GOMP_MAP_RELEASE.

> @@ -1280,13 +1337,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
>      }
>  
>    if (is_enter_data)
> -    {
> -      /* TODO  */
> -    }
> +    for (i = 0; i < mapnum; i++)
> +      {
> +	struct target_mem_desc *tgt_var
> +	  = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i],
> +			   &kinds[i], true, false);
> +	tgt_var->refcount--;
> +
> +	/* If the variable was already mapped, tgt_var is not needed.  Otherwise
> +	   tgt_var will be freed by gomp_unmap_vars or gomp_exit_data.  */
> +	if (tgt_var->refcount == 0)
> +	  free (tgt_var);

This is racy, you don't hold the device lock here anymore, so you shouldn't
decrease refcounts or test it etc.
I think better would be to change the bool is_target argument to
gomp_map_vars into an enum, and use 3 values there for now
- GOMP_VARS_MAP_TARGET, GOMP_VARS_MAP_DATA, GOMP_VARS_MAP_ENTER_DATA or so,
and for GOMP_VARS_MAP_ENTER_DATA perform the decrement of refcount and
freeing if it is zero (but then also better return NULL).

> diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
> new file mode 100644
> index 0000000..ec7e245
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-20.c
> @@ -0,0 +1,111 @@
> +/* { dg-require-effective-target offload_device } */

This test will fail on HSA, you don't assume just that it doesn't
fallback to host, but also non-shared address space.
I think it would be better to start with some check for non-shared address
space, like:
/* This test relies on non-shared address space.  Punt otherwise.  */
void ensure_nonshared_as (void)
{
  int a = 8;
  #pragma omp target map(to:a)
  {
    a++;
  }
  if (a == 8)
    exit (0);
}

And generally, it is better to have most of the tests not relying on
offloading only or even non-shared address space, so that we also test
shared address space and host fallback.  But a few tests won't hurt...

	Jakub
diff mbox

Patch

diff --git a/libgomp/target.c b/libgomp/target.c
index ef74d43..ad375c9 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -191,7 +191,8 @@  gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
     devicep->host2dev_func (devicep->target_id,
-			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+				      + newn->host_start - oldn->host_start),
 			    (void *) newn->host_start,
 			    newn->host_end - newn->host_start);
   if (oldn->refcount != REFCOUNT_INFINITY)
@@ -664,15 +665,18 @@  gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 	continue;
 
       bool do_unmap = false;
-      if (k->refcount > 1)
+      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+	k->refcount--;
+      else if (k->refcount == 1)
 	{
-	  if (k->refcount != REFCOUNT_INFINITY)
-	    k->refcount--;
+	  if (k->async_refcount > 0)
+	    k->async_refcount--;
+	  else
+	    {
+	      k->refcount--;
+	      do_unmap = true;
+	    }
 	}
-      else if (k->async_refcount > 0)
-	k->async_refcount--;
-      else
-	do_unmap = true;
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
@@ -798,7 +802,7 @@  gomp_offload_image_to_device (struct gomp_device_descr *devicep,
   /* Insert host-target address mapping into splay tree.  */
   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
-  tgt->refcount = 1;
+  tgt->refcount = REFCOUNT_INFINITY;
   tgt->tgt_start = 0;
   tgt->tgt_end = 0;
   tgt->to_free = NULL;
@@ -1241,6 +1245,62 @@  GOMP_target_update (int device, const void *unused, size_t mapnum,
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
 }
 
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+		void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  const int typemask = 0xff;
+  size_t i;
+  gomp_mutex_lock (&devicep->lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      struct splay_tree_key_s cur_node;
+      unsigned char kind = kinds[i] & typemask;
+      switch (kind)
+	{
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start + sizes[i];
+	  splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node);
+	  if (!k)
+	    continue;
+
+	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount--;
+	  if (kind == GOMP_MAP_DELETE && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount = 0;
+
+	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+	      || kind == GOMP_MAP_ALWAYS_FROM)
+	    devicep->dev2host_func (devicep->target_id,
+				    (void *) cur_node.host_start,
+				    (void *) (k->tgt->tgt_start + k->tgt_offset
+					      + cur_node.host_start
+					      - k->host_start),
+				    cur_node.host_end - cur_node.host_start);
+	  if (k->refcount == 0)
+	    {
+	      splay_tree_remove (&devicep->mem_map, k);
+	      if (k->tgt->refcount > 1)
+		k->tgt->refcount--;
+	      else
+		gomp_unmap_tgt (k->tgt);
+	    }
+
+	  break;
+	default:
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+		      kind);
+	}
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+}
+
 void
 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 			     size_t *sizes, unsigned short *kinds)
@@ -1259,9 +1319,6 @@  GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     {
       unsigned char kind = kinds[i] & typemask;
 
-      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
-	continue;
-
       if (kind == GOMP_MAP_ALLOC
 	  || kind == GOMP_MAP_TO
 	  || kind == GOMP_MAP_ALWAYS_TO)
@@ -1280,13 +1337,20 @@  GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     }
 
   if (is_enter_data)
-    {
-      /* TODO  */
-    }
+    for (i = 0; i < mapnum; i++)
+      {
+	struct target_mem_desc *tgt_var
+	  = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i],
+			   &kinds[i], true, false);
+	tgt_var->refcount--;
+
+	/* If the variable was already mapped, tgt_var is not needed.  Otherwise
+	   tgt_var will be freed by gomp_unmap_vars or gomp_exit_data.  */
+	if (tgt_var->refcount == 0)
+	  free (tgt_var);
+      }
   else
-    {
-      /* TODO  */
-    }
+    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
 }
 
 void
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
index b86097a..98882f0 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -9,6 +9,17 @@  void test_array_section (int *p)
 {
   #pragma omp target data map(alloc: p[0:N])
     {
+      int ok = 1;
+      for (int i = 10; i < 10 + 4; i++)
+	p[i] = 997 * i;
+
+      #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+	for (int i = 10; i < 10 + 4; i++)
+	  if (p[i] != 997 * i)
+	    ok = 0;
+
+      assert (ok);
+
       #pragma omp target map(always from:p[7:9])
 	for (int i = 0; i < N; i++)
 	  p[i] = i;
diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
new file mode 100644
index 0000000..ec7e245
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-20.c
@@ -0,0 +1,111 @@ 
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 40
+
+int sum;
+int var1 = 1;
+int var2 = 2;
+
+#pragma omp declare target
+int D[N];
+#pragma omp end declare target
+
+void enter_data (int *X)
+{
+  #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum)
+}
+
+void exit_data_0 (int *D)
+{
+  #pragma omp target exit data map(delete: D[:N])
+}
+
+void exit_data_1 ()
+{
+  #pragma omp target exit data map(from: var1)
+}
+
+void exit_data_2 (int *X)
+{
+  #pragma omp target exit data map(from: var2) map(release: X[:N], sum)
+}
+
+void test_nested ()
+{
+  int X = 0, Y = 0, Z = 0;
+
+  #pragma omp target data map(from: X, Y, Z)
+    {
+      #pragma omp target data map(from: X, Y, Z)
+	{
+	  #pragma omp target map(from: X, Y, Z)
+	    X = Y = Z = 1337;
+	  assert (X == 0);
+	  assert (Y == 0);
+	  assert (Z == 0);
+
+	  #pragma omp target exit data map(from: X) map(release: Y)
+	  assert (X == 0);
+	  assert (Y == 0);
+
+	  #pragma omp target exit data map(release: Y) map(delete: Z)
+	  assert (Y == 0);
+	  assert (Z == 0);
+	}
+      assert (X == 1337);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      #pragma omp target map(from: X)
+	X = 2448;
+      assert (X == 2448);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      X = 4896;
+    }
+  assert (X == 4896);
+  assert (Y == 0);
+  assert (Z == 0);
+}
+
+int main ()
+{
+  int *X = malloc (N * sizeof (int));
+  int *Y = malloc (N * sizeof (int));
+  X[10] = 10;
+  Y[20] = 20;
+  enter_data (X);
+
+  exit_data_0 (D); /* This should have no effect on D.  */
+
+  #pragma omp target map(alloc: var1, var2, X[:N]) map(to: Y[:N]) \
+    map(always from: sum)
+    {
+      var1 += X[10];
+      var2 += Y[20];
+      sum = var1 + var2;
+      D[sum]++;
+    }
+
+  assert (var1 == 1);
+  assert (var2 == 2);
+  assert (sum == 33);
+
+  exit_data_1 ();
+  assert (var1 == 11);
+  assert (var2 == 2);
+
+  exit_data_2 (X);
+  assert (var2 == 22);
+
+  free (X);
+  free (Y);
+
+  test_nested ();
+
+  return 0;
+}