diff mbox series

[PR92116,PR92877,OpenACC] Replace 'openacc.data_environ' by standard libgomp mechanics (was: [PATCH] OpenACC reference count overhaul)

Message ID 87y2vl141c.fsf@euler.schwinge.homeip.net
State New
Headers show
Series [PR92116,PR92877,OpenACC] Replace 'openacc.data_environ' by standard libgomp mechanics (was: [PATCH] OpenACC reference count overhaul) | expand

Commit Message

Thomas Schwinge Dec. 9, 2019, 11:18 p.m. UTC
Hi!

\o/ Yay for the first split-out piece of the big "OpenACC reference count
overhaul" going in:

On 2019-10-29T12:15:01+0000, Julian Brown <julian@codesourcery.com> wrote:
> On Mon, 21 Oct 2019 16:14:11 +0200
> Thomas Schwinge <thomas@codesourcery.com> wrote:
>> Remeber to look into <https://gcc.gnu.org/PR92116> "Potential null
>> pointer dereference in 'gomp_acc_remove_pointer'", which may be
>> relevant here.

I investigated and answered that one, and "we shall be removing this code
from 'gomp_acc_remove_pointer' any moment now" -- now done by means of:

>  - the "data_environ" field in the device descriptor -- a linear linked
>    list containing a target memory descriptor for each "acc enter data"
>    mapping -- has been removed.  This brings OpenACC closer to the
>    OpenMP implementation for non-lexically-scoped data mapping
>    (GOMP_target_enter_exit_data), and is potentially a performance win
>    if lots of data is mapped in this way.

And, the 'data_environ' on-the-side data structure caused actual bugs:
structured mappings (via 'gomp_map_vars') didn't maintain 'data_environ',
so 'lookup_dev' didn't work for these, which caused some diagnostic
confusion as well as 'acc_hostptr' always returning NULL for these, huh!

See attached "[PR92116, PR92877] [OpenACC] Replace 'openacc.data_environ'
by standard libgomp mechanics", committed to trunk in r279147.


Grüße
 Thomas
diff mbox series

Patch

From a74d1c85921f0828075a6bf35e94df411d110673 Mon Sep 17 00:00:00 2001
From: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Mon, 9 Dec 2019 22:52:56 +0000
Subject: [PATCH] [PR92116, PR92877] [OpenACC] Replace 'openacc.data_environ'
 by standard libgomp mechanics

	libgomp/
	PR libgomp/92116
	PR libgomp/92877
	* oacc-mem.c (lookup_dev): Reimplement.  Adjust all users.
	* libgomp.h (struct acc_dispatch_t): Remove 'data_environ' member.
	Adjust all users.
	* testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4-2.c:
	Remove XFAIL.
	* testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/pr92877-1.c: New file.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@279147 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog                             |  15 +++
 libgomp/libgomp.h                             |  10 +-
 libgomp/oacc-host.c                           |   2 -
 libgomp/oacc-mem.c                            | 121 ++++--------------
 libgomp/target.c                              |   1 -
 .../acc_free-pr92503-4-2.c                    |   4 +-
 .../acc_free-pr92503-4.c                      |   4 +-
 .../libgomp.oacc-c-c++-common/pr92877-1.c     |  19 +++
 8 files changed, 64 insertions(+), 112 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr92877-1.c

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 62092a2d765..83227032f88 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,18 @@ 
+2019-12-09  Thomas Schwinge  <thomas@codesourcery.com>
+	    Julian Brown  <julian@codesourcery.com>
+
+	PR libgomp/92116
+	PR libgomp/92877
+
+	* oacc-mem.c (lookup_dev): Reimplement.  Adjust all users.
+	* libgomp.h (struct acc_dispatch_t): Remove 'data_environ' member.
+	Adjust all users.
+	* testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4-2.c:
+	Remove XFAIL.
+	* testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4.c:
+	Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/pr92877-1.c: New file.
+
 2019-12-09  Thomas Schwinge  <thomas@codesourcery.com>
 
 	PR libgomp/92503
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index bab733d2b2d..a35aa07c80b 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1025,13 +1025,6 @@  splay_compare (splay_tree_key x, splay_tree_key y)
 
 typedef struct acc_dispatch_t
 {
-  /* This is a linked list of data mapped using the
-     acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
-     Unlike mapped_data in the goacc_thread struct, unmapping can
-     happen out-of-order with respect to mapping.  */
-  /* This is guarded by the lock in the "outer" struct gomp_device_descr.  */
-  struct target_mem_desc *data_environ;
-
   /* Execute.  */
   __typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
 
@@ -1132,8 +1125,7 @@  struct gomp_device_descr
   enum gomp_device_state state;
 
   /* OpenACC-specific data and functions.  */
-  /* This is mutable because of its mutable data_environ and target_data
-     members.  */
+  /* This is mutable because of its mutable target_data member.  */
   acc_dispatch_t openacc;
 };
 
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index cbcac9bf7b3..e9cd4bfcd4a 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -264,8 +264,6 @@  static struct gomp_device_descr host_dispatch =
     .state = GOMP_DEVICE_UNINITIALIZED,
 
     .openacc = {
-      .data_environ = NULL,
-
       .exec_func = host_openacc_exec,
 
       .create_thread_data_func = host_openacc_create_thread_data,
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 81ebddf7580..369a11696da 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -50,44 +50,42 @@  lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
   return key;
 }
 
-/* Return block containing [D->S), or NULL if not contained.
-   The list isn't ordered by device address, so we have to iterate
-   over the whole array.  This is not expected to be a common
-   operation.  The device lock associated with TGT must be locked on entry, and
-   remains locked on exit.  */
+/* Helper for lookup_dev.  Iterate over splay tree.  */
 
 static splay_tree_key
-lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
 {
-  int i;
-  struct target_mem_desc *t;
+  splay_tree_key key = &node->key;
+  if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
+    return key;
 
-  if (!tgt)
-    return NULL;
+  key = NULL;
+  if (node->left)
+    key = lookup_dev_1 (node->left, d, s);
+  if (!key && node->right)
+    key = lookup_dev_1 (node->right, d, s);
 
-  for (t = tgt; t != NULL; t = t->prev)
-    {
-      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
-        break;
-    }
+  return key;
+}
 
-  if (!t)
-    return NULL;
+/* Return block containing [D->S), or NULL if not contained.
 
-  for (i = 0; i < t->list_count; i++)
-    {
-      void * offset;
+   This iterates over the splay tree.  This is not expected to be a common
+   operation.
 
-      splay_tree_key k = &t->array[i].key;
-      offset = d - t->tgt_start + k->tgt_offset;
+   The device lock associated with MEM_MAP must be locked on entry, and remains
+   locked on exit.  */
 
-      if (k->host_start + offset <= (void *) k->host_end)
-        return k;
-    }
+static splay_tree_key
+lookup_dev (splay_tree mem_map, void *d, size_t s)
+{
+  if (!mem_map || !mem_map->root)
+    return NULL;
 
-  return NULL;
+  return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
 }
 
+
 /* OpenACC is silent on how memory exhaustion is indicated.  We return
    NULL.  */
 
@@ -147,7 +145,7 @@  acc_free (void *d)
   /* We don't have to call lazy open here, as the ptr value must have
      been returned by acc_malloc.  It's not permitted to pass NULL in
      (unless you got that null from acc_malloc).  */
-  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
     {
       void *offset = d - k->tgt->tgt_start + k->tgt_offset;
       void *h = k->host_start + offset;
@@ -300,7 +298,7 @@  acc_hostptr (void *d)
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
+  n = lookup_dev (&acc_dev->mem_map, d, 1);
 
   if (!n)
     {
@@ -395,7 +393,7 @@  acc_map_data (void *h, void *d, size_t s)
 		      (int)s);
 	}
 
-      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
+      if (lookup_dev (&thr->dev->mem_map, d, s))
         {
 	  gomp_mutex_unlock (&acc_dev->lock);
 	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
@@ -418,11 +416,6 @@  acc_map_data (void *h, void *d, size_t s)
 	  thr->api_info = NULL;
 	}
     }
-
-  gomp_mutex_lock (&acc_dev->lock);
-  tgt->prev = acc_dev->openacc.data_environ;
-  acc_dev->openacc.data_environ = tgt;
-  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -482,25 +475,11 @@  acc_unmap_data (void *h)
 
   if (t->refcount == 2)
     {
-      struct target_mem_desc *tp;
-
       /* This is the last reference, so pull the descriptor off the
          chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
          freeing the device memory. */
       t->tgt_end = 0;
       t->to_free = 0;
-
-      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	   tp = t, t = t->prev)
-	if (n->tgt == t)
-	  {
-	    if (tp)
-	      tp->prev = t->prev;
-	    else
-	      acc_dev->openacc.data_environ = t->prev;
-
-	    break;
-	  }
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
@@ -597,13 +576,7 @@  present_create_copy (unsigned f, void *h, size_t s, int async)
       /* Initialize dynamic refcount.  */
       tgt->list[0].key->dynamic_refcount = 1;
 
-      gomp_mutex_lock (&acc_dev->lock);
-
       d = tgt->to_free;
-      tgt->prev = acc_dev->openacc.data_environ;
-      acc_dev->openacc.data_environ = tgt;
-
-      gomp_mutex_unlock (&acc_dev->lock);
     }
 
   if (profiling_p)
@@ -749,21 +722,6 @@  delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 
   if (n->refcount == 0)
     {
-      if (n->tgt->refcount == 2)
-	{
-	  struct target_mem_desc *tp, *t;
-	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	       tp = t, t = t->prev)
-	    if (n->tgt == t)
-	      {
-		if (tp)
-		  tp->prev = t->prev;
-		else
-		  acc_dev->openacc.data_environ = t->prev;
-		break;
-	      }
-	}
-
       if (f & FLAG_COPYOUT)
 	{
 	  goacc_aq aq = get_goacc_asyncqueue (async);
@@ -954,11 +912,6 @@  gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 
   /* Initialize dynamic refcount.  */
   tgt->list[0].key->dynamic_refcount = 1;
-
-  gomp_mutex_lock (&acc_dev->lock);
-  tgt->prev = acc_dev->openacc.data_environ;
-  acc_dev->openacc.data_environ = tgt;
-  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -1009,26 +962,6 @@  gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
 
   if (n->refcount == 0)
     {
-      if (t->refcount == minrefs)
-	{
-	  /* This is the last reference, so pull the descriptor off the
-	     chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
-	     freeing the device memory. */
-	  struct target_mem_desc *tp;
-	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	       tp = t, t = t->prev)
-	    {
-	      if (n->tgt == t)
-		{
-		  if (tp)
-		    tp->prev = t->prev;
-		  else
-		    acc_dev->openacc.data_environ = t->prev;
-		  break;
-		}
-	    }
-	}
-
       /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
       n->refcount = 1;
       t->refcount = minrefs;
diff --git a/libgomp/target.c b/libgomp/target.c
index 13f7921651f..39a24f56395 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2897,7 +2897,6 @@  gomp_target_init (void)
 		current_device.type = current_device.get_type_func ();
 		current_device.mem_map.root = NULL;
 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
-		current_device.openacc.data_environ = NULL;
 		for (i = 0; i < new_num_devices; i++)
 		  {
 		    current_device.target_id = i;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4-2.c
index bbf44319687..48226cf64c7 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4-2.c
@@ -25,7 +25,5 @@  main ()
 }
 
 /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" }
-   TODO PR92877
-   { dg-output "libgomp: cuMemGetAddressRange_v2 error: named symbol not found" { target openacc_nvidia_accel_selected } }
-   { dg-output "refusing to free device memory space at \[0-9a-fA-FxX\]+ that is still mapped at \\\[\[0-9a-fA-FxX\]+,\\\+64\\\]" { xfail *-*-* } }
+   { dg-output "refusing to free device memory space at \[0-9a-fA-FxX\]+ that is still mapped at \\\[\[0-9a-fA-FxX\]+,\\\+64\\\]" }
    { dg-shouldfail "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4.c
index 6212f9eae47..7638d528575 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_free-pr92503-4.c
@@ -26,7 +26,5 @@  main ()
 }
 
 /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" }
-   TODO PR92877
-   { dg-output "libgomp: cuMemGetAddressRange_v2 error: named symbol not found" { target openacc_nvidia_accel_selected } }
-   { dg-output "refusing to free device memory space at \[0-9a-fA-FxX\]+ that is still mapped at \\\[\[0-9a-fA-FxX\]+,\\\+87\\\]" { xfail *-*-* } }
+   { dg-output "refusing to free device memory space at \[0-9a-fA-FxX\]+ that is still mapped at \\\[\[0-9a-fA-FxX\]+,\\\+87\\\]" }
    { dg-shouldfail "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92877-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92877-1.c
new file mode 100644
index 00000000000..02595a9c0e5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92877-1.c
@@ -0,0 +1,19 @@ 
+/* Make sure that we can resolve back via 'acc_hostptr' an 'acc_deviceptr'
+   retrieved for a structured mapping.  */
+
+#include <assert.h>
+#include <openacc.h>
+
+int
+main ()
+{
+  int var;
+
+#pragma acc data create (var)
+  {
+    void *var_p_d = acc_deviceptr (&var);
+    assert (acc_hostptr (var_p_d) == &var);
+  }
+
+  return 0;
+}
-- 
2.17.1