OpenACC reference count consistency checking
diff mbox series

Message ID 1543578624-1511-1-git-send-email-julian@codesourcery.com
State New
Headers show
Series
  • OpenACC reference count consistency checking
Related show

Commit Message

Julian Brown Nov. 30, 2018, 11:50 a.m. UTC
This is a trunk-compatible version of the patch posted here:

  https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02365.html

I understand it may not be suitable for committing (especially not
outside stage 1 -- though it's "obviously harmless" in its dormant state),
but it might be helpful for review purposes for the main attach/detach
patch, i.e.:

  https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02556.html

For convenience, I will copy the blurb from the og8 submission of the
patch here.

[...] The model used for checking is as follows.

 1. Each splay tree key that references a target memory descriptor
    increases that descriptor's refcount by 1.

 2. Each variable listed in a target memory descriptor that links back to a
    splay tree key increases that key's refcount by 1. Each target memory
    descriptor's variable list is counted only once, even if multiple
    splay tree keys point to it (via their "tgt" field).

 3. Additional ("real") target memory descriptors may be present
    representing data mapped through "acc data" or "acc parallel/kernels"
    blocks.  These descriptors have their refcount bumped, and the
    variables linked through such blocks have their refcounts bumped also
    (again, with "once only" semantics).

 4. Asynchronous operations "artificially" bump the reference counts for
    referenced target memory descriptors (but *not* for linked
    variables/splay tree keys), in order to delay freeing mapped device
    memory until the asynchronous operation has completed.  We model this,
    for checking purposes only, using an off-side linked list.

 5. "Virtual" reference counts ("virtual_refcount") cannot be checked
    purely statically, so we add the incoming value to each key's
    statically-determined reference count ("refcount_chk"), and make
    sure that the total matches the incoming reference count ("refcount").

Thanks,

Julian

ChangeLog

	libgomp/
	* libgomp.h (RC_CHECKING): New macro, disabled by default, guarding all
	hunks in this patch.
	(target_mem_desc): Add forward declaration.
	(async_tgt_use): New struct.
	(target_mem_desc): Add refcount_chk, mark fields.
	(acc_dispatch_t): Add tgt_uses, au_lock fields.
	(dump_tgt, gomp_rc_check): Add prototypes.
	* oacc-async (goacc_async_unmap_tgt): Add refcount self-check code.
	(goacc_async_copyout_unmap_vars): Likewise.
	(goacc_remove_var_async): Likewise.
	* oacc-parallel.c (GOACC_parallel_keyed_internal): Add refcount
	self-check code.
	(GOACC_data_start, GOACC_data_end, GOACC_enter_exit_data): Likewise.
	* target.c (stdio.h): Include.
	(dump_tgt, rc_check_clear, rc_check_count, rc_check_verify)
	(gomp_rc_check): New functions to consistency-check reference counts.
	(gomp_target_init): Initialise self-check-related device fields.
---
 libgomp/libgomp.h       |   31 +++++++
 libgomp/oacc-async.c    |   46 +++++++++++
 libgomp/oacc-parallel.c |   33 ++++++++
 libgomp/target.c        |  199 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 309 insertions(+), 0 deletions(-)

Patch
diff mbox series

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index df49c1b..24cbddd 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -874,9 +874,26 @@  struct target_var_desc {
   uintptr_t length;
 };
 
+/* Uncomment to enable reference-count consistency checking (for development
+   use only).  */
+/*#define RC_CHECKING 1*/
+
+#ifdef RC_CHECKING
+struct target_mem_desc;
+
+struct async_tgt_use {
+  struct target_mem_desc *tgt;
+  struct async_tgt_use *next;
+};
+#endif
+
 struct target_mem_desc {
   /* Reference count.  */
   uintptr_t refcount;
+#ifdef RC_CHECKING
+  uintptr_t refcount_chk;
+  bool mark;
+#endif
   /* All the splay nodes allocated together.  */
   splay_tree_node array;
   /* Start of the target region.  */
@@ -925,6 +942,10 @@  struct splay_tree_key_s {
      "present increment" operations (via "acc enter data") refering to the same
      host-memory block.  */
   uintptr_t virtual_refcount;
+#ifdef RC_CHECKING
+  /* The recalculated reference count, for verification.  */
+  uintptr_t refcount_chk;
+#endif
   /* For a block with attached pointers, the attachment counters for each.  */
   unsigned short *attach_count;
   /* Pointer to the original mapping of "omp declare target link" object.  */
@@ -958,6 +979,10 @@  typedef struct acc_dispatch_t
     int nasyncqueue;
     struct goacc_asyncqueue **asyncqueue;
     struct goacc_asyncqueue_list *active;
+#ifdef RC_CHECKING
+    struct async_tgt_use *tgt_uses;
+    gomp_mutex_t au_lock;
+#endif
 
     __typeof (GOMP_OFFLOAD_openacc_async_construct) *construct_func;
     __typeof (GOMP_OFFLOAD_openacc_async_destruct) *destruct_func;
@@ -1085,6 +1110,12 @@  extern void gomp_detach_pointer (struct gomp_device_descr *,
 				 struct goacc_asyncqueue *, splay_tree_key,
 				 uintptr_t, bool, struct gomp_coalesce_buf *);
 
+#ifdef RC_CHECKING
+extern void dump_tgt (const char *, struct target_mem_desc *);
+extern void gomp_rc_check (struct gomp_device_descr *,
+			   struct target_mem_desc *);
+#endif
+
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 					      size_t, void **, void **,
 					      size_t *, void *, bool,
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index 077e28f..8b0f228 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -243,6 +243,29 @@  goacc_async_unmap_tgt (void *ptr)
 {
   struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
 
+#ifdef RC_CHECKING
+  {
+    struct gomp_device_descr *devicep = tgt->device_descr;
+    struct async_tgt_use *aup, *au;
+    gomp_mutex_lock (&devicep->openacc.async.au_lock);
+    /* Remove tgt from asynchronous-use list.  */
+    for (aup = NULL, au = devicep->openacc.async.tgt_uses; au;
+	 aup = au, au = au->next)
+      if (au->tgt == tgt)
+	{
+	  if (aup)
+	    aup->next = au->next;
+	  else
+	    devicep->openacc.async.tgt_uses = au->next;
+	  free (au);
+	  break;
+	}
+    if (!au)
+      gomp_fatal ("can't find tgt %p to remove in async list", tgt);
+    gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+  }
+#endif
+
   if (tgt->refcount > 1)
     tgt->refcount--;
   else
@@ -258,6 +281,18 @@  goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
   /* Increment reference to delay freeing of device memory until callback
      has triggered.  */
   tgt->refcount++;
+
+#ifdef RC_CHECKING
+  {
+    struct async_tgt_use *au = malloc (sizeof (struct async_tgt_use));
+    gomp_mutex_lock (&devicep->openacc.async.au_lock);
+    /* Record the asynchronous use of this target_mem_desc.  */
+    au->next = devicep->openacc.async.tgt_uses;
+    au->tgt = tgt;
+    devicep->openacc.async.tgt_uses = au;
+    gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+  }
+#endif
   gomp_unmap_vars_async (tgt, true, aq);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
 					      (void *) tgt);
@@ -276,6 +311,17 @@  goacc_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key n,
   struct target_mem_desc *tgt = n->tgt;
   assert (tgt);
   tgt->refcount++;
+#ifdef RC_CHECKING
+  {
+    gomp_mutex_lock (&devicep->openacc.async.au_lock);
+    struct async_tgt_use *au = malloc (sizeof (struct async_tgt_use));
+    /* Record the asynchronous use of this target_mem_desc.  */
+    au->next = devicep->openacc.async.tgt_uses;
+    au->tgt = tgt;
+    devicep->openacc.async.tgt_uses = au;
+    gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+  }
+#endif
   gomp_remove_var (devicep, n);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
                                              (void *) tgt);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 0e9a3e8..22aafb2 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -253,6 +253,15 @@  GOACC_parallel_keyed (int device, void (*fn) (void *),
   tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
 			     true, GOMP_MAP_VARS_OPENACC);
 
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  assert (tgt);
+  dump_tgt (__FUNCTION__, tgt);
+  tgt->prev = thr->mapped_data;
+  gomp_rc_check (acc_dev, tgt);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
     devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
@@ -270,6 +279,12 @@  GOACC_parallel_keyed (int device, void (*fn) (void *),
 					dims, tgt, aq);
       goacc_async_copyout_unmap_vars (tgt, aq);
     }
+
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
 }
 
 /* Legacy entry point, only provide host execution.  */
@@ -324,6 +339,12 @@  GOACC_data_start (int device, size_t mapnum,
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
   tgt->prev = thr->mapped_data;
   thr->mapped_data = tgt;
+
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
 }
 
 void
@@ -336,6 +357,12 @@  GOACC_data_end (void)
   thr->mapped_data = tgt->prev;
   gomp_unmap_vars (tgt, true);
   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
+
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&thr->dev->lock);
+  gomp_rc_check (thr->dev, thr->mapped_data);
+  gomp_mutex_unlock (&thr->dev->lock);
+#endif
 }
 
 void
@@ -624,6 +651,12 @@  GOACC_enter_exit_data (int device, size_t mapnum,
 	    }
 	}
     }
+
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
 }
 
 static void
diff --git a/libgomp/target.c b/libgomp/target.c
index 6e115d1..b9f8ce8 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -40,6 +40,9 @@ 
 #include <assert.h>
 #include <errno.h>
 #include <limits.h>
+#ifdef RC_CHECKING
+#include <stdio.h>
+#endif
 
 #ifdef PLUGIN_SUPPORT
 #include <dlfcn.h>
@@ -360,6 +363,198 @@  gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
     }
 }
 
+#ifdef RC_CHECKING
+void
+dump_tgt (const char *where, struct target_mem_desc *tgt)
+{
+  if (!getenv ("GOMP_DEBUG_TGT"))
+    return;
+
+  fprintf (stderr, "%s: %s: tgt=%p\n", __FUNCTION__, where, (void*) tgt);
+  fprintf (stderr, "refcount=%d\n", (int) tgt->refcount);
+  fprintf (stderr, "tgt_start=%p\n", (void*) tgt->tgt_start);
+  fprintf (stderr, "tgt_end=%p\n", (void*) tgt->tgt_end);
+  fprintf (stderr, "to_free=%p\n", tgt->to_free);
+  fprintf (stderr, "list_count=%d\n", (int) tgt->list_count);
+  for (int i = 0; i < tgt->list_count; i++)
+    {
+      fprintf (stderr, "list item %d:\n", i);
+      fprintf (stderr, "  key: %p\n", (void*) tgt->list[i].key);
+      if (tgt->list[i].key)
+	{
+	  fprintf (stderr, "  key.host_start=%p\n",
+		   (void*) tgt->list[i].key->host_start);
+	  fprintf (stderr, "  key.host_end=%p\n",
+		   (void*) tgt->list[i].key->host_end);
+	  fprintf (stderr, "  key.tgt=%p\n", (void*) tgt->list[i].key->tgt);
+	  fprintf (stderr, "  key.offset=%d\n",
+		   (int) tgt->list[i].key->tgt_offset);
+	  fprintf (stderr, "  key.refcount=%d\n",
+		   (int) tgt->list[i].key->refcount);
+	  fprintf (stderr, "  key.virtual_refcount=%d\n",
+		   (int) tgt->list[i].key->virtual_refcount);
+	  fprintf (stderr, "  key.attach_count=%p\n",
+		   (void*) tgt->list[i].key->attach_count);
+	  fprintf (stderr, "  key.link_key=%p\n",
+		   (void*) tgt->list[i].key->link_key);
+	}
+    }
+  fprintf (stderr, "\n");
+}
+
+static void
+rc_check_clear (splay_tree_node node)
+{
+  splay_tree_key k = &node->key;
+
+  k->refcount_chk = 0;
+  k->tgt->refcount_chk = 0;
+  k->tgt->mark = false;
+
+  if (node->left)
+    rc_check_clear (node->left);
+  if (node->right)
+    rc_check_clear (node->right);
+}
+
+static void
+rc_check_count (splay_tree_node node)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t;
+
+  /* Add virtual reference counts ("acc enter data", etc.) for this key.  */
+  k->refcount_chk += k->virtual_refcount;
+
+  t = k->tgt;
+  t->refcount_chk++;
+
+  if (!t->mark)
+    {
+      for (int i = 0; i < t->list_count; i++)
+	if (t->list[i].key)
+	  t->list[i].key->refcount_chk++;
+
+      t->mark = true;
+    }
+
+  if (node->left)
+    rc_check_count (node->left);
+  if (node->right)
+    rc_check_count (node->right);
+}
+
+static bool
+rc_check_verify (splay_tree_node node, bool noisy, bool errors)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t;
+
+  if (k->refcount != REFCOUNT_INFINITY)
+    {
+      if (noisy)
+	fprintf (stderr, "key %p (%p..+%d): rc=%d/%d, virt_rc=%d\n", k,
+		 (void *) k->host_start, (int) (k->host_end - k->host_start),
+		 (int) k->refcount, (int) k->refcount_chk,
+		 (int) k->virtual_refcount);
+
+      if (k->refcount != k->refcount_chk)
+	{
+	  if (noisy)
+	    fprintf (stderr, "  -- key refcount mismatch!\n");
+	  errors = true;
+	}
+
+      t = k->tgt;
+
+      if (noisy)
+	fprintf (stderr, "tgt %p: rc=%d/%d\n", t, (int) t->refcount,
+		 (int) t->refcount_chk);
+
+      if (t->refcount != t->refcount_chk)
+	{
+	  if (noisy)
+	    fprintf (stderr,
+		     "  -- target memory descriptor refcount mismatch!\n");
+	  errors = true;
+	}
+    }
+
+  if (node->left)
+    errors |= rc_check_verify (node->left, noisy, errors);
+  if (node->right)
+    errors |= rc_check_verify (node->right, noisy, errors);
+
+  return errors;
+}
+
+/* Call with device locked.  */
+
+attribute_hidden void
+gomp_rc_check (struct gomp_device_descr *devicep, struct target_mem_desc *tgt)
+{
+  splay_tree sp = &devicep->mem_map;
+
+  bool noisy = getenv ("GOMP_DEBUG_TGT") != 0;
+
+  if (noisy)
+    fprintf (stderr, "\n*** GOMP_RC_CHECK ***\n\n");
+
+  if (sp->root)
+    {
+      gomp_mutex_lock (&devicep->openacc.async.au_lock);
+      struct async_tgt_use *async_uses = devicep->openacc.async.tgt_uses;
+
+      rc_check_clear (sp->root);
+
+      for (struct target_mem_desc *t = tgt; t; t = t->prev)
+	{
+	  t->refcount_chk = 0;
+	  t->mark = false;
+	}
+      for (struct async_tgt_use *au = async_uses; au; au = au->next)
+	{
+	  struct target_mem_desc *t = au->tgt;
+	  t->refcount_chk = 0;
+	  t->mark = false;
+	}
+
+      /* Add references for interconnected splay-tree keys.  */
+      rc_check_count (sp->root);
+
+      /* Add references for the tgt for a currently-executing kernel and/or
+	 any enclosing data directives.  */
+      for (struct target_mem_desc *t = tgt; t; t = t->prev)
+	{
+	  t->refcount_chk++;
+
+	  if (!t->mark)
+	    {
+	      for (int i = 0; i < t->list_count; i++)
+		if (t->list[i].key)
+		  t->list[i].key->refcount_chk++;
+
+	      t->mark = true;
+	    }
+	}
+
+      /* Add references from in-progress asynchronous operations.  */
+      for (struct async_tgt_use *au = async_uses; au; au = au->next)
+	{
+	  struct target_mem_desc *t = au->tgt;
+	  t->refcount_chk++;
+	}
+
+      if (rc_check_verify (sp->root, noisy, false))
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("refcount checking failure");
+	}
+      gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+    }
+}
+#endif
+
 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
    gomp_map_0len_lookup found oldn for newn.
    Helper function of gomp_map_vars.  */
@@ -3274,6 +3469,10 @@  gomp_target_init (void)
 		current_device.type = current_device.get_type_func ();
 		current_device.mem_map.root = NULL;
 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
+#ifdef RC_CHECKING
+		current_device.openacc.async.tgt_uses = NULL;
+		gomp_mutex_init (&current_device.openacc.async.au_lock);
+#endif
 
 		/* Augment DEVICES and NUM_DEVICES.  */
 		devices = gomp_realloc (devices,