diff mbox series

[2/2,og8] OpenACC reference count consistency checking

Message ID a2ad700fe445996408136d9bf6f2cbf967f232b6.1543438190.git.julian@codesourcery.com
State New
Headers show
Series Further OpenACC/libgomp refcounting fixes | expand

Commit Message

Julian Brown Nov. 28, 2018, 9:22 p.m. UTC
This is the reference count consistency-checking code.  The model used
for checking is as follows.

 1. Each splay tree key that references a target memory descriptor
    increases that descriptor's refcount by 1.

 2. Each variable listed in a target memory descriptor that links back to a
    splay tree key increases that key's refcount by 1. Each target memory
    descriptor's variable list is counted only once, even if multiple
    splay tree keys point to it (via their "tgt" field).

 3. Additional ("real") target memory descriptors may be present
    representing data mapped through "acc data" or "acc parallel/kernels"
    blocks.  These descriptors have their refcount bumped, and the
    variables linked through such blocks have their refcounts bumped also
    (again, with "once only" semantics).

 4. Asynchronous operations "artificially" bump the reference counts for
    referenced target memory descriptors (but *not* for linked
    variables/splay tree keys), in order to delay freeing mapped device
    memory until the asynchronous operation has completed.  We model this,
    for checking purposes only, using an off-side linked list.

 5. "Virtual" reference counts ("virtual_refcount") cannot be checked
    purely statically, so we add the incoming value to each key's
    statically-determined reference count ("refcount_chk"), and make
    sure that the total matches the incoming reference count ("refcount").

With the previous patch, as noted in the parent email, this allows a
libgomp test run to complete successfully (with checking enabled).

Julian

ChangeLog

	libgomp/
	* libgomp.h (RC_CHECKING): New macro, disabled by default, guarding all
	hunks in this patch.
	(target_mem_desc): Add forward declaration.
	(async_tgt_use): New struct.
	(target_mem_desc): Add refcount_chk, mark fields.
	(acc_dispatch_t): Add tgt_uses, au_lock fields.
	(dump_tgt, gomp_rc_check): Add prototypes.
	* oacc-async (goacc_async_unmap_tgt): Add refcount self-check code.
	(goacc_async_copyout_unmap_vars): Likewise.
	(goacc_remove_var_async): Likewise.
	* oacc-parallel.c (GOACC_parallel_keyed_internal): Add refcount
	self-check code.
	(GOACC_data_start, GOACC_data_end, GOACC_enter_exit_data): Likewise.
	* target.c (stdio.h): Include.
	(dump_tgt, rc_check_clear, rc_check_count, rc_check_verify)
	(gomp_rc_check): New functions to consistency-check reference counts.
	(gomp_target_init): Initialise self-check-related device fields.
---
 libgomp/libgomp.h       |   33 ++++++++-
 libgomp/oacc-async.c    |   46 +++++++++++
 libgomp/oacc-parallel.c |   33 ++++++++
 libgomp/target.c        |  199 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 310 insertions(+), 1 deletions(-)
diff mbox series

Patch

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index ea44afc..77cc923 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -814,9 +814,26 @@  struct target_var_desc {
   uintptr_t length;
 };
 
+/* Uncomment to enable reference-count consistency checking (for development
+   use only).  */
+/*#define RC_CHECKING 1*/
+
+#ifdef RC_CHECKING
+struct target_mem_desc;
+
+struct async_tgt_use {
+  struct target_mem_desc *tgt;
+  struct async_tgt_use *next;
+};
+#endif
+
 struct target_mem_desc {
   /* Reference count.  */
   uintptr_t refcount;
+#ifdef RC_CHECKING
+  uintptr_t refcount_chk;
+  bool mark;
+#endif
   /* All the splay nodes allocated together.  */
   splay_tree_node array;
   /* Start of the target region.  */
@@ -865,6 +882,10 @@  struct splay_tree_key_s {
      "present increment" operations (via "acc enter data") refering to the same
      host-memory block.  */
   uintptr_t virtual_refcount;
+#ifdef RC_CHECKING
+  /* The recalculated reference count, for verification.  */
+  uintptr_t refcount_chk;
+#endif
   /* For a block with attached pointers, the attachment counters for each.  */
   unsigned short *attach_count;
   /* Pointer to the original mapping of "omp declare target link" object.  */
@@ -899,7 +920,11 @@  typedef struct acc_dispatch_t
     int nasyncqueue;
     struct goacc_asyncqueue **asyncqueue;
     struct goacc_asyncqueue_list *active;
-    
+#ifdef RC_CHECKING
+    struct async_tgt_use *tgt_uses;
+    gomp_mutex_t au_lock;
+#endif
+
     __typeof (GOMP_OFFLOAD_openacc_async_construct) *construct_func;
     __typeof (GOMP_OFFLOAD_openacc_async_destruct) *destruct_func;
     __typeof (GOMP_OFFLOAD_openacc_async_test) *test_func;
@@ -1028,6 +1053,12 @@  extern void gomp_detach_pointer (struct gomp_device_descr *,
 				 struct goacc_asyncqueue *, splay_tree_key,
 				 uintptr_t, bool, struct gomp_coalesce_buf *);
 
+#ifdef RC_CHECKING
+extern void dump_tgt (const char *, struct target_mem_desc *);
+extern void gomp_rc_check (struct gomp_device_descr *,
+			   struct target_mem_desc *);
+#endif
+
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 					      size_t, void **, void **,
 					      size_t *, void *, bool,
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index be47222..6992957 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -365,6 +365,29 @@  goacc_async_unmap_tgt (void *ptr)
 {
   struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
 
+#ifdef RC_CHECKING
+  {
+    struct gomp_device_descr *devicep = tgt->device_descr;
+    struct async_tgt_use *aup, *au;
+    gomp_mutex_lock (&devicep->openacc.async.au_lock);
+    /* Remove tgt from asynchronous-use list.  */
+    for (aup = NULL, au = devicep->openacc.async.tgt_uses; au;
+	 aup = au, au = au->next)
+      if (au->tgt == tgt)
+	{
+	  if (aup)
+	    aup->next = au->next;
+	  else
+	    devicep->openacc.async.tgt_uses = au->next;
+	  free (au);
+	  break;
+	}
+    if (!au)
+      gomp_fatal ("can't find tgt %p to remove in async list", tgt);
+    gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+  }
+#endif
+
   if (tgt->refcount > 1)
     tgt->refcount--;
   else
@@ -380,6 +403,18 @@  goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
   /* Increment reference to delay freeing of device memory until callback
      has triggered.  */
   tgt->refcount++;
+
+#ifdef RC_CHECKING
+  {
+    struct async_tgt_use *au = malloc (sizeof (struct async_tgt_use));
+    gomp_mutex_lock (&devicep->openacc.async.au_lock);
+    /* Record the asynchronous use of this target_mem_desc.  */
+    au->next = devicep->openacc.async.tgt_uses;
+    au->tgt = tgt;
+    devicep->openacc.async.tgt_uses = au;
+    gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+  }
+#endif
   gomp_unmap_vars_async (tgt, true, aq);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
 					      (void *) tgt);
@@ -398,6 +433,17 @@  goacc_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key n,
   struct target_mem_desc *tgt = n->tgt;
   assert (tgt);
   tgt->refcount++;
+#ifdef RC_CHECKING
+  {
+    gomp_mutex_lock (&devicep->openacc.async.au_lock);
+    struct async_tgt_use *au = malloc (sizeof (struct async_tgt_use));
+    /* Record the asynchronous use of this target_mem_desc.  */
+    au->next = devicep->openacc.async.tgt_uses;
+    au->tgt = tgt;
+    devicep->openacc.async.tgt_uses = au;
+    gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+  }
+#endif
   gomp_remove_var (devicep, n);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
                                              (void *) tgt);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index a4487b8..c74221f 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -375,6 +375,15 @@  GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
 				&api_info);
     }
 
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  assert (tgt);
+  dump_tgt (__FUNCTION__, tgt);
+  tgt->prev = thr->mapped_data;
+  gomp_rc_check (acc_dev, tgt);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
     devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
@@ -418,6 +427,12 @@  GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
       goacc_async_copyout_unmap_vars (tgt, aq);
     }
 
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
  out:
   if (profiling_dispatch_p)
     {
@@ -589,6 +604,12 @@  GOACC_data_start (int device, size_t mapnum,
       thr->prof_info = NULL;
       thr->api_info = NULL;
     }
+
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
 }
 
 void
@@ -664,6 +685,12 @@  GOACC_data_end (void)
       thr->prof_info = NULL;
       thr->api_info = NULL;
     }
+
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
 }
 
 void
@@ -1023,6 +1050,12 @@  GOACC_enter_exit_data (int device, size_t mapnum,
 	}
     }
 
+#ifdef RC_CHECKING
+  gomp_mutex_lock (&acc_dev->lock);
+  gomp_rc_check (acc_dev, thr->mapped_data);
+  gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
  out:
   if (profiling_dispatch_p)
     {
diff --git a/libgomp/target.c b/libgomp/target.c
index 91139a6..d6b67f8 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -40,6 +40,9 @@ 
 #include <assert.h>
 #include <errno.h>
 #include <limits.h>
+#ifdef RC_CHECKING
+#include <stdio.h>
+#endif
 
 #ifdef PLUGIN_SUPPORT
 #include <dlfcn.h>
@@ -361,6 +364,198 @@  gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
     }
 }
 
+#ifdef RC_CHECKING
+void
+dump_tgt (const char *where, struct target_mem_desc *tgt)
+{
+  if (!getenv ("GOMP_DEBUG_TGT"))
+    return;
+
+  fprintf (stderr, "%s: %s: tgt=%p\n", __FUNCTION__, where, (void*) tgt);
+  fprintf (stderr, "refcount=%d\n", (int) tgt->refcount);
+  fprintf (stderr, "tgt_start=%p\n", (void*) tgt->tgt_start);
+  fprintf (stderr, "tgt_end=%p\n", (void*) tgt->tgt_end);
+  fprintf (stderr, "to_free=%p\n", tgt->to_free);
+  fprintf (stderr, "list_count=%d\n", (int) tgt->list_count);
+  for (int i = 0; i < tgt->list_count; i++)
+    {
+      fprintf (stderr, "list item %d:\n", i);
+      fprintf (stderr, "  key: %p\n", (void*) tgt->list[i].key);
+      if (tgt->list[i].key)
+        {
+	  fprintf (stderr, "  key.host_start=%p\n",
+		   (void*) tgt->list[i].key->host_start);
+	  fprintf (stderr, "  key.host_end=%p\n",
+		   (void*) tgt->list[i].key->host_end);
+	  fprintf (stderr, "  key.tgt=%p\n", (void*) tgt->list[i].key->tgt);
+	  fprintf (stderr, "  key.offset=%d\n",
+		   (int) tgt->list[i].key->tgt_offset);
+	  fprintf (stderr, "  key.refcount=%d\n",
+		   (int) tgt->list[i].key->refcount);
+	  fprintf (stderr, "  key.virtual_refcount=%d\n",
+		   (int) tgt->list[i].key->virtual_refcount);
+	  fprintf (stderr, "  key.attach_count=%p\n",
+		   (void*) tgt->list[i].key->attach_count);
+	  fprintf (stderr, "  key.link_key=%p\n",
+		   (void*) tgt->list[i].key->link_key);
+	}
+    }
+  fprintf (stderr, "\n");
+}
+
+static void
+rc_check_clear (splay_tree_node node)
+{
+  splay_tree_key k = &node->key;
+
+  k->refcount_chk = 0;
+  k->tgt->refcount_chk = 0;
+  k->tgt->mark = false;
+
+  if (node->left)
+    rc_check_clear (node->left);
+  if (node->right)
+    rc_check_clear (node->right);
+}
+
+static void
+rc_check_count (splay_tree_node node)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t;
+
+  /* Add virtual reference counts ("acc enter data", etc.) for this key.  */
+  k->refcount_chk += k->virtual_refcount;
+
+  t = k->tgt;
+  t->refcount_chk++;
+
+  if (!t->mark)
+    {
+      for (int i = 0; i < t->list_count; i++)
+	if (t->list[i].key)
+	  t->list[i].key->refcount_chk++;
+
+      t->mark = true;
+    }
+
+  if (node->left)
+    rc_check_count (node->left);
+  if (node->right)
+    rc_check_count (node->right);
+}
+
+static bool
+rc_check_verify (splay_tree_node node, bool noisy, bool errors)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t;
+
+  if (k->refcount != REFCOUNT_INFINITY)
+    {
+      if (noisy)
+	fprintf (stderr, "key %p (%p..+%d): rc=%d/%d, virt_rc=%d\n", k,
+		 (void *) k->host_start, (int) (k->host_end - k->host_start),
+		 (int) k->refcount, (int) k->refcount_chk,
+		 (int) k->virtual_refcount);
+
+      if (k->refcount != k->refcount_chk)
+        {
+	  if (noisy)
+	    fprintf (stderr, "  -- key refcount mismatch!\n");
+	  errors = true;
+	}
+
+      t = k->tgt;
+
+      if (noisy)
+	fprintf (stderr, "tgt %p: rc=%d/%d\n", t, (int) t->refcount,
+		 (int) t->refcount_chk);
+
+      if (t->refcount != t->refcount_chk)
+        {
+	  if (noisy)
+	    fprintf (stderr,
+		     "  -- target memory descriptor refcount mismatch!\n");
+	  errors = true;
+	}
+    }
+
+  if (node->left)
+    errors |= rc_check_verify (node->left, noisy, errors);
+  if (node->right)
+    errors |= rc_check_verify (node->right, noisy, errors);
+
+  return errors;
+}
+
+/* Call with device locked.  */
+
+attribute_hidden void
+gomp_rc_check (struct gomp_device_descr *devicep, struct target_mem_desc *tgt)
+{
+  splay_tree sp = &devicep->mem_map;
+
+  bool noisy = getenv ("GOMP_DEBUG_TGT") != 0;
+
+  if (noisy)
+    fprintf (stderr, "\n*** GOMP_RC_CHECK ***\n\n");
+
+  if (sp->root)
+    {
+      gomp_mutex_lock (&devicep->openacc.async.au_lock);
+      struct async_tgt_use *async_uses = devicep->openacc.async.tgt_uses;
+
+      rc_check_clear (sp->root);
+
+      for (struct target_mem_desc *t = tgt; t; t = t->prev)
+	{
+	  t->refcount_chk = 0;
+	  t->mark = false;
+	}
+      for (struct async_tgt_use *au = async_uses; au; au = au->next)
+        {
+	  struct target_mem_desc *t = au->tgt;
+	  t->refcount_chk = 0;
+	  t->mark = false;
+	}
+
+      /* Add references for interconnected splay-tree keys.  */
+      rc_check_count (sp->root);
+
+      /* Add references for the tgt for a currently-executing kernel and/or
+	 any enclosing data directives.  */
+      for (struct target_mem_desc *t = tgt; t; t = t->prev)
+	{
+	  t->refcount_chk++;
+
+	  if (!t->mark)
+	    {
+	      for (int i = 0; i < t->list_count; i++)
+		if (t->list[i].key)
+		  t->list[i].key->refcount_chk++;
+
+	      t->mark = true;
+	    }
+	}
+
+      /* Add references from in-progress asynchronous operations.  */
+      for (struct async_tgt_use *au = async_uses; au; au = au->next)
+        {
+	  struct target_mem_desc *t = au->tgt;
+	  t->refcount_chk++;
+	}
+
+      if (rc_check_verify (sp->root, noisy, false))
+        {
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("refcount checking failure");
+	}
+      gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+    }
+}
+#endif
+
 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
    gomp_map_0len_lookup found oldn for newn.
    Helper function of gomp_map_vars.  */
@@ -3622,6 +3817,10 @@  gomp_target_init (void)
 		current_device.type = current_device.get_type_func ();
 		current_device.mem_map.root = NULL;
 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
+#ifdef RC_CHECKING
+		current_device.openacc.async.tgt_uses = NULL;
+		gomp_mutex_init (&current_device.openacc.async.au_lock);
+#endif
 
 		/* Augment DEVICES and NUM_DEVICES.  */
 		devices = gomp_realloc (devices,