diff mbox

[gomp4.5] Handle #pragma omp declare target link

Message ID 20151201172927.GA7692@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Verbin Dec. 1, 2015, 5:29 p.m. UTC
On Tue, Dec 01, 2015 at 14:15:59 +0100, Jakub Jelinek wrote:
> On Tue, Dec 01, 2015 at 11:48:51AM +0300, Ilya Verbin wrote:
> > > On 01 Dec 2015, at 11:18, Jakub Jelinek <jakub@redhat.com> wrote:
> > >> On Mon, Nov 30, 2015 at 11:55:20PM +0300, Ilya Verbin wrote:
> > >> Ok, but it doesn't solve the issue with doing it for the executable, because
> > >> gomp_unmap_tgt (n->tgt) will want to run free_func on uninitialized device.
> > > 
> > > ?? You mean that the
> > > devicep->unload_image_func (devicep->target_id, version, target_data);
> > > call deinitializes the device or something else (I mean, if there is some
> > > other tgt, then it had to be initialized)?
> > 
> > No, I mean that it can be deinitialized from plugin's __run_exit_handlers (see my last mail with the patch).
> 
> Then the bug is that you have too many atexit registered handlers that
> perform some finalization, better would be to have a single one that
> performs everything in order.
> 
> Anyway, the other option is in the atexit handlers (liboffloadmic and/or the
> intelmic plugin) to set some flag and ignore free_func calls when the flag
> is set or something like that.
> 
> Note library destructors can also use OpenMP code in them, similarly C++
> dtors etc., so when you at some point finalize certain device, you should
> arrange for newer events on the device to be ignored and new offloadings to
> go to host fallback.

So I guess the decision to do host fallback should be made in resolve_device,
rather than in plugins (in free_func and all others).  Is this patch OK?
make check-target-libgomp pass both using emul and hw, offloading from dlopened
libs also works fine.


libgomp/
	* target.c (finalized): New static variable.
	(resolve_device): Do nothing when finalized is true.
	(GOMP_offload_register_ver): Likewise.
	(GOMP_offload_unregister_ver): Likewise.
	(gomp_target_fini): New static function.
	(gomp_target_init): Call gomp_target_fini at exit.
liboffloadmic/
	* plugin/libgomp-plugin-intelmic.cpp (unregister_main_image): Remove.
	(register_main_image): Do not call unregister_main_image at exit.
	(GOMP_OFFLOAD_fini_device): Allow for OpenMP.  Unregister main image.




  -- Ilya

Comments

Jakub Jelinek Dec. 1, 2015, 7:05 p.m. UTC | #1
On Tue, Dec 01, 2015 at 08:29:27PM +0300, Ilya Verbin wrote:
> libgomp/
> 	* target.c (finalized): New static variable.
> 	(resolve_device): Do nothing when finalized is true.
> 	(GOMP_offload_register_ver): Likewise.
> 	(GOMP_offload_unregister_ver): Likewise.
> 	(gomp_target_fini): New static function.
> 	(gomp_target_init): Call gomp_target_fini at exit.
> liboffloadmic/
> 	* plugin/libgomp-plugin-intelmic.cpp (unregister_main_image): Remove.
> 	(register_main_image): Do not call unregister_main_image at exit.
> 	(GOMP_OFFLOAD_fini_device): Allow for OpenMP.  Unregister main image.
> 
> diff --git a/libgomp/target.c b/libgomp/target.c
> index cf9d0e6..320178e 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -78,6 +78,10 @@ static int num_devices;
>  /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
>  static int num_devices_openmp;
>  
> +/* True when offloading runtime is finalized.  */
> +static bool finalized;


> +
> +
>  /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
>  
>  static void *
> @@ -108,6 +112,9 @@ gomp_get_num_devices (void)
>  static struct gomp_device_descr *
>  resolve_device (int device_id)
>  {
> +  if (finalized)
> +    return NULL;
> +

This is racy, tsan would tell you so.
Instead of a global var, I'd just change the devicep->is_initialized 
field from bool into a 3 state field (perhaps enum), with states
uninitialized, initialized, finalized, and then say in resolve_device,

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

Though, of course, that is incomplete, because resolve_device takes one
lock, gomp_get_target_fn_addr another one, gomp_map_vars yet another one.
So I think either we want to rewrite the locking, such that say
resolve_device returns a locked device and then you perform stuff on the
locked device (disadvantage is that gomp_map_vars will call gomp_malloc
with the lock held, which can take some time to allocate the memory),
or there needs to be the possibility that gomp_map_vars rechecks if the
device has not been finalized after taking the lock and returns to the
caller if the device has been finalized in between resolve_device and
gomp_map_vars.

	Jakub
diff mbox

Patch

diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e6..320178e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -78,6 +78,10 @@  static int num_devices;
 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
 static int num_devices_openmp;
 
+/* True when offloading runtime is finalized.  */
+static bool finalized;
+
+
 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
 
 static void *
@@ -108,6 +112,9 @@  gomp_get_num_devices (void)
 static struct gomp_device_descr *
 resolve_device (int device_id)
 {
+  if (finalized)
+    return NULL;
+
   if (device_id == GOMP_DEVICE_ICV)
     {
       struct gomp_task_icv *icv = gomp_icv (false);
@@ -1095,6 +1102,9 @@  GOMP_offload_register_ver (unsigned version, const void *host_table,
 {
   int i;
 
+  if (finalized)
+    return;
+
   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
     gomp_fatal ("Library too old for offload (version %u < %u)",
 		GOMP_VERSION, GOMP_VERSION_LIB (version));
@@ -1143,6 +1153,9 @@  GOMP_offload_unregister_ver (unsigned version, const void *host_table,
 {
   int i;
 
+  if (finalized)
+    return;
+
   gomp_mutex_lock (&register_lock);
 
   /* Unload image from all initialized devices.  */
@@ -2282,6 +2295,24 @@  gomp_load_plugin_for_device (struct gomp_device_descr *device,
   return 0;
 }
 
+/* This function finalizes the runtime needed for offloading and all initialized
+   devices.  */
+
+static void
+gomp_target_fini (void)
+{
+  finalized = true;
+
+  int i;
+  for (i = 0; i < num_devices; i++)
+    {
+      struct gomp_device_descr *devicep = &devices[i];
+      gomp_mutex_lock (&devicep->lock);
+      gomp_fini_device (devicep);
+      gomp_mutex_unlock (&devicep->lock);
+    }
+}
+
 /* This function initializes the runtime needed for offloading.
    It parses the list of offload targets and tries to load the plugins for
    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
@@ -2387,6 +2418,9 @@  gomp_target_init (void)
       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
 	goacc_register (&devices[i]);
     }
+
+  if (atexit (gomp_target_fini) != 0)
+    gomp_fatal ("atexit failed");
 }
 
 #else /* PLUGIN_SUPPORT */
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index f8c1725..68f7b2c 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -231,12 +231,6 @@  offload (const char *file, uint64_t line, int device, const char *name,
 }
 
 static void
-unregister_main_image ()
-{
-  __offload_unregister_image (&main_target_image);
-}
-
-static void
 register_main_image ()
 {
   /* Do not check the return value, because old versions of liboffloadmic did
@@ -246,12 +240,6 @@  register_main_image ()
   /* liboffloadmic will call GOMP_PLUGIN_target_task_completion when
      asynchronous task on target is completed.  */
   __offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
-
-  if (atexit (unregister_main_image) != 0)
-    {
-      fprintf (stderr, "%s: atexit failed\n", __FILE__);
-      exit (1);
-    }
 }
 
 /* liboffloadmic loads and runs offload_target_main on all available devices
@@ -269,8 +257,9 @@  extern "C" void
 GOMP_OFFLOAD_fini_device (int device)
 {
   TRACE ("(device = %d)", device);
-  /* Unreachable for GOMP_OFFLOAD_CAP_OPENMP_400.  */
-  abort ();
+
+  /* liboffloadmic will finalize target processes on all available devices.  */
+  __offload_unregister_image (&main_target_image);
 }
 
 static void