diff mbox

[7/10] OpenACC 2.0 support for libgomp - OpenACC runtime, NVidia PTX/CUDA plugin

Message ID 874mrw2hfp.fsf@kepler.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge Jan. 12, 2015, 2:37 p.m. UTC
Hi!

On Tue, 23 Sep 2014 19:19:31 +0100, Julian Brown <julian@codesourcery.com> wrote:
> This patch contains the bulk of the OpenACC 2.0 runtime support, [...]

> --- /dev/null
> +++ b/libgomp/libgomp-plugin.c
> @@ -0,0 +1,106 @@

> +/* Exported (non-hidden) functions exposing libgomp interface for plugins.  */

> +void
> +gomp_plugin_mutex_init (gomp_mutex_t *mutex)
> +{
> +  gomp_mutex_init (mutex);
> +}
> +
> +void
> +gomp_plugin_mutex_destroy (gomp_mutex_t *mutex)
> +{
> +  gomp_mutex_destroy (mutex);
> +}
> +
> +void
> +gomp_plugin_mutex_lock (gomp_mutex_t *mutex)
> +{
> +  gomp_mutex_lock (mutex);
> +}
> +
> +void
> +gomp_plugin_mutex_unlock (gomp_mutex_t *mutex)
> +{
> +  gomp_mutex_unlock (mutex);
> +}

> --- a/libgomp/libgomp.map
> +++ b/libgomp/libgomp.map

> +PLUGIN_1.0 {
> +  global:

> +	gomp_plugin_mutex_init;
> +	gomp_plugin_mutex_destroy;
> +	gomp_plugin_mutex_lock;
> +	gomp_plugin_mutex_unlock;

> +};

> --- /dev/null
> +++ b/libgomp/plugin-nvptx.c
> @@ -0,0 +1,1854 @@
> +/* Plugin for NVPTX execution.

> +#include "libgomp.h"

Plugins in libgomp are not to depend on libgomp internals (libgomp.h),
and given that...

> +struct PTX_device
> +{

> +  /* A lock for use when manipulating the above stream list and array.  */
> +  gomp_mutex_t stream_lock;

> +};

> +static gomp_mutex_t PTX_event_lock;

> +static void
> +init_streams_for_device (struct PTX_device *ptx_dev, int concurrency)
> +{

> +  gomp_plugin_mutex_init (&ptx_dev->stream_lock);

> +}
> +[...]

... it makes much more sense to just use pthread mutexes here.  Committed
to gomp-4_0-branch in r219467:

commit 4de7ea8222739fa60d6eb81284dac61dc2bae7b2
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Jan 12 14:35:51 2015 +0000

    libgomp: Use pthread mutexes in the nvptx plugin.
    
    ... instead of libgomp's internal mutex implementation.  Plugins aren't to
    depend on internal libgomp interfaces, and how would you instantiate a
    gomp_mutex_t in a plugin without knowing what it is exactly?
    
    	libgomp/
    	* plugin/plugin-nvptx.c (struct ptx_device): Turn stream_lock
    	member into a pthread_mutex_t.  Adjust all users.
    	(ptx_event_lock): Likewise.
    	* libgomp-plugin.c (GOMP_PLUGIN_mutex_init)
    	(GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
    	(GOMP_PLUGIN_mutex_unlock): Remove.
    	* libgomp-plugin.h (GOMP_PLUGIN_mutex_init)
    	(GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
    	(GOMP_PLUGIN_mutex_unlock): Likewise.
    	* libgomp.map (GOMP_PLUGIN_1.0): Remove GOMP_PLUGIN_mutex_init,
    	GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock,
    	GOMP_PLUGIN_mutex_unlock.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@219467 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog.gomp        | 15 +++++++++++++++
 libgomp/libgomp-plugin.c      | 24 ------------------------
 libgomp/libgomp-plugin.h      |  7 -------
 libgomp/libgomp.map           |  4 ----
 libgomp/plugin/plugin-nvptx.c | 39 ++++++++++++++++++++-------------------
 5 files changed, 35 insertions(+), 54 deletions(-)



Grüße,
 Thomas
diff mbox

Patch

diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 745b836..d955a85 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,18 @@ 
+2015-01-12  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* plugin/plugin-nvptx.c (struct ptx_device): Turn stream_lock
+	member into a pthread_mutex_t.  Adjust all users.
+	(ptx_event_lock): Likewise.
+	* libgomp-plugin.c (GOMP_PLUGIN_mutex_init)
+	(GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
+	(GOMP_PLUGIN_mutex_unlock): Remove.
+	* libgomp-plugin.h (GOMP_PLUGIN_mutex_init)
+	(GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
+	(GOMP_PLUGIN_mutex_unlock): Likewise.
+	* libgomp.map (GOMP_PLUGIN_1.0): Remove GOMP_PLUGIN_mutex_init,
+	GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock,
+	GOMP_PLUGIN_mutex_unlock.
+
 2014-12-22  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* libgomp.c (struct gomp_device_descr): Add lock member.
diff --git libgomp/libgomp-plugin.c libgomp/libgomp-plugin.c
index 0026270..77e250e 100644
--- libgomp/libgomp-plugin.c
+++ libgomp/libgomp-plugin.c
@@ -82,27 +82,3 @@  GOMP_PLUGIN_fatal (const char *msg, ...)
   /* Unreachable.  */
   abort ();
 }
-
-void
-GOMP_PLUGIN_mutex_init (gomp_mutex_t *mutex)
-{
-  gomp_mutex_init (mutex);
-}
-
-void
-GOMP_PLUGIN_mutex_destroy (gomp_mutex_t *mutex)
-{
-  gomp_mutex_destroy (mutex);
-}
-
-void
-GOMP_PLUGIN_mutex_lock (gomp_mutex_t *mutex)
-{
-  gomp_mutex_lock (mutex);
-}
-
-void
-GOMP_PLUGIN_mutex_unlock (gomp_mutex_t *mutex)
-{
-  gomp_mutex_unlock (mutex);
-}
diff --git libgomp/libgomp-plugin.h libgomp/libgomp-plugin.h
index 051d4e2..2e2be1f 100644
--- libgomp/libgomp-plugin.h
+++ libgomp/libgomp-plugin.h
@@ -29,8 +29,6 @@ 
 #ifndef LIBGOMP_PLUGIN_H
 #define LIBGOMP_PLUGIN_H 1
 
-#include "mutex.h"
-
 extern void *GOMP_PLUGIN_malloc (size_t) __attribute__((malloc));
 extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__((malloc));
 extern void *GOMP_PLUGIN_realloc (void *, size_t);
@@ -42,9 +40,4 @@  extern void GOMP_PLUGIN_error (const char *, ...)
 extern void GOMP_PLUGIN_fatal (const char *, ...)
 	__attribute__((noreturn, format (printf, 1, 2)));
 
-extern void GOMP_PLUGIN_mutex_init (gomp_mutex_t *);
-extern void GOMP_PLUGIN_mutex_destroy (gomp_mutex_t *);
-extern void GOMP_PLUGIN_mutex_lock (gomp_mutex_t *);
-extern void GOMP_PLUGIN_mutex_unlock (gomp_mutex_t *);
-
 #endif
diff --git libgomp/libgomp.map libgomp/libgomp.map
index aa1fdb8..bfdb78c 100644
--- libgomp/libgomp.map
+++ libgomp/libgomp.map
@@ -334,10 +334,6 @@  GOMP_PLUGIN_1.0 {
 	GOMP_PLUGIN_error;
 	GOMP_PLUGIN_debug;
 	GOMP_PLUGIN_fatal;
-	GOMP_PLUGIN_mutex_init;
-	GOMP_PLUGIN_mutex_destroy;
-	GOMP_PLUGIN_mutex_lock;
-	GOMP_PLUGIN_mutex_unlock;
 	GOMP_PLUGIN_async_unmap_vars;
 	GOMP_PLUGIN_acc_thread;
 };
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index 593b1a9..f92ff40 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -39,6 +39,7 @@ 
 #include "oacc-ptx.h"
 #include "oacc-plugin.h"
 
+#include <pthread.h>
 #include <cuda.h>
 #include <stdint.h>
 #include <string.h>
@@ -302,7 +303,7 @@  struct ptx_device
     int size;
   } async_streams;
   /* A lock for use when manipulating the above stream list and array.  */
-  gomp_mutex_t stream_lock;
+  pthread_mutex_t stream_lock;
   int ord;
   bool overlap;
   bool map;
@@ -331,7 +332,7 @@  struct ptx_event
   struct ptx_event *next;
 };
 
-static gomp_mutex_t ptx_event_lock;
+static pthread_mutex_t ptx_event_lock;
 static struct ptx_event *ptx_events;
 
 #define _XSTR(s) _STR(s)
@@ -424,7 +425,7 @@  init_streams_for_device (struct ptx_device *ptx_dev, int concurrency)
   ptx_dev->null_stream = null_stream;
 
   ptx_dev->active_streams = NULL;
-  GOMP_PLUGIN_mutex_init (&ptx_dev->stream_lock);
+  pthread_mutex_init (&ptx_dev->stream_lock, NULL);
 
   if (concurrency < 1)
     concurrency = 1;
@@ -484,7 +485,7 @@  select_stream_for_async (int async, pthread_t thread, bool create,
     async++;
 
   if (create)
-    GOMP_PLUGIN_mutex_lock (&ptx_dev->stream_lock);
+    pthread_mutex_lock (&ptx_dev->stream_lock);
 
   /* NOTE: AFAICT there's no particular need for acc_async_sync to map to the
      null stream, and in fact better performance may be obtainable if it doesn't
@@ -566,7 +567,7 @@  select_stream_for_async (int async, pthread_t thread, bool create,
       if (thread != stream->host_thread)
         stream->multithreaded = true;
 
-      GOMP_PLUGIN_mutex_unlock (&ptx_dev->stream_lock);
+      pthread_mutex_unlock (&ptx_dev->stream_lock);
     }
   else if (stream && !stream->multithreaded
 	   && !pthread_equal (stream->host_thread, thread))
@@ -597,7 +598,7 @@  nvptx_init (void)
 
   ptx_events = NULL;
 
-  GOMP_PLUGIN_mutex_init (&ptx_event_lock);
+  pthread_mutex_init (&ptx_event_lock, NULL);
 
   ptx_inited = true;
 
@@ -822,7 +823,7 @@  event_gc (bool memmap_lockable)
   struct ptx_event *ptx_event = ptx_events;
   struct nvptx_thread *nvthd = nvptx_thread ();
 
-  GOMP_PLUGIN_mutex_lock (&ptx_event_lock);
+  pthread_mutex_lock (&ptx_event_lock);
 
   while (ptx_event != NULL)
     {
@@ -883,7 +884,7 @@  event_gc (bool memmap_lockable)
 	}
     }
 
-  GOMP_PLUGIN_mutex_unlock (&ptx_event_lock);
+  pthread_mutex_unlock (&ptx_event_lock);
 }
 
 static void
@@ -901,12 +902,12 @@  event_add (enum ptx_event_type type, CUevent *e, void *h)
   ptx_event->addr = h;
   ptx_event->ord = nvthd->ptx_dev->ord;
 
-  GOMP_PLUGIN_mutex_lock (&ptx_event_lock);
+  pthread_mutex_lock (&ptx_event_lock);
 
   ptx_event->next = ptx_events;
   ptx_events = ptx_event;
 
-  GOMP_PLUGIN_mutex_unlock (&ptx_event_lock);
+  pthread_mutex_unlock (&ptx_event_lock);
 }
 
 void
@@ -1239,19 +1240,19 @@  nvptx_async_test_all (void)
   pthread_t self = pthread_self ();
   struct nvptx_thread *nvthd = nvptx_thread ();
 
-  GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
 
   for (s = nvthd->ptx_dev->active_streams; s != NULL; s = s->next)
     {
       if ((s->multithreaded || pthread_equal (s->host_thread, self))
 	  && cuStreamQuery (s->stream) == CUDA_ERROR_NOT_READY)
 	{
-	  GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+	  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
 	  return 0;
 	}
     }
 
-  GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
 
   event_gc (true);
 
@@ -1322,7 +1323,7 @@  nvptx_wait_all (void)
   pthread_t self = pthread_self ();
   struct nvptx_thread *nvthd = nvptx_thread ();
 
-  GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
 
   /* Wait for active streams initiated by this thread (or by multiple threads)
      to complete.  */
@@ -1342,7 +1343,7 @@  nvptx_wait_all (void)
 	}
     }
 
-  GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
 
   event_gc (true);
 }
@@ -1368,7 +1369,7 @@  nvptx_wait_all_async (int async)
 
   event_gc (true);
 
-  GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
 
   for (other_stream = nvthd->ptx_dev->active_streams;
        other_stream != NULL;
@@ -1396,7 +1397,7 @@  nvptx_wait_all_async (int async)
 	GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (r));
    }
 
-  GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
 }
 
 static void *
@@ -1442,7 +1443,7 @@  nvptx_set_cuda_stream (int async, void *stream)
   pthread_t self = pthread_self ();
   struct nvptx_thread *nvthd = nvptx_thread ();
 
-  GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
 
   if (async < 0)
     GOMP_PLUGIN_fatal ("bad async %d", async);
@@ -1474,7 +1475,7 @@  nvptx_set_cuda_stream (int async, void *stream)
       free (oldstream);
     }
 
-  GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
 
   (void) select_stream_for_async (async, self, true, (CUstream) stream);