diff mbox

[gomp-nvptx,8/9] libgomp: update gomp_nvptx_main for -mgomp

Message ID 1448983707-18854-9-git-send-email-amonakov@ispras.ru
State New
Headers show

Commit Message

Alexander Monakov Dec. 1, 2015, 3:28 p.m. UTC
Here's how I've updated gomp_nvptx_main to set up the shared memory arrays
__nvptx_stacks and __nvptx_uni for -mgomp.  Since this makes sense only for
the -mgomp multilib, I've wrapped the whole file in an #ifdef that checks the
corresponding built-in macros.

Reaching those shared memory arrays is awkward.  I cannot declare them with
toplevel asms because the compiler implicitly declares them too, and ptxas
does not handle duplicate declarations.  Ideally I'd like to be able to say:

    extern char *__shared __nvptx_stacks[32];

Bernd, is your position on exposing shared memory as first-class address space
on NVPTX subject to change?  Do you remember what middle-end issues you've
encountered when trying that?

	* config/nvptx/team.c (gomp_nvptx_main): Rename to...
	(gomp_nvptx_main_1): ... this and mark noinline.
	(gomp_nvptx_main): Wrap the above, set up __nvptx_uni and
	__nvptx_stacks.
---
 libgomp/config/nvptx/team.c | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

Comments

Bernd Schmidt Dec. 1, 2015, 3:55 p.m. UTC | #1
On 12/01/2015 04:28 PM, Alexander Monakov wrote:
> Bernd, is your position on exposing shared memory as first-class address space
> on NVPTX subject to change?  Do you remember what middle-end issues you've
> encountered when trying that?

TYPE_ADDR_SPACE does not reliably contain the address space. Patches to
deal with that (rather than fix it, which Joseph doesn't like) got really
ugly and I gave up on it. So please use the patch I sent, which deals
with .shared inside the ptx backend (although I think it may have to be
reworked a little since Nathan changed the code around recently).


Bernd
Jakub Jelinek Dec. 2, 2015, 11:02 a.m. UTC | #2
On Tue, Dec 01, 2015 at 06:28:26PM +0300, Alexander Monakov wrote:
> +void
> +gomp_nvptx_main (void (*fn) (void *), void *fn_data)
> +{
> +  int tid, ntids;
> +  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
> +  asm ("mov.u32 %0, %%ntid.y;" : "=r"(ntids));

Formatting (missing space before ( ).

	Jakub
diff mbox

Patch

diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c
index 88d1d34..deb0860 100644
--- a/libgomp/config/nvptx/team.c
+++ b/libgomp/config/nvptx/team.c
@@ -24,6 +24,8 @@ 
 
 /* This file handles the maintainence of threads on NVPTX.  */
 
+#if defined __nvptx_softstack && defined __nvptx_unisimt__
+
 #include "libgomp.h"
 #include <stdlib.h>
 
@@ -31,15 +33,9 @@  struct gomp_thread *nvptx_thrs;
 
 static void gomp_thread_start (struct gomp_thread_pool *);
 
-void
-gomp_nvptx_main (void (*fn) (void *), void *fn_data)
+static void __attribute__((noinline))
+gomp_nvptx_main_1 (void (*fn) (void *), void *fn_data, int ntids, int tid)
 {
-  int ntids, tid, laneid;
-  asm ("mov.u32 %0, %%laneid;" : "=r" (laneid));
-  if (laneid)
-    return;
-  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
-  asm ("mov.u32 %0, %%ntid.y;" : "=r"(ntids));
   if (tid == 0)
     {
       gomp_global_icv.nthreads_var = ntids;
@@ -72,6 +68,30 @@  gomp_nvptx_main (void (*fn) (void *), void *fn_data)
     }
 }
 
+void
+gomp_nvptx_main (void (*fn) (void *), void *fn_data)
+{
+  int tid, ntids;
+  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
+  asm ("mov.u32 %0, %%ntid.y;" : "=r"(ntids));
+  char *stacks = 0;
+  int *__nvptx_uni;
+  asm ("cvta.shared.u64 %0, __nvptx_uni;" : "=r" (__nvptx_uni));
+  __nvptx_uni[tid] = 0;
+  if (tid == 0)
+    {
+      size_t stacksize = 131072;
+      stacks = gomp_malloc (stacksize * ntids);
+      char **__nvptx_stacks = 0;
+      asm ("cvta.shared.u64 %0, __nvptx_stacks;" : "=r" (__nvptx_stacks));
+      for (int i = 0; i < ntids; i++)
+	__nvptx_stacks[i] = stacks + stacksize * (i + 1);
+    }
+  asm ("bar.sync 0;");
+  gomp_nvptx_main_1 (fn, fn_data, ntids, tid);
+  free (stacks);
+}
+
 /* This function is a pthread_create entry point.  This contains the idle
    loop in which a thread waits to be called up to become part of a team.  */
 
@@ -160,3 +180,4 @@  gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
 }
 
 #include "../../team.c"
+#endif