diff mbox

[WIP] OpenMP 4 NVPTX support

Message ID 87twswyy9n.fsf@schwinge.name
State New
Headers show

Commit Message

Thomas Schwinge July 22, 2015, 4:04 p.m. UTC
Hi!

On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> offloading to NVPTX (the first patch).  The second patch is WIP, just first
> few needed changes to make libgomp to build for NVPTX (several weeks of work
> at least).

We're not specifically working on making nvptx offloading work for
OpenMP, but OpenACC offloading also requires a tiny bit of code to be
shipped in the offloading device's runtime library -- code that
conceptually belongs in libgomp.  (On gomp-4_0-branch, it currently
lives in libgcc because that was easier to do.)  As I found out,
building a "dummy" (empty) libgomp for nvptx is not actually
difficult.  In addition to your second patch (U2; quoted at the end of
this email), we'll need the following:

commit ea5213c1eb6e525f64aa103312e8e0ac88048122
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Jul 22 12:12:41 2015 +0200

    Empty libgomp for nvptx
    
        $ mkdir libgomp/config/nvptx
        $ cp libgomp/config/{linux,nvptx}/omp-lock.h
        $ for f in libgomp{,/config/linux,/config/posix}/*.c; do touch libgomp/config/nvptx/"$(basename "$f")"; done
---
 libgomp/config/nvptx/affinity.c       |  0
 libgomp/config/nvptx/alloc.c          |  0
 libgomp/config/nvptx/bar.c            |  0
 libgomp/config/nvptx/barrier.c        |  0
 libgomp/config/nvptx/critical.c       |  0
 libgomp/config/nvptx/env.c            |  0
 libgomp/config/nvptx/error.c          |  0
 libgomp/config/nvptx/fortran.c        |  0
 libgomp/config/nvptx/iter.c           |  0
 libgomp/config/nvptx/iter_ull.c       |  0
 libgomp/config/nvptx/libgomp-plugin.c |  0
 libgomp/config/nvptx/lock.c           |  0
 libgomp/config/nvptx/loop.c           |  0
 libgomp/config/nvptx/loop_ull.c       |  0
 libgomp/config/nvptx/mutex.c          |  0
 libgomp/config/nvptx/oacc-async.c     |  0
 libgomp/config/nvptx/oacc-cuda.c      |  0
 libgomp/config/nvptx/oacc-host.c      |  0
 libgomp/config/nvptx/oacc-init.c      |  0
 libgomp/config/nvptx/oacc-mem.c       |  0
 libgomp/config/nvptx/oacc-parallel.c  |  0
 libgomp/config/nvptx/oacc-plugin.c    |  0
 libgomp/config/nvptx/omp-lock.h       | 12 ++++++++++++
 libgomp/config/nvptx/ordered.c        |  0
 libgomp/config/nvptx/parallel.c       |  0
 libgomp/config/nvptx/proc.c           |  0
 libgomp/config/nvptx/ptrlock.c        |  0
 libgomp/config/nvptx/sections.c       |  0
 libgomp/config/nvptx/sem.c            |  0
 libgomp/config/nvptx/single.c         |  0
 libgomp/config/nvptx/splay-tree.c     |  0
 libgomp/config/nvptx/target.c         |  0
 libgomp/config/nvptx/task.c           |  0
 libgomp/config/nvptx/team.c           |  0
 libgomp/config/nvptx/time.c           |  0
 libgomp/config/nvptx/work.c           |  0
 36 files changed, 12 insertions(+)


This, obviously, is still very bare-bones, but it works, and can be
extended later.


> we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> nothing attempts to link those in :(.

Together with the changes highlighted above, I'd then work on merging
into trunk the nvptx linking code present on gomp-4_0-branch, OK?


For reference, your second patch (U2):

> --- libgomp/configure.tgt.jj	2015-04-21 08:38:00.000000000 +0200
> +++ libgomp/configure.tgt	2015-04-21 10:59:30.857197475 +0200
> @@ -151,6 +151,10 @@ case "${target}" in
>  	XLDFLAGS="${XLDFLAGS} -lpthread"
>  	;;
>  
> +  nvptx*-*-*)
> +	config_path="nvptx"
> +	;;
> +
>    *)
>  	;;
>  
> --- libgomp/libgomp.h.jj	2015-04-21 08:38:00.000000000 +0200
> +++ libgomp/libgomp.h	2015-04-21 11:15:35.952217394 +0200
> @@ -40,7 +40,9 @@
>  #include "gstdint.h"
>  #include "libgomp-plugin.h"
>  
> +#ifdef HAVE_PTHREAD_H
>  #include <pthread.h>
> +#endif
>  #include <stdbool.h>
>  #include <stdlib.h>
>  #include <stdarg.h>
> --- libgomp/configure.ac.jj	2015-04-21 08:38:00.000000000 +0200
> +++ libgomp/configure.ac	2015-04-21 11:06:38.418117846 +0200
> @@ -179,6 +179,9 @@ case "$host" in
>    *-*-rtems*)
>      # RTEMS supports Pthreads, but the library is not available at GCC build time.
>      ;;
> +  nvptx*-*-*)
> +    # NVPTX does not support Pthreads, has its own code replacement.
> +    ;;
>    *)
>      # Check to see if -pthread or -lpthread is needed.  Prefer the former.
>      # In case the pthread.h system header is not found, this test will fail.
> --- configure.ac.jj	2015-04-21 08:38:09.000000000 +0200
> +++ configure.ac	2015-04-21 09:14:50.107827544 +0200
> @@ -539,6 +539,9 @@ if test x$enable_libgomp = x ; then
>  	;;
>      *-*-darwin* | *-*-aix*)
>  	;;
> +    # And on NVPTX as an offloading target.
> +    nvptx*-*-*)
> +	;;
>      *)
>  	noconfigdirs="$noconfigdirs target-libgomp"
>  	;;


Grüße,
 Thomas

Comments

Jakub Jelinek July 22, 2015, 4:38 p.m. UTC | #1
On Wed, Jul 22, 2015 at 06:04:20PM +0200, Thomas Schwinge wrote:
> On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> > offloading to NVPTX (the first patch).  The second patch is WIP, just first
> > few needed changes to make libgomp to build for NVPTX (several weeks of work
> > at least).
> 
> We're not specifically working on making nvptx offloading work for
> OpenMP, but OpenACC offloading also requires a tiny bit of code to be
> shipped in the offloading device's runtime library -- code that
> conceptually belongs in libgomp.  (On gomp-4_0-branch, it currently
> lives in libgcc because that was easier to do.)  As I found out,
> building a "dummy" (empty) libgomp for nvptx is not actually
> difficult.  In addition to your second patch (U2; quoted at the end of
> this email), we'll need the following:

The U2 version was a very early one; I posted a newer version later.
Still, we can presumably go with my U2 (if you've tested it together with
your patch, please check it in yourself) plus your patch, and then
incrementally start removing the zero-sized stubs or replacing them with
something real.

	Jakub
diff mbox

Patch

diff --git libgomp/config/nvptx/affinity.c libgomp/config/nvptx/affinity.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/alloc.c libgomp/config/nvptx/alloc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/bar.c libgomp/config/nvptx/bar.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/barrier.c libgomp/config/nvptx/barrier.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/env.c libgomp/config/nvptx/env.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/error.c libgomp/config/nvptx/error.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/fortran.c libgomp/config/nvptx/fortran.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter.c libgomp/config/nvptx/iter.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter_ull.c libgomp/config/nvptx/iter_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/libgomp-plugin.c libgomp/config/nvptx/libgomp-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/lock.c libgomp/config/nvptx/lock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop.c libgomp/config/nvptx/loop.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop_ull.c libgomp/config/nvptx/loop_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/mutex.c libgomp/config/nvptx/mutex.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-async.c libgomp/config/nvptx/oacc-async.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-cuda.c libgomp/config/nvptx/oacc-cuda.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-host.c libgomp/config/nvptx/oacc-host.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-mem.c libgomp/config/nvptx/oacc-mem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-parallel.c libgomp/config/nvptx/oacc-parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-plugin.c libgomp/config/nvptx/oacc-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/omp-lock.h libgomp/config/nvptx/omp-lock.h
new file mode 100644
index 0000000..2ca7c5e
--- /dev/null
+++ libgomp/config/nvptx/omp-lock.h
@@ -0,0 +1,12 @@ 
+/* This header is used during the build process to find the size and 
+   alignment of the public OpenMP locks, so that we can export data
+   structures without polluting the namespace.
+
+   When using the Linux futex primitive, non-recursive locks require
+   one int.  Recursive locks require we identify the owning task
+   and so require in addition one int and a pointer.  */
+
+typedef int omp_lock_t;
+typedef struct { int lock, count; void *owner; } omp_nest_lock_t;
+typedef int omp_lock_25_t;
+typedef struct { int owner, count; } omp_nest_lock_25_t;
diff --git libgomp/config/nvptx/ordered.c libgomp/config/nvptx/ordered.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/parallel.c libgomp/config/nvptx/parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/proc.c libgomp/config/nvptx/proc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/ptrlock.c libgomp/config/nvptx/ptrlock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sections.c libgomp/config/nvptx/sections.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sem.c libgomp/config/nvptx/sem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/single.c libgomp/config/nvptx/single.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/splay-tree.c libgomp/config/nvptx/splay-tree.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/target.c libgomp/config/nvptx/target.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/task.c libgomp/config/nvptx/task.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/team.c libgomp/config/nvptx/team.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/time.c libgomp/config/nvptx/time.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/work.c libgomp/config/nvptx/work.c
new file mode 100644
index 0000000..e69de29


Next, on gomp-4_0-branch, we can move the libgcc code into libgomp:

commit d8d75d17630d7633be4f1733fd195a104cb2ccc4
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Jul 22 13:05:16 2015 +0200

    [nvptx] Move GOMP stuff from libgcc to libgomp
---
 libgcc/config.host                       |  6 +---
 libgcc/config/nvptx/gomp-acc_on_device.c |  9 -----
 libgcc/config/nvptx/gomp-atomic.asm      | 37 ---------------------
 libgcc/config/nvptx/t-nvptx              | 11 ------
 libgomp/config/nvptx/critical.c          | 57 ++++++++++++++++++++++++++++++++
 libgomp/config/nvptx/oacc-init.c         | 35 ++++++++++++++++++++
 6 files changed, 93 insertions(+), 62 deletions(-)

diff --git libgcc/config.host libgcc/config.host
index ee7ce03..3a2c75d 100644
--- libgcc/config.host
+++ libgcc/config.host
@@ -1304,11 +1304,7 @@  mep*-*-*)
 	;;
 nvptx-*)
 	tmake_file="$tmake_file nvptx/t-nvptx"
-	if test "x${enable_as_accelerator_for}" != x; then
-		extra_parts="crt0.o libgomp.a libgomp.spec"
-	else
-		extra_parts="crt0.o"
-	fi
+	extra_parts="crt0.o"
 	;;
 *)
 	echo "*** Configuration ${host} not supported" 1>&2
diff --git libgcc/config/nvptx/gomp-acc_on_device.c libgcc/config/nvptx/gomp-acc_on_device.c
deleted file mode 100644
index e4278f9..0000000
--- libgcc/config/nvptx/gomp-acc_on_device.c
+++ /dev/null
@@ -1,9 +0,0 @@ 
-int acc_on_device(int d)
-{
-  return __builtin_acc_on_device(d);
-}
-
-int acc_on_device_h_(int *d)
-{
-  return acc_on_device(*d);
-}
diff --git libgcc/config/nvptx/gomp-atomic.asm libgcc/config/nvptx/gomp-atomic.asm
deleted file mode 100644
index ae9d925..0000000
--- libgcc/config/nvptx/gomp-atomic.asm
+++ /dev/null
@@ -1,37 +0,0 @@ 
-
-// BEGIN PREAMBLE
-	.version	3.1
-	.target	sm_30
-	.address_size 64
-	.extern .shared .u8 sdata[];
-// END PREAMBLE
-
-// BEGIN VAR DEF: libgomp_ptx_lock
-.global .align 4 .u32 libgomp_ptx_lock;
-
-// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start
-.visible .func GOMP_atomic_start;
-// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start
-.visible .func GOMP_atomic_start
-{
-	.reg .pred 	%p<2>;
-	.reg .s32 	%r<2>;
-	.reg .s64 	%rd<2>;
-BB5_1:
-	mov.u64 	%rd1, libgomp_ptx_lock;
-	atom.global.cas.b32 	%r1, [%rd1], 0, 1;
-	setp.ne.s32	%p1, %r1, 0;
-	@%p1 bra 	BB5_1;
-	ret;
-	}
-// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end
-.visible .func GOMP_atomic_end;
-// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end
-.visible .func GOMP_atomic_end
-{
-	.reg .s32 	%r<2>;
-	.reg .s64 	%rd<2>;
-	mov.u64 	%rd1, libgomp_ptx_lock;
-	atom.global.exch.b32 	%r1, [%rd1], 0;
-	ret;
-	}
diff --git libgcc/config/nvptx/t-nvptx libgcc/config/nvptx/t-nvptx
index c8741c4..0c2cea0 100644
--- libgcc/config/nvptx/t-nvptx
+++ libgcc/config/nvptx/t-nvptx
@@ -13,14 +13,3 @@  crt0.o: $(srcdir)/config/nvptx/crt0.s
 # support it, and it may cause the build to fail, because of alloca usage, for
 # example.
 INHIBIT_LIBC_CFLAGS = -Dinhibit_libc
-
-gomp-acc_on_device.o: $(srcdir)/config/nvptx/gomp-acc_on_device.c
-	$(gcc_compile) -c -fno-builtin-acc_on_device $<
-gomp-atomic.o: $(srcdir)/config/nvptx/gomp-atomic.asm
-	cp $< $@
-
-OBJS_libgomp= gomp-acc_on_device.o gomp-atomic.o
-libgomp.a: $(OBJS_libgomp)
-	$(AR_CREATE_FOR_TARGET) $@ $(OBJS_libgomp)
-libgomp.spec:
-	echo "*link_gomp: -lgomp" >$@
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
index e69de29..1f55aad 100644
--- libgomp/config/nvptx/critical.c
+++ libgomp/config/nvptx/critical.c
@@ -0,0 +1,57 @@ 
+/* GOMP atomic routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+__asm__ ("// BEGIN VAR DEF: libgomp_ptx_lock\n"
+	 ".global .align 4 .u32 libgomp_ptx_lock;\n"
+	 "\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start\n"
+	 ".visible .func GOMP_atomic_start;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start\n"
+	 ".visible .func GOMP_atomic_start\n"
+	 "{\n"
+	 "	.reg .pred 	%p<2>;\n"
+	 "	.reg .s32 	%r<2>;\n"
+	 "	.reg .s64 	%rd<2>;\n"
+	 "BB5_1:\n"
+	 "	mov.u64 	%rd1, libgomp_ptx_lock;\n"
+	 "	atom.global.cas.b32 	%r1, [%rd1], 0, 1;\n"
+	 "	setp.ne.s32	%p1, %r1, 0;\n"
+	 "	@%p1 bra 	BB5_1;\n"
+	 "	ret;\n"
+	 "	}\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end\n"
+	 ".visible .func GOMP_atomic_end;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end\n"
+	 ".visible .func GOMP_atomic_end\n"
+	 "{\n"
+	 "	.reg .s32 	%r<2>;\n"
+	 "	.reg .s64 	%rd<2>;\n"
+	 "	mov.u64 	%rd1, libgomp_ptx_lock;\n"
+	 "	atom.global.exch.b32 	%r1, [%rd1], 0;\n"
+	 "	ret;\n"
+	 "	}");
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
index e69de29..e2c54c9 100644
--- libgomp/config/nvptx/oacc-init.c
+++ libgomp/config/nvptx/oacc-init.c
@@ -0,0 +1,35 @@ 
+/* OpenACC Runtime initialization routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "openacc.h"
+
+int
+acc_on_device (acc_device_t d)
+{
+  return __builtin_acc_on_device (d);
+}