diff mbox

[gomp4.1,WIP] omp_target_* libgomp APIs

Message ID 20150709140657.GR10247@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek July 9, 2015, 2:06 p.m. UTC
Hi!

The latest spec adds a bunch of new functions; this patch attempts to
implement them, except that omp_target_associate_ptr is only partly
implemented and omp_target_disassociate_ptr is not implemented at all for now.

As for the plugins, I think we'll want a plugin callback to support
offloading device <-> offloading device memcpy (at least for the same
devicep and target_id), and perhaps, as an optimization, also some
callbacks through which a 2 or 3 dimensional omp_target_memcpy_rect
between host and device, device and host, or within the same device
can be performed without too many separate operations.

For the associate/disassociate, I'm waiting for some clarifications (well,
for omp_target_is_present too) and then supposedly it should wait until
you are done with your enter/exit data changes.

Shall I commit this now, or wait until it is clarified etc.?

2015-07-09  Jakub Jelinek  <jakub@redhat.com>

	* omp.h.in (omp_get_initial_device,
	omp_target_alloc, omp_target_free, omp_target_is_present,
	omp_target_memcpy, omp_target_memcpy_rect, omp_target_associate_ptr,
	omp_target_disassociate_ptr): New prototypes.
	* omp_lib.f90.in (omp_get_initial_device): New interface.
	* omp_lib.h.in (omp_get_initial_device): New extern.
	* libgomp.map (OMP_4.1): Add omp_get_initial_device,
	omp_get_initial_device_, omp_target_alloc, omp_target_free,
	omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect,
	omp_target_associate_ptr and omp_target_disassociate_ptr symbols.
	* env.c (omp_get_initial_device): New function.  Add ialias.
	* fortran.c (omp_get_initial_device): Add iredirect.
	(omp_get_initial_device_): New function.
	* target.c (omp_target_alloc, omp_target_free,
	omp_target_is_present, omp_target_memcpy,
	omp_target_memcpy_rect_worker, omp_target_memcpy_rect,
	omp_target_associate_ptr, omp_target_disassociate_ptr): New
	functions.


	Jakub
diff mbox

Patch

--- libgomp/omp.h.in.jj	2015-07-09 09:31:17.183203026 +0200
+++ libgomp/omp.h.in	2015-07-09 11:05:02.761819081 +0200
@@ -139,8 +139,27 @@  extern int omp_get_num_teams (void) __GO
 extern int omp_get_team_num (void) __GOMP_NOTHROW;
 
 extern int omp_is_initial_device (void) __GOMP_NOTHROW;
+extern int omp_get_initial_device (void) __GOMP_NOTHROW;
 extern int omp_get_max_task_priority (void) __GOMP_NOTHROW;
 
+/* Device memory routines.  Each trailing int parameter is a device
+   number.  */
+extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern void omp_target_free (void *, int) __GOMP_NOTHROW;
+extern int omp_target_is_present (void *, __SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__,
+			      __SIZE_TYPE__, int, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int,
+				   const __SIZE_TYPE__ *,
+				   const __SIZE_TYPE__ *,
+				   const __SIZE_TYPE__ *,
+				   const __SIZE_TYPE__ *,
+				   const __SIZE_TYPE__ *, int, int)
+  __GOMP_NOTHROW;
+extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__,
+				     __SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW;
+
 #ifdef __cplusplus
 }
 #endif
--- libgomp/omp_lib.f90.in.jj	2015-07-09 09:31:17.182203041 +0200
+++ libgomp/omp_lib.f90.in	2015-07-09 11:05:02.762819066 +0200
@@ -422,6 +422,12 @@ 
         end interface
 
         interface
+          function omp_get_initial_device ()
+            integer (4) :: omp_get_initial_device
+          end function omp_get_initial_device
+        end interface
+
+        interface
           function omp_get_max_task_priority ()
             integer (4) :: omp_get_max_task_priority
           end function omp_get_max_task_priority
--- libgomp/omp_lib.h.in.jj	2015-07-09 09:31:17.170203220 +0200
+++ libgomp/omp_lib.h.in	2015-07-09 11:05:02.761819081 +0200
@@ -121,6 +121,8 @@ 
 
       external omp_is_initial_device
       logical(4) omp_is_initial_device
+      external omp_get_initial_device
+      integer(4) omp_get_initial_device
 
       external omp_get_max_task_priority
       integer(4) omp_get_max_task_priority
--- libgomp/libgomp.map.jj	2015-06-24 12:11:40.000000000 +0200
+++ libgomp/libgomp.map	2015-07-09 12:42:58.387515811 +0200
@@ -153,6 +153,15 @@  OMP_4.1 {
 	omp_get_partition_place_nums;
 	omp_get_partition_place_nums_;
 	omp_get_partition_place_nums_8_;
+	omp_get_initial_device;
+	omp_get_initial_device_;
+	omp_target_alloc;
+	omp_target_free;
+	omp_target_is_present;
+	omp_target_memcpy;
+	omp_target_memcpy_rect;
+	omp_target_associate_ptr;
+	omp_target_disassociate_ptr;
 } OMP_4.0;
 
 GOMP_1.0 {
--- libgomp/env.c.jj	2015-06-24 12:11:40.000000000 +0200
+++ libgomp/env.c	2015-07-09 15:51:28.659893446 +0200
@@ -29,6 +29,7 @@ 
 #include "libgomp.h"
 #include "libgomp_f.h"
 #include "oacc-int.h"
+#include "gomp-constants.h"
 #include <ctype.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -1461,6 +1462,13 @@  omp_is_initial_device (void)
 }
 
+/* Return GOMP_DEVICE_HOST_FALLBACK as the number of the initial device.  */
 int
+omp_get_initial_device (void)
+{
+  return GOMP_DEVICE_HOST_FALLBACK;
+}
+
+int
 omp_get_num_places (void)
 {
   return gomp_places_list_len;
@@ -1526,6 +1533,7 @@  ialias (omp_get_num_devices)
 ialias (omp_get_num_teams)
 ialias (omp_get_team_num)
 ialias (omp_is_initial_device)
+ialias (omp_get_initial_device)
 ialias (omp_get_max_task_priority)
 ialias (omp_get_num_places)
 ialias (omp_get_place_num)
--- libgomp/fortran.c.jj	2015-06-24 12:11:40.000000000 +0200
+++ libgomp/fortran.c	2015-07-09 12:33:13.544183419 +0200
@@ -80,6 +80,7 @@  ialias_redirect (omp_get_num_devices)
 ialias_redirect (omp_get_num_teams)
 ialias_redirect (omp_get_team_num)
 ialias_redirect (omp_is_initial_device)
+ialias_redirect (omp_get_initial_device)
 ialias_redirect (omp_get_max_task_priority)
 #endif
 
@@ -565,6 +566,13 @@  omp_is_initial_device_ (void)
 }
 
+/* Fortran entry point forwarding to omp_get_initial_device.  */
 int32_t
+omp_get_initial_device_ (void)
+{
+  return omp_get_initial_device ();
+}
+
+int32_t
 omp_get_max_task_priority_ (void)
 {
   return omp_get_max_task_priority ();
--- libgomp/target.c.jj	2015-06-26 15:05:41.000000000 +0200
+++ libgomp/target.c	2015-07-09 15:53:30.474108680 +0200
@@ -38,6 +38,7 @@ 
 #endif
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
 
 #ifdef PLUGIN_SUPPORT
 #include <dlfcn.h>
@@ -1175,6 +1176,379 @@  GOMP_teams (unsigned int num_teams, unsi
   (void) num_teams;
 }
 
+/* Allocate SIZE bytes on device DEVICE_NUM.  The host fallback device and
+   devices lacking GOMP_OFFLOAD_CAP_OPENMP_400 are served by malloc; other
+   devices by their alloc_func, called with the device lock held.  Returns
+   NULL if DEVICE_NUM is otherwise negative or cannot be resolved.  */
+void *
+omp_target_alloc (size_t size, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return malloc (size);
+
+  if (device_num < 0)
+    return NULL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return NULL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return malloc (size);
+
+  gomp_mutex_lock (&devicep->lock);
+  void *ret = devicep->alloc_func (devicep->target_id, size);
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+/* Release DEVICE_PTR, which belongs to device DEVICE_NUM.  Uses free for
+   the host fallback device and for devices lacking
+   GOMP_OFFLOAD_CAP_OPENMP_400, and the device's free_func otherwise.  A
+   NULL DEVICE_PTR or an invalid DEVICE_NUM makes this a no-op.  */
+void
+omp_target_free (void *device_ptr, int device_num)
+{
+  if (device_ptr == NULL)
+    return;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    {
+      free (device_ptr);
+      return;
+    }
+
+  if (device_num < 0)
+    return;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    {
+      free (device_ptr);
+      return;
+    }
+
+  gomp_mutex_lock (&devicep->lock);
+  devicep->free_func (devicep->target_id, device_ptr);
+  gomp_mutex_unlock (&devicep->lock);
+}
+
+/* Return nonzero if looking up host address PTR + OFFSET in the mem_map of
+   device DEVICE_NUM finds an entry.  Always 1 for the host fallback device
+   and for devices lacking GOMP_OFFLOAD_CAP_OPENMP_400, 0 for an invalid
+   DEVICE_NUM.  A NULL PTR yields nonzero only when OFFSET is 0.  */
+int
+omp_target_is_present (void *ptr, size_t offset, int device_num)
+{
+  if (ptr == NULL)
+    return offset == 0;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return 1;
+
+  if (device_num < 0)
+    return 0;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return 0;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return 1;
+
+  gomp_mutex_lock (&devicep->lock);
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+
+  cur_node.host_start = (uintptr_t) ptr + offset;
+  cur_node.host_end = cur_node.host_start + 1;
+  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+  if (n == NULL)
+    {
+      /* Could be possibly zero size array section.  */
+      cur_node.host_end--;
+      n = splay_tree_lookup (mem_map, &cur_node);
+      if (n == NULL)
+	{
+	  cur_node.host_start--;
+	  n = splay_tree_lookup (mem_map, &cur_node);
+	  cur_node.host_start++;
+	}
+    }
+  int ret = n != NULL;
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+/* Copy LENGTH bytes from SRC + SRC_OFFSET on SRC_DEVICE to DST + DST_OFFSET
+   on DST_DEVICE.  Returns 0 on success, or EINVAL for an invalid device
+   number or a device-to-device copy (not implemented yet).  */
+int
+omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+		   size_t src_offset, int dst_device, int src_device)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  if (dst_device != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device < 0)
+	return EINVAL;
+
+      dst_devicep = resolve_device (dst_device);
+      if (dst_devicep == NULL)
+	return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	dst_devicep = NULL;
+    }
+  if (src_device != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device < 0)
+	return EINVAL;
+
+      src_devicep = resolve_device (src_device);
+      if (src_devicep == NULL)
+	return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	src_devicep = NULL;
+    }
+  if (src_devicep == NULL && dst_devicep == NULL)
+    {
+      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
+      return 0;
+    }
+  if (src_devicep == NULL)
+    {
+      gomp_mutex_lock (&dst_devicep->lock);
+      dst_devicep->host2dev_func (dst_devicep->target_id,
+				  (char *) dst + dst_offset,
+				  (char *) src + src_offset, length);
+      gomp_mutex_unlock (&dst_devicep->lock);
+      return 0;
+    }
+  if (dst_devicep == NULL)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2host_func (src_devicep->target_id,
+				  (char *) dst + dst_offset,
+				  (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
+  /* FIXME: Support device-to-device somehow?  */
+  return EINVAL;
+}
+
+/* Recursive helper for omp_target_memcpy_rect.  Copies a NUM_DIMS
+   dimensional block of VOLUME elements, each ELEMENT_SIZE bytes, from SRC
+   (extents SRC_DIMENSIONS, starting at SRC_OFFSETS) to DST (extents
+   DST_DIMENSIONS, starting at DST_OFFSETS).  A NULL DST_DEVICEP or
+   SRC_DEVICEP stands for host memory.  Does no locking itself.  Returns 0
+   on success, or EINVAL on size overflow or for a device-to-device
+   copy.  */
+static int
+omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
+			       int num_dims, const size_t *volume,
+			       const size_t *dst_offsets,
+			       const size_t *src_offsets,
+			       const size_t *dst_dimensions,
+			       const size_t *src_dimensions,
+			       struct gomp_device_descr *dst_devicep,
+			       struct gomp_device_descr *src_devicep)
+{
+  size_t dst_slice = element_size;
+  size_t src_slice = element_size;
+  size_t j, dst_off, src_off, length;
+  int i, ret;
+
+  if (num_dims == 1)
+    {
+      if (__builtin_mul_overflow (element_size, volume[0], &length)
+	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
+	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
+	return EINVAL;
+      if (dst_devicep == NULL && src_devicep == NULL)
+	memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+      else if (src_devicep == NULL)
+	dst_devicep->host2dev_func (dst_devicep->target_id,
+				    (char *) dst + dst_off,
+				    (char *) src + src_off, length);
+      else if (dst_devicep == NULL)
+	src_devicep->dev2host_func (src_devicep->target_id,
+				    (char *) dst + dst_off,
+				    (char *) src + src_off, length);
+      else
+	return EINVAL;
+      return 0;
+    }
+
+  /* FIXME: it would be nice to have some plugin function to handle
+     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
+     be handled in the generic recursion below, and for host-host it
+     should be used even for any num_dims >= 2.  */
+
+  /* One step along the outermost dimension spans ELEMENT_SIZE times the
+     product of the inner extents; DST and SRC may have different extents,
+     so each side's slice must be built from its own dimensions.  */
+  for (i = 1; i < num_dims; i++)
+    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
+	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
+      return EINVAL;
+  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
+      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
+    return EINVAL;
+  for (j = 0; j < volume[0]; j++)
+    {
+      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
+					   (char *) src + src_off,
+					   element_size, num_dims - 1,
+					   volume + 1, dst_offsets + 1,
+					   src_offsets + 1, dst_dimensions + 1,
+					   src_dimensions + 1, dst_devicep,
+					   src_devicep);
+      if (ret)
+	return ret;
+      dst_off += dst_slice;
+      src_off += src_slice;
+    }
+  return 0;
+}
+
+/* Copy a NUM_DIMS dimensional rectangular block between SRC on
+   SRC_DEVICE_NUM and DST on DST_DEVICE_NUM; the array arguments are as
+   for omp_target_memcpy_rect_worker, which runs with the lock of the one
+   non-host device (if any) held.  If DST and SRC are both NULL, returns
+   INT_MAX without copying.  Otherwise returns 0 on success, or EINVAL for
+   an invalid device number, a device-to-device copy or size overflow.  */
+int
+omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
+			int num_dims, const size_t *volume,
+			const size_t *dst_offsets,
+			const size_t *src_offsets,
+			const size_t *dst_dimensions,
+			const size_t *src_dimensions,
+			int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  if (!dst && !src)
+    return INT_MAX;
+
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+	return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+	return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+	return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+	return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	src_devicep = NULL;
+    }
+
+  /* FIXME: Support device-to-device somehow?  */
+  if (src_devicep != NULL && dst_devicep != NULL)
+    return EINVAL;
+
+  if (src_devicep)
+    gomp_mutex_lock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_lock (&dst_devicep->lock);
+  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
+					   volume, dst_offsets, src_offsets,
+					   dst_dimensions, src_dimensions,
+					   dst_devicep, src_devicep);
+  if (src_devicep)
+    gomp_mutex_unlock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_unlock (&dst_devicep->lock);
+  return ret;
+}
+
+/* Incomplete.  For now this only checks whether [HOST_PTR, HOST_PTR + SIZE)
+   is already covered by a mem_map entry of device DEVICE_NUM whose device
+   address equals DEVICE_PTR + DEVICE_OFFSET, returning 0 if so.  Every
+   other case, including an address that is not mapped yet, gives EINVAL.  */
+int
+omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+			  size_t device_offset, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) host_ptr;
+  cur_node.host_end = cur_node.host_start + size;
+  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+  if (n)
+    {
+      if (n->tgt->tgt_start + n->tgt_offset
+	  == (uintptr_t) device_ptr + device_offset
+	  && n->host_start <= cur_node.host_start
+	  && n->host_end >= cur_node.host_end)
+	ret = 0;
+    }
+  else
+    {
+      /* FIXME: finish this.  */
+    }
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+/* Not implemented yet; always returns EINVAL.  */
+int
+omp_target_disassociate_ptr (void *ptr, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  /* FIXME: finish this.  */
+  return EINVAL;
+}
+
 #ifdef PLUGIN_SUPPORT
 
 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed