@@ -430,6 +430,7 @@ struct gomp_target_task
size_t *sizes;
unsigned short *kinds;
unsigned int flags;
+ bool is_completed;
void *hostaddrs[];
};
@@ -877,6 +878,7 @@ struct gomp_device_descr
void *(*host2dev_func) (int, void *, const void *, size_t);
void *(*dev2dev_func) (int, void *, const void *, size_t);
void (*run_func) (int, void *, void *);
+ void (*async_run_func) (int, void *, void *, bool *);
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
@@ -1339,6 +1339,14 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
{
struct gomp_device_descr *devicep = resolve_device (device);
+ /* FIXME: Check for thr->ts.team && !thr->task->final_task ? */
+ if (flags & GOMP_TARGET_FLAG_NOWAIT)
+ {
+ gomp_create_target_task (devicep, fn, mapnum, hostaddrs, sizes, kinds,
+ flags, depend);
+ return;
+ }
+
/* If there are depend clauses, but nowait is not present,
block the parent task until the dependencies are resolved
and then just continue with the rest of the function as if it
@@ -1650,34 +1658,56 @@ void
gomp_target_task_fn (void *data)
{
struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+ struct gomp_device_descr *devicep = ttask->devicep;
+
if (ttask->fn != NULL)
{
- /* GOMP_target_41 */
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ {
+ gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
+ ttask->hostaddrs, ttask->sizes,
+ ttask->kinds);
+ return;
+ }
+
+ void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
+ struct target_mem_desc *tgt_vars
+ = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
+ ttask->sizes, ttask->kinds, true,
+ GOMP_MAP_VARS_TARGET);
+ devicep->async_run_func (devicep->target_id, fn_addr,
+ (void *) tgt_vars->tgt_start,
+ &ttask->is_completed);
+
+ /* FIXME: Move the task into some sleeping state, remove this loop from
+ here. */
+ while (!ttask->is_completed);
+ return;
}
- else if (ttask->devicep == NULL
- || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ else if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
return;
size_t i;
if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
- gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
ttask->kinds, true);
else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
for (i = 0; i < ttask->mapnum; i++)
if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
{
- gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1,
- &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
- &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
+ NULL, &ttask->sizes[i], &ttask->kinds[i], true,
+ GOMP_MAP_VARS_ENTER_DATA);
i += ttask->sizes[i];
}
else
- gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL,
- &ttask->sizes[i], &ttask->kinds[i],
- true, GOMP_MAP_VARS_ENTER_DATA);
+ gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
+ &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
else
- gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs,
- ttask->sizes, ttask->kinds);
+ gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ ttask->kinds);
}
void
@@ -2122,6 +2152,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
{
DLSYM (run);
+ DLSYM (async_run);
DLSYM (dev2dev);
}
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
@@ -480,6 +480,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
task->kind = GOMP_TASK_WAITING;
task->in_tied_task = parent->in_tied_task;
task->taskgroup = taskgroup;
+ /* FIXME: Segmentation fault here, if there are no dependencies. */
ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]];
ttask->devicep = devicep;
ttask->fn = fn;
@@ -490,6 +491,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
ttask->flags = flags;
+ ttask->is_completed = false;
task->fn = gomp_target_task_fn;
task->fn_data = ttask;
task->final_task = 0;
new file mode 100644
@@ -0,0 +1,50 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int main ()
+{
+ int a = 0, b = 0, c = 0, d[7];
+
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task depend(out: d[0])
+ a = 2;
+
+ #pragma omp target enter data nowait map(to: a,b,c) depend(in: d[0]) depend(out: d[1])
+
+ #pragma omp target nowait map(alloc: a) depend(in: d[1]) depend(out: d[2])
+ a++;
+
+ #pragma omp target nowait map(alloc: b) depend(in: d[2]) depend(out: d[3])
+ {
+ usleep (1000);
+ b = 4;
+ }
+
+ #pragma omp target nowait map(alloc: b) depend(in: d[2]) depend(out: d[4])
+ {
+ usleep (5000);
+ b = 5;
+ }
+
+ #pragma omp target nowait map(alloc: c) depend(in: d[3], d[4]) depend(out: d[5])
+ {
+ usleep (5000);
+ c = 6;
+ }
+
+ #pragma omp target nowait map(alloc: c) depend(in: d[3], d[4]) depend(out: d[6])
+ {
+ usleep (1000);
+ c = 7;
+ }
+
+ #pragma omp target exit data map(always,from: a,b,c) depend(in: d[5], d[6])
+ }
+
+ if (a != 3 || (b != 4 && b != 5) || (c != 6 && c != 7))
+ abort ();
+
+ return 0;
+}
@@ -192,11 +192,23 @@ GOMP_OFFLOAD_get_num_devices (void)
static void
offload (const char *file, uint64_t line, int device, const char *name,
- int num_vars, VarDesc *vars, VarDesc2 *vars2)
+ int num_vars, VarDesc *vars, VarDesc2 *vars2, const void **async_data)
{
OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
if (ofld)
- __offload_offload1 (ofld, name, 0, num_vars, vars, vars2, 0, NULL, NULL);
+ {
+ if (async_data == NULL)
+ __offload_offload1 (ofld, name, 0, num_vars, vars, vars2, 0, NULL,
+ NULL);
+ else
+ {
+ OffloadFlags flags;
+ flags.flags = 0;
+ flags.bits.omp_async = 1;
+ __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0, NULL,
+ async_data, 0, NULL, flags, NULL);
+ }
+ }
else
{
fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line);
@@ -218,7 +230,7 @@ GOMP_OFFLOAD_init_device (int device)
TRACE ("");
pthread_once (&main_image_is_registered, register_main_image);
offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
- NULL, NULL);
+ NULL, NULL, NULL);
}
extern "C" void
@@ -240,7 +252,7 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
VarDesc2 vd1g[2] = { { "num_funcs", 0 }, { "num_vars", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
- vd1, vd1g);
+ vd1, vd1g, NULL);
int table_size = num_funcs + 2 * num_vars;
if (table_size > 0)
@@ -254,7 +266,7 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
VarDesc2 vd2g = { "table", 0 };
offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1,
- &vd2, &vd2g);
+ &vd2, &vd2g, NULL);
}
}
@@ -401,8 +413,8 @@ GOMP_OFFLOAD_alloc (int device, size_t size)
vd1[1].size = sizeof (void *);
VarDesc2 vd1g[2] = { { "size", 0 }, { "tgt_ptr", 0 } };
- offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd1, vd1g);
-
+ offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd1, vd1g,
+ NULL);
return tgt_ptr;
}
@@ -416,7 +428,8 @@ GOMP_OFFLOAD_free (int device, void *tgt_ptr)
vd1.size = sizeof (void *);
VarDesc2 vd1g = { "tgt_ptr", 0 };
- offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd1, &vd1g);
+ offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd1, &vd1g,
+ NULL);
}
extern "C" void *
@@ -435,7 +448,7 @@ GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
- vd1, vd1g);
+ vd1, vd1g, NULL);
VarDesc vd2 = vd_host2tgt;
vd2.ptr = (void *) host_ptr;
@@ -443,7 +456,7 @@ GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
VarDesc2 vd2g = { "var", 0 };
offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
- &vd2, &vd2g);
+ &vd2, &vd2g, NULL);
return tgt_ptr;
}
@@ -464,7 +477,7 @@ GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
- vd1, vd1g);
+ vd1, vd1g, NULL);
VarDesc vd2 = vd_tgt2host;
vd2.ptr = (void *) host_ptr;
@@ -472,7 +485,7 @@ GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
VarDesc2 vd2g = { "var", 0 };
offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
- &vd2, &vd2g);
+ &vd2, &vd2g, NULL);
return host_ptr;
}
@@ -495,22 +508,43 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
VarDesc2 vd1g[3] = { { "dst_ptr", 0 }, { "src_ptr", 0 }, { "size", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd1,
- vd1g);
+ vd1g, NULL);
return dst_ptr;
}
extern "C" void
+GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
+ bool *async_data)
+{
+ TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
+ tgt_fn, tgt_vars, async_data);
+
+ VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
+ vd[0].ptr = &tgt_fn;
+ vd[0].size = sizeof (void *);
+ vd[1].ptr = &tgt_vars;
+ vd[1].size = sizeof (void *);
+
+ offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd, NULL,
+ (const void **) async_data);
+}
+
+extern "C" void
GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
{
- TRACE ("(tgt_fn = %p, tgt_vars = %p)", tgt_fn, tgt_vars);
+ TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
- VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
- vd1[0].ptr = &tgt_fn;
- vd1[0].size = sizeof (void *);
- vd1[1].ptr = &tgt_vars;
- vd1[1].size = sizeof (void *);
- VarDesc2 vd1g[2] = { { "tgt_fn", 0 }, { "tgt_vars", 0 } };
+ GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL);
+}
+
+/* Called by liboffloadmic when asynchronous function is completed. */
+
+extern "C" void
+__gomp_offload_intelmic_async_completed (const void *async_data)
+{
+ TRACE ("(async_data = %p)", async_data);
- offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd1, vd1g);
+ bool *ttask_is_completed = (bool *) async_data;
+ *ttask_is_completed = true;
}
@@ -64,6 +64,9 @@ static void __offload_fini_library(void);
#define GET_OFFLOAD_NUMBER(timer_data) \
timer_data? timer_data->offload_number : 0
+extern "C" void
+__gomp_offload_intelmic_async_completed (const void *);
+
extern "C" {
#ifdef TARGET_WINNT
// Windows does not support imports from libraries without actually
@@ -2508,7 +2511,7 @@ extern "C" {
const void *info
)
{
- /* TODO: Call callback function, pass info. */
+ __gomp_offload_intelmic_async_completed (info);
}
}