@@ -64,6 +64,9 @@ static void __offload_fini_library(void)
#define GET_OFFLOAD_NUMBER(timer_data) \
timer_data? timer_data->offload_number : 0
+extern "C" void
+__gomp_offload_intelmic_async_completed (const void *);
+
extern "C" {
#ifdef TARGET_WINNT
// Windows does not support imports from libraries without actually
@@ -2507,7 +2510,7 @@ extern "C" {
const void *info
)
{
- /* TODO: Call callback function, pass info. */
+ __gomp_offload_intelmic_async_completed (info);
}
}
@@ -192,11 +192,23 @@ GOMP_OFFLOAD_get_num_devices (void)
static void
offload (const char *file, uint64_t line, int device, const char *name,
- int num_vars, VarDesc *vars, VarDesc2 *vars2)
+ int num_vars, VarDesc *vars, VarDesc2 *vars2, const void **async_data)
{
OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
if (ofld)
- __offload_offload1 (ofld, name, 0, num_vars, vars, vars2, 0, NULL, NULL);
+ {
+ if (async_data == NULL)
+ __offload_offload1 (ofld, name, 0, num_vars, vars, vars2, 0, NULL,
+ NULL);
+ else
+ {
+ OffloadFlags flags;
+ flags.flags = 0;
+ flags.bits.omp_async = 1;
+ __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0, NULL,
+ async_data, 0, NULL, flags, NULL);
+ }
+ }
else
{
fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line);
@@ -218,7 +230,7 @@ GOMP_OFFLOAD_init_device (int device)
TRACE ("");
pthread_once (&main_image_is_registered, register_main_image);
offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
- NULL, NULL);
+ NULL, NULL, NULL);
}
extern "C" void
@@ -240,7 +252,7 @@ get_target_table (int device, int &num_f
VarDesc2 vd1g[2] = { { "num_funcs", 0 }, { "num_vars", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
- vd1, vd1g);
+ vd1, vd1g, NULL);
int table_size = num_funcs + 2 * num_vars;
if (table_size > 0)
@@ -254,7 +266,7 @@ get_target_table (int device, int &num_f
VarDesc2 vd2g = { "table", 0 };
offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1,
- &vd2, &vd2g);
+ &vd2, &vd2g, NULL);
}
}
@@ -401,8 +413,8 @@ GOMP_OFFLOAD_alloc (int device, size_t s
vd1[1].size = sizeof (void *);
VarDesc2 vd1g[2] = { { "size", 0 }, { "tgt_ptr", 0 } };
- offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd1, vd1g);
-
+ offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd1, vd1g,
+ NULL);
return tgt_ptr;
}
@@ -416,7 +428,8 @@ GOMP_OFFLOAD_free (int device, void *tgt
vd1.size = sizeof (void *);
VarDesc2 vd1g = { "tgt_ptr", 0 };
- offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd1, &vd1g);
+ offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd1, &vd1g,
+ NULL);
}
extern "C" void *
@@ -435,7 +448,7 @@ GOMP_OFFLOAD_host2dev (int device, void
VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
- vd1, vd1g);
+ vd1, vd1g, NULL);
VarDesc vd2 = vd_host2tgt;
vd2.ptr = (void *) host_ptr;
@@ -443,7 +456,7 @@ GOMP_OFFLOAD_host2dev (int device, void
VarDesc2 vd2g = { "var", 0 };
offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
- &vd2, &vd2g);
+ &vd2, &vd2g, NULL);
return tgt_ptr;
}
@@ -464,7 +477,7 @@ GOMP_OFFLOAD_dev2host (int device, void
VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
- vd1, vd1g);
+ vd1, vd1g, NULL);
VarDesc vd2 = vd_tgt2host;
vd2.ptr = (void *) host_ptr;
@@ -472,7 +485,7 @@ GOMP_OFFLOAD_dev2host (int device, void
VarDesc2 vd2g = { "var", 0 };
offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
- &vd2, &vd2g);
+ &vd2, &vd2g, NULL);
return host_ptr;
}
@@ -495,22 +508,42 @@ GOMP_OFFLOAD_dev2dev (int device, void *
VarDesc2 vd1g[3] = { { "dst_ptr", 0 }, { "src_ptr", 0 }, { "size", 0 } };
offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd1,
- vd1g);
+ vd1g, NULL);
return dst_ptr;
}
extern "C" void
+GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
+ void *async_data)
+{
+ TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
+ tgt_fn, tgt_vars, async_data);
+
+ VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
+ vd[0].ptr = &tgt_fn;
+ vd[0].size = sizeof (void *);
+ vd[1].ptr = &tgt_vars;
+ vd[1].size = sizeof (void *);
+
+ offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd, NULL,
+ (const void **) async_data);
+}
+
+extern "C" void
GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
{
- TRACE ("(tgt_fn = %p, tgt_vars = %p)", tgt_fn, tgt_vars);
+ TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
- VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
- vd1[0].ptr = &tgt_fn;
- vd1[0].size = sizeof (void *);
- vd1[1].ptr = &tgt_vars;
- vd1[1].size = sizeof (void *);
- VarDesc2 vd1g[2] = { { "tgt_fn", 0 }, { "tgt_vars", 0 } };
+ GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL);
+}
+
+/* Called by liboffloadmic when asynchronous function is completed. */
+
+extern "C" void
+__gomp_offload_intelmic_async_completed (const void *async_data)
+{
+ TRACE ("(async_data = %p)", async_data);
- offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd1, vd1g);
+ GOMP_PLUGIN_target_task_completion ((void *) async_data);
}
@@ -59,10 +59,20 @@ struct addr_pair
uintptr_t end;
};
+/* Various state of OpenMP async offloading tasks. */
+enum gomp_target_task_state
+{
+ GOMP_TARGET_TASK_DATA,
+ GOMP_TARGET_TASK_BEFORE_MAP,
+ GOMP_TARGET_TASK_FALLBACK,
+ GOMP_TARGET_TASK_RUNNING
+};
+
/* Miscellaneous functions. */
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
extern void *GOMP_PLUGIN_realloc (void *, size_t);
+void GOMP_PLUGIN_target_task_completion (void *);
extern void GOMP_PLUGIN_debug (int, const char *, ...)
__attribute__ ((format (printf, 2, 3)));
@@ -0,0 +1,50 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int main ()
+{
+ int a = 0, b = 0, c = 0, d[7];
+
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task depend(out: d[0])
+ a = 2;
+
+ #pragma omp target enter data nowait map(to: a,b,c) depend(in: d[0]) depend(out: d[1])
+
+ #pragma omp target nowait map(alloc: a) depend(in: d[1]) depend(out: d[2])
+ a++;
+
+ #pragma omp target nowait map(alloc: b) depend(in: d[2]) depend(out: d[3])
+ {
+ usleep (1000);
+ b = 4;
+ }
+
+ #pragma omp target nowait map(alloc: b) depend(in: d[2]) depend(out: d[4])
+ {
+ usleep (5000);
+ b = 5;
+ }
+
+ #pragma omp target nowait map(alloc: c) depend(in: d[3], d[4]) depend(out: d[5])
+ {
+ usleep (5000);
+ c = 6;
+ }
+
+ #pragma omp target nowait map(alloc: c) depend(in: d[3], d[4]) depend(out: d[6])
+ {
+ usleep (1000);
+ c = 7;
+ }
+
+ #pragma omp target exit data map(always,from: a,b,c) depend(in: d[5], d[6])
+ }
+
+ if (a != 3 || (b != 4 && b != 5) || (c != 6 && c != 7))
+ abort ();
+
+ return 0;
+}
@@ -1348,17 +1348,7 @@ GOMP_target (int device, void (*fn) (voi
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
GOMP_MAP_VARS_TARGET);
- struct gomp_thread old_thr, *thr = gomp_thread ();
- old_thr = *thr;
- memset (thr, '\0', sizeof (*thr));
- if (gomp_places_list)
- {
- thr->place = old_thr.place;
- thr->ts.place_partition_len = gomp_places_list_len;
- }
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
- gomp_free_thread (thr);
- *thr = old_thr;
gomp_unmap_vars (tgt_vars, true);
}
@@ -1387,10 +1377,23 @@ GOMP_target_ext (int device, void (*fn)
(void) num_teams;
(void) thread_limit;
- /* If there are depend clauses, but nowait is not present,
- block the parent task until the dependencies are resolved
- and then just continue with the rest of the function as if it
- is a merged task. */
+ if (flags & GOMP_TARGET_FLAG_NOWAIT)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->ts.team
+ && !thr->task->final_task)
+ {
+ gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
+ sizes, kinds, flags, depend,
+ GOMP_TARGET_TASK_BEFORE_MAP);
+ return;
+ }
+ }
+
+ /* If there are depend clauses, but nowait is not present
+ (or we are in a final task), block the parent task until the
+ dependencies are resolved and then just continue with the rest
+ of the function as if it is a merged task. */
if (depend != NULL)
{
struct gomp_thread *thr = gomp_thread ();
@@ -1410,17 +1413,7 @@ GOMP_target_ext (int device, void (*fn)
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
GOMP_MAP_VARS_TARGET);
- struct gomp_thread old_thr, *thr = gomp_thread ();
- old_thr = *thr;
- memset (thr, '\0', sizeof (*thr));
- if (gomp_places_list)
- {
- thr->place = old_thr.place;
- thr->ts.place_partition_len = gomp_places_list_len;
- }
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
- gomp_free_thread (thr);
- *thr = old_thr;
gomp_unmap_vars (tgt_vars, true);
}
@@ -1527,23 +1520,25 @@ GOMP_target_update_ext (int device, size
&& thr->ts.team
&& !thr->task->final_task)
{
- gomp_create_target_task (devicep, (void (*) (void *)) NULL,
- mapnum, hostaddrs, sizes, kinds,
- flags | GOMP_TARGET_FLAG_UPDATE,
- depend);
- return;
+ if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+ mapnum, hostaddrs, sizes, kinds,
+ flags | GOMP_TARGET_FLAG_UPDATE,
+ depend, GOMP_TARGET_TASK_DATA))
+ return;
}
+ else
+ {
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup
+ && thr->task->taskgroup->cancelled)))
+ return;
- struct gomp_team *team = thr->ts.team;
- /* If parallel or taskgroup has been cancelled, don't start new
- tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup
- && thr->task->taskgroup->cancelled)))
- return;
-
- gomp_task_maybe_wait_for_dependencies (depend);
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
}
}
@@ -1647,22 +1642,25 @@ GOMP_target_enter_exit_data (int device,
&& thr->ts.team
&& !thr->task->final_task)
{
- gomp_create_target_task (devicep, (void (*) (void *)) NULL,
- mapnum, hostaddrs, sizes, kinds,
- flags, depend);
- return;
+ if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+ mapnum, hostaddrs, sizes, kinds,
+ flags, depend,
+ GOMP_TARGET_TASK_DATA))
+ return;
}
+ else
+ {
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup
+ && thr->task->taskgroup->cancelled)))
+ return;
- struct gomp_team *team = thr->ts.team;
- /* If parallel or taskgroup has been cancelled, don't start new
- tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup
- && thr->task->taskgroup->cancelled)))
- return;
-
- gomp_task_maybe_wait_for_dependencies (depend);
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
}
}
@@ -1694,38 +1692,65 @@ GOMP_target_enter_exit_data (int device,
gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
-void
+bool
gomp_target_task_fn (void *data)
{
struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+ struct gomp_device_descr *devicep = ttask->devicep;
+
if (ttask->fn != NULL)
{
- /* GOMP_target_ext */
- }
- else if (ttask->devicep == NULL
- || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
- return;
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ {
+ ttask->state = GOMP_TARGET_TASK_FALLBACK;
+ gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
+ ttask->hostaddrs, ttask->sizes,
+ ttask->kinds);
+ return false;
+ }
+
+ if (ttask->state == GOMP_TARGET_TASK_RUNNING)
+ {
+ gomp_unmap_vars (ttask->tgt, true);
+ return false;
+ }
+
+ void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
+ ttask->tgt
+ = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
+ ttask->sizes, ttask->kinds, true,
+ GOMP_MAP_VARS_TARGET);
+ ttask->state = GOMP_TARGET_TASK_RUNNING;
+
+ devicep->async_run_func (devicep->target_id, fn_addr,
+ (void *) ttask->tgt->tgt_start, (void *) ttask);
+ return true;
+ }
+ else if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return false;
size_t i;
if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
- gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
ttask->kinds, true);
else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
for (i = 0; i < ttask->mapnum; i++)
if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
{
- gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1,
- &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
- &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
+ NULL, &ttask->sizes[i], &ttask->kinds[i], true,
+ GOMP_MAP_VARS_ENTER_DATA);
i += ttask->sizes[i];
}
else
- gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL,
- &ttask->sizes[i], &ttask->kinds[i],
- true, GOMP_MAP_VARS_ENTER_DATA);
+ gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
+ &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
else
- gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs,
- ttask->sizes, ttask->kinds);
+ gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ ttask->kinds);
+ return false;
}
void
@@ -2170,6 +2195,7 @@ gomp_load_plugin_for_device (struct gomp
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
{
DLSYM (run);
+ DLSYM (async_run);
DLSYM (dev2dev);
}
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
@@ -406,3 +406,8 @@ GOMP_PLUGIN_1.0 {
GOMP_PLUGIN_async_unmap_vars;
GOMP_PLUGIN_acc_thread;
};
+
+GOMP_PLUGIN_1.1 {
+ global:
+ GOMP_PLUGIN_target_task_completion;
+} GOMP_PLUGIN_1.0;
@@ -482,11 +482,12 @@ ialias (GOMP_taskgroup_end)
/* Called for nowait target tasks. */
-void
+bool
gomp_create_target_task (struct gomp_device_descr *devicep,
void (*fn) (void *), size_t mapnum, void **hostaddrs,
size_t *sizes, unsigned short *kinds,
- unsigned int flags, void **depend)
+ unsigned int flags, void **depend,
+ enum gomp_target_task_state state)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
@@ -495,7 +496,7 @@ gomp_create_target_task (struct gomp_dev
if (team
&& (gomp_team_barrier_cancelled (&team->barrier)
|| (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
- return;
+ return true;
struct gomp_target_task *ttask;
struct gomp_task *task;
@@ -503,19 +504,44 @@ gomp_create_target_task (struct gomp_dev
struct gomp_taskgroup *taskgroup = parent->taskgroup;
bool do_wake;
size_t depend_size = 0;
+ uintptr_t depend_cnt = 0;
+ size_t tgt_align = 0, tgt_size = 0;
if (depend != NULL)
- depend_size = ((uintptr_t) depend[0]
- * sizeof (struct gomp_task_depend_entry));
+ {
+ depend_cnt = (uintptr_t) depend[0];
+ depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
+ }
+ if (fn)
+ {
+ /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
+ firstprivate on the target task. */
+ size_t i;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += sizes[i];
+ }
+ if (tgt_align)
+ tgt_size += tgt_align - 1;
+ else
+ tgt_size = 0;
+ }
+
task = gomp_malloc (sizeof (*task) + depend_size
+ sizeof (*ttask)
+ mapnum * (sizeof (void *) + sizeof (size_t)
- + sizeof (unsigned short)));
+ + sizeof (unsigned short))
+ + tgt_size);
gomp_init_task (task, parent, gomp_icv (false));
task->kind = GOMP_TASK_WAITING;
task->in_tied_task = parent->in_tied_task;
task->taskgroup = taskgroup;
- ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]];
+ ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
ttask->devicep = devicep;
ttask->fn = fn;
ttask->mapnum = mapnum;
@@ -524,8 +550,29 @@ gomp_create_target_task (struct gomp_dev
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
+ if (tgt_align)
+ {
+ char *tgt = (char *) &ttask->kinds[mapnum];
+ size_t i;
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+ if (al)
+ tgt += tgt_align - al;
+ tgt_size = 0;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+ ttask->hostaddrs[i] = tgt + tgt_size;
+ tgt_size = tgt_size + sizes[i];
+ }
+ }
ttask->flags = flags;
- task->fn = gomp_target_task_fn;
+ ttask->state = state;
+ ttask->task = task;
+ ttask->team = team;
+ task->fn = NULL;
task->fn_data = ttask;
task->final_task = 0;
gomp_mutex_lock (&team->task_lock);
@@ -536,19 +583,26 @@ gomp_create_target_task (struct gomp_dev
gomp_mutex_unlock (&team->task_lock);
gomp_finish_task (task);
free (task);
- return;
+ return true;
}
- if (taskgroup)
- taskgroup->num_children++;
if (depend_size)
{
gomp_task_handle_depend (task, parent, depend);
if (task->num_dependees)
{
gomp_mutex_unlock (&team->task_lock);
- return;
+ return true;
}
}
+ if (state == GOMP_TARGET_TASK_DATA)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_finish_task (task);
+ free (task);
+ return false;
+ }
+ if (taskgroup)
+ taskgroup->num_children++;
priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
PRIORITY_INSERT_BEGIN,
/*adjust_parent_depends_on=*/false,
@@ -570,6 +624,95 @@ gomp_create_target_task (struct gomp_dev
gomp_mutex_unlock (&team->task_lock);
if (do_wake)
gomp_team_barrier_wake (&team->barrier, 1);
+ return true;
+}
+
+static void inline
+priority_queue_move_task_first (enum priority_queue_type type,
+ struct priority_queue *head,
+ struct gomp_task *task)
+{
+#if _LIBGOMP_CHECKING_
+ if (!priority_queue_task_in_queue_p (type, head, task))
+ gomp_fatal ("Attempt to move first missing task %p", task);
+#endif
+ struct priority_list *list;
+ if (priority_queue_multi_p (head))
+ {
+ list = priority_queue_lookup_priority (head, task->priority);
+#if _LIBGOMP_CHECKING_
+ if (!list)
+ gomp_fatal ("Unable to find priority %d", task->priority);
+#endif
+ }
+ else
+ list = &head->l;
+ priority_list_remove (list, task_to_priority_node (type, task), 0);
+ priority_list_insert (type, list, task, task->priority,
+ PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
+ task->parent_depends_on);
+}
+
+/* Signal that a target task TTASK has completed the asynchronously
+ running phase and should be requeued as a task to handle the
+ variable unmapping. */
+
+void
+GOMP_PLUGIN_target_task_completion (void *data)
+{
+ struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+ struct gomp_task *task = ttask->task;
+ struct gomp_team *team = ttask->team;
+
+ gomp_mutex_lock (&team->task_lock);
+ struct gomp_task *parent = task->parent;
+ if (parent)
+ priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
+ task);
+
+ struct gomp_taskgroup *taskgroup = task->taskgroup;
+ if (taskgroup)
+ priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
+ task);
+
+ priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
+ PRIORITY_INSERT_BEGIN, false,
+ task->parent_depends_on);
+ task->kind = GOMP_TASK_WAITING;
+ if (parent && parent->taskwait)
+ {
+ if (parent->taskwait->in_taskwait)
+ {
+ /* One more task has had its dependencies met.
+ Inform any waiters. */
+ parent->taskwait->in_taskwait = false;
+ gomp_sem_post (&parent->taskwait->taskwait_sem);
+ }
+ else if (parent->taskwait->in_depend_wait)
+ {
+ /* One more task has had its dependencies met.
+ Inform any waiters. */
+ parent->taskwait->in_depend_wait = false;
+ gomp_sem_post (&parent->taskwait->taskwait_sem);
+ }
+ }
+ if (taskgroup && taskgroup->in_taskgroup_wait)
+ {
+ /* One more task has had its dependencies met.
+ Inform any waiters. */
+ taskgroup->in_taskgroup_wait = false;
+ gomp_sem_post (&taskgroup->taskgroup_sem);
+ }
+
+ ++team->task_queued_count;
+ gomp_team_barrier_set_task_pending (&team->barrier);
+ /* I'm afraid this can't be done after releasing team->task_lock,
+ as gomp_target_task_completion is run from unrelated thread and
+ therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
+ the team could be gone already. */
+ if (team->nthreads > team->task_running_count)
+ gomp_team_barrier_wake (&team->barrier, 1);
+ gomp_mutex_unlock (&team->task_lock);
}
/* Given a parent_depends_on task in LIST, move it to the front of its
@@ -1041,7 +1184,20 @@ gomp_barrier_handle_tasks (gomp_barrier_
if (child_task)
{
thr->task = child_task;
- child_task->fn (child_task->fn_data);
+ if (__builtin_expect (child_task->fn == NULL, 0))
+ {
+ if (gomp_target_task_fn (child_task->fn_data))
+ {
+ thr->task = task;
+ gomp_mutex_lock (&team->task_lock);
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING;
+ team->task_running_count--;
+ child_task = NULL;
+ continue;
+ }
+ }
+ else
+ child_task->fn (child_task->fn_data);
thr->task = task;
}
else
@@ -1170,7 +1326,19 @@ GOMP_taskwait (void)
if (child_task)
{
thr->task = child_task;
- child_task->fn (child_task->fn_data);
+ if (__builtin_expect (child_task->fn == NULL, 0))
+ {
+ if (gomp_target_task_fn (child_task->fn_data))
+ {
+ thr->task = task;
+ gomp_mutex_lock (&team->task_lock);
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING;
+ child_task = NULL;
+ continue;
+ }
+ }
+ else
+ child_task->fn (child_task->fn_data);
thr->task = task;
}
else
@@ -1342,7 +1510,19 @@ gomp_task_maybe_wait_for_dependencies (v
if (child_task)
{
thr->task = child_task;
- child_task->fn (child_task->fn_data);
+ if (__builtin_expect (child_task->fn == NULL, 0))
+ {
+ if (gomp_target_task_fn (child_task->fn_data))
+ {
+ thr->task = task;
+ gomp_mutex_lock (&team->task_lock);
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING;
+ child_task = NULL;
+ continue;
+ }
+ }
+ else
+ child_task->fn (child_task->fn_data);
thr->task = task;
}
else
@@ -1450,8 +1630,8 @@ GOMP_taskgroup_end (void)
= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
PQ_TEAM, &team->task_queue,
&unused);
- }
- else
+ }
+ else
{
gomp_mutex_unlock (&team->task_lock);
if (to_free)
@@ -1506,7 +1686,19 @@ GOMP_taskgroup_end (void)
if (child_task)
{
thr->task = child_task;
- child_task->fn (child_task->fn_data);
+ if (__builtin_expect (child_task->fn == NULL, 0))
+ {
+ if (gomp_target_task_fn (child_task->fn_data))
+ {
+ thr->task = task;
+ gomp_mutex_lock (&team->task_lock);
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING;
+ child_task = NULL;
+ continue;
+ }
+ }
+ else
+ child_task->fn (child_task->fn_data);
thr->task = task;
}
else
@@ -85,7 +85,7 @@ priority_queue_task_in_queue_p (enum pri
order. LIST is a priority list of type TYPE.
The expected order is that GOMP_TASK_WAITING tasks come before
- GOMP_TASK_TIED ones.
+ GOMP_TASK_TIED/GOMP_TASK_ASYNC_RUNNING ones.
If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING
tasks come before !parent_depends_on WAITING tasks. This is only
@@ -104,7 +104,7 @@ priority_list_verify (enum priority_queu
struct gomp_task *t = priority_node_to_task (type, p);
if (seen_tied && t->kind == GOMP_TASK_WAITING)
gomp_fatal ("priority_queue_verify: WAITING task after TIED");
- if (t->kind == GOMP_TASK_TIED)
+ if (t->kind >= GOMP_TASK_TIED)
seen_tied = true;
else if (check_deps && t->kind == GOMP_TASK_WAITING)
{
@@ -373,7 +373,12 @@ enum gomp_task_kind
/* Task created by GOMP_task and waiting to be run. */
GOMP_TASK_WAITING,
/* Task currently executing or scheduled and about to execute. */
- GOMP_TASK_TIED
+ GOMP_TASK_TIED,
+ /* Used for target tasks that have vars mapped and async run started,
+ but not yet completed. Once that completes, they will be readded
+ into the queues as GOMP_TASK_WAITING in order to perform the var
+ unmapping. */
+ GOMP_TASK_ASYNC_RUNNING
};
struct gomp_task_depend_entry
@@ -453,6 +458,8 @@ struct gomp_task
struct gomp_task_depend_entry depend[];
};
+/* This structure describes a single #pragma omp taskgroup. */
+
struct gomp_taskgroup
{
struct gomp_taskgroup *prev;
@@ -464,6 +471,8 @@ struct gomp_taskgroup
size_t num_children;
};
+/* This structure describes a target task. */
+
struct gomp_target_task
{
struct gomp_device_descr *devicep;
@@ -472,6 +481,10 @@ struct gomp_target_task
size_t *sizes;
unsigned short *kinds;
unsigned int flags;
+ enum gomp_target_task_state state;
+ struct target_mem_desc *tgt;
+ struct gomp_task *task;
+ struct gomp_team *team;
void *hostaddrs[];
};
@@ -723,10 +736,10 @@ extern void gomp_init_task (struct gomp_
extern void gomp_end_task (void);
extern void gomp_barrier_handle_tasks (gomp_barrier_state_t);
extern void gomp_task_maybe_wait_for_dependencies (void **);
-extern void gomp_create_target_task (struct gomp_device_descr *,
+extern bool gomp_create_target_task (struct gomp_device_descr *,
void (*) (void *), size_t, void **,
size_t *, unsigned short *, unsigned int,
- void **);
+ void **, enum gomp_target_task_state);
static void inline
gomp_finish_task (struct gomp_task *task)
@@ -747,7 +760,7 @@ extern void gomp_free_thread (void *);
extern void gomp_init_targets_once (void);
extern int gomp_get_num_devices (void);
-extern void gomp_target_task_fn (void *);
+extern bool gomp_target_task_fn (void *);
/* Splay tree definitions. */
typedef struct splay_tree_node_s *splay_tree_node;
@@ -901,6 +914,7 @@ struct gomp_device_descr
void *(*host2dev_func) (int, void *, const void *, size_t);
void *(*dev2dev_func) (int, void *, const void *, size_t);
void (*run_func) (int, void *, void *);
+ void (*async_run_func) (int, void *, void *, void *);
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;