diff mbox

[gomp4.1] depend nowait support for target {update,{enter,exit} data}

Message ID 20150908092014.GA1847@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Sept. 8, 2015, 9:20 a.m. UTC
Hi!

This patch does two things:
1) removes fatal error from #pragma omp target update if object is not
mapped (at all, partial mapping is still a fatal error); the 4.1 draft spec
says that nothing is copied if the object is not mapped (first hunk)

2) implements nowait support for #pragma omp target {update,{enter,exit} data}
- if depend clause is not present, nowait is ignored, similarly if there is
no team (not inside of a parallel), or if the encountering task is final,
or if no children of the current task had depend clauses yet.  Otherwise,
a task is created, and when the dependencies are resolved and the task
scheduler will schedule it, it will perform the required update/enter/exit
action(s).  If there are depend clauses, the "target task" is not really
executed "immediately" as the spec says, but the spec is broken and I
believe is going to change (the question is when and to what wording).

nowait support for #pragma omp target is not implemented yet, supposedly we
need to mark those somehow (some flag) already in the struct gomp_task
structure, essentially it will need either 2 or 3 callbacks
(the current one, executed when the dependencies are resolved (it actually
waits until some thread schedules it after that point, I think it is
undesirable to run it with the tasking lock held), which would perform
the gomp_map_vars and initiate the running of the region, and then some
query routine which would poll the plugin whether the task is done or not,
and either perform the finalization (unmap_vars) if it is done (and in any
case return bool whether it should be polled again or not), and if the
finalization is not done there, also another callback for the finalization.
Also, there is the issue that if we are waiting for task that needs to be
polled, and we don't have any further tasks to run, we shouldn't really
attempt to sleep on some semaphore (e.g. in taskwait, end of
taskgroup, etc.) or barrier, but rather either need to keep polling it, or
call the query hook with some argument that it should sleep in there until
the work is done by the offloading device.
Also, there needs to be a way for the target nowait first callback to say
that it is using host fallback and thus acts as a normal task, therefore
once the task fn finishes, the task is done.

2015-09-08  Jakub Jelinek  <jakub@redhat.com>

	* target.c (gomp_update): Remove fatal error if object is not mapped.

	* target.c (GOMP_target_update_41): Handle nowait update with
	dependencies.  Don't call gomp_update if parallel or taskgroup has
	been cancelled.
	(GOMP_target_enter_exit_data): Likewise.
	(gomp_target_task_fn): New function.
	* task.c (gomp_task_handle_depend): New function, copied from...
	(GOMP_task): ... here.  Use gomp_task_handle_depend.
	(gomp_create_target_task): New function.
	* libgomp.h (struct gomp_target_task): New type.
	(gomp_create_target_task, gomp_target_task_fn): New prototypes.
	* testsuite/libgomp.c/target-27.c: New test.


	Jakub
diff mbox

Patch

--- libgomp/target.c.jj	2015-09-03 16:51:06.000000000 +0200
+++ libgomp/target.c	2015-09-08 09:55:24.591484158 +0200
@@ -899,13 +899,6 @@  gomp_update (struct gomp_device_descr *d
 						- n->host_start),
 				      cur_node.host_end - cur_node.host_start);
 	  }
-	else
-	  {
-	    gomp_mutex_unlock (&devicep->lock);
-	    gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
-			(void *) cur_node.host_start,
-			(void *) cur_node.host_end);
-	  }
       }
   gomp_mutex_unlock (&devicep->lock);
 }
@@ -1460,18 +1453,50 @@  GOMP_target_update_41 (int device, size_
   /* If there are depend clauses, but nowait is not present,
      block the parent task until the dependencies are resolved
      and then just continue with the rest of the function as if it
-     is a merged task.  */
+     is a merged task.  Until we are able to schedule task during
+     variable mapping or unmapping, ignore nowait if depend clauses
+     are not present.  */
   if (depend != NULL)
     {
       struct gomp_thread *thr = gomp_thread ();
       if (thr->task && thr->task->depend_hash)
-	gomp_task_maybe_wait_for_dependencies (depend);
+	{
+	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+	      && thr->ts.team
+	      && !thr->task->final_task)
+	    {
+	      gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+				       mapnum, hostaddrs, sizes, kinds,
+				       flags | GOMP_TARGET_FLAG_UPDATE,
+				       depend);
+	      return;
+	    }
+
+	  struct gomp_team *team = thr->ts.team;
+	  /* If parallel or taskgroup has been cancelled, don't start new
+	     tasks.  */
+	  if (team
+	      && (gomp_team_barrier_cancelled (&team->barrier)
+		  || (thr->task->taskgroup
+		      && thr->task->taskgroup->cancelled)))
+	    return;
+
+	  gomp_task_maybe_wait_for_dependencies (depend);
+	}
     }
 
   if (devicep == NULL
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     return;
 
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team
+      && (gomp_team_barrier_cancelled (&team->barrier)
+	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+    return;
+
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
 }
 
@@ -1548,18 +1573,49 @@  GOMP_target_enter_exit_data (int device,
   /* If there are depend clauses, but nowait is not present,
      block the parent task until the dependencies are resolved
      and then just continue with the rest of the function as if it
-     is a merged task.  */
+     is a merged task.  Until we are able to schedule task during
+     variable mapping or unmapping, ignore nowait if depend clauses
+     are not present.  */
   if (depend != NULL)
     {
       struct gomp_thread *thr = gomp_thread ();
       if (thr->task && thr->task->depend_hash)
-	gomp_task_maybe_wait_for_dependencies (depend);
+	{
+	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+	      && thr->ts.team
+	      && !thr->task->final_task)
+	    {
+	      gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+				       mapnum, hostaddrs, sizes, kinds,
+				       flags, depend);
+	      return;
+	    }
+
+	  struct gomp_team *team = thr->ts.team;
+	  /* If parallel or taskgroup has been cancelled, don't start new
+	     tasks.  */
+	  if (team
+	      && (gomp_team_barrier_cancelled (&team->barrier)
+		  || (thr->task->taskgroup
+		      && thr->task->taskgroup->cancelled)))
+	    return;
+
+	  gomp_task_maybe_wait_for_dependencies (depend);
+	}
     }
 
   if (devicep == NULL
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     return;
 
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team
+      && (gomp_team_barrier_cancelled (&team->barrier)
+	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+    return;
+
   size_t i;
   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
     for (i = 0; i < mapnum; i++)
@@ -1577,6 +1633,40 @@  GOMP_target_enter_exit_data (int device,
 }
 
 void
+gomp_target_task_fn (void *data)
+{
+  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+  if (ttask->fn != NULL)
+    {
+      /* GOMP_target_41 */
+    }
+  else if (ttask->devicep == NULL
+	   || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return;
+
+  size_t i;
+  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
+    gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+		 ttask->kinds, true);
+  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+    for (i = 0; i < ttask->mapnum; i++)
+      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+	{
+	  gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1,
+			 &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
+			 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+	  i += ttask->sizes[i];
+	}
+      else
+	gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL,
+		       &ttask->sizes[i], &ttask->kinds[i],
+		       true, GOMP_MAP_VARS_ENTER_DATA);
+  else
+    gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs,
+		    ttask->sizes, ttask->kinds);
+}
+
+void
 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
 {
   if (thread_limit)
--- libgomp/task.c.jj	2015-09-02 15:22:14.000000000 +0200
+++ libgomp/task.c	2015-09-08 10:20:54.163978966 +0200
@@ -108,6 +108,123 @@  gomp_clear_parent (struct gomp_task *chi
     while (task != children);
 }
 
+/* Helper function for GOMP_task and gomp_create_target_task.  Depend clause
+   handling for undeferred task creation.  */
+
+static void
+gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
+			 void **depend)
+{
+  size_t ndepend = (uintptr_t) depend[0];
+  size_t nout = (uintptr_t) depend[1];
+  size_t i;
+  hash_entry_type ent;
+
+  task->depend_count = ndepend;
+  task->num_dependees = 0;
+  if (parent->depend_hash == NULL)
+    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+  for (i = 0; i < ndepend; i++)
+    {
+      task->depend[i].addr = depend[2 + i];
+      task->depend[i].next = NULL;
+      task->depend[i].prev = NULL;
+      task->depend[i].task = task;
+      task->depend[i].is_in = i >= nout;
+      task->depend[i].redundant = false;
+      task->depend[i].redundant_out = false;
+
+      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
+					      &task->depend[i], INSERT);
+      hash_entry_type out = NULL, last = NULL;
+      if (*slot)
+	{
+	  /* If multiple depends on the same task are the same, all but the
+	     first one are redundant.  As inout/out come first, if any of them
+	     is inout/out, it will win, which is the right semantics.  */
+	  if ((*slot)->task == task)
+	    {
+	      task->depend[i].redundant = true;
+	      continue;
+	    }
+	  for (ent = *slot; ent; ent = ent->next)
+	    {
+	      if (ent->redundant_out)
+		break;
+
+	      last = ent;
+
+	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
+	      if (i >= nout && ent->is_in)
+		continue;
+
+	      if (!ent->is_in)
+		out = ent;
+
+	      struct gomp_task *tsk = ent->task;
+	      if (tsk->dependers == NULL)
+		{
+		  tsk->dependers
+		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
+				   + 6 * sizeof (struct gomp_task *));
+		  tsk->dependers->n_elem = 1;
+		  tsk->dependers->allocated = 6;
+		  tsk->dependers->elem[0] = task;
+		  task->num_dependees++;
+		  continue;
+		}
+	      /* We already have some other dependency on tsk from earlier
+		 depend clause.  */
+	      else if (tsk->dependers->n_elem
+		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
+			   == task))
+		continue;
+	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
+		{
+		  tsk->dependers->allocated
+		    = tsk->dependers->allocated * 2 + 2;
+		  tsk->dependers
+		    = gomp_realloc (tsk->dependers,
+				    sizeof (struct gomp_dependers_vec)
+				    + (tsk->dependers->allocated
+				       * sizeof (struct gomp_task *)));
+		}
+	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
+	      task->num_dependees++;
+	    }
+	  task->depend[i].next = *slot;
+	  (*slot)->prev = &task->depend[i];
+	}
+      *slot = &task->depend[i];
+
+      /* There is no need to store more than one depend({,in}out:) task per
+	 address in the hash table chain for the purpose of creation of
+	 deferred tasks, because each out depends on all earlier outs, thus it
+	 is enough to record just the last depend({,in}out:).  For depend(in:),
+	 we need to keep all of the previous ones not terminated yet, because
+	 a later depend({,in}out:) might need to depend on all of them.  So, if
+	 the new task's clause is depend({,in}out:), we know there is at most
+	 one other depend({,in}out:) clause in the list (out).  For
+	 non-deferred tasks we want to see all outs, so they are moved to the
+	 end of the chain, after first redundant_out entry all following
+	 entries should be redundant_out.  */
+      if (!task->depend[i].is_in && out)
+	{
+	  if (out != last)
+	    {
+	      out->next->prev = out->prev;
+	      out->prev->next = out->next;
+	      out->next = last->next;
+	      out->prev = last;
+	      last->next = out;
+	      if (out->next)
+		out->next->prev = out;
+	    }
+	  out->redundant_out = true;
+	}
+    }
+}
+
 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
    false, then we must not delay in executing the task.  If UNTIED is true,
    then the task may be executed by any member of the team.
@@ -248,123 +365,7 @@  GOMP_task (void (*fn) (void *), void *da
 	taskgroup->num_children++;
       if (depend_size)
 	{
-	  size_t ndepend = (uintptr_t) depend[0];
-	  size_t nout = (uintptr_t) depend[1];
-	  size_t i;
-	  hash_entry_type ent;
-
-	  task->depend_count = ndepend;
-	  task->num_dependees = 0;
-	  if (parent->depend_hash == NULL)
-	    parent->depend_hash
-	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
-	  for (i = 0; i < ndepend; i++)
-	    {
-	      task->depend[i].addr = depend[2 + i];
-	      task->depend[i].next = NULL;
-	      task->depend[i].prev = NULL;
-	      task->depend[i].task = task;
-	      task->depend[i].is_in = i >= nout;
-	      task->depend[i].redundant = false;
-	      task->depend[i].redundant_out = false;
-
-	      hash_entry_type *slot
-		= htab_find_slot (&parent->depend_hash, &task->depend[i],
-				  INSERT);
-	      hash_entry_type out = NULL, last = NULL;
-	      if (*slot)
-		{
-		  /* If multiple depends on the same task are the
-		     same, all but the first one are redundant.
-		     As inout/out come first, if any of them is
-		     inout/out, it will win, which is the right
-		     semantics.  */
-		  if ((*slot)->task == task)
-		    {
-		      task->depend[i].redundant = true;
-		      continue;
-		    }
-		  for (ent = *slot; ent; ent = ent->next)
-		    {
-		      if (ent->redundant_out)
-			break;
-
-		      last = ent;
-
-		      /* depend(in:...) doesn't depend on earlier
-			 depend(in:...).  */
-		      if (i >= nout && ent->is_in)
-			continue;
-
-		      if (!ent->is_in)
-			out = ent;
-
-		      struct gomp_task *tsk = ent->task;
-		      if (tsk->dependers == NULL)
-			{
-			  tsk->dependers
-			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
-					   + 6 * sizeof (struct gomp_task *));
-			  tsk->dependers->n_elem = 1;
-			  tsk->dependers->allocated = 6;
-			  tsk->dependers->elem[0] = task;
-			  task->num_dependees++;
-			  continue;
-			}
-		      /* We already have some other dependency on tsk
-			 from earlier depend clause.  */
-		      else if (tsk->dependers->n_elem
-			       && (tsk->dependers->elem[tsk->dependers->n_elem
-							- 1]
-				   == task))
-			continue;
-		      else if (tsk->dependers->n_elem
-			       == tsk->dependers->allocated)
-			{
-			  tsk->dependers->allocated
-			    = tsk->dependers->allocated * 2 + 2;
-			  tsk->dependers
-			    = gomp_realloc (tsk->dependers,
-					    sizeof (struct gomp_dependers_vec)
-					    + (tsk->dependers->allocated
-					       * sizeof (struct gomp_task *)));
-			}
-		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
-		      task->num_dependees++;
-		    }
-		  task->depend[i].next = *slot;
-		  (*slot)->prev = &task->depend[i];
-		}
-	      *slot = &task->depend[i];
-
-	      /* There is no need to store more than one depend({,in}out:)
-		 task per address in the hash table chain for the purpose
-		 of creation of deferred tasks, because each out
-		 depends on all earlier outs, thus it is enough to record
-		 just the last depend({,in}out:).  For depend(in:), we need
-		 to keep all of the previous ones not terminated yet, because
-		 a later depend({,in}out:) might need to depend on all of
-		 them.  So, if the new task's clause is depend({,in}out:),
-		 we know there is at most one other depend({,in}out:) clause
-		 in the list (out).  For non-deferred tasks we want to see
-		 all outs, so they are moved to the end of the chain,
-		 after first redundant_out entry all following entries
-		 should be redundant_out.  */
-	      if (!task->depend[i].is_in && out)
-		{
-		  if (out != last)
-		    {
-		      out->next->prev = out->prev;
-		      out->prev->next = out->next;
-		      out->next = last->next;
-		      out->prev = last;
-		      last->next = out;
-		      if (out->next)
-			out->next->prev = out;
-		    }
-		  out->redundant_out = true;
-		}
-	    }
+	  gomp_task_handle_depend (task, parent, depend);
 	  if (task->num_dependees)
 	    {
 	      gomp_mutex_unlock (&team->task_lock);
@@ -444,6 +445,128 @@  ialias (GOMP_taskgroup_end)
 #undef UTYPE
 #undef GOMP_taskloop
 
+/* Called for nowait target tasks.  */
+
+void
+gomp_create_target_task (struct gomp_device_descr *devicep,
+			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
+			 size_t *sizes, unsigned short *kinds,
+			 unsigned int flags, void **depend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team
+      && (gomp_team_barrier_cancelled (&team->barrier)
+	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+    return;
+
+  struct gomp_target_task *ttask;
+  struct gomp_task *task;
+  struct gomp_task *parent = thr->task;
+  struct gomp_taskgroup *taskgroup = parent->taskgroup;
+  bool do_wake;
+  size_t depend_size = 0;
+
+  if (depend != NULL)
+    depend_size = ((uintptr_t) depend[0]
+		   * sizeof (struct gomp_task_depend_entry));
+  task = gomp_malloc (sizeof (*task) + depend_size
+		      + sizeof (*ttask)
+		      + mapnum * (sizeof (void *) + sizeof (size_t)
+				  + sizeof (unsigned short)));
+  gomp_init_task (task, parent, gomp_icv (false));
+  task->kind = GOMP_TASK_WAITING;
+  task->in_tied_task = parent->in_tied_task;
+  task->taskgroup = taskgroup;
+  ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]];
+  ttask->devicep = devicep;
+  ttask->fn = fn;
+  ttask->mapnum = mapnum;
+  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
+  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
+  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
+  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
+  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
+  ttask->flags = flags;
+  task->fn = gomp_target_task_fn;
+  task->fn_data = ttask;
+  task->final_task = 0;
+  gomp_mutex_lock (&team->task_lock);
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
+			|| (taskgroup && taskgroup->cancelled), 0))
+    {
+      gomp_mutex_unlock (&team->task_lock);
+      gomp_finish_task (task);
+      free (task);
+      return;
+    }
+  if (taskgroup)
+    taskgroup->num_children++;
+  if (depend_size)
+    {
+      gomp_task_handle_depend (task, parent, depend);
+      if (task->num_dependees)
+	{
+	  gomp_mutex_unlock (&team->task_lock);
+	  return;
+	}
+    }
+  if (parent->children)
+    {
+      task->next_child = parent->children;
+      task->prev_child = parent->children->prev_child;
+      task->next_child->prev_child = task;
+      task->prev_child->next_child = task;
+    }
+  else
+    {
+      task->next_child = task;
+      task->prev_child = task;
+    }
+  parent->children = task;
+  if (taskgroup)
+    {
+      /* If applicable, place task into its taskgroup.  */
+      if (taskgroup->children)
+	{
+	  task->next_taskgroup = taskgroup->children;
+	  task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+	  task->next_taskgroup->prev_taskgroup = task;
+	  task->prev_taskgroup->next_taskgroup = task;
+	}
+      else
+	{
+	  task->next_taskgroup = task;
+	  task->prev_taskgroup = task;
+	}
+      taskgroup->children = task;
+    }
+  if (team->task_queue)
+    {
+      task->next_queue = team->task_queue;
+      task->prev_queue = team->task_queue->prev_queue;
+      task->next_queue->prev_queue = task;
+      task->prev_queue->next_queue = task;
+    }
+  else
+    {
+      task->next_queue = task;
+      task->prev_queue = task;
+      team->task_queue = task;
+    }
+  ++team->task_count;
+  ++team->task_queued_count;
+  gomp_team_barrier_set_task_pending (&team->barrier);
+  do_wake = team->task_running_count + !parent->in_tied_task
+	    < team->nthreads;
+  gomp_mutex_unlock (&team->task_lock);
+  if (do_wake)
+    gomp_team_barrier_wake (&team->barrier, 1);
+}
+
 static inline bool
 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
 		   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
--- libgomp/libgomp.h.jj	2015-09-03 16:33:42.000000000 +0200
+++ libgomp/libgomp.h	2015-09-08 10:18:44.164897688 +0200
@@ -374,6 +374,17 @@  struct gomp_taskgroup
   size_t num_children;
 };
 
+struct gomp_target_task
+{
+  struct gomp_device_descr *devicep;
+  void (*fn) (void *);
+  size_t mapnum;
+  size_t *sizes;
+  unsigned short *kinds;
+  unsigned int flags;
+  void *hostaddrs[];
+};
+
 /* This structure describes a "team" of threads.  These are the threads
    that are spawned by a PARALLEL constructs, as well as the work sharing
    constructs that the team encounters.  */
@@ -653,6 +664,10 @@  extern void gomp_init_task (struct gomp_
 extern void gomp_end_task (void);
 extern void gomp_barrier_handle_tasks (gomp_barrier_state_t);
 extern void gomp_task_maybe_wait_for_dependencies (void **);
+extern void gomp_create_target_task (struct gomp_device_descr *,
+				     void (*) (void *), size_t, void **,
+				     size_t *, unsigned short *, unsigned int,
+				     void **);
 
 static void inline
 gomp_finish_task (struct gomp_task *task)
@@ -673,6 +688,7 @@  extern void gomp_free_thread (void *);
 
 extern void gomp_init_targets_once (void);
 extern int gomp_get_num_devices (void);
+extern void gomp_target_task_fn (void *);
 
 typedef struct splay_tree_node_s *splay_tree_node;
 typedef struct splay_tree_s *splay_tree;
--- libgomp/testsuite/libgomp.c/target-27.c.jj	2015-09-07 17:56:17.503966807 +0200
+++ libgomp/testsuite/libgomp.c/target-27.c	2015-09-07 18:46:23.000000000 +0200
@@ -0,0 +1,67 @@ 
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+  int x = 0, y = 0, z = 0, err;
+  int shared_mem = 0;
+  #pragma omp target map(to: shared_mem)
+  shared_mem = 1;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task depend(in: x)
+    {
+      usleep (5000);
+      x = 1;
+    }
+    #pragma omp task depend(in: x)
+    {
+      usleep (6000);
+      y = 2;
+    }
+    #pragma omp task depend(out: z)
+    {
+      usleep (7000);
+      z = 3;
+    }
+    #pragma omp target enter data map(to: x, y, z) depend(inout: x, z) nowait
+    #pragma omp task depend(inout: x, z)
+    {
+      x++; y++; z++;
+    }
+    #pragma omp target update to(x, y) depend(inout: x) nowait
+    #pragma omp target enter data map(always, to: z) depend(inout: z) nowait
+    #pragma omp target map (alloc: x, y, z) map (from: err) depend(inout: x, z)
+    {
+      err = x != 2 || y != 3 || z != 4;
+      x = 5; y = 6; z = 7;
+    }
+    #pragma omp task depend(in: x)
+    {
+      usleep (5000);
+      if (!shared_mem)
+	x = 1;
+    }
+    #pragma omp task depend(in: x)
+    {
+      usleep (6000);
+      if (!shared_mem)
+	y = 2;
+    }
+    #pragma omp task depend(out: z)
+    {
+      usleep (3000);
+      if (!shared_mem)
+	z = 3;
+    }
+    #pragma omp target exit data map(release: z) depend(inout: z) nowait
+    #pragma omp target exit data map(from: x, y) depend(inout: x) nowait
+    #pragma omp target exit data map(from: z) depend(inout: z) nowait
+    #pragma omp taskwait
+    if (err || x != 5 || y != 6 || z != 7)
+      abort ();
+  }
+  return 0;
+}