[gomp5] Fix task reduction handling in implicit parallel regions

Message ID 20181008175843.GK11625@tucnak
State New
Headers show
Series
  • [gomp5] Fix task reduction handling in implicit parallel regions
Related show

Commit Message

Jakub Jelinek Oct. 8, 2018, 5:58 p.m.
Hi!

In implicit parallel regions, we have a NULL team and often a NULL task.
For task reductions we need both to be non-NULL, so this patch creates such a
team in that case, as we already do for target nowait.

Tested on x86_64-linux, committed to gomp-5_0-branch.

2018-10-08  Jakub Jelinek  <jakub@redhat.com>

	* task.c (GOMP_taskgroup_reduction_register): If team is NULL, create
	implicit team with 1 thread and call GOMP_taskgroup_start again.  Don't
	mix declarations with statements.
	* team.c (gomp_team_end): Determine nesting by thr->ts.level != 0
	rather than thr->ts.team != NULL.
	* testsuite/libgomp.c-c++-common/task-reduction-4.c: New test.



	Jakub

Patch

--- libgomp/task.c.jj	2018-10-08 12:20:53.712125100 +0200
+++ libgomp/task.c	2018-10-08 18:29:51.410292170 +0200
@@ -1968,11 +1968,45 @@  GOMP_taskgroup_reduction_register (uintp
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
-  struct gomp_task *task = thr->task;
-  unsigned nthreads = team ? team->nthreads : 1;
+  struct gomp_task *task;
+  if (__builtin_expect (team == NULL, 0))
+    {
+      /* The task reduction code needs a team and task, so for
+	 orphaned taskgroups just create the implicit team.  */
+      struct gomp_task_icv *icv;
+      team = gomp_new_team (1);
+      task = thr->task;
+      icv = task ? &task->icv : &gomp_global_icv;
+      team->prev_ts = thr->ts;
+      thr->ts.team = team;
+      thr->ts.team_id = 0;
+      thr->ts.work_share = &team->work_shares[0];
+      thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+      thr->ts.single_count = 0;
+#endif
+      thr->ts.static_trip = 0;
+      thr->task = &team->implicit_task[0];
+      gomp_init_task (thr->task, NULL, icv);
+      if (task)
+	{
+	  thr->task = task;
+	  gomp_end_task ();
+	  free (task);
+	  thr->task = &team->implicit_task[0];
+	}
+#ifdef LIBGOMP_USE_PTHREADS
+      else
+	pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+      GOMP_taskgroup_start ();
+    }
+  unsigned nthreads = team->nthreads;
   size_t total_cnt = 0;
-  uintptr_t *d = data;
-  uintptr_t *old = task->taskgroup->reductions;
+  uintptr_t *d = data, *old;
+  struct htab *old_htab = NULL, *new_htab;
+  task = thr->task;
+  old = task->taskgroup->reductions;
   do
     {
       size_t sz = d[1] * nthreads;
@@ -1992,13 +2026,12 @@  GOMP_taskgroup_reduction_register (uintp
 	d = (uintptr_t *) d[4];
     }
   while (1);
-  struct htab *old_htab = NULL;
   if (old && old[5])
     {
       old_htab = (struct htab *) old[5];
       total_cnt += htab_elements (old_htab);
     }
-  struct htab *new_htab = htab_create (total_cnt);
+  new_htab = htab_create (total_cnt);
   if (old_htab)
     {
       /* Copy old hash table, like in htab_expand.  */
--- libgomp/team.c.jj	2018-07-27 12:57:16.000000000 +0200
+++ libgomp/team.c	2018-10-08 19:05:58.135130888 +0200
@@ -945,7 +945,7 @@  gomp_team_end (void)
   gomp_end_task ();
   thr->ts = team->prev_ts;
 
-  if (__builtin_expect (thr->ts.team != NULL, 0))
+  if (__builtin_expect (thr->ts.level != 0, 0))
     {
 #ifdef HAVE_SYNC_BUILTINS
       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
--- libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c.jj	2018-10-08 18:35:52.181268647 +0200
+++ libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c	2018-10-08 18:35:52.181268647 +0200
@@ -0,0 +1,70 @@ 
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+void
+bar (long long int *p)
+{
+  p[0] *= 2;
+  #pragma omp task in_reduction (*: p[0])
+  p[0] *= 3;
+}
+
+void
+foo (long long int *p, long long int *q)
+{
+  #pragma omp taskgroup task_reduction (*: p[0])
+  {
+    #pragma omp task in_reduction (*: p[0])
+    bar (p);
+    #pragma omp task in_reduction (*: p[0])
+    bar (p);
+    bar (p);
+    #pragma omp taskgroup task_reduction (*: q[0])
+    {
+      #pragma omp task in_reduction (*: q[0])
+      bar (q);
+      #pragma omp task in_reduction (*: q[0])
+      bar (q);
+      #pragma omp task in_reduction (*: q[0])
+      bar (q);
+      bar (q);
+      #pragma omp task in_reduction (*: p[0])
+      {
+	#pragma omp taskgroup task_reduction (*: p[0])
+	{
+	  #pragma omp task in_reduction (*: p[0])
+	  bar (p);
+	  p[0] *= 2;
+	  #pragma omp task in_reduction (*: p[0])
+	  bar (p);
+	}
+      }
+    }
+  }
+}
+
+int
+main ()
+{
+  long long int p = 1LL, q = 1LL;
+  foo (&p, &q);
+  if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+    abort ();
+  p = 1LL;
+  q = 1LL;
+  #pragma omp taskgroup
+  foo (&p, &q);
+  if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+    abort ();
+  p = 1LL;
+  q = 1LL;
+  #pragma omp parallel
+  #pragma omp single
+  foo (&p, &q);
+  if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+    abort ();
+  return 0;
+}