diff mbox

Ping x 5: [PATCH] [libgomp] make it possible to use OMP on both sides of a fork

Message ID CAPJVwBk1oBPVKWa=gtaSBvGXfsFAiMJfr2+=ZSF70EGakE++hg@mail.gmail.com
State New
Headers show

Commit Message

Nathaniel Smith May 14, 2014, 2:47 p.m. UTC
Hi all,

Pinging again about the patch below. The lack of this patch is
essentially a blocker to using gcc+python+openmp together, which is a
shame, since python is increasingly important in numerical computing,
openmp is pretty useful, and gcc is the only openmp implementation
that does not support this functionality.

-n

On Tue, Apr 15, 2014 at 1:19 PM, Nathaniel Smith <njs@pobox.com> wrote:
> On Tue, Mar 4, 2014 at 11:37 PM, Nathaniel Smith <njs@pobox.com> wrote:
>> On Tue, Feb 18, 2014 at 8:58 PM, Richard Henderson <rth@redhat.com> wrote:
>>> On 02/16/2014 03:59 PM, Nathaniel Smith wrote:
>>>> Yes, but the problem is that depending on what the user intends to do
>>>> after forking, our pthread_atfork handler might help or it might hurt,
>>>> and we don't know which. Consider these two cases:
>>>>   - fork+exec
>>>>   - fork+continue to use OMP in child
>>>> The former case is totally POSIX-legal, even when performed at
>>>> arbitrary places, even when another thread is, say, in the middle of
>>>> calling malloc().
>>>
>>> Point well taken.
>>
>> Hi all,
>>
>> I guess this patch has gotten all the feedback that it's getting. Any
>> interest in committing it? :-) I don't have commit access.
>>
>> 2014-02-12  Nathaniel J. Smith  <njs@pobox.com>
>>
>>         * team.c (gomp_free_pool_helper): Move per-thread cleanup to main
>>         thread.
>>         (gomp_free_thread): Delegate implementation to...
>>         (gomp_free_thread_pool): ...this new function. Like old
>>         gomp_free_thread, but does per-thread cleanup, and has option to
>>         skip everything that involves interacting with actual threads,
>>         which is useful when called after fork.
>>         (gomp_after_fork_callback): New function.
>>         (gomp_team_start): Register atfork handler, and check for fork on
>>         entry.
>
> Pinging this again now that trunk has re-opened. For compliant code
> this patch has essentially no impact (OMP-using code acquires a
> single-line post-fork callback which sets a flag; everything else
> works the same as now). For technically non-compliant "mostly serial"
> code that uses OMP in some places, and forks children in other places,
> it makes a best effort attempt to clean up the thread pool detritus
> left by a fork, instead of simply deadlocking as currently, so as to
> allow children to use OMP as well. This makes GOMP match the behaviour
> of all other OMP implementations I'm aware of.
>
> Previous discussion:
>   http://gcc.gnu.org/ml/gcc-patches/2014-02/msg00813.html
> Bug:
>    http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
>
> I don't have a commit bit -- please commit if acceptable.
>
> Cheers,
> -n
>
> --
> Nathaniel J. Smith
> Postdoctoral researcher - Informatics - University of Edinburgh
> http://vorpus.org
diff mbox

Patch

Index: team.c
===================================================================
--- team.c	(revision 207398)
+++ team.c	(working copy)
@@ -28,6 +28,7 @@ 
 #include "libgomp.h"
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 
 /* This attribute contains PTHREAD_CREATE_DETACHED.  */
 pthread_attr_t gomp_thread_attr;
@@ -43,6 +44,8 @@  __thread struct gomp_thread gomp_tls_data;
 pthread_key_t gomp_tls_key;
 #endif
 
+/* This is to enable best-effort cleanup after fork.  */
+static bool gomp_we_are_forked;
 
 /* This structure is used to communicate across pthread_create.  */
 
@@ -204,42 +207,41 @@  static struct gomp_thread_pool *gomp_new_thread_po
   return pool;
 }
 
+/* Free a thread pool and release its threads. */
+
 static void
 gomp_free_pool_helper (void *thread_pool)
 {
-  struct gomp_thread *thr = gomp_thread ();
   struct gomp_thread_pool *pool
     = (struct gomp_thread_pool *) thread_pool;
   gomp_barrier_wait_last (&pool->threads_dock);
-  gomp_sem_destroy (&thr->release);
-  thr->thread_pool = NULL;
-  thr->task = NULL;
   pthread_exit (NULL);
 }
 
-/* Free a thread pool and release its threads. */
-
-void
-gomp_free_thread (void *arg __attribute__((unused)))
+static void
+gomp_free_thread_pool (bool threads_are_running)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_thread_pool *pool = thr->thread_pool;
   if (pool)
     {
+      int i;
       if (pool->threads_used > 0)
 	{
-	  int i;
-	  for (i = 1; i < pool->threads_used; i++)
+	  if (threads_are_running)
 	    {
-	      struct gomp_thread *nthr = pool->threads[i];
-	      nthr->fn = gomp_free_pool_helper;
-	      nthr->data = pool;
+	      for (i = 1; i < pool->threads_used; i++)
+		{
+		  struct gomp_thread *nthr = pool->threads[i];
+		  nthr->fn = gomp_free_pool_helper;
+		  nthr->data = pool;
+		}
+	      /* This barrier undocks threads docked on pool->threads_dock.  */
+	      gomp_barrier_wait (&pool->threads_dock);
+	      /* And this waits till all threads have called
+		 gomp_barrier_wait_last in gomp_free_pool_helper.  */
+	      gomp_barrier_wait (&pool->threads_dock);
 	    }
-	  /* This barrier undocks threads docked on pool->threads_dock.  */
-	  gomp_barrier_wait (&pool->threads_dock);
-	  /* And this waits till all threads have called gomp_barrier_wait_last
-	     in gomp_free_pool_helper.  */
-	  gomp_barrier_wait (&pool->threads_dock);
 	  /* Now it is safe to destroy the barrier and free the pool.  */
 	  gomp_barrier_destroy (&pool->threads_dock);
 
@@ -251,6 +253,14 @@  gomp_free_pool_helper (void *thread_pool)
 	  gomp_managed_threads -= pool->threads_used - 1L;
 	  gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
+	  /* Clean up thread objects */
+	  for (i = 1; i < pool->threads_used; i++)
+	    {
+	      struct gomp_thread *nthr = pool->threads[i];
+	      gomp_sem_destroy (&nthr->release);
+	      nthr->thread_pool = NULL;
+	      nthr->task = NULL;
+	    }
 	}
       free (pool->threads);
       if (pool->last_team)
@@ -266,6 +276,58 @@  gomp_free_pool_helper (void *thread_pool)
     }
 }
 
+/* This is called whenever a thread exits which has a non-NULL value for
+   gomp_thread_destructor. In practice, the only thread for which this occurs
+   is the one which created the thread pool.
+*/
+void
+gomp_free_thread (void *arg __attribute__((unused)))
+{
+  gomp_free_thread_pool (true);
+}
+
+/* This is called in the child process after a fork.
+
+   According to POSIX, if a process which uses threads calls fork(), then
+   there are very few things that the resulting child process can do safely --
+   mostly just exec().
+
+   However, in practice, (almost?) all POSIX implementations seem to allow
+   arbitrary code to run inside the child, *if* the parent process's threads
+   are in a well-defined state when the fork occurs. And this circumstance can
+   easily arise in OMP-using programs, e.g. when a library function like DGEMM
+   uses OMP internally, and some other unrelated part of the program calls
+   fork() at some other time, when no OMP sections are running.
+
+   Therefore, we make a best effort attempt to handle the case:
+
+     OMP section (in parent) -> quiesce -> fork -> OMP section (in child)
+
+   "Best-effort" here means that:
+   - Your system may or may not be able to handle this kind of code at all;
+     our goal is just to make sure that if it fails it's not gomp's fault.
+   - All threadprivate variables will be reset in the child. Fortunately this
+     is entirely compliant with the spec, according to the rule of nasal
+     demons.
+   - We must have minimal speed impact, and no correctness impact, on
+     compliant programs.
+
+   We use this callback to notice when a fork has a occurred, and if the child
+   later attempts to enter an OMP section (via gomp_team_start), then we know
+   that it is non-compliant, and are free to apply our best-effort strategy of
+   cleaning up the old thread pool structures and spawning a new one. Because
+   compliant programs never call gomp_team_start after forking, they are
+   unaffected.
+*/
+static void
+gomp_after_fork_callback (void)
+{
+  /* Only "async-signal-safe operations" are allowed here, so let's keep it
+     simple. No mutex is needed, because we are currently single-threaded.
+  */
+  gomp_we_are_forked = 1;
+}
+
 /* Launch a team.  */
 
 void
@@ -288,11 +350,19 @@  gomp_team_start (void (*fn) (void *), void *data,
 
   thr = gomp_thread ();
   nested = thr->ts.team != NULL;
+  if (__builtin_expect (gomp_we_are_forked, 0))
+    {
+      gomp_free_thread_pool (0);
+      gomp_we_are_forked = 0;
+    }
   if (__builtin_expect (thr->thread_pool == NULL, 0))
     {
       thr->thread_pool = gomp_new_thread_pool ();
       thr->thread_pool->threads_busy = nthreads;
+      /* The pool should be cleaned up whenever this thread exits... */
       pthread_setspecific (gomp_thread_destructor, thr);
+      /* ...and also in any fork()ed children. */
+      pthread_atfork (NULL, NULL, gomp_after_fork_callback);
     }
   pool = thr->thread_pool;
   task = thr->task;
Index: testsuite/libgomp.c/fork-1.c
===================================================================
--- testsuite/libgomp.c/fork-1.c	(revision 0)
+++ testsuite/libgomp.c/fork-1.c	(working copy)
@@ -0,0 +1,77 @@ 
+/* { dg-do run } */
+/* { dg-timeout 10 } */
+
+#include <omp.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <assert.h>
+
+static int saw[4];
+
+static void
+check_parallel (int exit_on_failure)
+{
+  memset (saw, 0, sizeof (saw));
+  #pragma omp parallel num_threads (2)
+  {
+    int iam = omp_get_thread_num ();
+    saw[iam] = 1;
+  }
+
+  // Encode failure in status code to report to parent process
+  if (exit_on_failure)
+    {
+      if (saw[0] != 1)
+        _exit(1);
+      else if (saw[1] != 1)
+        _exit(2);
+      else if (saw[2] != 0)
+        _exit(3);
+      else if (saw[3] != 0)
+        _exit(4);
+      else
+        _exit(0);
+  }
+  // Use regular assertions
+  else
+    {
+      assert (saw[0] == 1);
+      assert (saw[1] == 1);
+      assert (saw[2] == 0);
+      assert (saw[3] == 0);
+    }
+}
+
+int
+main ()
+{
+  // Initialize the OMP thread pool in the parent process
+  check_parallel (0);
+  pid_t fork_pid = fork();
+  if (fork_pid == -1)
+    return 1;
+  else if (fork_pid == 0)
+    {
+      // Call OMP again in the child process and encode failures in exit
+      // code.
+      check_parallel (1);
+    }
+  else
+    {
+      // Check that OMP runtime is still functional in parent process after
+      // the fork.
+      check_parallel (0);
+
+      // Wait for the child to finish and check the exit code.
+      int child_status = 0;
+      pid_t wait_pid = wait(&child_status);
+      assert (wait_pid == fork_pid);
+      assert (WEXITSTATUS (child_status) == 0);
+
+      // Check that the termination of the child process did not impact
+      // OMP in parent process.
+      check_parallel (0);
+    }
+  return 0;
+}