Message ID | 811aff39780f8d997421dcda8c6afaf01ea1ee6a.1629208174.git.fweimer@redhat.com |
---|---|
State | New |
Headers | show |
Series | Fix ESRCH issues in pthread_cancel, pthread_kill | expand |
On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote: > Thread exit is delayed until all pending pthread_kill operations > on the thread have completed. This avoids sending signals to the > wrong thread or a spurious ESRCH error. > > The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived > from a downstream test originally written by Marek Polacek. And I really hope we could first sort out the BZ#19951 and move away of using pthread::tid as the synchronization member to check thread termination. I sent a possible fix [1], but it does not fully handle the pthread::tid access, it would require to a proper lock to synchronize between thread exit and pthread_kill (as Rich has suggested on BZ#12889). I will need to send an updated version of my pthread fixes patchset, since clone3 changes broke it and INVALID_TD_P needs fixing as well. [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/ > --- > nptl/allocatestack.c | 1 + > nptl/descr.h | 12 ++ > nptl/pthread_create.c | 20 ++++ > nptl/pthread_kill.c | 23 +++- > support/Makefile | 3 +- > sysdeps/pthread/Makefile | 2 + > sysdeps/pthread/tst-kill4.c | 90 -------------- > .../pthread/tst-pthread_cancel-select-loop.c | 87 ++++++++++++++ > sysdeps/pthread/tst-pthread_kill-exiting.c | 110 ++++++++++++++++++ > 9 files changed, 254 insertions(+), 94 deletions(-) > delete mode 100644 sysdeps/pthread/tst-kill4.c > create mode 100644 sysdeps/pthread/tst-pthread_cancel-select-loop.c > create mode 100644 sysdeps/pthread/tst-pthread_kill-exiting.c > > diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c > index cfe37a3443..344f7f8ec5 100644 > --- a/nptl/allocatestack.c > +++ b/nptl/allocatestack.c > @@ -127,6 +127,7 @@ get_cached_stack (size_t *sizep, void **memp) > /* No pending event. */ > result->nextevent = NULL; > > + result->exit_signal_futex = 0; > result->tls_state = (struct tls_internal_t) { 0 }; > > /* Clear the DTV. */ > diff --git a/nptl/descr.h b/nptl/descr.h > index c85778d449..7255d1dc77 100644 > --- a/nptl/descr.h > +++ b/nptl/descr.h > @@ -396,6 +396,18 @@ struct pthread > PTHREAD_CANCEL_ASYNCHRONOUS). */ > unsigned char canceltype; > > + /* Futex to prevent thread exit during concurrent pthread_kill. The > + least-significant bit is an exit flag. The remaining bits count > + the number of pthread_kill operations in progress. > + > + Upon exit, the thread bit sets the LSB, and waits until the futex > + variable is 1. > + > + pthread_kill atomically adds 2 to the futex variable before the > + signal-sending operation, subtracts 2 after it, and skips sending > + the signal if the least-significant bit is set. */ > + unsigned int exit_signal_futex; > + > /* Used on strsignal. */ > struct tls_internal_t tls_state; > > diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c > index d8ec299cb1..7e1a3cfcdd 100644 > --- a/nptl/pthread_create.c > +++ b/nptl/pthread_create.c > @@ -37,6 +37,7 @@ > #include <sys/single_threaded.h> > #include <version.h> > #include <clone_internal.h> > +#include <futex-internal.h> > > #include <shlib-compat.h> > > @@ -485,6 +486,25 @@ start_thread (void *arg) > /* This was the last thread. */ > exit (0); > > + /* Blocking signals is required to make pthread_kill > + async-signal-safe. It prevents sending a signal from this thread > + to itself during its final stages. This must come after the exit > + call above because atexit handlers must not run with signals > + blocked. */ > + __libc_signal_block_all (NULL); > + > + /* Delay exiting this thread until there are no concurrent > + pthread_kill operations in progress. */ > + { > + /* Indicate that no further signals should be sent. */ > + atomic_fetch_or_release (&pd->exit_signal_futex, 1); > + > + /* Wait until there are no pthread_kill operations left. */ > + unsigned int val; > + while ((val = atomic_load_acquire (&pd->exit_signal_futex)) != 1) > + futex_wait (&pd->exit_signal_futex, val, FUTEX_PRIVATE); > + } > + > #ifndef __ASSUME_SET_ROBUST_LIST > /* If this thread has any robust mutexes locked, handle them now. */ > # if __PTHREAD_MUTEX_HAVE_PREV > diff --git a/nptl/pthread_kill.c b/nptl/pthread_kill.c > index 5d4c86f920..b67bd4b769 100644 > --- a/nptl/pthread_kill.c > +++ b/nptl/pthread_kill.c > @@ -18,6 +18,7 @@ > > #include <unistd.h> > #include <pthreadP.h> > +#include <futex-internal.h> > #include <shlib-compat.h> > > int > @@ -41,9 +42,25 @@ __pthread_kill_internal (pthread_t threadid, int signo) > { > pid_t pid = __getpid (); > > - val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo); > - val = (INTERNAL_SYSCALL_ERROR_P (val) > - ? INTERNAL_SYSCALL_ERRNO (val) : 0); > + if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, 2) & 1) > + /* If the thread is exiting, it has already blocked all > + signals, so sending the signal does not have any effect. > + Not sending the signal avoids a spurious ESRCH from tgkill > + if the thread manages to complete its exit before the > + signal can be sent. */ > + val = 0; > + else > + { > + val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo); > + val = (INTERNAL_SYSCALL_ERROR_P (val) > + ? INTERNAL_SYSCALL_ERRNO (val) : 0); > + } > + > + /* A value 3 before the update indicates that the thread is > + exiting, and this is the last remaining concurrent > + pthread_kill operation. */ > + if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, -2) == 3) > + futex_wake (&pd->exit_signal_futex, 1, FUTEX_PRIVATE); > } > else > /* The kernel reports that the thread has exited. POSIX specifies > diff --git a/support/Makefile b/support/Makefile > index a462781718..ef2b1a980a 100644 > --- a/support/Makefile > +++ b/support/Makefile > @@ -82,9 +82,10 @@ libsupport-routines = \ > support_test_compare_blob \ > support_test_compare_failure \ > support_test_compare_string \ > - support_write_file_string \ > support_test_main \ > support_test_verify_impl \ > + support_wait_for_thread_exit \ > + support_write_file_string \ > temp_file \ > timespec \ > timespec-time64 \ > diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile > index dedfa0d290..48dba717a1 100644 > --- a/sysdeps/pthread/Makefile > +++ b/sysdeps/pthread/Makefile > @@ -119,7 +119,9 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ > tst-unwind-thread \ > tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \ > tst-pthread_cancel-exited \ > + tst-pthread_cancel-select-loop \ > tst-pthread_kill-exited \ > + tst-pthread_kill-exiting \ > # tests > > tests-time64 := \ > diff --git a/sysdeps/pthread/tst-kill4.c b/sysdeps/pthread/tst-kill4.c > deleted file mode 100644 > index 9563939792..0000000000 > --- a/sysdeps/pthread/tst-kill4.c > +++ /dev/null > @@ -1,90 +0,0 @@ > -/* Copyright (C) 2003-2021 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - Contributed by Ulrich Drepper <drepper@redhat.com>, 2003. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. */ > - > -#include <errno.h> > -#include <pthread.h> > -#include <signal.h> > -#include <stdio.h> > -#include <stdlib.h> > -#include <unistd.h> > - > - > -static void * > -tf (void *a) > -{ > - return NULL; > -} > - > - > -int > -do_test (void) > -{ > - pthread_attr_t at; > - if (pthread_attr_init (&at) != 0) > - { > - puts ("attr_create failed"); > - exit (1); > - } > - > - /* Limit thread stack size, because if it is too large, pthread_join > - will free it immediately rather than put it into stack cache. */ > - if (pthread_attr_setstacksize (&at, 2 * 1024 * 1024) != 0) > - { > - puts ("setstacksize failed"); > - exit (1); > - } > - > - pthread_t th; > - if (pthread_create (&th, &at, tf, NULL) != 0) > - { > - puts ("create failed"); > - exit (1); > - } > - > - pthread_attr_destroy (&at); > - > - if (pthread_join (th, NULL) != 0) > - { > - puts ("join failed"); > - exit (1); > - } > - > - /* The following only works because we assume here something about > - the implementation. Namely, that the memory allocated for the > - thread descriptor is not going away, that the TID field is > - cleared and therefore the signal is sent to process 0, and that > - we can savely assume there is no other process with this ID at > - that time. */ > - int e = pthread_kill (th, 0); > - if (e == 0) > - { > - puts ("pthread_kill succeeded"); > - exit (1); > - } > - if (e != ESRCH) > - { > - puts ("pthread_kill didn't return ESRCH"); > - exit (1); > - } > - > - return 0; > -} > - > - > -#define TEST_FUNCTION do_test () > -#include "../test-skeleton.c" > diff --git a/sysdeps/pthread/tst-pthread_cancel-select-loop.c b/sysdeps/pthread/tst-pthread_cancel-select-loop.c > new file mode 100644 > index 0000000000..a62087589c > --- /dev/null > +++ b/sysdeps/pthread/tst-pthread_cancel-select-loop.c > @@ -0,0 +1,87 @@ > +/* Test that pthread_cancel succeeds during thread exit. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* This test tries to trigger an internal race condition in > + pthread_cancel, where the cancellation signal is sent after the > + thread has begun the cancellation process. This can result in a > + spurious ESRCH error. For the original bug 12889, the window is > + quite small, so the bug was not reproduced in every run. */ > + > +#include <stdbool.h> > +#include <stddef.h> > +#include <support/check.h> > +#include <support/xthread.h> > +#include <support/xunistd.h> > +#include <sys/select.h> > +#include <unistd.h> > + > +/* Set to true by timeout_thread_function when the test should > + terminate. */ > +static bool timeout; > + > +static void * > +timeout_thread_function (void *unused) > +{ > + usleep (5 * 1000 * 1000); > + __atomic_store_n (&timeout, true, __ATOMIC_RELAXED); > + return NULL; > +} > + > +/* Used for blocking the select function below. */ > +static int pipe_fds[2]; > + > +static void * > +canceled_thread_function (void *unused) > +{ > + while (true) > + { > + fd_set rfs; > + fd_set wfs; > + fd_set efs; > + FD_ZERO (&rfs); > + FD_ZERO (&wfs); > + FD_ZERO (&efs); > + FD_SET (pipe_fds[0], &rfs); > + > + /* If the cancellation request is recognized early, the thread > + begins exiting while the cancellation signal arrives. */ > + select (FD_SETSIZE, &rfs, &wfs, &efs, NULL); > + } > + return NULL; > +} > + > +static int > +do_test (void) > +{ > + xpipe (pipe_fds); > + pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL); > + > + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) > + { > + pthread_t thr = xpthread_create (NULL, canceled_thread_function, NULL); > + xpthread_cancel (thr); > + TEST_VERIFY (xpthread_join (thr) == PTHREAD_CANCELED); > + } > + > + xpthread_join (thr_timeout); > + xclose (pipe_fds[0]); > + xclose (pipe_fds[1]); > + return 0; > +} > + > +#include <support/test-driver.c> > diff --git a/sysdeps/pthread/tst-pthread_kill-exiting.c b/sysdeps/pthread/tst-pthread_kill-exiting.c > new file mode 100644 > index 0000000000..15550d2310 > --- /dev/null > +++ b/sysdeps/pthread/tst-pthread_kill-exiting.c > @@ -0,0 +1,110 @@ > +/* Test that pthread_kill succeeds during thread exit. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* This test verifies that pthread_kill for a thread that is exiting > + succeeds (with or without actually delivering the signal). */ > + > +#include <array_length.h> > +#include <stdbool.h> > +#include <stddef.h> > +#include <support/xsignal.h> > +#include <support/xthread.h> > +#include <unistd.h> > + > +/* Set to true by timeout_thread_function when the test should > + terminate. */ > +static bool timeout; > + > +static void * > +timeout_thread_function (void *unused) > +{ > + usleep (1000 * 1000); > + __atomic_store_n (&timeout, true, __ATOMIC_RELAXED); > + return NULL; > +} > + > +/* Used to synchronize the sending threads with the target thread and > + main thread. */ > +static pthread_barrier_t barrier_1; > +static pthread_barrier_t barrier_2; > + > +/* The target thread to which signals are to be sent. */ > +static pthread_t target_thread; > + > +static void * > +sender_thread_function (void *unused) > +{ > + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) > + { > + /* Wait until target_thread has been initialized. The target > + thread and main thread participate in this barrier. */ > + xpthread_barrier_wait (&barrier_1); > + > + xpthread_kill (target_thread, SIGUSR1); > + > + /* Communicate that the signal has been sent. The main thread > + participates in this barrier. */ > + xpthread_barrier_wait (&barrier_2); > + } > + return NULL; > +} > + > +static void * > +target_thread_function (void *unused) > +{ > + target_thread = pthread_self (); > + xpthread_barrier_wait (&barrier_1); > + return NULL; > +} > + > +static int > +do_test (void) > +{ > + xsignal (SIGUSR1, SIG_IGN); > + > + pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL); > + > + pthread_t threads[4]; > + xpthread_barrier_init (&barrier_1, NULL, array_length (threads) + 2); > + xpthread_barrier_init (&barrier_2, NULL, array_length (threads) + 1); > + > + for (int i = 0; i < array_length (threads); ++i) > + threads[i] = xpthread_create (NULL, sender_thread_function, NULL); > + > + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) > + { > + xpthread_create (NULL, target_thread_function, NULL); > + > + /* Wait for the target thread to be set up and signal sending to > + start. */ > + xpthread_barrier_wait (&barrier_1); > + > + /* Wait for signal sending to complete. */ > + xpthread_barrier_wait (&barrier_2); > + > + xpthread_join (target_thread); > + } > + > + for (int i = 0; i < array_length (threads); ++i) > + xpthread_join (threads[i]); > + xpthread_join (thr_timeout); > + > + return 0; > +} > + > +#include <support/test-driver.c> >
* Adhemerval Zanella via Libc-alpha: > On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote: >> Thread exit is delayed until all pending pthread_kill operations >> on the thread have completed. This avoids sending signals to the >> wrong thread or a spurious ESRCH error. >> >> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived >> from a downstream test originally written by Marek Polacek. > > And I really hope we could first sort out the BZ#19951 and move away > of using pthread::tid as the synchronization member to check thread > termination. I sent a possible fix [1], but it does not fully handle > the pthread::tid access, it would require to a proper lock to > synchronize between thread exit and pthread_kill (as Rich has suggested > on BZ#12889). > > I will need to send an updated version of my pthread fixes patchset, > since clone3 changes broke it and INVALID_TD_P needs fixing as well. > > [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/ I think my patch is independent of the other fixes. Its advantage over other proposals is that it does not add waiting to pthread_kill, so there is no signal-safety issue. (But I probably should test if cancellation from a signal handler that is invoked synchronously via pthread_kill still works.) Thanks, Florian
On 19/08/2021 13:37, Florian Weimer wrote: > * Adhemerval Zanella via Libc-alpha: > >> On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote: >>> Thread exit is delayed until all pending pthread_kill operations >>> on the thread have completed. This avoids sending signals to the >>> wrong thread or a spurious ESRCH error. >>> >>> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived >>> from a downstream test originally written by Marek Polacek. >> >> And I really hope we could first sort out the BZ#19951 and move away >> of using pthread::tid as the synchronization member to check thread >> termination. I sent a possible fix [1], but it does not fully handle >> the pthread::tid access, it would require to a proper lock to >> synchronize between thread exit and pthread_kill (as Rich has suggested >> on BZ#12889). >> >> I will need to send an updated version of my pthread fixes patchset, >> since clone3 changes broke it and INVALID_TD_P needs fixing as well. >> >> [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/ > > I think my patch is independent of the other fixes. Its advantage over > other proposals is that it does not add waiting to pthread_kill, so > there is no signal-safety issue. I am not sure, the way I think we should fix to also fix BZ#19951 would to add a lock to access 'tid' and uses not only on pthread_kill, but also on pthread_getcpuclockid, pthread_getschedparam, pthread_setschedparam, and pthread_setschedprio. The lock also simplifies the code, there is no need to special handling of futex internal state. I also think pthread_kill would also be simplified. The lock is just to simplify the code, if we really need to add some scalability we might use a more clever code as you are proposing. > > (But I probably should test if cancellation from a signal handler that > is invoked synchronously via pthread_kill still works.) > > Thanks, > Florian >
* Adhemerval Zanella: > On 19/08/2021 13:37, Florian Weimer wrote: >> * Adhemerval Zanella via Libc-alpha: >> >>> On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote: >>>> Thread exit is delayed until all pending pthread_kill operations >>>> on the thread have completed. This avoids sending signals to the >>>> wrong thread or a spurious ESRCH error. >>>> >>>> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived >>>> from a downstream test originally written by Marek Polacek. >>> >>> And I really hope we could first sort out the BZ#19951 and move away >>> of using pthread::tid as the synchronization member to check thread >>> termination. I sent a possible fix [1], but it does not fully handle >>> the pthread::tid access, it would require to a proper lock to >>> synchronize between thread exit and pthread_kill (as Rich has suggested >>> on BZ#12889). >>> >>> I will need to send an updated version of my pthread fixes patchset, >>> since clone3 changes broke it and INVALID_TD_P needs fixing as well. >>> >>> [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/ >> >> I think my patch is independent of the other fixes. Its advantage over >> other proposals is that it does not add waiting to pthread_kill, so >> there is no signal-safety issue. > > I am not sure, the way I think we should fix to also fix BZ#19951 > would to add a lock to access 'tid' and uses not only on pthread_kill, > but also on pthread_getcpuclockid, pthread_getschedparam, > pthread_setschedparam, and pthread_setschedprio. The lock also > simplifies the code, there is no need to special handling of futex > internal state. I also think pthread_kill would also be simplified. > > The lock is just to simplify the code, if we really need to add > some scalability we might use a more clever code as you are proposing. I don't think we can wait for the fix for bug 19951 because glibc 2.34 has a real regression in pthread_cancel: it can fail spuriously with ESRCH. And it can happen with a single pthread_cancel call in the program, you don't even need concurrent pthread_cancel to trigger this bug now. I have moved the magic no-TID-reuse functionality into a separate function in the new version, so we can replace it once we have something better. Thanks, Florian
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index cfe37a3443..344f7f8ec5 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -127,6 +127,7 @@ get_cached_stack (size_t *sizep, void **memp) /* No pending event. */ result->nextevent = NULL; + result->exit_signal_futex = 0; result->tls_state = (struct tls_internal_t) { 0 }; /* Clear the DTV. */ diff --git a/nptl/descr.h b/nptl/descr.h index c85778d449..7255d1dc77 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -396,6 +396,18 @@ struct pthread PTHREAD_CANCEL_ASYNCHRONOUS). */ unsigned char canceltype; + /* Futex to prevent thread exit during concurrent pthread_kill. The + least-significant bit is an exit flag. The remaining bits count + the number of pthread_kill operations in progress. + + Upon exit, the thread bit sets the LSB, and waits until the futex + variable is 1. + + pthread_kill atomically adds 2 to the futex variable before the + signal-sending operation, subtracts 2 after it, and skips sending + the signal if the least-significant bit is set. */ + unsigned int exit_signal_futex; + /* Used on strsignal. */ struct tls_internal_t tls_state; diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c index d8ec299cb1..7e1a3cfcdd 100644 --- a/nptl/pthread_create.c +++ b/nptl/pthread_create.c @@ -37,6 +37,7 @@ #include <sys/single_threaded.h> #include <version.h> #include <clone_internal.h> +#include <futex-internal.h> #include <shlib-compat.h> @@ -485,6 +486,25 @@ start_thread (void *arg) /* This was the last thread. */ exit (0); + /* Blocking signals is required to make pthread_kill + async-signal-safe. It prevents sending a signal from this thread + to itself during its final stages. This must come after the exit + call above because atexit handlers must not run with signals + blocked. */ + __libc_signal_block_all (NULL); + + /* Delay exiting this thread until there are no concurrent + pthread_kill operations in progress. */ + { + /* Indicate that no further signals should be sent. */ + atomic_fetch_or_release (&pd->exit_signal_futex, 1); + + /* Wait until there are no pthread_kill operations left. */ + unsigned int val; + while ((val = atomic_load_acquire (&pd->exit_signal_futex)) != 1) + futex_wait (&pd->exit_signal_futex, val, FUTEX_PRIVATE); + } + #ifndef __ASSUME_SET_ROBUST_LIST /* If this thread has any robust mutexes locked, handle them now. */ # if __PTHREAD_MUTEX_HAVE_PREV diff --git a/nptl/pthread_kill.c b/nptl/pthread_kill.c index 5d4c86f920..b67bd4b769 100644 --- a/nptl/pthread_kill.c +++ b/nptl/pthread_kill.c @@ -18,6 +18,7 @@ #include <unistd.h> #include <pthreadP.h> +#include <futex-internal.h> #include <shlib-compat.h> int @@ -41,9 +42,25 @@ __pthread_kill_internal (pthread_t threadid, int signo) { pid_t pid = __getpid (); - val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo); - val = (INTERNAL_SYSCALL_ERROR_P (val) - ? INTERNAL_SYSCALL_ERRNO (val) : 0); + if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, 2) & 1) + /* If the thread is exiting, it has already blocked all + signals, so sending the signal does not have any effect. + Not sending the signal avoids a spurious ESRCH from tgkill + if the thread manages to complete its exit before the + signal can be sent. */ + val = 0; + else + { + val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo); + val = (INTERNAL_SYSCALL_ERROR_P (val) + ? INTERNAL_SYSCALL_ERRNO (val) : 0); + } + + /* A value 3 before the update indicates that the thread is + exiting, and this is the last remaining concurrent + pthread_kill operation. */ + if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, -2) == 3) + futex_wake (&pd->exit_signal_futex, 1, FUTEX_PRIVATE); } else /* The kernel reports that the thread has exited. POSIX specifies diff --git a/support/Makefile b/support/Makefile index a462781718..ef2b1a980a 100644 --- a/support/Makefile +++ b/support/Makefile @@ -82,9 +82,10 @@ libsupport-routines = \ support_test_compare_blob \ support_test_compare_failure \ support_test_compare_string \ - support_write_file_string \ support_test_main \ support_test_verify_impl \ + support_wait_for_thread_exit \ + support_write_file_string \ temp_file \ timespec \ timespec-time64 \ diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile index dedfa0d290..48dba717a1 100644 --- a/sysdeps/pthread/Makefile +++ b/sysdeps/pthread/Makefile @@ -119,7 +119,9 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ tst-unwind-thread \ tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \ tst-pthread_cancel-exited \ + tst-pthread_cancel-select-loop \ tst-pthread_kill-exited \ + tst-pthread_kill-exiting \ # tests tests-time64 := \ diff --git a/sysdeps/pthread/tst-kill4.c b/sysdeps/pthread/tst-kill4.c deleted file mode 100644 index 9563939792..0000000000 --- a/sysdeps/pthread/tst-kill4.c +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (C) 2003-2021 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2003. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <errno.h> -#include <pthread.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - - -static void * -tf (void *a) -{ - return NULL; -} - - -int -do_test (void) -{ - pthread_attr_t at; - if (pthread_attr_init (&at) != 0) - { - puts ("attr_create failed"); - exit (1); - } - - /* Limit thread stack size, because if it is too large, pthread_join - will free it immediately rather than put it into stack cache. */ - if (pthread_attr_setstacksize (&at, 2 * 1024 * 1024) != 0) - { - puts ("setstacksize failed"); - exit (1); - } - - pthread_t th; - if (pthread_create (&th, &at, tf, NULL) != 0) - { - puts ("create failed"); - exit (1); - } - - pthread_attr_destroy (&at); - - if (pthread_join (th, NULL) != 0) - { - puts ("join failed"); - exit (1); - } - - /* The following only works because we assume here something about - the implementation. Namely, that the memory allocated for the - thread descriptor is not going away, that the TID field is - cleared and therefore the signal is sent to process 0, and that - we can savely assume there is no other process with this ID at - that time. */ - int e = pthread_kill (th, 0); - if (e == 0) - { - puts ("pthread_kill succeeded"); - exit (1); - } - if (e != ESRCH) - { - puts ("pthread_kill didn't return ESRCH"); - exit (1); - } - - return 0; -} - - -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" diff --git a/sysdeps/pthread/tst-pthread_cancel-select-loop.c b/sysdeps/pthread/tst-pthread_cancel-select-loop.c new file mode 100644 index 0000000000..a62087589c --- /dev/null +++ b/sysdeps/pthread/tst-pthread_cancel-select-loop.c @@ -0,0 +1,87 @@ +/* Test that pthread_cancel succeeds during thread exit. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This test tries to trigger an internal race condition in + pthread_cancel, where the cancellation signal is sent after the + thread has begun the cancellation process. This can result in a + spurious ESRCH error. For the original bug 12889, the window is + quite small, so the bug was not reproduced in every run. */ + +#include <stdbool.h> +#include <stddef.h> +#include <support/check.h> +#include <support/xthread.h> +#include <support/xunistd.h> +#include <sys/select.h> +#include <unistd.h> + +/* Set to true by timeout_thread_function when the test should + terminate. */ +static bool timeout; + +static void * +timeout_thread_function (void *unused) +{ + usleep (5 * 1000 * 1000); + __atomic_store_n (&timeout, true, __ATOMIC_RELAXED); + return NULL; +} + +/* Used for blocking the select function below. */ +static int pipe_fds[2]; + +static void * +canceled_thread_function (void *unused) +{ + while (true) + { + fd_set rfs; + fd_set wfs; + fd_set efs; + FD_ZERO (&rfs); + FD_ZERO (&wfs); + FD_ZERO (&efs); + FD_SET (pipe_fds[0], &rfs); + + /* If the cancellation request is recognized early, the thread + begins exiting while the cancellation signal arrives. */ + select (FD_SETSIZE, &rfs, &wfs, &efs, NULL); + } + return NULL; +} + +static int +do_test (void) +{ + xpipe (pipe_fds); + pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL); + + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) + { + pthread_t thr = xpthread_create (NULL, canceled_thread_function, NULL); + xpthread_cancel (thr); + TEST_VERIFY (xpthread_join (thr) == PTHREAD_CANCELED); + } + + xpthread_join (thr_timeout); + xclose (pipe_fds[0]); + xclose (pipe_fds[1]); + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/pthread/tst-pthread_kill-exiting.c b/sysdeps/pthread/tst-pthread_kill-exiting.c new file mode 100644 index 0000000000..15550d2310 --- /dev/null +++ b/sysdeps/pthread/tst-pthread_kill-exiting.c @@ -0,0 +1,110 @@ +/* Test that pthread_kill succeeds during thread exit. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This test verifies that pthread_kill for a thread that is exiting + succeeds (with or without actually delivering the signal). */ + +#include <array_length.h> +#include <stdbool.h> +#include <stddef.h> +#include <support/xsignal.h> +#include <support/xthread.h> +#include <unistd.h> + +/* Set to true by timeout_thread_function when the test should + terminate. */ +static bool timeout; + +static void * +timeout_thread_function (void *unused) +{ + usleep (1000 * 1000); + __atomic_store_n (&timeout, true, __ATOMIC_RELAXED); + return NULL; +} + +/* Used to synchronize the sending threads with the target thread and + main thread. */ +static pthread_barrier_t barrier_1; +static pthread_barrier_t barrier_2; + +/* The target thread to which signals are to be sent. */ +static pthread_t target_thread; + +static void * +sender_thread_function (void *unused) +{ + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) + { + /* Wait until target_thread has been initialized. The target + thread and main thread participate in this barrier. */ + xpthread_barrier_wait (&barrier_1); + + xpthread_kill (target_thread, SIGUSR1); + + /* Communicate that the signal has been sent. The main thread + participates in this barrier. */ + xpthread_barrier_wait (&barrier_2); + } + return NULL; +} + +static void * +target_thread_function (void *unused) +{ + target_thread = pthread_self (); + xpthread_barrier_wait (&barrier_1); + return NULL; +} + +static int +do_test (void) +{ + xsignal (SIGUSR1, SIG_IGN); + + pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL); + + pthread_t threads[4]; + xpthread_barrier_init (&barrier_1, NULL, array_length (threads) + 2); + xpthread_barrier_init (&barrier_2, NULL, array_length (threads) + 1); + + for (int i = 0; i < array_length (threads); ++i) + threads[i] = xpthread_create (NULL, sender_thread_function, NULL); + + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) + { + xpthread_create (NULL, target_thread_function, NULL); + + /* Wait for the target thread to be set up and signal sending to + start. */ + xpthread_barrier_wait (&barrier_1); + + /* Wait for signal sending to complete. */ + xpthread_barrier_wait (&barrier_2); + + xpthread_join (target_thread); + } + + for (int i = 0; i < array_length (threads); ++i) + xpthread_join (threads[i]); + xpthread_join (thr_timeout); + + return 0; +} + +#include <support/test-driver.c>