Message ID | 20201127143355.1844509-1-lamm@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | nptl: Fix __futex_clocklock64 return error check [BZ #26964] | expand |
On 27/11/2020 11:33, Lucas A. M. Magalhaes via Libc-alpha wrote: > The earlier implementation of this, __lll_clocklock, calls lll_clockwait > that doesn't return the futex syscall error codes. It always tries again > if that fails. > > However in the current implementation, when the futex returns EAGAIN, > __futex_clocklock64 will also return EGAIN, even if the futex is taken. > > This patch fixes the EAGAIN issue and also adds a check for EINTR. As > futex syscall can return EINTR if the thread is interrupted by a signal. > In this case I'm assuming the function should continue trying to lock as > there is no mention to about it on POSIX. Also add a test for both > scenarios. Thanks for working on this, I should have caught this on the "y2038: nptl: Convert pthread_mutex_{clock|timed}lock to support 64 bit" review. Some comments below. > --- > nptl/Makefile | 2 +- > nptl/tst-pthread-timedlock-lockloop.c | 136 ++++++++++++++++++++++++++ > sysdeps/nptl/futex-internal.h | 9 ++ > 3 files changed, 146 insertions(+), 1 deletion(-) > create mode 100644 nptl/tst-pthread-timedlock-lockloop.c > > diff --git a/nptl/Makefile b/nptl/Makefile > index a48426a396..91324e09f2 100644 > --- a/nptl/Makefile > +++ b/nptl/Makefile > @@ -298,7 +298,7 @@ tests = tst-attr2 tst-attr3 tst-default-attr \ > tst-thread-affinity-sched \ > tst-pthread-defaultattr-free \ > tst-pthread-attr-sigmask \ > - > + tst-pthread-timedlock-lockloop \ > > tests-container = tst-pthread-getattr > Ok, although the final '\' is not required. > diff --git a/nptl/tst-pthread-timedlock-lockloop.c b/nptl/tst-pthread-timedlock-lockloop.c > new file mode 100644 > index 0000000000..abcc5724cf > --- /dev/null > +++ b/nptl/tst-pthread-timedlock-lockloop.c > @@ -0,0 +1,136 @@ > +/* Make sure pthrea_mutex_timedlock doesn't return spurious error codes. > + > + Copyright (C) 2020 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > +#include <errno.h> > +#include <pthread.h> > +#include <signal.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <time.h> > + > +#define NANO_PER_SEC 1000000000LL > +#define TIMEOUT (NANO_PER_SEC / 1000LL) > +#define NUM_THREADS 50 > +#define RETEST_TIMES 100 > + > +struct worker_args > +{ > + pthread_mutex_t mtx; > + int runs; > +}; > + > +void > +signal_handler (int sig_num) > +{ > + if (sig_num != SIGUSR1) > + printf ("Unexpected signal"); > +} Use TEST_COMPARE, but I would just remove the check since printf is not really async-signal-safe. > + > +/* Call pthread_mutex_timedlock()/pthread_mutex_unlock() repetitively, hoping > + that one of them returns EAGAIN or EINTR unexpectedly. */ > +static void * > +worker (void *arg) > +{ > + pthread_mutex_t *mtx = &(((struct worker_args *) arg)->mtx); Move both the mutex and the iteration number to a global variable, it would simplify the code a lot. > + struct timespec abs_time; > + signal (SIGUSR1, signal_handler); This is done concurrent signal handler set, which I think is not what you meant. Move is to do_test function prior thread creation. > + > + for (unsigned run = 0; run < ((struct worker_args *) arg)->runs; run++) > + { > + clock_gettime (CLOCK_REALTIME, &abs_time); > + abs_time.tv_nsec += TIMEOUT; > + if (abs_time.tv_nsec >= NANO_PER_SEC) > + { > + abs_time.tv_sec++; > + abs_time.tv_nsec -= NANO_PER_SEC; > + } Use: struct timespec abs_time = timespec_add (xclock_now (CLOCK_REALTIME), make_timespec (0, 1000000)); > + > + int ret = pthread_mutex_timedlock (mtx, &abs_time); Check also pthread_mutex_clocklock against CLOCK_REALTIME and CLOCK_MONOTONIC (maybe by executing the same batch check with different worker threads). > + > + if (ret == 0) > + pthread_mutex_unlock (mtx); Use xpthread_mutex_unlock > + > + if (ret == EAGAIN || ret == EINTR) > + { > + printf ("Unexpected return %d\n", ret); > + return (void *) 1; > + } > + } Use TEST_VERIFY_EXIT (ret != EAGAIN && ret != EINTR); Or just: TEST_VERIFY_EXIT (ret == 0 || ret == ETIMEDOUT) This will bail early in the case of failure and simplify the error handling. > + return NULL; > +} > + > +static int > +do_test (void) > +{ > + pthread_t *workers = > + (pthread_t *) malloc (NUM_THREADS * sizeof (pthread_t)); Use xmalloc, but since NUM_THREADS is quite small and pthread_t is an a word size on all supported ABI just allocate on the stack. > + > + struct worker_args args; > + pthread_mutex_init (&(args.mtx), NULL); Move it to a global variable, it would simplify the code a lot. > + > + int *thread_ret = NULL, ret = 0; > + > + /* Run the checks to catch an EAGAIN. */ > + /* As there is no way to ensure the error condition, just run the test many > + times hoping to catch the error. */ > + args.runs = 100; > + for (int run = 0; run < RETEST_TIMES; run++) > + { > + for (int i = 0; i < NUM_THREADS; i++) > + { > + pthread_create (&workers[i], NULL, worker, (void *) &args); Use xpthread_create and remove the brackets. > + } > + for (int i = 0; i < NUM_THREADS; i++) > + { > + pthread_join (workers[i], (void **) &thread_ret); Use xpthread_join. > + if (thread_ret != NULL) > + ++ret; > + } > + if (ret != 0) > + goto err; There is no need to this check if you just bail out on a failure in the worker thread. > + } > + > + /* Run the test to check if we catch an EINTR. */ > + /* As there is no way to ensure the error condition, just run the test many > + times hoping to catch the error. */ > + pthread_t thread; > + args.runs = 1; > + for (int i = 0; i < RETEST_TIMES * 1000; i++) > + { > + if (pthread_mutex_lock (&(args.mtx)) != 0) > + { > + printf ("Mutex lock failed\n"); > + goto err; > + } > + pthread_create (&thread, NULL, worker, (void *) &args); > + pthread_kill (thread, SIGUSR1); This is most like won't interrupt the pthread mutex lock depending of the scheduling and the underlying hardware, but I don't have a better alternative than adding a small timeout before callling pthread_kill (which it is itself flaky as well). > + pthread_mutex_unlock (&(args.mtx)); > + pthread_join (thread, (void **) &thread_ret); The the pthread function from support/xthread.h. > + if (thread_ret != NULL) > + goto err; > + } > + > + free (workers); > + return 0; > + > +err: > + free (workers); > + return 1; > +} > + > +#include <support/test-driver.c> > diff --git a/sysdeps/nptl/futex-internal.h b/sysdeps/nptl/futex-internal.h > index e67803952f..f16d26d994 100644 > --- a/sysdeps/nptl/futex-internal.h > +++ b/sysdeps/nptl/futex-internal.h > @@ -424,10 +424,19 @@ __futex_clocklock64 (int *futex, clockid_t clockid, > { > while (atomic_exchange_acq (futex, 2) != 0) > { > + /* At this point we tried to get the futex but failed and set its > + value to 2. However the futex value can be changed by other > + thread before this calls the futex syscall. If so the syscall > + will return EAGAIN. */ > err = __futex_abstimed_wait64 ((unsigned int *) futex, 2, clockid, > abstime, private); > if (err == EINVAL || err == ETIMEDOUT || err == EOVERFLOW) > break; > + /* If EAGAIN or EINTR is returned here the error code should be reset > + as we will try again to acquire the futex and it may success. > + Otherwise the mutex will be locked and the return will not be 0. */ > + if (err == EAGAIN || err == EINTR) > + err = 0; > } > } > return err; > Indeed, it should be similar to what it is done on nptl/pthread_join_common.c. Maybe something more simple as below, it generates less code: static __always_inline int __futex_clocklock64 (int *futex, clockid_t clockid, const struct __timespec64 *abstime, int private) { if (__glibc_unlikely (atomic_compare_and_exchange_bool_acq (futex, 1, 0))) { while (atomic_exchange_acq (futex, 2) != 0) { int err = __futex_abstimed_wait64 ( (unsigned int *) futex, 2, clockid, abstime, private); if (err == EINVAL || err == ETIMEDOUT || err == EOVERFLOW) return err; } } return 0; } (I am not sure if EINVAL will even be returned in current __futex_clocklock64, but it should be ok to test it).
diff --git a/nptl/Makefile b/nptl/Makefile index a48426a396..91324e09f2 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -298,7 +298,7 @@ tests = tst-attr2 tst-attr3 tst-default-attr \ tst-thread-affinity-sched \ tst-pthread-defaultattr-free \ tst-pthread-attr-sigmask \ - + tst-pthread-timedlock-lockloop \ tests-container = tst-pthread-getattr diff --git a/nptl/tst-pthread-timedlock-lockloop.c b/nptl/tst-pthread-timedlock-lockloop.c new file mode 100644 index 0000000000..abcc5724cf --- /dev/null +++ b/nptl/tst-pthread-timedlock-lockloop.c @@ -0,0 +1,136 @@ +/* Make sure pthrea_mutex_timedlock doesn't return spurious error codes. + + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#define NANO_PER_SEC 1000000000LL +#define TIMEOUT (NANO_PER_SEC / 1000LL) +#define NUM_THREADS 50 +#define RETEST_TIMES 100 + +struct worker_args +{ + pthread_mutex_t mtx; + int runs; +}; + +void +signal_handler (int sig_num) +{ + if (sig_num != SIGUSR1) + printf ("Unexpected signal"); +} + +/* Call pthread_mutex_timedlock()/pthread_mutex_unlock() repetitively, hoping + that one of them returns EAGAIN or EINTR unexpectedly. */ +static void * +worker (void *arg) +{ + pthread_mutex_t *mtx = &(((struct worker_args *) arg)->mtx); + struct timespec abs_time; + signal (SIGUSR1, signal_handler); + + for (unsigned run = 0; run < ((struct worker_args *) arg)->runs; run++) + { + clock_gettime (CLOCK_REALTIME, &abs_time); + abs_time.tv_nsec += TIMEOUT; + if (abs_time.tv_nsec >= NANO_PER_SEC) + { + abs_time.tv_sec++; + abs_time.tv_nsec -= NANO_PER_SEC; + } + + int ret = pthread_mutex_timedlock (mtx, &abs_time); + + if (ret == 0) + pthread_mutex_unlock (mtx); + + if (ret == EAGAIN || ret == EINTR) + { + printf ("Unexpected return %d\n", ret); + return (void *) 1; + } + } + return NULL; +} + +static int +do_test (void) +{ + pthread_t *workers = + (pthread_t *) malloc (NUM_THREADS * sizeof (pthread_t)); + + struct worker_args args; + pthread_mutex_init (&(args.mtx), NULL); + + int *thread_ret = NULL, ret = 0; + + /* Run the checks to catch an EAGAIN. */ + /* As there is no way to ensure the error condition, just run the test many + times hoping to catch the error. */ + args.runs = 100; + for (int run = 0; run < RETEST_TIMES; run++) + { + for (int i = 0; i < NUM_THREADS; i++) + { + pthread_create (&workers[i], NULL, worker, (void *) &args); + } + for (int i = 0; i < NUM_THREADS; i++) + { + pthread_join (workers[i], (void **) &thread_ret); + if (thread_ret != NULL) + ++ret; + } + if (ret != 0) + goto err; + } + + /* Run the test to check if we catch an EINTR. */ + /* As there is no way to ensure the error condition, just run the test many + times hoping to catch the error. */ + pthread_t thread; + args.runs = 1; + for (int i = 0; i < RETEST_TIMES * 1000; i++) + { + if (pthread_mutex_lock (&(args.mtx)) != 0) + { + printf ("Mutex lock failed\n"); + goto err; + } + pthread_create (&thread, NULL, worker, (void *) &args); + pthread_kill (thread, SIGUSR1); + pthread_mutex_unlock (&(args.mtx)); + pthread_join (thread, (void **) &thread_ret); + if (thread_ret != NULL) + goto err; + } + + free (workers); + return 0; + +err: + free (workers); + return 1; +} + +#include <support/test-driver.c> diff --git a/sysdeps/nptl/futex-internal.h b/sysdeps/nptl/futex-internal.h index e67803952f..f16d26d994 100644 --- a/sysdeps/nptl/futex-internal.h +++ b/sysdeps/nptl/futex-internal.h @@ -424,10 +424,19 @@ __futex_clocklock64 (int *futex, clockid_t clockid, { while (atomic_exchange_acq (futex, 2) != 0) { + /* At this point we tried to get the futex but failed and set its + value to 2. However the futex value can be changed by other + thread before this calls the futex syscall. If so the syscall + will return EAGAIN. */ err = __futex_abstimed_wait64 ((unsigned int *) futex, 2, clockid, abstime, private); if (err == EINVAL || err == ETIMEDOUT || err == EOVERFLOW) break; + /* If EAGAIN or EINTR is returned here the error code should be reset + as we will try again to acquire the futex and it may success. + Otherwise the mutex will be locked and the return will not be 0. */ + if (err == EAGAIN || err == EINTR) + err = 0; } } return err;