Message ID | 87a6vlthqn.fsf@oldenburg2.str.redhat.com |
---|---|
State | New |
Headers | show |
Series | nptl: Move stack list variables into _rtld_global | expand |
On 13/11/2020 12:10, Florian Weimer via Libc-alpha wrote: > Now __thread_gscope_wait (the function behind THREAD_GSCOPE_WAIT, > formerly __wait_lookup_done) can be implemented directly in ld.so, > eliminating the unprotected GL (dl_wait_lookup_done) function > pointer. LGTM, thanks. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > > --- > csu/libc-tls.c | 7 ++ > elf/Makefile | 6 +- > elf/dl-support.c | 9 ++- > elf/dl-thread_gscope_wait.c | 2 + > elf/rtld.c | 12 +++ > nptl/allocatestack.c | 147 +++++++++-------------------------- > nptl/descr.h | 3 +- > nptl/nptl-init.c | 11 +-- > nptl/pthreadP.h | 4 - > nptl/pthread_create.c | 8 +- > nptl_db/structs.def | 4 +- > nptl_db/td_init.c | 14 ++++ > nptl_db/td_ta_map_lwp2thr.c | 2 +- > nptl_db/td_ta_thr_iter.c | 4 +- > nptl_db/td_thr_tlsbase.c | 7 +- > nptl_db/td_thr_validate.c | 39 +++++++++- > nptl_db/thread_dbP.h | 13 ++++ > sysdeps/aarch64/nptl/tls.h | 2 - > sysdeps/alpha/nptl/tls.h | 2 - > sysdeps/arc/nptl/tls.h | 2 - > sysdeps/arm/nptl/tls.h | 2 - > sysdeps/csky/nptl/tls.h | 2 - > sysdeps/generic/ldsodefs.h | 19 ++++- > sysdeps/hppa/nptl/tls.h | 2 - > sysdeps/i386/nptl/tls.h | 2 - > sysdeps/ia64/nptl/tls.h | 2 - > sysdeps/m68k/nptl/tls.h | 2 - > sysdeps/microblaze/nptl/tls.h | 2 - > sysdeps/mips/nptl/tls.h | 2 - > sysdeps/nios2/nptl/tls.h | 2 - > sysdeps/nptl/dl-thread_gscope_wait.c | 80 +++++++++++++++++++ > sysdeps/powerpc/nptl/tls.h | 2 - > sysdeps/riscv/nptl/tls.h | 2 - > sysdeps/s390/nptl/tls.h | 2 - > sysdeps/sh/nptl/tls.h | 2 - > sysdeps/sparc/nptl/tls.h | 2 - > sysdeps/x86_64/nptl/tls.h | 2 - > 37 files changed, 243 insertions(+), 184 deletions(-) > > diff --git a/csu/libc-tls.c b/csu/libc-tls.c > index 06e76bd395..c3589f0a7d 100644 > --- a/csu/libc-tls.c > +++ b/csu/libc-tls.c > @@ -24,6 +24,7 @@ > #include <stdio.h> > #include <sys/param.h> > #include <array_length.h> > +#include <list.h> > > #ifdef SHARED > #error makefile bug, this file is for static only > @@ -193,6 +194,12 @@ 
__libc_setup_tls (void) > if (__builtin_expect (lossage != NULL, 0)) > _startup_fatal (lossage); > > +#if THREAD_GSCOPE_IN_TCB > + INIT_LIST_HEAD (&_dl_stack_used); > + INIT_LIST_HEAD (&_dl_stack_user); > + list_add (&THREAD_SELF->list, &_dl_stack_user); > +#endif > + > /* Update the executable's link map with enough information to make > the TLS routines happy. */ > main_map->l_tls_align = align; Ok. > diff --git a/elf/Makefile b/elf/Makefile > index f10cc59e7c..2015383eb2 100644 > --- a/elf/Makefile > +++ b/elf/Makefile > @@ -34,7 +34,8 @@ dl-routines = $(addprefix dl-,load lookup object reloc deps \ > version profile tls origin scope \ > execstack open close trampoline \ > exception sort-maps lookup-direct \ > - call-libc-early-init write) > + call-libc-early-init write \ > + thread_gscope_wait) > ifeq (yes,$(use-ldconfig)) > dl-routines += dl-cache > endif Ok. > @@ -54,7 +55,8 @@ endif > all-dl-routines = $(dl-routines) $(sysdep-dl-routines) > # But they are absent from the shared libc, because that code is in ld.so. > elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \ > - dl-sysdep dl-exception dl-reloc-static-pie > + dl-sysdep dl-exception dl-reloc-static-pie \ > + thread_gscope_wait > > # ld.so uses those routines, plus some special stuff for being the program > # interpreter and operating independent of libc. Ok. > diff --git a/elf/dl-support.c b/elf/dl-support.c > index afbc94df54..15e6d787c9 100644 > --- a/elf/dl-support.c > +++ b/elf/dl-support.c > @@ -183,10 +183,11 @@ ElfW(Word) _dl_stack_flags = DEFAULT_STACK_PERMS; > int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable; > > > -/* Function in libpthread to wait for termination of lookups. 
*/ > -void (*_dl_wait_lookup_done) (void); > - > -#if !THREAD_GSCOPE_IN_TCB > +#if THREAD_GSCOPE_IN_TCB > +list_t _dl_stack_used; > +list_t _dl_stack_user; > +int _dl_stack_cache_lock; > +#else > int _dl_thread_gscope_count; > #endif > struct dl_scope_free_list *_dl_scope_free_list; Ok. > diff --git a/elf/dl-thread_gscope_wait.c b/elf/dl-thread_gscope_wait.c > new file mode 100644 > index 0000000000..28e1494730 > --- /dev/null > +++ b/elf/dl-thread_gscope_wait.c > @@ -0,0 +1,2 @@ > +/* By default, the dynamic linker does not use an out-of-line > + __thread_gscope_wait function. */ Ok. > diff --git a/elf/rtld.c b/elf/rtld.c > index 5d117d0d2c..c4ffc8d4b7 100644 > --- a/elf/rtld.c > +++ b/elf/rtld.c > @@ -48,6 +48,7 @@ > #include <array_length.h> > #include <libc-early-init.h> > #include <dl-main.h> > +#include <list.h> > > #include <assert.h> > > @@ -799,6 +800,9 @@ cannot allocate TLS data structures for initial thread\n"); > const char *lossage = TLS_INIT_TP (tcbp); > if (__glibc_unlikely (lossage != NULL)) > _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage); > +#if THREAD_GSCOPE_IN_TCB > + list_add (&THREAD_SELF->list, &GL (dl_stack_user)); > +#endif > tls_init_tp_called = true; > > return tcbp; Ok. > @@ -1139,6 +1143,11 @@ dl_main (const ElfW(Phdr) *phdr, > GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive; > #endif > > +#if THREAD_GSCOPE_IN_TCB > + INIT_LIST_HEAD (&GL (dl_stack_used)); > + INIT_LIST_HEAD (&GL (dl_stack_user)); > +#endif > + > /* The explicit initialization here is cheaper than processing the reloc > in the _rtld_local definition's initializer. */ > GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable; Ok. 
> @@ -2383,6 +2392,9 @@ dl_main (const ElfW(Phdr) *phdr, > if (__glibc_unlikely (lossage != NULL)) > _dl_fatal_printf ("cannot set up thread-local storage: %s\n", > lossage); > +#if THREAD_GSCOPE_IN_TCB > + list_add (&THREAD_SELF->list, &GL (dl_stack_user)); > +#endif > } > > /* Make sure no new search directories have been added. */ Ok. > diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c > index 4b45f8c884..b7f9eeebf6 100644 > --- a/nptl/allocatestack.c > +++ b/nptl/allocatestack.c > @@ -106,26 +106,14 @@ > static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40MiBi by default. */ > static size_t stack_cache_actsize; > > -/* Mutex protecting this variable. */ > -static int stack_cache_lock = LLL_LOCK_INITIALIZER; > - > /* List of queued stack frames. */ > static LIST_HEAD (stack_cache); > > -/* List of the stacks in use. */ > -static LIST_HEAD (stack_used); > - > /* We need to record what list operations we are going to do so that, > in case of an asynchronous interruption due to a fork() call, we > can correct for the work. */ > static uintptr_t in_flight_stack; > > -/* List of the threads with user provided stacks in use. No need to > - initialize this, since it's done in __pthread_initialize_minimal. */ > -list_t __stack_user __attribute__ ((nocommon)); > -hidden_data_def (__stack_user) > - > - > /* Check whether the stack is still used or not. */ > #define FREE_P(descr) ((descr)->tid <= 0) > Ok. > @@ -173,7 +161,7 @@ get_cached_stack (size_t *sizep, void **memp) > struct pthread *result = NULL; > list_t *entry; > > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* Search the cache for a matching entry. We search for the > smallest stack which has at least the required size. Note that Ok. > @@ -206,7 +194,7 @@ get_cached_stack (size_t *sizep, void **memp) > || __builtin_expect (result->stackblock_size > 4 * size, 0)) > { > /* Release the lock. 
*/ > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > return NULL; > } Ok. > @@ -218,13 +206,13 @@ get_cached_stack (size_t *sizep, void **memp) > stack_list_del (&result->list); > > /* And add to the list of stacks in use. */ > - stack_list_add (&result->list, &stack_used); > + stack_list_add (&result->list, &GL (dl_stack_used)); > > /* And decrease the cache size. */ > stack_cache_actsize -= result->stackblock_size; > > /* Release the lock early. */ > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* Report size and location of the stack to the caller. */ > *sizep = result->stackblock_size; Ok. > @@ -510,12 +498,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, > > > /* Prepare to modify global data. */ > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* And add to the list of stacks in use. */ > - list_add (&pd->list, &__stack_user); > + list_add (&pd->list, &GL (dl_stack_user)); > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > } > else > { Ok. > @@ -644,12 +632,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, > > > /* Prepare to modify global data. */ > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* And add to the list of stacks in use. */ > - stack_list_add (&pd->list, &stack_used); > + stack_list_add (&pd->list, &GL (dl_stack_used)); > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > > /* There might have been a race. Another thread might have Ok. 
> @@ -690,12 +678,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, > if (__mprotect (guard, guardsize, PROT_NONE) != 0) > { > mprot_error: > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* Remove the thread from the list. */ > stack_list_del (&pd->list); > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* Get rid of the TLS block we allocated. */ > _dl_deallocate_tls (TLS_TPADJ (pd), false); Ok. > @@ -799,7 +787,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, > void > __deallocate_stack (struct pthread *pd) > { > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* Remove the thread from the list of threads with user defined > stacks. */ > @@ -815,7 +803,7 @@ __deallocate_stack (struct pthread *pd) > /* Free the memory associated with the ELF TLS. */ > _dl_deallocate_tls (TLS_TPADJ (pd), false); > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > } > > Ok. > @@ -831,10 +819,10 @@ __make_stacks_executable (void **stack_endp) > const size_t pagemask = ~(__getpagesize () - 1); > #endif > > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > list_t *runp; > - list_for_each (runp, &stack_used) > + list_for_each (runp, &GL (dl_stack_used)) > { > err = change_stack_perm (list_entry (runp, struct pthread, list) > #ifdef NEED_SEPARATE_REGISTER_STACK Ok. > @@ -860,7 +848,7 @@ __make_stacks_executable (void **stack_endp) > break; > } > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > return err; > } > @@ -891,8 +879,8 @@ __reclaim_stacks (void) > pointers at the head of the list are inconsistent. 
*/ > list_t *l = NULL; > > - if (stack_used.next->prev != &stack_used) > - l = &stack_used; > + if (GL (dl_stack_used).next->prev != &GL (dl_stack_used)) > + l = &GL (dl_stack_used); > else if (stack_cache.next->prev != &stack_cache) > l = &stack_cache; > Ok. > @@ -914,7 +902,7 @@ __reclaim_stacks (void) > > /* Mark all stacks except the still running one as free. */ > list_t *runp; > - list_for_each (runp, &stack_used) > + list_for_each (runp, &GL (dl_stack_used)) > { > struct pthread *curp = list_entry (runp, struct pthread, list); > if (curp != self) Ok. > @@ -948,7 +936,7 @@ __reclaim_stacks (void) > } > > /* Add the stack of all running threads to the cache. */ > - list_splice (&stack_used, &stack_cache); > + list_splice (&GL (dl_stack_used), &stack_cache); > > /* Remove the entry for the current thread to from the cache list > and add it to the list of running threads. Which of the two Ok. > @@ -956,13 +944,13 @@ __reclaim_stacks (void) > stack_list_del (&self->list); > > /* Re-initialize the lists for all the threads. */ > - INIT_LIST_HEAD (&stack_used); > - INIT_LIST_HEAD (&__stack_user); > + INIT_LIST_HEAD (&GL (dl_stack_used)); > + INIT_LIST_HEAD (&GL (dl_stack_user)); > > if (__glibc_unlikely (THREAD_GETMEM (self, user_stack))) > - list_add (&self->list, &__stack_user); > + list_add (&self->list, &GL (dl_stack_user)); > else > - list_add (&self->list, &stack_used); > + list_add (&self->list, &GL (dl_stack_used)); > > /* There is one thread running. */ > __nptl_nthreads = 1; Ok. > @@ -970,7 +958,7 @@ __reclaim_stacks (void) > in_flight_stack = 0; > > /* Initialize locks. */ > - stack_cache_lock = LLL_LOCK_INITIALIZER; > + GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER; > __default_pthread_attr_lock = LLL_LOCK_INITIALIZER; > } > Ok. 
> @@ -1083,7 +1071,7 @@ __nptl_setxid (struct xid_command *cmdp) > { > int signalled; > int result; > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > __xidcmd = cmdp; > cmdp->cntr = 0; > @@ -1093,7 +1081,7 @@ __nptl_setxid (struct xid_command *cmdp) > > /* Iterate over the list with system-allocated threads first. */ > list_t *runp; > - list_for_each (runp, &stack_used) > + list_for_each (runp, &GL (dl_stack_used)) > { > struct pthread *t = list_entry (runp, struct pthread, list); > if (t == self) > @@ -1103,7 +1091,7 @@ __nptl_setxid (struct xid_command *cmdp) > } > > /* Now the list with threads using user-allocated stacks. */ > - list_for_each (runp, &__stack_user) > + list_for_each (runp, &GL (dl_stack_user)) > { > struct pthread *t = list_entry (runp, struct pthread, list); > if (t == self) > @@ -1119,7 +1107,7 @@ __nptl_setxid (struct xid_command *cmdp) > { > signalled = 0; > > - list_for_each (runp, &stack_used) > + list_for_each (runp, &GL (dl_stack_used)) > { > struct pthread *t = list_entry (runp, struct pthread, list); > if (t == self) > @@ -1128,7 +1116,7 @@ __nptl_setxid (struct xid_command *cmdp) > signalled += setxid_signal_thread (cmdp, t); > } > > - list_for_each (runp, &__stack_user) > + list_for_each (runp, &GL (dl_stack_user)) > { > struct pthread *t = list_entry (runp, struct pthread, list); > if (t == self) > @@ -1149,7 +1137,7 @@ __nptl_setxid (struct xid_command *cmdp) > > /* Clean up flags, so that no thread blocks during exit waiting > for a signal which will never come. 
*/ > - list_for_each (runp, &stack_used) > + list_for_each (runp, &GL (dl_stack_used)) > { > struct pthread *t = list_entry (runp, struct pthread, list); > if (t == self) > @@ -1158,7 +1146,7 @@ __nptl_setxid (struct xid_command *cmdp) > setxid_unmark_thread (cmdp, t); > } > > - list_for_each (runp, &__stack_user) > + list_for_each (runp, &GL (dl_stack_user)) > { > struct pthread *t = list_entry (runp, struct pthread, list); > if (t == self) > @@ -1180,7 +1168,7 @@ __nptl_setxid (struct xid_command *cmdp) > } > __nptl_setxid_error (cmdp, error); > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > return result; > } > Ok. > @@ -1204,75 +1192,16 @@ void > attribute_hidden > __pthread_init_static_tls (struct link_map *map) > { > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > /* Iterate over the list with system-allocated threads first. */ > list_t *runp; > - list_for_each (runp, &stack_used) > + list_for_each (runp, &GL (dl_stack_used)) > init_one_static_tls (list_entry (runp, struct pthread, list), map); > > /* Now the list with threads using user-allocated stacks. */ > - list_for_each (runp, &__stack_user) > + list_for_each (runp, &GL (dl_stack_user)) > init_one_static_tls (list_entry (runp, struct pthread, list), map); > Ok. > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > -} > - > - > -void > -attribute_hidden > -__wait_lookup_done (void) > -{ > - lll_lock (stack_cache_lock, LLL_PRIVATE); > - > - struct pthread *self = THREAD_SELF; > - > - /* Iterate over the list with system-allocated threads first. */ > - list_t *runp; > - list_for_each (runp, &stack_used) > - { > - struct pthread *t = list_entry (runp, struct pthread, list); > - if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) > - continue; > - > - int *const gscope_flagp = &t->header.gscope_flag; > - > - /* We have to wait until this thread is done with the global > - scope. 
First tell the thread that we are waiting and > - possibly have to be woken. */ > - if (atomic_compare_and_exchange_bool_acq (gscope_flagp, > - THREAD_GSCOPE_FLAG_WAIT, > - THREAD_GSCOPE_FLAG_USED)) > - continue; > - > - do > - futex_wait_simple ((unsigned int *) gscope_flagp, > - THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); > - while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); > - } > - > - /* Now the list with threads using user-allocated stacks. */ > - list_for_each (runp, &__stack_user) > - { > - struct pthread *t = list_entry (runp, struct pthread, list); > - if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) > - continue; > - > - int *const gscope_flagp = &t->header.gscope_flag; > - > - /* We have to wait until this thread is done with the global > - scope. First tell the thread that we are waiting and > - possibly have to be woken. */ > - if (atomic_compare_and_exchange_bool_acq (gscope_flagp, > - THREAD_GSCOPE_FLAG_WAIT, > - THREAD_GSCOPE_FLAG_USED)) > - continue; > - > - do > - futex_wait_simple ((unsigned int *) gscope_flagp, > - THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); > - while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); > - } > - > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > } Ok. > diff --git a/nptl/descr.h b/nptl/descr.h > index d8343ff9a1..b172ee408b 100644 > --- a/nptl/descr.h > +++ b/nptl/descr.h > @@ -162,7 +162,8 @@ struct pthread > void *__padding[24]; > }; > > - /* This descriptor's link on the `stack_used' or `__stack_user' list. */ > + /* This descriptor's link on the GL (dl_stack_used) or > + GL (dl_stack_user) list. */ > list_t list; > > /* Thread ID - which is also a 'is this thread descriptor (and Ok. > diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c > index 4aa1231bec..53b817715d 100644 > --- a/nptl/nptl-init.c > +++ b/nptl/nptl-init.c > @@ -251,12 +251,9 @@ __pthread_initialize_minimal_internal (void) > purposes this is good enough. 
*/ > THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end); > > - /* Initialize the list of all running threads with the main thread. */ > - INIT_LIST_HEAD (&__stack_user); > - list_add (&pd->list, &__stack_user); > - > - /* Before initializing __stack_user, the debugger could not find us and > - had to set __nptl_initial_report_events. Propagate its setting. */ > + /* Before initializing GL (dl_stack_user), the debugger could not > + find us and had to set __nptl_initial_report_events. Propagate > + its setting. */ > THREAD_SETMEM (pd, report_events, __nptl_initial_report_events); > > struct sigaction sa; Ok. > @@ -336,8 +333,6 @@ __pthread_initialize_minimal_internal (void) > > GL(dl_init_static_tls) = &__pthread_init_static_tls; > > - GL(dl_wait_lookup_done) = &__wait_lookup_done; > - > /* Register the fork generation counter with the libc. */ Ok. > #ifndef TLS_MULTIPLE_THREADS_IN_TCB > __libc_multiple_threads_ptr = > diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h > index 686f54aa93..a7510f9f63 100644 > --- a/nptl/pthreadP.h > +++ b/nptl/pthreadP.h > @@ -208,10 +208,6 @@ extern void __default_pthread_attr_freeres (void) attribute_hidden; > extern size_t __static_tls_size attribute_hidden; > extern size_t __static_tls_align_m1 attribute_hidden; > > -/* Thread descriptor handling. */ > -extern list_t __stack_user; > -hidden_proto (__stack_user) > - > /* Attribute handling. */ > extern struct pthread_attr *__attr_list attribute_hidden; > extern int __attr_list_lock attribute_hidden; Ok. 
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c > index 447f005ece..bad4e57a84 100644 > --- a/nptl/pthread_create.c > +++ b/nptl/pthread_create.c > @@ -213,9 +213,9 @@ __find_in_stack_list (struct pthread *pd) > list_t *entry; > struct pthread *result = NULL; > > - lll_lock (stack_cache_lock, LLL_PRIVATE); > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > - list_for_each (entry, &stack_used) > + list_for_each (entry, &GL (dl_stack_used)) > { > struct pthread *curp; > > @@ -228,7 +228,7 @@ __find_in_stack_list (struct pthread *pd) > } > > if (result == NULL) > - list_for_each (entry, &__stack_user) > + list_for_each (entry, &GL (dl_stack_user)) > { > struct pthread *curp; > > @@ -240,7 +240,7 @@ __find_in_stack_list (struct pthread *pd) > } > } > > - lll_unlock (stack_cache_lock, LLL_PRIVATE); > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > > return result; > } Ok. > diff --git a/nptl_db/structs.def b/nptl_db/structs.def > index 472e9008fa..2ca33598b1 100644 > --- a/nptl_db/structs.def > +++ b/nptl_db/structs.def > @@ -69,8 +69,6 @@ DB_STRUCT (td_eventbuf_t) > DB_STRUCT_FIELD (td_eventbuf_t, eventnum) > DB_STRUCT_FIELD (td_eventbuf_t, eventdata) > > -DB_SYMBOL (stack_used) > -DB_SYMBOL (__stack_user) > DB_SYMBOL (nptl_version) > DB_FUNCTION (__nptl_create_event) > DB_FUNCTION (__nptl_death_event) > @@ -106,6 +104,8 @@ DB_STRUCT (rtld_global) > DB_RTLD_VARIABLE (_rtld_global) > #endif > DB_RTLD_GLOBAL_FIELD (dl_tls_dtv_slotinfo_list) > +DB_RTLD_GLOBAL_FIELD (dl_stack_user) > +DB_RTLD_GLOBAL_FIELD (dl_stack_used) > > DB_STRUCT (dtv_slotinfo_list) > DB_STRUCT_FIELD (dtv_slotinfo_list, len) Ok. 
> diff --git a/nptl_db/td_init.c b/nptl_db/td_init.c > index 86773dcb06..79a1f90312 100644 > --- a/nptl_db/td_init.c > +++ b/nptl_db/td_init.c > @@ -29,3 +29,17 @@ td_init (void) > LOG ("td_init"); > return TD_OK; > } > + > +bool > +__td_ta_rtld_global (td_thragent_t *ta) > +{ > + if (ta->ta_addr__rtld_global == 0 > + && td_mod_lookup (ta->ph, LD_SO, SYM__rtld_global, > + &ta->ta_addr__rtld_global) != PS_OK) > + { > + ta->ta_addr__rtld_global = (void*)-1; > + return false; > + } > + else > + return ta->ta_addr__rtld_global != (void*)-1; > +} Ok. > diff --git a/nptl_db/td_ta_map_lwp2thr.c b/nptl_db/td_ta_map_lwp2thr.c > index d04d4bec23..81ff48808c 100644 > --- a/nptl_db/td_ta_map_lwp2thr.c > +++ b/nptl_db/td_ta_map_lwp2thr.c > @@ -187,7 +187,7 @@ td_ta_map_lwp2thr (const td_thragent_t *ta_arg, > fake a special descriptor for the initial thread. */ > > psaddr_t list; > - td_err_e err = DB_GET_SYMBOL (list, ta, __stack_user); > + td_err_e err = __td_ta_stack_user (ta, &list); > if (err != TD_OK) > return err; > Ok. > diff --git a/nptl_db/td_ta_thr_iter.c b/nptl_db/td_ta_thr_iter.c > index d59782bd6e..e406bcbd39 100644 > --- a/nptl_db/td_ta_thr_iter.c > +++ b/nptl_db/td_ta_thr_iter.c > @@ -133,14 +133,14 @@ td_ta_thr_iter (const td_thragent_t *ta_arg, td_thr_iter_f *callback, > have to iterate over both lists separately. We start with the > list of threads with user-defined stacks. */ > > - err = DB_GET_SYMBOL (list, ta, __stack_user); > + err = __td_ta_stack_user (ta, &list); > if (err == TD_OK) > err = iterate_thread_list (ta, callback, cbdata_p, state, ti_pri, > list, true); > > /* And the threads with stacks allocated by the implementation. */ > if (err == TD_OK) > - err = DB_GET_SYMBOL (list, ta, stack_used); > + err = __td_ta_stack_used (ta, &list); > if (err == TD_OK) > err = iterate_thread_list (ta, callback, cbdata_p, state, ti_pri, > list, false); Ok. 
> diff --git a/nptl_db/td_thr_tlsbase.c b/nptl_db/td_thr_tlsbase.c > index 8198934112..eeba0a04d8 100644 > --- a/nptl_db/td_thr_tlsbase.c > +++ b/nptl_db/td_thr_tlsbase.c > @@ -28,12 +28,7 @@ dtv_slotinfo_list (td_thragent_t *ta, > td_err_e err; > psaddr_t head; > > - if (ta->ta_addr__rtld_global == 0 > - && td_mod_lookup (ta->ph, LD_SO, SYM__rtld_global, > - &ta->ta_addr__rtld_global) != PS_OK) > - ta->ta_addr__rtld_global = (void*)-1; > - > - if (ta->ta_addr__rtld_global != (void*)-1) > + if (__td_ta_rtld_global (ta)) > { > err = DB_GET_FIELD (head, ta, ta->ta_addr__rtld_global, > rtld_global, _dl_tls_dtv_slotinfo_list, 0); > diff --git a/nptl_db/td_thr_validate.c b/nptl_db/td_thr_validate.c Ok. > index c709c5e9be..d2f4107cf2 100644 > --- a/nptl_db/td_thr_validate.c > +++ b/nptl_db/td_thr_validate.c > @@ -20,6 +20,41 @@ > #include "thread_dbP.h" > #include <stdbool.h> > > +td_err_e > +__td_ta_stack_user (td_thragent_t *ta, psaddr_t *plist) > +{ > + if (__td_ta_rtld_global (ta)) > + return DB_GET_FIELD_ADDRESS (*plist, ta, ta->ta_addr__rtld_global, > + rtld_global, _dl_stack_user, 0); > + else > + { > + if (ta->ta_addr__dl_stack_user == 0 > + && td_mod_lookup (ta->ph, NULL, SYM__dl_stack_user, > + &ta->ta_addr__dl_stack_user) != PS_OK) > + return TD_ERR; > + *plist = ta->ta_addr__dl_stack_user; > + return TD_OK; > + } > +} > + > +td_err_e > +__td_ta_stack_used (td_thragent_t *ta, psaddr_t *plist) > +{ > + > + if (__td_ta_rtld_global (ta)) > + return DB_GET_FIELD_ADDRESS (*plist, ta, ta->ta_addr__rtld_global, > + rtld_global, _dl_stack_used, 0); > + else > + { > + if (ta->ta_addr__dl_stack_used == 0 > + && td_mod_lookup (ta->ph, NULL, SYM__dl_stack_used, > + &ta->ta_addr__dl_stack_used) != PS_OK) > + return TD_ERR; > + *plist = ta->ta_addr__dl_stack_used; > + return TD_OK; > + } > +} > + > static td_err_e > check_thread_list (const td_thrhandle_t *th, psaddr_t head, bool *uninit) > { Ok. 
> @@ -62,7 +97,7 @@ td_thr_validate (const td_thrhandle_t *th) > > /* First check the list with threads using user allocated stacks. */ > bool uninit = false; > - err = DB_GET_SYMBOL (list, th->th_ta_p, __stack_user); > + err = __td_ta_stack_user (th->th_ta_p, &list); > if (err == TD_OK) > err = check_thread_list (th, list, &uninit); > > @@ -70,7 +105,7 @@ td_thr_validate (const td_thrhandle_t *th) > using implementation allocated stacks. */ > if (err == TD_NOTHR) > { > - err = DB_GET_SYMBOL (list, th->th_ta_p, stack_used); > + err = __td_ta_stack_used (th->th_ta_p, &list); > if (err == TD_OK) > err = check_thread_list (th, list, &uninit); > Ok. > diff --git a/nptl_db/thread_dbP.h b/nptl_db/thread_dbP.h > index 8a666b8d4f..9574e68d70 100644 > --- a/nptl_db/thread_dbP.h > +++ b/nptl_db/thread_dbP.h > @@ -269,4 +269,17 @@ extern td_err_e _td_check_sizeof (td_thragent_t *ta, uint32_t *sizep, > extern td_err_e __td_ta_lookup_th_unique (const td_thragent_t *ta, > lwpid_t lwpid, td_thrhandle_t *th); > > +/* Try to initialize TA->ta_addr__rtld_global. Return true on > + success, false on failure (which may be cached). */ > +bool __td_ta_rtld_global (td_thragent_t *ta) attribute_hidden; > + > +/* Obtain the address of the list_t fields _dl_stack_user and > + _dl_stack_used in _rtld_global, or fall back to the global > + variables of the same name (to support statically linked > + programs). */ > +td_err_e __td_ta_stack_user (td_thragent_t *ta, psaddr_t *plist) > + attribute_hidden; > +td_err_e __td_ta_stack_used (td_thragent_t *ta, psaddr_t *plist) > + attribute_hidden; > + > #endif /* thread_dbP.h */ Ok. > diff --git a/sysdeps/aarch64/nptl/tls.h b/sysdeps/aarch64/nptl/tls.h > index e5627f777e..dbef4704f1 100644 > --- a/sysdeps/aarch64/nptl/tls.h > +++ b/sysdeps/aarch64/nptl/tls.h > @@ -129,8 +129,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -# define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > # endif /* __ASSEMBLER__ */ > Ok. 
> diff --git a/sysdeps/alpha/nptl/tls.h b/sysdeps/alpha/nptl/tls.h > index 82549607fd..f3101f57c9 100644 > --- a/sysdeps/alpha/nptl/tls.h > +++ b/sysdeps/alpha/nptl/tls.h > @@ -123,8 +123,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #else /* __ASSEMBLER__ */ > # include <tcb-offsets.h> Ok. > diff --git a/sysdeps/arc/nptl/tls.h b/sysdeps/arc/nptl/tls.h > index 184b550ab5..cc5abb1931 100644 > --- a/sysdeps/arc/nptl/tls.h > +++ b/sysdeps/arc/nptl/tls.h > @@ -131,8 +131,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* !__ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/arm/nptl/tls.h b/sysdeps/arm/nptl/tls.h > index 24a488ff37..91dd7498e4 100644 > --- a/sysdeps/arm/nptl/tls.h > +++ b/sysdeps/arm/nptl/tls.h > @@ -120,8 +120,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/csky/nptl/tls.h b/sysdeps/csky/nptl/tls.h > index bcca9674a1..f308773d40 100644 > --- a/sysdeps/csky/nptl/tls.h > +++ b/sysdeps/csky/nptl/tls.h > @@ -147,8 +147,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -# define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h > index 382eeb9be0..b1da03cafe 100644 > --- a/sysdeps/generic/ldsodefs.h > +++ b/sysdeps/generic/ldsodefs.h > @@ -38,6 +38,7 @@ > #include <libc-lock.h> > #include <hp-timing.h> > #include <tls.h> > +#include <list_t.h> > > __BEGIN_DECLS > > @@ -461,15 +462,22 @@ struct rtld_global > > EXTERN void (*_dl_init_static_tls) (struct link_map *); > > - EXTERN void (*_dl_wait_lookup_done) (void); > - > /* Scopes to free after next THREAD_GSCOPE_WAIT (). 
*/ > EXTERN struct dl_scope_free_list > { > size_t count; > void *list[50]; > } *_dl_scope_free_list; > -#if !THREAD_GSCOPE_IN_TCB > +#if THREAD_GSCOPE_IN_TCB > + /* List of active thread stacks, with memory managed by glibc. */ > + EXTERN list_t _dl_stack_used; > + > + /* List of thread stacks that were allocated by the application. */ > + EXTERN list_t _dl_stack_user; > + > + /* Mutex protecting the stack lists. */ > + EXTERN int _dl_stack_cache_lock; > +#else > EXTERN int _dl_thread_gscope_count; > #endif > #ifdef SHARED > @@ -1252,6 +1260,11 @@ link_map_audit_state (struct link_map *l, size_t index) > } > #endif /* SHARED */ > > +#if THREAD_GSCOPE_IN_TCB > +void __thread_gscope_wait (void) attribute_hidden; > +# define THREAD_GSCOPE_WAIT() __thread_gscope_wait () > +#endif > + > __END_DECLS > > #endif /* ldsodefs.h */ Ok. > diff --git a/sysdeps/hppa/nptl/tls.h b/sysdeps/hppa/nptl/tls.h > index 2315340735..f319cb42e2 100644 > --- a/sysdeps/hppa/nptl/tls.h > +++ b/sysdeps/hppa/nptl/tls.h > @@ -154,8 +154,6 @@ static inline void __set_cr27(struct pthread *cr27) > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* !__ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/i386/nptl/tls.h b/sysdeps/i386/nptl/tls.h > index 5042d52b98..b74347bacd 100644 > --- a/sysdeps/i386/nptl/tls.h > +++ b/sysdeps/i386/nptl/tls.h > @@ -387,8 +387,6 @@ tls_fill_user_desc (union user_desc_init *desc, > while (0) > #define THREAD_GSCOPE_SET_FLAG() \ > THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. 
> diff --git a/sysdeps/ia64/nptl/tls.h b/sysdeps/ia64/nptl/tls.h > index f5b1684ef3..42b082dad6 100644 > --- a/sysdeps/ia64/nptl/tls.h > +++ b/sysdeps/ia64/nptl/tls.h > @@ -175,8 +175,6 @@ register struct pthread *__thread_self __asm__("r13"); > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/m68k/nptl/tls.h b/sysdeps/m68k/nptl/tls.h > index 68ea952e79..69e174484e 100644 > --- a/sysdeps/m68k/nptl/tls.h > +++ b/sysdeps/m68k/nptl/tls.h > @@ -153,8 +153,6 @@ extern void * __m68k_read_tp (void); > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/microblaze/nptl/tls.h b/sysdeps/microblaze/nptl/tls.h > index a094fdb798..78f63019dc 100644 > --- a/sysdeps/microblaze/nptl/tls.h > +++ b/sysdeps/microblaze/nptl/tls.h > @@ -136,8 +136,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -# define THREAD_GSCOPE_WAIT() \ > - GL (dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/mips/nptl/tls.h b/sysdeps/mips/nptl/tls.h > index 8b55f19c37..7e7ac43d6c 100644 > --- a/sysdeps/mips/nptl/tls.h > +++ b/sysdeps/mips/nptl/tls.h > @@ -178,8 +178,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/nios2/nptl/tls.h b/sysdeps/nios2/nptl/tls.h > index facb27c761..776fe9bb2d 100644 > --- a/sysdeps/nios2/nptl/tls.h > +++ b/sysdeps/nios2/nptl/tls.h > @@ -157,8 +157,6 @@ register struct pthread *__thread_self __asm__("r23"); > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. 
> diff --git a/sysdeps/nptl/dl-thread_gscope_wait.c b/sysdeps/nptl/dl-thread_gscope_wait.c > new file mode 100644 > index 0000000000..a9bfbee24c > --- /dev/null > +++ b/sysdeps/nptl/dl-thread_gscope_wait.c > @@ -0,0 +1,80 @@ > +/* Out-of-line notification function for the GSCOPE locking mechanism. > + Copyright (C) 2007-2020 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <nptl/descr.h> > +#include <futex-internal.h> > +#include <ldsodefs.h> > +#include <list.h> > +#include <lowlevellock.h> > + > +void > +__thread_gscope_wait (void) > +{ > + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); > + > + struct pthread *self = THREAD_SELF; > + > + /* Iterate over the list with system-allocated threads first. */ > + list_t *runp; > + list_for_each (runp, &GL (dl_stack_used)) > + { > + struct pthread *t = list_entry (runp, struct pthread, list); > + if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) > + continue; > + > + int *const gscope_flagp = &t->header.gscope_flag; > + > + /* We have to wait until this thread is done with the global > + scope. First tell the thread that we are waiting and > + possibly have to be woken. 
*/ > + if (atomic_compare_and_exchange_bool_acq (gscope_flagp, > + THREAD_GSCOPE_FLAG_WAIT, > + THREAD_GSCOPE_FLAG_USED)) > + continue; > + > + do > + futex_wait_simple ((unsigned int *) gscope_flagp, > + THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); > + while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); > + } > + > + /* Now the list with threads using user-allocated stacks. */ > + list_for_each (runp, &GL (dl_stack_user)) > + { > + struct pthread *t = list_entry (runp, struct pthread, list); > + if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) > + continue; > + > + int *const gscope_flagp = &t->header.gscope_flag; > + > + /* We have to wait until this thread is done with the global > + scope. First tell the thread that we are waiting and > + possibly have to be woken. */ > + if (atomic_compare_and_exchange_bool_acq (gscope_flagp, > + THREAD_GSCOPE_FLAG_WAIT, > + THREAD_GSCOPE_FLAG_USED)) > + continue; > + > + do > + futex_wait_simple ((unsigned int *) gscope_flagp, > + THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); > + while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); > + } > + > + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); > +} Ok, this is basically __wait_lookup_done. > diff --git a/sysdeps/powerpc/nptl/tls.h b/sysdeps/powerpc/nptl/tls.h > index b1f0b30fbb..261eecfd18 100644 > --- a/sysdeps/powerpc/nptl/tls.h > +++ b/sysdeps/powerpc/nptl/tls.h > @@ -245,8 +245,6 @@ register void *__thread_register __asm__ ("r13"); > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/riscv/nptl/tls.h b/sysdeps/riscv/nptl/tls.h > index a9167bc143..41d9db10cf 100644 > --- a/sysdeps/riscv/nptl/tls.h > +++ b/sysdeps/riscv/nptl/tls.h > @@ -139,8 +139,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -# define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. 
> diff --git a/sysdeps/s390/nptl/tls.h b/sysdeps/s390/nptl/tls.h > index 4a9b3570eb..7653109617 100644 > --- a/sysdeps/s390/nptl/tls.h > +++ b/sysdeps/s390/nptl/tls.h > @@ -185,8 +185,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/sh/nptl/tls.h b/sysdeps/sh/nptl/tls.h > index 6ccad251f9..2d5e3731b2 100644 > --- a/sysdeps/sh/nptl/tls.h > +++ b/sysdeps/sh/nptl/tls.h > @@ -161,8 +161,6 @@ typedef struct > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > Ok. > diff --git a/sysdeps/sparc/nptl/tls.h b/sysdeps/sparc/nptl/tls.h > index 18a548e86b..b08a156131 100644 > --- a/sysdeps/sparc/nptl/tls.h > +++ b/sysdeps/sparc/nptl/tls.h > @@ -158,8 +158,6 @@ register struct pthread *__thread_self __asm__("%g7"); > atomic_write_barrier (); \ > } \ > while (0) > -#define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* !ASSEMBLER */ > Ok. > diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h > index fbd7f9cb89..a08bf972de 100644 > --- a/sysdeps/x86_64/nptl/tls.h > +++ b/sysdeps/x86_64/nptl/tls.h > @@ -332,8 +332,6 @@ _Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x80, > while (0) > # define THREAD_GSCOPE_SET_FLAG() \ > THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED) > -# define THREAD_GSCOPE_WAIT() \ > - GL(dl_wait_lookup_done) () > > #endif /* __ASSEMBLER__ */ > > Ok.
On 2020-11-13 10:10 a.m., Florian Weimer wrote: > Now __thread_gscope_wait (the function behind THREAD_GSCOPE_WAIT, > formerly __wait_lookup_done) can be implemented directly in ld.so, > eliminating the unprotected GL (dl_wait_lookup_done) function > pointer. Hi Florian, Presumably starting with this commit (I don't really know how to build a glibc and test against it), GDB fails to attach to a threaded process because libthread_db fails to initialize. See: https://sourceware.org/bugzilla/show_bug.cgi?id=27526 The difference in behavior as seen from GDB is that libthread_db now asks to look up a symbol "_dl_stack_user" in module NULL. GDB can't find this symbol, which fails the initialization. Can you shed some light on this? Is this request expected, and where is GDB expected to find this symbol? Thanks, Simon
* Simon Marchi via Libc-alpha: > On 2020-11-13 10:10 a.m., Florian Weimer wrote: >> Now __thread_gscope_wait (the function behind THREAD_GSCOPE_WAIT, >> formerly __wait_lookup_done) can be implemented directly in ld.so, >> eliminating the unprotected GL (dl_wait_lookup_done) function >> pointer. > > Hi Florian, > > Presumably starting with this commit (I don't really know how to build a > glibc and test against it), GDB fails to attach to a threaded process > because libthread_db fails to initialize. See: > > https://sourceware.org/bugzilla/show_bug.cgi?id=27526 > > The difference in behavior as seen from GDB is that libthread_db now > asks to look up a symbol "_dl_stack_user" in module NULL. GDB can't > find this symbol, which fails the initialization. > > Can you shed some light on this? Is this request expected, and where is > GDB expected to find this symbol? It is not expected. This is the fallback path if _rtld_global cannot be located. The actual failure is that __td_ta_rtld_global does not succeed. Any suggestion how to debug this further? Sorry, I had thought I tested this. I guess we should add some sort of regression testing for nptl_db, but I don't know how to tell GDB to use the just-built nptl_db. Thanks, Florian
On Mär 05 2021, Florian Weimer via Libc-alpha wrote: > Sorry, I had thought I tested this. I guess we should add some sort of > regression testing for nptl_db, but I don't know how to tell GDB to use > the just-built nptl_db. (gdb) help set libthread-db-search-path Andreas.
On 2021-03-05 12:15 p.m., Florian Weimer wrote:> * Simon Marchi via Libc-alpha: > >> On 2020-11-13 10:10 a.m., Florian Weimer wrote: >>> Now __thread_gscope_wait (the function behind THREAD_GSCOPE_WAIT, >>> formerly __wait_lookup_done) can be implemented directly in ld.so, >>> eliminating the unprotected GL (dl_wait_lookup_done) function >>> pointer. >> >> Hi Florian, >> >> Presumably starting with this commit (I don't really know how to build a >> glibc and test against it), GDB fails to attach to a threaded process >> because libthread_db fails to initialize. See: >> >> https://sourceware.org/bugzilla/show_bug.cgi?id=27526 >> >> The difference in behavior as seen from GDB is that libthread_db now >> asks to look up a symbol "_dl_stack_user" in module NULL. GDB can't >> find this symbol, which fails the initialization. >> >> Can you shed some light on this? Is this request expected, and where is >> GDB expected to find this symbol? > > It is not expected. This is the fallback path if _rtld_global cannot be > located. The actual failure is that __td_ta_rtld_global does not > succeed. [adding gdb-patches] Ok, thanks for that tip. Indeed I see that GDB returns PS_NOSYM for _rtld_global. If I now log what GDB returns: LOOKUP nptl_version in libpthread.so.0 Found 0x7fdb02713037 LOOKUP _rtld_global in ld-linux-x86-64.so.2 Not found LOOKUP _dl_stack_user in (null) Not found So this lookup of _rtld_global is new too. And I think I see the problem, it looks like an ordering issue: libthread_db is loaded when GDB notices the program has libpthread loaded in it. When attaching, GDB walks the shared library list. In that list, libpthread comes before ld-linux. So at the time we try to load libthread_db, GDB hasn't yet noticed that the program has ld-linux loaded in it, hasn't ingested its symbols, so doesn't find _rtld_global. 
For comparison, glibc 2.31 (on Ubuntu 20.04) only requested symbols in libpthread itself, so there wasn't this ordering issue: LOOKUP nptl_version in libpthread.so.0 Found 0x7f38538e9037 LOOKUP __stack_user in libpthread.so.0 Found 0x7f38538f3350 LOOKUP _thread_db_list_t_next in libpthread.so.0 Found 0x7f38538e93b0 LOOKUP _thread_db_const_thread_area in libpthread.so.0 Found 0x7f38538e92b4 LOOKUP _thread_db_sizeof_pthread in libpthread.so.0 Found 0x7f38538e92cc LOOKUP _thread_db_pthread_specific in libpthread.so.0 Found 0x7f38538e9400 LOOKUP _thread_db_pthread_schedpolicy in libpthread.so.0 Found 0x7f38538e9420 LOOKUP _thread_db_pthread_schedparam_sched_priority in libpthread.so.0 Found 0x7f38538e9410 LOOKUP _thread_db_pthread_tid in libpthread.so.0 Found 0x7f38538e9450 LOOKUP _thread_db_pthread_cancelhandling in libpthread.so.0 Found 0x7f38538e9430 LOOKUP _thread_db_pthread_report_events in libpthread.so.0 Found 0x7f38538e9460 LOOKUP _thread_db_pthread_start_routine in libpthread.so.0 Found 0x7f38538e9440 LOOKUP _thread_db_pthread_eventbuf_eventmask_event_bits in libpthread.so.0 Found 0x7f38538e93d0 If we have to deal with this, I guess that GDB should now do things in a different order: go through the whole library list and load their symbols. And then if one of those libraries were libpthread, try to initialize libthread_db. Simon
* Simon Marchi: > On 2021-03-05 12:15 p.m., Florian Weimer wrote:> * Simon Marchi via Libc-alpha: >> >>> On 2020-11-13 10:10 a.m., Florian Weimer wrote: >>>> Now __thread_gscope_wait (the function behind THREAD_GSCOPE_WAIT, >>>> formerly __wait_lookup_done) can be implemented directly in ld.so, >>>> eliminating the unprotected GL (dl_wait_lookup_done) function >>>> pointer. >>> >>> Hi Florian, >>> >>> Presumably starting with this commit (I don't really know how to build a >>> glibc and test against it), GDB fails to attach to a threaded process >>> because libthread_db fails to initialize. See: >>> >>> https://sourceware.org/bugzilla/show_bug.cgi?id=27526 >>> >>> The difference in behavior as seen from GDB is that libthread_db now >>> asks to look up a symbol "_dl_stack_user" in module NULL. GDB can't >>> find this symbol, which fails the initialization. >>> >>> Can you shed some light on this? Is this request expected, and where is >>> GDB expected to find this symbol? >> >> It is not expected. This is the fallback path if _rtld_global cannot be >> located. The actual failure is that __td_ta_rtld_global does not >> succeed. > > [adding gdb-patches] > > Ok, thanks for that tip. Indeed I see that GDB returns PS_NOSYM for > _rtld_global. If I now log what GDB returns: > > LOOKUP nptl_version in libpthread.so.0 > Found 0x7fdb02713037 > LOOKUP _rtld_global in ld-linux-x86-64.so.2 > Not found > LOOKUP _dl_stack_user in (null) > Not found > > So this lookup of _rtld_global is new too. And I think I see the > problem, it looks like an ordering issue: libthread_db is loaded when > GDB notices the program has libpthread loaded in it. When attaching, > GDB walks the shared library list. In that list, libpthread comes > before ld-linux. So at the time we try to load libthread_db, GDB hasn't > yet noticed that the program has ld-linux loaded in it, hasn't ingested > its symbols, so doesn't find _rtld_global. Oh, that reads like a plausible explanation. 
And I assume the non-attaching case, where GDB starts the process, is very different, and this ordering issue does not appear? > If we have to deal with this, I guess that GDB should now do things in a > different order: go through the whole library list and load their > symbols. And then if one of those libraries were libpthread, try to > initialize libthread_db. Initialization of libthread_db should be unconditional. Programs use TLS data without linking against libpthread. And glibc 2.34 might not have a separate libpthread at all. Thanks, Florian
On 2021-03-05 1:03 p.m., Florian Weimer wrote: > Oh, that reads like a plausible explanation. And I assume the > non-attaching case, where GDB starts the process, is very different, and > this ordering issue does not appear? Indeed, in that case ld-linux-x86-64.so.2 is loaded before libpthread.so.0 (ld-linux is necessarily loaded before the others, I guess, since it's the one loading the others). So the symbol is found: LOOKUP _rtld_global in ld-linux-x86-64.so.2 Found 0x7ffff7ffd000 >> If we have to deal with this, I guess that GDB should now do things in a >> different order: go through the whole library list and load their >> symbols. And then if one of those libraries were libpthread, try to >> initialize libthread_db. > > Initialization of libthread_db should be unconditional. Programs use > TLS data without linking against libpthread. And glibc 2.34 might not > have a separate libpthread at all. Ok, currently GDB attempts to load libthread_db when noticing the main objfile / program (I guess it is needed if the program is statically linked to libpthread?) or when seeing a library named libpthread*. I'm not sure how to fix this, other than making GDB attempt to load libthread_db on every new shared library it notices, since that new shared library may "finally" make it work. The current code specifically exists to avoid trying to load libthread_db for every new shared library we notice, since that was considered wasteful. Here's the original thread about it: https://sourceware.org/pipermail/gdb-patches/2011-October/085781.html https://pi.simark.ca/gdb-patches/20111005182705.D744E2461D1@ruffy.mtv.corp.google.com/ About the hypothetical scenario for glibc 2.34: do you mean that the pthread infrastructure will directly be in libc.so? If so, our current strategy of attempting to load libthread_db only for the main program or a libpthread* library will indeed not work. And I suppose that will also require trying to load libthread_db on every new shared lib... Simon
* Simon Marchi: >>> If we have to deal with this, I guess that GDB should now do things in a >>> different order: go through the whole library list and load their >>> symbols. And then if one of those libraries were libpthread, try to >>> initialize libthread_db. >> >> Initialization of libthread_db should be unconditional. Programs use >> TLS data without linking against libpthread. And glibc 2.34 might not >> have a separate libpthread at all. > > Ok, currently GDB attempts to load libthread_db when noticing the main > objfile / program (I guess it is needed if the program is statically > linked to libpthread?) or when seeing a library named libpthread*. Would it be possible to load libthread_db unconditionally after loading all shared objects? Then it is loaded only once. > About the hypothetical scenario for glibc 2.34: do you mean that the > pthread infrastructure will directly be in libc.so? If so, our current > strategy of attempting to load libthread_db only for the main program > or a libpthread* library will indeed not work. And I suppose that will > also require trying to load libthread_db on every new shared lib... I think one loading attempt is enough, after all shared objects are available. In both the attaching and starting case, libpthread will be seen by libthread_db if it is there. I do not think it is necessary to try loading libthread_db again for each dlopen. Maybe you could restrict that to trigger on libpthread, but then dlopen of libpthread does not really work today. Thanks, Florian
* Florian Weimer via Gdb-patches: > * Simon Marchi: > >>>> If we have to deal with this, I guess that GDB should now do things in a >>>> different order: go through the whole library list and load their >>>> symbols. And then if one of those libraries were libpthread, try to >>>> initialize libthread_db. >>> >>> Initialization of libthread_db should be unconditional. Programs use >>> TLS data without linking against libpthread. And glibc 2.34 might not >>> have a separate libpthread at all. >> >> Ok, currently GDB attempts to load libthread_db when noticing the main >> objfile / program (I guess it is needed if the program is statically >> linked to libpthread?) or when seeing a library named libpthread*. > > Would it be possible to load libthread_db unconditionally after loading > all shared objects? Then it is loaded only once. > >> About the hypothetical scenario for glibc 2.34: do you mean that the >> pthread infrastructure will directly be in libc.so? If so, our current >> strategy of attempting to load libthread_db only for the main program >> or a libpthread* library will indeed not work. And I suppose that will >> also require trying to load libthread_db on every new shared lib... > > I think one attempt loading is enough, after all shared objects are > available. In both the attaching and starting case, libpthread will be > seen by libthread_db if it is there. I do not think it is necessary to > try loading libpthread_db again for each dlopen. Maybe you could > restrict that to trigger on libpthread, but then dlopen of libpthread > does not really work today. I would appreciate it if we could make some progress on this issue. Please let me know if you need glibc test builds or something in that area. Thanks.
On 2021-03-29 4:26 a.m., Florian Weimer wrote: > * Florian Weimer via Gdb-patches: > >> * Simon Marchi: >> >>>>> If we have to deal with this, I guess that GDB should now do things in a >>>>> different order: go through the whole library list and load their >>>>> symbols. And then if one of those libraries were libpthread, try to >>>>> initialize libthread_db. >>>> >>>> Initialization of libthread_db should be unconditional. Programs use >>>> TLS data without linking against libpthread. And glibc 2.34 might not >>>> have a separate libpthread at all. >>> >>> Ok, currently GDB attempts to load libthread_db when noticing the main >>> objfile / program (I guess it is needed if the program is statically >>> linked to libpthread?) or when seeing a library named libpthread*. >> >> Would it be possible to load libthread_db unconditionally after loading >> all shared objects? Then it is loaded only once. >> >>> About the hypothetical scenario for glibc 2.34: do you mean that the >>> pthread infrastructure will directly be in libc.so? If so, our current >>> strategy of attempting to load libthread_db only for the main program >>> or a libpthread* library will indeed not work. And I suppose that will >>> also require trying to load libthread_db on every new shared lib... >> >> I think one attempt loading is enough, after all shared objects are >> available. In both the attaching and starting case, libpthread will be >> seen by libthread_db if it is there. I do not think it is necessary to >> try loading libpthread_db again for each dlopen. Maybe you could >> restrict that to trigger on libpthread, but then dlopen of libpthread >> does not really work today. > > I would appreciate if we could make some progress on this issue. > Please let me know if you need glibc test builds or something in that > area. Thanks. Hi Florian, I'll try to look into it, but I can't promise anything as I have nearly zero free / personal time for GDB these days. Simon
diff --git a/csu/libc-tls.c b/csu/libc-tls.c index 06e76bd395..c3589f0a7d 100644 --- a/csu/libc-tls.c +++ b/csu/libc-tls.c @@ -24,6 +24,7 @@ #include <stdio.h> #include <sys/param.h> #include <array_length.h> +#include <list.h> #ifdef SHARED #error makefile bug, this file is for static only @@ -193,6 +194,12 @@ __libc_setup_tls (void) if (__builtin_expect (lossage != NULL, 0)) _startup_fatal (lossage); +#if THREAD_GSCOPE_IN_TCB + INIT_LIST_HEAD (&_dl_stack_used); + INIT_LIST_HEAD (&_dl_stack_user); + list_add (&THREAD_SELF->list, &_dl_stack_user); +#endif + /* Update the executable's link map with enough information to make the TLS routines happy. */ main_map->l_tls_align = align; diff --git a/elf/Makefile b/elf/Makefile index f10cc59e7c..2015383eb2 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -34,7 +34,8 @@ dl-routines = $(addprefix dl-,load lookup object reloc deps \ version profile tls origin scope \ execstack open close trampoline \ exception sort-maps lookup-direct \ - call-libc-early-init write) + call-libc-early-init write \ + thread_gscope_wait) ifeq (yes,$(use-ldconfig)) dl-routines += dl-cache endif @@ -54,7 +55,8 @@ endif all-dl-routines = $(dl-routines) $(sysdep-dl-routines) # But they are absent from the shared libc, because that code is in ld.so. elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \ - dl-sysdep dl-exception dl-reloc-static-pie + dl-sysdep dl-exception dl-reloc-static-pie \ + thread_gscope_wait # ld.so uses those routines, plus some special stuff for being the program # interpreter and operating independent of libc. diff --git a/elf/dl-support.c b/elf/dl-support.c index afbc94df54..15e6d787c9 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -183,10 +183,11 @@ ElfW(Word) _dl_stack_flags = DEFAULT_STACK_PERMS; int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable; -/* Function in libpthread to wait for termination of lookups. 
*/ -void (*_dl_wait_lookup_done) (void); - -#if !THREAD_GSCOPE_IN_TCB +#if THREAD_GSCOPE_IN_TCB +list_t _dl_stack_used; +list_t _dl_stack_user; +int _dl_stack_cache_lock; +#else int _dl_thread_gscope_count; #endif struct dl_scope_free_list *_dl_scope_free_list; diff --git a/elf/dl-thread_gscope_wait.c b/elf/dl-thread_gscope_wait.c new file mode 100644 index 0000000000..28e1494730 --- /dev/null +++ b/elf/dl-thread_gscope_wait.c @@ -0,0 +1,2 @@ +/* By default, the dynamic linker does not use an out-of-line + __thread_gscope_wait function. */ diff --git a/elf/rtld.c b/elf/rtld.c index 5d117d0d2c..c4ffc8d4b7 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -48,6 +48,7 @@ #include <array_length.h> #include <libc-early-init.h> #include <dl-main.h> +#include <list.h> #include <assert.h> @@ -799,6 +800,9 @@ cannot allocate TLS data structures for initial thread\n"); const char *lossage = TLS_INIT_TP (tcbp); if (__glibc_unlikely (lossage != NULL)) _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage); +#if THREAD_GSCOPE_IN_TCB + list_add (&THREAD_SELF->list, &GL (dl_stack_user)); +#endif tls_init_tp_called = true; return tcbp; @@ -1139,6 +1143,11 @@ dl_main (const ElfW(Phdr) *phdr, GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive; #endif +#if THREAD_GSCOPE_IN_TCB + INIT_LIST_HEAD (&GL (dl_stack_used)); + INIT_LIST_HEAD (&GL (dl_stack_user)); +#endif + /* The explicit initialization here is cheaper than processing the reloc in the _rtld_local definition's initializer. */ GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable; @@ -2383,6 +2392,9 @@ dl_main (const ElfW(Phdr) *phdr, if (__glibc_unlikely (lossage != NULL)) _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage); +#if THREAD_GSCOPE_IN_TCB + list_add (&THREAD_SELF->list, &GL (dl_stack_user)); +#endif } /* Make sure no new search directories have been added. 
*/ diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index 4b45f8c884..b7f9eeebf6 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -106,26 +106,14 @@ static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40MiBi by default. */ static size_t stack_cache_actsize; -/* Mutex protecting this variable. */ -static int stack_cache_lock = LLL_LOCK_INITIALIZER; - /* List of queued stack frames. */ static LIST_HEAD (stack_cache); -/* List of the stacks in use. */ -static LIST_HEAD (stack_used); - /* We need to record what list operations we are going to do so that, in case of an asynchronous interruption due to a fork() call, we can correct for the work. */ static uintptr_t in_flight_stack; -/* List of the threads with user provided stacks in use. No need to - initialize this, since it's done in __pthread_initialize_minimal. */ -list_t __stack_user __attribute__ ((nocommon)); -hidden_data_def (__stack_user) - - /* Check whether the stack is still used or not. */ #define FREE_P(descr) ((descr)->tid <= 0) @@ -173,7 +161,7 @@ get_cached_stack (size_t *sizep, void **memp) struct pthread *result = NULL; list_t *entry; - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Search the cache for a matching entry. We search for the smallest stack which has at least the required size. Note that @@ -206,7 +194,7 @@ get_cached_stack (size_t *sizep, void **memp) || __builtin_expect (result->stackblock_size > 4 * size, 0)) { /* Release the lock. */ - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return NULL; } @@ -218,13 +206,13 @@ get_cached_stack (size_t *sizep, void **memp) stack_list_del (&result->list); /* And add to the list of stacks in use. */ - stack_list_add (&result->list, &stack_used); + stack_list_add (&result->list, &GL (dl_stack_used)); /* And decrease the cache size. */ stack_cache_actsize -= result->stackblock_size; /* Release the lock early. 
*/ - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Report size and location of the stack to the caller. */ *sizep = result->stackblock_size; @@ -510,12 +498,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, /* Prepare to modify global data. */ - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* And add to the list of stacks in use. */ - list_add (&pd->list, &__stack_user); + list_add (&pd->list, &GL (dl_stack_user)); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); } else { @@ -644,12 +632,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, /* Prepare to modify global data. */ - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* And add to the list of stacks in use. */ - stack_list_add (&pd->list, &stack_used); + stack_list_add (&pd->list, &GL (dl_stack_used)); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* There might have been a race. Another thread might have @@ -690,12 +678,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, if (__mprotect (guard, guardsize, PROT_NONE) != 0) { mprot_error: - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Remove the thread from the list. */ stack_list_del (&pd->list); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Get rid of the TLS block we allocated. */ _dl_deallocate_tls (TLS_TPADJ (pd), false); @@ -799,7 +787,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, void __deallocate_stack (struct pthread *pd) { - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Remove the thread from the list of threads with user defined stacks. 
*/ @@ -815,7 +803,7 @@ __deallocate_stack (struct pthread *pd) /* Free the memory associated with the ELF TLS. */ _dl_deallocate_tls (TLS_TPADJ (pd), false); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); } @@ -831,10 +819,10 @@ __make_stacks_executable (void **stack_endp) const size_t pagemask = ~(__getpagesize () - 1); #endif - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { err = change_stack_perm (list_entry (runp, struct pthread, list) #ifdef NEED_SEPARATE_REGISTER_STACK @@ -860,7 +848,7 @@ __make_stacks_executable (void **stack_endp) break; } - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return err; } @@ -891,8 +879,8 @@ __reclaim_stacks (void) pointers at the head of the list are inconsistent. */ list_t *l = NULL; - if (stack_used.next->prev != &stack_used) - l = &stack_used; + if (GL (dl_stack_used).next->prev != &GL (dl_stack_used)) + l = &GL (dl_stack_used); else if (stack_cache.next->prev != &stack_cache) l = &stack_cache; @@ -914,7 +902,7 @@ __reclaim_stacks (void) /* Mark all stacks except the still running one as free. */ list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *curp = list_entry (runp, struct pthread, list); if (curp != self) @@ -948,7 +936,7 @@ __reclaim_stacks (void) } /* Add the stack of all running threads to the cache. */ - list_splice (&stack_used, &stack_cache); + list_splice (&GL (dl_stack_used), &stack_cache); /* Remove the entry for the current thread to from the cache list and add it to the list of running threads. Which of the two @@ -956,13 +944,13 @@ __reclaim_stacks (void) stack_list_del (&self->list); /* Re-initialize the lists for all the threads. 
*/ - INIT_LIST_HEAD (&stack_used); - INIT_LIST_HEAD (&__stack_user); + INIT_LIST_HEAD (&GL (dl_stack_used)); + INIT_LIST_HEAD (&GL (dl_stack_user)); if (__glibc_unlikely (THREAD_GETMEM (self, user_stack))) - list_add (&self->list, &__stack_user); + list_add (&self->list, &GL (dl_stack_user)); else - list_add (&self->list, &stack_used); + list_add (&self->list, &GL (dl_stack_used)); /* There is one thread running. */ __nptl_nthreads = 1; @@ -970,7 +958,7 @@ __reclaim_stacks (void) in_flight_stack = 0; /* Initialize locks. */ - stack_cache_lock = LLL_LOCK_INITIALIZER; + GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER; __default_pthread_attr_lock = LLL_LOCK_INITIALIZER; } @@ -1083,7 +1071,7 @@ __nptl_setxid (struct xid_command *cmdp) { int signalled; int result; - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); __xidcmd = cmdp; cmdp->cntr = 0; @@ -1093,7 +1081,7 @@ __nptl_setxid (struct xid_command *cmdp) /* Iterate over the list with system-allocated threads first. */ list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1103,7 +1091,7 @@ __nptl_setxid (struct xid_command *cmdp) } /* Now the list with threads using user-allocated stacks. 
*/ - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1119,7 +1107,7 @@ __nptl_setxid (struct xid_command *cmdp) { signalled = 0; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1128,7 +1116,7 @@ __nptl_setxid (struct xid_command *cmdp) signalled += setxid_signal_thread (cmdp, t); } - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1149,7 +1137,7 @@ __nptl_setxid (struct xid_command *cmdp) /* Clean up flags, so that no thread blocks during exit waiting for a signal which will never come. */ - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1158,7 +1146,7 @@ __nptl_setxid (struct xid_command *cmdp) setxid_unmark_thread (cmdp, t); } - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1180,7 +1168,7 @@ __nptl_setxid (struct xid_command *cmdp) } __nptl_setxid_error (cmdp, error); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return result; } @@ -1204,75 +1192,16 @@ void attribute_hidden __pthread_init_static_tls (struct link_map *map) { - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Iterate over the list with system-allocated threads first. */ list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) init_one_static_tls (list_entry (runp, struct pthread, list), map); /* Now the list with threads using user-allocated stacks. 
*/ - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) init_one_static_tls (list_entry (runp, struct pthread, list), map); - lll_unlock (stack_cache_lock, LLL_PRIVATE); -} - - -void -attribute_hidden -__wait_lookup_done (void) -{ - lll_lock (stack_cache_lock, LLL_PRIVATE); - - struct pthread *self = THREAD_SELF; - - /* Iterate over the list with system-allocated threads first. */ - list_t *runp; - list_for_each (runp, &stack_used) - { - struct pthread *t = list_entry (runp, struct pthread, list); - if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) - continue; - - int *const gscope_flagp = &t->header.gscope_flag; - - /* We have to wait until this thread is done with the global - scope. First tell the thread that we are waiting and - possibly have to be woken. */ - if (atomic_compare_and_exchange_bool_acq (gscope_flagp, - THREAD_GSCOPE_FLAG_WAIT, - THREAD_GSCOPE_FLAG_USED)) - continue; - - do - futex_wait_simple ((unsigned int *) gscope_flagp, - THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); - while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); - } - - /* Now the list with threads using user-allocated stacks. */ - list_for_each (runp, &__stack_user) - { - struct pthread *t = list_entry (runp, struct pthread, list); - if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) - continue; - - int *const gscope_flagp = &t->header.gscope_flag; - - /* We have to wait until this thread is done with the global - scope. First tell the thread that we are waiting and - possibly have to be woken. 
*/ - if (atomic_compare_and_exchange_bool_acq (gscope_flagp, - THREAD_GSCOPE_FLAG_WAIT, - THREAD_GSCOPE_FLAG_USED)) - continue; - - do - futex_wait_simple ((unsigned int *) gscope_flagp, - THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); - while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); - } - - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); } diff --git a/nptl/descr.h b/nptl/descr.h index d8343ff9a1..b172ee408b 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -162,7 +162,8 @@ struct pthread void *__padding[24]; }; - /* This descriptor's link on the `stack_used' or `__stack_user' list. */ + /* This descriptor's link on the GL (dl_stack_used) or + GL (dl_stack_user) list. */ list_t list; /* Thread ID - which is also a 'is this thread descriptor (and diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c index 4aa1231bec..53b817715d 100644 --- a/nptl/nptl-init.c +++ b/nptl/nptl-init.c @@ -251,12 +251,9 @@ __pthread_initialize_minimal_internal (void) purposes this is good enough. */ THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end); - /* Initialize the list of all running threads with the main thread. */ - INIT_LIST_HEAD (&__stack_user); - list_add (&pd->list, &__stack_user); - - /* Before initializing __stack_user, the debugger could not find us and - had to set __nptl_initial_report_events. Propagate its setting. */ + /* Before initializing GL (dl_stack_user), the debugger could not + find us and had to set __nptl_initial_report_events. Propagate + its setting. */ THREAD_SETMEM (pd, report_events, __nptl_initial_report_events); struct sigaction sa; @@ -336,8 +333,6 @@ __pthread_initialize_minimal_internal (void) GL(dl_init_static_tls) = &__pthread_init_static_tls; - GL(dl_wait_lookup_done) = &__wait_lookup_done; - /* Register the fork generation counter with the libc. 
*/ #ifndef TLS_MULTIPLE_THREADS_IN_TCB __libc_multiple_threads_ptr = diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h index 686f54aa93..a7510f9f63 100644 --- a/nptl/pthreadP.h +++ b/nptl/pthreadP.h @@ -208,10 +208,6 @@ extern void __default_pthread_attr_freeres (void) attribute_hidden; extern size_t __static_tls_size attribute_hidden; extern size_t __static_tls_align_m1 attribute_hidden; -/* Thread descriptor handling. */ -extern list_t __stack_user; -hidden_proto (__stack_user) - /* Attribute handling. */ extern struct pthread_attr *__attr_list attribute_hidden; extern int __attr_list_lock attribute_hidden; diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c index 447f005ece..bad4e57a84 100644 --- a/nptl/pthread_create.c +++ b/nptl/pthread_create.c @@ -213,9 +213,9 @@ __find_in_stack_list (struct pthread *pd) list_t *entry; struct pthread *result = NULL; - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); - list_for_each (entry, &stack_used) + list_for_each (entry, &GL (dl_stack_used)) { struct pthread *curp; @@ -228,7 +228,7 @@ __find_in_stack_list (struct pthread *pd) } if (result == NULL) - list_for_each (entry, &__stack_user) + list_for_each (entry, &GL (dl_stack_user)) { struct pthread *curp; @@ -240,7 +240,7 @@ __find_in_stack_list (struct pthread *pd) } } - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return result; } diff --git a/nptl_db/structs.def b/nptl_db/structs.def index 472e9008fa..2ca33598b1 100644 --- a/nptl_db/structs.def +++ b/nptl_db/structs.def @@ -69,8 +69,6 @@ DB_STRUCT (td_eventbuf_t) DB_STRUCT_FIELD (td_eventbuf_t, eventnum) DB_STRUCT_FIELD (td_eventbuf_t, eventdata) -DB_SYMBOL (stack_used) -DB_SYMBOL (__stack_user) DB_SYMBOL (nptl_version) DB_FUNCTION (__nptl_create_event) DB_FUNCTION (__nptl_death_event) @@ -106,6 +104,8 @@ DB_STRUCT (rtld_global) DB_RTLD_VARIABLE (_rtld_global) #endif DB_RTLD_GLOBAL_FIELD (dl_tls_dtv_slotinfo_list) 
+DB_RTLD_GLOBAL_FIELD (dl_stack_user) +DB_RTLD_GLOBAL_FIELD (dl_stack_used) DB_STRUCT (dtv_slotinfo_list) DB_STRUCT_FIELD (dtv_slotinfo_list, len) diff --git a/nptl_db/td_init.c b/nptl_db/td_init.c index 86773dcb06..79a1f90312 100644 --- a/nptl_db/td_init.c +++ b/nptl_db/td_init.c @@ -29,3 +29,17 @@ td_init (void) LOG ("td_init"); return TD_OK; } + +bool +__td_ta_rtld_global (td_thragent_t *ta) +{ + if (ta->ta_addr__rtld_global == 0 + && td_mod_lookup (ta->ph, LD_SO, SYM__rtld_global, + &ta->ta_addr__rtld_global) != PS_OK) + { + ta->ta_addr__rtld_global = (void*)-1; + return false; + } + else + return ta->ta_addr__rtld_global != (void*)-1; +} diff --git a/nptl_db/td_ta_map_lwp2thr.c b/nptl_db/td_ta_map_lwp2thr.c index d04d4bec23..81ff48808c 100644 --- a/nptl_db/td_ta_map_lwp2thr.c +++ b/nptl_db/td_ta_map_lwp2thr.c @@ -187,7 +187,7 @@ td_ta_map_lwp2thr (const td_thragent_t *ta_arg, fake a special descriptor for the initial thread. */ psaddr_t list; - td_err_e err = DB_GET_SYMBOL (list, ta, __stack_user); + td_err_e err = __td_ta_stack_user (ta, &list); if (err != TD_OK) return err; diff --git a/nptl_db/td_ta_thr_iter.c b/nptl_db/td_ta_thr_iter.c index d59782bd6e..e406bcbd39 100644 --- a/nptl_db/td_ta_thr_iter.c +++ b/nptl_db/td_ta_thr_iter.c @@ -133,14 +133,14 @@ td_ta_thr_iter (const td_thragent_t *ta_arg, td_thr_iter_f *callback, have to iterate over both lists separately. We start with the list of threads with user-defined stacks. */ - err = DB_GET_SYMBOL (list, ta, __stack_user); + err = __td_ta_stack_user (ta, &list); if (err == TD_OK) err = iterate_thread_list (ta, callback, cbdata_p, state, ti_pri, list, true); /* And the threads with stacks allocated by the implementation. 
*/ if (err == TD_OK) - err = DB_GET_SYMBOL (list, ta, stack_used); + err = __td_ta_stack_used (ta, &list); if (err == TD_OK) err = iterate_thread_list (ta, callback, cbdata_p, state, ti_pri, list, false); diff --git a/nptl_db/td_thr_tlsbase.c b/nptl_db/td_thr_tlsbase.c index 8198934112..eeba0a04d8 100644 --- a/nptl_db/td_thr_tlsbase.c +++ b/nptl_db/td_thr_tlsbase.c @@ -28,12 +28,7 @@ dtv_slotinfo_list (td_thragent_t *ta, td_err_e err; psaddr_t head; - if (ta->ta_addr__rtld_global == 0 - && td_mod_lookup (ta->ph, LD_SO, SYM__rtld_global, - &ta->ta_addr__rtld_global) != PS_OK) - ta->ta_addr__rtld_global = (void*)-1; - - if (ta->ta_addr__rtld_global != (void*)-1) + if (__td_ta_rtld_global (ta)) { err = DB_GET_FIELD (head, ta, ta->ta_addr__rtld_global, rtld_global, _dl_tls_dtv_slotinfo_list, 0); diff --git a/nptl_db/td_thr_validate.c b/nptl_db/td_thr_validate.c index c709c5e9be..d2f4107cf2 100644 --- a/nptl_db/td_thr_validate.c +++ b/nptl_db/td_thr_validate.c @@ -20,6 +20,41 @@ #include "thread_dbP.h" #include <stdbool.h> +td_err_e +__td_ta_stack_user (td_thragent_t *ta, psaddr_t *plist) +{ + if (__td_ta_rtld_global (ta)) + return DB_GET_FIELD_ADDRESS (*plist, ta, ta->ta_addr__rtld_global, + rtld_global, _dl_stack_user, 0); + else + { + if (ta->ta_addr__dl_stack_user == 0 + && td_mod_lookup (ta->ph, NULL, SYM__dl_stack_user, + &ta->ta_addr__dl_stack_user) != PS_OK) + return TD_ERR; + *plist = ta->ta_addr__dl_stack_user; + return TD_OK; + } +} + +td_err_e +__td_ta_stack_used (td_thragent_t *ta, psaddr_t *plist) +{ + + if (__td_ta_rtld_global (ta)) + return DB_GET_FIELD_ADDRESS (*plist, ta, ta->ta_addr__rtld_global, + rtld_global, _dl_stack_used, 0); + else + { + if (ta->ta_addr__dl_stack_used == 0 + && td_mod_lookup (ta->ph, NULL, SYM__dl_stack_used, + &ta->ta_addr__dl_stack_used) != PS_OK) + return TD_ERR; + *plist = ta->ta_addr__dl_stack_used; + return TD_OK; + } +} + static td_err_e check_thread_list (const td_thrhandle_t *th, psaddr_t head, bool *uninit) { @@ -62,7 
+97,7 @@ td_thr_validate (const td_thrhandle_t *th) /* First check the list with threads using user allocated stacks. */ bool uninit = false; - err = DB_GET_SYMBOL (list, th->th_ta_p, __stack_user); + err = __td_ta_stack_user (th->th_ta_p, &list); if (err == TD_OK) err = check_thread_list (th, list, &uninit); @@ -70,7 +105,7 @@ td_thr_validate (const td_thrhandle_t *th) using implementation allocated stacks. */ if (err == TD_NOTHR) { - err = DB_GET_SYMBOL (list, th->th_ta_p, stack_used); + err = __td_ta_stack_used (th->th_ta_p, &list); if (err == TD_OK) err = check_thread_list (th, list, &uninit); diff --git a/nptl_db/thread_dbP.h b/nptl_db/thread_dbP.h index 8a666b8d4f..9574e68d70 100644 --- a/nptl_db/thread_dbP.h +++ b/nptl_db/thread_dbP.h @@ -269,4 +269,17 @@ extern td_err_e _td_check_sizeof (td_thragent_t *ta, uint32_t *sizep, extern td_err_e __td_ta_lookup_th_unique (const td_thragent_t *ta, lwpid_t lwpid, td_thrhandle_t *th); +/* Try to initialize TA->ta_addr__rtld_global. Return true on + success, false on failure (which may be cached). */ +bool __td_ta_rtld_global (td_thragent_t *ta) attribute_hidden; + +/* Obtain the address of the list_t fields _dl_stack_user and + _dl_stack_used in _rtld_global, or fall back to the global + variables of the same name (to support statically linked + programs). 
*/ +td_err_e __td_ta_stack_user (td_thragent_t *ta, psaddr_t *plist) + attribute_hidden; +td_err_e __td_ta_stack_used (td_thragent_t *ta, psaddr_t *plist) + attribute_hidden; + #endif /* thread_dbP.h */ diff --git a/sysdeps/aarch64/nptl/tls.h b/sysdeps/aarch64/nptl/tls.h index e5627f777e..dbef4704f1 100644 --- a/sysdeps/aarch64/nptl/tls.h +++ b/sysdeps/aarch64/nptl/tls.h @@ -129,8 +129,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -# define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () # endif /* __ASSEMBLER__ */ diff --git a/sysdeps/alpha/nptl/tls.h b/sysdeps/alpha/nptl/tls.h index 82549607fd..f3101f57c9 100644 --- a/sysdeps/alpha/nptl/tls.h +++ b/sysdeps/alpha/nptl/tls.h @@ -123,8 +123,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #else /* __ASSEMBLER__ */ # include <tcb-offsets.h> diff --git a/sysdeps/arc/nptl/tls.h b/sysdeps/arc/nptl/tls.h index 184b550ab5..cc5abb1931 100644 --- a/sysdeps/arc/nptl/tls.h +++ b/sysdeps/arc/nptl/tls.h @@ -131,8 +131,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* !__ASSEMBLER__ */ diff --git a/sysdeps/arm/nptl/tls.h b/sysdeps/arm/nptl/tls.h index 24a488ff37..91dd7498e4 100644 --- a/sysdeps/arm/nptl/tls.h +++ b/sysdeps/arm/nptl/tls.h @@ -120,8 +120,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/csky/nptl/tls.h b/sysdeps/csky/nptl/tls.h index bcca9674a1..f308773d40 100644 --- a/sysdeps/csky/nptl/tls.h +++ b/sysdeps/csky/nptl/tls.h @@ -147,8 +147,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -# define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 382eeb9be0..b1da03cafe 100644 --- a/sysdeps/generic/ldsodefs.h +++ 
b/sysdeps/generic/ldsodefs.h @@ -38,6 +38,7 @@ #include <libc-lock.h> #include <hp-timing.h> #include <tls.h> +#include <list_t.h> __BEGIN_DECLS @@ -461,15 +462,22 @@ struct rtld_global EXTERN void (*_dl_init_static_tls) (struct link_map *); - EXTERN void (*_dl_wait_lookup_done) (void); - /* Scopes to free after next THREAD_GSCOPE_WAIT (). */ EXTERN struct dl_scope_free_list { size_t count; void *list[50]; } *_dl_scope_free_list; -#if !THREAD_GSCOPE_IN_TCB +#if THREAD_GSCOPE_IN_TCB + /* List of active thread stacks, with memory managed by glibc. */ + EXTERN list_t _dl_stack_used; + + /* List of thread stacks that were allocated by the application. */ + EXTERN list_t _dl_stack_user; + + /* Mutex protecting the stack lists. */ + EXTERN int _dl_stack_cache_lock; +#else EXTERN int _dl_thread_gscope_count; #endif #ifdef SHARED @@ -1252,6 +1260,11 @@ link_map_audit_state (struct link_map *l, size_t index) } #endif /* SHARED */ +#if THREAD_GSCOPE_IN_TCB +void __thread_gscope_wait (void) attribute_hidden; +# define THREAD_GSCOPE_WAIT() __thread_gscope_wait () +#endif + __END_DECLS #endif /* ldsodefs.h */ diff --git a/sysdeps/hppa/nptl/tls.h b/sysdeps/hppa/nptl/tls.h index 2315340735..f319cb42e2 100644 --- a/sysdeps/hppa/nptl/tls.h +++ b/sysdeps/hppa/nptl/tls.h @@ -154,8 +154,6 @@ static inline void __set_cr27(struct pthread *cr27) atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* !__ASSEMBLER__ */ diff --git a/sysdeps/i386/nptl/tls.h b/sysdeps/i386/nptl/tls.h index 5042d52b98..b74347bacd 100644 --- a/sysdeps/i386/nptl/tls.h +++ b/sysdeps/i386/nptl/tls.h @@ -387,8 +387,6 @@ tls_fill_user_desc (union user_desc_init *desc, while (0) #define THREAD_GSCOPE_SET_FLAG() \ THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/ia64/nptl/tls.h b/sysdeps/ia64/nptl/tls.h index f5b1684ef3..42b082dad6 
100644 --- a/sysdeps/ia64/nptl/tls.h +++ b/sysdeps/ia64/nptl/tls.h @@ -175,8 +175,6 @@ register struct pthread *__thread_self __asm__("r13"); atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/m68k/nptl/tls.h b/sysdeps/m68k/nptl/tls.h index 68ea952e79..69e174484e 100644 --- a/sysdeps/m68k/nptl/tls.h +++ b/sysdeps/m68k/nptl/tls.h @@ -153,8 +153,6 @@ extern void * __m68k_read_tp (void); atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/microblaze/nptl/tls.h b/sysdeps/microblaze/nptl/tls.h index a094fdb798..78f63019dc 100644 --- a/sysdeps/microblaze/nptl/tls.h +++ b/sysdeps/microblaze/nptl/tls.h @@ -136,8 +136,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -# define THREAD_GSCOPE_WAIT() \ - GL (dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/mips/nptl/tls.h b/sysdeps/mips/nptl/tls.h index 8b55f19c37..7e7ac43d6c 100644 --- a/sysdeps/mips/nptl/tls.h +++ b/sysdeps/mips/nptl/tls.h @@ -178,8 +178,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/nios2/nptl/tls.h b/sysdeps/nios2/nptl/tls.h index facb27c761..776fe9bb2d 100644 --- a/sysdeps/nios2/nptl/tls.h +++ b/sysdeps/nios2/nptl/tls.h @@ -157,8 +157,6 @@ register struct pthread *__thread_self __asm__("r23"); atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/nptl/dl-thread_gscope_wait.c b/sysdeps/nptl/dl-thread_gscope_wait.c new file mode 100644 index 0000000000..a9bfbee24c --- /dev/null +++ b/sysdeps/nptl/dl-thread_gscope_wait.c @@ -0,0 +1,80 @@ +/* Out-of-line notification function for the GSCOPE locking mechanism. + Copyright (C) 2007-2020 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <nptl/descr.h> +#include <futex-internal.h> +#include <ldsodefs.h> +#include <list.h> +#include <lowlevellock.h> + +void +__thread_gscope_wait (void) +{ + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); + + struct pthread *self = THREAD_SELF; + + /* Iterate over the list with system-allocated threads first. */ + list_t *runp; + list_for_each (runp, &GL (dl_stack_used)) + { + struct pthread *t = list_entry (runp, struct pthread, list); + if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) + continue; + + int *const gscope_flagp = &t->header.gscope_flag; + + /* We have to wait until this thread is done with the global + scope. First tell the thread that we are waiting and + possibly have to be woken. */ + if (atomic_compare_and_exchange_bool_acq (gscope_flagp, + THREAD_GSCOPE_FLAG_WAIT, + THREAD_GSCOPE_FLAG_USED)) + continue; + + do + futex_wait_simple ((unsigned int *) gscope_flagp, + THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); + while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); + } + + /* Now the list with threads using user-allocated stacks. 
*/ + list_for_each (runp, &GL (dl_stack_user)) + { + struct pthread *t = list_entry (runp, struct pthread, list); + if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) + continue; + + int *const gscope_flagp = &t->header.gscope_flag; + + /* We have to wait until this thread is done with the global + scope. First tell the thread that we are waiting and + possibly have to be woken. */ + if (atomic_compare_and_exchange_bool_acq (gscope_flagp, + THREAD_GSCOPE_FLAG_WAIT, + THREAD_GSCOPE_FLAG_USED)) + continue; + + do + futex_wait_simple ((unsigned int *) gscope_flagp, + THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); + while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); + } + + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); +} diff --git a/sysdeps/powerpc/nptl/tls.h b/sysdeps/powerpc/nptl/tls.h index b1f0b30fbb..261eecfd18 100644 --- a/sysdeps/powerpc/nptl/tls.h +++ b/sysdeps/powerpc/nptl/tls.h @@ -245,8 +245,6 @@ register void *__thread_register __asm__ ("r13"); atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/riscv/nptl/tls.h b/sysdeps/riscv/nptl/tls.h index a9167bc143..41d9db10cf 100644 --- a/sysdeps/riscv/nptl/tls.h +++ b/sysdeps/riscv/nptl/tls.h @@ -139,8 +139,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -# define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/s390/nptl/tls.h b/sysdeps/s390/nptl/tls.h index 4a9b3570eb..7653109617 100644 --- a/sysdeps/s390/nptl/tls.h +++ b/sysdeps/s390/nptl/tls.h @@ -185,8 +185,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/sh/nptl/tls.h b/sysdeps/sh/nptl/tls.h index 6ccad251f9..2d5e3731b2 100644 --- a/sysdeps/sh/nptl/tls.h +++ b/sysdeps/sh/nptl/tls.h @@ -161,8 +161,6 @@ typedef struct atomic_write_barrier (); \ } \ while (0) 
-#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/sparc/nptl/tls.h b/sysdeps/sparc/nptl/tls.h index 18a548e86b..b08a156131 100644 --- a/sysdeps/sparc/nptl/tls.h +++ b/sysdeps/sparc/nptl/tls.h @@ -158,8 +158,6 @@ register struct pthread *__thread_self __asm__("%g7"); atomic_write_barrier (); \ } \ while (0) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* !ASSEMBLER */ diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h index fbd7f9cb89..a08bf972de 100644 --- a/sysdeps/x86_64/nptl/tls.h +++ b/sysdeps/x86_64/nptl/tls.h @@ -332,8 +332,6 @@ _Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x80, while (0) # define THREAD_GSCOPE_SET_FLAG() \ THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED) -# define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () #endif /* __ASSEMBLER__ */