Message ID | 20210922075042.20132-1-szabolcs.nagy@arm.com |
---|---|
State | New |
Headers | show |
Series | [1/2] elf: Avoid deadlock between pthread_create and ctors [BZ #28357] | expand |
On 22/09/2021 04:50, Szabolcs Nagy via Libc-alpha wrote: > The fix for bug 19329 caused a regression such that pthread_create can > deadlock when concurrent ctors from dlopen are waiting for it to finish. > Use a new GL(dl_load_tls_lock) in pthread_create that is not taken > around ctors in dlopen. > > The new lock is also used in __tls_get_addr instead of GL(dl_load_lock). > > The new lock is held in _dl_open_worker and _dl_close_worker around > most of the logic before/after the init/fini routines. When init/fini Double space after period. > routines are running then TLS is in a consistent, usable state. > In _dl_open_worker the new lock requires catching and reraising dlopen > failures that happen in the critical section. I think this makes sense. > > The new lock is reinitialized in a fork child, to keep the existing > behaviour and it is kept recursive in case malloc interposition or tls > access from signal handlers can retake it. It is not obvious if this > is necessary or helps, but avoids changing the preexisting behaviour. I am not sure about it either, but the malloc tls usage is also a can of worms so I think it should be ok. > > The new lock may be more appropriate for dl_iterate_phdr too than > GL(dl_load_write_lock), since TLS state of an incompletely loaded > module may be accessed. If the new lock can replace the old one, > that can be a separate change. > > Fixes bug 28357. LGTM, but I think the testcase should be merged with this patch. 
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > elf/dl-close.c | 6 ++++++ > elf/dl-open.c | 35 ++++++++++++++++++++++++++++++++++- > elf/dl-support.c | 7 +++++++ > elf/dl-tls.c | 16 ++++++++-------- > elf/rtld.c | 1 + > posix/fork.c | 3 +++ > sysdeps/generic/ldsodefs.h | 9 ++++++++- > 7 files changed, 67 insertions(+), 10 deletions(-) > > diff --git a/elf/dl-close.c b/elf/dl-close.c > index 93ff5c96e9..cfe0f1c0c9 100644 > --- a/elf/dl-close.c > +++ b/elf/dl-close.c > @@ -549,6 +549,9 @@ _dl_close_worker (struct link_map *map, bool force) > size_t tls_free_end; > tls_free_start = tls_free_end = NO_TLS_OFFSET; > > + /* Protects global and module specitic TLS state. */ > + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); > + > /* We modify the list of loaded objects. */ > __rtld_lock_lock_recursive (GL(dl_load_write_lock)); > > @@ -784,6 +787,9 @@ _dl_close_worker (struct link_map *map, bool force) > GL(dl_tls_static_used) = tls_free_start; > } > > + /* TLS is cleaned up for the unloaded modules. */ > + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); > + > #ifdef SHARED > /* Auditing checkpoint: we have deleted all objects. */ > if (__glibc_unlikely (do_audit)) Ok. > diff --git a/elf/dl-open.c b/elf/dl-open.c > index a25443f6d1..071b678bf6 100644 > --- a/elf/dl-open.c > +++ b/elf/dl-open.c > @@ -66,6 +66,9 @@ struct dl_open_args > libc_map value in the namespace in case of a dlopen failure. */ > bool libc_already_loaded; > > + /* Set to true if the end of dl_open_worker_begin was reached. */ > + bool worker_continue; > + > /* Original parameters to the program and the current environment. */ > int argc; > char **argv; Ok. 
> @@ -482,7 +485,7 @@ call_dl_init (void *closure) > } > > static void > -dl_open_worker (void *a) > +dl_open_worker_begin (void *a) > { > struct dl_open_args *args = a; > const char *file = args->file; > @@ -774,6 +777,36 @@ dl_open_worker (void *a) > _dl_call_libc_early_init (libc_map, false); > } > > + args->worker_continue = true; > +} > + > +static void > +dl_open_worker (void *a) > +{ > + struct dl_open_args *args = a; > + > + args->worker_continue = false; > + > + { > + /* Protects global and module specific TLS state. */ > + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); > + > + struct dl_exception ex; > + int err = _dl_catch_exception (&ex, dl_open_worker_begin, args); > + > + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); > + > + if (__glibc_unlikely (ex.errstring != NULL)) > + /* Reraise the error. */ > + _dl_signal_exception (err, &ex, NULL); > + } > + > + if (!args->worker_continue) > + return; > + > + int mode = args->mode; > + struct link_map *new = args->map; > + > /* Run the initializer functions of new objects. Temporarily > disable the exception handler, so that lazy binding failures are > fatal. */ Ok. > diff --git a/elf/dl-support.c b/elf/dl-support.c > index 02e2ed72f5..2a2d42a09f 100644 > --- a/elf/dl-support.c > +++ b/elf/dl-support.c > @@ -228,6 +228,13 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock) > list of loaded objects while an object is added to or removed from > that list. */ > __rtld_lock_define_initialized_recursive (, _dl_load_write_lock) > + /* This lock protects global and module specific TLS related data. > + E.g it is held in dlopen and dlclose when GL(dl_tls_generation), I think 'E.g' requires an extra comma 'E.g.'. > + GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are > + accessed and when TLS related relocations are processed for > + a module. It was introduced to keep pthread_create accessing > + TLS state that is being set up. 
*/ > +__rtld_lock_define_initialized_recursive (, _dl_load_tls_lock) > > > #ifdef HAVE_AUX_VECTOR Ok. > diff --git a/elf/dl-tls.c b/elf/dl-tls.c > index d554ae4497..9260d2d696 100644 > --- a/elf/dl-tls.c > +++ b/elf/dl-tls.c > @@ -532,7 +532,7 @@ _dl_allocate_tls_init (void *result) > size_t maxgen = 0; > > /* Protects global dynamic TLS related state. */ > - __rtld_lock_lock_recursive (GL(dl_load_lock)); > + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); > > /* Check if the current dtv is big enough. */ > if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) Ok. > @@ -606,7 +606,7 @@ _dl_allocate_tls_init (void *result) > listp = listp->next; > assert (listp != NULL); > } > - __rtld_lock_unlock_recursive (GL(dl_load_lock)); > + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); > > /* The DTV version is up-to-date now. */ > dtv[0].counter = maxgen; Ok. > @@ -745,7 +745,7 @@ _dl_update_slotinfo (unsigned long int req_modid) > > Here the dtv needs to be updated to new_gen generation count. > > - This code may be called during TLS access when GL(dl_load_lock) > + This code may be called during TLS access when GL(dl_load_tls_lock) > is not held. In that case the user code has to synchronize with > dlopen and dlclose calls of relevant modules. A module m is > relevant if the generation of m <= new_gen and dlclose of m is Ok. > @@ -867,11 +867,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) > if (__glibc_unlikely (the_map->l_tls_offset > != FORCED_DYNAMIC_TLS_OFFSET)) > { > - __rtld_lock_lock_recursive (GL(dl_load_lock)); > + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); > if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET)) > { > the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET; > - __rtld_lock_unlock_recursive (GL(dl_load_lock)); > + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); > } > else if (__glibc_likely (the_map->l_tls_offset > != FORCED_DYNAMIC_TLS_OFFSET)) Ok. 
> @@ -883,7 +883,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) > #else > # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" > #endif > - __rtld_lock_unlock_recursive (GL(dl_load_lock)); > + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); > > dtv[GET_ADDR_MODULE].pointer.to_free = NULL; > dtv[GET_ADDR_MODULE].pointer.val = p; Ok. > @@ -891,7 +891,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) > return (char *) p + GET_ADDR_OFFSET; > } > else > - __rtld_lock_unlock_recursive (GL(dl_load_lock)); > + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); > } > struct dtv_pointer result = allocate_and_init (the_map); > dtv[GET_ADDR_MODULE].pointer = result; Ok. > @@ -962,7 +962,7 @@ _dl_tls_get_addr_soft (struct link_map *l) > return NULL; > > dtv_t *dtv = THREAD_DTV (); > - /* This may be called without holding the GL(dl_load_lock). Reading > + /* This may be called without holding the GL(dl_load_tls_lock). Reading > arbitrary gen value is fine since this is best effort code. */ > size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); > if (__glibc_unlikely (dtv[0].counter != gen)) Ok. > diff --git a/elf/rtld.c b/elf/rtld.c > index 742c413c48..096d1cc172 100644 > --- a/elf/rtld.c > +++ b/elf/rtld.c > @@ -322,6 +322,7 @@ struct rtld_global _rtld_global = > #ifdef _LIBC_REENTRANT > ._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, > ._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, > + ._dl_load_tls_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, > #endif > ._dl_nns = 1, > ._dl_ns = Ok. > diff --git a/posix/fork.c b/posix/fork.c > index c471f7b15f..021691b9b7 100644 > --- a/posix/fork.c > +++ b/posix/fork.c > @@ -99,6 +99,9 @@ __libc_fork (void) > /* Reset the lock the dynamic loader uses to protect its data. */ > __rtld_lock_initialize (GL(dl_load_lock)); > > + /* Reset the lock protecting dynamic TLS related data. 
*/ > + __rtld_lock_initialize (GL(dl_load_tls_lock)); > + > reclaim_stacks (); > > /* Run the handlers registered for the child. */ Ok. > diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h > index 6e50fcd7cd..f946744e5d 100644 > --- a/sysdeps/generic/ldsodefs.h > +++ b/sysdeps/generic/ldsodefs.h > @@ -372,6 +372,13 @@ struct rtld_global > list of loaded objects while an object is added to or removed > from that list. */ > __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock) > + /* This lock protects global and module specific TLS related data. > + E.g it is held in dlopen and dlclose when GL(dl_tls_generation), Same as before. > + GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are > + accessed and when TLS related relocations are processed for > + a module. It was introduced to keep pthread_create accessing > + TLS state that is being set up. */ > + __rtld_lock_define_recursive (EXTERN, _dl_load_tls_lock) > > /* Incremented whenever something may have been added to dl_loaded. */ > EXTERN unsigned long long _dl_load_adds; > @@ -1271,7 +1278,7 @@ extern int _dl_scope_free (void *) attribute_hidden; > > /* Add module to slot information data. If DO_ADD is false, only the > required memory is allocated. Must be called with GL > - (dl_load_lock) acquired. If the function has already been called > + (dl_load_tls_lock) acquired. If the function has already been called > for the link map L with !do_add, then this function will not raise > an exception, otherwise it is possible that it encounters a memory > allocation failure. */ > Ok.
diff --git a/elf/dl-close.c b/elf/dl-close.c index 93ff5c96e9..cfe0f1c0c9 100644 --- a/elf/dl-close.c +++ b/elf/dl-close.c @@ -549,6 +549,9 @@ _dl_close_worker (struct link_map *map, bool force) size_t tls_free_end; tls_free_start = tls_free_end = NO_TLS_OFFSET; + /* Protects global and module specitic TLS state. */ + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); + /* We modify the list of loaded objects. */ __rtld_lock_lock_recursive (GL(dl_load_write_lock)); @@ -784,6 +787,9 @@ _dl_close_worker (struct link_map *map, bool force) GL(dl_tls_static_used) = tls_free_start; } + /* TLS is cleaned up for the unloaded modules. */ + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + #ifdef SHARED /* Auditing checkpoint: we have deleted all objects. */ if (__glibc_unlikely (do_audit)) diff --git a/elf/dl-open.c b/elf/dl-open.c index a25443f6d1..071b678bf6 100644 --- a/elf/dl-open.c +++ b/elf/dl-open.c @@ -66,6 +66,9 @@ struct dl_open_args libc_map value in the namespace in case of a dlopen failure. */ bool libc_already_loaded; + /* Set to true if the end of dl_open_worker_begin was reached. */ + bool worker_continue; + /* Original parameters to the program and the current environment. */ int argc; char **argv; @@ -482,7 +485,7 @@ call_dl_init (void *closure) } static void -dl_open_worker (void *a) +dl_open_worker_begin (void *a) { struct dl_open_args *args = a; const char *file = args->file; @@ -774,6 +777,36 @@ dl_open_worker (void *a) _dl_call_libc_early_init (libc_map, false); } + args->worker_continue = true; +} + +static void +dl_open_worker (void *a) +{ + struct dl_open_args *args = a; + + args->worker_continue = false; + + { + /* Protects global and module specific TLS state. */ + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); + + struct dl_exception ex; + int err = _dl_catch_exception (&ex, dl_open_worker_begin, args); + + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + if (__glibc_unlikely (ex.errstring != NULL)) + /* Reraise the error. 
*/ + _dl_signal_exception (err, &ex, NULL); + } + + if (!args->worker_continue) + return; + + int mode = args->mode; + struct link_map *new = args->map; + /* Run the initializer functions of new objects. Temporarily disable the exception handler, so that lazy binding failures are fatal. */ diff --git a/elf/dl-support.c b/elf/dl-support.c index 02e2ed72f5..2a2d42a09f 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -228,6 +228,13 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock) list of loaded objects while an object is added to or removed from that list. */ __rtld_lock_define_initialized_recursive (, _dl_load_write_lock) + /* This lock protects global and module specific TLS related data. + E.g it is held in dlopen and dlclose when GL(dl_tls_generation), + GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are + accessed and when TLS related relocations are processed for + a module. It was introduced to keep pthread_create accessing + TLS state that is being set up. */ +__rtld_lock_define_initialized_recursive (, _dl_load_tls_lock) #ifdef HAVE_AUX_VECTOR diff --git a/elf/dl-tls.c b/elf/dl-tls.c index d554ae4497..9260d2d696 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -532,7 +532,7 @@ _dl_allocate_tls_init (void *result) size_t maxgen = 0; /* Protects global dynamic TLS related state. */ - __rtld_lock_lock_recursive (GL(dl_load_lock)); + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); /* Check if the current dtv is big enough. */ if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) @@ -606,7 +606,7 @@ _dl_allocate_tls_init (void *result) listp = listp->next; assert (listp != NULL); } - __rtld_lock_unlock_recursive (GL(dl_load_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); /* The DTV version is up-to-date now. */ dtv[0].counter = maxgen; @@ -745,7 +745,7 @@ _dl_update_slotinfo (unsigned long int req_modid) Here the dtv needs to be updated to new_gen generation count. 
- This code may be called during TLS access when GL(dl_load_lock) + This code may be called during TLS access when GL(dl_load_tls_lock) is not held. In that case the user code has to synchronize with dlopen and dlclose calls of relevant modules. A module m is relevant if the generation of m <= new_gen and dlclose of m is @@ -867,11 +867,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) if (__glibc_unlikely (the_map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)) { - __rtld_lock_lock_recursive (GL(dl_load_lock)); + __rtld_lock_lock_recursive (GL(dl_load_tls_lock)); if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET)) { the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET; - __rtld_lock_unlock_recursive (GL(dl_load_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); } else if (__glibc_likely (the_map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)) @@ -883,7 +883,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) #else # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" #endif - __rtld_lock_unlock_recursive (GL(dl_load_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); dtv[GET_ADDR_MODULE].pointer.to_free = NULL; dtv[GET_ADDR_MODULE].pointer.val = p; @@ -891,7 +891,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map) return (char *) p + GET_ADDR_OFFSET; } else - __rtld_lock_unlock_recursive (GL(dl_load_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); } struct dtv_pointer result = allocate_and_init (the_map); dtv[GET_ADDR_MODULE].pointer = result; @@ -962,7 +962,7 @@ _dl_tls_get_addr_soft (struct link_map *l) return NULL; dtv_t *dtv = THREAD_DTV (); - /* This may be called without holding the GL(dl_load_lock). Reading + /* This may be called without holding the GL(dl_load_tls_lock). Reading arbitrary gen value is fine since this is best effort code. 
*/ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); if (__glibc_unlikely (dtv[0].counter != gen)) diff --git a/elf/rtld.c b/elf/rtld.c index 742c413c48..096d1cc172 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -322,6 +322,7 @@ struct rtld_global _rtld_global = #ifdef _LIBC_REENTRANT ._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, ._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, + ._dl_load_tls_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, #endif ._dl_nns = 1, ._dl_ns = diff --git a/posix/fork.c b/posix/fork.c index c471f7b15f..021691b9b7 100644 --- a/posix/fork.c +++ b/posix/fork.c @@ -99,6 +99,9 @@ __libc_fork (void) /* Reset the lock the dynamic loader uses to protect its data. */ __rtld_lock_initialize (GL(dl_load_lock)); + /* Reset the lock protecting dynamic TLS related data. */ + __rtld_lock_initialize (GL(dl_load_tls_lock)); + reclaim_stacks (); /* Run the handlers registered for the child. */ diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 6e50fcd7cd..f946744e5d 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -372,6 +372,13 @@ struct rtld_global list of loaded objects while an object is added to or removed from that list. */ __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock) + /* This lock protects global and module specific TLS related data. + E.g it is held in dlopen and dlclose when GL(dl_tls_generation), + GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are + accessed and when TLS related relocations are processed for + a module. It was introduced to keep pthread_create accessing + TLS state that is being set up. */ + __rtld_lock_define_recursive (EXTERN, _dl_load_tls_lock) /* Incremented whenever something may have been added to dl_loaded. */ EXTERN unsigned long long _dl_load_adds; @@ -1271,7 +1278,7 @@ extern int _dl_scope_free (void *) attribute_hidden; /* Add module to slot information data. If DO_ADD is false, only the required memory is allocated. 
Must be called with GL - (dl_load_lock) acquired. If the function has already been called + (dl_load_tls_lock) acquired. If the function has already been called for the link map L with !do_add, then this function will not raise an exception, otherwise it is possible that it encounters a memory allocation failure. */