Message ID | 87lf6i5g7m.fsf@oldenburg.str.redhat.com |
---|---|
State | New |
Headers | show |
Series | nptl: Use out-of-line wake function in __libc_lock_unlock slow path | expand |
On 07/07/2021 11:52, Florian Weimer via Libc-alpha wrote: > This slightly reduces code size, as can be seen below. > __libc_lock_unlock is usually used along with __libc_lock_lock in > the same function. __libc_lock_lock already has an out-of-line > slow path, so this change should not introduce many additional > non-leaf functions. > > This change also fixes a link failure in 32-bit Arm thumb mode > because commit 1f9c804fbd699104adefbce9e56d2c8aa711b6b9 > ("nptl: Use internal low-level lock type for !IS_IN (libc)") > introduced __libc_do_syscall calls outside of libc. > > Before x86-64: > > text data bss dec hex filename > 1937748 20456 54896 2013100 1eb7ac libc.so.6 > 25601 856 12768 39225 9939 nss/libnss_db.so.2 > 40310 952 25144 66406 10366 nss/libnss_files.so.2 > > After x86-64: > text data bss dec hex filename > 1935312 20456 54896 2010664 1eae28 libc.so.6 > 25559 864 12768 39191 9917 nss/libnss_db.so.2 > 39764 960 25144 65868 1014c nss/libnss_files.so.2 > > Before i686: > > 2110961 11272 39144 2161377 20fae1 libc.so.6 > 27243 428 12652 40323 9d83 nss/libnss_db.so.2 > 43062 476 25028 68566 10bd6 nss/libnss_files.so.2 > > After i686: > > 2107347 11272 39144 2157763 20ecc3 libc.so.6 > 26929 432 12652 40013 9c4d nss/libnss_db.so.2 > 43132 480 25028 68640 10c20 nss/libnss_files.so.2 > I check a build for some -march variants (armv5te, armv6, armv6t2, armv7-a, armv7 -mthumb) and I saw no regression on armv7 and armv7 thumb. The patch looks ok in general, only one question below. > --- > nptl/Versions | 1 + > nptl/lowlevellock.c | 14 ++++++++++++++ > sysdeps/nptl/lowlevellock.h | 26 ++++++++++++++++++-------- > 3 files changed, 33 insertions(+), 8 deletions(-) > > diff --git a/nptl/Versions b/nptl/Versions > index 2a75f013f2..3221de89d1 100644 > --- a/nptl/Versions > +++ b/nptl/Versions > @@ -380,6 +380,7 @@ libc { > } > GLIBC_PRIVATE { > __libc_alloca_cutoff; > + __lll_lock_wake_private; > __lll_lock_wait_private; > __nptl_create_event; > __nptl_death_event; > diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c > index 2d077d8694..4f88178964 100644 > --- a/nptl/lowlevellock.c > +++ b/nptl/lowlevellock.c > @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private) > } > libc_hidden_def (__lll_lock_wait) > > +void > +__lll_lock_wake_private (int *futex) > +{ > + lll_futex_wake (futex, 1, LLL_PRIVATE); > +} > +libc_hidden_def (__lll_lock_wake_private) > + > +void > +__lll_lock_wake (int *futex, int private) > +{ > + lll_futex_wake (futex, 1, private); > +} > +libc_hidden_def (__lll_lock_wake) > + > #if ENABLE_ELISION_SUPPORT > int __pthread_force_elision __attribute__ ((nocommon)); > libc_hidden_data_def (__pthread_force_elision) Do we really need two functions for this? > diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h > index be60c9ac28..4d95114ed3 100644 > --- a/sysdeps/nptl/lowlevellock.h > +++ b/sysdeps/nptl/lowlevellock.h > @@ -125,6 +125,11 @@ libc_hidden_proto (__lll_lock_wait) > #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private) > > > +extern void __lll_lock_wake_private (int *futex); > +libc_hidden_proto (__lll_lock_wake_private) > +extern void __lll_lock_wake (int *futex, int private); > +libc_hidden_proto (__lll_lock_wake) > + > /* This is an expression rather than a statement even though its value is > void, so that it can be used in a comma expression or as an expression > that's cast to void. */ Ok. > @@ -137,14 +142,19 @@ libc_hidden_proto (__lll_lock_wait) > acquires the lock and when there will be no further lock acquisitions; > thus, we must not access the lock after releasing it, or those accesses > could be concurrent with mutex destruction or reuse of the memory. */ > -#define __lll_unlock(futex, private) \ > - ((void) \ > - ({ \ > - int *__futex = (futex); \ > - int __private = (private); \ > - int __oldval = atomic_exchange_rel (__futex, 0); \ > - if (__glibc_unlikely (__oldval > 1)) \ > - lll_futex_wake (__futex, 1, __private); \ > +#define __lll_unlock(futex, private) \ > + ((void) \ > + ({ \ > + int *__futex = (futex); \ > + int __private = (private); \ > + int __oldval = atomic_exchange_rel (__futex, 0); \ > + if (__glibc_unlikely (__oldval > 1)) \ > + { \ > + if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \ > + __lll_lock_wake_private (__futex); \ > + else \ > + __lll_lock_wake (__futex, __private); \ > + } \ > })) > #define lll_unlock(futex, private) \ > __lll_unlock (&(futex), private) >
* Adhemerval Zanella: >> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c >> index 2d077d8694..4f88178964 100644 >> --- a/nptl/lowlevellock.c >> +++ b/nptl/lowlevellock.c >> @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private) >> } >> libc_hidden_def (__lll_lock_wait) >> >> +void >> +__lll_lock_wake_private (int *futex) >> +{ >> + lll_futex_wake (futex, 1, LLL_PRIVATE); >> +} >> +libc_hidden_def (__lll_lock_wake_private) >> + >> +void >> +__lll_lock_wake (int *futex, int private) >> +{ >> + lll_futex_wake (futex, 1, private); >> +} >> +libc_hidden_def (__lll_lock_wake) >> + >> #if ENABLE_ELISION_SUPPORT >> int __pthread_force_elision __attribute__ ((nocommon)); >> libc_hidden_data_def (__pthread_force_elision) > > Do we really need two functions for this? It mirrors the wait side, which has two functions as well. We probably only need __lll_lock_wake_private these days, but I didn't want to start cleaning up the libc-lock implementation with its deep macro nesting. Thanks, Florian
On 08/07/2021 15:24, Florian Weimer wrote: > * Adhemerval Zanella: > >>> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c >>> index 2d077d8694..4f88178964 100644 >>> --- a/nptl/lowlevellock.c >>> +++ b/nptl/lowlevellock.c >>> @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private) >>> } >>> libc_hidden_def (__lll_lock_wait) >>> >>> +void >>> +__lll_lock_wake_private (int *futex) >>> +{ >>> + lll_futex_wake (futex, 1, LLL_PRIVATE); >>> +} >>> +libc_hidden_def (__lll_lock_wake_private) >>> + >>> +void >>> +__lll_lock_wake (int *futex, int private) >>> +{ >>> + lll_futex_wake (futex, 1, private); >>> +} >>> +libc_hidden_def (__lll_lock_wake) >>> + >>> #if ENABLE_ELISION_SUPPORT >>> int __pthread_force_elision __attribute__ ((nocommon)); >>> libc_hidden_data_def (__pthread_force_elision) >> >> Do we really need two functions for this? > > It mirrors the wait side, which has two functions as well. We probably > only need __lll_lock_wake_private these days, but I didn't want to start > cleaning up the libc-lock implementation with its deep macro nesting. Fair enough. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/nptl/Versions b/nptl/Versions index 2a75f013f2..3221de89d1 100644 --- a/nptl/Versions +++ b/nptl/Versions @@ -380,6 +380,7 @@ libc { } GLIBC_PRIVATE { __libc_alloca_cutoff; + __lll_lock_wake_private; __lll_lock_wait_private; __nptl_create_event; __nptl_death_event; diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c index 2d077d8694..4f88178964 100644 --- a/nptl/lowlevellock.c +++ b/nptl/lowlevellock.c @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private) } libc_hidden_def (__lll_lock_wait) +void +__lll_lock_wake_private (int *futex) +{ + lll_futex_wake (futex, 1, LLL_PRIVATE); +} +libc_hidden_def (__lll_lock_wake_private) + +void +__lll_lock_wake (int *futex, int private) +{ + lll_futex_wake (futex, 1, private); +} +libc_hidden_def (__lll_lock_wake) + #if ENABLE_ELISION_SUPPORT int __pthread_force_elision __attribute__ ((nocommon)); libc_hidden_data_def (__pthread_force_elision) diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h index be60c9ac28..4d95114ed3 100644 --- a/sysdeps/nptl/lowlevellock.h +++ b/sysdeps/nptl/lowlevellock.h @@ -125,6 +125,11 @@ libc_hidden_proto (__lll_lock_wait) #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private) +extern void __lll_lock_wake_private (int *futex); +libc_hidden_proto (__lll_lock_wake_private) +extern void __lll_lock_wake (int *futex, int private); +libc_hidden_proto (__lll_lock_wake) + /* This is an expression rather than a statement even though its value is void, so that it can be used in a comma expression or as an expression that's cast to void. */ @@ -137,14 +142,19 @@ libc_hidden_proto (__lll_lock_wait) acquires the lock and when there will be no further lock acquisitions; thus, we must not access the lock after releasing it, or those accesses could be concurrent with mutex destruction or reuse of the memory. */ -#define __lll_unlock(futex, private) \ - ((void) \ - ({ \ - int *__futex = (futex); \ - int __private = (private); \ - int __oldval = atomic_exchange_rel (__futex, 0); \ - if (__glibc_unlikely (__oldval > 1)) \ - lll_futex_wake (__futex, 1, __private); \ +#define __lll_unlock(futex, private) \ + ((void) \ + ({ \ + int *__futex = (futex); \ + int __private = (private); \ + int __oldval = atomic_exchange_rel (__futex, 0); \ + if (__glibc_unlikely (__oldval > 1)) \ + { \ + if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \ + __lll_lock_wake_private (__futex); \ + else \ + __lll_lock_wake (__futex, __private); \ + } \ })) #define lll_unlock(futex, private) \ __lll_unlock (&(futex), private)