Message ID | 782ad0d9a371fa66bd54df07413f3d15fba0cf5a.1615914632.git.fweimer@redhat.com |
---|---|
State | New |
Headers | show |
Series | libpthread removal: NPTL forwarders are gone | expand |
On 16/03/2021 14:31, Florian Weimer via Libc-alpha wrote: > This is optimization is similar in spirit to the SINGLE_THREAD_P check > in the malloc implementation. Doing this in generic code allows us > to prioritize those cases which are likely to occur in single-threaded > programs (normal and recursive mutexes). At first I though about suggesting to move it on generic lowlevellock.h, but it is used quite only in some specific places and once libpthread is moved we can use libc_lock_lock (which uses pthread_mutex) instead. The only issue with the approach of moving this optimization to pthread_mutex_lock (instead of custom lock as lll_lock) is to use a slight more memory due pthread_mutex_t object, but I think the sparse usage within glibc should be ok. It would be good if we disentangle and consolidate both lowlevellock.h and libc-lock.h so we have only header and definitions. LGTM, thanks. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > nptl/pthread_mutex_cond_lock.c | 1 + > nptl/pthread_mutex_lock.c | 25 ++++++++++++++++++++++--- > nptl/pthread_mutex_unlock.c | 17 ++++++++++++++++- > 3 files changed, 39 insertions(+), 4 deletions(-) > > diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c > index 2f0771302f..3386bd689b 100644 > --- a/nptl/pthread_mutex_cond_lock.c > +++ b/nptl/pthread_mutex_cond_lock.c > @@ -2,6 +2,7 @@ > > #define LLL_MUTEX_LOCK(mutex) \ > lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) > +#define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex) > > /* Not actually elided so far. Needed? */ > #define LLL_MUTEX_LOCK_ELISION(mutex) \ > diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c > index f0de7b7fd6..8649a92ffb 100644 > --- a/nptl/pthread_mutex_lock.c > +++ b/nptl/pthread_mutex_lock.c > @@ -30,8 +30,27 @@ > /* Some of the following definitions differ when pthread_mutex_cond_lock.c > includes this file. */ > #ifndef LLL_MUTEX_LOCK > -# define LLL_MUTEX_LOCK(mutex) \ > +/* lll_lock with single-thread optimization. */ > +static inline void > +lll_mutex_lock_optimized (pthread_mutex_t *mutex) > +{ > + /* The single-threaded optimization is only valid for private > + mutexes. For process-shared mutexes, the mutex could be in a > + shared mapping, so synchronization with another process is needed > + even without any threads. If the lock is already marked as > + acquired, POSIX requires that pthread_mutex_lock deadlocks for > + normal mutexes, so skip the optimization in that case as > + well. */ > + int private = PTHREAD_MUTEX_PSHARED (mutex); > + if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0) > + mutex->__data.__lock = 1; > + else > + lll_lock (mutex->__data.__lock, private); > +} > + > +# define LLL_MUTEX_LOCK(mutex) \ > lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) Ok. > +# define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex) > # define LLL_MUTEX_TRYLOCK(mutex) \ > lll_trylock ((mutex)->__data.__lock) > # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0 > @@ -64,7 +83,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex) > FORCE_ELISION (mutex, goto elision); > simple: > /* Normal mutex. */ > - LLL_MUTEX_LOCK (mutex); > + LLL_MUTEX_LOCK_OPTIMIZED (mutex); > assert (mutex->__data.__owner == 0); > } > #if ENABLE_ELISION_SUPPORT Ok. > @@ -99,7 +118,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex) > } > > /* We have to get the mutex. */ > - LLL_MUTEX_LOCK (mutex); > + LLL_MUTEX_LOCK_OPTIMIZED (mutex); > > assert (mutex->__data.__owner == 0); > mutex->__data.__count = 1; Ok. > diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c > index 3b5ccdacf9..655093ee9a 100644 > --- a/nptl/pthread_mutex_unlock.c > +++ b/nptl/pthread_mutex_unlock.c > @@ -28,6 +28,21 @@ static int > __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr) > __attribute_noinline__; > > +/* lll_lock with single-thread optimization. */ > +static inline void > +lll_mutex_unlock_optimized (pthread_mutex_t *mutex) > +{ > + /* The single-threaded optimization is only valid for private > + mutexes. For process-shared mutexes, the mutex could be in a > + shared mapping, so synchronization with another process is needed > + even without any threads. */ > + int private = PTHREAD_MUTEX_PSHARED (mutex); > + if (private == LLL_PRIVATE && SINGLE_THREAD_P) > + mutex->__data.__lock = 0; > + else > + lll_unlock (mutex->__data.__lock, private); > +} > + > int > attribute_hidden > __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr) > @@ -51,7 +66,7 @@ __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr) > --mutex->__data.__nusers; > > /* Unlock. */ > - lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)); > + lll_mutex_unlock_optimized (mutex); > > LIBC_PROBE (mutex_release, 1, mutex); > > Ok.
diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c index 2f0771302f..3386bd689b 100644 --- a/nptl/pthread_mutex_cond_lock.c +++ b/nptl/pthread_mutex_cond_lock.c @@ -2,6 +2,7 @@ #define LLL_MUTEX_LOCK(mutex) \ lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) +#define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex) /* Not actually elided so far. Needed? */ #define LLL_MUTEX_LOCK_ELISION(mutex) \ diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c index f0de7b7fd6..8649a92ffb 100644 --- a/nptl/pthread_mutex_lock.c +++ b/nptl/pthread_mutex_lock.c @@ -30,8 +30,27 @@ /* Some of the following definitions differ when pthread_mutex_cond_lock.c includes this file. */ #ifndef LLL_MUTEX_LOCK -# define LLL_MUTEX_LOCK(mutex) \ +/* lll_lock with single-thread optimization. */ +static inline void +lll_mutex_lock_optimized (pthread_mutex_t *mutex) +{ + /* The single-threaded optimization is only valid for private + mutexes. For process-shared mutexes, the mutex could be in a + shared mapping, so synchronization with another process is needed + even without any threads. If the lock is already marked as + acquired, POSIX requires that pthread_mutex_lock deadlocks for + normal mutexes, so skip the optimization in that case as + well. */ + int private = PTHREAD_MUTEX_PSHARED (mutex); + if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0) + mutex->__data.__lock = 1; + else + lll_lock (mutex->__data.__lock, private); +} + +# define LLL_MUTEX_LOCK(mutex) \ lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)) +# define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex) # define LLL_MUTEX_TRYLOCK(mutex) \ lll_trylock ((mutex)->__data.__lock) # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0 @@ -64,7 +83,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex) FORCE_ELISION (mutex, goto elision); simple: /* Normal mutex. */ - LLL_MUTEX_LOCK (mutex); + LLL_MUTEX_LOCK_OPTIMIZED (mutex); assert (mutex->__data.__owner == 0); } #if ENABLE_ELISION_SUPPORT @@ -99,7 +118,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex) } /* We have to get the mutex. */ - LLL_MUTEX_LOCK (mutex); + LLL_MUTEX_LOCK_OPTIMIZED (mutex); assert (mutex->__data.__owner == 0); mutex->__data.__count = 1; diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c index 3b5ccdacf9..655093ee9a 100644 --- a/nptl/pthread_mutex_unlock.c +++ b/nptl/pthread_mutex_unlock.c @@ -28,6 +28,21 @@ static int __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr) __attribute_noinline__; +/* lll_lock with single-thread optimization. */ +static inline void +lll_mutex_unlock_optimized (pthread_mutex_t *mutex) +{ + /* The single-threaded optimization is only valid for private + mutexes. For process-shared mutexes, the mutex could be in a + shared mapping, so synchronization with another process is needed + even without any threads. */ + int private = PTHREAD_MUTEX_PSHARED (mutex); + if (private == LLL_PRIVATE && SINGLE_THREAD_P) + mutex->__data.__lock = 0; + else + lll_unlock (mutex->__data.__lock, private); +} + int attribute_hidden __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr) @@ -51,7 +66,7 @@ __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr) --mutex->__data.__nusers; /* Unlock. */ - lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex)); + lll_mutex_unlock_optimized (mutex); LIBC_PROBE (mutex_release, 1, mutex);