diff mbox series

[v1,2/4] nptl: Continue to use arch-preferred atomic exchange in spinlock loop

Message ID 20220929031452.2551219-2-goldstein.w.n@gmail.com
State New
Headers show
Series [v1,1/4] Benchtests: Add benchtests for pthread_spin_lock and mutex_trylock | expand

Commit Message

Noah Goldstein Sept. 29, 2022, 3:14 a.m. UTC
Despite using the preferred atomic exchange in the initial check, the
loop was unconditionally using CAS, which is not desired on some
architectures (those that do not set `ATOMIC_EXCHANGE_USES_CAS`).

No meaningful performance changes were measured on Broadwell, but this
still seems like a reasonable change.

Full check passes on x86-64.
---
 nptl/pthread_spin_lock.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
index 19d1759f9a..1bdd6e2048 100644
--- a/nptl/pthread_spin_lock.c
+++ b/nptl/pthread_spin_lock.c
@@ -20,6 +20,20 @@ 
 #include "pthreadP.h"
 #include <shlib-compat.h>
 
+#if ATOMIC_EXCHANGE_USES_CAS
+/* Try to acquire the lock at MEM with a CAS instruction as this architecture
+   has no exchange instruction.  VAL points to the expected value and C is
+   the value to store; the acquisition succeeds if the lock is not acquired.  */
+#  define pthread_spin_lock_grab_lock(mem, val, c) \
+    (atomic_compare_exchange_weak_acquire ((mem), (val), (c)))
+#else
+/* Try to acquire the lock at MEM by exchanging in C, as this architecture
+   has an exchange instruction and we assume it is faster than a CAS.  VAL is
+   unused.  The acquisition succeeds if the lock is not in an acquired state.  */
+#  define pthread_spin_lock_grab_lock(mem, val, c) \
+    (atomic_exchange_acquire ((mem), (c)) == 0)
+#endif
+
 int
 __pthread_spin_lock (pthread_spinlock_t *lock)
 {
@@ -36,19 +50,8 @@  __pthread_spin_lock (pthread_spinlock_t *lock)
      We use acquire MO to synchronize-with the release MO store in
      pthread_spin_unlock, and thus ensure that prior critical sections
      happen-before this critical section.  */
-#if ! ATOMIC_EXCHANGE_USES_CAS
-  /* Try to acquire the lock with an exchange instruction as this architecture
-     has such an instruction and we assume it is faster than a CAS.
-     The acquisition succeeds if the lock is not in an acquired state.  */
-  if (__glibc_likely (atomic_exchange_acquire (lock, 1) == 0))
+  if (__glibc_likely (pthread_spin_lock_grab_lock (lock, &val, 1)))
     return 0;
-#else
-  /* Try to acquire the lock with a CAS instruction as this architecture
-     has no exchange instruction.  The acquisition succeeds if the lock is not
-     acquired.  */
-  if (__glibc_likely (atomic_compare_exchange_weak_acquire (lock, &val, 1)))
-    return 0;
-#endif
 
   do
     {
@@ -75,7 +78,7 @@  __pthread_spin_lock (pthread_spinlock_t *lock)
       /* We need acquire memory order here for the same reason as mentioned
 	 for the first try to lock the spinlock.  */
     }
-  while (!atomic_compare_exchange_weak_acquire (lock, &val, 1));
+  while (!pthread_spin_lock_grab_lock (lock, &val, 1));
 
   return 0;
 }