Message ID | 55E89415.1020005@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
"Paul E. Murphy" <murphyp@linux.vnet.ibm.com> writes: > With TLE enabled, the adapt count variable update incurs > an 8% overhead before entering the critical section of an > elided mutex. > > Instead, if it is done right after leaving the critical > section, this serialization can be avoided. > > This alters the existing behavior of __lll_trylock_elision > as it will only decrement the adapt_count if it successfully > acquires the lock. > > 2015-09-01 Paul E. Murphy <murphyp@linux.vnet.ibm.com> > * sysdeps/unix/sysv/linux/powerpc/elision-lock.c > (__lll_lock_elision): Remove adapt_count decrement... > * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c > (__lll_trylock_elision): Likewise. > * sysdeps/unix/sysv/linux/powerpc/elision-unlock.c > (__lll_unlock_elision): ... to here. And utilize > new adapt_count parameter. > * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h > (__lll_unlock_elision): Update to include adapt_count > parameter. > (lll_unlock_elision): Pass pointer to adapt_count > variable. LGTM. I'm pushing it. Thanks!
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c index 26d272e..3762732 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c @@ -47,7 +47,6 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared) { if (*adapt_count > 0) { - (*adapt_count)--; goto use_lock; } diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c index 7b6d1b9..440939c 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c @@ -36,7 +36,6 @@ __lll_trylock_elision (int *futex, short *adapt_count) /* Only try a transaction if it's worth it. */ if (*adapt_count > 0) { - (*adapt_count)--; goto use_lock; } diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c index f04c339..72b893d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c @@ -21,12 +21,20 @@ #include "htm.h" int -__lll_unlock_elision(int *lock, int pshared) +__lll_unlock_elision (int *lock, short *adapt_count, int pshared) { /* When the lock was free we're in a transaction. */ if (*lock == 0) __builtin_tend (0); else - lll_unlock ((*lock), pshared); + { + lll_unlock ((*lock), pshared); + + /* Update the adapt count AFTER completing the critical section. + Doing this here prevents unneeded stalling when entering + a critical section. Saving about 8% runtime on P8. */ + if (*adapt_count > 0) + (*adapt_count)--; + } return 0; } diff --git a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h index 16479e7..6769c25 100644 --- a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h +++ b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h @@ -32,7 +32,7 @@ extern int __lll_timedlock_elision extern int __lll_lock_elision (int *futex, short *adapt_count, int private) attribute_hidden; -extern int __lll_unlock_elision(int *lock, int private) +extern int __lll_unlock_elision (int *lock, short *adapt_count, int private) attribute_hidden; extern int __lll_trylock_elision(int *lock, short *adapt_count) @@ -41,7 +41,7 @@ extern int __lll_trylock_elision(int *lock, short *adapt_count) #define lll_lock_elision(futex, adapt_count, private) \ __lll_lock_elision (&(futex), &(adapt_count), private) #define lll_unlock_elision(futex, adapt_count, private) \ - __lll_unlock_elision (&(futex), private) + __lll_unlock_elision (&(futex), &(adapt_count), private) #define lll_trylock_elision(futex, adapt_count) \ __lll_trylock_elision (&(futex), &(adapt_count))