diff mbox

[v2,08/13] qemu-thread: optimize spin_lock for uncontended locks

Message ID 1460050358-25025-9-git-send-email-cota@braap.org
State New
Headers show

Commit Message

Emilio Cota April 7, 2016, 5:32 p.m. UTC
This way we can acquire the lock with xchg+test, instead of test+xchg+test.
Most spinlocks should be uncontended so this should result in a net
performance gain.

Before:
  4ad957:       eb 09                   jmp    4ad962 <qht_insert+0x32>
  4ad959:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
  4ad960:       f3 90                   pause
  4ad962:       8b 03                   mov    (%rbx),%eax
  4ad964:       85 c0                   test   %eax,%eax
  4ad966:       75 f8                   jne    4ad960 <qht_insert+0x30>
  4ad968:       89 f8                   mov    %edi,%eax
  4ad96a:       87 03                   xchg   %eax,(%rbx)
  4ad96c:       85 c0                   test   %eax,%eax
  4ad96e:       75 f2                   jne    4ad962 <qht_insert+0x32>

After:
  4ad980:       89 f8                   mov    %edi,%eax
  4ad982:       87 03                   xchg   %eax,(%rbx)
  4ad984:       85 c0                   test   %eax,%eax
  4ad986:       74 12                   je     4ad99a <qht_insert+0x4a>
  4ad988:       0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
  4ad98f:       00
  4ad990:       8b 03                   mov    (%rbx),%eax
  4ad992:       85 c0                   test   %eax,%eax
  4ad994:       74 ea                   je     4ad980 <qht_insert+0x30>
  4ad996:       f3 90                   pause
  4ad998:       eb f6                   jmp    4ad990 <qht_insert+0x40>

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qemu/thread.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Comments

Richard Henderson April 8, 2016, 6:40 p.m. UTC | #1
On 04/07/2016 10:32 AM, Emilio G. Cota wrote:
>  static inline void qemu_spin_lock(QemuSpin *spin)
>  {
> -    do {
> +    while (atomic_xchg(&spin->value, true)) {
>          while (atomic_read(&spin->value)) {
>              cpu_relax();
>          }
> -    } while (atomic_xchg(&spin->value, true));
> +    }
>  }

And merge this one as well, please.  It's a good improvement, but there's
little point to keeping these separate.


r~
diff mbox

Patch

diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 599965e..e2af57c 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -74,11 +74,11 @@  static inline void qemu_spin_init(QemuSpin *spin)
 
 static inline void qemu_spin_lock(QemuSpin *spin)
 {
-    do {
+    while (atomic_xchg(&spin->value, true)) {
         while (atomic_read(&spin->value)) {
             cpu_relax();
         }
-    } while (atomic_xchg(&spin->value, true));
+    }
 }
 
 static inline int qemu_spin_trylock(QemuSpin *spin)