
[03/10] replace spinlock by QemuMutex.

Message ID 1439397664-70734-4-git-send-email-pbonzini@redhat.com
State New

Commit Message

Paolo Bonzini Aug. 12, 2015, 4:40 p.m. UTC
From: KONRAD Frederic <fred.konrad@greensocs.com>

spinlock is only used in two cases:
  * cpu-exec.c: to protect TranslationBlock
  * mem_helper.c: for the lock helper in target-i386 (which seems broken).

In user-mode it is a pthread_mutex_t anyway, so it is better to use QemuMutex
directly in this case. This also allows the tb_lock mutex of TBContext to be
reused for multithreaded TCG.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-Id: <1439220437-23957-5-git-send-email-fred.konrad@greensocs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-exec.c               | 15 +++------------
 include/exec/exec-all.h  |  4 ++--
 linux-user/main.c        |  6 +++---
 target-i386/cpu.h        |  3 +++
 target-i386/mem_helper.c | 25 ++++++++++++++++++++++---
 target-i386/translate.c  |  2 ++
 tcg/tcg.h                |  4 ++++
 translate-all.c          | 34 ++++++++++++++++++++++++++++++++++
 8 files changed, 73 insertions(+), 20 deletions(-)
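
For context, the spinlock_t being removed comes from include/exec/spinlock.h,
which at this point boils down to roughly the following (a paraphrase, not
the verbatim header):

#if defined(CONFIG_USER_ONLY)
/* user-mode has real host threads, so the "spinlock" is a pthread mutex */
#include <pthread.h>
#define spinlock_t          pthread_mutex_t
#define SPIN_LOCK_UNLOCKED  PTHREAD_MUTEX_INITIALIZER
#define spin_lock           pthread_mutex_lock
#define spin_unlock         pthread_mutex_unlock
#else
/* system emulation runs TCG single-threaded, so these do nothing */
typedef int spinlock_t;
#define SPIN_LOCK_UNLOCKED  0
static inline void spin_lock(spinlock_t *lock) { }
static inline void spin_unlock(spinlock_t *lock) { }
#endif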

Comments

fred.konrad@greensocs.com Aug. 13, 2015, 12:17 p.m. UTC | #1
On 12/08/2015 18:40, Paolo Bonzini wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> spinlock is only used in two cases:
>    * cpu-exec.c: to protect TranslationBlock
>    * mem_helper.c: for lock helper in target-i386 (which seems broken).
>
> It's a pthread_mutex_t in user-mode so better using QemuMutex directly in this
> case.
> It allows as well to reuse tb_lock mutex of TBContext in case of multithread
> TCG.
>
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> Message-Id: <1439220437-23957-5-git-send-email-fred.konrad@greensocs.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   cpu-exec.c               | 15 +++------------
>   include/exec/exec-all.h  |  4 ++--
>   linux-user/main.c        |  6 +++---
>   target-i386/cpu.h        |  3 +++
>   target-i386/mem_helper.c | 25 ++++++++++++++++++++++---
>   target-i386/translate.c  |  2 ++
>   tcg/tcg.h                |  4 ++++
>   translate-all.c          | 34 ++++++++++++++++++++++++++++++++++
>   8 files changed, 73 insertions(+), 20 deletions(-)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 713540f..c680cca 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -356,9 +356,6 @@ int cpu_exec(CPUState *cpu)
>       uintptr_t next_tb;
>       SyncClocks sc;
>   
> -    /* This must be volatile so it is not trashed by longjmp() */
> -    volatile bool have_tb_lock = false;
> -
>       if (cpu->halted) {
>           if (!cpu_has_work(cpu)) {
>               return EXCP_HALTED;
> @@ -475,8 +472,7 @@ int cpu_exec(CPUState *cpu)
>                       cpu->exception_index = EXCP_INTERRUPT;
>                       cpu_loop_exit(cpu);
>                   }
> -                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
> -                have_tb_lock = true;
> +                tb_lock();
>                   tb = tb_find_fast(cpu);
>                   /* Note: we do it here to avoid a gcc bug on Mac OS X when
>                      doing it in tb_find_slow */
> @@ -498,9 +494,7 @@ int cpu_exec(CPUState *cpu)
>                       tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
>                                   next_tb & TB_EXIT_MASK, tb);
>                   }
> -                have_tb_lock = false;
> -                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
> -
> +                tb_unlock();
>                   /* cpu_interrupt might be called while translating the
>                      TB, but before it is linked into a potentially
>                      infinite loop and becomes env->current_tb. Avoid
> @@ -567,10 +561,7 @@ int cpu_exec(CPUState *cpu)
>               x86_cpu = X86_CPU(cpu);
>               env = &x86_cpu->env;
>   #endif
> -            if (have_tb_lock) {
> -                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
> -                have_tb_lock = false;
> -            }
> +            tb_lock_reset();
>           }
>       } /* for(;;) */
>   
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 29775c0..dafd177 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -178,7 +178,7 @@ struct TranslationBlock {
>       struct TranslationBlock *jmp_first;
>   };
>   
> -#include "exec/spinlock.h"
> +#include "qemu/thread.h"
>   
>   typedef struct TBContext TBContext;
>   
> @@ -188,7 +188,7 @@ struct TBContext {
>       TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
>       int nb_tbs;
>       /* any access to the tbs or the page table must use this lock */
> -    spinlock_t tb_lock;
> +    QemuMutex tb_lock;
>   
>       /* statistics */
>       int tb_flush_count;
> diff --git a/linux-user/main.c b/linux-user/main.c
> index fdee981..fd06ce9 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -107,7 +107,7 @@ static int pending_cpus;
>   /* Make sure everything is in a consistent state for calling fork().  */
>   void fork_start(void)
>   {
> -    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
> +    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
>       pthread_mutex_lock(&exclusive_lock);
>       mmap_fork_start();
>   }
> @@ -129,11 +129,11 @@ void fork_end(int child)
>           pthread_mutex_init(&cpu_list_mutex, NULL);
>           pthread_cond_init(&exclusive_cond, NULL);
>           pthread_cond_init(&exclusive_resume, NULL);
> -        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
> +        qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>           gdbserver_fork(thread_cpu);
>       } else {
>           pthread_mutex_unlock(&exclusive_lock);
> -        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> +        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
We might want to use tb_lock/unlock in user code as well, instead of
calling qemu_mutex_* directly?

>       }
>   }
>   
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index ead2832..3655ff3 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -1318,6 +1318,9 @@ static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env)
>   void cpu_set_mxcsr(CPUX86State *env, uint32_t val);
>   void cpu_set_fpuc(CPUX86State *env, uint16_t val);
>   
> +/* mem_helper.c */
> +void helper_lock_init(void);
> +
>   /* svm_helper.c */
>   void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
>                                      uint64_t param);
> diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
> index 1aec8a5..8bf0da2 100644
> --- a/target-i386/mem_helper.c
> +++ b/target-i386/mem_helper.c
> @@ -23,18 +23,37 @@
>   
>   /* broken thread support */
>   
> -static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
> +#if defined(CONFIG_USER_ONLY)
> +QemuMutex global_cpu_lock;
>   
>   void helper_lock(void)
>   {
> -    spin_lock(&global_cpu_lock);
> +    qemu_mutex_lock(&global_cpu_lock);
>   }
>   
>   void helper_unlock(void)
>   {
> -    spin_unlock(&global_cpu_lock);
> +    qemu_mutex_unlock(&global_cpu_lock);
>   }
>   
> +void helper_lock_init(void)
> +{
> +    qemu_mutex_init(&global_cpu_lock);
> +}
> +#else
> +void helper_lock(void)
> +{
> +}
> +
> +void helper_unlock(void)
> +{
> +}
> +
> +void helper_lock_init(void)
> +{
> +}
> +#endif
> +
>   void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
>   {
>       uint64_t d;
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 82e2245..443bf60 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -7899,6 +7899,8 @@ void optimize_flags_init(void)
>                                            offsetof(CPUX86State, regs[i]),
>                                            reg_names[i]);
>       }
> +
> +    helper_lock_init();
>   }
>   
>   /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 231a781..0ae648f 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -594,6 +594,10 @@ void *tcg_malloc_internal(TCGContext *s, int size);
>   void tcg_pool_reset(TCGContext *s);
>   void tcg_pool_delete(TCGContext *s);
>   
> +void tb_lock(void);
> +void tb_unlock(void);
> +void tb_lock_reset(void);
> +
>   static inline void *tcg_malloc(int size)
>   {
>       TCGContext *s = &tcg_ctx;
> diff --git a/translate-all.c b/translate-all.c
> index 9c46ffa..a6bff72 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -129,6 +129,39 @@ static void *l1_map[V_L1_SIZE];
>   /* code generation context */
>   TCGContext tcg_ctx;
>   
> +/* translation block context */
> +#ifdef CONFIG_USER_ONLY
> +__thread volatile int have_tb_lock;
> +#endif
> +
> +void tb_lock(void)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    assert(!have_tb_lock);
> +    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
> +    have_tb_lock++;
> +#endif
> +}
> +
> +void tb_unlock(void)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    assert(have_tb_lock);
> +    have_tb_lock--;
> +    qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> +#endif
> +}
> +
> +void tb_lock_reset(void)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    if (have_tb_lock) {
> +        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> +        have_tb_lock = 0;
> +    }
> +#endif
> +}
> +
>   static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>                            tb_page_addr_t phys_page2);
>   static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
> @@ -676,6 +709,7 @@ static inline void code_gen_alloc(size_t tb_size)
>               CODE_GEN_AVG_BLOCK_SIZE;
>       tcg_ctx.tb_ctx.tbs =
>               g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
> +    qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
Maybe we can initialize the mutex only for CONFIG_USER_ONLY?

Fred
>   }
>   
>   /* Must be called before using the QEMU cpus. 'tb_size' is the size
Paolo Bonzini Aug. 13, 2015, 1:12 p.m. UTC | #2
On 13/08/2015 14:17, Frederic Konrad wrote:
>> diff --git a/linux-user/main.c b/linux-user/main.c
>> index fdee981..fd06ce9 100644
>> --- a/linux-user/main.c
>> +++ b/linux-user/main.c
>> @@ -107,7 +107,7 @@ static int pending_cpus;
>>   /* Make sure everything is in a consistent state for calling
>> fork().  */
>>   void fork_start(void)
>>   {
>> -    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
>> +    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
>>       pthread_mutex_lock(&exclusive_lock);
>>       mmap_fork_start();
>>   }
>> @@ -129,11 +129,11 @@ void fork_end(int child)
>>           pthread_mutex_init(&cpu_list_mutex, NULL);
>>           pthread_cond_init(&exclusive_cond, NULL);
>>           pthread_cond_init(&exclusive_resume, NULL);
>> -        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
>> +        qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>>           gdbserver_fork(thread_cpu);
>>       } else {
>>           pthread_mutex_unlock(&exclusive_lock);
>> -        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
>> +        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> We might want to use tb_lock/unlock in user code as well instead of
> calling directly qemu_mutex_* ?

You cannot do that because of the recursive locking assertions; the
child is not using qemu_mutex_unlock, it's using qemu_mutex_init.  So I
would have to add some kind of tb_lock_reset_after_fork() function, which
is a bit ugly.
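
Something like this, say (a hypothetical sketch just to illustrate the
point; it is not part of this patch):

/* would live in translate-all.c next to tb_lock(), reusing the same
 * thread-local have_tb_lock flag */
void tb_lock_reset_after_fork(void)
{
#ifdef CONFIG_USER_ONLY
    /* The child re-creates the mutex with qemu_mutex_init(), so only
     * the "held" flag must be cleared; tb_unlock() would trip the
     * assertion and unlock a mutex the child never locked.
     */
    have_tb_lock = 0;
#endif
}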

>> @@ -676,6 +709,7 @@ static inline void code_gen_alloc(size_t tb_size)
>>               CODE_GEN_AVG_BLOCK_SIZE;
>>       tcg_ctx.tb_ctx.tbs =
>>               g_malloc(tcg_ctx.code_gen_max_blocks *
>> sizeof(TranslationBlock));
>> +    qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
> Maybe we can initialize the mutex only for CONFIG_USER_ONLY?

It's okay, it doesn't consume system resources.

Paolo
fred.konrad@greensocs.com Aug. 13, 2015, 1:21 p.m. UTC | #3
On 13/08/2015 15:12, Paolo Bonzini wrote:
>
> On 13/08/2015 14:17, Frederic Konrad wrote:
>>> diff --git a/linux-user/main.c b/linux-user/main.c
>>> index fdee981..fd06ce9 100644
>>> --- a/linux-user/main.c
>>> +++ b/linux-user/main.c
>>> @@ -107,7 +107,7 @@ static int pending_cpus;
>>>    /* Make sure everything is in a consistent state for calling
>>> fork().  */
>>>    void fork_start(void)
>>>    {
>>> -    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
>>> +    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
>>>        pthread_mutex_lock(&exclusive_lock);
>>>        mmap_fork_start();
>>>    }
>>> @@ -129,11 +129,11 @@ void fork_end(int child)
>>>            pthread_mutex_init(&cpu_list_mutex, NULL);
>>>            pthread_cond_init(&exclusive_cond, NULL);
>>>            pthread_cond_init(&exclusive_resume, NULL);
>>> -        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
>>> +        qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>>>            gdbserver_fork(thread_cpu);
>>>        } else {
>>>            pthread_mutex_unlock(&exclusive_lock);
>>> -        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
>>> +        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
>> We might want to use tb_lock/unlock in user code as well instead of
>> calling directly qemu_mutex_* ?
> You cannot do that because of the recursive locking assertions; the
> child is not using qemu_mutex_unlock, it's using qemu_mutex_init.  So I
> would have to add some kind of tb_lock_reset_after_fork() function which
> is a bit ugly.

True.

Fred
>>> @@ -676,6 +709,7 @@ static inline void code_gen_alloc(size_t tb_size)
>>>                CODE_GEN_AVG_BLOCK_SIZE;
>>>        tcg_ctx.tb_ctx.tbs =
>>>                g_malloc(tcg_ctx.code_gen_max_blocks *
>>> sizeof(TranslationBlock));
>>> +    qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>> Maybe we can initialize the mutex only for CONFIG_USER_ONLY?
> It's okay, it doesn't consume system resources.
>
> Paolo
Peter Maydell Aug. 28, 2015, 2:49 p.m. UTC | #4
On 12 August 2015 at 17:40, Paolo Bonzini <pbonzini@redhat.com> wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> spinlock is only used in two cases:
>   * cpu-exec.c: to protect TranslationBlock
>   * mem_helper.c: for lock helper in target-i386 (which seems broken).
>
> It's a pthread_mutex_t in user-mode so better using QemuMutex directly in this
> case.
> It allows as well to reuse tb_lock mutex of TBContext in case of multithread
> TCG.

The line wrapping in this commit message (and the grammar) looks a bit off...

> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> Message-Id: <1439220437-23957-5-git-send-email-fred.konrad@greensocs.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  cpu-exec.c               | 15 +++------------
>  include/exec/exec-all.h  |  4 ++--
>  linux-user/main.c        |  6 +++---
>  target-i386/cpu.h        |  3 +++
>  target-i386/mem_helper.c | 25 ++++++++++++++++++++++---
>  target-i386/translate.c  |  2 ++
>  tcg/tcg.h                |  4 ++++
>  translate-all.c          | 34 ++++++++++++++++++++++++++++++++++
>  8 files changed, 73 insertions(+), 20 deletions(-)

After this commit it looks like we have no users of spinlock_t
at all. It would be good to have a followup patch which deleted
include/exec/spinlock.h.

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
fred.konrad@greensocs.com Aug. 28, 2015, 2:53 p.m. UTC | #5
On 28/08/2015 16:49, Peter Maydell wrote:
> On 12 August 2015 at 17:40, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> spinlock is only used in two cases:
>>    * cpu-exec.c: to protect TranslationBlock
>>    * mem_helper.c: for lock helper in target-i386 (which seems broken).
>>
>> It's a pthread_mutex_t in user-mode so better using QemuMutex directly in this
>> case.
>> It allows as well to reuse tb_lock mutex of TBContext in case of multithread
>> TCG.
> The line wrapping in this commit message (and the grammar) looks a bit off...
>
>> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>> Message-Id: <1439220437-23957-5-git-send-email-fred.konrad@greensocs.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   cpu-exec.c               | 15 +++------------
>>   include/exec/exec-all.h  |  4 ++--
>>   linux-user/main.c        |  6 +++---
>>   target-i386/cpu.h        |  3 +++
>>   target-i386/mem_helper.c | 25 ++++++++++++++++++++++---
>>   target-i386/translate.c  |  2 ++
>>   tcg/tcg.h                |  4 ++++
>>   translate-all.c          | 34 ++++++++++++++++++++++++++++++++++
>>   8 files changed, 73 insertions(+), 20 deletions(-)
> After this commit it looks like we have no users of spinlock_t
> at all. It would be good to have a followup patch which deleted
> include/exec/spinlock.h.

Seems Paolo forgot to pick up the following patch from the mttcg tree:
"remove unused spinlock."
https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg01129.html

Thanks,
Fred
> Otherwise
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
>
> thanks
> -- PMM
Paolo Bonzini Aug. 29, 2015, 6:51 a.m. UTC | #6
On 28/08/2015 16:53, Frederic Konrad wrote:
>> After this commit it looks like we have no users of spinlock_t
>> at all. It would be good to have a followup patch which deleted
>> include/exec/spinlock.h.
> 
> Seems Paolo forget to pick up the following patch from the mttcg tree:
> "remove unused spinlock."
> https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg01129.html

Yes, that patch does the trick. :)

Paolo

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 713540f..c680cca 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -356,9 +356,6 @@  int cpu_exec(CPUState *cpu)
     uintptr_t next_tb;
     SyncClocks sc;
 
-    /* This must be volatile so it is not trashed by longjmp() */
-    volatile bool have_tb_lock = false;
-
     if (cpu->halted) {
         if (!cpu_has_work(cpu)) {
             return EXCP_HALTED;
@@ -475,8 +472,7 @@  int cpu_exec(CPUState *cpu)
                     cpu->exception_index = EXCP_INTERRUPT;
                     cpu_loop_exit(cpu);
                 }
-                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
-                have_tb_lock = true;
+                tb_lock();
                 tb = tb_find_fast(cpu);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
@@ -498,9 +494,7 @@  int cpu_exec(CPUState *cpu)
                     tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
                                 next_tb & TB_EXIT_MASK, tb);
                 }
-                have_tb_lock = false;
-                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
-
+                tb_unlock();
                 /* cpu_interrupt might be called while translating the
                    TB, but before it is linked into a potentially
                    infinite loop and becomes env->current_tb. Avoid
@@ -567,10 +561,7 @@  int cpu_exec(CPUState *cpu)
             x86_cpu = X86_CPU(cpu);
             env = &x86_cpu->env;
 #endif
-            if (have_tb_lock) {
-                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
-                have_tb_lock = false;
-            }
+            tb_lock_reset();
         }
     } /* for(;;) */
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 29775c0..dafd177 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -178,7 +178,7 @@  struct TranslationBlock {
     struct TranslationBlock *jmp_first;
 };
 
-#include "exec/spinlock.h"
+#include "qemu/thread.h"
 
 typedef struct TBContext TBContext;
 
@@ -188,7 +188,7 @@  struct TBContext {
     TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
     int nb_tbs;
     /* any access to the tbs or the page table must use this lock */
-    spinlock_t tb_lock;
+    QemuMutex tb_lock;
 
     /* statistics */
     int tb_flush_count;
diff --git a/linux-user/main.c b/linux-user/main.c
index fdee981..fd06ce9 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -107,7 +107,7 @@  static int pending_cpus;
 /* Make sure everything is in a consistent state for calling fork().  */
 void fork_start(void)
 {
-    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
     pthread_mutex_lock(&exclusive_lock);
     mmap_fork_start();
 }
@@ -129,11 +129,11 @@  void fork_end(int child)
         pthread_mutex_init(&cpu_list_mutex, NULL);
         pthread_cond_init(&exclusive_cond, NULL);
         pthread_cond_init(&exclusive_resume, NULL);
-        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
+        qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
         gdbserver_fork(thread_cpu);
     } else {
         pthread_mutex_unlock(&exclusive_lock);
-        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
     }
 }
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index ead2832..3655ff3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1318,6 +1318,9 @@  static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env)
 void cpu_set_mxcsr(CPUX86State *env, uint32_t val);
 void cpu_set_fpuc(CPUX86State *env, uint16_t val);
 
+/* mem_helper.c */
+void helper_lock_init(void);
+
 /* svm_helper.c */
 void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
                                    uint64_t param);
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 1aec8a5..8bf0da2 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -23,18 +23,37 @@ 
 
 /* broken thread support */
 
-static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+#if defined(CONFIG_USER_ONLY)
+QemuMutex global_cpu_lock;
 
 void helper_lock(void)
 {
-    spin_lock(&global_cpu_lock);
+    qemu_mutex_lock(&global_cpu_lock);
 }
 
 void helper_unlock(void)
 {
-    spin_unlock(&global_cpu_lock);
+    qemu_mutex_unlock(&global_cpu_lock);
 }
 
+void helper_lock_init(void)
+{
+    qemu_mutex_init(&global_cpu_lock);
+}
+#else
+void helper_lock(void)
+{
+}
+
+void helper_unlock(void)
+{
+}
+
+void helper_lock_init(void)
+{
+}
+#endif
+
 void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
 {
     uint64_t d;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 82e2245..443bf60 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7899,6 +7899,8 @@  void optimize_flags_init(void)
                                          offsetof(CPUX86State, regs[i]),
                                          reg_names[i]);
     }
+
+    helper_lock_init();
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 231a781..0ae648f 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -594,6 +594,10 @@  void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
 void tcg_pool_delete(TCGContext *s);
 
+void tb_lock(void);
+void tb_unlock(void);
+void tb_lock_reset(void);
+
 static inline void *tcg_malloc(int size)
 {
     TCGContext *s = &tcg_ctx;
diff --git a/translate-all.c b/translate-all.c
index 9c46ffa..a6bff72 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -129,6 +129,39 @@  static void *l1_map[V_L1_SIZE];
 /* code generation context */
 TCGContext tcg_ctx;
 
+/* translation block context */
+#ifdef CONFIG_USER_ONLY
+__thread volatile int have_tb_lock;
+#endif
+
+void tb_lock(void)
+{
+#ifdef CONFIG_USER_ONLY
+    assert(!have_tb_lock);
+    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+    have_tb_lock++;
+#endif
+}
+
+void tb_unlock(void)
+{
+#ifdef CONFIG_USER_ONLY
+    assert(have_tb_lock);
+    have_tb_lock--;
+    qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+#endif
+}
+
+void tb_lock_reset(void)
+{
+#ifdef CONFIG_USER_ONLY
+    if (have_tb_lock) {
+        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+        have_tb_lock = 0;
+    }
+#endif
+}
+
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2);
 static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
@@ -676,6 +709,7 @@  static inline void code_gen_alloc(size_t tb_size)
             CODE_GEN_AVG_BLOCK_SIZE;
     tcg_ctx.tb_ctx.tbs =
             g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+    qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
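
The reason tb_lock_reset() exists: cpu_loop_exit() leaves cpu_exec() via
siglongjmp(), possibly with tb_lock held, which is why the old code kept
the state in a volatile local so it was not trashed by longjmp(). After
this patch the state lives in a thread-local flag instead, and the
exception path simply calls tb_lock_reset(). A minimal standalone sketch
of the pattern (illustrative only, not QEMU code):

#include <assert.h>
#include <pthread.h>
#include <setjmp.h>

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int have_lock;  /* thread-local, survives siglongjmp() */
static sigjmp_buf env;

static void lock(void)
{
    assert(!have_lock);         /* catches recursive locking */
    pthread_mutex_lock(&mutex);
    have_lock = 1;
}

static void unlock(void)
{
    assert(have_lock);
    have_lock = 0;
    pthread_mutex_unlock(&mutex);
}

static void lock_reset(void)
{
    if (have_lock) {            /* drop the lock only if we hold it */
        have_lock = 0;
        pthread_mutex_unlock(&mutex);
    }
}

int main(void)
{
    if (sigsetjmp(env, 0) == 0) {
        lock();
        siglongjmp(env, 1);     /* models cpu_loop_exit(): jumps out
                                   while the lock is still held */
        unlock();               /* normal path, not reached here */
    } else {
        lock_reset();           /* exception path: clean up the lock */
    }
    return 0;
}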