diff mbox

[RFC,v2,05/11] tcg: protect TBContext with tb_lock.

Message ID 1459870344-16773-6-git-send-email-alex.bennee@linaro.org
State New
Headers show

Commit Message

Alex Bennée April 5, 2016, 3:32 p.m. UTC
From: KONRAD Frederic <fred.konrad@greensocs.com>

This protects TBContext with tb_lock to make tb_* thread safe.

We can still have an issue with tb_flush in the case of multithreaded TCG:
another CPU can be executing code during a flush.

This can be fixed later by making all other TCG threads exit before calling
tb_flush().

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-Id: <1439220437-23957-8-git-send-email-fred.konrad@greensocs.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[AJB: moved into tree, clean-up history]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v2 (base-patches, ajb):
  - re-base fixes
v7 (FK, MTTCG):
  - Drop a tb_lock in already locked restore_state_to_opc.
v6 (FK, MTTCG):
  - Drop a tb_lock around tb_find_fast in cpu-exec.c.
---
 cpu-exec.c         |  8 +++++++-
 exec.c             |  3 +++
 hw/i386/kvmvapic.c |  3 +++
 translate-all.c    | 32 +++++++++++++++++++++++++-------
 4 files changed, 38 insertions(+), 8 deletions(-)

Comments

Sergey Fedorov May 11, 2016, 12:45 p.m. UTC | #1
On 05/04/16 18:32, Alex Bennée wrote:
(snip)
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 74065d9..bd50fef 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -205,18 +205,24 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>      if (max_cycles > CF_COUNT_MASK)
>          max_cycles = CF_COUNT_MASK;
>  
> +    tb_lock();
>      cpu->tb_invalidated_flag = false;
>      tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
>                       max_cycles | CF_NOCACHE
>                           | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
>      tb->orig_tb = cpu->tb_invalidated_flag ? NULL : orig_tb;
>      cpu->current_tb = tb;
> +    tb_unlock();
> +
>      /* execute the generated code */
>      trace_exec_tb_nocache(tb, tb->pc);
> -    cpu_tb_exec(cpu, tb);
> +    cpu_tb_exec(cpu, tb->tc_ptr);

Very suspicious change. I can't even find which patch changes
cpu_tb_exec() accordingly.

> +
> +    tb_lock();
>      cpu->current_tb = NULL;
>      tb_phys_invalidate(tb, -1);
>      tb_free(tb);
> +    tb_unlock();
>  }
>  #endif
>  
> diff --git a/exec.c b/exec.c
> index 17f390e..c46c123 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
>                      continue;
>                  }
>                  cpu->watchpoint_hit = wp;
> +
> +                /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.  */

In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks
the lock by itself, it gets unlocked after sigsetjmp() returns via
siglongjmp() back to cpu_exec(). So maybe it would be more clear to say
something like "'tb_lock' gets unlocked after siglongjmp()"?

> +                tb_lock();
>                  tb_check_watchpoint(cpu);
>                  if (wp->flags & BP_STOP_BEFORE_ACCESS) {
>                      cpu->exception_index = EXCP_DEBUG;
(snip)
> diff --git a/translate-all.c b/translate-all.c
> index a7ff5e7..935d24c 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -834,7 +834,9 @@ static void page_flush_tb(void)
>  }
>  
>  /* flush all the translation blocks */
> -/* XXX: tb_flush is currently not thread safe */
> +/* XXX: tb_flush is currently not thread safe.  System emulation calls it only
> + * with tb_lock taken or from safe_work, so no need to take tb_lock here.
> + */

"System emulation"? What about user-mode emulation?

>  void tb_flush(CPUState *cpu)
>  {
>  #if defined(DEBUG_FLUSH)
> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>      /* we remove all the TBs in the range [start, end[ */
>      /* XXX: see if in some cases it could be faster to invalidate all
>         the code */
> +    tb_lock();

Don't we need also protect a call to page_find() above? page_find()
calls page_find_alloc() which is noted to be called with 'tb_lock' held.
However, it might depend on the way we treat 'mmap_lock' in system mode
emulation. We might also consider taking the lock outside of
tb_invalidate_phys*() functions because they can be called after
page_find().

>      tb = p->first_tb;
>      while (tb != NULL) {
>          n = (uintptr_t)tb & 3;
> @@ -1417,12 +1420,13 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>      if (current_tb_modified) {
>          /* we generate a block containing just the instruction
>             modifying the memory. It will ensure that it cannot modify
> -           itself */
> +           itself.  cpu_resume_from_signal unlocks tb_lock.  */
>          cpu->current_tb = NULL;
>          tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
>          cpu_resume_from_signal(cpu, NULL);
>      }
>  #endif
> +    tb_unlock();
>  }
>  
>  #ifdef CONFIG_SOFTMMU
(snip)
> @@ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>      target_ulong pc, cs_base;
>      uint64_t flags;
>  
> +    tb_lock();

We don't have to take 'tb_lock' for neither tb_find_pc() nor
cpu_restore_state_from_tb() because the lock does not protect from
tb_flush() anyway. I think the lock should be taken just before the
first call to tb_phys_invalidate() in this function.

>      tb = tb_find_pc(retaddr);
>      if (!tb) {
>          cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
> @@ -1678,11 +1688,15 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>      /* FIXME: In theory this could raise an exception.  In practice
>         we have already translated the block once so it's probably ok.  */
>      tb_gen_code(cpu, pc, cs_base, flags, cflags);
> -    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
> -       the first in the TB) then we end up generating a whole new TB and
> -       repeating the fault, which is horribly inefficient.
> -       Better would be to execute just this insn uncached, or generate a
> -       second new TB.  */
> +
> +    /* This unlocks the tb_lock.
> +     *
> +     * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
> +     * the first in the TB) then we end up generating a whole new TB and
> +     * repeating the fault, which is horribly inefficient.
> +     * Better would be to execute just this insn uncached, or generate a
> +     * second new TB.
> +     */
>      cpu_resume_from_signal(cpu, NULL);
>  }
(snip)

Kind regards,
Sergey
Paolo Bonzini May 11, 2016, 12:52 p.m. UTC | #2
Just a couple answers/remarks.

On 11/05/2016 14:45, Sergey Fedorov wrote:
>> diff --git a/exec.c b/exec.c
>> index 17f390e..c46c123 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
>>                      continue;
>>                  }
>>                  cpu->watchpoint_hit = wp;
>> +
>> +                /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.  */
> 
> In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks
> the lock by itself, it gets unlocked after sigsetjmp() returns via
> siglongjmp() back to cpu_exec(). So maybe it would be more clear to say
> something like "'tb_lock' gets unlocked after siglongjmp()"?

Yes, or "cpu_exec() unlocks tb_lock after cpu_loop_exit or
cpu_resume_from_signal".  Something like that, anyway.

>>  void tb_flush(CPUState *cpu)
>>  {
>>  #if defined(DEBUG_FLUSH)
>> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>>      /* we remove all the TBs in the range [start, end[ */
>>      /* XXX: see if in some cases it could be faster to invalidate all
>>         the code */
>> +    tb_lock();
> 
> Don't we need also protect a call to page_find() above? page_find()
> calls page_find_alloc() which is noted to be called with 'tb_lock' held.

Only if alloc=1; page_find calls it with alloc=0.

> However, it might depend on the way we treat 'mmap_lock' in system mode
> emulation.

It's just not there; generally speaking it's replaced with tb_lock.

>> @@ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>>      target_ulong pc, cs_base;
>>      uint64_t flags;
>>  
>> +    tb_lock();
> 
> We don't have to take 'tb_lock' for neither tb_find_pc() nor
> cpu_restore_state_from_tb() because the lock does not protect from
> tb_flush() anyway. I think the lock should be taken just before the
> first call to tb_phys_invalidate() in this function.

Indeed, this dates back to when cpu_restore_state_from_tb did recompilation.

In general, I don't have a big problem with slightly bigger critical
sections than necessary, if they aren't in a hot path or they avoid
repeated lock-unlock.

Thanks,

Paolo
Sergey Fedorov May 11, 2016, 1:42 p.m. UTC | #3
On 11/05/16 15:52, Paolo Bonzini wrote:
> Just a couple answers/remarks.
>
> On 11/05/2016 14:45, Sergey Fedorov wrote:
(snip)
>>> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>>>      /* we remove all the TBs in the range [start, end[ */
>>>      /* XXX: see if in some cases it could be faster to invalidate all
>>>         the code */
>>> +    tb_lock();
>> Don't we need also protect a call to page_find() above? page_find()
>> calls page_find_alloc() which is noted to be called with 'tb_lock' held.
> Only if alloc=1; page_find calls it with alloc=0.

Yeah, right :)

>> However, it might depend on the way we treat 'mmap_lock' in system mode
>> emulation.
> It's just not there; generally speaking it's replaced with tb_lock.

So why do we need yet another lock, 'mmap_lock', for user-mode emulation
and don't need it for system mode?

Kind regards,
Sergey
Alex Bennée June 1, 2016, 10:30 a.m. UTC | #4
Sergey Fedorov <serge.fdrv@gmail.com> writes:

> On 05/04/16 18:32, Alex Bennée wrote:
> (snip)
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 74065d9..bd50fef 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -205,18 +205,24 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>>      if (max_cycles > CF_COUNT_MASK)
>>          max_cycles = CF_COUNT_MASK;
>>
>> +    tb_lock();
>>      cpu->tb_invalidated_flag = false;
>>      tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
>>                       max_cycles | CF_NOCACHE
>>                           | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
>>      tb->orig_tb = cpu->tb_invalidated_flag ? NULL : orig_tb;
>>      cpu->current_tb = tb;
>> +    tb_unlock();
>> +
>>      /* execute the generated code */
>>      trace_exec_tb_nocache(tb, tb->pc);
>> -    cpu_tb_exec(cpu, tb);
>> +    cpu_tb_exec(cpu, tb->tc_ptr);
>
> Very suspicious change. I can't even find which patch changes
> cpu_tb_exec() accordingly.

I think that came from a patch this series was based on.
It's gone now.

>
>> +
>> +    tb_lock();
>>      cpu->current_tb = NULL;
>>      tb_phys_invalidate(tb, -1);
>>      tb_free(tb);
>> +    tb_unlock();
>>  }
>>  #endif
>>
>> diff --git a/exec.c b/exec.c
>> index 17f390e..c46c123 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
>>                      continue;
>>                  }
>>                  cpu->watchpoint_hit = wp;
>> +
>> +                /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.  */
>
> In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks
> the lock by itself, it gets unlocked after sigsetjmp() returns via
> siglongjmp() back to cpu_exec(). So maybe it would be more clear to say
> something like "'tb_lock' gets unlocked after siglongjmp()"?


"Locks are reset when we longjmp back to the main cpu_exec loop"?

Looking at where the patch is though I think I need to bring that bit
forward from the main series.

>
>> +                tb_lock();
>>                  tb_check_watchpoint(cpu);
>>                  if (wp->flags & BP_STOP_BEFORE_ACCESS) {
>>                      cpu->exception_index = EXCP_DEBUG;
> (snip)
>> diff --git a/translate-all.c b/translate-all.c
>> index a7ff5e7..935d24c 100644
>> --- a/translate-all.c
>> +++ b/translate-all.c
>> @@ -834,7 +834,9 @@ static void page_flush_tb(void)
>>  }
>>
>>  /* flush all the translation blocks */
>> -/* XXX: tb_flush is currently not thread safe */
>> +/* XXX: tb_flush is currently not thread safe.  System emulation calls it only
>> + * with tb_lock taken or from safe_work, so no need to take tb_lock here.
>> + */
>
> "System emulation"? What about user-mode emulation?

It's still not thread safe ;-)

It's a harder problem to solve because we can't just suspend all
threads to reset the translation buffer. I'm not sure we want to try and
fix it in this series.

>
>>  void tb_flush(CPUState *cpu)
>>  {
>>  #if defined(DEBUG_FLUSH)
>> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>>      /* we remove all the TBs in the range [start, end[ */
>>      /* XXX: see if in some cases it could be faster to invalidate all
>>         the code */
>> +    tb_lock();
>
> Don't we need also protect a call to page_find() above? page_find()
> calls page_find_alloc() which is noted to be called with 'tb_lock' held.
> However, it might depend on the way we treat 'mmap_lock' in system mode
> emulation. We might also consider taking the lock outside of
> tb_invalidate_phys*() functions because they can be called after
> page_find().
>
>>      tb = p->first_tb;
>>      while (tb != NULL) {
>>          n = (uintptr_t)tb & 3;
>> @@ -1417,12 +1420,13 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>>      if (current_tb_modified) {
>>          /* we generate a block containing just the instruction
>>             modifying the memory. It will ensure that it cannot modify
>> -           itself */
>> +           itself.  cpu_resume_from_signal unlocks tb_lock.  */
>>          cpu->current_tb = NULL;
>>          tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
>>          cpu_resume_from_signal(cpu, NULL);
>>      }
>>  #endif
>> +    tb_unlock();
>>  }
>>
>>  #ifdef CONFIG_SOFTMMU
> (snip)
>> @@ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>>      target_ulong pc, cs_base;
>>      uint64_t flags;
>>
>> +    tb_lock();
>
> We don't have to take 'tb_lock' for neither tb_find_pc() nor
> cpu_restore_state_from_tb() because the lock does not protect from
> tb_flush() anyway. I think the lock should be taken just before the
> first call to tb_phys_invalidate() in this function.
>
>>      tb = tb_find_pc(retaddr);
>>      if (!tb) {
>>          cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
>> @@ -1678,11 +1688,15 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>>      /* FIXME: In theory this could raise an exception.  In practice
>>         we have already translated the block once so it's probably ok.  */
>>      tb_gen_code(cpu, pc, cs_base, flags, cflags);
>> -    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
>> -       the first in the TB) then we end up generating a whole new TB and
>> -       repeating the fault, which is horribly inefficient.
>> -       Better would be to execute just this insn uncached, or generate a
>> -       second new TB.  */
>> +
>> +    /* This unlocks the tb_lock.
>> +     *
>> +     * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
>> +     * the first in the TB) then we end up generating a whole new TB and
>> +     * repeating the fault, which is horribly inefficient.
>> +     * Better would be to execute just this insn uncached, or generate a
>> +     * second new TB.
>> +     */
>>      cpu_resume_from_signal(cpu, NULL);
>>  }
> (snip)
>
> Kind regards,
> Sergey


--
Alex Bennée
Sergey Fedorov June 2, 2016, 2:37 p.m. UTC | #5
On 01/06/16 13:30, Alex Bennée wrote:
> Sergey Fedorov <serge.fdrv@gmail.com> writes:
>
>> On 05/04/16 18:32, Alex Bennée wrote:
>> (snip)
>>> diff --git a/exec.c b/exec.c
>>> index 17f390e..c46c123 100644
>>> --- a/exec.c
>>> +++ b/exec.c
>>> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
>>>                      continue;
>>>                  }
>>>                  cpu->watchpoint_hit = wp;
>>> +
>>> +                /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.  */
>> In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks
>> the lock by itself, it gets unlocked after sigsetjmp() returns via
>> siglongjmp() back to cpu_exec(). So maybe it would be more clear to say
>> something like "'tb_lock' gets unlocked after siglongjmp()"?
>
> "Locks are reset when we longjmp back to the main cpu_exec loop"?

Yes, this looks fine.

> Looking at where the patch is though I think I need to bring that bit
> forward from the main series.
>
>>> +                tb_lock();
>>>                  tb_check_watchpoint(cpu);
>>>                  if (wp->flags & BP_STOP_BEFORE_ACCESS) {
>>>                      cpu->exception_index = EXCP_DEBUG;
>> (snip)
>>> diff --git a/translate-all.c b/translate-all.c
>>> index a7ff5e7..935d24c 100644
>>> --- a/translate-all.c
>>> +++ b/translate-all.c
>>> @@ -834,7 +834,9 @@ static void page_flush_tb(void)
>>>  }
>>>
>>>  /* flush all the translation blocks */
>>> -/* XXX: tb_flush is currently not thread safe */
>>> +/* XXX: tb_flush is currently not thread safe.  System emulation calls it only
>>> + * with tb_lock taken or from safe_work, so no need to take tb_lock here.
>>> + */
>> "System emulation"? What about user-mode emulation?
> It's still not thread safe ;-)
>
> It's a harder problem to solve because we can't just suspend all
> threads to reset the translation buffer. I'm not sure we want to try and
> fix it in this series.

I think it could be possible to do something like start_exclusive() to
achieve this in user-only emulation.

>>>  void tb_flush(CPUState *cpu)
>>>  {
>>>  #if defined(DEBUG_FLUSH)

Kind regards,
Sergey
diff mbox

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 74065d9..bd50fef 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -205,18 +205,24 @@  static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
     if (max_cycles > CF_COUNT_MASK)
         max_cycles = CF_COUNT_MASK;
 
+    tb_lock();
     cpu->tb_invalidated_flag = false;
     tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                      max_cycles | CF_NOCACHE
                          | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
     tb->orig_tb = cpu->tb_invalidated_flag ? NULL : orig_tb;
     cpu->current_tb = tb;
+    tb_unlock();
+
     /* execute the generated code */
     trace_exec_tb_nocache(tb, tb->pc);
-    cpu_tb_exec(cpu, tb);
+    cpu_tb_exec(cpu, tb->tc_ptr);
+
+    tb_lock();
     cpu->current_tb = NULL;
     tb_phys_invalidate(tb, -1);
     tb_free(tb);
+    tb_unlock();
 }
 #endif
 
diff --git a/exec.c b/exec.c
index 17f390e..c46c123 100644
--- a/exec.c
+++ b/exec.c
@@ -2111,6 +2111,9 @@  static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
                     continue;
                 }
                 cpu->watchpoint_hit = wp;
+
+                /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.  */
+                tb_lock();
                 tb_check_watchpoint(cpu);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     cpu->exception_index = EXCP_DEBUG;
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index c69f374..7c0d542 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -14,6 +14,7 @@ 
 #include "sysemu/kvm.h"
 #include "hw/i386/apic_internal.h"
 #include "hw/sysbus.h"
+#include "tcg/tcg.h"
 
 #define VAPIC_IO_PORT           0x7e
 
@@ -446,6 +447,8 @@  static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
     resume_all_vcpus();
 
     if (!kvm_enabled()) {
+        /* Unlocked by cpu_resume_from_signal.  */
+        tb_lock();
         cs->current_tb = NULL;
         tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
         cpu_resume_from_signal(cs, NULL);
diff --git a/translate-all.c b/translate-all.c
index a7ff5e7..935d24c 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -834,7 +834,9 @@  static void page_flush_tb(void)
 }
 
 /* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
+/* XXX: tb_flush is currently not thread safe.  System emulation calls it only
+ * with tb_lock taken or from safe_work, so no need to take tb_lock here.
+ */
 void tb_flush(CPUState *cpu)
 {
 #if defined(DEBUG_FLUSH)
@@ -1350,6 +1352,7 @@  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
     /* we remove all the TBs in the range [start, end[ */
     /* XXX: see if in some cases it could be faster to invalidate all
        the code */
+    tb_lock();
     tb = p->first_tb;
     while (tb != NULL) {
         n = (uintptr_t)tb & 3;
@@ -1417,12 +1420,13 @@  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
     if (current_tb_modified) {
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
-           itself */
+           itself.  cpu_resume_from_signal unlocks tb_lock.  */
         cpu->current_tb = NULL;
         tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
         cpu_resume_from_signal(cpu, NULL);
     }
 #endif
+    tb_unlock();
 }
 
 #ifdef CONFIG_SOFTMMU
@@ -1489,6 +1493,8 @@  static void tb_invalidate_phys_page(tb_page_addr_t addr,
     if (!p) {
         return;
     }
+
+    tb_lock();
     tb = p->first_tb;
 #ifdef TARGET_HAS_PRECISE_SMC
     if (tb && pc != 0) {
@@ -1530,9 +1536,12 @@  static void tb_invalidate_phys_page(tb_page_addr_t addr,
         if (locked) {
             mmap_unlock();
         }
+
+        /* tb_lock released by cpu_resume_from_signal.  */
         cpu_resume_from_signal(cpu, puc);
     }
 #endif
+    tb_unlock();
 }
 #endif
 
@@ -1627,6 +1636,7 @@  void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     target_ulong pc, cs_base;
     uint64_t flags;
 
+    tb_lock();
     tb = tb_find_pc(retaddr);
     if (!tb) {
         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
@@ -1678,11 +1688,15 @@  void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     /* FIXME: In theory this could raise an exception.  In practice
        we have already translated the block once so it's probably ok.  */
     tb_gen_code(cpu, pc, cs_base, flags, cflags);
-    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
-       the first in the TB) then we end up generating a whole new TB and
-       repeating the fault, which is horribly inefficient.
-       Better would be to execute just this insn uncached, or generate a
-       second new TB.  */
+
+    /* This unlocks the tb_lock.
+     *
+     * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
+     * the first in the TB) then we end up generating a whole new TB and
+     * repeating the fault, which is horribly inefficient.
+     * Better would be to execute just this insn uncached, or generate a
+     * second new TB.
+     */
     cpu_resume_from_signal(cpu, NULL);
 }
 
@@ -1707,6 +1721,8 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     int direct_jmp_count, direct_jmp2_count, cross_page;
     TranslationBlock *tb;
 
+    tb_lock();
+
     target_code_size = 0;
     max_target_code_size = 0;
     cross_page = 0;
@@ -1762,6 +1778,8 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
             tcg_ctx.tb_ctx.tb_phys_invalidate_count);
     cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
     tcg_dump_info(f, cpu_fprintf);
+
+    tb_unlock();
 }
 
 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)