diff mbox

[11/10] tcg: comment on which functions have to be called with tb_lock held

Message ID 1439397664-70734-12-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Aug. 12, 2015, 4:41 p.m. UTC
softmmu requires more functions to be thread-safe, because translation
blocks can be invalidated from e.g. notdirty callbacks.  Probably the
same holds for user-mode emulation; it's just that no one has ever
tried to produce a coherent locking scheme there.

This patch will guide the introduction of more tb_lock and tb_unlock
calls for system emulation.

Note that after this patch some (most) of the mentioned functions are
still called outside tb_lock/tb_unlock.  The next one will rectify this.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                  |  1 +
 include/exec/exec-all.h |  2 ++
 include/qom/cpu.h       |  3 +++
 tcg/tcg.h               |  2 ++
 translate-all.c         | 35 ++++++++++++++++++++++++++++-------
 5 files changed, 36 insertions(+), 7 deletions(-)

Comments

fred.konrad@greensocs.com Aug. 13, 2015, 12:51 p.m. UTC | #1
On 12/08/2015 18:41, Paolo Bonzini wrote:
> softmmu requires more functions to be thread-safe, because translation
> blocks can be invalidated from e.g. notdirty callbacks.  Probably the
> same holds for user-mode emulation, it's just that no one has ever
> tried to produce a coherent locking there.
>
> This patch will guide the introduction of more tb_lock and tb_unlock
> calls for system emulation.
>
> Note that after this patch some (most) of the mentioned functions are
> still called outside tb_lock/tb_unlock.  The next one will rectify this.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   exec.c                  |  1 +
>   include/exec/exec-all.h |  2 ++
>   include/qom/cpu.h       |  3 +++
>   tcg/tcg.h               |  2 ++
>   translate-all.c         | 35 ++++++++++++++++++++++++++++-------
>   5 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 54cd70a..856a859 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -748,6 +748,7 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
>   {
>       CPUBreakpoint *bp;
>   
> +    /* TODO: locking (RCU?) */
>       bp = g_malloc(sizeof(*bp));
>   
>       bp->pc = pc;
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index b3f900a..943d97a 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -74,6 +74,7 @@ typedef struct TranslationBlock TranslationBlock;
>   
>   void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
>   void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb);
> +/* Called with tb_lock held.  */
>   void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
>                             int pc_pos);
>   
> @@ -278,6 +279,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb,
>   
>   #endif
>   
> +/* Called with tb_lock held.  */
>   static inline void tb_add_jump(TranslationBlock *tb, int n,
>                                  TranslationBlock *tb_next)
>   {
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 77bbff2..56b1f4d 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -285,7 +285,10 @@ struct CPUState {
>   
>       void *env_ptr; /* CPUArchState */
>       struct TranslationBlock *current_tb;
> +
> +    /* Protected by tb_lock.  */
>       struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
This is temporary as a first step?

> +
>       struct GDBRegisterState *gdb_regs;
>       int gdb_num_regs;
>       int gdb_num_g_regs;
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 0ae648f..a2cad31 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void)
>   
>   /* pool based memory allocation */
>   
> +/* tb_lock must be held for tcg_malloc_internal. */
>   void *tcg_malloc_internal(TCGContext *s, int size);
>   void tcg_pool_reset(TCGContext *s);
>   void tcg_pool_delete(TCGContext *s);
> @@ -598,6 +599,7 @@ void tb_lock(void);
>   void tb_unlock(void);
>   void tb_lock_reset(void);
>   
> +/* Called with tb_lock held.  */
>   static inline void *tcg_malloc(int size)
>   {
>       TCGContext *s = &tcg_ctx;
> diff --git a/translate-all.c b/translate-all.c
> index edb9cb1..17d3cd1 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
>   }
>   
>   /* The cpu state corresponding to 'searched_pc' is restored.
> + * Called with tb_lock held.
>    */
>   static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
>                                        uintptr_t searched_pc)
> @@ -424,6 +425,7 @@ static void page_init(void)
>   }
>   
>   /* If alloc=1:
> + * Called with tb_lock held for system emulation.
>    * Called with mmap_lock held for user-mode emulation.
>    */
>   static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
> @@ -734,8 +736,12 @@ bool tcg_enabled(void)
>       return tcg_ctx.code_gen_buffer != NULL;
>   }
>   
> -/* Allocate a new translation block. Flush the translation buffer if
> -   too many translation blocks or too much generated code. */
> +/*
> + * Allocate a new translation block. Flush the translation buffer if
> + * too many translation blocks or too much generated code.
> + *
> + * Called with tb_lock held.
> + */
>   static TranslationBlock *tb_alloc(target_ulong pc)
>   {
There is the famous tb_flush which needs to be called with tb_lock held 
as well.
There are several places where it's called.

>       TranslationBlock *tb;
> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
>       return tb;
>   }
>   
> +/* Called with tb_lock held.  */
>   void tb_free(TranslationBlock *tb)
>   {
>       /* In practice this is mostly used for single use temporary TB
> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong address)
>       }
>   }
>   
> -/* verify that all the pages have correct rights for code */
> +/* verify that all the pages have correct rights for code
> + *
> + * Called with tb_lock held.
> + */
>   static void tb_page_check(void)
>   {
>       TranslationBlock *tb;
> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
>       tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
>   }
>   
> -/* invalidate one TB */
> +/* invalidate one TB
> + *
> + * Called with tb_lock held.
> + */
>   void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>   {
>       CPUState *cpu;
> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p)
>   }
>   #endif
>   
> -/* Called with mmap_lock held for user mode emulation.  */
> +/* Called with tb_lock held, and mmap_lock too for user mode emulation.  */
>   TranslationBlock *tb_gen_code(CPUState *cpu,
>                                 target_ulong pc, target_ulong cs_base,
>                                 int flags, int cflags)
> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
>       }
>       if (!p->code_bitmap &&
>           ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
> -        /* build code bitmap */
> +        /* build code bitmap.  FIXME: writes should be protected by
> +         * tb_lock, reads by tb_lock or RCU.
> +         */
>           build_page_bitmap(p);
>       }
>       if (p->code_bitmap) {
> @@ -1324,6 +1339,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
>   
>   /* add the tb in the target page and protect it if necessary
>    *
> + * Called with tb_lock held.
>    * Called with mmap_lock held for user-mode emulation.
>    */
>   static inline void tb_alloc_page(TranslationBlock *tb,
> @@ -1383,6 +1399,7 @@ static inline void tb_alloc_page(TranslationBlock *tb,
>   /* add a new TB and link it to the physical page tables. phys_page2 is
>    * (-1) to indicate that only one page contains the TB.
>    *
> + * Called with tb_lock held.
>    * Called with mmap_lock held for user-mode emulation.
>    */
>   static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>   }
>   
>   /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
> -   tb[1].tc_ptr. Return NULL if not found */
> + * tb[1].tc_ptr. Return NULL if not found
> + *
> + * Called with tb_lock held.
> + */
>   static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
>   {
>       int m_min, m_max, m;
> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
>   }
>   #endif /* !defined(CONFIG_USER_ONLY) */
>   
> +/* Called with tb_lock held.  */
>   void tb_check_watchpoint(CPUState *cpu)
>   {
>       TranslationBlock *tb;
Paolo Bonzini Aug. 13, 2015, 12:59 p.m. UTC | #2
On 13/08/2015 14:51, Frederic Konrad wrote:
>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
>> index 77bbff2..56b1f4d 100644
>> --- a/include/qom/cpu.h
>> +++ b/include/qom/cpu.h
>> @@ -285,7 +285,10 @@ struct CPUState {
>>         void *env_ptr; /* CPUArchState */
>>       struct TranslationBlock *current_tb;
>> +
>> +    /* Protected by tb_lock.  */
>>       struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
> This is temporary as a first step?

Yes, I now saw that tb_lock has a huge contention in tb_find_fast. :)

I've now extracted parts of your patch "tcg: protect TBContext with
tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock
critical section" that also applies to user-mode emulation.  That way I
get good scalability on Dhrystone, same as with your branch.

Do you agree with the first 10 patches as a first step towards
upstreaming the MTTCG work?

Paolo

>> +
>>       struct GDBRegisterState *gdb_regs;
>>       int gdb_num_regs;
>>       int gdb_num_g_regs;
>> diff --git a/tcg/tcg.h b/tcg/tcg.h
>> index 0ae648f..a2cad31 100644
>> --- a/tcg/tcg.h
>> +++ b/tcg/tcg.h
>> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void)
>>     /* pool based memory allocation */
>>   +/* tb_lock must be held for tcg_malloc_internal. */
>>   void *tcg_malloc_internal(TCGContext *s, int size);
>>   void tcg_pool_reset(TCGContext *s);
>>   void tcg_pool_delete(TCGContext *s);
>> @@ -598,6 +599,7 @@ void tb_lock(void);
>>   void tb_unlock(void);
>>   void tb_lock_reset(void);
>>   +/* Called with tb_lock held.  */
>>   static inline void *tcg_malloc(int size)
>>   {
>>       TCGContext *s = &tcg_ctx;
>> diff --git a/translate-all.c b/translate-all.c
>> index edb9cb1..17d3cd1 100644
>> --- a/translate-all.c
>> +++ b/translate-all.c
>> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env,
>> TranslationBlock *tb, int *gen_code_size_ptr
>>   }
>>     /* The cpu state corresponding to 'searched_pc' is restored.
>> + * Called with tb_lock held.
>>    */
>>   static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock
>> *tb,
>>                                        uintptr_t searched_pc)
>> @@ -424,6 +425,7 @@ static void page_init(void)
>>   }
>>     /* If alloc=1:
>> + * Called with tb_lock held for system emulation.
>>    * Called with mmap_lock held for user-mode emulation.
>>    */
>>   static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
>> @@ -734,8 +736,12 @@ bool tcg_enabled(void)
>>       return tcg_ctx.code_gen_buffer != NULL;
>>   }
>>   -/* Allocate a new translation block. Flush the translation buffer if
>> -   too many translation blocks or too much generated code. */
>> +/*
>> + * Allocate a new translation block. Flush the translation buffer if
>> + * too many translation blocks or too much generated code.
>> + *
>> + * Called with tb_lock held.
>> + */
>>   static TranslationBlock *tb_alloc(target_ulong pc)
>>   {
> There is the famous tb_flush which needs to be called with tb_lock held
> as well.
> There are several place where it's called.
> 
>>       TranslationBlock *tb;
>> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
>>       return tb;
>>   }
>>   +/* Called with tb_lock held.  */
>>   void tb_free(TranslationBlock *tb)
>>   {
>>       /* In practice this is mostly used for single use temporary TB
>> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong
>> address)
>>       }
>>   }
>>   -/* verify that all the pages have correct rights for code */
>> +/* verify that all the pages have correct rights for code
>> + *
>> + * Called with tb_lock held.
>> + */
>>   static void tb_page_check(void)
>>   {
>>       TranslationBlock *tb;
>> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock
>> *tb, int n)
>>       tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr +
>> tb->tb_next_offset[n]));
>>   }
>>   -/* invalidate one TB */
>> +/* invalidate one TB
>> + *
>> + * Called with tb_lock held.
>> + */
>>   void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>>   {
>>       CPUState *cpu;
>> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p)
>>   }
>>   #endif
>>   -/* Called with mmap_lock held for user mode emulation.  */
>> +/* Called with tb_lock held, and mmap_lock too for user mode
>> emulation.  */
>>   TranslationBlock *tb_gen_code(CPUState *cpu,
>>                                 target_ulong pc, target_ulong cs_base,
>>                                 int flags, int cflags)
>> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t
>> start, int len)
>>       }
>>       if (!p->code_bitmap &&
>>           ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
>> -        /* build code bitmap */
>> +        /* build code bitmap.  FIXME: writes should be protected by
>> +         * tb_lock, reads by tb_lock or RCU.
>> +         */
>>           build_page_bitmap(p);
>>       }
>>       if (p->code_bitmap) {
>> @@ -1324,6 +1339,7 @@ static void
>> tb_invalidate_phys_page(tb_page_addr_t addr,
>>     /* add the tb in the target page and protect it if necessary
>>    *
>> + * Called with tb_lock held.
>>    * Called with mmap_lock held for user-mode emulation.
>>    */
>>   static inline void tb_alloc_page(TranslationBlock *tb,
>> @@ -1383,6 +1399,7 @@ static inline void
>> tb_alloc_page(TranslationBlock *tb,
>>   /* add a new TB and link it to the physical page tables. phys_page2 is
>>    * (-1) to indicate that only one page contains the TB.
>>    *
>> + * Called with tb_lock held.
>>    * Called with mmap_lock held for user-mode emulation.
>>    */
>>   static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb,
>> tb_page_addr_t phys_pc,
>>   }
>>     /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
>> -   tb[1].tc_ptr. Return NULL if not found */
>> + * tb[1].tc_ptr. Return NULL if not found
>> + *
>> + * Called with tb_lock held.
>> + */
>>   static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
>>   {
>>       int m_min, m_max, m;
>> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as,
>> hwaddr addr)
>>   }
>>   #endif /* !defined(CONFIG_USER_ONLY) */
>>   +/* Called with tb_lock held.  */
>>   void tb_check_watchpoint(CPUState *cpu)
>>   {
>>       TranslationBlock *tb;
>
fred.konrad@greensocs.com Aug. 13, 2015, 1:32 p.m. UTC | #3
On 13/08/2015 14:59, Paolo Bonzini wrote:
>
> On 13/08/2015 14:51, Frederic Konrad wrote:
>>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
>>> index 77bbff2..56b1f4d 100644
>>> --- a/include/qom/cpu.h
>>> +++ b/include/qom/cpu.h
>>> @@ -285,7 +285,10 @@ struct CPUState {
>>>          void *env_ptr; /* CPUArchState */
>>>        struct TranslationBlock *current_tb;
>>> +
>>> +    /* Protected by tb_lock.  */
>>>        struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
>> This is temporary as a first step?
> Yes, I now saw that tb_lock has a huge contention in tb_find_fast. :)
Yes it is just enormous. Makes MTTCG 2x slower than upstream :).

> I've now extracted parts of your patch "tcg: protect TBContext with
> tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock
> critical section" that also applies to user-mode emulation.  That way I
> get good scalability on Dhrystone, same as with your branch.

I guess with the whole tlb/tb flush made safe? Which is theoretically protecting
tb_jmp_cache (or at least lets only the right thread access it).
The drawback of all that is that I'm not sure this is faster when we have a
lot of context switches. For tb_flush it's not really a problem, as it happens
approximately never, but tb_invalidate and the tlb_*_flush calls are more regular.

Fred
>
> Do you agree with the first 10 patches as a first step towards
> upstreaming the MTTCG work?
>
> Paolo
>
>>> +
>>>        struct GDBRegisterState *gdb_regs;
>>>        int gdb_num_regs;
>>>        int gdb_num_g_regs;
>>> diff --git a/tcg/tcg.h b/tcg/tcg.h
>>> index 0ae648f..a2cad31 100644
>>> --- a/tcg/tcg.h
>>> +++ b/tcg/tcg.h
>>> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void)
>>>      /* pool based memory allocation */
>>>    +/* tb_lock must be held for tcg_malloc_internal. */
>>>    void *tcg_malloc_internal(TCGContext *s, int size);
>>>    void tcg_pool_reset(TCGContext *s);
>>>    void tcg_pool_delete(TCGContext *s);
>>> @@ -598,6 +599,7 @@ void tb_lock(void);
>>>    void tb_unlock(void);
>>>    void tb_lock_reset(void);
>>>    +/* Called with tb_lock held.  */
>>>    static inline void *tcg_malloc(int size)
>>>    {
>>>        TCGContext *s = &tcg_ctx;
>>> diff --git a/translate-all.c b/translate-all.c
>>> index edb9cb1..17d3cd1 100644
>>> --- a/translate-all.c
>>> +++ b/translate-all.c
>>> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env,
>>> TranslationBlock *tb, int *gen_code_size_ptr
>>>    }
>>>      /* The cpu state corresponding to 'searched_pc' is restored.
>>> + * Called with tb_lock held.
>>>     */
>>>    static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock
>>> *tb,
>>>                                         uintptr_t searched_pc)
>>> @@ -424,6 +425,7 @@ static void page_init(void)
>>>    }
>>>      /* If alloc=1:
>>> + * Called with tb_lock held for system emulation.
>>>     * Called with mmap_lock held for user-mode emulation.
>>>     */
>>>    static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
>>> @@ -734,8 +736,12 @@ bool tcg_enabled(void)
>>>        return tcg_ctx.code_gen_buffer != NULL;
>>>    }
>>>    -/* Allocate a new translation block. Flush the translation buffer if
>>> -   too many translation blocks or too much generated code. */
>>> +/*
>>> + * Allocate a new translation block. Flush the translation buffer if
>>> + * too many translation blocks or too much generated code.
>>> + *
>>> + * Called with tb_lock held.
>>> + */
>>>    static TranslationBlock *tb_alloc(target_ulong pc)
>>>    {
>> There is the famous tb_flush which needs to be called with tb_lock held
>> as well.
>> There are several place where it's called.
>>
>>>        TranslationBlock *tb;
>>> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
>>>        return tb;
>>>    }
>>>    +/* Called with tb_lock held.  */
>>>    void tb_free(TranslationBlock *tb)
>>>    {
>>>        /* In practice this is mostly used for single use temporary TB
>>> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong
>>> address)
>>>        }
>>>    }
>>>    -/* verify that all the pages have correct rights for code */
>>> +/* verify that all the pages have correct rights for code
>>> + *
>>> + * Called with tb_lock held.
>>> + */
>>>    static void tb_page_check(void)
>>>    {
>>>        TranslationBlock *tb;
>>> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock
>>> *tb, int n)
>>>        tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr +
>>> tb->tb_next_offset[n]));
>>>    }
>>>    -/* invalidate one TB */
>>> +/* invalidate one TB
>>> + *
>>> + * Called with tb_lock held.
>>> + */
>>>    void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>>>    {
>>>        CPUState *cpu;
>>> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p)
>>>    }
>>>    #endif
>>>    -/* Called with mmap_lock held for user mode emulation.  */
>>> +/* Called with tb_lock held, and mmap_lock too for user mode
>>> emulation.  */
>>>    TranslationBlock *tb_gen_code(CPUState *cpu,
>>>                                  target_ulong pc, target_ulong cs_base,
>>>                                  int flags, int cflags)
>>> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t
>>> start, int len)
>>>        }
>>>        if (!p->code_bitmap &&
>>>            ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
>>> -        /* build code bitmap */
>>> +        /* build code bitmap.  FIXME: writes should be protected by
>>> +         * tb_lock, reads by tb_lock or RCU.
>>> +         */
>>>            build_page_bitmap(p);
>>>        }
>>>        if (p->code_bitmap) {
>>> @@ -1324,6 +1339,7 @@ static void
>>> tb_invalidate_phys_page(tb_page_addr_t addr,
>>>      /* add the tb in the target page and protect it if necessary
>>>     *
>>> + * Called with tb_lock held.
>>>     * Called with mmap_lock held for user-mode emulation.
>>>     */
>>>    static inline void tb_alloc_page(TranslationBlock *tb,
>>> @@ -1383,6 +1399,7 @@ static inline void
>>> tb_alloc_page(TranslationBlock *tb,
>>>    /* add a new TB and link it to the physical page tables. phys_page2 is
>>>     * (-1) to indicate that only one page contains the TB.
>>>     *
>>> + * Called with tb_lock held.
>>>     * Called with mmap_lock held for user-mode emulation.
>>>     */
>>>    static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>>> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb,
>>> tb_page_addr_t phys_pc,
>>>    }
>>>      /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
>>> -   tb[1].tc_ptr. Return NULL if not found */
>>> + * tb[1].tc_ptr. Return NULL if not found
>>> + *
>>> + * Called with tb_lock held.
>>> + */
>>>    static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
>>>    {
>>>        int m_min, m_max, m;
>>> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as,
>>> hwaddr addr)
>>>    }
>>>    #endif /* !defined(CONFIG_USER_ONLY) */
>>>    +/* Called with tb_lock held.  */
>>>    void tb_check_watchpoint(CPUState *cpu)
>>>    {
>>>        TranslationBlock *tb;
Paolo Bonzini Aug. 13, 2015, 2:39 p.m. UTC | #4
On 13/08/2015 15:32, Frederic Konrad wrote:
>> I've now extracted parts of your patch "tcg: protect TBContext with
>> tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock
>> critical section" that also applies to user-mode emulation.  That way I
>> get good scalability on Dhrystone, same as with your branch.
> 
> I guess with the whole tlb/tb flush safe?

Yes, that should go before the lock-free tb_find_fast.

> Which is theorically
> protecting tb_jmp_cache (or at least let only the right thread
> accessing it). The drawback of all that is I'm not sure this is
> faster when we have a lot of context switches. For tb_flush it's not
> really a problem as it happen approximately never but the
> tb_invalidate, tlb_*_flush are more regular.

TBs are physically-tagged so invalidates are not too frequent unless the
guest does self-modifying code or swaps.  They shouldn't be a source of
tb_lock contention except if you do SMC (not just dynamic recompilation:
really self-modifying code).

TBs are a good match for RCU overall.  TB data is immutable if you
sacrifice the tb_phys_hash optimization, so there's no need to copy
anything, hence modifications to the lists are rare and reclamations
(tb_flush) are extremely rare.

TLB shootdowns of course will slow down things, but those are a separate
issue and we don't really care: the ARM flush-all-CPUs is probably
faster than an IPI anyway.

Paolo
Peter Maydell Aug. 13, 2015, 3:32 p.m. UTC | #5
On 13 August 2015 at 15:39, Paolo Bonzini <pbonzini@redhat.com> wrote:
> TBs are physically-tagged so invalidates are not too frequent unless the
> guest does self-modifying code or swaps.  They shouldn't be a source of
> tb_lock contention except if you do SMC (not just dynamic recompilation:
> really self-modifying code).

Do guests with writeable data and code in the same page also
get treated as SMC for these purposes? (That's more common
than real SMC.)

-- PMM
Paolo Bonzini Aug. 13, 2015, 4:20 p.m. UTC | #6
On 13/08/2015 17:32, Peter Maydell wrote:
>> > TBs are physically-tagged so invalidates are not too frequent unless the
>> > guest does self-modifying code or swaps.  They shouldn't be a source of
>> > tb_lock contention except if you do SMC (not just dynamic recompilation:
>> > really self-modifying code).
> Do guests with writeable data and code in the same page also
> get treated as SMC for these purposes? (That's more common
> than real SMC.)

Not after a short while, when the code_bitmap kicks in.

Paolo
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 54cd70a..856a859 100644
--- a/exec.c
+++ b/exec.c
@@ -748,6 +748,7 @@  int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 {
     CPUBreakpoint *bp;
 
+    /* TODO: locking (RCU?) */
     bp = g_malloc(sizeof(*bp));
 
     bp->pc = pc;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b3f900a..943d97a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -74,6 +74,7 @@  typedef struct TranslationBlock TranslationBlock;
 
 void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
 void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb);
+/* Called with tb_lock held.  */
 void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
                           int pc_pos);
 
@@ -278,6 +279,7 @@  static inline void tb_set_jmp_target(TranslationBlock *tb,
 
 #endif
 
+/* Called with tb_lock held.  */
 static inline void tb_add_jump(TranslationBlock *tb, int n,
                                TranslationBlock *tb_next)
 {
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 77bbff2..56b1f4d 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -285,7 +285,10 @@  struct CPUState {
 
     void *env_ptr; /* CPUArchState */
     struct TranslationBlock *current_tb;
+
+    /* Protected by tb_lock.  */
     struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+
     struct GDBRegisterState *gdb_regs;
     int gdb_num_regs;
     int gdb_num_g_regs;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 0ae648f..a2cad31 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -590,6 +590,7 @@  static inline bool tcg_op_buf_full(void)
 
 /* pool based memory allocation */
 
+/* tb_lock must be held for tcg_malloc_internal. */
 void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
 void tcg_pool_delete(TCGContext *s);
@@ -598,6 +599,7 @@  void tb_lock(void);
 void tb_unlock(void);
 void tb_lock_reset(void);
 
+/* Called with tb_lock held.  */
 static inline void *tcg_malloc(int size)
 {
     TCGContext *s = &tcg_ctx;
diff --git a/translate-all.c b/translate-all.c
index edb9cb1..17d3cd1 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -237,6 +237,7 @@  int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
 }
 
 /* The cpu state corresponding to 'searched_pc' is restored.
+ * Called with tb_lock held.
  */
 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                      uintptr_t searched_pc)
@@ -424,6 +425,7 @@  static void page_init(void)
 }
 
 /* If alloc=1:
+ * Called with tb_lock held for system emulation.
  * Called with mmap_lock held for user-mode emulation.
  */
 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
@@ -734,8 +736,12 @@  bool tcg_enabled(void)
     return tcg_ctx.code_gen_buffer != NULL;
 }
 
-/* Allocate a new translation block. Flush the translation buffer if
-   too many translation blocks or too much generated code. */
+/*
+ * Allocate a new translation block. Flush the translation buffer if
+ * too many translation blocks or too much generated code.
+ *
+ * Called with tb_lock held.
+ */
 static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
@@ -751,6 +757,7 @@  static TranslationBlock *tb_alloc(target_ulong pc)
     return tb;
 }
 
+/* Called with tb_lock held.  */
 void tb_free(TranslationBlock *tb)
 {
     /* In practice this is mostly used for single use temporary TB
@@ -859,7 +866,10 @@  static void tb_invalidate_check(target_ulong address)
     }
 }
 
-/* verify that all the pages have correct rights for code */
+/* verify that all the pages have correct rights for code
+ *
+ * Called with tb_lock held.
+ */
 static void tb_page_check(void)
 {
     TranslationBlock *tb;
@@ -947,7 +957,10 @@  static inline void tb_reset_jump(TranslationBlock *tb, int n)
     tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
 }
 
-/* invalidate one TB */
+/* invalidate one TB
+ *
+ * Called with tb_lock held.
+ */
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 {
     CPUState *cpu;
@@ -1036,7 +1049,7 @@  static void build_page_bitmap(PageDesc *p)
 }
 #endif
 
-/* Called with mmap_lock held for user mode emulation.  */
+/* Called with tb_lock held, and mmap_lock too for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,
                               target_ulong pc, target_ulong cs_base,
                               int flags, int cflags)
@@ -1234,7 +1247,9 @@  void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
     }
     if (!p->code_bitmap &&
         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
-        /* build code bitmap */
+        /* build code bitmap.  FIXME: writes should be protected by
+         * tb_lock, reads by tb_lock or RCU.
+         */
         build_page_bitmap(p);
     }
     if (p->code_bitmap) {
@@ -1324,6 +1339,7 @@  static void tb_invalidate_phys_page(tb_page_addr_t addr,
 
 /* add the tb in the target page and protect it if necessary
  *
+ * Called with tb_lock held.
  * Called with mmap_lock held for user-mode emulation.
  */
 static inline void tb_alloc_page(TranslationBlock *tb,
@@ -1383,6 +1399,7 @@  static inline void tb_alloc_page(TranslationBlock *tb,
 /* add a new TB and link it to the physical page tables. phys_page2 is
  * (-1) to indicate that only one page contains the TB.
  *
+ * Called with tb_lock held.
  * Called with mmap_lock held for user-mode emulation.
  */
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
@@ -1423,7 +1440,10 @@  static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 }
 
 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
-   tb[1].tc_ptr. Return NULL if not found */
+ * tb[1].tc_ptr. Return NULL if not found
+ *
+ * Called with tb_lock held.
+ */
 static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
 {
     int m_min, m_max, m;
@@ -1476,6 +1496,7 @@  void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
 }
 #endif /* !defined(CONFIG_USER_ONLY) */
 
+/* Called with tb_lock held.  */
 void tb_check_watchpoint(CPUState *cpu)
 {
     TranslationBlock *tb;