Message ID | 1439397664-70734-12-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On 12/08/2015 18:41, Paolo Bonzini wrote: > softmmu requires more functions to be thread-safe, because translation > blocks can be invalidated from e.g. notdirty callbacks. Probably the > same holds for user-mode emulation, it's just that no one has ever > tried to produce a coherent locking there. > > This patch will guide the introduction of more tb_lock and tb_unlock > calls for system emulation. > > Note that after this patch some (most) of the mentioned functions are > still called outside tb_lock/tb_unlock. The next one will rectify this. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > exec.c | 1 + > include/exec/exec-all.h | 2 ++ > include/qom/cpu.h | 3 +++ > tcg/tcg.h | 2 ++ > translate-all.c | 35 ++++++++++++++++++++++++++++------- > 5 files changed, 36 insertions(+), 7 deletions(-) > > diff --git a/exec.c b/exec.c > index 54cd70a..856a859 100644 > --- a/exec.c > +++ b/exec.c > @@ -748,6 +748,7 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, > { > CPUBreakpoint *bp; > > + /* TODO: locking (RCU?) */ > bp = g_malloc(sizeof(*bp)); > > bp->pc = pc; > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index b3f900a..943d97a 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -74,6 +74,7 @@ typedef struct TranslationBlock TranslationBlock; > > void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb); > void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb); > +/* Called with tb_lock held. */ > void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, > int pc_pos); > > @@ -278,6 +279,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb, > > #endif > > +/* Called with tb_lock held. 
*/ > static inline void tb_add_jump(TranslationBlock *tb, int n, > TranslationBlock *tb_next) > { > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index 77bbff2..56b1f4d 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -285,7 +285,10 @@ struct CPUState { > > void *env_ptr; /* CPUArchState */ > struct TranslationBlock *current_tb; > + > + /* Protected by tb_lock. */ > struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; This is temporary as a first step? > + > struct GDBRegisterState *gdb_regs; > int gdb_num_regs; > int gdb_num_g_regs; > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 0ae648f..a2cad31 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void) > > /* pool based memory allocation */ > > +/* tb_lock must be held for tcg_malloc_internal. */ > void *tcg_malloc_internal(TCGContext *s, int size); > void tcg_pool_reset(TCGContext *s); > void tcg_pool_delete(TCGContext *s); > @@ -598,6 +599,7 @@ void tb_lock(void); > void tb_unlock(void); > void tb_lock_reset(void); > > +/* Called with tb_lock held. */ > static inline void *tcg_malloc(int size) > { > TCGContext *s = &tcg_ctx; > diff --git a/translate-all.c b/translate-all.c > index edb9cb1..17d3cd1 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr > } > > /* The cpu state corresponding to 'searched_pc' is restored. > + * Called with tb_lock held. > */ > static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, > uintptr_t searched_pc) > @@ -424,6 +425,7 @@ static void page_init(void) > } > > /* If alloc=1: > + * Called with tb_lock held for system emulation. > * Called with mmap_lock held for user-mode emulation. > */ > static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) > @@ -734,8 +736,12 @@ bool tcg_enabled(void) > return tcg_ctx.code_gen_buffer != NULL; > } > > -/* Allocate a new translation block. 
Flush the translation buffer if > - too many translation blocks or too much generated code. */ > +/* > + * Allocate a new translation block. Flush the translation buffer if > + * too many translation blocks or too much generated code. > + * > + * Called with tb_lock held. > + */ > static TranslationBlock *tb_alloc(target_ulong pc) > { There is the famous tb_flush which needs to be called with tb_lock held as well. There are several place where it's called. > TranslationBlock *tb; > @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) > return tb; > } > > +/* Called with tb_lock held. */ > void tb_free(TranslationBlock *tb) > { > /* In practice this is mostly used for single use temporary TB > @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong address) > } > } > > -/* verify that all the pages have correct rights for code */ > +/* verify that all the pages have correct rights for code > + * > + * Called with tb_lock held. > + */ > static void tb_page_check(void) > { > TranslationBlock *tb; > @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n) > tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); > } > > -/* invalidate one TB */ > +/* invalidate one TB > + * > + * Called with tb_lock held. > + */ > void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) > { > CPUState *cpu; > @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p) > } > #endif > > -/* Called with mmap_lock held for user mode emulation. */ > +/* Called with tb_lock held, and mmap_lock too for user mode emulation. */ > TranslationBlock *tb_gen_code(CPUState *cpu, > target_ulong pc, target_ulong cs_base, > int flags, int cflags) > @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) > } > if (!p->code_bitmap && > ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { > - /* build code bitmap */ > + /* build code bitmap. 
FIXME: writes should be protected by > + * tb_lock, reads by tb_lock or RCU. > + */ > build_page_bitmap(p); > } > if (p->code_bitmap) { > @@ -1324,6 +1339,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr, > > /* add the tb in the target page and protect it if necessary > * > + * Called with tb_lock held. > * Called with mmap_lock held for user-mode emulation. > */ > static inline void tb_alloc_page(TranslationBlock *tb, > @@ -1383,6 +1399,7 @@ static inline void tb_alloc_page(TranslationBlock *tb, > /* add a new TB and link it to the physical page tables. phys_page2 is > * (-1) to indicate that only one page contains the TB. > * > + * Called with tb_lock held. > * Called with mmap_lock held for user-mode emulation. > */ > static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > } > > /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < > - tb[1].tc_ptr. Return NULL if not found */ > + * tb[1].tc_ptr. Return NULL if not found > + * > + * Called with tb_lock held. > + */ > static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) > { > int m_min, m_max, m; > @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) > } > #endif /* !defined(CONFIG_USER_ONLY) */ > > +/* Called with tb_lock held. */ > void tb_check_watchpoint(CPUState *cpu) > { > TranslationBlock *tb;
On 13/08/2015 14:51, Frederic Konrad wrote: >> diff --git a/include/qom/cpu.h b/include/qom/cpu.h >> index 77bbff2..56b1f4d 100644 >> --- a/include/qom/cpu.h >> +++ b/include/qom/cpu.h >> @@ -285,7 +285,10 @@ struct CPUState { >> void *env_ptr; /* CPUArchState */ >> struct TranslationBlock *current_tb; >> + >> + /* Protected by tb_lock. */ >> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > This is temporary as a first step? Yes, I now saw that tb_lock has a huge contention in tb_find_fast. :) I've now extracted parts of your patch "tcg: protect TBContext with tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock critical section" that also applies to user-mode emulation. That way I get good scalability on Dhrystone, same as with your branch. Do you agree with the first 10 patches as a first step towards upstreaming the MTTCG work? Paolo >> + >> struct GDBRegisterState *gdb_regs; >> int gdb_num_regs; >> int gdb_num_g_regs; >> diff --git a/tcg/tcg.h b/tcg/tcg.h >> index 0ae648f..a2cad31 100644 >> --- a/tcg/tcg.h >> +++ b/tcg/tcg.h >> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void) >> /* pool based memory allocation */ >> +/* tb_lock must be held for tcg_malloc_internal. */ >> void *tcg_malloc_internal(TCGContext *s, int size); >> void tcg_pool_reset(TCGContext *s); >> void tcg_pool_delete(TCGContext *s); >> @@ -598,6 +599,7 @@ void tb_lock(void); >> void tb_unlock(void); >> void tb_lock_reset(void); >> +/* Called with tb_lock held. */ >> static inline void *tcg_malloc(int size) >> { >> TCGContext *s = &tcg_ctx; >> diff --git a/translate-all.c b/translate-all.c >> index edb9cb1..17d3cd1 100644 >> --- a/translate-all.c >> +++ b/translate-all.c >> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, >> TranslationBlock *tb, int *gen_code_size_ptr >> } >> /* The cpu state corresponding to 'searched_pc' is restored. >> + * Called with tb_lock held. 
>> */ >> static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock >> *tb, >> uintptr_t searched_pc) >> @@ -424,6 +425,7 @@ static void page_init(void) >> } >> /* If alloc=1: >> + * Called with tb_lock held for system emulation. >> * Called with mmap_lock held for user-mode emulation. >> */ >> static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) >> @@ -734,8 +736,12 @@ bool tcg_enabled(void) >> return tcg_ctx.code_gen_buffer != NULL; >> } >> -/* Allocate a new translation block. Flush the translation buffer if >> - too many translation blocks or too much generated code. */ >> +/* >> + * Allocate a new translation block. Flush the translation buffer if >> + * too many translation blocks or too much generated code. >> + * >> + * Called with tb_lock held. >> + */ >> static TranslationBlock *tb_alloc(target_ulong pc) >> { > There is the famous tb_flush which needs to be called with tb_lock held > as well. > There are several place where it's called. > >> TranslationBlock *tb; >> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) >> return tb; >> } >> +/* Called with tb_lock held. */ >> void tb_free(TranslationBlock *tb) >> { >> /* In practice this is mostly used for single use temporary TB >> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong >> address) >> } >> } >> -/* verify that all the pages have correct rights for code */ >> +/* verify that all the pages have correct rights for code >> + * >> + * Called with tb_lock held. >> + */ >> static void tb_page_check(void) >> { >> TranslationBlock *tb; >> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock >> *tb, int n) >> tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + >> tb->tb_next_offset[n])); >> } >> -/* invalidate one TB */ >> +/* invalidate one TB >> + * >> + * Called with tb_lock held. 
>> + */ >> void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) >> { >> CPUState *cpu; >> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p) >> } >> #endif >> -/* Called with mmap_lock held for user mode emulation. */ >> +/* Called with tb_lock held, and mmap_lock too for user mode >> emulation. */ >> TranslationBlock *tb_gen_code(CPUState *cpu, >> target_ulong pc, target_ulong cs_base, >> int flags, int cflags) >> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t >> start, int len) >> } >> if (!p->code_bitmap && >> ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { >> - /* build code bitmap */ >> + /* build code bitmap. FIXME: writes should be protected by >> + * tb_lock, reads by tb_lock or RCU. >> + */ >> build_page_bitmap(p); >> } >> if (p->code_bitmap) { >> @@ -1324,6 +1339,7 @@ static void >> tb_invalidate_phys_page(tb_page_addr_t addr, >> /* add the tb in the target page and protect it if necessary >> * >> + * Called with tb_lock held. >> * Called with mmap_lock held for user-mode emulation. >> */ >> static inline void tb_alloc_page(TranslationBlock *tb, >> @@ -1383,6 +1399,7 @@ static inline void >> tb_alloc_page(TranslationBlock *tb, >> /* add a new TB and link it to the physical page tables. phys_page2 is >> * (-1) to indicate that only one page contains the TB. >> * >> + * Called with tb_lock held. >> * Called with mmap_lock held for user-mode emulation. >> */ >> static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, >> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, >> tb_page_addr_t phys_pc, >> } >> /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < >> - tb[1].tc_ptr. Return NULL if not found */ >> + * tb[1].tc_ptr. Return NULL if not found >> + * >> + * Called with tb_lock held. 
>> + */ >> static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) >> { >> int m_min, m_max, m; >> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, >> hwaddr addr) >> } >> #endif /* !defined(CONFIG_USER_ONLY) */ >> +/* Called with tb_lock held. */ >> void tb_check_watchpoint(CPUState *cpu) >> { >> TranslationBlock *tb; >
On 13/08/2015 14:59, Paolo Bonzini wrote: > > On 13/08/2015 14:51, Frederic Konrad wrote: >>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h >>> index 77bbff2..56b1f4d 100644 >>> --- a/include/qom/cpu.h >>> +++ b/include/qom/cpu.h >>> @@ -285,7 +285,10 @@ struct CPUState { >>> void *env_ptr; /* CPUArchState */ >>> struct TranslationBlock *current_tb; >>> + >>> + /* Protected by tb_lock. */ >>> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; >> This is temporary as a first step? > Yes, I now saw that tb_lock has a huge contention in tb_find_fast. :) Yes it is just enormous. Makes MTTCG 2x slower than upstream :). > I've now extracted parts of your patch "tcg: protect TBContext with > tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock > critical section" that also applies to user-mode emulation. That way I > get good scalability on Dhrystone, same as with your branch. I guess with the whole tlb/tb flush safe? Which is theoretically protecting tb_jmp_cache (or at least lets only the right thread access it). The drawback of all that is I'm not sure this is faster when we have a lot of context switches. For tb_flush it's not really a problem as it happens approximately never but the tb_invalidate, tlb_*_flush are more regular. Fred > > Do you agree with the first 10 patches as a first step towards > upstreaming the MTTCG work? > > Paolo > >>> + >>> struct GDBRegisterState *gdb_regs; >>> int gdb_num_regs; >>> int gdb_num_g_regs; >>> diff --git a/tcg/tcg.h b/tcg/tcg.h >>> index 0ae648f..a2cad31 100644 >>> --- a/tcg/tcg.h >>> +++ b/tcg/tcg.h >>> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void) >>> /* pool based memory allocation */ >>> +/* tb_lock must be held for tcg_malloc_internal. 
*/ >>> void *tcg_malloc_internal(TCGContext *s, int size); >>> void tcg_pool_reset(TCGContext *s); >>> void tcg_pool_delete(TCGContext *s); >>> @@ -598,6 +599,7 @@ void tb_lock(void); >>> void tb_unlock(void); >>> void tb_lock_reset(void); >>> +/* Called with tb_lock held. */ >>> static inline void *tcg_malloc(int size) >>> { >>> TCGContext *s = &tcg_ctx; >>> diff --git a/translate-all.c b/translate-all.c >>> index edb9cb1..17d3cd1 100644 >>> --- a/translate-all.c >>> +++ b/translate-all.c >>> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, >>> TranslationBlock *tb, int *gen_code_size_ptr >>> } >>> /* The cpu state corresponding to 'searched_pc' is restored. >>> + * Called with tb_lock held. >>> */ >>> static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock >>> *tb, >>> uintptr_t searched_pc) >>> @@ -424,6 +425,7 @@ static void page_init(void) >>> } >>> /* If alloc=1: >>> + * Called with tb_lock held for system emulation. >>> * Called with mmap_lock held for user-mode emulation. >>> */ >>> static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) >>> @@ -734,8 +736,12 @@ bool tcg_enabled(void) >>> return tcg_ctx.code_gen_buffer != NULL; >>> } >>> -/* Allocate a new translation block. Flush the translation buffer if >>> - too many translation blocks or too much generated code. */ >>> +/* >>> + * Allocate a new translation block. Flush the translation buffer if >>> + * too many translation blocks or too much generated code. >>> + * >>> + * Called with tb_lock held. >>> + */ >>> static TranslationBlock *tb_alloc(target_ulong pc) >>> { >> There is the famous tb_flush which needs to be called with tb_lock held >> as well. >> There are several place where it's called. >> >>> TranslationBlock *tb; >>> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) >>> return tb; >>> } >>> +/* Called with tb_lock held. 
*/ >>> void tb_free(TranslationBlock *tb) >>> { >>> /* In practice this is mostly used for single use temporary TB >>> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong >>> address) >>> } >>> } >>> -/* verify that all the pages have correct rights for code */ >>> +/* verify that all the pages have correct rights for code >>> + * >>> + * Called with tb_lock held. >>> + */ >>> static void tb_page_check(void) >>> { >>> TranslationBlock *tb; >>> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock >>> *tb, int n) >>> tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + >>> tb->tb_next_offset[n])); >>> } >>> -/* invalidate one TB */ >>> +/* invalidate one TB >>> + * >>> + * Called with tb_lock held. >>> + */ >>> void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) >>> { >>> CPUState *cpu; >>> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p) >>> } >>> #endif >>> -/* Called with mmap_lock held for user mode emulation. */ >>> +/* Called with tb_lock held, and mmap_lock too for user mode >>> emulation. */ >>> TranslationBlock *tb_gen_code(CPUState *cpu, >>> target_ulong pc, target_ulong cs_base, >>> int flags, int cflags) >>> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t >>> start, int len) >>> } >>> if (!p->code_bitmap && >>> ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { >>> - /* build code bitmap */ >>> + /* build code bitmap. FIXME: writes should be protected by >>> + * tb_lock, reads by tb_lock or RCU. >>> + */ >>> build_page_bitmap(p); >>> } >>> if (p->code_bitmap) { >>> @@ -1324,6 +1339,7 @@ static void >>> tb_invalidate_phys_page(tb_page_addr_t addr, >>> /* add the tb in the target page and protect it if necessary >>> * >>> + * Called with tb_lock held. >>> * Called with mmap_lock held for user-mode emulation. 
>>> */ >>> static inline void tb_alloc_page(TranslationBlock *tb, >>> @@ -1383,6 +1399,7 @@ static inline void >>> tb_alloc_page(TranslationBlock *tb, >>> /* add a new TB and link it to the physical page tables. phys_page2 is >>> * (-1) to indicate that only one page contains the TB. >>> * >>> + * Called with tb_lock held. >>> * Called with mmap_lock held for user-mode emulation. >>> */ >>> static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, >>> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, >>> tb_page_addr_t phys_pc, >>> } >>> /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < >>> - tb[1].tc_ptr. Return NULL if not found */ >>> + * tb[1].tc_ptr. Return NULL if not found >>> + * >>> + * Called with tb_lock held. >>> + */ >>> static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) >>> { >>> int m_min, m_max, m; >>> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, >>> hwaddr addr) >>> } >>> #endif /* !defined(CONFIG_USER_ONLY) */ >>> +/* Called with tb_lock held. */ >>> void tb_check_watchpoint(CPUState *cpu) >>> { >>> TranslationBlock *tb;
On 13/08/2015 15:32, Frederic Konrad wrote: >> I've now extracted parts of your patch "tcg: protect TBContext with >> tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock >> critical section" that also applies to user-mode emulation. That way I >> get good scalability on Dhrystone, same as with your branch. > > I guess with the whole tlb/tb flush safe? Yes, that should go before the lock-free tb_find_fast. > Which is theorically > protecting tb_jmp_cache (or at least let only the right thread > accessing it). The drawback of all that is I'm not sure this is > faster when we have a lot of context switches. For tb_flush it's not > really a problem as it happen approximately never but the > tb_invalidate, tlb_*_flush are more regular. TBs are physically-tagged so invalidates are not too frequent unless the guest does self-modifying code or swaps. They shouldn't be a source of tb_lock contention except if you do SMC (not just dynamic recompilation: really self-modifying code). TBs are a good match for RCU overall. TB data is immutable if you sacrifice the tb_phys_hash optimization, so there's no need to copy anything, hence modifications to the lists are rare and reclamations (tb_flush) are extremely rare. TLB shootdowns of course will slow down things, but those are a separate issue and we don't really care: the ARM flush-all-CPUs is probably faster than an IPI anyway. Paolo
On 13 August 2015 at 15:39, Paolo Bonzini <pbonzini@redhat.com> wrote: > TBs are physically-tagged so invalidates are not too frequent unless the > guest does self-modifying code or swaps. They shouldn't be a source of > tb_lock contention except if you do SMC (not just dynamic recompilation: > really self-modifying code). Do guests with writeable data and code in the same page also get treated as SMC for these purposes? (That's more common than real SMC.) -- PMM
On 13/08/2015 17:32, Peter Maydell wrote: >> > TBs are physically-tagged so invalidates are not too frequent unless the >> > guest does self-modifying code or swaps. They shouldn't be a source of >> > tb_lock contention except if you do SMC (not just dynamic recompilation: >> > really self-modifying code). > Do guests with writeable data and code in the same page also > get treated as SMC for these purposes? (That's more common > than real SMC.) Not after a short while, when the code_bitmap kicks in. Paolo
diff --git a/exec.c b/exec.c index 54cd70a..856a859 100644 --- a/exec.c +++ b/exec.c @@ -748,6 +748,7 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, { CPUBreakpoint *bp; + /* TODO: locking (RCU?) */ bp = g_malloc(sizeof(*bp)); bp->pc = pc; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index b3f900a..943d97a 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -74,6 +74,7 @@ typedef struct TranslationBlock TranslationBlock; void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb); void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb); +/* Called with tb_lock held. */ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, int pc_pos); @@ -278,6 +279,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb, #endif +/* Called with tb_lock held. */ static inline void tb_add_jump(TranslationBlock *tb, int n, TranslationBlock *tb_next) { diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 77bbff2..56b1f4d 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -285,7 +285,10 @@ struct CPUState { void *env_ptr; /* CPUArchState */ struct TranslationBlock *current_tb; + + /* Protected by tb_lock. */ struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; + struct GDBRegisterState *gdb_regs; int gdb_num_regs; int gdb_num_g_regs; diff --git a/tcg/tcg.h b/tcg/tcg.h index 0ae648f..a2cad31 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void) /* pool based memory allocation */ +/* tb_lock must be held for tcg_malloc_internal. */ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); void tcg_pool_delete(TCGContext *s); @@ -598,6 +599,7 @@ void tb_lock(void); void tb_unlock(void); void tb_lock_reset(void); +/* Called with tb_lock held. 
*/ static inline void *tcg_malloc(int size) { TCGContext *s = &tcg_ctx; diff --git a/translate-all.c b/translate-all.c index edb9cb1..17d3cd1 100644 --- a/translate-all.c +++ b/translate-all.c @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr } /* The cpu state corresponding to 'searched_pc' is restored. + * Called with tb_lock held. */ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc) @@ -424,6 +425,7 @@ static void page_init(void) } /* If alloc=1: + * Called with tb_lock held for system emulation. * Called with mmap_lock held for user-mode emulation. */ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) @@ -734,8 +736,12 @@ bool tcg_enabled(void) return tcg_ctx.code_gen_buffer != NULL; } -/* Allocate a new translation block. Flush the translation buffer if - too many translation blocks or too much generated code. */ +/* + * Allocate a new translation block. Flush the translation buffer if + * too many translation blocks or too much generated code. + * + * Called with tb_lock held. + */ static TranslationBlock *tb_alloc(target_ulong pc) { TranslationBlock *tb; @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) return tb; } +/* Called with tb_lock held. */ void tb_free(TranslationBlock *tb) { /* In practice this is mostly used for single use temporary TB @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong address) } } -/* verify that all the pages have correct rights for code */ +/* verify that all the pages have correct rights for code + * + * Called with tb_lock held. + */ static void tb_page_check(void) { TranslationBlock *tb; @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n) tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); } -/* invalidate one TB */ +/* invalidate one TB + * + * Called with tb_lock held. 
+ */ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) { CPUState *cpu; @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p) } #endif -/* Called with mmap_lock held for user mode emulation. */ +/* Called with tb_lock held, and mmap_lock too for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, int flags, int cflags) @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) } if (!p->code_bitmap && ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { - /* build code bitmap */ + /* build code bitmap. FIXME: writes should be protected by + * tb_lock, reads by tb_lock or RCU. + */ build_page_bitmap(p); } if (p->code_bitmap) { @@ -1324,6 +1339,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr, /* add the tb in the target page and protect it if necessary * + * Called with tb_lock held. * Called with mmap_lock held for user-mode emulation. */ static inline void tb_alloc_page(TranslationBlock *tb, @@ -1383,6 +1399,7 @@ static inline void tb_alloc_page(TranslationBlock *tb, /* add a new TB and link it to the physical page tables. phys_page2 is * (-1) to indicate that only one page contains the TB. * + * Called with tb_lock held. * Called with mmap_lock held for user-mode emulation. */ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < - tb[1].tc_ptr. Return NULL if not found */ + * tb[1].tc_ptr. Return NULL if not found + * + * Called with tb_lock held. + */ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) { int m_min, m_max, m; @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) } #endif /* !defined(CONFIG_USER_ONLY) */ +/* Called with tb_lock held. */ void tb_check_watchpoint(CPUState *cpu) { TranslationBlock *tb;
softmmu requires more functions to be thread-safe, because translation blocks can be invalidated from e.g. notdirty callbacks. Probably the same holds for user-mode emulation, it's just that no one has ever tried to produce a coherent locking there. This patch will guide the introduction of more tb_lock and tb_unlock calls for system emulation. Note that after this patch some (most) of the mentioned functions are still called outside tb_lock/tb_unlock. The next one will rectify this. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- exec.c | 1 + include/exec/exec-all.h | 2 ++ include/qom/cpu.h | 3 +++ tcg/tcg.h | 2 ++ translate-all.c | 35 ++++++++++++++++++++++++++++------- 5 files changed, 36 insertions(+), 7 deletions(-)