Message ID | 1440375847-17603-27-git-send-email-cota@braap.org |
---|---|
State | New |
Headers | show |
On 23/08/2015 17:23, Emilio G. Cota wrote: > This paves the way for a lockless tb_find_fast. > > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > cpu-exec.c | 8 +++++++- > exec.c | 2 ++ > include/qom/cpu.h | 15 +++++++++++++++ > qom/cpu.c | 2 +- > translate-all.c | 32 +++++++++++++++++++++++++++++++- > 5 files changed, 56 insertions(+), 3 deletions(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index f758928..5ad578d 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -334,7 +334,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, > } > > /* we add the TB in the virtual pc hash table */ > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > return tb; > } > > @@ -343,13 +345,17 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu) > CPUArchState *env = (CPUArchState *)cpu->env_ptr; > TranslationBlock *tb; > target_ulong cs_base, pc; > + unsigned int version; > int flags; > > /* we record a subset of the CPU state. It will > always be the same before a given translated block > is executed. 
*/ > cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > - tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > + do { > + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); > + tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); > if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || > tb->flags != flags)) { > tb = tb_find_slow(cpu, pc, cs_base, flags); > diff --git a/exec.c b/exec.c > index edf2236..ae6f416 100644 > --- a/exec.c > +++ b/exec.c > @@ -577,6 +577,8 @@ void cpu_exec_init(CPUState *cpu, Error **errp) > int cpu_index; > Error *local_err = NULL; > > + qemu_mutex_init(&cpu->tb_jmp_cache_lock); > + seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock); > #ifndef CONFIG_USER_ONLY > cpu->as = &address_space_memory; > cpu->thread_id = qemu_get_thread_id(); > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index dbe0438..f383c24 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -27,6 +27,7 @@ > #include "exec/hwaddr.h" > #include "exec/memattrs.h" > #include "qemu/queue.h" > +#include "qemu/seqlock.h" > #include "qemu/thread.h" > #include "qemu/typedefs.h" > > @@ -287,6 +288,13 @@ struct CPUState { > > void *env_ptr; /* CPUArchState */ > struct TranslationBlock *current_tb; > + /* > + * The seqlock here is needed because not all updates are to a single > + * entry; sometimes we want to atomically clear all entries that belong to > + * a given page, e.g. when flushing said page. 
> + */ > + QemuMutex tb_jmp_cache_lock; > + QemuSeqLock tb_jmp_cache_sequence; > struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > > struct GDBRegisterState *gdb_regs; > @@ -342,6 +350,13 @@ extern struct CPUTailQ cpus; > > extern __thread CPUState *current_cpu; > > +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) > +{ > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > + memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > +} > + > /** > * cpu_paging_enabled: > * @cpu: The CPU whose state is to be inspected. > diff --git a/qom/cpu.c b/qom/cpu.c > index ac19710..5e72e7a 100644 > --- a/qom/cpu.c > +++ b/qom/cpu.c > @@ -251,7 +251,7 @@ static void cpu_common_reset(CPUState *cpu) > cpu->icount_decr.u32 = 0; > cpu->can_do_io = 1; > cpu->exception_index = -1; > - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + cpu_tb_jmp_cache_clear(cpu); > } > > static bool cpu_common_has_work(CPUState *cs) > diff --git a/translate-all.c b/translate-all.c > index 76a0be8..668b43a 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -863,7 +863,7 @@ void tb_flush(CPUState *cpu) > tcg_ctx.tb_ctx.nb_tbs = 0; > > CPU_FOREACH(cpu) { > - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); > + cpu_tb_jmp_cache_clear(cpu); > } > > memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); > @@ -988,6 +988,27 @@ static inline void tb_jmp_remove(TranslationBlock *tb, int n) > } > } > > +static inline void tb_jmp_cache_entry_clear(CPUState *cpu, TranslationBlock *tb) > +{ > + unsigned int version; > + unsigned int h; > + bool hit = false; > + > + h = tb_jmp_cache_hash_func(tb->pc); > + do { > + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); > + hit = cpu->tb_jmp_cache[h] == tb; > + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); > + > + if (hit) { > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > + if 
(likely(cpu->tb_jmp_cache[h] == tb)) { > + cpu->tb_jmp_cache[h] = NULL; > + } > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > + } > +} > + > /* invalidate one TB > * > * Called with tb_lock held. > @@ -1024,6 +1045,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) > invalidate_page_bitmap(p); > } > > + tcg_ctx.tb_ctx.tb_invalidated_flag = 1; > + > + /* remove the TB from the hash list */ > + CPU_FOREACH(cpu) { > + tb_jmp_cache_entry_clear(cpu, tb); > + } > + > /* suppress this TB from the two jump lists */ > tb_jmp_remove(tb, 0); > tb_jmp_remove(tb, 1); > @@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) > /* Discard jump cache entries for any tb which might potentially > overlap the flushed page. */ > i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > > i = tb_jmp_cache_hash_page(addr); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > } > > void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) > I'm not sure how the last three patches compare with the existing "tcg: move tb_find_fast outside the tb_lock critical section"? Paolo
On Sun, Aug 23, 2015 at 18:14:58 -0700, Paolo Bonzini wrote: > On 23/08/2015 17:23, Emilio G. Cota wrote: > > This paves the way for a lockless tb_find_fast. > > > > Signed-off-by: Emilio G. Cota <cota@braap.org> > > --- (snip) > > @@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) > > /* Discard jump cache entries for any tb which might potentially > > overlap the flushed page. */ > > i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); > > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > > memset(&cpu->tb_jmp_cache[i], 0, > > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > > > > i = tb_jmp_cache_hash_page(addr); > > memset(&cpu->tb_jmp_cache[i], 0, > > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > > } > > > > void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) > > > > I'm not sure how the last three patches compare with the existing "tcg: > move tb_find_fast outside the tb_lock critical section"? The seqlock for tb_jmp_cache is necessary the moment that the array can be wiped out with a memset(), as shown above. That function (tb_flush_jmp_cache) is called by tlb_flush_page, which has many callers. One could argue that we could enforce calling tlb_flush_page to be a) always done by the owner thread or b) done while all other CPUs are paused. I argue that worrying about that is not worth it; let's protect the array with a seqlock, which on TSO is essentially free, and worry about more important things. Wrt the next two patches: Patch 27 is an improvement in that each TB has its own valid flag, which makes sense because this should only affect TBs that are trying to chain to/from it, not all TBs. Patch 28 uses the RCU QLIST which to me seems cleaner and less error-prone than open-coding an RCU LIST. Thanks, Emilio
> The seqlock for tb_jmp_cache is necessary the moment that the > array can be wiped out with a memset(), as shown above. That > function (tb_flush_jmp_cache) is called by tlb_flush_page, > which has many callers. > > One could argue that we could enforce calling tlb_flush_page to be > a) always done by the owner thread or b) done while all others CPUs > are paused. > > I argue that worrying about that is not worth it; let's protect > the array with a seqlock, which on TSO is essentially free, and > worry about more important things. Got it, this makes sense. Paolo > Wrt the next two patches: > > Patch 27 is an improvement in that each TB has its own valid flag, > which makes sense because this should only affect TB's that are > trying to chain to/from it, not all TBs. > > Patch 28 uses the RCU QLIST which to me seems cleaner and less > error-prone than open-coding an RCU LIST. > > Thanks, > > Emilio >
On 24/08/2015 02:23, Emilio G. Cota wrote: > This paves the way for a lockless tb_find_fast. Having now reviewed the patch, I think we can do better. The idea is: - only the CPU thread can set cpu->tb_jmp_cache[] - other threads can, under seqlock protection, _clear_ cpu->tb_jmp_cache[] - the seqlock can be protected by tb_lock. Then you need not retry the read, you can just fall back to the slow path, which will take the tb_lock and thus serialize with the clearer. Paolo > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > cpu-exec.c | 8 +++++++- > exec.c | 2 ++ > include/qom/cpu.h | 15 +++++++++++++++ > qom/cpu.c | 2 +- > translate-all.c | 32 +++++++++++++++++++++++++++++++- > 5 files changed, 56 insertions(+), 3 deletions(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index f758928..5ad578d 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -334,7 +334,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, > } > > /* we add the TB in the virtual pc hash table */ > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > return tb; > } > > @@ -343,13 +345,17 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu) > CPUArchState *env = (CPUArchState *)cpu->env_ptr; > TranslationBlock *tb; > target_ulong cs_base, pc; > + unsigned int version; > int flags; > > /* we record a subset of the CPU state. It will > always be the same before a given translated block > is executed. 
*/ > cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > - tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > + do { > + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); > + tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); > if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || > tb->flags != flags)) { > tb = tb_find_slow(cpu, pc, cs_base, flags); > diff --git a/exec.c b/exec.c > index edf2236..ae6f416 100644 > --- a/exec.c > +++ b/exec.c > @@ -577,6 +577,8 @@ void cpu_exec_init(CPUState *cpu, Error **errp) > int cpu_index; > Error *local_err = NULL; > > + qemu_mutex_init(&cpu->tb_jmp_cache_lock); > + seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock); > #ifndef CONFIG_USER_ONLY > cpu->as = &address_space_memory; > cpu->thread_id = qemu_get_thread_id(); > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index dbe0438..f383c24 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -27,6 +27,7 @@ > #include "exec/hwaddr.h" > #include "exec/memattrs.h" > #include "qemu/queue.h" > +#include "qemu/seqlock.h" > #include "qemu/thread.h" > #include "qemu/typedefs.h" > > @@ -287,6 +288,13 @@ struct CPUState { > > void *env_ptr; /* CPUArchState */ > struct TranslationBlock *current_tb; > + /* > + * The seqlock here is needed because not all updates are to a single > + * entry; sometimes we want to atomically clear all entries that belong to > + * a given page, e.g. when flushing said page. 
> + */ > + QemuMutex tb_jmp_cache_lock; > + QemuSeqLock tb_jmp_cache_sequence; > struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > > struct GDBRegisterState *gdb_regs; > @@ -342,6 +350,13 @@ extern struct CPUTailQ cpus; > > extern __thread CPUState *current_cpu; > > +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) > +{ > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > + memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > +} > + > /** > * cpu_paging_enabled: > * @cpu: The CPU whose state is to be inspected. > diff --git a/qom/cpu.c b/qom/cpu.c > index ac19710..5e72e7a 100644 > --- a/qom/cpu.c > +++ b/qom/cpu.c > @@ -251,7 +251,7 @@ static void cpu_common_reset(CPUState *cpu) > cpu->icount_decr.u32 = 0; > cpu->can_do_io = 1; > cpu->exception_index = -1; > - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + cpu_tb_jmp_cache_clear(cpu); > } > > static bool cpu_common_has_work(CPUState *cs) > diff --git a/translate-all.c b/translate-all.c > index 76a0be8..668b43a 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -863,7 +863,7 @@ void tb_flush(CPUState *cpu) > tcg_ctx.tb_ctx.nb_tbs = 0; > > CPU_FOREACH(cpu) { > - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); > + cpu_tb_jmp_cache_clear(cpu); > } > > memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); > @@ -988,6 +988,27 @@ static inline void tb_jmp_remove(TranslationBlock *tb, int n) > } > } > > +static inline void tb_jmp_cache_entry_clear(CPUState *cpu, TranslationBlock *tb) > +{ > + unsigned int version; > + unsigned int h; > + bool hit = false; > + > + h = tb_jmp_cache_hash_func(tb->pc); > + do { > + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); > + hit = cpu->tb_jmp_cache[h] == tb; > + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); > + > + if (hit) { > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > + if 
(likely(cpu->tb_jmp_cache[h] == tb)) { > + cpu->tb_jmp_cache[h] = NULL; > + } > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > + } > +} > + > /* invalidate one TB > * > * Called with tb_lock held. > @@ -1024,6 +1045,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) > invalidate_page_bitmap(p); > } > > + tcg_ctx.tb_ctx.tb_invalidated_flag = 1; > + > + /* remove the TB from the hash list */ > + CPU_FOREACH(cpu) { > + tb_jmp_cache_entry_clear(cpu, tb); > + } > + > /* suppress this TB from the two jump lists */ > tb_jmp_remove(tb, 0); > tb_jmp_remove(tb, 1); > @@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) > /* Discard jump cache entries for any tb which might potentially > overlap the flushed page. */ > i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > > i = tb_jmp_cache_hash_page(addr); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > } > > void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) >
On 04/09/2015 10:50, Paolo Bonzini wrote: > Having now reviewed the patch, I think we can do better. > > The idea is: > > - only the CPU thread can set cpu->tb_jmp_cache[] > > - other threads can, under seqlock protection, _clear_ cpu->tb_jmp_cache[] > > - the seqlock can be protected by tb_lock. Then you need not retry the > read, you can just fall back to the slow path, which will take the > tb_lock and thus serialize with the clearer. ... and then we're back to the idea of making tb_invalidated_flag per-TB. :) Paolo
diff --git a/cpu-exec.c b/cpu-exec.c index f758928..5ad578d 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -334,7 +334,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, } /* we add the TB in the virtual pc hash table */ + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); return tb; } @@ -343,13 +345,17 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu) CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; + unsigned int version; int flags; /* we record a subset of the CPU state. It will always be the same before a given translated block is executed. */ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; + do { + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); + tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { tb = tb_find_slow(cpu, pc, cs_base, flags); diff --git a/exec.c b/exec.c index edf2236..ae6f416 100644 --- a/exec.c +++ b/exec.c @@ -577,6 +577,8 @@ void cpu_exec_init(CPUState *cpu, Error **errp) int cpu_index; Error *local_err = NULL; + qemu_mutex_init(&cpu->tb_jmp_cache_lock); + seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock); #ifndef CONFIG_USER_ONLY cpu->as = &address_space_memory; cpu->thread_id = qemu_get_thread_id(); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index dbe0438..f383c24 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -27,6 +27,7 @@ #include "exec/hwaddr.h" #include "exec/memattrs.h" #include "qemu/queue.h" +#include "qemu/seqlock.h" #include "qemu/thread.h" #include "qemu/typedefs.h" @@ -287,6 +288,13 @@ struct CPUState { void *env_ptr; /* CPUArchState */ struct TranslationBlock *current_tb; + /* + * The seqlock here is 
needed because not all updates are to a single + * entry; sometimes we want to atomically clear all entries that belong to + * a given page, e.g. when flushing said page. + */ + QemuMutex tb_jmp_cache_lock; + QemuSeqLock tb_jmp_cache_sequence; struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; struct GDBRegisterState *gdb_regs; @@ -342,6 +350,13 @@ extern struct CPUTailQ cpus; extern __thread CPUState *current_cpu; +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) +{ + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); + memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); +} + /** * cpu_paging_enabled: * @cpu: The CPU whose state is to be inspected. diff --git a/qom/cpu.c b/qom/cpu.c index ac19710..5e72e7a 100644 --- a/qom/cpu.c +++ b/qom/cpu.c @@ -251,7 +251,7 @@ static void cpu_common_reset(CPUState *cpu) cpu->icount_decr.u32 = 0; cpu->can_do_io = 1; cpu->exception_index = -1; - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); + cpu_tb_jmp_cache_clear(cpu); } static bool cpu_common_has_work(CPUState *cs) diff --git a/translate-all.c b/translate-all.c index 76a0be8..668b43a 100644 --- a/translate-all.c +++ b/translate-all.c @@ -863,7 +863,7 @@ void tb_flush(CPUState *cpu) tcg_ctx.tb_ctx.nb_tbs = 0; CPU_FOREACH(cpu) { - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + cpu_tb_jmp_cache_clear(cpu); } memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); @@ -988,6 +988,27 @@ static inline void tb_jmp_remove(TranslationBlock *tb, int n) } } +static inline void tb_jmp_cache_entry_clear(CPUState *cpu, TranslationBlock *tb) +{ + unsigned int version; + unsigned int h; + bool hit = false; + + h = tb_jmp_cache_hash_func(tb->pc); + do { + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); + hit = cpu->tb_jmp_cache[h] == tb; + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); + + if (hit) { + 
seqlock_write_lock(&cpu->tb_jmp_cache_sequence); + if (likely(cpu->tb_jmp_cache[h] == tb)) { + cpu->tb_jmp_cache[h] = NULL; + } + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); + } +} + /* invalidate one TB * * Called with tb_lock held. @@ -1024,6 +1045,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) invalidate_page_bitmap(p); } + tcg_ctx.tb_ctx.tb_invalidated_flag = 1; + + /* remove the TB from the hash list */ + CPU_FOREACH(cpu) { + tb_jmp_cache_entry_clear(cpu, tb); + } + /* suppress this TB from the two jump lists */ tb_jmp_remove(tb, 0); tb_jmp_remove(tb, 1); @@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) /* Discard jump cache entries for any tb which might potentially overlap the flushed page. */ i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); memset(&cpu->tb_jmp_cache[i], 0, TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); i = tb_jmp_cache_hash_page(addr); memset(&cpu->tb_jmp_cache[i], 0, TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); } void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
This paves the way for a lockless tb_find_fast. Signed-off-by: Emilio G. Cota <cota@braap.org> --- cpu-exec.c | 8 +++++++- exec.c | 2 ++ include/qom/cpu.h | 15 +++++++++++++++ qom/cpu.c | 2 +- translate-all.c | 32 +++++++++++++++++++++++++++++++- 5 files changed, 56 insertions(+), 3 deletions(-)