Message ID | 1493187803-4510-2-git-send-email-cota@braap.org |
---|---|
State | New |
Headers | show |
On 26/04/2017 08:23, Emilio G. Cota wrote: > This paves the way for upcoming work. > > Reviewed-by: Richard Henderson <rth@twiddle.net> > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > tcg-runtime.c | 21 +++++++++++++++++++++ > tcg/tcg-runtime.h | 2 ++ > tcg/tcg.h | 1 + > 3 files changed, 24 insertions(+) > > diff --git a/tcg-runtime.c b/tcg-runtime.c > index 4c60c96..90d2d4b 100644 > --- a/tcg-runtime.c > +++ b/tcg-runtime.c > @@ -27,6 +27,7 @@ > #include "exec/helper-proto.h" > #include "exec/cpu_ldst.h" > #include "exec/exec-all.h" > +#include "exec/tb-hash.h" > > /* 32-bit helpers */ > > @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) > return ctpop64(arg); > } > > +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) > +{ > + CPUState *cpu = ENV_GET_CPU(env); > + TranslationBlock *tb; > + target_ulong cs_base, pc; > + uint32_t flags; > + > + if (unlikely(atomic_read(&cpu->exit_request))) { > + goto out_epilogue; > + } This should not be necessary, because cpu->icount_decr will be checked by the prologue of the destination tb. Paolo > + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && > + tb->flags == flags)) { > + return tb->tc_ptr; > + } > + out_epilogue: > + return tcg_ctx.code_gen_epilogue; > +} > + > void HELPER(exit_atomic)(CPUArchState *env) > { > cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); > diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h > index 114ea6f..c41d38a 100644 > --- a/tcg/tcg-runtime.h > +++ b/tcg/tcg-runtime.h > @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) > DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) > DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) > > +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) > + > DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) > > #ifdef CONFIG_SOFTMMU > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 6c216bb..5ec48d1 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -699,6 +699,7 @@ struct TCGContext { > extension that allows arithmetic on void*. */ > int code_gen_max_blocks; > void *code_gen_prologue; > + void *code_gen_epilogue; > void *code_gen_buffer; > size_t code_gen_buffer_size; > void *code_gen_ptr; >
On 04/26/2017 08:23 AM, Emilio G. Cota wrote: > This paves the way for upcoming work. > > Reviewed-by: Richard Henderson <rth@twiddle.net> > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > tcg-runtime.c | 21 +++++++++++++++++++++ > tcg/tcg-runtime.h | 2 ++ > tcg/tcg.h | 1 + > 3 files changed, 24 insertions(+) > > diff --git a/tcg-runtime.c b/tcg-runtime.c > index 4c60c96..90d2d4b 100644 > --- a/tcg-runtime.c > +++ b/tcg-runtime.c > @@ -27,6 +27,7 @@ > #include "exec/helper-proto.h" > #include "exec/cpu_ldst.h" > #include "exec/exec-all.h" > +#include "exec/tb-hash.h" > > /* 32-bit helpers */ > > @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) > return ctpop64(arg); > } > > +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) > +{ > + CPUState *cpu = ENV_GET_CPU(env); > + TranslationBlock *tb; > + target_ulong cs_base, pc; > + uint32_t flags; > + > + if (unlikely(atomic_read(&cpu->exit_request))) { > + goto out_epilogue; > + } Paolo is right. This will also be checked by the first instructions of the TB and there's little point in repeating it here, especially if it is indeed unlikely. > + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && > + tb->flags == flags)) { This comparison is wrong. It will incorrectly reject a TB for i386 guest when CS_BASE != 0. You really want tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); if (tb) { cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) { return tb->tc_ptr; } } return tcg_ctx.code_gen_epilogue; where you don't even load the cpu state if there isn't a preliminary hit in the cache. (Note to self: That minor optimization would also apply to tb_find.) I also wonder, if we've gone this far, if we wouldn't go all the way and also check tb_htable_lookup. r~
Emilio G. Cota <cota@braap.org> writes: > This paves the way for upcoming work. > > Reviewed-by: Richard Henderson <rth@twiddle.net> > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > tcg-runtime.c | 21 +++++++++++++++++++++ > tcg/tcg-runtime.h | 2 ++ > tcg/tcg.h | 1 + > 3 files changed, 24 insertions(+) > > diff --git a/tcg-runtime.c b/tcg-runtime.c > index 4c60c96..90d2d4b 100644 > --- a/tcg-runtime.c > +++ b/tcg-runtime.c > @@ -27,6 +27,7 @@ > #include "exec/helper-proto.h" > #include "exec/cpu_ldst.h" > #include "exec/exec-all.h" > +#include "exec/tb-hash.h" > > /* 32-bit helpers */ > > @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) > return ctpop64(arg); > } > > +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) > +{ > + CPUState *cpu = ENV_GET_CPU(env); > + TranslationBlock *tb; > + target_ulong cs_base, pc; > + uint32_t flags; > + > + if (unlikely(atomic_read(&cpu->exit_request))) { > + goto out_epilogue; > + } > + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && > + tb->flags == flags)) { Should we also not be checking the TB hasn't been invalidated: tb->invalid? > + return tb->tc_ptr; > + } > + out_epilogue: > + return tcg_ctx.code_gen_epilogue; > +} > + > void HELPER(exit_atomic)(CPUArchState *env) > { > cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); > diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h > index 114ea6f..c41d38a 100644 > --- a/tcg/tcg-runtime.h > +++ b/tcg/tcg-runtime.h > @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) > DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) > DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) > > +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) > + > DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) > > #ifdef CONFIG_SOFTMMU > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 6c216bb..5ec48d1 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -699,6 +699,7 @@ struct TCGContext { > extension that allows arithmetic on void*. */ > int code_gen_max_blocks; > void *code_gen_prologue; > + void *code_gen_epilogue; > void *code_gen_buffer; > size_t code_gen_buffer_size; > void *code_gen_ptr; -- Alex Bennée
On 04/26/2017 12:29 PM, Alex Bennée wrote: > > Emilio G. Cota <cota@braap.org> writes: > >> This paves the way for upcoming work. >> >> Reviewed-by: Richard Henderson <rth@twiddle.net> >> Signed-off-by: Emilio G. Cota <cota@braap.org> >> --- >> tcg-runtime.c | 21 +++++++++++++++++++++ >> tcg/tcg-runtime.h | 2 ++ >> tcg/tcg.h | 1 + >> 3 files changed, 24 insertions(+) >> >> diff --git a/tcg-runtime.c b/tcg-runtime.c >> index 4c60c96..90d2d4b 100644 >> --- a/tcg-runtime.c >> +++ b/tcg-runtime.c >> @@ -27,6 +27,7 @@ >> #include "exec/helper-proto.h" >> #include "exec/cpu_ldst.h" >> #include "exec/exec-all.h" >> +#include "exec/tb-hash.h" >> >> /* 32-bit helpers */ >> >> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) >> return ctpop64(arg); >> } >> >> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) >> +{ >> + CPUState *cpu = ENV_GET_CPU(env); >> + TranslationBlock *tb; >> + target_ulong cs_base, pc; >> + uint32_t flags; >> + >> + if (unlikely(atomic_read(&cpu->exit_request))) { >> + goto out_epilogue; >> + } >> + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); >> + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); >> + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && >> + tb->flags == flags)) { > > Should we also not be checking the TB hasn't been invalidated: tb->invalid? We don't check in tb_find. I guess we're assuming that such have been purged from the tb_jmp_cache. That said, tb_phys_invalidate assumes tb_locked, and I don't immediately remember how all that is supposed to tie together. r~
On 26/04/2017 12:29, Alex Bennée wrote: > > Emilio G. Cota <cota@braap.org> writes: > >> This paves the way for upcoming work. >> >> Reviewed-by: Richard Henderson <rth@twiddle.net> >> Signed-off-by: Emilio G. Cota <cota@braap.org> >> --- >> tcg-runtime.c | 21 +++++++++++++++++++++ >> tcg/tcg-runtime.h | 2 ++ >> tcg/tcg.h | 1 + >> 3 files changed, 24 insertions(+) >> >> diff --git a/tcg-runtime.c b/tcg-runtime.c >> index 4c60c96..90d2d4b 100644 >> --- a/tcg-runtime.c >> +++ b/tcg-runtime.c >> @@ -27,6 +27,7 @@ >> #include "exec/helper-proto.h" >> #include "exec/cpu_ldst.h" >> #include "exec/exec-all.h" >> +#include "exec/tb-hash.h" >> >> /* 32-bit helpers */ >> >> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) >> return ctpop64(arg); >> } >> >> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) >> +{ >> + CPUState *cpu = ENV_GET_CPU(env); >> + TranslationBlock *tb; >> + target_ulong cs_base, pc; >> + uint32_t flags; >> + >> + if (unlikely(atomic_read(&cpu->exit_request))) { >> + goto out_epilogue; >> + } >> + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); >> + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); >> + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && >> + tb->flags == flags)) { > > Should we also not be checking the TB hasn't been invalidated: tb->invalid? It's not needed because this lookup is (if I understand it right) once only and is not reused later. This is why tb_find doesn't check tb->invalid, but uses it to avoid adding the TB to the chain. Good: tb_find tb_phys_invalidate tb_lock tb->invalid = true lookup cache cache hit tb_unlock tb_lock tb->invalid? yes, skip tb_add_jump tb_unlock execute tb once Bad (doesn't happen): tb_find tb_phys_invalidate tb_lock tb->invalid = true lookup cache cache hit tb_unlock tb_lock tb_add_jump tb_unlock execute tb many times Paolo >> + return tb->tc_ptr; >> + } >> + out_epilogue: >> + return tcg_ctx.code_gen_epilogue; >> +} >> + >> void HELPER(exit_atomic)(CPUArchState *env) >> { >> cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); >> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h >> index 114ea6f..c41d38a 100644 >> --- a/tcg/tcg-runtime.h >> +++ b/tcg/tcg-runtime.h >> @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) >> DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) >> DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) >> >> +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) >> + >> DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) >> >> #ifdef CONFIG_SOFTMMU >> diff --git a/tcg/tcg.h b/tcg/tcg.h >> index 6c216bb..5ec48d1 100644 >> --- a/tcg/tcg.h >> +++ b/tcg/tcg.h >> @@ -699,6 +699,7 @@ struct TCGContext { >> extension that allows arithmetic on void*. */ >> int code_gen_max_blocks; >> void *code_gen_prologue; >> + void *code_gen_epilogue; >> void *code_gen_buffer; >> size_t code_gen_buffer_size; >> void *code_gen_ptr; > > > -- > Alex Bennée >
Paolo Bonzini <pbonzini@redhat.com> writes: > On 26/04/2017 12:29, Alex Bennée wrote: >> >> Emilio G. Cota <cota@braap.org> writes: >> >>> This paves the way for upcoming work. >>> >>> Reviewed-by: Richard Henderson <rth@twiddle.net> >>> Signed-off-by: Emilio G. Cota <cota@braap.org> >>> --- >>> tcg-runtime.c | 21 +++++++++++++++++++++ >>> tcg/tcg-runtime.h | 2 ++ >>> tcg/tcg.h | 1 + >>> 3 files changed, 24 insertions(+) >>> >>> diff --git a/tcg-runtime.c b/tcg-runtime.c >>> index 4c60c96..90d2d4b 100644 >>> --- a/tcg-runtime.c >>> +++ b/tcg-runtime.c >>> @@ -27,6 +27,7 @@ >>> #include "exec/helper-proto.h" >>> #include "exec/cpu_ldst.h" >>> #include "exec/exec-all.h" >>> +#include "exec/tb-hash.h" >>> >>> /* 32-bit helpers */ >>> >>> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) >>> return ctpop64(arg); >>> } >>> >>> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) >>> +{ >>> + CPUState *cpu = ENV_GET_CPU(env); >>> + TranslationBlock *tb; >>> + target_ulong cs_base, pc; >>> + uint32_t flags; >>> + >>> + if (unlikely(atomic_read(&cpu->exit_request))) { >>> + goto out_epilogue; >>> + } >>> + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); >>> + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); >>> + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && >>> + tb->flags == flags)) { >> >> Should we also not be checking the TB hasn't been invalidated: tb->invalid? > > It's not needed because this lookup is (if I understand it right) once > only and is not reused later. This is why tb_find doesn't check > tb->invalid, but uses it to avoid adding the TB to the chain. Right. And when tb->invalid = true is set we then flush it from the jump cache so it will never be found by the helper after. OK nothing to see here ;-) Reviewed-by: Alex Bennée <alex.bennee@linaro.org> > > Good: > > tb_find tb_phys_invalidate > tb_lock > tb->invalid = true > lookup cache > cache hit > tb_unlock > tb_lock > tb->invalid? > yes, skip tb_add_jump > tb_unlock > execute tb once > > Bad (doesn't happen): > > tb_find tb_phys_invalidate > tb_lock > tb->invalid = true > lookup cache > cache hit > tb_unlock > tb_lock > tb_add_jump > tb_unlock > execute tb many times > > Paolo > >>> + return tb->tc_ptr; >>> + } >>> + out_epilogue: >>> + return tcg_ctx.code_gen_epilogue; >>> +} >>> + >>> void HELPER(exit_atomic)(CPUArchState *env) >>> { >>> cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); >>> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h >>> index 114ea6f..c41d38a 100644 >>> --- a/tcg/tcg-runtime.h >>> +++ b/tcg/tcg-runtime.h >>> @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) >>> DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) >>> DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) >>> >>> +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) >>> + >>> DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) >>> >>> #ifdef CONFIG_SOFTMMU >>> diff --git a/tcg/tcg.h b/tcg/tcg.h >>> index 6c216bb..5ec48d1 100644 >>> --- a/tcg/tcg.h >>> +++ b/tcg/tcg.h >>> @@ -699,6 +699,7 @@ struct TCGContext { >>> extension that allows arithmetic on void*. */ >>> int code_gen_max_blocks; >>> void *code_gen_prologue; >>> + void *code_gen_epilogue; >>> void *code_gen_buffer; >>> size_t code_gen_buffer_size; >>> void *code_gen_ptr; >> >> >> -- >> Alex Bennée >> -- Alex Bennée
On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote: > On 04/26/2017 08:23 AM, Emilio G. Cota wrote: (snip) > >+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > >+ tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > >+ if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && > >+ tb->flags == flags)) { > > This comparison is wrong. It will incorrectly reject a TB for i386 guest > when CS_BASE != 0. You really want > > tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > if (tb) { > cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) { > return tb->tc_ptr; > } > } > return tcg_ctx.code_gen_epilogue; wrt the comparison, the only change I notice in your suggested change is tb->pc == pc instead of tb->pc == addr , which seems innocuous to me (since tb->pc == addr). I fail to see how this relates to your "CS_BASE != 0" comment. What am I missing? E.
On 04/26/2017 11:56 PM, Emilio G. Cota wrote: > On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote: >> On 04/26/2017 08:23 AM, Emilio G. Cota wrote: > (snip) >>> + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); >>> + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); >>> + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && >>> + tb->flags == flags)) { >> >> This comparison is wrong. It will incorrectly reject a TB for i386 guest >> when CS_BASE != 0. You really want >> >> tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); >> if (tb) { >> cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); >> if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) { >> return tb->tc_ptr; >> } >> } >> return tcg_ctx.code_gen_epilogue; > > wrt the comparison, the only change I notice in your suggested change is > tb->pc == pc > > instead of > tb->pc == addr > > , which seems innocuous to me (since tb->pc == addr). > > I fail to see how this relates to your "CS_BASE != 0" comment. > What am I missing? Recall how you computed vaddr for target/i386: addr = pc + cs_base r~
On Thu, Apr 27, 2017 at 00:29:49 +0200, Richard Henderson wrote: > On 04/26/2017 11:56 PM, Emilio G. Cota wrote: > >On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote: > >>On 04/26/2017 08:23 AM, Emilio G. Cota wrote: > >(snip) > >>>+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > >>>+ tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > >>>+ if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && > >>>+ tb->flags == flags)) { > >> > >>This comparison is wrong. It will incorrectly reject a TB for i386 guest > >>when CS_BASE != 0. You really want > >> > >> tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > >> if (tb) { > >> cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > >> if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) { > >> return tb->tc_ptr; > >> } > >> } > >> return tcg_ctx.code_gen_epilogue; > > > >wrt the comparison, the only change I notice in your suggested change is > > tb->pc == pc > > > >instead of > > tb->pc == addr > > > >, which seems innocuous to me (since tb->pc == addr). > > > >I fail to see how this relates to your "CS_BASE != 0" comment. > >What am I missing? > > Recall how you computed vaddr for target/i386: > > addr = pc + cs_base I see, thanks! Emilio
On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote: > On 04/26/2017 08:23 AM, Emilio G. Cota wrote: > >This paves the way for upcoming work. > > > >Reviewed-by: Richard Henderson <rth@twiddle.net> > >Signed-off-by: Emilio G. Cota <cota@braap.org> > >--- > > tcg-runtime.c | 21 +++++++++++++++++++++ > > tcg/tcg-runtime.h | 2 ++ > > tcg/tcg.h | 1 + > > 3 files changed, 24 insertions(+) > > > >diff --git a/tcg-runtime.c b/tcg-runtime.c > >index 4c60c96..90d2d4b 100644 > >--- a/tcg-runtime.c > >+++ b/tcg-runtime.c > >@@ -27,6 +27,7 @@ > > #include "exec/helper-proto.h" > > #include "exec/cpu_ldst.h" > > #include "exec/exec-all.h" > >+#include "exec/tb-hash.h" > > /* 32-bit helpers */ > >@@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) > > return ctpop64(arg); > > } > >+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > >+ tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > >+ if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && > >+ tb->flags == flags)) { > > This comparison is wrong. It will incorrectly reject a TB for i386 guest > when CS_BASE != 0. You really want > > tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); > if (tb) { > cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) { > return tb->tc_ptr; > } > } > return tcg_ctx.code_gen_epilogue; > > where you don't even load the cpu state if there isn't a preliminary hit in > the cache. Yes, I like this. > (Note to self: That minor optimization would also apply to tb_find.) FWIW I looked at tb_find -- you need the pc though, which comes from loading the CPU state: cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); ^^ tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); ^^ If we wanted to really avoid getting all the state I guess we'd have to add another function that returned just the pc. E.
diff --git a/tcg-runtime.c b/tcg-runtime.c index 4c60c96..90d2d4b 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -27,6 +27,7 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" +#include "exec/tb-hash.h" /* 32-bit helpers */ @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) +{ + CPUState *cpu = ENV_GET_CPU(env); + TranslationBlock *tb; + target_ulong cs_base, pc; + uint32_t flags; + + if (unlikely(atomic_read(&cpu->exit_request))) { + goto out_epilogue; + } + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); + if (likely(tb && tb->pc == addr && tb->cs_base == cs_base && + tb->flags == flags)) { + return tb->tc_ptr; + } + out_epilogue: + return tcg_ctx.code_gen_epilogue; +} + void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 114ea6f..c41d38a 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) + DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) #ifdef CONFIG_SOFTMMU diff --git a/tcg/tcg.h b/tcg/tcg.h index 6c216bb..5ec48d1 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -699,6 +699,7 @@ struct TCGContext { extension that allows arithmetic on void*. */ int code_gen_max_blocks; void *code_gen_prologue; + void *code_gen_epilogue; void *code_gen_buffer; size_t code_gen_buffer_size; void *code_gen_ptr;