diff mbox

[v3,01/10] tcg-runtime: add lookup_tb_ptr helper

Message ID 1493187803-4510-2-git-send-email-cota@braap.org
State New
Headers show

Commit Message

Emilio Cota April 26, 2017, 6:23 a.m. UTC
This paves the way for upcoming work.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 tcg-runtime.c     | 21 +++++++++++++++++++++
 tcg/tcg-runtime.h |  2 ++
 tcg/tcg.h         |  1 +
 3 files changed, 24 insertions(+)

Comments

Paolo Bonzini April 26, 2017, 7:50 a.m. UTC | #1
On 26/04/2017 08:23, Emilio G. Cota wrote:
> This paves the way for upcoming work.
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  tcg-runtime.c     | 21 +++++++++++++++++++++
>  tcg/tcg-runtime.h |  2 ++
>  tcg/tcg.h         |  1 +
>  3 files changed, 24 insertions(+)
> 
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index 4c60c96..90d2d4b 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -27,6 +27,7 @@
>  #include "exec/helper-proto.h"
>  #include "exec/cpu_ldst.h"
>  #include "exec/exec-all.h"
> +#include "exec/tb-hash.h"
>  
>  /* 32-bit helpers */
>  
> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>      return ctpop64(arg);
>  }
>  
> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
> +{
> +    CPUState *cpu = ENV_GET_CPU(env);
> +    TranslationBlock *tb;
> +    target_ulong cs_base, pc;
> +    uint32_t flags;
> +
> +    if (unlikely(atomic_read(&cpu->exit_request))) {
> +        goto out_epilogue;
> +    }

This should not be necessary, because cpu->icount_decr will be checked
by the prologue of the destination tb.

Paolo

> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
> +               tb->flags == flags)) {
> +        return tb->tc_ptr;
> +    }
> + out_epilogue:
> +    return tcg_ctx.code_gen_epilogue;
> +}
> +
>  void HELPER(exit_atomic)(CPUArchState *env)
>  {
>      cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
> index 114ea6f..c41d38a 100644
> --- a/tcg/tcg-runtime.h
> +++ b/tcg/tcg-runtime.h
> @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>  DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
>  DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>  
> +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
> +
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>  
>  #ifdef CONFIG_SOFTMMU
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 6c216bb..5ec48d1 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -699,6 +699,7 @@ struct TCGContext {
>         extension that allows arithmetic on void*.  */
>      int code_gen_max_blocks;
>      void *code_gen_prologue;
> +    void *code_gen_epilogue;
>      void *code_gen_buffer;
>      size_t code_gen_buffer_size;
>      void *code_gen_ptr;
>
Richard Henderson April 26, 2017, 8:40 a.m. UTC | #2
On 04/26/2017 08:23 AM, Emilio G. Cota wrote:
> This paves the way for upcoming work.
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>   tcg-runtime.c     | 21 +++++++++++++++++++++
>   tcg/tcg-runtime.h |  2 ++
>   tcg/tcg.h         |  1 +
>   3 files changed, 24 insertions(+)
> 
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index 4c60c96..90d2d4b 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -27,6 +27,7 @@
>   #include "exec/helper-proto.h"
>   #include "exec/cpu_ldst.h"
>   #include "exec/exec-all.h"
> +#include "exec/tb-hash.h"
>   
>   /* 32-bit helpers */
>   
> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>       return ctpop64(arg);
>   }
>   
> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
> +{
> +    CPUState *cpu = ENV_GET_CPU(env);
> +    TranslationBlock *tb;
> +    target_ulong cs_base, pc;
> +    uint32_t flags;
> +
> +    if (unlikely(atomic_read(&cpu->exit_request))) {
> +        goto out_epilogue;
> +    }

Paolo is right.  This will also be checked by the first instructions of the TB 
and there's little point in repeating it here, especially if it is indeed unlikely.

> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
> +               tb->flags == flags)) {

This comparison is wrong.  It will incorrectly reject a TB for i386 guest when 
CS_BASE != 0.  You really want

   tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
   if (tb) {
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
     if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) {
       return tb->tc_ptr;
     }
   }
   return tcg_ctx.code_gen_epilogue;

where you don't even load the cpu state if there isn't a preliminary hit in the 
cache.  (Note to self: That minor optimization would also apply to tb_find.)

I also wonder, if we've gone this far, if we wouldn't go all the way and also 
check tb_htable_lookup.


r~
Alex Bennée April 26, 2017, 10:29 a.m. UTC | #3
Emilio G. Cota <cota@braap.org> writes:

> This paves the way for upcoming work.
>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  tcg-runtime.c     | 21 +++++++++++++++++++++
>  tcg/tcg-runtime.h |  2 ++
>  tcg/tcg.h         |  1 +
>  3 files changed, 24 insertions(+)
>
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index 4c60c96..90d2d4b 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -27,6 +27,7 @@
>  #include "exec/helper-proto.h"
>  #include "exec/cpu_ldst.h"
>  #include "exec/exec-all.h"
> +#include "exec/tb-hash.h"
>
>  /* 32-bit helpers */
>
> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>      return ctpop64(arg);
>  }
>
> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
> +{
> +    CPUState *cpu = ENV_GET_CPU(env);
> +    TranslationBlock *tb;
> +    target_ulong cs_base, pc;
> +    uint32_t flags;
> +
> +    if (unlikely(atomic_read(&cpu->exit_request))) {
> +        goto out_epilogue;
> +    }
> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
> +               tb->flags == flags)) {

Should we also not be checking the TB hasn't been invalidated: tb->invalid?

> +        return tb->tc_ptr;
> +    }
> + out_epilogue:
> +    return tcg_ctx.code_gen_epilogue;
> +}
> +
>  void HELPER(exit_atomic)(CPUArchState *env)
>  {
>      cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
> index 114ea6f..c41d38a 100644
> --- a/tcg/tcg-runtime.h
> +++ b/tcg/tcg-runtime.h
> @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>  DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
>  DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>
> +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
> +
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
>  #ifdef CONFIG_SOFTMMU
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 6c216bb..5ec48d1 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -699,6 +699,7 @@ struct TCGContext {
>         extension that allows arithmetic on void*.  */
>      int code_gen_max_blocks;
>      void *code_gen_prologue;
> +    void *code_gen_epilogue;
>      void *code_gen_buffer;
>      size_t code_gen_buffer_size;
>      void *code_gen_ptr;


--
Alex Bennée
Richard Henderson April 26, 2017, 10:43 a.m. UTC | #4
On 04/26/2017 12:29 PM, Alex Bennée wrote:
> 
> Emilio G. Cota <cota@braap.org> writes:
> 
>> This paves the way for upcoming work.
>>
>> Reviewed-by: Richard Henderson <rth@twiddle.net>
>> Signed-off-by: Emilio G. Cota <cota@braap.org>
>> ---
>>   tcg-runtime.c     | 21 +++++++++++++++++++++
>>   tcg/tcg-runtime.h |  2 ++
>>   tcg/tcg.h         |  1 +
>>   3 files changed, 24 insertions(+)
>>
>> diff --git a/tcg-runtime.c b/tcg-runtime.c
>> index 4c60c96..90d2d4b 100644
>> --- a/tcg-runtime.c
>> +++ b/tcg-runtime.c
>> @@ -27,6 +27,7 @@
>>   #include "exec/helper-proto.h"
>>   #include "exec/cpu_ldst.h"
>>   #include "exec/exec-all.h"
>> +#include "exec/tb-hash.h"
>>
>>   /* 32-bit helpers */
>>
>> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>>       return ctpop64(arg);
>>   }
>>
>> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
>> +{
>> +    CPUState *cpu = ENV_GET_CPU(env);
>> +    TranslationBlock *tb;
>> +    target_ulong cs_base, pc;
>> +    uint32_t flags;
>> +
>> +    if (unlikely(atomic_read(&cpu->exit_request))) {
>> +        goto out_epilogue;
>> +    }
>> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
>> +               tb->flags == flags)) {
> 
> Should we also not be checking the TB hasn't been invalidated: tb->invalid?

We don't check in tb_find.

I guess we're assuming that such have been purged from the tb_jmp_cache.  That 
said, tb_phys_invalidate assumes tb_locked, and I don't immediately remember 
how all that is supposed to tie together.


r~
Paolo Bonzini April 26, 2017, 3:11 p.m. UTC | #5
On 26/04/2017 12:29, Alex Bennée wrote:
> 
> Emilio G. Cota <cota@braap.org> writes:
> 
>> This paves the way for upcoming work.
>>
>> Reviewed-by: Richard Henderson <rth@twiddle.net>
>> Signed-off-by: Emilio G. Cota <cota@braap.org>
>> ---
>>  tcg-runtime.c     | 21 +++++++++++++++++++++
>>  tcg/tcg-runtime.h |  2 ++
>>  tcg/tcg.h         |  1 +
>>  3 files changed, 24 insertions(+)
>>
>> diff --git a/tcg-runtime.c b/tcg-runtime.c
>> index 4c60c96..90d2d4b 100644
>> --- a/tcg-runtime.c
>> +++ b/tcg-runtime.c
>> @@ -27,6 +27,7 @@
>>  #include "exec/helper-proto.h"
>>  #include "exec/cpu_ldst.h"
>>  #include "exec/exec-all.h"
>> +#include "exec/tb-hash.h"
>>
>>  /* 32-bit helpers */
>>
>> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>>      return ctpop64(arg);
>>  }
>>
>> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
>> +{
>> +    CPUState *cpu = ENV_GET_CPU(env);
>> +    TranslationBlock *tb;
>> +    target_ulong cs_base, pc;
>> +    uint32_t flags;
>> +
>> +    if (unlikely(atomic_read(&cpu->exit_request))) {
>> +        goto out_epilogue;
>> +    }
>> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
>> +               tb->flags == flags)) {
> 
> Should we also not be checking the TB hasn't been invalidated: tb->invalid?

It's not needed because this lookup is (if I understand it right) once
only and is not reused later.  This is why tb_find doesn't check
tb->invalid, but uses it to avoid adding the TB to the chain.

Good:

	tb_find			tb_phys_invalidate
				  tb_lock
				  tb->invalid = true
	  lookup cache
	  cache hit
				  tb_unlock
	  tb_lock
	  tb->invalid?
	    yes, skip tb_add_jump
	  tb_unlock
	  execute tb once

Bad (doesn't happen):

	tb_find			tb_phys_invalidate
				  tb_lock
				  tb->invalid = true
	  lookup cache
	  cache hit
				  tb_unlock
	  tb_lock
	  tb_add_jump
	  tb_unlock
	  execute tb many times

Paolo

>> +        return tb->tc_ptr;
>> +    }
>> + out_epilogue:
>> +    return tcg_ctx.code_gen_epilogue;
>> +}
>> +
>>  void HELPER(exit_atomic)(CPUArchState *env)
>>  {
>>      cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
>> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
>> index 114ea6f..c41d38a 100644
>> --- a/tcg/tcg-runtime.h
>> +++ b/tcg/tcg-runtime.h
>> @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>>  DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
>>  DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>>
>> +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
>> +
>>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>>
>>  #ifdef CONFIG_SOFTMMU
>> diff --git a/tcg/tcg.h b/tcg/tcg.h
>> index 6c216bb..5ec48d1 100644
>> --- a/tcg/tcg.h
>> +++ b/tcg/tcg.h
>> @@ -699,6 +699,7 @@ struct TCGContext {
>>         extension that allows arithmetic on void*.  */
>>      int code_gen_max_blocks;
>>      void *code_gen_prologue;
>> +    void *code_gen_epilogue;
>>      void *code_gen_buffer;
>>      size_t code_gen_buffer_size;
>>      void *code_gen_ptr;
> 
> 
> --
> Alex Bennée
>
Alex Bennée April 26, 2017, 4:16 p.m. UTC | #6
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 26/04/2017 12:29, Alex Bennée wrote:
>>
>> Emilio G. Cota <cota@braap.org> writes:
>>
>>> This paves the way for upcoming work.
>>>
>>> Reviewed-by: Richard Henderson <rth@twiddle.net>
>>> Signed-off-by: Emilio G. Cota <cota@braap.org>
>>> ---
>>>  tcg-runtime.c     | 21 +++++++++++++++++++++
>>>  tcg/tcg-runtime.h |  2 ++
>>>  tcg/tcg.h         |  1 +
>>>  3 files changed, 24 insertions(+)
>>>
>>> diff --git a/tcg-runtime.c b/tcg-runtime.c
>>> index 4c60c96..90d2d4b 100644
>>> --- a/tcg-runtime.c
>>> +++ b/tcg-runtime.c
>>> @@ -27,6 +27,7 @@
>>>  #include "exec/helper-proto.h"
>>>  #include "exec/cpu_ldst.h"
>>>  #include "exec/exec-all.h"
>>> +#include "exec/tb-hash.h"
>>>
>>>  /* 32-bit helpers */
>>>
>>> @@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>>>      return ctpop64(arg);
>>>  }
>>>
>>> +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
>>> +{
>>> +    CPUState *cpu = ENV_GET_CPU(env);
>>> +    TranslationBlock *tb;
>>> +    target_ulong cs_base, pc;
>>> +    uint32_t flags;
>>> +
>>> +    if (unlikely(atomic_read(&cpu->exit_request))) {
>>> +        goto out_epilogue;
>>> +    }
>>> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>>> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
>>> +               tb->flags == flags)) {
>>
>> Should we also not be checking the TB hasn't been invalidated: tb->invalid?
>
> It's not needed because this lookup is (if I understand it right) once
> only and is not reused later.  This is why tb_find doesn't check
> tb->invalid, but uses it to avoid adding the TB to the chain.

Right. And when tb->invalid = true is set we then flush it from the
jump cache so it will never be found by the helper after.


OK nothing to see here ;-)

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

>
> Good:
>
> 	tb_find			tb_phys_invalidate
> 				  tb_lock
> 				  tb->invalid = true
> 	  lookup cache
> 	  cache hit
> 				  tb_unlock
> 	  tb_lock
> 	  tb->invalid?
> 	    yes, skip tb_add_jump
> 	  tb_unlock
> 	  execute tb once
>
> Bad (doesn't happen):
>
> 	tb_find			tb_phys_invalidate
> 				  tb_lock
> 				  tb->invalid = true
> 	  lookup cache
> 	  cache hit
> 				  tb_unlock
> 	  tb_lock
> 	  tb_add_jump
> 	  tb_unlock
> 	  execute tb many times
>
> Paolo
>
>>> +        return tb->tc_ptr;
>>> +    }
>>> + out_epilogue:
>>> +    return tcg_ctx.code_gen_epilogue;
>>> +}
>>> +
>>>  void HELPER(exit_atomic)(CPUArchState *env)
>>>  {
>>>      cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
>>> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
>>> index 114ea6f..c41d38a 100644
>>> --- a/tcg/tcg-runtime.h
>>> +++ b/tcg/tcg-runtime.h
>>> @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>>>  DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
>>>  DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>>>
>>> +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
>>> +
>>>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>>>
>>>  #ifdef CONFIG_SOFTMMU
>>> diff --git a/tcg/tcg.h b/tcg/tcg.h
>>> index 6c216bb..5ec48d1 100644
>>> --- a/tcg/tcg.h
>>> +++ b/tcg/tcg.h
>>> @@ -699,6 +699,7 @@ struct TCGContext {
>>>         extension that allows arithmetic on void*.  */
>>>      int code_gen_max_blocks;
>>>      void *code_gen_prologue;
>>> +    void *code_gen_epilogue;
>>>      void *code_gen_buffer;
>>>      size_t code_gen_buffer_size;
>>>      void *code_gen_ptr;
>>
>>
>> --
>> Alex Bennée
>>


--
Alex Bennée
Emilio Cota April 26, 2017, 9:56 p.m. UTC | #7
On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote:
> On 04/26/2017 08:23 AM, Emilio G. Cota wrote:
(snip)
> >+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> >+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> >+    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
> >+               tb->flags == flags)) {
> 
> This comparison is wrong.  It will incorrectly reject a TB for i386 guest
> when CS_BASE != 0.  You really want
> 
>   tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>   if (tb) {
>     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>     if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) {
>       return tb->tc_ptr;
>     }
>   }
>   return tcg_ctx.code_gen_epilogue;

wrt the comparison, the only change I notice in your suggested change is
  tb->pc == pc

instead of
  tb->pc == addr

, which seems innocuous to me (since tb->pc == addr).

I fail to see how this relates to your "CS_BASE != 0" comment.
What am I missing?

		E.
Richard Henderson April 26, 2017, 10:29 p.m. UTC | #8
On 04/26/2017 11:56 PM, Emilio G. Cota wrote:
> On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote:
>> On 04/26/2017 08:23 AM, Emilio G. Cota wrote:
> (snip)
>>> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>>> +    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
>>> +               tb->flags == flags)) {
>>
>> This comparison is wrong.  It will incorrectly reject a TB for i386 guest
>> when CS_BASE != 0.  You really want
>>
>>    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>>    if (tb) {
>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>      if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) {
>>        return tb->tc_ptr;
>>      }
>>    }
>>    return tcg_ctx.code_gen_epilogue;
> 
> wrt the comparison, the only change I notice in your suggested change is
>    tb->pc == pc
> 
> instead of
>    tb->pc == addr
> 
> , which seems innocuous to me (since tb->pc == addr).
> 
> I fail to see how this relates to your "CS_BASE != 0" comment.
> What am I missing?

Recall how you computed vaddr for target/i386:

   addr = pc + cs_base


r~
Emilio Cota April 26, 2017, 10:45 p.m. UTC | #9
On Thu, Apr 27, 2017 at 00:29:49 +0200, Richard Henderson wrote:
> On 04/26/2017 11:56 PM, Emilio G. Cota wrote:
> >On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote:
> >>On 04/26/2017 08:23 AM, Emilio G. Cota wrote:
> >(snip)
> >>>+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> >>>+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> >>>+    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
> >>>+               tb->flags == flags)) {
> >>
> >>This comparison is wrong.  It will incorrectly reject a TB for i386 guest
> >>when CS_BASE != 0.  You really want
> >>
> >>   tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> >>   if (tb) {
> >>     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> >>     if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) {
> >>       return tb->tc_ptr;
> >>     }
> >>   }
> >>   return tcg_ctx.code_gen_epilogue;
> >
> >wrt the comparison, the only change I notice in your suggested change is
> >   tb->pc == pc
> >
> >instead of
> >   tb->pc == addr
> >
> >, which seems innocuous to me (since tb->pc == addr).
> >
> >I fail to see how this relates to your "CS_BASE != 0" comment.
> >What am I missing?
> 
> Recall how you computed vaddr for target/i386:
> 
>   addr = pc + cs_base

I see, thanks!

		Emilio
Emilio Cota April 26, 2017, 11:17 p.m. UTC | #10
On Wed, Apr 26, 2017 at 10:40:45 +0200, Richard Henderson wrote:
> On 04/26/2017 08:23 AM, Emilio G. Cota wrote:
> >This paves the way for upcoming work.
> >
> >Reviewed-by: Richard Henderson <rth@twiddle.net>
> >Signed-off-by: Emilio G. Cota <cota@braap.org>
> >---
> >  tcg-runtime.c     | 21 +++++++++++++++++++++
> >  tcg/tcg-runtime.h |  2 ++
> >  tcg/tcg.h         |  1 +
> >  3 files changed, 24 insertions(+)
> >
> >diff --git a/tcg-runtime.c b/tcg-runtime.c
> >index 4c60c96..90d2d4b 100644
> >--- a/tcg-runtime.c
> >+++ b/tcg-runtime.c
> >@@ -27,6 +27,7 @@
> >  #include "exec/helper-proto.h"
> >  #include "exec/cpu_ldst.h"
> >  #include "exec/exec-all.h"
> >+#include "exec/tb-hash.h"
> >  /* 32-bit helpers */
> >@@ -141,6 +142,26 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
> >      return ctpop64(arg);
> >  }
> >+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> >+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> >+    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
> >+               tb->flags == flags)) {
> 
> This comparison is wrong.  It will incorrectly reject a TB for i386 guest
> when CS_BASE != 0.  You really want
> 
>   tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
>   if (tb) {
>     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>     if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) {
>       return tb->tc_ptr;
>     }
>   }
>   return tcg_ctx.code_gen_epilogue;
> 
> where you don't even load the cpu state if there isn't a preliminary hit in
> the cache.

Yes, I like this.

> (Note to self: That minor optimization would also apply to tb_find.)

FWIW I looked at tb_find -- you need the pc though, which comes from
loading the CPU state:

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
                               ^^
    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
                                                                   ^^

If we wanted to really avoid getting all the state I guess we'd have to add
another function that returned just the pc.

		E.
diff mbox

Patch

diff --git a/tcg-runtime.c b/tcg-runtime.c
index 4c60c96..90d2d4b 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -27,6 +27,7 @@ 
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-hash.h"
 
 /* 32-bit helpers */
 
@@ -141,6 +142,26 @@  uint64_t HELPER(ctpop_i64)(uint64_t arg)
     return ctpop64(arg);
 }
 
+void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    TranslationBlock *tb;
+    target_ulong cs_base, pc;
+    uint32_t flags;
+
+    if (unlikely(atomic_read(&cpu->exit_request))) {
+        goto out_epilogue;
+    }
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
+    if (likely(tb && tb->pc == addr && tb->cs_base == cs_base &&
+               tb->flags == flags)) {
+        return tb->tc_ptr;
+    }
+ out_epilogue:
+    return tcg_ctx.code_gen_epilogue;
+}
+
 void HELPER(exit_atomic)(CPUArchState *env)
 {
     cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 114ea6f..c41d38a 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -24,6 +24,8 @@  DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 
+DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
+
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
 #ifdef CONFIG_SOFTMMU
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6c216bb..5ec48d1 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -699,6 +699,7 @@  struct TCGContext {
        extension that allows arithmetic on void*.  */
     int code_gen_max_blocks;
     void *code_gen_prologue;
+    void *code_gen_epilogue;
     void *code_gen_buffer;
     size_t code_gen_buffer_size;
     void *code_gen_ptr;