atomics: add volatile_read/volatile_set

Message ID f54518bb-d349-aa24-3bfb-c4788abfa22d@redhat.com
State New

Commit Message

Paolo Bonzini July 18, 2016, 4:53 p.m. UTC
On 18/07/2016 18:52, Sergey Fedorov wrote:
> So how are we going to use them?

Instead of atomic_read/atomic_set when marking invalid TBs.



Thanks,

Paolo

> Thanks,
> Sergey
> 
> On 18/07/16 17:17, Paolo Bonzini wrote:
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  docs/atomics.txt      | 19 ++++++++++++++++---
>>  include/qemu/atomic.h | 17 +++++++++++++++++
>>  2 files changed, 33 insertions(+), 3 deletions(-)
>>
>> diff --git a/docs/atomics.txt b/docs/atomics.txt
>> index c95950b..1f21d2e 100644
>> --- a/docs/atomics.txt
>> +++ b/docs/atomics.txt
>> @@ -123,6 +123,14 @@ to do so, because it tells readers which variables are shared with
>>  other threads, and which are local to the current thread or protected
>>  by other, more mundane means.
>>  
>> +atomic_read() and atomic_set() only support accesses as large as a
>> +pointer.  If you need to access variables larger than a pointer you
>> +can use volatile_read() and volatile_set(), but be careful: these always
>> +use volatile accesses, and 64-bit volatile accesses are not atomic on
>> +several 32-bit processors such as ARMv7.  In other words, volatile_read
>> +and volatile_set only provide "safe register" semantics when applied to
>> +64-bit variables.
>> +
>>  Memory barriers control the order of references to shared memory.
>>  They come in four kinds:
>>  
>> @@ -335,11 +343,16 @@ and memory barriers, and the equivalents in QEMU:
>>    Both semantics prevent the compiler from doing certain transformations;
>>    the difference is that atomic accesses are guaranteed to be atomic,
>>    while volatile accesses aren't. Thus, in the volatile case we just cross
>> -  our fingers hoping that the compiler will generate atomic accesses,
>> -  since we assume the variables passed are machine-word sized and
>> -  properly aligned.
>> +  our fingers hoping that the compiler and processor will provide atomic
>> +  accesses, since we assume the variables passed are machine-word sized
>> +  and properly aligned.
>> +
>>    No barriers are implied by atomic_read/set in either Linux or QEMU.
>>  
>> +- volatile_read and volatile_set are equivalent to ACCESS_ONCE in Linux.
>> +  No barriers are implied by volatile_read/set in QEMU, nor by
>> +  ACCESS_ONCE in Linux.
>> +
>>  - atomic read-modify-write operations in Linux are of three kinds:
>>  
>>           atomic_OP          returns void
>> diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
>> index 7e13fca..8409bdb 100644
>> --- a/include/qemu/atomic.h
>> +++ b/include/qemu/atomic.h
>> @@ -18,6 +18,12 @@
>>  /* Compiler barrier */
>>  #define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })
>>  
>> +/* These will only be atomic if the processor does the fetch or store
>> + * in a single issue memory operation
>> + */
>> +#define volatile_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
>> +#define volatile_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
>> +
>>  #ifdef __ATOMIC_RELAXED
>>  /* For C11 atomic ops */
>>  
>> @@ -260,6 +266,17 @@
>>   */
>>  #define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
>>  #define atomic_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
>> +#define atomic_read(ptr)                              \
>> +    ({                                                \
>> +    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
>> +    volatile_read(ptr);                               \
>> +    })
>> +
>> +#define atomic_set(ptr, i)  do {                      \
>> +    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
>> +    volatile_set(ptr, i);                             \
>> +} while(0)
>> +
>>  
>>  /**
>>   * atomic_rcu_read - reads a RCU-protected pointer to a local variable
>
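
For illustration, a minimal sketch of what the new size check means in practice (not part of the patch; the variable and function names are made up): volatile_read()/volatile_set() accept a 64-bit variable on any host but may be torn there, while the redefined atomic_read()/atomic_set() refuse it at compile time on 32-bit hosts.

    #include <stdint.h>
    #include "qemu/atomic.h"

    static uint64_t ns_counter;    /* hypothetical 64-bit shared variable */

    static void example(void)
    {
        /* Compiles everywhere, but the load/store may be split into two
         * 32-bit accesses on hosts such as ARMv7. */
        uint64_t v = volatile_read(&ns_counter);
        volatile_set(&ns_counter, v + 1);

        /* With the patch above, this triggers QEMU_BUILD_BUG_ON whenever
         * sizeof(uint64_t) > sizeof(void *), i.e. on 32-bit hosts. */
        atomic_set(&ns_counter, 0);
    }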

Comments

Sergey Fedorov July 18, 2016, 4:57 p.m. UTC | #1
On 18/07/16 19:53, Paolo Bonzini wrote:
>
> On 18/07/2016 18:52, Sergey Fedorov wrote:
>> So how are we going to use them?
> Instead of atomic_read/atomic_set when marking invalid TBs.

But shouldn't they be atomic to avoid reading torn writes?

Thanks,
Sergey

>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index fd43de8..1275f3d 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -292,10 +292,10 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
>         always be the same before a given translated block
>         is executed. */
>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> -    tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
> -    if (unlikely(!tb || atomic_read(&tb->pc) != pc ||
> -                 atomic_read(&tb->cs_base) != cs_base ||
> -                 atomic_read(&tb->flags) != flags)) {
> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
> +    if (unlikely(!tb || volatile_read(&tb->pc) != pc ||
> +                 volatile_read(&tb->cs_base) != cs_base ||
> +                 volatile_read(&tb->flags) != flags)) {
>          tb = tb_htable_lookup(cpu, pc, cs_base, flags);
>          if (!tb) {
>  
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 8f0afcd..35e963b 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -262,9 +262,9 @@ static inline void tb_mark_invalid(TranslationBlock *tb)
>      uint32_t flags = 0;
>  
>      cpu_get_invalid_tb_cpu_state(&pc, &cs_base, &flags);
> -    atomic_set(&tb->pc, pc);
> -    atomic_set(&tb->cs_base, cs_base);
> -    atomic_set(&tb->flags, flags);
> +    volatile_set(&tb->pc, pc);
> +    volatile_set(&tb->cs_base, cs_base);
> +    volatile_set(&tb->flags, flags);
>  }
>  
>  static inline bool tb_is_invalid(TranslationBlock *tb)
>
>
> Thanks,
>
> Paolo
>
>> Thanks,
>> Sergey
>>
>> On 18/07/16 17:17, Paolo Bonzini wrote:
>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>> ---
>>>  docs/atomics.txt      | 19 ++++++++++++++++---
>>>  include/qemu/atomic.h | 17 +++++++++++++++++
>>>  2 files changed, 33 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/docs/atomics.txt b/docs/atomics.txt
>>> index c95950b..1f21d2e 100644
>>> --- a/docs/atomics.txt
>>> +++ b/docs/atomics.txt
>>> @@ -123,6 +123,14 @@ to do so, because it tells readers which variables are shared with
>>>  other threads, and which are local to the current thread or protected
>>>  by other, more mundane means.
>>>  
>>> +atomic_read() and atomic_set() only support accesses as large as a
>>> +pointer.  If you need to access variables larger than a pointer you
>>> +can use volatile_read() and volatile_set(), but be careful: these always
>>> +use volatile accesses, and 64-bit volatile accesses are not atomic on
>>> +several 32-bit processors such as ARMv7.  In other words, volatile_read
>>> +and volatile_set only provide "safe register" semantics when applied to
>>> +64-bit variables.
>>> +
>>>  Memory barriers control the order of references to shared memory.
>>>  They come in four kinds:
>>>  
>>> @@ -335,11 +343,16 @@ and memory barriers, and the equivalents in QEMU:
>>>    Both semantics prevent the compiler from doing certain transformations;
>>>    the difference is that atomic accesses are guaranteed to be atomic,
>>>    while volatile accesses aren't. Thus, in the volatile case we just cross
>>> -  our fingers hoping that the compiler will generate atomic accesses,
>>> -  since we assume the variables passed are machine-word sized and
>>> -  properly aligned.
>>> +  our fingers hoping that the compiler and processor will provide atomic
>>> +  accesses, since we assume the variables passed are machine-word sized
>>> +  and properly aligned.
>>> +
>>>    No barriers are implied by atomic_read/set in either Linux or QEMU.
>>>  
>>> +- volatile_read and volatile_set are equivalent to ACCESS_ONCE in Linux.
>>> +  No barriers are implied by volatile_read/set in QEMU, nor by
>>> +  ACCESS_ONCE in Linux.
>>> +
>>>  - atomic read-modify-write operations in Linux are of three kinds:
>>>  
>>>           atomic_OP          returns void
>>> diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
>>> index 7e13fca..8409bdb 100644
>>> --- a/include/qemu/atomic.h
>>> +++ b/include/qemu/atomic.h
>>> @@ -18,6 +18,12 @@
>>>  /* Compiler barrier */
>>>  #define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })
>>>  
>>> +/* These will only be atomic if the processor does the fetch or store
>>> + * in a single issue memory operation
>>> + */
>>> +#define volatile_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
>>> +#define volatile_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
>>> +
>>>  #ifdef __ATOMIC_RELAXED
>>>  /* For C11 atomic ops */
>>>  
>>> @@ -260,6 +266,17 @@
>>>   */
>>>  #define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
>>>  #define atomic_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
>>> +#define atomic_read(ptr)                              \
>>> +    ({                                                \
>>> +    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
>>> +    volatile_read(ptr);                               \
>>> +    })
>>> +
>>> +#define atomic_set(ptr, i)  do {                      \
>>> +    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
>>> +    volatile_set(ptr, i);                             \
>>> +} while(0)
>>> +
>>>  
>>>  /**
>>>   * atomic_rcu_read - reads a RCU-protected pointer to a local variable
Paolo Bonzini July 18, 2016, 5 p.m. UTC | #2
On 18/07/2016 18:57, Sergey Fedorov wrote:
> On 18/07/16 19:53, Paolo Bonzini wrote:
> > On 18/07/2016 18:52, Sergey Fedorov wrote:
> > > So how are we going to use them?
> > Instead of atomic_read/atomic_set when marking invalid TBs.
> But shouldn't they be atomic to avoid reading torn writes?

A torn write would probably fail to match anyway, but even if it doesn't
it is indistinguishable from a race, isn't it?

By the way, tb_cmp should also use volatile_read.

Paolo
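
A minimal sketch of that suggestion (illustrative only; the helper name and signature are assumptions, not QEMU's actual tb_cmp() in translate-all.c, and the usual QEMU headers are assumed): the comparison reads the TB's state tuple through volatile accesses so it pairs with the volatile_set() calls in tb_mark_invalid().

    /* Sketch: compare a TB against a looked-up CPU state tuple using
     * volatile reads, matching the volatile stores done on invalidation. */
    static bool tb_state_matches(const TranslationBlock *tb, target_ulong pc,
                                 target_ulong cs_base, uint32_t flags)
    {
        return volatile_read(&tb->pc) == pc &&
               volatile_read(&tb->cs_base) == cs_base &&
               volatile_read(&tb->flags) == flags;
    }
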
Sergey Fedorov July 18, 2016, 5:07 p.m. UTC | #3
On 18/07/16 20:00, Paolo Bonzini wrote:
>
> On 18/07/2016 18:57, Sergey Fedorov wrote:
>> On 18/07/16 19:53, Paolo Bonzini wrote:
>>> On 18/07/2016 18:52, Sergey Fedorov wrote:
>>>> So how are we going to use them?
>>> Instead of atomic_read/atomic_set when marking invalid TBs.
>> But shouldn't they be atomic to avoid reading torn writes?
> A torn write would probably fail to match anyway, but even if it doesn't
> it is indistinguishable from a race, isn't it?

I'm afraid a torn write can happen to be a false match against a wrong
TB. In case of a race with atomic access we either get the right TB or
an invalid one which couldn't match any valid CPU state. Probably we
have to make sure (and document this) that the TB invalidation process
cannot produce a partially invalidated TB which matches any meaningful
CPU state.

>
> By the way, tb_cmp should also use volatile_read.

You are right, we must use the same type of access in tb_cmp().

Thanks,
Sergey
Paolo Bonzini July 18, 2016, 5:11 p.m. UTC | #4
On 18/07/2016 19:07, Sergey Fedorov wrote:
> On 18/07/16 20:00, Paolo Bonzini wrote:
>>
>> On 18/07/2016 18:57, Sergey Fedorov wrote:
>>> On 18/07/16 19:53, Paolo Bonzini wrote:
>>>> On 18/07/2016 18:52, Sergey Fedorov wrote:
>>>>> So how are we going to use them?
>>>> Instead of atomic_read/atomic_set when marking invalid TBs.
>>> But shouldn't they be atomic to avoid reading torn writes?
>> A torn write would probably fail to match anyway, but even if it doesn't
>> it is indistinguishable from a race, isn't it?
> 
> I'm afraid a torn write can happen to be a false match against a wrong
> TB. In case of a race with atomic access we either get the right TB or
> an invalid one which couldn't match any valid CPU state. Probably we
> have to make sure (and document this) that the TB invalidation process
> cannot produce a partially invalidated TB which matches any meaningful
> CPU state.

x86 is atomic (because flags are 32-bit); those that have cs_base==0 are
safe against torn writes too.  Only SPARC perhaps could use
"tb->cs_base|=1" instead in case 0xffffffff........ matches another TB.

Paolo

>>
>> By the way, tb_cmp should also use volatile_read.
> 
> You are right, we must use the same type of access in tb_cmp().
> 
> Thanks,
> Sergey
>
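
For clarity, a sketch of the "|= 1" idea mentioned above (an assumption, not the fixup that follows later in this thread): on SPARC the TB's cs_base holds npc, which is a multiple of 4 for any valid TB, so forcing the low bit on yields a tuple that no real CPU state can match even if the other fields are read torn.

    /* Hypothetical SPARC-only variant of tb_mark_invalid(). */
    static inline void tb_mark_invalid_cs_base(TranslationBlock *tb)
    {
        volatile_set(&tb->cs_base, tb->cs_base | 1);
    }
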
Sergey Fedorov July 18, 2016, 5:17 p.m. UTC | #5
On 18/07/16 20:11, Paolo Bonzini wrote:
>
> On 18/07/2016 19:07, Sergey Fedorov wrote:
>> On 18/07/16 20:00, Paolo Bonzini wrote:
>>> On 18/07/2016 18:57, Sergey Fedorov wrote:
>>>> On 18/07/16 19:53, Paolo Bonzini wrote:
>>>>> On 18/07/2016 18:52, Sergey Fedorov wrote:
>>>>>> So how are we going to use them?
>>>>> Instead of atomic_read/atomic_set when marking invalid TBs.
>>>> But shouldn't they be atomic to avoid reading torn writes?
>>> A torn write would probably fail to match anyway, but even if it doesn't
>>> it is indistinguishable from a race, isn't it?
>> I'm afraid a torn write can happen to be a false match against a wrong
>> TB. In case of a race with atomic access we either get the right TB or
>> an invalid one which couldn't match any valid CPU state. Probably we
>> have to make sure (and document this) that the TB invalidation process
>> cannot produce a partially invalidated TB which matches any meaningful
>> CPU state.
> x86 is atomic (because flags are 32-bit); those that have cs_base==0 are
> safe against torn writes too.  Only SPARC perhaps could use
> "tb->cs_base|=1" instead in case 0xffffffff........ matches another TB.

That could really work but needs some comment, of course. BTW, what is
the main point of such a change? A bit more performance on some 32-bit hosts?

Thanks,
Sergey

>
> Paolo
>
>>> By the way, tb_cmp should also use volatile_read.
>> You are right, we must use the same type of access in tb_cmp().
>>
>> Thanks,
>> Sergey
>>
Sergey Fedorov July 18, 2016, 5:25 p.m. UTC | #6
On 18/07/16 20:22, Paolo Bonzini wrote:
>
> On 18/07/2016 19:17, Sergey Fedorov wrote:
>> On 18/07/16 20:11, Paolo Bonzini wrote:
>>> On 18/07/2016 19:07, Sergey Fedorov wrote:
>>>> On 18/07/16 20:00, Paolo Bonzini wrote:
>>>>> On 18/07/2016 18:57, Sergey Fedorov wrote:
>>>>>> On 18/07/16 19:53, Paolo Bonzini wrote:
>>>>>>> On 18/07/2016 18:52, Sergey Fedorov wrote:
>>>>>>>> So how are we going to use them?
>>>>>>> Instead of atomic_read/atomic_set when marking invalid TBs.
>>>>>> But shouldn't they be atomic to avoid reading torn writes?
>>>>> A torn write would probably fail to match anyway, but even if it doesn't
>>>>> it is indistinguishable from a race, isn't it?
>>>> I'm afraid a torn write can happen to be a false match against a wrong
>>>> TB. In case of a race with atomic access we either get the right TB or
>>>> an invalid one which couldn't match any valid CPU state. Probably we
>>>> have to make sure (and document this) that the TB invalidation process
>>>> cannot produce a partially invalidated TB which matches any meaningful
>>>> CPU state.
>>> x86 is atomic (because flags are 32-bit); those that have cs_base==0 are
>>> safe against torn writes too.  Only SPARC perhaps could use
>>> "tb->cs_base|=1" instead in case 0xffffffff........ matches another TB.
>> That could really work but needs some comment, of course.
> Yup.  A simpler possibility is this:
>
> diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
> index e327a35..3278d8a 100644
> --- a/target-sparc/cpu.h
> +++ b/target-sparc/cpu.h
> @@ -753,14 +753,14 @@ static inline void cpu_get_invalid_tb_cpu_state(target_ulong *pc,
>                                                  target_ulong *cs_base,
>                                                  uint32_t *flags)
>  {
> -    *cs_base = -1; /* npc must be a multible of 4 */
> +    *flags = TB_FLAG_MMU_MASK;
>  }

Hmm, not sure if it is really simpler to follow. Maybe " |= 1;" anyway?

>  
>  static inline bool cpu_tb_cpu_state_is_invalidated(target_ulong pc,
>                                                     target_ulong cs_base,
>                                                     uint32_t flags)
>  {
> -    return cs_base == -1;
> +    return flags == TB_FLAG_MMU_MASK;
>  }
>  
>  static inline bool tb_fpu_enabled(int tb_flags)
>
>
> I'll send a fixup patch now, ack it and I'll send another pull request. :)
>
>> BTW, what is
>> the main point of such a change? A bit more performance on some 32-bit hosts?
No, getting it to compile on 32-bit hosts at all. :)
Paolo Bonzini July 18, 2016, 5:28 p.m. UTC | #7
On 18/07/2016 19:25, Sergey Fedorov wrote:
>> > @@ -753,14 +753,14 @@ static inline void cpu_get_invalid_tb_cpu_state(target_ulong *pc,
>> >                                                  target_ulong *cs_base,
>> >                                                  uint32_t *flags)
>> >  {
>> > -    *cs_base = -1; /* npc must be a multible of 4 */
>> > +    *flags = TB_FLAG_MMU_MASK;
>> >  }
> Hmm, not sure if it is really simpler to follow. Maybe " |= 1;" anyway?

|= 1 has the problem that tb_mark_invalid doesn't pass TB's tuple into
cpu_get_invalid_tb_cpu_state, and I didn't want to change that.  I'll
add a comment,

    /* TB_FLAG_MMU_MASK is not a valid MMU index, which makes it an
     * impossible flag combination for valid TBs.
     */

Paolo
Sergey Fedorov July 18, 2016, 5:31 p.m. UTC | #8
On 18/07/16 20:28, Paolo Bonzini wrote:
>
> On 18/07/2016 19:25, Sergey Fedorov wrote:
>>>> @@ -753,14 +753,14 @@ static inline void cpu_get_invalid_tb_cpu_state(target_ulong *pc,
>>>>                                                  target_ulong *cs_base,
>>>>                                                  uint32_t *flags)
>>>>  {
>>>> -    *cs_base = -1; /* npc must be a multible of 4 */
>>>> +    *flags = TB_FLAG_MMU_MASK;
>>>>  }
>> Hmm, not sure if it is really simpler to follow. Maybe " |= 1;" anyway?
> |= 1 has the problem that tb_mark_invalid doesn't pass TB's tuple into
> cpu_get_invalid_tb_cpu_state, and I didn't want to change that.  I'll
> add a comment,
>
>     /* TB_FLAG_MMU_MASK is not a valid MMU index, which makes it an
>      * impossible flag combination for valid TBs.
>      */
>

I wonder if using a dedicated field to mark TBs invalid would be so slow
that we couldn't afford it...

Kind regards,
Sergey
Paolo Bonzini July 18, 2016, 5:58 p.m. UTC | #9
On 18/07/2016 19:31, Sergey Fedorov wrote:
> On 18/07/16 20:28, Paolo Bonzini wrote:
>>
>> On 18/07/2016 19:25, Sergey Fedorov wrote:
>>>>> @@ -753,14 +753,14 @@ static inline void cpu_get_invalid_tb_cpu_state(target_ulong *pc,
>>>>>                                                  target_ulong *cs_base,
>>>>>                                                  uint32_t *flags)
>>>>>  {
>>>>> -    *cs_base = -1; /* npc must be a multible of 4 */
>>>>> +    *flags = TB_FLAG_MMU_MASK;
>>>>>  }
>>> Hmm, not sure if it is really simpler to follow. Maybe " |= 1;" anyway?
>> |= 1 has the problem that tb_mark_invalid doesn't pass TB's tuple into
>> cpu_get_invalid_tb_cpu_state, and I didn't want to change that.  I'll
>> add a comment,
>>
>>     /* TB_FLAG_MMU_MASK is not a valid MMU index, which makes it an
>>      * impossible flag combination for valid TBs.
>>      */
>>
> 
> I wonder if using a dedicated field to mark TBs invalid would be so slow
> that we couldn't afford it...

We could, but it would be slower too. :)

"Just make flags=-1 invalid" is probably a valid one too.  There are
many ways to implement it: use less than 32 bits (or equivalently
reserve one bit for invalid TBs), ensure some combos are invalid, etc.
It would probably be valid for all current targets, since no one uses 32
bits.  ARM is the closest to exhausting flag space probably.

Still, I like your patches very much so I'd like to proceed with them,
only with some changes to fix compilation on 32-bit hosts.

Paolo
Sergey Fedorov July 18, 2016, 7:04 p.m. UTC | #10
On 18/07/16 20:58, Paolo Bonzini wrote:
>
> On 18/07/2016 19:31, Sergey Fedorov wrote:
>> On 18/07/16 20:28, Paolo Bonzini wrote:
>>> On 18/07/2016 19:25, Sergey Fedorov wrote:
>>>>>> @@ -753,14 +753,14 @@ static inline void cpu_get_invalid_tb_cpu_state(target_ulong *pc,
>>>>>>                                                  target_ulong *cs_base,
>>>>>>                                                  uint32_t *flags)
>>>>>>  {
>>>>>> -    *cs_base = -1; /* npc must be a multible of 4 */
>>>>>> +    *flags = TB_FLAG_MMU_MASK;
>>>>>>  }
>>>> Hmm, not sure if it is really simpler to follow. Maybe " |= 1;" anyway?
>>> |= 1 has the problem that tb_mark_invalid doesn't pass TB's tuple into
>>> cpu_get_invalid_tb_cpu_state, and I didn't want to change that.  I'll
>>> add a comment,
>>>
>>>     /* TB_FLAG_MMU_MASK is not a valid MMU index, which makes it an
>>>      * impossible flag combination for valid TBs.
>>>      */
>>>
>> I wonder if using a dedicated field to mark TBs invalid would be so slow
>> that we couldn't afford it...
> We could, but it would be slower too. :)

How much performance do we really need and how much performance can we
lose by introducing such a flag? We should gain something anyway by reducing
tb_lock contention. Maybe it is worthwhile to use a dedicated flag to
keep code more clear? There's always a question of balance between code
structure and maintainability (future of the TCG) on one hand and
performance (present of the TCG) on the other hand.

Kind regards,
Sergey

>
> "Just make flags=-1 invalid" is probably a valid one too.  There are
> many ways to implement it: use less than 32 bits (or equivalently
> reserve one bit for invalid TBs), ensure some combos are invalid, etc.
> It would probably be valid for all current targets, since no one uses 32
> bits.  ARM is the closest to exhausting flag space probably.
>
> Still, I like your patches very much so I'd like to proceed with them,
> only with some changes to fix compilation on 32-bit hosts.
>
> Paolo
Paolo Bonzini July 18, 2016, 8:54 p.m. UTC | #11
On 18/07/2016 21:04, Sergey Fedorov wrote:
> How much performance do we really need and how much performance can we
> lose by introducing such a flag? We should gain something anyway by reducing
> tb_lock contention. Maybe it is worthwhile to use a dedicated flag to
> keep code more clear? There's always a question of balance between code
> structure and maintainability (future of the TCG) on one hand and
> performance (present of the TCG) on the other hand.

Fair enough, can you send a patch that does this?  If we can do the
check only just before tb_add_jump, indeed there's no reason to do all
the mess with invalid state.

Paolo
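
For reference, a sketch of the dedicated-field approach agreed on above (an assumption about its shape, not the actual follow-up patch): instead of encoding "invalid" into the pc/cs_base/flags tuple, add a separate field to TranslationBlock and access it with the ordinary pointer-sized atomics.

    /* 'invalid' is a hypothetical new bool member of TranslationBlock. */
    static inline void tb_mark_invalid(TranslationBlock *tb)
    {
        atomic_set(&tb->invalid, true);
    }

    static inline bool tb_is_invalid(TranslationBlock *tb)
    {
        return atomic_read(&tb->invalid);
    }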

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index fd43de8..1275f3d 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -292,10 +292,10 @@  static inline TranslationBlock *tb_find(CPUState *cpu,
        always be the same before a given translated block
        is executed. */
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-    tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
-    if (unlikely(!tb || atomic_read(&tb->pc) != pc ||
-                 atomic_read(&tb->cs_base) != cs_base ||
-                 atomic_read(&tb->flags) != flags)) {
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
+    if (unlikely(!tb || volatile_read(&tb->pc) != pc ||
+                 volatile_read(&tb->cs_base) != cs_base ||
+                 volatile_read(&tb->flags) != flags)) {
         tb = tb_htable_lookup(cpu, pc, cs_base, flags);
         if (!tb) {
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 8f0afcd..35e963b 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -262,9 +262,9 @@  static inline void tb_mark_invalid(TranslationBlock *tb)
     uint32_t flags = 0;
 
     cpu_get_invalid_tb_cpu_state(&pc, &cs_base, &flags);
-    atomic_set(&tb->pc, pc);
-    atomic_set(&tb->cs_base, cs_base);
-    atomic_set(&tb->flags, flags);
+    volatile_set(&tb->pc, pc);
+    volatile_set(&tb->cs_base, cs_base);
+    volatile_set(&tb->flags, flags);
 }
 
 static inline bool tb_is_invalid(TranslationBlock *tb)