diff mbox

[v2,1/6] tcg: Ensure safe tb_jmp_cache lookup out of 'tb_lock'

Message ID 1467735496-16256-2-git-send-email-alex.bennee@linaro.org
State New
Headers show

Commit Message

Alex Bennée July 5, 2016, 4:18 p.m. UTC
From: Sergey Fedorov <serge.fdrv@gmail.com>

First, ensure atomicity of CPU's 'tb_jmp_cache' access by:
 * using atomic_read() to look up a TB when not holding 'tb_lock';
 * using atomic_set() to remove a TB from each CPU's local cache on
   TB invalidation.

Second, add some memory barriers to ensure we don't put the TB being
invalidated back to CPU's 'tb_jmp_cache'. If we fail to look up a TB in
CPU's local cache because it is being invalidated by some other thread
then it must not be found in the shared TB hash table. Otherwise we'd
put it back to CPU's local cache.

Note that this patch does *not* make CPU's TLB invalidation safe if it
is done from some other thread while the CPU is in its execution loop.

Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
[AJB: fixed missing atomic set, tweak title]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Emilio G. Cota <cota@braap.org>

---
v1 (AJB):
  - tweak title
  - fixed missing set of tb_jmp_cache
v2
  - fix spelling s/con't/can't/
  - add atomic_read while clearing tb_jmp_cache
  - add r-b tags
---
 cpu-exec.c      | 9 +++++++--
 translate-all.c | 9 +++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

Comments

Sergey Fedorov July 7, 2016, 1:52 p.m. UTC | #1
I was not sure if the language I used in the source code comments is
100% correct. So it would be fine if someone could check if it is easy
to understand ;)

Thanks,
Sergey

On 05/07/16 19:18, Alex Bennée wrote:
> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> First, ensure atomicity of CPU's 'tb_jmp_cache' access by:
>  * using atomic_read() to look up a TB when not holding 'tb_lock';
>  * using atomic_set() to remove a TB from each CPU's local cache on
>    TB invalidation.
>
> Second, add some memory barriers to ensure we don't put the TB being
> invalidated back to CPU's 'tb_jmp_cache'. If we fail to look up a TB in
> CPU's local cache because it is being invalidated by some other thread
> then it must not be found in the shared TB hash table. Otherwise we'd
> put it back to CPU's local cache.
>
> Note that this patch does *not* make CPU's TLB invalidation safe if it
> is done from some other thread while the CPU is in its execution loop.
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> [AJB: fixed missing atomic set, tweak title]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Reviewed-by: Emilio G. Cota <cota@braap.org>
>
> ---
> v1 (AJB):
>   - tweak title
>   - fixed missing set of tb_jmp_cache
> v2
>   - fix spelling s/con't/can't/
>   - add atomic_read while clearing tb_jmp_cache
>   - add r-b tags
> ---
>  cpu-exec.c      | 9 +++++++--
>  translate-all.c | 9 +++++++--
>  2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index b840e1d..10ce1cb 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -285,6 +285,11 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>  {
>      TranslationBlock *tb;
>  
> +    /* Ensure that we won't find a TB in the shared hash table
> +     * if it is being invalidated by some other thread.
> +     * Otherwise we'd put it back to CPU's local cache.
> +     * Pairs with smp_wmb() in tb_phys_invalidate(). */
> +    smp_rmb();
>      tb = tb_find_physical(cpu, pc, cs_base, flags);
>      if (tb) {
>          goto found;
> @@ -315,7 +320,7 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>  
>  found:
>      /* we add the TB in the virtual pc hash table */
> -    cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
> +    atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
>      return tb;
>  }
>  
> @@ -333,7 +338,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu,
>         is executed. */
>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>      tb_lock();
> -    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
> +    tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
>      if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
>                   tb->flags != flags)) {
>          tb = tb_find_slow(cpu, pc, cs_base, flags);
> diff --git a/translate-all.c b/translate-all.c
> index eaa95e4..96efe48 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -1004,11 +1004,16 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>          invalidate_page_bitmap(p);
>      }
>  
> +    /* Ensure that we won't find the TB in the shared hash table
> +     * if we can't see it in CPU's local cache.
> +     * Pairs with smp_rmb() in tb_find_slow(). */
> +    smp_wmb();
> +
>      /* remove the TB from the hash list */
>      h = tb_jmp_cache_hash_func(tb->pc);
>      CPU_FOREACH(cpu) {
> -        if (cpu->tb_jmp_cache[h] == tb) {
> -            cpu->tb_jmp_cache[h] = NULL;
> +        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
> +            atomic_set(&cpu->tb_jmp_cache[h], NULL);
>          }
>      }
>
Sergey Fedorov July 8, 2016, 2:51 p.m. UTC | #2
On 05/07/16 19:18, Alex Bennée wrote:
> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> First, ensure atomicity of CPU's 'tb_jmp_cache' access by:
>  * using atomic_read() to look up a TB when not holding 'tb_lock';
>  * using atomic_set() to remove a TB from each CPU's local cache on
>    TB invalidation.
>
> Second, add some memory barriers to ensure we don't put the TB being
> invalidated back to CPU's 'tb_jmp_cache'. If we fail to look up a TB in
> CPU's local cache because it is being invalidated by some other thread
> then it must not be found in the shared TB hash table. Otherwise we'd
> put it back to CPU's local cache.
>
> Note that this patch does *not* make CPU's TLB invalidation safe if it
> is done from some other thread while the CPU is in its execution loop.
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> [AJB: fixed missing atomic set, tweak title]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Reviewed-by: Emilio G. Cota <cota@braap.org>
>
> ---
> v1 (AJB):
>   - tweak title
>   - fixed missing set of tb_jmp_cache
> v2
>   - fix spelling s/con't/can't/
>   - add atomic_read while clearing tb_jmp_cache
>   - add r-b tags
> ---
>  cpu-exec.c      | 9 +++++++--
>  translate-all.c | 9 +++++++--
>  2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index b840e1d..10ce1cb 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -285,6 +285,11 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>  {
>      TranslationBlock *tb;
>  
> +    /* Ensure that we won't find a TB in the shared hash table
> +     * if it is being invalidated by some other thread.
> +     * Otherwise we'd put it back to CPU's local cache.
> +     * Pairs with smp_wmb() in tb_phys_invalidate(). */
> +    smp_rmb();
>      tb = tb_find_physical(cpu, pc, cs_base, flags);
>      if (tb) {
>          goto found;
> @@ -315,7 +320,7 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>  
>  found:
>      /* we add the TB in the virtual pc hash table */
> -    cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
> +    atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
>      return tb;
>  }
>  
> @@ -333,7 +338,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu,
>         is executed. */
>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>      tb_lock();
> -    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
> +    tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
>      if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
>                   tb->flags != flags)) {
>          tb = tb_find_slow(cpu, pc, cs_base, flags);
> diff --git a/translate-all.c b/translate-all.c
> index eaa95e4..96efe48 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -1004,11 +1004,16 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>          invalidate_page_bitmap(p);
>      }
>  
> +    /* Ensure that we won't find the TB in the shared hash table
> +     * if we can't see it in CPU's local cache.
> +     * Pairs with smp_rmb() in tb_find_slow(). */
> +    smp_wmb();
> +
>      /* remove the TB from the hash list */
>      h = tb_jmp_cache_hash_func(tb->pc);
>      CPU_FOREACH(cpu) {
> -        if (cpu->tb_jmp_cache[h] == tb) {
> -            cpu->tb_jmp_cache[h] = NULL;
> +        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {

I think this can be a simple read because we can't race here with
setting 'tb_jmp_cache' in tb_find_slow().

Kind regards,
Sergey

> +            atomic_set(&cpu->tb_jmp_cache[h], NULL);
>          }
>      }
>
diff mbox

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index b840e1d..10ce1cb 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -285,6 +285,11 @@  static TranslationBlock *tb_find_slow(CPUState *cpu,
 {
     TranslationBlock *tb;
 
+    /* Ensure that we won't find a TB in the shared hash table
+     * if it is being invalidated by some other thread.
+     * Otherwise we'd put it back to CPU's local cache.
+     * Pairs with smp_wmb() in tb_phys_invalidate(). */
+    smp_rmb();
     tb = tb_find_physical(cpu, pc, cs_base, flags);
     if (tb) {
         goto found;
@@ -315,7 +320,7 @@  static TranslationBlock *tb_find_slow(CPUState *cpu,
 
 found:
     /* we add the TB in the virtual pc hash table */
-    cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+    atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
     return tb;
 }
 
@@ -333,7 +338,7 @@  static inline TranslationBlock *tb_find_fast(CPUState *cpu,
        is executed. */
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
     tb_lock();
-    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
     if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                  tb->flags != flags)) {
         tb = tb_find_slow(cpu, pc, cs_base, flags);
diff --git a/translate-all.c b/translate-all.c
index eaa95e4..96efe48 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1004,11 +1004,16 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }
 
+    /* Ensure that we won't find the TB in the shared hash table
+     * if we can't see it in CPU's local cache.
+     * Pairs with smp_rmb() in tb_find_slow(). */
+    smp_wmb();
+
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
     CPU_FOREACH(cpu) {
-        if (cpu->tb_jmp_cache[h] == tb) {
-            cpu->tb_jmp_cache[h] = NULL;
+        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
+            atomic_set(&cpu->tb_jmp_cache[h], NULL);
         }
     }