diff mbox

[RFC,V6,14/18] add a callback when tb_invalidate is called.

Message ID 1435330053-18733-15-git-send-email-fred.konrad@greensocs.com
State New
Headers show

Commit Message

fred.konrad@greensocs.com June 26, 2015, 2:47 p.m. UTC
From: KONRAD Frederic <fred.konrad@greensocs.com>

Instead of doing the jump cache invalidation directly in tb_invalidate, delay it
until after the exit so we don't have another CPU trying to execute the code
being invalidated.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
---
 translate-all.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini June 26, 2015, 4:20 p.m. UTC | #1
On 26/06/2015 16:47, fred.konrad@greensocs.com wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
> 
> Instead of doing the jump cache invalidation directly in tb_invalidate delay it
> after the exit so we don't have an other CPU trying to execute the code being
> invalidated.
> 
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> ---
>  translate-all.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/translate-all.c b/translate-all.c
> index ade2269..468648d 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -61,6 +61,7 @@
>  #include "translate-all.h"
>  #include "qemu/bitmap.h"
>  #include "qemu/timer.h"
> +#include "sysemu/cpus.h"
>  
>  //#define DEBUG_TB_INVALIDATE
>  //#define DEBUG_FLUSH
> @@ -966,14 +967,58 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
>      tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
>  }
>  
> +struct CPUDiscardTBParams {
> +    CPUState *cpu;
> +    TranslationBlock *tb;
> +};
> +
> +static void cpu_discard_tb_from_jmp_cache(void *opaque)
> +{
> +    unsigned int h;
> +    struct CPUDiscardTBParams *params = opaque;
> +
> +    h = tb_jmp_cache_hash_func(params->tb->pc);
> +    if (params->cpu->tb_jmp_cache[h] == params->tb) {
> +        params->cpu->tb_jmp_cache[h] = NULL;
> +    }

It is a bit more tricky, but I think you can avoid async_run_on_cpu by
doing this:

1) introduce a QemuSeqLock in TBContext, e.g. invalidate_seqlock.

2) wrap this "if" with seqlock_write_lock/unlock

3) in cpu-exec.c do this:

     /* we add the TB in the virtual pc hash table */
+    idx = seqlock_read_begin(&tcg_ctx.tb_ctx.invalidate_seqlock);
     cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+    if (seqlock_read_retry(&tcg_ctx.tb_ctx.invalidate_seqlock)) {
+        /* Another CPU invalidated a tb in the meanwhile.  We do not
+         * know if it's this one, but play it safe and avoid caching
+         * it.
+         */
+        cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = NULL;
+    }

> +    /* suppress this TB from the two jump lists */
> +    tb_jmp_remove(tb, 0);
> +    tb_jmp_remove(tb, 1);

If you do the above synchronously, this part doesn't need to be deferred
either.

Then, immediately after the two tb_jmp_remove calls you can also check
whether "(tb->jmp_first & 3) == 2": if so, the expensive
async_run_safe_work_on_cpu can be skipped.

Paolo

> +#endif /* MTTCG */
>  
>      tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
>      tb_unlock();
>
fred.konrad@greensocs.com June 26, 2015, 4:40 p.m. UTC | #2
On 26/06/2015 18:20, Paolo Bonzini wrote:
>
> On 26/06/2015 16:47, fred.konrad@greensocs.com wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> Instead of doing the jump cache invalidation directly in tb_invalidate delay it
>> after the exit so we don't have an other CPU trying to execute the code being
>> invalidated.
>>
>> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>> ---
>>   translate-all.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 59 insertions(+), 2 deletions(-)
>>
>> diff --git a/translate-all.c b/translate-all.c
>> index ade2269..468648d 100644
>> --- a/translate-all.c
>> +++ b/translate-all.c
>> @@ -61,6 +61,7 @@
>>   #include "translate-all.h"
>>   #include "qemu/bitmap.h"
>>   #include "qemu/timer.h"
>> +#include "sysemu/cpus.h"
>>   
>>   //#define DEBUG_TB_INVALIDATE
>>   //#define DEBUG_FLUSH
>> @@ -966,14 +967,58 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
>>       tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
>>   }
>>   
>> +struct CPUDiscardTBParams {
>> +    CPUState *cpu;
>> +    TranslationBlock *tb;
>> +};
>> +
>> +static void cpu_discard_tb_from_jmp_cache(void *opaque)
>> +{
>> +    unsigned int h;
>> +    struct CPUDiscardTBParams *params = opaque;
>> +
>> +    h = tb_jmp_cache_hash_func(params->tb->pc);
>> +    if (params->cpu->tb_jmp_cache[h] == params->tb) {
>> +        params->cpu->tb_jmp_cache[h] = NULL;
>> +    }
> It is a bit more tricky, but I think you can avoid async_run_on_cpu by
> doing this:
>
> 1) introduce a QemuSeqLock in TBContext, e.g. invalidate_seqlock.
>
> 2) wrap this "if" with seqlock_write_lock/unlock
>
> 3) in cpu-exec.c do this:
>
>       /* we add the TB in the virtual pc hash table */
> +    idx = seqlock_read_begin(&tcg_ctx.tb_ctx.invalidate_seqlock);
>       cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
> +    if (seqlock_read_retry(&tcg_ctx.tb_ctx.invalidate_seqlock)) {
> +        /* Another CPU invalidated a tb in the meanwhile.  We do not
> +         * know if it's this one, but play it safe and avoid caching
> +         * it.
> +         */
> +        cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = NULL;
> +    }
>
>> +    /* suppress this TB from the two jump lists */
>> +    tb_jmp_remove(tb, 0);
>> +    tb_jmp_remove(tb, 1);
> If you do the above synchronously, this part doesn't need to be deferred
> either.
>
> Then, immediately after the two tb_jmp_remove calls you can also check
> whether "(tb->jmp_first & 3) == 2": if so, the expensive expensive
> async_run_safe_work_on_cpu can be skipped.
>
> Paolo
Ok seems tricky :) I'll take a look at this.

Fred

>> +#endif /* MTTCG */
>>   
>>       tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
>>       tb_unlock();
>>
Alex Bennée July 7, 2015, 3:32 p.m. UTC | #3
fred.konrad@greensocs.com writes:

> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> Instead of doing the jump cache invalidation directly in tb_invalidate delay it
> after the exit so we don't have an other CPU trying to execute the code being
> invalidated.
>
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> ---
>  translate-all.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 59 insertions(+), 2 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index ade2269..468648d 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -61,6 +61,7 @@
>  #include "translate-all.h"
>  #include "qemu/bitmap.h"
>  #include "qemu/timer.h"
> +#include "sysemu/cpus.h"
>  
>  //#define DEBUG_TB_INVALIDATE
>  //#define DEBUG_FLUSH
> @@ -966,14 +967,58 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
>      tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
>  }
>  
> +struct CPUDiscardTBParams {
> +    CPUState *cpu;
> +    TranslationBlock *tb;
> +};
> +
> +static void cpu_discard_tb_from_jmp_cache(void *opaque)
> +{
> +    unsigned int h;
> +    struct CPUDiscardTBParams *params = opaque;
> +
> +    h = tb_jmp_cache_hash_func(params->tb->pc);
> +    if (params->cpu->tb_jmp_cache[h] == params->tb) {
> +        params->cpu->tb_jmp_cache[h] = NULL;
> +    }
> +
> +    g_free(opaque);
> +}
> +
> +static void tb_invalidate_jmp_remove(void *opaque)
> +{
> +    TranslationBlock *tb = opaque;
> +    TranslationBlock *tb1, *tb2;
> +    unsigned int n1;
> +
> +    /* suppress this TB from the two jump lists */
> +    tb_jmp_remove(tb, 0);
> +    tb_jmp_remove(tb, 1);
> +
> +    /* suppress any remaining jumps to this TB */
> +    tb1 = tb->jmp_first;
> +    for (;;) {
> +        n1 = (uintptr_t)tb1 & 3;
> +        if (n1 == 2) {
> +            break;
> +        }
> +        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
> +        tb2 = tb1->jmp_next[n1];
> +        tb_reset_jump(tb1, n1);
> +        tb1->jmp_next[n1] = NULL;
> +        tb1 = tb2;
> +    }
> +    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
> +}
> +
>  /* invalidate one TB */
>  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>  {
>      CPUState *cpu;
>      PageDesc *p;
> -    unsigned int h, n1;
> +    unsigned int h;
>      tb_page_addr_t phys_pc;
> -    TranslationBlock *tb1, *tb2;
> +    struct CPUDiscardTBParams *params;
>  
>      tb_lock();
>  
> @@ -996,6 +1041,9 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>  
>      tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
>  
> +#if 0 /*MTTCG*/
> +    TranslationBlock *tb1, *tb2;
> +    unsigned int n1;

We may as well bite the bullet and get some build logic in to
conditionally build MTTCG (with the aim that all builds will eventually be MTTCG).

>      /* remove the TB from the hash list */
>      h = tb_jmp_cache_hash_func(tb->pc);
>      CPU_FOREACH(cpu) {
> @@ -1022,6 +1070,15 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>          tb1 = tb2;
>      }
>      tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
> +#else
> +    CPU_FOREACH(cpu) {
> +        params = g_malloc(sizeof(struct CPUDiscardTBParams));
> +        params->cpu = cpu;
> +        params->tb = tb;
> +        async_run_on_cpu(cpu, cpu_discard_tb_from_jmp_cache, params);
> +    }
> +    async_run_safe_work_on_cpu(first_cpu, tb_invalidate_jmp_remove, tb);
> +#endif /* MTTCG */
>  
>      tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
>      tb_unlock();
diff mbox

Patch

diff --git a/translate-all.c b/translate-all.c
index ade2269..468648d 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -61,6 +61,7 @@ 
 #include "translate-all.h"
 #include "qemu/bitmap.h"
 #include "qemu/timer.h"
+#include "sysemu/cpus.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -966,14 +967,58 @@  static inline void tb_reset_jump(TranslationBlock *tb, int n)
     tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
 }
 
+struct CPUDiscardTBParams {
+    CPUState *cpu;
+    TranslationBlock *tb;
+};
+
+static void cpu_discard_tb_from_jmp_cache(void *opaque)
+{
+    unsigned int h;
+    struct CPUDiscardTBParams *params = opaque;
+
+    h = tb_jmp_cache_hash_func(params->tb->pc);
+    if (params->cpu->tb_jmp_cache[h] == params->tb) {
+        params->cpu->tb_jmp_cache[h] = NULL;
+    }
+
+    g_free(opaque);
+}
+
+static void tb_invalidate_jmp_remove(void *opaque)
+{
+    TranslationBlock *tb = opaque;
+    TranslationBlock *tb1, *tb2;
+    unsigned int n1;
+
+    /* suppress this TB from the two jump lists */
+    tb_jmp_remove(tb, 0);
+    tb_jmp_remove(tb, 1);
+
+    /* suppress any remaining jumps to this TB */
+    tb1 = tb->jmp_first;
+    for (;;) {
+        n1 = (uintptr_t)tb1 & 3;
+        if (n1 == 2) {
+            break;
+        }
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+        tb2 = tb1->jmp_next[n1];
+        tb_reset_jump(tb1, n1);
+        tb1->jmp_next[n1] = NULL;
+        tb1 = tb2;
+    }
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
+}
+
 /* invalidate one TB */
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 {
     CPUState *cpu;
     PageDesc *p;
-    unsigned int h, n1;
+    unsigned int h;
     tb_page_addr_t phys_pc;
-    TranslationBlock *tb1, *tb2;
+    struct CPUDiscardTBParams *params;
 
     tb_lock();
 
@@ -996,6 +1041,9 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 
     tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
 
+#if 0 /*MTTCG*/
+    TranslationBlock *tb1, *tb2;
+    unsigned int n1;
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
     CPU_FOREACH(cpu) {
@@ -1022,6 +1070,15 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         tb1 = tb2;
     }
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
+#else
+    CPU_FOREACH(cpu) {
+        params = g_malloc(sizeof(struct CPUDiscardTBParams));
+        params->cpu = cpu;
+        params->tb = tb;
+        async_run_on_cpu(cpu, cpu_discard_tb_from_jmp_cache, params);
+    }
+    async_run_safe_work_on_cpu(first_cpu, tb_invalidate_jmp_remove, tb);
+#endif /* MTTCG */
 
     tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
     tb_unlock();