[RFC,V7,14/19] cpu: introduce tlb_flush*_all.

Message ID 1439220437-23957-15-git-send-email-fred.konrad@greensocs.com
State New

Commit Message

fred.konrad@greensocs.com Aug. 10, 2015, 3:27 p.m. UTC
From: KONRAD Frederic <fred.konrad@greensocs.com>

Some architectures allow one VCPU to flush the TLB of other VCPUs. This is not
a problem when we have only one thread for all VCPUs, but it definitely needs
to be done as asynchronous work when we run in true multithreaded mode.

TODO: Add some test cases; I fear bad results if a VCPU executes a barrier
      or something like that.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
---
 cputlb.c                | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/exec/exec-all.h |  2 ++
 2 files changed, 78 insertions(+)

Comments

Paolo Bonzini Aug. 10, 2015, 3:54 p.m. UTC | #1
On 10/08/2015 17:27, fred.konrad@greensocs.com wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
> 
> Some architectures allow one VCPU to flush the TLB of other VCPUs. This is not
> a problem when we have only one thread for all VCPUs, but it definitely needs
> to be done as asynchronous work when we run in true multithreaded mode.
> 
> TODO: Add some test cases; I fear bad results if a VCPU executes a barrier
>       or something like that.
> 
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> ---
>  cputlb.c                | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
>  include/exec/exec-all.h |  2 ++
>  2 files changed, 78 insertions(+)

I still believe this should be a target-specific change.  This would
also make it easier to do the remote TLB flush synchronously, as is the
case on ARM (if I understand correctly).

Paolo
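
For illustration, a minimal sketch of the synchronous, target-side variant Paolo
suggests. It assumes run_on_cpu() (the blocking counterpart of
async_run_on_cpu()) and one thread per VCPU; the helper names are hypothetical
and not part of this series:

/* Hypothetical helper: flush every VCPU's TLB and wait for each remote
 * flush to finish before returning.  run_on_cpu() blocks until the
 * callback has run on the target VCPU, so this only makes sense when
 * each VCPU has its own thread.
 */
static void tlb_flush_remote_work(void *opaque)
{
    tlb_flush(opaque, 1);
}

static void target_tlb_flush_all_sync(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            tlb_flush(cpu, 1);                      /* our own TLB */
        } else {
            run_on_cpu(cpu, tlb_flush_remote_work, cpu);
        }
    }
}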

Peter Maydell Aug. 10, 2015, 4 p.m. UTC | #2
On 10 August 2015 at 16:54, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 10/08/2015 17:27, fred.konrad@greensocs.com wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> Some architectures allow one VCPU to flush the TLB of other VCPUs. This is not
>> a problem when we have only one thread for all VCPUs, but it definitely needs
>> to be done as asynchronous work when we run in true multithreaded mode.
>>
>> TODO: Add some test cases; I fear bad results if a VCPU executes a barrier
>>       or something like that.
>>
>> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>> ---
>>  cputlb.c                | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/exec/exec-all.h |  2 ++
>>  2 files changed, 78 insertions(+)
>
> I still believe this should be a target-specific change.  This would
> also make it easier to do the remote TLB flush synchronously, as is the
> case on ARM (if I understand correctly).

ARM TLB flushes have to complete by the next barrier instruction
(or equivalent thing); so they're asynchronous but with a guest-controlled
synchronization point.

Also, compare the series I posted recently for adding missing
TLB operations:
https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg00945.html
which adds support for mmuidx-specific flush operations; those would
increase the number of primitives you're trying to support here.
That might argue for making this target-specific.

thanks
-- PMM
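
To make that concrete: under the scheme in this patch, each by-mmuidx primitive
from that series would also need a broadcast counterpart. A hypothetical sketch
of one such wrapper follows; the names are illustrative, and the per-CPU call is
a simplified single-index stand-in for the actual primitives in Peter's series:

struct TLBFlushPageMmuidxParams {
    CPUState *cpu;
    target_ulong addr;
    int mmu_idx;
};

static void tlb_flush_page_mmuidx_async_work(void *opaque)
{
    struct TLBFlushPageMmuidxParams *params = opaque;

    /* simplified stand-in for the per-CPU by-mmuidx flush */
    tlb_flush_page_by_mmuidx(params->cpu, params->addr, params->mmu_idx);
    g_free(params);
}

void tlb_flush_page_by_mmuidx_all(target_ulong addr, int mmu_idx)
{
    CPUState *cpu;
    struct TLBFlushPageMmuidxParams *params;

    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            tlb_flush_page_by_mmuidx(cpu, addr, mmu_idx);
        } else {
            params = g_malloc(sizeof(*params));
            params->cpu = cpu;
            params->addr = addr;
            params->mmu_idx = mmu_idx;
            async_run_on_cpu(cpu, tlb_flush_page_mmuidx_async_work, params);
        }
    }
}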

Patch

diff --git a/cputlb.c b/cputlb.c
index 79fff1c..e5853fd 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -72,6 +72,45 @@ void tlb_flush(CPUState *cpu, int flush_global)
     tlb_flush_count++;
 }
 
+struct TLBFlushParams {
+    CPUState *cpu;
+    int flush_global;
+};
+
+static void tlb_flush_async_work(void *opaque)
+{
+    struct TLBFlushParams *params = opaque;
+
+    tlb_flush(params->cpu, params->flush_global);
+    g_free(params);
+}
+
+void tlb_flush_all(int flush_global)
+{
+    CPUState *cpu;
+    struct TLBFlushParams *params;
+
+#if 0 /* !MTTCG */
+    CPU_FOREACH(cpu) {
+        tlb_flush(cpu, flush_global);
+    }
+#else
+    CPU_FOREACH(cpu) {
+        if (qemu_cpu_is_self(cpu)) {
+            /* async_run_on_cpu() handles this case as well, but calling
+             * tlb_flush() directly avoids a malloc here.
+             */
+            tlb_flush(cpu, flush_global);
+        } else {
+            params = g_malloc(sizeof(struct TLBFlushParams));
+            params->cpu = cpu;
+            params->flush_global = flush_global;
+            async_run_on_cpu(cpu, tlb_flush_async_work, params);
+        }
+    }
+#endif /* MTTCG */
+}
+
 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
 {
     if (addr == (tlb_entry->addr_read &
@@ -124,6 +163,43 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
     tb_flush_jmp_cache(cpu, addr);
 }
 
+struct TLBFlushPageParams {
+    CPUState *cpu;
+    target_ulong addr;
+};
+
+static void tlb_flush_page_async_work(void *opaque)
+{
+    struct TLBFlushPageParams *params = opaque;
+
+    tlb_flush_page(params->cpu, params->addr);
+    g_free(params);
+}
+
+void tlb_flush_page_all(target_ulong addr)
+{
+    CPUState *cpu;
+    struct TLBFlushPageParams *params;
+
+    CPU_FOREACH(cpu) {
+#if 0 /* !MTTCG */
+        tlb_flush_page(cpu, addr);
+#else
+        if (qemu_cpu_is_self(cpu)) {
+            /* async_run_on_cpu() handles this case as well, but calling
+             * tlb_flush_page() directly avoids a malloc here.
+             */
+            tlb_flush_page(cpu, addr);
+        } else {
+            params = g_malloc(sizeof(struct TLBFlushPageParams));
+            params->cpu = cpu;
+            params->addr = addr;
+            async_run_on_cpu(cpu, tlb_flush_page_async_work, params);
+        }
+#endif /* MTTCG */
+    }
+}
+
 /* update the TLBs so that writes to code in the virtual page 'addr'
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 9f1c1cb..e9512df 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -97,7 +97,9 @@ bool qemu_in_vcpu_thread(void)
 void cpu_reload_memory_map(CPUState *cpu);
 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
 /* cputlb.c */
+void tlb_flush_page_all(target_ulong addr);
 void tlb_flush_page(CPUState *cpu, target_ulong addr);
+void tlb_flush_all(int flush_global);
 void tlb_flush(CPUState *cpu, int flush_global);
 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                   hwaddr paddr, int prot,
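
For context, a hypothetical caller sketch (not part of this patch) showing how a
target's emulation of broadcast TLB-maintenance operations could use the new
helpers; the function names and the ARM operation names are only illustrative:

/* Broadcast "invalidate all" (e.g. ARM TLBIALLIS): flush every VCPU's
 * TLB, including global entries.
 */
static void emulate_broadcast_tlbi_all(void)
{
    tlb_flush_all(1);
}

/* Broadcast "invalidate by address" (e.g. ARM TLBIMVAIS): flush one
 * page on every VCPU; addr comes from the guest-written register value.
 */
static void emulate_broadcast_tlbi_mva(target_ulong addr)
{
    tlb_flush_page_all(addr & TARGET_PAGE_MASK);
}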