diff mbox

[v2,3/3] target-ppc: tlbie should have global effect

Message ID 1473426865-14191-3-git-send-email-nikunj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Nikunj A Dadhania Sept. 9, 2016, 1:14 p.m. UTC
tlbie (H_REMOVE, H_PROTECT and H_BULK_REMOVE for pseries) should have a
global effect.

Introduce the TLB_NEED_GLOBAL_FLUSH flag. During a delayed flush, after
taking care of the local flush, check whether a broadcast flush (ptesync,
tlbsync, etc.) is needed. Depending on the bitmask state of tlb_need_flush,
the TLB is flushed on the other CPUs if needed and the flags are cleared.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 hw/ppc/spapr_hcall.c     |  2 ++
 target-ppc/cpu.h         |  1 +
 target-ppc/helper_regs.h | 19 +++++++++++++++++++
 target-ppc/mmu-hash64.c  |  2 +-
 target-ppc/mmu_helper.c  | 10 +++++++---
 target-ppc/translate.c   |  6 ++++++
 6 files changed, 36 insertions(+), 4 deletions(-)

Comments

Benjamin Herrenschmidt Sept. 9, 2016, 11:03 p.m. UTC | #1
On Fri, 2016-09-09 at 18:44 +0530, Nikunj A Dadhania wrote:
> +static inline void tlb_clear_flag(CPUState *cs)

> +{

> +    PowerPCCPU *cpu = POWERPC_CPU(cs);

> +    CPUPPCState *env = &cpu->env;

> +

> +    env->tlb_need_flush = 0;

> +}


What is the point of making this a separate function ?

Also I'm not 100% certain about the correctness of clearing
TLB_NEED_GLOBAL_FLUSH on the "other" guy.

We could have the situation where:

	cpu 1:					cpu 2:
	sets both				..
	isync (clears local flush)		..
	<insert new translation>
	..					set both
	..					..
	..					..
	ptesync (clears global flush)		.. (both gets cleared)

Now here, you can see that cpu2 never does a global flush and so the
new translation inserted by cpu 1 is not cleared while architecturally
it should be.

That being said, I doubt the above scenario can happen in practice,
but I think it's safer if you only clear the local bit on the "other"
CPUs.

>  static inline void check_tlb_flush(CPUPPCState *env, uint32_t

> global)

>  {

>      CPUState *cs = CPU(ppc_env_get_cpu(env));

> @@ -161,6 +169,17 @@ static inline void check_tlb_flush(CPUPPCState

> *env, uint32_t global)

>          tlb_flush(cs, 1);

>          env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;

>      }

> +

> +    if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) {

> +        CPUState *other_cs;

> +        CPU_FOREACH(other_cs) {

> +            if (other_cs != cs) {

> +                tlb_clear_flag(other_cs);

> +                tlb_flush(other_cs, 1);

> +            }

> +        }

> +        env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH;

> +    }

>  }

>  #else
David Gibson Sept. 12, 2016, 3:02 a.m. UTC | #2
On Sat, Sep 10, 2016 at 09:03:56AM +1000, Benjamin Herrenschmidt wrote:
> On Fri, 2016-09-09 at 18:44 +0530, Nikunj A Dadhania wrote:
> > +static inline void tlb_clear_flag(CPUState *cs)
> > +{
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +    CPUPPCState *env = &cpu->env;
> > +
> > +    env->tlb_need_flush = 0;
> > +}
> 
> What is the point of making this a separate function ?
> 
> Also I'm not 100% certain about the correctness of clearing
> TLB_NEED_GLOBAL_FLUSH on the "other" guy.
> 
> We could have the situation where:
> 
> 	cpu 1:					cpu 2:
> 	sets both				..
> 	isync (clears local flush)		..
> 	<insert new translation>
> 	..					set both
> 	..					..
> 	..					..
> 	ptesync (clears global flush)		.. (both gets cleared)
> 
> Now here, you can see that cpu2 never does a global flush and so the
> new translation inserted by cpu 1 is not cleared while architecturally
> it should be.
> 
> That being said, I doubt the above scenario can happen in practice,
> but I think it's safer if you only clear the local bit on the "other"
> CPUs.

I'll wait for a respin addressing these comments.

Please also add a cover letter on the next version.

> 
> >  static inline void check_tlb_flush(CPUPPCState *env, uint32_t
> > global)
> >  {
> >      CPUState *cs = CPU(ppc_env_get_cpu(env));
> > @@ -161,6 +169,17 @@ static inline void check_tlb_flush(CPUPPCState
> > *env, uint32_t global)
> >          tlb_flush(cs, 1);
> >          env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
> >      }
> > +
> > +    if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) {
> > +        CPUState *other_cs;
> > +        CPU_FOREACH(other_cs) {
> > +            if (other_cs != cs) {
> > +                tlb_clear_flag(other_cs);
> > +                tlb_flush(other_cs, 1);
> > +            }
> > +        }
> > +        env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH;
> > +    }
> >  }
> >  #else
>
Nikunj A Dadhania Sept. 12, 2016, 4:43 a.m. UTC | #3
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Fri, 2016-09-09 at 18:44 +0530, Nikunj A Dadhania wrote:
>> +static inline void tlb_clear_flag(CPUState *cs)
>> +{
>> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +    CPUPPCState *env = &cpu->env;
>> +
>> +    env->tlb_need_flush = 0;
>> +}
>
> What is the point of making this a separate function ?

When I wrote it, I thought this would be used from various places, but I
don't think it's a requirement. Will inline it.

> Also I'm not 100% certain about the correctness of clearing
> TLB_NEED_GLOBAL_FLUSH on the "other" guy.
>
> We could have the situation where:
>
> 	cpu 1:					cpu 2:
> 	sets both				..
> 	isync (clears local flush)		..
> 	<insert new translation>
> 	..					set both
> 	..					..
> 	..					..
> 	ptesync (clears global flush)		.. (both gets cleared)
>
> Now here, you can see that cpu2 never does a global flush and so the
> new translation inserted by cpu 1 is not cleared while architecturally
> it should be.

Right, will only clear the local flag.

> That being said, I doubt the above scenario can happen in practice,
> but I think it's safer if you only clear the local bit on the "other"
> CPUs.

Regards,
Nikunj
diff mbox

Patch

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index ef12ea0..6144e17 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -319,6 +319,8 @@  static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     ppc_hash64_store_hpte(cpu, pte_index,
                           (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
     ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
+    /* Flush the tlb */
+    check_tlb_flush(env, 1);
     /* Don't need a memory barrier, due to qemu's global lock */
     ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
     return H_SUCCESS;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 71111dc..50fe0f5 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1010,6 +1010,7 @@  struct CPUPPCState {
     bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active                */
     uint32_t tlb_need_flush; /* Delayed flush needed */
 #define TLB_NEED_LOCAL_FLUSH   0x1
+#define TLB_NEED_GLOBAL_FLUSH  0x2
 #endif
 
     /* Other registers */
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index bcf65ce..24872ca 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -154,6 +154,14 @@  static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
 }
 
 #if !defined(CONFIG_USER_ONLY)
+static inline void tlb_clear_flag(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    env->tlb_need_flush = 0;
+}
+
 static inline void check_tlb_flush(CPUPPCState *env, uint32_t global)
 {
     CPUState *cs = CPU(ppc_env_get_cpu(env));
@@ -161,6 +169,17 @@  static inline void check_tlb_flush(CPUPPCState *env, uint32_t global)
         tlb_flush(cs, 1);
         env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
     }
+
+    if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) {
+        CPUState *other_cs;
+        CPU_FOREACH(other_cs) {
+            if (other_cs != cs) {
+                tlb_clear_flag(other_cs);
+                tlb_flush(other_cs, 1);
+            }
+        }
+        env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH;
+    }
 }
 #else
 static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { }
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 1f52b64..fdb7a78 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -912,7 +912,7 @@  void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
      * invalidate, and we still don't have a tlb_flush_mask(env, n,
      * mask) in QEMU, we just invalidate all TLBs
      */
-    tlb_flush(CPU(cpu), 1);
+    cpu->env.tlb_need_flush = TLB_NEED_GLOBAL_FLUSH | TLB_NEED_LOCAL_FLUSH;
 }
 
 void ppc_hash64_update_rmls(CPUPPCState *env)
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index bf9f329..1dd057a 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -2757,7 +2757,7 @@  static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
 
 void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    CPUState *cs;
 
     if (address & 0x4) {
         /* flush all entries */
@@ -2774,11 +2774,15 @@  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
     if (address & 0x8) {
         /* flush TLB1 entries */
         booke206_invalidate_ea_tlb(env, 1, address);
-        tlb_flush(CPU(cpu), 1);
+        CPU_FOREACH(cs) {
+            tlb_flush(cs, 1);
+        }
     } else {
         /* flush TLB0 entries */
         booke206_invalidate_ea_tlb(env, 0, address);
-        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
+        CPU_FOREACH(cs) {
+            tlb_flush_page(cs, address & MAS2_EPN_MASK);
+        }
     }
 }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 77bb312..2aae43b 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4443,6 +4443,7 @@  static void gen_tlbie(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     GEN_PRIV;
 #else
+    TCGv_i32 t1;
     CHK_HV;
 
     if (NARROW_MODE(ctx)) {
@@ -4453,6 +4454,11 @@  static void gen_tlbie(DisasContext *ctx)
     } else {
         gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
     }
+    t1 = tcg_temp_new_i32();
+    tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
+    tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_temp_free_i32(t1);
 #endif /* defined(CONFIG_USER_ONLY) */
 }