diff mbox

[v4,3/3] target-ppc: tlbie/tlbivax should have global effect

Message ID 1473832442-17762-4-git-send-email-nikunj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Nikunj A Dadhania Sept. 14, 2016, 5:54 a.m. UTC
tlbie (BookS) and tlbivax (BookE) plus the H_CALLs(pseries) should have
a global effect.

Introduces TLB_NEED_GLOBAL_FLUSH flag. During lazy tlb flush, after
taking care of pending local flushes, check broadcast flush(at context
synchronizing event ptesync/tlbsync, etc) is needed. Depending on the
bitmask state of the tlb_need_flush, tlb is flushed from other cpus if
needed and the flags are cleared.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 hw/ppc/spapr_hcall.c     |  2 ++
 target-ppc/cpu.h         |  1 +
 target-ppc/helper_regs.h | 17 +++++++++++++++++
 target-ppc/mmu-hash64.c  |  2 +-
 target-ppc/mmu_helper.c  | 10 +++++++---
 target-ppc/translate.c   |  6 ++++++
 6 files changed, 34 insertions(+), 4 deletions(-)

Comments

David Gibson Sept. 15, 2016, 12:25 a.m. UTC | #1
On Wed, Sep 14, 2016 at 11:24:02AM +0530, Nikunj A Dadhania wrote:
> tlbie (BookS) and tlbivax (BookE) plus the H_CALLs(pseries) should have
> a global effect.
> 
> Introduces TLB_NEED_GLOBAL_FLUSH flag. During lazy tlb flush, after
> taking care of pending local flushes, check broadcast flush(at context
> synchronizing event ptesync/tlbsync, etc) is needed. Depending on the
> bitmask state of the tlb_need_flush, tlb is flushed from other cpus if
> needed and the flags are cleared.
> 
> Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_hcall.c     |  2 ++
>  target-ppc/cpu.h         |  1 +
>  target-ppc/helper_regs.h | 17 +++++++++++++++++
>  target-ppc/mmu-hash64.c  |  2 +-
>  target-ppc/mmu_helper.c  | 10 +++++++---
>  target-ppc/translate.c   |  6 ++++++
>  6 files changed, 34 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index ef12ea0..6144e17 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -319,6 +319,8 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>      ppc_hash64_store_hpte(cpu, pte_index,
>                            (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
>      ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
> +    /* Flush the tlb */
> +    check_tlb_flush(env, 1);
>      /* Don't need a memory barrier, due to qemu's global lock */
>      ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
>      return H_SUCCESS;
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 71111dc..50fe0f5 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1010,6 +1010,7 @@ struct CPUPPCState {
>      bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active                */
>      uint32_t tlb_need_flush; /* Delayed flush needed */
>  #define TLB_NEED_LOCAL_FLUSH   0x1
> +#define TLB_NEED_GLOBAL_FLUSH  0x2
>  #endif
>  
>      /* Other registers */
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index bcf65ce..fd2c961 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -161,6 +161,23 @@ static inline void check_tlb_flush(CPUPPCState *env, uint32_t global)
>          tlb_flush(cs, 1);
>          env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
>      }
> +
> +    /* Propagate TLB invalidations to other CPUs when the guest uses broadcast
> +     * TLB invalidation instructions.
> +     */
> +    if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) {
> +        CPUState *other_cs;
> +        CPU_FOREACH(other_cs) {
> +            if (other_cs != cs) {
> +                PowerPCCPU *cpu = POWERPC_CPU(other_cs);
> +                CPUPPCState *other_env = &cpu->env;
> +
> +                other_env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
> +                tlb_flush(other_cs, 1);
> +            }
> +        }
> +        env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH;
> +    }
>  }
>  #else
>  static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { }
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 1f52b64..fdb7a78 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -912,7 +912,7 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
>       * invalidate, and we still don't have a tlb_flush_mask(env, n,
>       * mask) in QEMU, we just invalidate all TLBs
>       */
> -    tlb_flush(CPU(cpu), 1);
> +    cpu->env.tlb_need_flush = TLB_NEED_GLOBAL_FLUSH | TLB_NEED_LOCAL_FLUSH;
>  }
>  
>  void ppc_hash64_update_rmls(CPUPPCState *env)
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index bf9f329..1dd057a 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -2757,7 +2757,7 @@ static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
>  
>  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> +    CPUState *cs;
>  
>      if (address & 0x4) {
>          /* flush all entries */
> @@ -2774,11 +2774,15 @@ void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
>      if (address & 0x8) {
>          /* flush TLB1 entries */
>          booke206_invalidate_ea_tlb(env, 1, address);
> -        tlb_flush(CPU(cpu), 1);
> +        CPU_FOREACH(cs) {
> +            tlb_flush(cs, 1);
> +        }
>      } else {
>          /* flush TLB0 entries */
>          booke206_invalidate_ea_tlb(env, 0, address);
> -        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
> +        CPU_FOREACH(cs) {
> +            tlb_flush_page(cs, address & MAS2_EPN_MASK);
> +        }

Why are these explicit CPU_FOREACH()s instead of using the flags
you've just built?

>      }
>  }
>  
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 5026804..d96ff66 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4448,6 +4448,7 @@ static void gen_tlbie(DisasContext *ctx)
>  #if defined(CONFIG_USER_ONLY)
>      GEN_PRIV;
>  #else
> +    TCGv_i32 t1;
>      CHK_HV;
>  
>      if (NARROW_MODE(ctx)) {
> @@ -4458,6 +4459,11 @@ static void gen_tlbie(DisasContext *ctx)
>      } else {
>          gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
>      }
> +    t1 = tcg_temp_new_i32();
> +    tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> +    tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
> +    tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> +    tcg_temp_free_i32(t1);
>  #endif /* defined(CONFIG_USER_ONLY) */
>  }
>
Benjamin Herrenschmidt Sept. 15, 2016, 1:41 a.m. UTC | #2
On Thu, 2016-09-15 at 10:25 +1000, David Gibson wrote:
> >  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong
> address)
> >  {
> > -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > +    CPUState *cs;
> >  
> >      if (address & 0x4) {
> >          /* flush all entries */
> > @@ -2774,11 +2774,15 @@ void helper_booke206_tlbivax(CPUPPCState
> *env, target_ulong address)
> >      if (address & 0x8) {
> >          /* flush TLB1 entries */
> >          booke206_invalidate_ea_tlb(env, 1, address);
> > -        tlb_flush(CPU(cpu), 1);
> > +        CPU_FOREACH(cs) {
> > +            tlb_flush(cs, 1);
> > +        }
> >      } else {
> >          /* flush TLB0 entries */
> >          booke206_invalidate_ea_tlb(env, 0, address);
> > -        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
> > +        CPU_FOREACH(cs) {
> > +            tlb_flush_page(cs, address & MAS2_EPN_MASK);
> > +        }
> 
> Why are these explicit CPU_FOREACH()s instead of using the flags
> you've just bui

Because we haven't converted BookE to lazy TLB flushing yet...

Cheers,
Ben.
David Gibson Sept. 15, 2016, 1:48 a.m. UTC | #3
On Thu, Sep 15, 2016 at 11:41:01AM +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2016-09-15 at 10:25 +1000, David Gibson wrote:
> > >  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong
> > address)
> > >  {
> > > -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > > +    CPUState *cs;
> > >  
> > >      if (address & 0x4) {
> > >          /* flush all entries */
> > > @@ -2774,11 +2774,15 @@ void helper_booke206_tlbivax(CPUPPCState
> > *env, target_ulong address)
> > >      if (address & 0x8) {
> > >          /* flush TLB1 entries */
> > >          booke206_invalidate_ea_tlb(env, 1, address);
> > > -        tlb_flush(CPU(cpu), 1);
> > > +        CPU_FOREACH(cs) {
> > > +            tlb_flush(cs, 1);
> > > +        }
> > >      } else {
> > >          /* flush TLB0 entries */
> > >          booke206_invalidate_ea_tlb(env, 0, address);
> > > -        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
> > > +        CPU_FOREACH(cs) {
> > > +            tlb_flush_page(cs, address & MAS2_EPN_MASK);
> > > +        }
> > 
> > Why are these explicit CPU_FOREACH()s instead of using the flags
> > you've just bui
> 
> Because we haven't converted BookE to lazy TLB flushing yet...

Ah, right.
diff mbox

Patch

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index ef12ea0..6144e17 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -319,6 +319,8 @@  static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     ppc_hash64_store_hpte(cpu, pte_index,
                           (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
     ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
+    /* Flush the tlb */
+    check_tlb_flush(env, 1);
     /* Don't need a memory barrier, due to qemu's global lock */
     ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
     return H_SUCCESS;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 71111dc..50fe0f5 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1010,6 +1010,7 @@  struct CPUPPCState {
     bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active                */
     uint32_t tlb_need_flush; /* Delayed flush needed */
 #define TLB_NEED_LOCAL_FLUSH   0x1
+#define TLB_NEED_GLOBAL_FLUSH  0x2
 #endif
 
     /* Other registers */
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index bcf65ce..fd2c961 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -161,6 +161,23 @@  static inline void check_tlb_flush(CPUPPCState *env, uint32_t global)
         tlb_flush(cs, 1);
         env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
     }
+
+    /* Propagate TLB invalidations to other CPUs when the guest uses broadcast
+     * TLB invalidation instructions.
+     */
+    if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) {
+        CPUState *other_cs;
+        CPU_FOREACH(other_cs) {
+            if (other_cs != cs) {
+                PowerPCCPU *cpu = POWERPC_CPU(other_cs);
+                CPUPPCState *other_env = &cpu->env;
+
+                other_env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
+                tlb_flush(other_cs, 1);
+            }
+        }
+        env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH;
+    }
 }
 #else
 static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { }
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 1f52b64..fdb7a78 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -912,7 +912,7 @@  void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
      * invalidate, and we still don't have a tlb_flush_mask(env, n,
      * mask) in QEMU, we just invalidate all TLBs
      */
-    tlb_flush(CPU(cpu), 1);
+    cpu->env.tlb_need_flush = TLB_NEED_GLOBAL_FLUSH | TLB_NEED_LOCAL_FLUSH;
 }
 
 void ppc_hash64_update_rmls(CPUPPCState *env)
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index bf9f329..1dd057a 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -2757,7 +2757,7 @@  static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
 
 void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    CPUState *cs;
 
     if (address & 0x4) {
         /* flush all entries */
@@ -2774,11 +2774,15 @@  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
     if (address & 0x8) {
         /* flush TLB1 entries */
         booke206_invalidate_ea_tlb(env, 1, address);
-        tlb_flush(CPU(cpu), 1);
+        CPU_FOREACH(cs) {
+            tlb_flush(cs, 1);
+        }
     } else {
         /* flush TLB0 entries */
         booke206_invalidate_ea_tlb(env, 0, address);
-        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
+        CPU_FOREACH(cs) {
+            tlb_flush_page(cs, address & MAS2_EPN_MASK);
+        }
     }
 }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 5026804..d96ff66 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4448,6 +4448,7 @@  static void gen_tlbie(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     GEN_PRIV;
 #else
+    TCGv_i32 t1;
     CHK_HV;
 
     if (NARROW_MODE(ctx)) {
@@ -4458,6 +4459,11 @@  static void gen_tlbie(DisasContext *ctx)
     } else {
         gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
     }
+    t1 = tcg_temp_new_i32();
+    tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
+    tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_temp_free_i32(t1);
 #endif /* defined(CONFIG_USER_ONLY) */
 }