diff mbox

[03/77] ppc: Do some batching of TCG tlb flushes

Message ID 1447201710-10229-4-git-send-email-benh@kernel.crashing.org
State New
Headers show

Commit Message

Benjamin Herrenschmidt Nov. 11, 2015, 12:27 a.m. UTC
On ppc64 especially, we flush the tlb on any slbie or tlbie instruction.

However, those instructions often come in bursts of 3 or more: a context
switch will, for example, favor a series of slbie's over an slbia if the
SLB has fewer than a certain number of entries in it, and tlbie's can
also happen in a series; with PAPR, H_BULK_REMOVE can remove up to 4
entries at a time.

Doing a tlb_flush() each time is a waste of time. We end up doing a memset
of the whole TLB, reloading it for the next instruction, memset'ing again,
etc...

Those instructions don't have to take effect immediately. For slbie, they
can wait for the next context synchronizing event. For tlbie, the next
tlbsync.

This implements batching by keeping a flag that indicates that we have a
TLB in need of flushing. We check it on interrupts, rfi's, isync's,
ptesync and tlbsync (and at the end of the H_REMOVE and H_BULK_REMOVE
hcalls), and flush the TLB if needed.

This reduces the number of tlb_flush() calls when booting to the first
dialog screen of an Ubuntu installer from roughly 360K down to 36K.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/spapr_hcall.c     | 12 +++++++++---
 target-ppc/cpu.h         |  2 ++
 target-ppc/excp_helper.c |  9 +++++++++
 target-ppc/helper.h      |  1 +
 target-ppc/helper_regs.h | 13 +++++++++++++
 target-ppc/mmu-hash64.c  | 12 +++---------
 target-ppc/mmu_helper.c  |  9 ++++++++-
 target-ppc/translate.c   | 39 ++++++++++++++++++++++++++++++++++++---
 8 files changed, 81 insertions(+), 16 deletions(-)

Comments

David Gibson Nov. 16, 2015, 5 a.m. UTC | #1
On Wed, Nov 11, 2015 at 11:27:16AM +1100, Benjamin Herrenschmidt wrote:
> On ppc64 especially, we flush the tlb on any slbie or tlbie instruction.
> 
> However, those instructions often come in bursts of 3 or more (context
> switch will favor a series of slbie's for example to an slbia if the
> SLB has less than a certain number of entries in it, and tlbie's can
> happen in a series, with PAPR, H_BULK_REMOVE can remove up to 4 entries
> at a time.
> 
> Doing a tlb_flush() each time is a waste of time. We end up doing a memset
> of the whole TLB, reloading it for the next instruction, memset'ing again,
> etc...
> 
> Those instructions don't have to take effect immediately. For slbie, they
> can wait for the next context synchronizing event. For tlbie, the next
> tlbsync.
> 
> This implements batching by keeping a flag that indicates that we have a
> TLB in need of flushing. We check it on interrupts, rfi's, isync's and
> tlbsync and flush the TLB if needed.
> 
> This reduces the number of tlb_flush() on a boot to a ubuntu installer
> first dialog screen from roughly 360K down to 36K.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  hw/ppc/spapr_hcall.c     | 12 +++++++++---
>  target-ppc/cpu.h         |  2 ++
>  target-ppc/excp_helper.c |  9 +++++++++
>  target-ppc/helper.h      |  1 +
>  target-ppc/helper_regs.h | 13 +++++++++++++
>  target-ppc/mmu-hash64.c  | 12 +++---------
>  target-ppc/mmu_helper.c  |  9 ++++++++-
>  target-ppc/translate.c   | 39 ++++++++++++++++++++++++++++++++++++---
>  8 files changed, 81 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index cebceea..7e2cb4b 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>  
>      switch (ret) {
>      case REMOVE_SUCCESS:
> +        check_tlb_flush(env);
>          return H_SUCCESS;
>  
>      case REMOVE_NOT_FOUND:
> @@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                                    target_ulong opcode, target_ulong *args)
>  {
>      CPUPPCState *env = &cpu->env;
> +    target_ulong rc = H_SUCCESS;
>      int i;
>  
>      for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
> @@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>              break;
>  
>          case REMOVE_PARM:
> -            return H_PARAMETER;
> +            rc = H_PARAMETER;
> +            goto exit;
>  
>          case REMOVE_HW:
> -            return H_HARDWARE;
> +            rc = H_HARDWARE;
> +            goto exit;
>          }
>      }
> + exit:
> +    check_tlb_flush(env);
>  
> -    return H_SUCCESS;
> +    return rc;
>  }
>  
>  static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index aaa7117..e6c43f9 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1013,6 +1013,8 @@ struct CPUPPCState {
>      /* PowerPC 64 SLB area */
>      ppc_slb_t slb[MAX_SLB_ENTRIES];
>      int32_t slb_nr;
> +    /* tcg TLB needs flush (deferred slb inval instruction typically) */
> +    uint32_t tlb_need_flush;
>  #endif
>      /* segment registers */
>      hwaddr htab_base;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 3e39098..c1d6605 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      /* Reset exception state */
>      cs->exception_index = POWERPC_EXCP_NONE;
>      env->error_code = 0;
> +
> +    /* Any interrupt is context synchronizing, check if TCG TLB
> +     * needs a delayed flush on ppc64
> +     */
> +    check_tlb_flush(env);
>  }
>  
>  void ppc_cpu_do_interrupt(CPUState *cs)
> @@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>                    __func__, env, env->pending_interrupts,
>                    cs->interrupt_request, (int)msr_me, (int)msr_ee);
>  #endif
> +
>      /* External reset */
>      if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
>          env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
> @@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
>       * as rfi is always the last insn of a TB
>       */
>      cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +
> +    /* Context synchronizing: check if TCG TLB needs flush */
> +    check_tlb_flush(env);
>  }
>  
>  void helper_rfi(CPUPPCState *env)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 869be15..ff2d50b 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
>  DEF_HELPER_1(rfid, void, env)
>  DEF_HELPER_1(hrfid, void, env)
>  #endif
> +DEF_HELPER_1(check_tlb_flush, void, env)
>  #endif
>  
>  DEF_HELPER_3(lmw, void, env, tl, i32)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index f7edd5b..57da931 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      return excp;
>  }
>  
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void check_tlb_flush(CPUPPCState *env)
> +{
> +    CPUState *cs = CPU(ppc_env_get_cpu(env));
> +    if (env->tlb_need_flush) {
> +        env->tlb_need_flush = 0;
> +        tlb_flush(cs, 1);
> +    }
> +}
> +#else
> +static inline void check_tlb_flush(CPUPPCState *env) { }
> +#endif
> +
>  #endif /* !defined(__HELPER_REGS_H__) */
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 7df6ede..71e1d14 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
>  
>  void helper_slbia(CPUPPCState *env)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> -    int n, do_invalidate;
> +    int n;
>  
> -    do_invalidate = 0;
>      /* XXX: Warning: slbia never invalidates the first segment */
>      for (n = 1; n < env->slb_nr; n++) {
>          ppc_slb_t *slb = &env->slb[n];
> @@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env)
>               *      and we still don't have a tlb_flush_mask(env, n, mask)
>               *      in QEMU, we just invalidate all TLBs
>               */
> -            do_invalidate = 1;
> +            env->tlb_need_flush = true;
>          }
>      }
> -    if (do_invalidate) {
> -        tlb_flush(CPU(cpu), 1);
> -    }
>  }
>  
>  void helper_slbie(CPUPPCState *env, target_ulong addr)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
>      ppc_slb_t *slb;
>  
>      slb = slb_lookup(env, addr);
> @@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n, mask)
>           *      in QEMU, we just invalidate all TLBs
>           */
> -        tlb_flush(CPU(cpu), 1);
> +        env->tlb_need_flush = true;
>      }
>  }
>  
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index e52d0e5..54bc5d1 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -23,6 +23,7 @@
>  #include "mmu-hash64.h"
>  #include "mmu-hash32.h"
>  #include "exec/cpu_ldst.h"
> +#include "helper_regs.h"
>  
>  //#define DEBUG_MMU
>  //#define DEBUG_BATS
> @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>      case POWERPC_MMU_2_03:
>      case POWERPC_MMU_2_06:
>      case POWERPC_MMU_2_07:
> +        env->tlb_need_flush = 0;
>  #endif /* defined(TARGET_PPC64) */
>          tlb_flush(CPU(cpu), 1);
>          break;

Any particular reason you're leaving this one as an immediate rather
than deferred flush?

> @@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
>           *      we just invalidate all TLBs
>           */
> -        tlb_flush(CPU(cpu), 1);
> +        env->tlb_need_flush = 1;
>          break;
>  #endif /* defined(TARGET_PPC64) */
>      default:
> @@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
>  }
>  
>  
> +void helper_check_tlb_flush(CPUPPCState *env)
> +{
> +    check_tlb_flush(env);
> +}
> +
>  /*****************************************************************************/
>  
>  /* try to fill the TLB and return an exception if error. If retaddr is
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 6d9f252..e18d204 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx)
>  {
>  }
>  
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void gen_check_tlb_flush(DisasContext *ctx)
> +{
> +    TCGv_i32 t = tcg_temp_new_i32();
> +    TCGLabel *l = gen_new_label();
> +
> +    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> +    gen_helper_check_tlb_flush(cpu_env);
> +    gen_set_label(l);
> +    tcg_temp_free_i32(t);
> +}
> +#else
> +static inline void gen_check_tlb_flush(DisasContext *ctx) { }
> +#endif
> +
>  /* isync */
>  static void gen_isync(DisasContext *ctx)
>  {
> +    /*
> +     * We need to check for a pending TLB flush. This can only happen in
> +     * kernel mode however so check MSR_PR
> +     */
> +    if (!ctx->pr) {
> +        gen_check_tlb_flush(ctx);
> +    }
>      gen_stop_exception(ctx);
>  }
>  
> @@ -3458,6 +3481,15 @@ STCX(stqcx_, 16);
>  /* sync */
>  static void gen_sync(DisasContext *ctx)
>  {
> +    uint32_t l = (ctx->opcode >> 21) & 3;
> +
> +    /*
> +     * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
> +     * This can only happen in kernel mode however so check MSR_PR as well.
> +     */
> +    if (l == 2 && !ctx->pr) {
> +        gen_check_tlb_flush(ctx);
> +    }
>  }
>  
>  /* wait */
> @@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx)
>          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>          return;
>      }
> -    /* This has no effect: it should ensure that all previous
> -     * tlbie have completed
> +    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
> +     * embedded however needs to deal with tlbsync. We don't try to be
> +     * fancy and swallow the overhead of checking for both.
>       */
> -    gen_stop_exception(ctx);
> +    gen_check_tlb_flush(ctx);
>  #endif
>  }
>  

Should you be clearing the pending flush flag in cpu_reset()?
Benjamin Herrenschmidt Nov. 16, 2015, 10:16 a.m. UTC | #2
On Mon, 2015-11-16 at 16:00 +1100, David Gibson wrote:

> >  //#define DEBUG_MMU
> >  //#define DEBUG_BATS
> > @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> >      case POWERPC_MMU_2_03:
> >      case POWERPC_MMU_2_06:
> >      case POWERPC_MMU_2_07:
> > +        env->tlb_need_flush = 0;
> >  #endif /* defined(TARGET_PPC64) */
> >          tlb_flush(CPU(cpu), 1);
> >          break;
> 
> Any particular reason you're leaving this one as an immediate rather
> than deferred flush?

A couple yes. It's mostly unused on server CPUs (we don't do tlbia),
and it's used by ppc_cpu_reset(). In that latter case, I like having
everything really cleaned up ... 

> Should you be clearing the pending flush flag cpu_reset()?

That should happen as a result of the above.

Cheers,
Ben.
David Gibson Nov. 19, 2015, 6:09 a.m. UTC | #3
On Mon, Nov 16, 2015 at 09:16:08PM +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2015-11-16 at 16:00 +1100, David Gibson wrote:
> > 
> > >  //#define DEBUG_MMU
> > >  //#define DEBUG_BATS
> > > @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> > >      case POWERPC_MMU_2_03:
> > >      case POWERPC_MMU_2_06:
> > >      case POWERPC_MMU_2_07:
> > > +        env->tlb_need_flush = 0;
> > >  #endif /* defined(TARGET_PPC64) */
> > >          tlb_flush(CPU(cpu), 1);
> > >          break;
> > 
> > Any particular reason you're leaving this one as an immediate rather
> > than deferred flush?
> 
> A couple yes. It's mostly unused on server CPUs (we don't do tlbia),
> and it's used by ppc_cpu_reset(). In that latter case, I like having
> everything really cleaned up ... 
> 
> > Should you be clearing the pending flush flag cpu_reset()?
> 
> That should happen as a result of the above.

Ok.
diff mbox

Patch

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index cebceea..7e2cb4b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -220,6 +220,7 @@  static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     switch (ret) {
     case REMOVE_SUCCESS:
+        check_tlb_flush(env);
         return H_SUCCESS;
 
     case REMOVE_NOT_FOUND:
@@ -257,6 +258,7 @@  static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
+    target_ulong rc = H_SUCCESS;
     int i;
 
     for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
@@ -290,14 +292,18 @@  static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             break;
 
         case REMOVE_PARM:
-            return H_PARAMETER;
+            rc = H_PARAMETER;
+            goto exit;
 
         case REMOVE_HW:
-            return H_HARDWARE;
+            rc = H_HARDWARE;
+            goto exit;
         }
     }
+ exit:
+    check_tlb_flush(env);
 
-    return H_SUCCESS;
+    return rc;
 }
 
 static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index aaa7117..e6c43f9 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1013,6 +1013,8 @@  struct CPUPPCState {
     /* PowerPC 64 SLB area */
     ppc_slb_t slb[MAX_SLB_ENTRIES];
     int32_t slb_nr;
+    /* tcg TLB needs flush (deferred slb inval instruction typically) */
+    uint32_t tlb_need_flush;
 #endif
     /* segment registers */
     hwaddr htab_base;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 3e39098..c1d6605 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -671,6 +671,11 @@  static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     /* Reset exception state */
     cs->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
+
+    /* Any interrupt is context synchronizing, check if TCG TLB
+     * needs a delayed flush on ppc64
+     */
+    check_tlb_flush(env);
 }
 
 void ppc_cpu_do_interrupt(CPUState *cs)
@@ -692,6 +697,7 @@  static void ppc_hw_interrupt(CPUPPCState *env)
                   __func__, env, env->pending_interrupts,
                   cs->interrupt_request, (int)msr_me, (int)msr_ee);
 #endif
+
     /* External reset */
     if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
         env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
@@ -896,6 +902,9 @@  static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
      * as rfi is always the last insn of a TB
      */
     cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+
+    /* Context synchronizing: check if TCG TLB needs flush */
+    check_tlb_flush(env);
 }
 
 void helper_rfi(CPUPPCState *env)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 869be15..ff2d50b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -16,6 +16,7 @@  DEF_HELPER_1(rfmci, void, env)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
 #endif
+DEF_HELPER_1(check_tlb_flush, void, env)
 #endif
 
 DEF_HELPER_3(lmw, void, env, tl, i32)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index f7edd5b..57da931 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -151,4 +151,17 @@  static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     return excp;
 }
 
+#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+static inline void check_tlb_flush(CPUPPCState *env)
+{
+    CPUState *cs = CPU(ppc_env_get_cpu(env));
+    if (env->tlb_need_flush) {
+        env->tlb_need_flush = 0;
+        tlb_flush(cs, 1);
+    }
+}
+#else
+static inline void check_tlb_flush(CPUPPCState *env) { }
+#endif
+
 #endif /* !defined(__HELPER_REGS_H__) */
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 7df6ede..71e1d14 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -97,10 +97,8 @@  void dump_slb(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
 
 void helper_slbia(CPUPPCState *env)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
-    int n, do_invalidate;
+    int n;
 
-    do_invalidate = 0;
     /* XXX: Warning: slbia never invalidates the first segment */
     for (n = 1; n < env->slb_nr; n++) {
         ppc_slb_t *slb = &env->slb[n];
@@ -111,17 +109,13 @@  void helper_slbia(CPUPPCState *env)
              *      and we still don't have a tlb_flush_mask(env, n, mask)
              *      in QEMU, we just invalidate all TLBs
              */
-            do_invalidate = 1;
+            env->tlb_need_flush = true;
         }
     }
-    if (do_invalidate) {
-        tlb_flush(CPU(cpu), 1);
-    }
 }
 
 void helper_slbie(CPUPPCState *env, target_ulong addr)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_slb_t *slb;
 
     slb = slb_lookup(env, addr);
@@ -136,7 +130,7 @@  void helper_slbie(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask)
          *      in QEMU, we just invalidate all TLBs
          */
-        tlb_flush(CPU(cpu), 1);
+        env->tlb_need_flush = true;
     }
 }
 
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index e52d0e5..54bc5d1 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -23,6 +23,7 @@ 
 #include "mmu-hash64.h"
 #include "mmu-hash32.h"
 #include "exec/cpu_ldst.h"
+#include "helper_regs.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_BATS
@@ -1940,6 +1941,7 @@  void ppc_tlb_invalidate_all(CPUPPCState *env)
     case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
     case POWERPC_MMU_2_07:
+        env->tlb_need_flush = 0;
 #endif /* defined(TARGET_PPC64) */
         tlb_flush(CPU(cpu), 1);
         break;
@@ -2019,7 +2021,7 @@  void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
          *      we just invalidate all TLBs
          */
-        tlb_flush(CPU(cpu), 1);
+        env->tlb_need_flush = 1;
         break;
 #endif /* defined(TARGET_PPC64) */
     default:
@@ -2904,6 +2906,11 @@  void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
 }
 
 
+void helper_check_tlb_flush(CPUPPCState *env)
+{
+    check_tlb_flush(env);
+}
+
 /*****************************************************************************/
 
 /* try to fill the TLB and return an exception if error. If retaddr is
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 6d9f252..e18d204 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3299,9 +3299,32 @@  static void gen_eieio(DisasContext *ctx)
 {
 }
 
+#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+static inline void gen_check_tlb_flush(DisasContext *ctx)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+    TCGLabel *l = gen_new_label();
+
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
+    gen_helper_check_tlb_flush(cpu_env);
+    gen_set_label(l);
+    tcg_temp_free_i32(t);
+}
+#else
+static inline void gen_check_tlb_flush(DisasContext *ctx) { }
+#endif
+
 /* isync */
 static void gen_isync(DisasContext *ctx)
 {
+    /*
+     * We need to check for a pending TLB flush. This can only happen in
+     * kernel mode however so check MSR_PR
+     */
+    if (!ctx->pr) {
+        gen_check_tlb_flush(ctx);
+    }
     gen_stop_exception(ctx);
 }
 
@@ -3458,6 +3481,15 @@  STCX(stqcx_, 16);
 /* sync */
 static void gen_sync(DisasContext *ctx)
 {
+    uint32_t l = (ctx->opcode >> 21) & 3;
+
+    /*
+     * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
+     * This can only happen in kernel mode however so check MSR_PR as well.
+     */
+    if (l == 2 && !ctx->pr) {
+        gen_check_tlb_flush(ctx);
+    }
 }
 
 /* wait */
@@ -4851,10 +4883,11 @@  static void gen_tlbsync(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
-    /* This has no effect: it should ensure that all previous
-     * tlbie have completed
+    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
+     * embedded however needs to deal with tlbsync. We don't try to be
+     * fancy and swallow the overhead of checking for both.
      */
-    gen_stop_exception(ctx);
+    gen_check_tlb_flush(ctx);
 #endif
 }