diff mbox

[RFC,4/4] target-ppc: flush tlb from all the cpu

Message ID 1473034203.2313.38.camel@kernel.crashing.org
State New
Headers show

Commit Message

Benjamin Herrenschmidt Sept. 5, 2016, 12:10 a.m. UTC
On Sun, 2016-09-04 at 18:00 +0100, Alex Bennée wrote:

> When is the synchronisation point? On ARM we end the basic block on
> system instructions that mess with the cache. As a result the flush
> is done as soon as we exit the run loop on the next instruction.

Talking o this... Nikunj, I notice, all our TLB flushing is only ever
done on the "current" CPU. I mean today, without MT-TCG. That looks
broken already isn't it ?

Looking at ARM, they do this:

/* IS variants of TLB operations must affect all cores */
static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
{
    CPUState *other_cs;

    CPU_FOREACH(other_cs) {
        tlb_flush(other_cs, 1);
    }
}

I wonder if we should audit all tlb_flush() calls in target-ppc to
do the right thing as well ?

Something like the (untested, not even compiled as I have to run) patch
below.

Now to do things a bit better, we could split the check_tlb_flush() helper
(or pass an argument) to tell it whether to check/flush other CPUs or not.

All the slb operations and tlbiel only need to affect the current CPU, but
broadcast tlbie's (and thus H_REMOVE) should have a global affect. We could
add another flag to the env in addition to the tlb_need_flush, something like
tlb_need_global_flush which is set on tlbie instructions to inform
check_tlb_flush what to do.


Cheers,
Ben.

Comments

Nikunj A Dadhania Sept. 6, 2016, 1:55 a.m. UTC | #1
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Sun, 2016-09-04 at 18:00 +0100, Alex Bennée wrote:
>
>> When is the synchronisation point? On ARM we end the basic block on
>> system instructions that mess with the cache. As a result the flush
>> is done as soon as we exit the run loop on the next instruction.
>
> Talking o this... Nikunj, I notice, all our TLB flushing is only ever
> done on the "current" CPU. I mean today, without MT-TCG. That looks
> broken already isn't it ?

Without MT-TCG, there was only one cpu, so I think we never hit that
issue.

>
> Looking at ARM, they do this:
>
> /* IS variants of TLB operations must affect all cores */
> static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
>                              uint64_t value)
> {
>     CPUState *other_cs;
>
>     CPU_FOREACH(other_cs) {
>         tlb_flush(other_cs, 1);
>     }
> }
>
> I wonder if we should audit all tlb_flush() calls in target-ppc to
> do the right thing as well ?
>
> Something like the (untested, not even compiled as I have to run) patch
> below.
>
> Now to do things a bit better, we could split the check_tlb_flush() helper
> (or pass an argument) to tell it whether to check/flush other CPUs or not.
>
> All the slb operations and tlbiel only need to affect the current CPU, but
> broadcast tlbie's (and thus H_REMOVE) should have a global affect. We could
> add another flag to the env in addition to the tlb_need_flush, something like
> tlb_need_global_flush which is set on tlbie instructions to inform
> check_tlb_flush what to do.
>
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 0ee0e5a..f2302ec 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -959,8 +959,13 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
>  {
>      CPUState *cs = CPU(ppc_env_get_cpu(env));
>
> -    /* MSR:POW cannot be set by any form of rfi */
> -    msr &= ~(1ULL << MSR_POW);
> +    /* These bits cannot be set by RFI on non-BookE systems and so must
> +     * be filtered out. 6xx and 7xxx with SW TLB management will put
> +     * TLB related junk in there among other things.
> +     */
> +    if (!(env->excp_model & POWERPC_EXCP_BOOKE)) {
> +            msr &= ~(target_ulong)0xf0000;
> +    }
>
>  #if defined(TARGET_PPC64)
>      /* Switching to 32-bit ? Crop the nip */
> @@ -990,7 +995,6 @@ void helper_rfi(CPUPPCState *env)
>      do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1] & 0xfffffffful);
>  }
>
> -#define MSR_BOOK3S_MASK
>  #if defined(TARGET_PPC64)
>  void helper_rfid(CPUPPCState *env)
>  {
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 3d279f1..f3eb21d 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -156,10 +156,15 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>  #if !defined(CONFIG_USER_ONLY)
>  static inline void check_tlb_flush(CPUPPCState *env)
>  {
> -    CPUState *cs = CPU(ppc_env_get_cpu(env));
> -    if (env->tlb_need_flush) {
> -        env->tlb_need_flush = 0;
> -        tlb_flush(cs, 1);
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        CPUPPCState *env = &cpu->env;
> +        if (env->tlb_need_flush) {
> +            env->tlb_need_flush = 0;
> +            tlb_flush(cs, 1);
> +        }
>      }
>  }
>  #else
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 8118143..a76c92b 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -907,12 +907,16 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
>                                 target_ulong pte_index,
>                                 target_ulong pte0, target_ulong pte1)
>  {
> -    /*
> -     * XXX: given the fact that there are too many segments to
> -     * invalidate, and we still don't have a tlb_flush_mask(env, n,
> -     * mask) in QEMU, we just invalidate all TLBs
> -     */
> -    tlb_flush(CPU(cpu), 1);
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        /*
> +         * XXX: given the fact that there are too many segments to
> +         * invalidate, and we still don't have a tlb_flush_mask(env, n,
> +         * mask) in QEMU, we just invalidate all TLBs
> +         */
> +        tlb_flush(cs, 1);
> +    }
>  }
>
>  void ppc_hash64_update_rmls(CPUPPCState *env)
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index 696bb03..1d84fc4 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -2758,6 +2758,7 @@ static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
>  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
>  {
>      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> +    CPUState *other_cs;
>
>      if (address & 0x4) {
>          /* flush all entries */
> @@ -2774,11 +2775,15 @@ void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
>      if (address & 0x8) {
>          /* flush TLB1 entries */
>          booke206_invalidate_ea_tlb(env, 1, address);
> -        tlb_flush(CPU(cpu), 1);
> +        CPU_FOREACH(other_cs) {
> +            tlb_flush(other_cs, 1);
> +        }
>      } else {
>          /* flush TLB0 entries */
>          booke206_invalidate_ea_tlb(env, 0, address);
> -        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
> +        CPU_FOREACH(other_cs) {
> +            tlb_flush_page(other_cs, address & MAS2_EPN_MASK);
> +        }
>      }
>  }

Sure, will have a round of testing.

Regards
Nikunj
Benjamin Herrenschmidt Sept. 6, 2016, 3:05 a.m. UTC | #2
On Tue, 2016-09-06 at 07:25 +0530, Nikunj A Dadhania wrote:
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> 
> > 
> > On Sun, 2016-09-04 at 18:00 +0100, Alex Bennée wrote:
> > 
> > > 
> > > When is the synchronisation point? On ARM we end the basic block on
> > > system instructions that mess with the cache. As a result the flush
> > > is done as soon as we exit the run loop on the next instruction.
> > 
> > Talking o this... Nikunj, I notice, all our TLB flushing is only ever
> > done on the "current" CPU. I mean today, without MT-TCG. That looks
> > broken already isn't it ?
> 
> Without MT-TCG, there was only one cpu, so I think we never hit that
> issue.

No there isn't. You can start qemu with --smp 4 and have 4 CPUs. It
will alternate between them, but they *will* have differrent TLBs.
Nikunj A Dadhania Sept. 6, 2016, 4:53 a.m. UTC | #3
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Tue, 2016-09-06 at 07:25 +0530, Nikunj A Dadhania wrote:
>> > Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
>> 
>> > 
>> > On Sun, 2016-09-04 at 18:00 +0100, Alex Bennée wrote:
>> > 
>> > > 
>> > > When is the synchronisation point? On ARM we end the basic block on
>> > > system instructions that mess with the cache. As a result the flush
>> > > is done as soon as we exit the run loop on the next instruction.
>> > 
>> > Talking o this... Nikunj, I notice, all our TLB flushing is only ever
>> > done on the "current" CPU. I mean today, without MT-TCG. That looks
>> > broken already isn't it ?
>> 
>> Without MT-TCG, there was only one cpu, so I think we never hit that
>> issue.
>
> No there isn't. You can start qemu with --smp 4 and have 4 CPUs.

That case was prevented to even start in case of TCG. That is why I had
to add "target-ppc: with MTTCG report more threads"

> It will alternate between them, but they *will* have differrent TLBs.

Regards
Nikunj
Benjamin Herrenschmidt Sept. 6, 2016, 5:30 a.m. UTC | #4
On Tue, 2016-09-06 at 10:23 +0530, Nikunj A Dadhania wrote:
> >
> > No there isn't. You can start qemu with --smp 4 and have 4 CPUs.
> 
> That case was prevented to even start in case of TCG. That is why I had
> to add "target-ppc: with MTTCG report more threads"

No, it works, you are confusing cores and threads ... you always could
start TCG with multiple cores. Try the powernv or the mac model...

Cheers,
Ben.
Nikunj A Dadhania Sept. 6, 2016, 6:57 a.m. UTC | #5
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Tue, 2016-09-06 at 10:23 +0530, Nikunj A Dadhania wrote:
>> >
>> > No there isn't. You can start qemu with --smp 4 and have 4 CPUs.
>> 
>> That case was prevented to even start in case of TCG. That is why I had
>> to add "target-ppc: with MTTCG report more threads"
>
> No, it works, you are confusing cores and threads ... you always could
> start TCG with multiple cores. Try the powernv or the mac model...

Yes, your right, forgot about the multi core case.

Regards
Nikunj
diff mbox

Patch

diff --git a/roms/SLOF b/roms/SLOF
--- a/roms/SLOF
+++ b/roms/SLOF
@@ -1 +1 @@ 
-Subproject commit e3d05727a074619fc12d0a67f05cf2c42c875cce
+Subproject commit e3d05727a074619fc12d0a67f05cf2c42c875cce-dirty
diff --git a/roms/openbios b/roms/openbios
index e79bca6..46ee135 160000
--- a/roms/openbios
+++ b/roms/openbios
@@ -1 +1 @@ 
-Subproject commit e79bca64838c96ec44fd7acd508879c5284233dd
+Subproject commit 46ee1352c50aa891e3dce9b3e3428ae9a5703fbe-dirty
diff --git a/roms/seabios b/roms/seabios
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@ 
-Subproject commit e2fc41e24ee0ada60fc511d60b15a41b294538be
+Subproject commit e2fc41e24ee0ada60fc511d60b15a41b294538be-dirty
diff --git a/roms/vgabios b/roms/vgabios
--- a/roms/vgabios
+++ b/roms/vgabios
@@ -1 +1 @@ 
-Subproject commit 19ea12c230ded95928ecaef0db47a82231c2e485
+Subproject commit 19ea12c230ded95928ecaef0db47a82231c2e485-dirty
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 0ee0e5a..f2302ec 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -959,8 +959,13 @@  static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
 {
     CPUState *cs = CPU(ppc_env_get_cpu(env));
 
-    /* MSR:POW cannot be set by any form of rfi */
-    msr &= ~(1ULL << MSR_POW);
+    /* These bits cannot be set by RFI on non-BookE systems and so must
+     * be filtered out. 6xx and 7xxx with SW TLB management will put
+     * TLB related junk in there among other things.
+     */
+    if (!(env->excp_model & POWERPC_EXCP_BOOKE)) {
+            msr &= ~(target_ulong)0xf0000;
+    }
 
 #if defined(TARGET_PPC64)
     /* Switching to 32-bit ? Crop the nip */
@@ -990,7 +995,6 @@  void helper_rfi(CPUPPCState *env)
     do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1] & 0xfffffffful);
 }
 
-#define MSR_BOOK3S_MASK
 #if defined(TARGET_PPC64)
 void helper_rfid(CPUPPCState *env)
 {
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 3d279f1..f3eb21d 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -156,10 +156,15 @@  static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
 #if !defined(CONFIG_USER_ONLY)
 static inline void check_tlb_flush(CPUPPCState *env)
 {
-    CPUState *cs = CPU(ppc_env_get_cpu(env));
-    if (env->tlb_need_flush) {
-        env->tlb_need_flush = 0;
-        tlb_flush(cs, 1);
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *env = &cpu->env;
+        if (env->tlb_need_flush) {
+            env->tlb_need_flush = 0;
+            tlb_flush(cs, 1);
+        }
     }
 }
 #else
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 8118143..a76c92b 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -907,12 +907,16 @@  void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
                                target_ulong pte_index,
                                target_ulong pte0, target_ulong pte1)
 {
-    /*
-     * XXX: given the fact that there are too many segments to
-     * invalidate, and we still don't have a tlb_flush_mask(env, n,
-     * mask) in QEMU, we just invalidate all TLBs
-     */
-    tlb_flush(CPU(cpu), 1);
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        /*
+         * XXX: given the fact that there are too many segments to
+         * invalidate, and we still don't have a tlb_flush_mask(env, n,
+         * mask) in QEMU, we just invalidate all TLBs
+         */
+        tlb_flush(cs, 1);
+    }
 }
 
 void ppc_hash64_update_rmls(CPUPPCState *env)
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 696bb03..1d84fc4 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -2758,6 +2758,7 @@  static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
 void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    CPUState *other_cs;
 
     if (address & 0x4) {
         /* flush all entries */
@@ -2774,11 +2775,15 @@  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
     if (address & 0x8) {
         /* flush TLB1 entries */
         booke206_invalidate_ea_tlb(env, 1, address);
-        tlb_flush(CPU(cpu), 1);
+        CPU_FOREACH(other_cs) {
+            tlb_flush(other_cs, 1);
+        }
     } else {
         /* flush TLB0 entries */
         booke206_invalidate_ea_tlb(env, 0, address);
-        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
+        CPU_FOREACH(other_cs) {
+            tlb_flush_page(other_cs, address & MAS2_EPN_MASK);
+        }
     }
 }