diff mbox

[09/13] KVM: PPC: Book3S HV: Adapt TLB invalidations to work on POWER9

Message ID 1479454122-26994-10-git-send-email-paulus@ozlabs.org (mailing list archive)
State Superseded
Headers show

Commit Message

Paul Mackerras Nov. 18, 2016, 7:28 a.m. UTC
POWER9 adds new capabilities to the tlbie (TLB invalidate entry)
and tlbiel (local tlbie) instructions.  Both instructions get a
set of new parameters (RIC, PRS and R) which appear as bits in the
instruction word.  The tlbiel instruction now has a second register
operand, which contains a PID and/or LPID value if needed, and
should otherwise contain 0.

This adapts KVM-HV's usage of tlbie and tlbiel to work on POWER9
as well as older processors.  Since we only handle HPT guests so
far, we need RIC=0 PRS=0 R=0, which ends up with the same instruction
word as on previous processors, so we don't need to conditionally
execute different instructions depending on the processor.

The local flush on first entry to a guest in book3s_hv_rmhandlers.S
is a loop which depends on the number of TLB sets.  Rather than
using feature sections to set the number of iterations based on
which CPU we're on, we now work out this number at VM creation time
and store it in the kvm_arch struct.  That will make it possible to
get the number from the device tree in future, which will help with
compatibility with future processors.

Since mmu_partition_table_set_entry() does a global flush of the
whole LPID, we don't need to do the TLB flush on first entry to the
guest on each processor.  Therefore we don't set all bits in the
tlb_need_flush bitmap on VM startup on POWER9.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_host.h     |  1 +
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 17 ++++++++++++++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c     | 10 ++++++++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  8 ++------
 5 files changed, 28 insertions(+), 9 deletions(-)

Comments

Aneesh Kumar K.V Nov. 18, 2016, 2:41 p.m. UTC | #1
Paul Mackerras <paulus@ozlabs.org> writes:

> POWER9 adds new capabilities to the tlbie (TLB invalidate entry)
> and tlbiel (local tlbie) instructions.  Both instructions get a
> set of new parameters (RIC, PRS and R) which appear as bits in the
> instruction word.  The tlbiel instruction now has a second register
> operand, which contains a PID and/or LPID value if needed, and
> should otherwise contain 0.
>
> This adapts KVM-HV's usage of tlbie and tlbiel to work on POWER9
> as well as older processors.  Since we only handle HPT guests so
> far, we need RIC=0 PRS=0 R=0, which ends up with the same instruction
> word as on previous processors, so we don't need to conditionally
> execute different instructions depending on the processor.
>
> The local flush on first entry to a guest in book3s_hv_rmhandlers.S
> is a loop which depends on the number of TLB sets.  Rather than
> using feature sections to set the number of iterations based on
> which CPU we're on, we now work out this number at VM creation time
> and store it in the kvm_arch struct.  That will make it possible to
> get the number from the device tree in future, which will help with
> compatibility with future processors.
>
> Since mmu_partition_table_set_entry() does a global flush of the
> whole LPID, we don't need to do the TLB flush on first entry to the
> guest on each processor.  Therefore we don't set all bits in the
> tlb_need_flush bitmap on VM startup on POWER9.
>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> ---
>  arch/powerpc/include/asm/kvm_host.h     |  1 +
>  arch/powerpc/kernel/asm-offsets.c       |  1 +
>  arch/powerpc/kvm/book3s_hv.c            | 17 ++++++++++++++++-
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c     | 10 ++++++++--
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S |  8 ++------
>  5 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 0d94608..ea78864 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -244,6 +244,7 @@ struct kvm_arch_memory_slot {
>  struct kvm_arch {
>  	unsigned int lpid;
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +	unsigned int tlb_sets;
>  	unsigned long hpt_virt;
>  	struct revmap_entry *revmap;
>  	atomic64_t mmio_update;
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 494241b..b9c8386 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -487,6 +487,7 @@ int main(void)
>  
>  	/* book3s */
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +	DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets));
>  	DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
>  	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
>  	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 59e18dfb..8395a7f 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3260,8 +3260,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>  	 * Since we don't flush the TLB when tearing down a VM,
>  	 * and this lpid might have previously been used,
>  	 * make sure we flush on each core before running the new VM.
> +	 * On POWER9, the tlbie in mmu_partition_table_set_entry()
> +	 * does this flush for us.
>  	 */
> -	cpumask_setall(&kvm->arch.need_tlb_flush);
> +	if (!cpu_has_feature(CPU_FTR_ARCH_300))
> +		cpumask_setall(&kvm->arch.need_tlb_flush);
>  
>  	/* Start out with the default set of hcalls enabled */
>  	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
> @@ -3287,6 +3290,17 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>  	kvm->arch.lpcr = lpcr;
>  
>  	/*
> +	 * Work out how many sets the TLB has, for the use of
> +	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
> +	 */
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		kvm->arch.tlb_sets = 256;	/* POWER9 */
> +	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
> +		kvm->arch.tlb_sets = 512;	/* POWER8 */
> +	else
> +		kvm->arch.tlb_sets = 128;	/* POWER7 */
> +

We have 

#define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
#define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
#define POWER9_TLB_SETS_HASH	256	/* # sets in POWER9 TLB Hash mode */
#define POWER9_TLB_SETS_RADIX	128	/* # sets in POWER9 TLB Radix mode */

May be use that instead of opencoding ?


> +	/*
>  	 * Track that we now have a HV mode VM active. This blocks secondary
>  	 * CPU threads from coming online.
>  	 */
> @@ -3728,3 +3742,4 @@ module_exit(kvmppc_book3s_exit_hv);
>  MODULE_LICENSE("GPL");
>  MODULE_ALIAS_MISCDEV(KVM_MINOR);
>  MODULE_ALIAS("devname:kvm");
> +
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 1179e40..9ef3c4b 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -424,13 +424,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
>  {
>  	long i;
>  
> +	/*
> +	 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
> +	 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
> +	 * the RS field, this is backwards-compatible with P7 and P8.
> +	 */
>  	if (global) {
>  		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
>  			cpu_relax();
>  		if (need_sync)
>  			asm volatile("ptesync" : : : "memory");
>  		for (i = 0; i < npages; ++i)
> -			asm volatile(PPC_TLBIE(%1,%0) : :
> +			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
>  				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
>  		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
>  		kvm->arch.tlbie_lock = 0;
> @@ -438,7 +443,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
>  		if (need_sync)
>  			asm volatile("ptesync" : : : "memory");
>  		for (i = 0; i < npages; ++i)
> -			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
> +			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
> +				     "r" (rbvalues[i]), "r" (0));
>  		asm volatile("ptesync" : : : "memory");
>  	}
>  }
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 219a04f..acae5c3 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -613,12 +613,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
>  	stdcx.	r7,0,r6
>  	bne	23b
>  	/* Flush the TLB of any entries for this LPID */
> -	/* use arch 2.07S as a proxy for POWER8 */
> -BEGIN_FTR_SECTION
> -	li	r6,512			/* POWER8 has 512 sets */
> -FTR_SECTION_ELSE
> -	li	r6,128			/* POWER7 has 128 sets */
> -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
> +	lwz	r6,KVM_TLB_SETS(r9)
> +	li	r0,0			/* RS for P9 version of tlbiel */
>  	mtctr	r6
>  	li	r7,0x800		/* IS field = 0b10 */
>  	ptesync
> -- 
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Nov. 18, 2016, 9:57 p.m. UTC | #2
On Fri, 2016-11-18 at 20:11 +0530, Aneesh Kumar K.V wrote:
> > +      * Work out how many sets the TLB has, for the use of
> > +      * the TLB invalidation loop in book3s_hv_rmhandlers.S.
> > +      */
> > +     if (cpu_has_feature(CPU_FTR_ARCH_300))
> > +             kvm->arch.tlb_sets = 256;       /* POWER9 */
> > +     else if (cpu_has_feature(CPU_FTR_ARCH_207S))
> > +             kvm->arch.tlb_sets = 512;       /* POWER8 */
> > +     else
> > +             kvm->arch.tlb_sets = 128;       /* POWER7 */
> > +
> 
> We have 
> 
> #define POWER7_TLB_SETS         128     /* # sets in POWER7 TLB */
> #define POWER8_TLB_SETS         512     /* # sets in POWER8 TLB */
> #define POWER9_TLB_SETS_HASH    256     /* # sets in POWER9 TLB Hash mode */
> #define POWER9_TLB_SETS_RADIX   128     /* # sets in POWER9 TLB Radix mode */
> 
> May be use that instead of opencoding ?

Both are bad and are going to kill us for future backward
compatibility.

These should be a device-tree property. We can fallback to hard wired
values if it doesn't exist but we should at least look for one.

Note: P8 firmwares all have a bug creating a bogus "tlb-sets" property
in the CPU node, so let's create a new one instead, with 2 entries
(hash vs. radix) or 2 new ones, one for hash and one for radix (when
available).

Cheers,
Ben.
Paul Mackerras Nov. 19, 2016, 4:13 a.m. UTC | #3
On Fri, Nov 18, 2016 at 08:11:34PM +0530, Aneesh Kumar K.V wrote:
> > @@ -3287,6 +3290,17 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
> >  	kvm->arch.lpcr = lpcr;
> >  
> >  	/*
> > +	 * Work out how many sets the TLB has, for the use of
> > +	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
> > +	 */
> > +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> > +		kvm->arch.tlb_sets = 256;	/* POWER9 */
> > +	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
> > +		kvm->arch.tlb_sets = 512;	/* POWER8 */
> > +	else
> > +		kvm->arch.tlb_sets = 128;	/* POWER7 */
> > +
> 
> We have 
> 
> #define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
> #define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
> #define POWER9_TLB_SETS_HASH	256	/* # sets in POWER9 TLB Hash mode */
> #define POWER9_TLB_SETS_RADIX	128	/* # sets in POWER9 TLB Radix mode */
> 
> May be use that instead of opencoding ?

Doing that would make it easier to check that we're using the same
values everywhere but harder to see what actual numbers we're
getting.  I guess I could use the symbols and put the values in the
comments.  In any case, in future these values are just going to be
default values if we can't find a suitable device-tree property.

Paul.
Paul Mackerras Nov. 19, 2016, 4:14 a.m. UTC | #4
On Sat, Nov 19, 2016 at 08:57:28AM +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2016-11-18 at 20:11 +0530, Aneesh Kumar K.V wrote:
> > > +      * Work out how many sets the TLB has, for the use of
> > > +      * the TLB invalidation loop in book3s_hv_rmhandlers.S.
> > > +      */
> > > +     if (cpu_has_feature(CPU_FTR_ARCH_300))
> > > +             kvm->arch.tlb_sets = 256;       /* POWER9 */
> > > +     else if (cpu_has_feature(CPU_FTR_ARCH_207S))
> > > +             kvm->arch.tlb_sets = 512;       /* POWER8 */
> > > +     else
> > > +             kvm->arch.tlb_sets = 128;       /* POWER7 */
> > > +
> > 
> > We have 
> > 
> > #define POWER7_TLB_SETS         128     /* # sets in POWER7 TLB */
> > #define POWER8_TLB_SETS         512     /* # sets in POWER8 TLB */
> > #define POWER9_TLB_SETS_HASH    256     /* # sets in POWER9 TLB Hash mode */
> > #define POWER9_TLB_SETS_RADIX   128     /* # sets in POWER9 TLB Radix mode */
> > 
> > May be use that instead of opencoding ?
> 
> Both are bad and are going to kill us for future backward
> compatibility.
> 
> These should be a device-tree property. We can fallback to hard wired
> values if it doesn't exist but we should at least look for one.

Tell me what the property is called and I'll add code to use it. :)
That's the whole reason why I moved this to C code.

> Note: P8 firmwares all have a bug creating a bogus "tlb-sets" property
> in the CPU node, so let's create a new one instead, with 2 entries
> (hash vs. radix) or 2 new ones, one for hash and one for radix (when
> available).

Paul.
Benjamin Herrenschmidt Nov. 19, 2016, 4:41 a.m. UTC | #5
On Sat, 2016-11-19 at 15:14 +1100, Paul Mackerras wrote:
> 
> > These should be a device-tree property. We can fallback to hard wired
> > values if it doesn't exist but we should at least look for one.
> 
> Tell me what the property is called and I'll add code to use it. :)
> That's the whole reason why I moved this to C code.
> 
> > 
> > Note: P8 firmwares all have a bug creating a bogus "tlb-sets" property
> > in the CPU node, so let's create a new one instead, with 2 entries
> > (hash vs. radix) or 2 new ones, one for hash and one for radix (when
> > available).

Well, as I said above, there's a defined one but it has bogus values
on almost all P8 firwmares. So I think we need the core code to export
values for use by both the core mm and KVM which can then be picked up
from the DT with "quirks" to fixup the DT values.

(A bit like I did for the never-applied cache geometry patches)

That or we make up new names.

The question remains whether we need a separate property for radix
vs. hash though, we probably should as the "radix is half of hash"
might not be true on future chips.

Cheers,
Ben.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0d94608..ea78864 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -244,6 +244,7 @@  struct kvm_arch_memory_slot {
 struct kvm_arch {
 	unsigned int lpid;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	unsigned int tlb_sets;
 	unsigned long hpt_virt;
 	struct revmap_entry *revmap;
 	atomic64_t mmio_update;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 494241b..b9c8386 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -487,6 +487,7 @@  int main(void)
 
 	/* book3s */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets));
 	DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
 	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
 	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 59e18dfb..8395a7f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3260,8 +3260,11 @@  static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 * Since we don't flush the TLB when tearing down a VM,
 	 * and this lpid might have previously been used,
 	 * make sure we flush on each core before running the new VM.
+	 * On POWER9, the tlbie in mmu_partition_table_set_entry()
+	 * does this flush for us.
 	 */
-	cpumask_setall(&kvm->arch.need_tlb_flush);
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		cpumask_setall(&kvm->arch.need_tlb_flush);
 
 	/* Start out with the default set of hcalls enabled */
 	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
@@ -3287,6 +3290,17 @@  static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	kvm->arch.lpcr = lpcr;
 
 	/*
+	 * Work out how many sets the TLB has, for the use of
+	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm->arch.tlb_sets = 256;	/* POWER9 */
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		kvm->arch.tlb_sets = 512;	/* POWER8 */
+	else
+		kvm->arch.tlb_sets = 128;	/* POWER7 */
+
+	/*
 	 * Track that we now have a HV mode VM active. This blocks secondary
 	 * CPU threads from coming online.
 	 */
@@ -3728,3 +3742,4 @@  module_exit(kvmppc_book3s_exit_hv);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_MISCDEV(KVM_MINOR);
 MODULE_ALIAS("devname:kvm");
+
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 1179e40..9ef3c4b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -424,13 +424,18 @@  static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 {
 	long i;
 
+	/*
+	 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
+	 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
+	 * the RS field, this is backwards-compatible with P7 and P8.
+	 */
 	if (global) {
 		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 			cpu_relax();
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
 		for (i = 0; i < npages; ++i)
-			asm volatile(PPC_TLBIE(%1,%0) : :
+			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
 				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
 		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 		kvm->arch.tlbie_lock = 0;
@@ -438,7 +443,8 @@  static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
 		for (i = 0; i < npages; ++i)
-			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
+				     "r" (rbvalues[i]), "r" (0));
 		asm volatile("ptesync" : : : "memory");
 	}
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 219a04f..acae5c3 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -613,12 +613,8 @@  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	stdcx.	r7,0,r6
 	bne	23b
 	/* Flush the TLB of any entries for this LPID */
-	/* use arch 2.07S as a proxy for POWER8 */
-BEGIN_FTR_SECTION
-	li	r6,512			/* POWER8 has 512 sets */
-FTR_SECTION_ELSE
-	li	r6,128			/* POWER7 has 128 sets */
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+	lwz	r6,KVM_TLB_SETS(r9)
+	li	r0,0			/* RS for P9 version of tlbiel */
 	mtctr	r6
 	li	r7,0x800		/* IS field = 0b10 */
 	ptesync