
[RFC,26/32] KVM: PPC: Book3S HV: Emulate Privileged TLBIE for guest hypervisors

Message ID 1537524123-9578-27-git-send-email-paulus@ozlabs.org
State Superseded
Series KVM: PPC: Book3S HV: Nested HV virtualization

Commit Message

Paul Mackerras Sept. 21, 2018, 10:01 a.m. UTC
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>

When running a nested (L2) guest, the guest (L1) hypervisor will use
hypervisor-privileged TLB invalidation instructions (to manage its
partition-scoped page tables), which trap to L0 as hypervisor
emulation assistance interrupts. We emulate these instructions on
behalf of the L1 guest.

The tlbie instruction can invalidate different scopes (the operand
decode is sketched after this list):

Invalidate TLB for a given target address:
- This invalidates a single L2 -> L1 pte
- We need to invalidate any L2 -> L0 shadow_pgtable ptes which map the L2
  address range being invalidated, since a single L2 -> L1 pte may be
  backed by more than one pte in the L2 -> L0 page tables.

Invalidate the entire TLB for a given LPID or for all LPIDs:
- Invalidate the entire shadow_pgtable for a given nested guest, or
  for all nested guests.

Invalidate the PWC (page walk cache) for a given LPID or for all LPIDs:
- We don't cache the PWC, so there is nothing to do.

Invalidate the entire TLB, PWC and partition table for a given/all LPIDs:
- Here we free the entire nested state, since it will all need to be
  reinitialised anyway.
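
For reference, the scopes above are selected by fields of the tlbie
instruction image and of its RS and RB operands. A minimal decode
sketch, mirroring the get_*() helpers this patch adds to
book3s_hv_nested.c (the struct and function names here are
illustrative only, not part of the patch):

/* Illustrative sketch: gathers the tlbie fields exactly as the
 * patch's get_ric()/get_prs()/get_r()/get_lpid()/get_is()/get_ap()/
 * get_epn() helpers do.  instr is the instruction image; rs_val and
 * rb_val are the contents of RS and RB.
 */
struct tlbie_fields {
	int ric;		/* 0 = TLB, 1 = PWC, 2 = TLB, PWC and ptbl caching */
	int prs;		/* must be 0: partition scoped */
	int r;			/* must be 1: radix */
	int is;			/* 0 = address, 2 = one LPID, 3 = all LPIDs */
	int ap;			/* page-size encoding (valid when is == 0) */
	int lpid;		/* from RS */
	unsigned long epn;	/* effective page number (valid when is == 0) */
};

static void decode_tlbie(unsigned int instr, unsigned long rs_val,
			 unsigned long rb_val, struct tlbie_fields *f)
{
	f->ric  = (instr >> 18) & 0x3;
	f->prs  = (instr >> 17) & 0x1;
	f->r    = (instr >> 16) & 0x1;
	f->lpid = rs_val & 0xffffffff;
	f->is   = (rb_val >> 10) & 0x3;
	f->ap   = (rb_val >> 5) & 0x7;
	f->epn  = rb_val >> 12;
}

The (is, ric) pair then selects one of the scopes above; is == 1 and
ric == 3 are rejected as invalid in kvmhv_emulate_priv_tlbie().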

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  12 ++
 arch/powerpc/include/asm/ppc-opcode.h         |   1 +
 arch/powerpc/kvm/book3s_emulate.c             |   1 -
 arch/powerpc/kvm/book3s_hv_nested.c           | 198 +++++++++++++++++++++++++-
 4 files changed, 209 insertions(+), 3 deletions(-)

Comments

David Gibson Sept. 27, 2018, 4:12 a.m. UTC | #1
On Fri, Sep 21, 2018 at 08:01:57PM +1000, Paul Mackerras wrote:
> From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>


Patch

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index b3520b5..66db23e 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 	BUG();
 }
 
+static inline int ap_to_shift(unsigned long ap)
+{
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		if (mmu_psize_defs[psize].ap == ap)
+			return mmu_psize_defs[psize].shift;
+	}
+
+	return -1;
+}
+
 static inline unsigned long get_sllp_encoding(int psize)
 {
 	unsigned long sllp;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 665af14..6093bc8 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -104,6 +104,7 @@ 
 #define OP_31_XOP_LHZUX     311
 #define OP_31_XOP_MSGSNDP   142
 #define OP_31_XOP_MSGCLRP   174
+#define OP_31_XOP_TLBIE     306
 #define OP_31_XOP_MFSPR     339
 #define OP_31_XOP_LWAX      341
 #define OP_31_XOP_LHAX      343
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 2654df2..8c7e933 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -36,7 +36,6 @@ 
 #define OP_31_XOP_MTSR		210
 #define OP_31_XOP_MTSRIN	242
 #define OP_31_XOP_TLBIEL	274
-#define OP_31_XOP_TLBIE		306
 /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
 #define OP_31_XOP_FAKE_SC1	308
 #define OP_31_XOP_SLBMTE	402
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 9a50feb..84c82a2 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -457,7 +457,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
 }
 
 /* caller must hold gp->tlb_lock */
-void kvmhv_flush_nested(struct kvm_nested_guest *gp)
+static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
 {
 	struct kvm *kvm = gp->parent;
 
@@ -650,10 +650,204 @@ static int kvmhv_emulate_priv_mfspr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return EMULATE_FAIL;
 }
 
+static inline int get_ric(unsigned int instr)
+{
+	return (instr >> 18) & 0x3;
+}
+
+static inline int get_prs(unsigned int instr)
+{
+	return (instr >> 17) & 0x1;
+}
+
+static inline int get_r(unsigned int instr)
+{
+	return (instr >> 16) & 0x1;
+}
+
+static inline int get_lpid(unsigned long r_val)
+{
+	return r_val & 0xffffffff;
+}
+
+static inline int get_is(unsigned long r_val)
+{
+	return (r_val >> 10) & 0x3;
+}
+
+static inline int get_ap(unsigned long r_val)
+{
+	return (r_val >> 5) & 0x7;
+}
+
+static inline long get_epn(unsigned long r_val)
+{
+	return r_val >> 12;
+}
+
+static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
+					int ap, long epn)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	long npages;
+	int shift;
+	unsigned long addr;
+
+	shift = ap_to_shift(ap);
+	addr = epn << 12;
+	if (shift < 0) {
+		/* Invalid ap encoding */
+		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+		return EMULATE_AGAIN;
+	}
+
+	addr &= ~((1UL << shift) - 1);
+	npages = 1UL << (shift - PAGE_SHIFT);
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (!gp) /* No such guest -> nothing to do */
+		return EMULATE_DONE;
+	mutex_lock(&gp->tlb_lock);
+
+	/* There may be more than one host page backing this single guest pte */
+	do {
+		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shift);
+
+		npages -= 1UL << (shift - PAGE_SHIFT);
+		addr += 1UL << shift;
+	} while (npages > 0);
+
+	mutex_unlock(&gp->tlb_lock);
+	kvmhv_put_nested(gp);
+	return EMULATE_DONE;
+}
+
+static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
+				     struct kvm_nested_guest *gp, int ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	mutex_lock(&gp->tlb_lock);
+	switch (ric) {
+	case 0:
+		/* Invalidate TLB */
+		spin_lock(&kvm->mmu_lock);
+		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+					  gp->shadow_lpid);
+		radix__flush_tlb_lpid(gp->shadow_lpid);
+		spin_unlock(&kvm->mmu_lock);
+		break;
+	case 1:
+		/*
+		 * Invalidate PWC
+		 * We don't cache this -> nothing to do
+		 */
+		break;
+	case 2:
+		/* Invalidate TLB, PWC and caching of partition table entries */
+		kvmhv_flush_nested(gp);
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&gp->tlb_lock);
+}
+
+static int kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	int i, ret = EMULATE_DONE;
+
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
+		gp = kvm->arch.nested_guests[i];
+		if (gp) {
+			spin_unlock(&kvm->mmu_lock);
+			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+			spin_lock(&kvm->mmu_lock);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	return ret;
+}
+
+static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	int rs, rb;
+	int r, ric, prs, is, ap;
+	int lpid;
+	long epn;
+	int ret = EMULATE_DONE;
+
+	rs = get_rs(instr);
+	rb = get_rb(instr);
+
+	ric = get_ric(instr);
+	prs = get_prs(instr);
+	r = get_r(instr);
+	lpid = get_lpid(kvmppc_get_gpr(vcpu, rs));
+	is = get_is(kvmppc_get_gpr(vcpu, rb));
+
+	/*
+	 * These cases are invalid and __should__ have caused a machine check
+	 * r   != 1 -> Only radix supported
+	 * prs == 1 -> Not HV privileged
+	 * ric == 3 -> No cluster bombs for radix
+	 * is  == 1 -> Partition scoped translations not associated with pid
+	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
+	 */
+	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
+	    ((!is) && (ric == 1 || ric == 2))) {
+		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+		return EMULATE_AGAIN;
+	}
+
+	switch (is) {
+	case 0:
+		/*
+		 * We know ric == 0
+		 * Invalidate TLB for a given target address
+		 */
+		epn = get_epn(kvmppc_get_gpr(vcpu, rb));
+		ap = get_ap(kvmppc_get_gpr(vcpu, rb));
+		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
+		break;
+	case 2:
+		/* Invalidate matching LPID */
+		gp = kvmhv_get_nested(kvm, lpid, false);
+		if (gp) {
+			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+			kvmhv_put_nested(gp);
+		}
+		ret = EMULATE_DONE;
+		break;
+	case 3:
+		/* Invalidate ALL LPIDs */
+		ret = kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
+		break;
+	default:
+		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+		ret = EMULATE_AGAIN;
+		break;
+	}
+
+	return ret;
+}
+
 static int kvmhv_emulate_priv_op_31(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				    unsigned int instr)
 {
-	return EMULATE_FAIL;
+	switch (get_xop(instr)) {
+	case OP_31_XOP_TLBIE:
+		return kvmhv_emulate_priv_tlbie(vcpu, instr);
+	default:
+		return EMULATE_FAIL;
+	}
 }
 
 static int kvmhv_emulate_priv_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
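
As a worked illustration of the invalidation loop in
kvmhv_emulate_tlbie_tlb_addr() above (the numbers are hypothetical,
not from the patch): suppose L1 invalidates a 2MB page, so
ap_to_shift() gives shift = 21 and, with PAGE_SHIFT = 12,
npages = 1UL << (21 - 12) = 512. If L0's shadow table mapped the range
with a single 2MB pte, kvmhv_invalidate_shadow_pte() reports
shift = 21 and the loop completes in one pass (npages -= 512). If L0
instead backed it with 4KB ptes, each pass reports shift = 12 and the
loop takes up to 512 passes, advancing addr by 4KB each time. This is
why a single L2 -> L1 pte invalidation can require multiple L2 -> L0
shadow pte invalidations.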