Patchwork [1/4] KVM: PPC: Book3S HV: Restructure HPT entry creation code

login
register
mail settings
Submitter Paul Mackerras
Date Nov. 14, 2012, 4:31 a.m.
Message ID <20121114043132.GB13832@drongo>
Download mbox | patch
Permalink /patch/198826/
State New
Headers show

Comments

Paul Mackerras - Nov. 14, 2012, 4:31 a.m.
This restructures the code that creates HPT (hashed page table)
entries so that it can be called in situations where we don't have a
struct vcpu pointer, only a struct kvm pointer.  It also fixes a bug
where kvmppc_map_vrma() would corrupt the guest R4 value.

Most of the work of kvmppc_virtmode_h_enter is now done by a new
function, kvmppc_virtmode_do_h_enter, which itself calls another new
function, kvmppc_do_h_enter, which contains most of the old
kvmppc_h_enter.  The new kvmppc_do_h_enter takes explicit arguments
for the place to return the HPTE index, the Linux page tables to use,
and whether it is being called in real mode, thus removing the need
for it to have the vcpu as an argument.

Currently kvmppc_map_vrma creates the VRMA (virtual real mode area)
HPTEs by calling kvmppc_virtmode_h_enter, which is designed primarily
to handle H_ENTER hcalls from the guest that need to pin a page of
memory.  Since H_ENTER returns the index of the created HPTE in R4,
kvmppc_virtmode_h_enter updates the guest R4, corrupting the guest R4
in the case when it gets called from kvmppc_map_vrma on the first
VCPU_RUN ioctl.  With this, kvmppc_map_vrma instead calls
kvmppc_virtmode_do_h_enter with the address of a dummy word as the
place to store the HPTE index, thus avoiding corrupting the guest R4.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |    5 +++--
 arch/powerpc/kvm/book3s_64_mmu_hv.c   |   36 +++++++++++++++++++++++----------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c   |   27 ++++++++++++++++---------
 3 files changed, 45 insertions(+), 23 deletions(-)
Alexander Graf - Nov. 19, 2012, 12:54 p.m.
On 14.11.2012, at 05:31, Paul Mackerras wrote:

> This restructures the code that creates HPT (hashed page table)
> entries so that it can be called in situations where we don't have a
> struct vcpu pointer, only a struct kvm pointer.  It also fixes a bug
> where kvmppc_map_vrma() would corrupt the guest R4 value.
> 
> Most of the work of kvmppc_virtmode_h_enter is now done by a new
> function, kvmppc_virtmode_do_h_enter, which itself calls another new
> function, kvmppc_do_h_enter, which contains most of the old
> kvmppc_h_enter.  The new kvmppc_do_h_enter takes explicit arguments
> for the place to return the HPTE index, the Linux page tables to use,
> and whether it is being called in real mode, thus removing the need
> for it to have the vcpu as an argument.
> 
> Currently kvmppc_map_vrma creates the VRMA (virtual real mode area)
> HPTEs by calling kvmppc_virtmode_h_enter, which is designed primarily
> to handle H_ENTER hcalls from the guest that need to pin a page of
> memory.  Since H_ENTER returns the index of the created HPTE in R4,
> kvmppc_virtmode_h_enter updates the guest R4, corrupting the guest R4
> in the case when it gets called from kvmppc_map_vrma on the first
> VCPU_RUN ioctl.  With this, kvmppc_map_vrma instead calls
> kvmppc_virtmode_do_h_enter with the address of a dummy word as the
> place to store the HPTE index, thus avoiding corrupting the guest R4.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>

Thanks, applied to kvm-ppc-next.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 36fcf41..fea768f 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -157,8 +157,9 @@  extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
 extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
-extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel,
+			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2a89a36..6ee6516 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -41,6 +41,10 @@ 
 /* Power architecture requires HPT is at least 256kB */
 #define PPC_MIN_HPT_ORDER	18
 
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+				long pte_index, unsigned long pteh,
+				unsigned long ptel, unsigned long *pte_idx_ret);
+
 long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
@@ -185,6 +189,7 @@  void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	unsigned long addr, hash;
 	unsigned long psize;
 	unsigned long hp0, hp1;
+	unsigned long idx_ret;
 	long ret;
 	struct kvm *kvm = vcpu->kvm;
 
@@ -216,7 +221,8 @@  void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 		hash = (hash << 3) + 7;
 		hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
 		hp_r = hp1 | addr;
-		ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+		ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+						 &idx_ret);
 		if (ret != H_SUCCESS) {
 			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
 			       addr, ret);
@@ -354,15 +360,10 @@  static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	return err;
 }
 
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+				long pte_index, unsigned long pteh,
+				unsigned long ptel, unsigned long *pte_idx_ret)
 {
-	struct kvm *kvm = vcpu->kvm;
 	unsigned long psize, gpa, gfn;
 	struct kvm_memory_slot *memslot;
 	long ret;
@@ -390,8 +391,8 @@  long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
  do_insert:
 	/* Protect linux PTE lookup from page table destruction */
 	rcu_read_lock_sched();	/* this disables preemption too */
-	vcpu->arch.pgdir = current->mm->pgd;
-	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+				current->mm->pgd, false, pte_idx_ret);
 	rcu_read_unlock_sched();
 	if (ret == H_TOO_HARD) {
 		/* this can't happen */
@@ -402,6 +403,19 @@  long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 }
 
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+			     long pte_index, unsigned long pteh,
+			     unsigned long ptel)
+{
+	return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
+					  pteh, ptel, &vcpu->arch.gpr[4]);
+}
+
 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
 							 gva_t eaddr)
 {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5e06e31..362dffe 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -103,14 +103,14 @@  static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
 			      int writing, unsigned long *pte_sizep)
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
 	if (!ptep)
 		return __pte(0);
 	if (shift)
@@ -130,10 +130,10 @@  static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
 	hpte[0] = hpte_v;
 }
 
-long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-		    long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+		       long pte_index, unsigned long pteh, unsigned long ptel,
+		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
 {
-	struct kvm *kvm = vcpu->kvm;
 	unsigned long i, pa, gpa, gfn, psize;
 	unsigned long slot_fn, hva;
 	unsigned long *hpte;
@@ -147,7 +147,6 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned int writing;
 	unsigned long mmu_seq;
 	unsigned long rcbits;
-	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
@@ -201,7 +200,7 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
 		if (pte_present(pte)) {
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
@@ -210,6 +209,7 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			pa = pte_pfn(pte) << PAGE_SHIFT;
 		}
 	}
+
 	if (pte_size < psize)
 		return H_PARAMETER;
 	if (pa && pte_size > psize)
@@ -297,7 +297,7 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
 		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+		    mmu_notifier_retry(kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
@@ -318,10 +318,17 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	hpte[0] = pteh;
 	asm volatile("ptesync" : : : "memory");
 
-	vcpu->arch.gpr[4] = pte_index;
+	*pte_idx_ret = pte_index;
 	return H_SUCCESS;
 }
-EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+		    long pte_index, unsigned long pteh, unsigned long ptel)
+{
+	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+				 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+}
 
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))