Patchwork [-V3,25/25] powerpc: Handle hugepages in kvm

login
register
mail settings
Submitter Aneesh Kumar K.V
Date March 15, 2013, 9:40 a.m.
Message ID <1363340407-22619-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/227939/
State Superseded, archived
Headers show

Comments

Aneesh Kumar K.V - March 15, 2013, 9:40 a.m.
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We could possibly avoid some of these changes because most of the HUGE PMD bits
map to PTE bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   31 ++++++++++++
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |   12 ++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   75 ++++++++++++++++++++++--------
 3 files changed, 97 insertions(+), 21 deletions(-)

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@  static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	return rb;
 }
 
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
 static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 {
 	/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@  static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
 	return pte;
 }
 
+/*
+ * Lock and read a linux hugepage PMD.  If the PMD is large (pmd_large), mark
+ * it young, and dirty if writing and it is writable.  Returns the PMD stored.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+	pmd_t pmd, tmp;
+
+	/* wait until PMD_HUGE_BUSY is clear then set it atomically */
+	__asm__ __volatile__ (
+		"1:	ldarx	%0,0,%3\n"
+		"	andi.	%1,%0,%4\n"
+		"	bne-	1b\n"
+		"	ori	%1,%0,%4\n"
+		"	stdcx.	%1,0,%3\n"
+		"	bne-	1b"
+		: "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+		: "r" (p), "i" (PMD_HUGE_BUSY)
+		: "cc");
+
+	if (pmd_large(pmd)) {
+		pmd = pmd_mkyoung(pmd);
+		if (writing && pmd_write(pmd))
+			pmd = pmd_mkdirty(pmd);	/* PMD helper, matching pmd_mkyoung() */
+	}
+
+	*p = pmd;	/* clears PMD_HUGE_BUSY */
+	return pmd;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@  int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		/* if the guest wants write access, see if that is OK */
 		if (!writing && hpte_is_writable(r)) {
+			int hugepage;
 			pte_t *ptep, pte;
 
 			/*
@@ -683,11 +684,18 @@  int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 */
 			rcu_read_lock_sched();
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL, NULL);
-			if (ptep && pte_present(*ptep)) {
+							 hva, NULL, &hugepage);
+			if (!hugepage && ptep && pte_present(*ptep)) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
 					write_ok = 1;
+			} else if (hugepage && ptep) {
+				pmd_t pmd = *(pmd_t *)ptep;
+				if (pmd_large(pmd)) {
+					pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+					if (pmd_write(pmd))
+						write_ok = 1;
+				}
 			}
 			rcu_read_unlock_sched();
 		}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@  static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 }
 
 static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
+			      int writing, unsigned long *pte_sizep,
+			      int *hugepage)	/* passed to find_linux_pte_or_hugepte(), then read below */
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
 	if (!ptep)
 		return __pte(0);
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
+	if (*hugepage) {
+		*pte_sizep = 1ul << 24;	/* 16MB; NOTE(review): hard-coded, confirm it is the hugepage PMD size */
+	} else {
+		if (shift)
+			*pte_sizep = 1ul << shift;
+		else
+			*pte_sizep = PAGE_SIZE;
+	}
 	if (ps > *pte_sizep)
 		return __pte(0);
-	if (!pte_present(*ptep))
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing);
+
+	if (*hugepage) {
+		pmd_t *pmdp = (pmd_t *)ptep;	/* reinterpret the entry as a PMD */
+		if (!pmd_large(*pmdp))
+			return __pmd(0);	/* NOTE(review): pmd_t returned as pte_t; callers test pmd_large() */
+		return kvmppc_read_update_linux_hugepmd(pmdp, writing);
+	} else {
+		if (!pte_present(*ptep))
+			return __pte(0);
+		return kvmppc_read_update_linux_pte(ptep, writing);
+	}
 }
 
 static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@  long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
 		pa &= PAGE_MASK;
 	} else {
+		int hugepage;
+
 		/* Translate to host virtual address */
 		hva = __gfn_to_hva_memslot(memslot, gfn);
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
-		if (pte_present(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
+		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
+		if (hugepage) {
+			pmd_t pmd = (pmd_t)pte;
+			if (!pmd_large(pmd)) {
+				if (writing && !pmd_write(pmd))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				/*
+				 * we support hugepage only for RAM
+				 */
+				is_io = 0;
+				pa = pmd_pfn(pmd) << PAGE_SHIFT;
+			}
+		} else {
+			if (pte_present(pte)) {
+				if (writing && !pte_write(pte))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				is_io = hpte_cache_bits(pte_val(pte));
+				pa = pte_pfn(pte) << PAGE_SHIFT;
+			}
 		}
 	}
 
@@ -645,10 +674,18 @@  long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 			if (memslot) {
+				int hugepage;
 				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
+				pte = lookup_linux_pte(pgdir, hva, 1,
+						       &psize, &hugepage);
+				if (hugepage) {
+					pmd_t pmd = (pmd_t)pte;
+					if (pmd_large(pmd) && !pmd_write(pmd))
+						r = hpte_make_readonly(r);
+				} else {
+					if (pte_present(pte) && !pte_write(pte))
+						r = hpte_make_readonly(r);
+				}
 			}
 		}
 	}