Patchwork [1/3,v6] kvm: powerpc: keep only pte search logic in lookup_linux_pte

login
register
mail settings
Submitter Bharat Bhushan
Date Oct. 4, 2013, 2:55 p.m.
Message ID <1380898533-1444-2-git-send-email-Bharat.Bhushan@freescale.com>
Download mbox | patch
Permalink /patch/280641/
State New
Headers show

Comments

Bharat Bhushan - Oct. 4, 2013, 2:55 p.m.
lookup_linux_pte() was searching for a pte and also sets access
flags is writable. This function now searches only pte while
access flag setting is done explicitly.

This pte lookup is not kvm specific, so moved to common code (asm/pgtable.h)
My Followup patch will use this on booke.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 v5->v6
 - return NULL rather than _pte(0) as this was
   giving compilation error with STRICT_MM_TYPECHECKS
 - Also not only check for NULL pointer in caller rather than
   calling pte_present() twice

 arch/powerpc/include/asm/pgtable.h  |   24 +++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |   36 +++++++++++-----------------------
 2 files changed, 36 insertions(+), 24 deletions(-)
Alexander Graf - Oct. 4, 2013, 3:19 p.m.
On 04.10.2013, at 16:55, Bharat Bhushan wrote:

> lookup_linux_pte() was searching for a pte and also sets access
> flags is writable. This function now searches only pte while
> access flag setting is done explicitly.
> 
> This pte lookup is not kvm specific, so moved to common code (asm/pgtable.h)
> My Followup patch will use this on booke.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>

Paul, please ack.


Alex

> ---
> v5->v6
> - return NULL rather than _pte(0) as this was
>   giving compilation error with STRICT_MM_TYPECHECKS
> - Also not only check for NULL pointer in caller rather than
>   calling pte_present() twice
> 
> arch/powerpc/include/asm/pgtable.h  |   24 +++++++++++++++++++++++
> arch/powerpc/kvm/book3s_hv_rm_mmu.c |   36 +++++++++++-----------------------
> 2 files changed, 36 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
> index 7d6eacf..5e41a31 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -223,6 +223,30 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
> #endif
> pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
> 				 unsigned *shift);
> +
> +static inline pte_t *lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
> +				     unsigned long *pte_sizep)
> +{
> +	pte_t *ptep;
> +	unsigned long ps = *pte_sizep;
> +	unsigned int shift;
> +
> +	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
> +	if (!ptep)
> +		return NULL;
> +	if (shift)
> +		*pte_sizep = 1ul << shift;
> +	else
> +		*pte_sizep = PAGE_SIZE;
> +
> +	if (ps > *pte_sizep)
> +		return NULL;
> +
> +	if (!pte_present(*ptep))
> +		return NULL;
> +
> +	return ptep;
> +}
> #endif /* __ASSEMBLY__ */
> 
> #endif /* __KERNEL__ */
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 45e30d6..8ab54e8 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -134,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
> 	unlock_rmap(rmap);
> }
> 
> -static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
> -			      int writing, unsigned long *pte_sizep)
> -{
> -	pte_t *ptep;
> -	unsigned long ps = *pte_sizep;
> -	unsigned int hugepage_shift;
> -
> -	ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
> -	if (!ptep)
> -		return __pte(0);
> -	if (hugepage_shift)
> -		*pte_sizep = 1ul << hugepage_shift;
> -	else
> -		*pte_sizep = PAGE_SIZE;
> -	if (ps > *pte_sizep)
> -		return __pte(0);
> -	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
> -}
> -
> static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
> {
> 	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
> @@ -173,6 +154,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
> 	unsigned long is_io;
> 	unsigned long *rmap;
> 	pte_t pte;
> +	pte_t *ptep;
> 	unsigned int writing;
> 	unsigned long mmu_seq;
> 	unsigned long rcbits;
> @@ -231,8 +213,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
> 
> 		/* Look up the Linux PTE for the backing page */
> 		pte_size = psize;
> -		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
> -		if (pte_present(pte)) {
> +		ptep = lookup_linux_pte(pgdir, hva, &pte_size);
> +		if (ptep) {
> +			pte = kvmppc_read_update_linux_pte(ptep, writing);
> 			if (writing && !pte_write(pte))
> 				/* make the actual HPTE be read-only */
> 				ptel = hpte_make_readonly(ptel);
> @@ -661,15 +644,20 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
> 			struct kvm_memory_slot *memslot;
> 			pgd_t *pgdir = vcpu->arch.pgdir;
> 			pte_t pte;
> +			pte_t *ptep;
> 
> 			psize = hpte_page_size(v, r);
> 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
> 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
> 			if (memslot) {
> 				hva = __gfn_to_hva_memslot(memslot, gfn);
> -				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
> -				if (pte_present(pte) && !pte_write(pte))
> -					r = hpte_make_readonly(r);
> +				ptep = lookup_linux_pte(pgdir, hva, &psize);
> +				if (ptep) {
> +					pte = kvmppc_read_update_linux_pte(ptep,
> +									   1);
> +					if (pte_present(pte) && !pte_write(pte))
> +						r = hpte_make_readonly(r);
> +				}
> 			}
> 		}
> 	}
> -- 
> 1.7.0.4
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan - Oct. 4, 2013, 3:31 p.m.
Adding Paul

> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Friday, October 04, 2013 8:49 PM
> To: Bhushan Bharat-R65777
> Cc: kvm@vger.kernel.org; kvm-ppc@vger.kernel.org; Wood Scott-B07421;
> benh@kernel.crashing.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in
> lookup_linux_pte
> 
> 
> On 04.10.2013, at 16:55, Bharat Bhushan wrote:
> 
> > lookup_linux_pte() was searching for a pte and also sets access flags
> > is writable. This function now searches only pte while access flag
> > setting is done explicitly.
> >
> > This pte lookup is not kvm specific, so moved to common code
> > (asm/pgtable.h) My Followup patch will use this on booke.
> >
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> 
> Paul, please ack.
> 
> 
> Alex
> 
> > ---
> > v5->v6
> > - return NULL rather than _pte(0) as this was
> >   giving compilation error with STRICT_MM_TYPECHECKS
> > - Also not only check for NULL pointer in caller rather than
> >   calling pte_present() twice
> >
> > arch/powerpc/include/asm/pgtable.h  |   24 +++++++++++++++++++++++
> > arch/powerpc/kvm/book3s_hv_rm_mmu.c |   36 +++++++++++-----------------------
> > 2 files changed, 36 insertions(+), 24 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/pgtable.h
> > b/arch/powerpc/include/asm/pgtable.h
> > index 7d6eacf..5e41a31 100644
> > --- a/arch/powerpc/include/asm/pgtable.h
> > +++ b/arch/powerpc/include/asm/pgtable.h
> > @@ -223,6 +223,30 @@ extern int gup_hugepte(pte_t *ptep, unsigned long
> > sz, unsigned long addr, #endif pte_t *find_linux_pte_or_hugepte(pgd_t
> > *pgdir, unsigned long ea,
> > 				 unsigned *shift);
> > +
> > +static inline pte_t *lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
> > +				     unsigned long *pte_sizep)
> > +{
> > +	pte_t *ptep;
> > +	unsigned long ps = *pte_sizep;
> > +	unsigned int shift;
> > +
> > +	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
> > +	if (!ptep)
> > +		return NULL;
> > +	if (shift)
> > +		*pte_sizep = 1ul << shift;
> > +	else
> > +		*pte_sizep = PAGE_SIZE;
> > +
> > +	if (ps > *pte_sizep)
> > +		return NULL;
> > +
> > +	if (!pte_present(*ptep))
> > +		return NULL;
> > +
> > +	return ptep;
> > +}
> > #endif /* __ASSEMBLY__ */
> >
> > #endif /* __KERNEL__ */
> > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > index 45e30d6..8ab54e8 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > @@ -134,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long
> pte_index,
> > 	unlock_rmap(rmap);
> > }
> >
> > -static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
> > -			      int writing, unsigned long *pte_sizep)
> > -{
> > -	pte_t *ptep;
> > -	unsigned long ps = *pte_sizep;
> > -	unsigned int hugepage_shift;
> > -
> > -	ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
> > -	if (!ptep)
> > -		return __pte(0);
> > -	if (hugepage_shift)
> > -		*pte_sizep = 1ul << hugepage_shift;
> > -	else
> > -		*pte_sizep = PAGE_SIZE;
> > -	if (ps > *pte_sizep)
> > -		return __pte(0);
> > -	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
> > -}
> > -
> > static inline void unlock_hpte(unsigned long *hpte, unsigned long
> > hpte_v) {
> > 	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); @@ -173,6 +154,7
> > @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
> > 	unsigned long is_io;
> > 	unsigned long *rmap;
> > 	pte_t pte;
> > +	pte_t *ptep;
> > 	unsigned int writing;
> > 	unsigned long mmu_seq;
> > 	unsigned long rcbits;
> > @@ -231,8 +213,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned
> > long flags,
> >
> > 		/* Look up the Linux PTE for the backing page */
> > 		pte_size = psize;
> > -		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
> > -		if (pte_present(pte)) {
> > +		ptep = lookup_linux_pte(pgdir, hva, &pte_size);
> > +		if (ptep) {
> > +			pte = kvmppc_read_update_linux_pte(ptep, writing);
> > 			if (writing && !pte_write(pte))
> > 				/* make the actual HPTE be read-only */
> > 				ptel = hpte_make_readonly(ptel);
> > @@ -661,15 +644,20 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned
> long flags,
> > 			struct kvm_memory_slot *memslot;
> > 			pgd_t *pgdir = vcpu->arch.pgdir;
> > 			pte_t pte;
> > +			pte_t *ptep;
> >
> > 			psize = hpte_page_size(v, r);
> > 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
> > 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
> > 			if (memslot) {
> > 				hva = __gfn_to_hva_memslot(memslot, gfn);
> > -				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
> > -				if (pte_present(pte) && !pte_write(pte))
> > -					r = hpte_make_readonly(r);
> > +				ptep = lookup_linux_pte(pgdir, hva, &psize);
> > +				if (ptep) {
> > +					pte = kvmppc_read_update_linux_pte(ptep,
> > +									   1);
> > +					if (pte_present(pte) && !pte_write(pte))
> > +						r = hpte_make_readonly(r);
> > +				}
> > 			}
> > 		}
> > 	}
> > --
> > 1.7.0.4
> >
> >
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras - Oct. 6, 2013, 11:08 p.m.
On Fri, Oct 04, 2013 at 08:25:31PM +0530, Bharat Bhushan wrote:
> lookup_linux_pte() was searching for a pte and also sets access
> flags is writable. This function now searches only pte while
> access flag setting is done explicitly.

So in order to reduce some code duplication, you have added code
duplication in the existing callers of this function.  I'm not
convinced it's an overall win.  What's left in this function is pretty
trivial, just a call to find_linux_pte_or_hugepte() and some pagesize
computations.  I would prefer you found a way to do what you want
without adding code duplication at the existing call sites.  Maybe you
could have a new find_linux_pte_and_check_pagesize() and call that
from the existing lookup_linux_pte().

The other thing you've done, without commenting on why you have done
it, is to add a pte_present check without having looked at _PAGE_BUSY.
kvmppc_read_update_linux_pte() only checks _PAGE_PRESENT after
checking that _PAGE_BUSY is clear, so this is a semantic change, which
I think is wrong for server processors.

So, on the whole, NACK from me for this patch.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan - Oct. 7, 2013, 3:55 a.m.
Hi Paul,

> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Paul Mackerras
> Sent: Monday, October 07, 2013 4:39 AM
> To: Bhushan Bharat-R65777
> Cc: agraf@suse.de; kvm@vger.kernel.org; kvm-ppc@vger.kernel.org; Wood Scott-
> B07421; benh@kernel.crashing.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in
> lookup_linux_pte
> 
> On Fri, Oct 04, 2013 at 08:25:31PM +0530, Bharat Bhushan wrote:
> > lookup_linux_pte() was searching for a pte and also sets access flags
> > is writable. This function now searches only pte while access flag
> > setting is done explicitly.


> 
> So in order to reduce some code duplication, you have added code duplication in
> the existing callers of this function.  I'm not convinced it's an overall win.

lookup_linux_pte(): as per name it is supposed to only lookup for a pte, but it is doing more than that (Also updating the pte). So I made this function to only do lookup (which also check size). I am not an MM expert but I think we can make this function better like you suggested checking pte_present() only if _PAGE_BUSY not set.

> What's left in this function is pretty trivial, just a call to
> find_linux_pte_or_hugepte() and some pagesize computations.  I would prefer you
> found a way to do what you want without adding code duplication at the existing
> call sites.

What about doing this way:
1) A function which will do the lookup for Linux pte. May be call that as lookup_linux_pte()
2) lookup + page update (what the existing function lookup_linux_pte() is doing). Will rename this function to lookup_linux_pte_and_update(), which will call above defined lookup_linux_pte()


Thanks
-Bharat

>  Maybe you could have a new find_linux_pte_and_check_pagesize() and
> call that from the existing lookup_linux_pte().
> 
> The other thing you've done, without commenting on why you have done it, is to
> add a pte_present check without having looked at _PAGE_BUSY.
> kvmppc_read_update_linux_pte() only checks _PAGE_PRESENT after checking that
> _PAGE_BUSY is clear, so this is a semantic change, which I think is wrong for
> server processors.
> 
> So, on the whole, NACK from me for this patch.
> 
> Paul.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 7d6eacf..5e41a31 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -223,6 +223,30 @@  extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #endif
 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 				 unsigned *shift);
+
+static inline pte_t *lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
+				     unsigned long *pte_sizep)
+{
+	pte_t *ptep;
+	unsigned long ps = *pte_sizep;
+	unsigned int shift;
+
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+	if (!ptep)
+		return NULL;
+	if (shift)
+		*pte_sizep = 1ul << shift;
+	else
+		*pte_sizep = PAGE_SIZE;
+
+	if (ps > *pte_sizep)
+		return NULL;
+
+	if (!pte_present(*ptep))
+		return NULL;
+
+	return ptep;
+}
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 45e30d6..8ab54e8 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -134,25 +134,6 @@  static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
-{
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int hugepage_shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-	if (!ptep)
-		return __pte(0);
-	if (hugepage_shift)
-		*pte_sizep = 1ul << hugepage_shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-	if (ps > *pte_sizep)
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
 static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
 {
 	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
@@ -173,6 +154,7 @@  long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	unsigned long is_io;
 	unsigned long *rmap;
 	pte_t pte;
+	pte_t *ptep;
 	unsigned int writing;
 	unsigned long mmu_seq;
 	unsigned long rcbits;
@@ -231,8 +213,9 @@  long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
-		if (pte_present(pte)) {
+		ptep = lookup_linux_pte(pgdir, hva, &pte_size);
+		if (ptep) {
+			pte = kvmppc_read_update_linux_pte(ptep, writing);
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
@@ -661,15 +644,20 @@  long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			struct kvm_memory_slot *memslot;
 			pgd_t *pgdir = vcpu->arch.pgdir;
 			pte_t pte;
+			pte_t *ptep;
 
 			psize = hpte_page_size(v, r);
 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 			if (memslot) {
 				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
+				ptep = lookup_linux_pte(pgdir, hva, &psize);
+				if (ptep) {
+					pte = kvmppc_read_update_linux_pte(ptep,
+									   1);
+					if (pte_present(pte) && !pte_write(pte))
+						r = hpte_make_readonly(r);
+				}
 			}
 		}
 	}