KVM: PPC: BOOK3S: HV: Use base page size when comparing against slb value

Message ID 1402644190-15604-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com
State New, archived

Commit Message

Aneesh Kumar K.V June 13, 2014, 7:23 a.m. UTC
With guests supporting Multiple Page Size per Segment (MPSS),
hpte_page_size() returns the actual page size used by an HPTE. Add a
new function to return the base page size, and use that when comparing
against the page size calculated from the SLB.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 19 +++++++++++++++++--
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |  2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |  2 +-
 3 files changed, 19 insertions(+), 4 deletions(-)
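
For background: under MPSS, the base page size encoded in the SLB can
differ from the actual page size of an individual HPTE in that segment.
The following stand-alone sketch (not part of the patch; the 64K-base /
16M-actual combination is just one possible example) shows why comparing
the actual size against the SLB-derived size rejects valid mappings:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* SLB-derived shift for a segment with a 64K base page size. */
	unsigned long pshift = 16;

	/* Under MPSS, an HPTE in that segment may map a 16M page: its
	 * base page size is still 64K, but its actual size is 16M. */
	unsigned long base_size   = 1ul << 16;
	unsigned long actual_size = 1ul << 24;

	/* Old check: hpte_page_size() (actual size) vs. the SLB size --
	 * fails even though the HPTE is a valid translation. */
	assert(actual_size != (1ul << pshift));

	/* New check: hpte_base_page_size() vs. the SLB size -- matches. */
	assert(base_size == (1ul << pshift));

	printf("valid MPSS mapping: rejected by old check, accepted by new\n");
	return 0;
}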

Comments

Alexander Graf June 13, 2014, 10:03 a.m. UTC | #1
On 13.06.14 09:23, Aneesh Kumar K.V wrote:
> With guests supporting Multiple Page Size per Segment (MPSS),
> hpte_page_size() returns the actual page size used by an HPTE. Add a
> new function to return the base page size, and use that when comparing
> against the page size calculated from the SLB.

Why? What does this fix? Is this a bug fix, an enhancement? Don't 
describe only what you do, but also why you do it.


Alex

>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/kvm_book3s_64.h | 19 +++++++++++++++++--
>   arch/powerpc/kvm/book3s_64_mmu_hv.c      |  2 +-
>   arch/powerpc/kvm/book3s_hv_rm_mmu.c      |  2 +-
>   3 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 34422be566ce..3d0f3fb9c6b6 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -202,8 +202,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
>   	return rb;
>   }
>   
> -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
> +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
> +					     bool is_base_size)
>   {
> +
>   	int size, a_psize;
>   	/* Look at the 8 bit LP value */
>   	unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
> @@ -218,14 +220,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
>   				continue;
>   
>   			a_psize = __hpte_actual_psize(lp, size);
> -			if (a_psize != -1)
> +			if (a_psize != -1) {
> +				if (is_base_size)
> +					return 1ul << mmu_psize_defs[size].shift;
>   				return 1ul << mmu_psize_defs[a_psize].shift;
> +			}
>   		}
>   
>   	}
>   	return 0;
>   }
>   
> +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
> +{
> +	return __hpte_page_size(h, l, 0);
> +}
> +
> +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
> +{
> +	return __hpte_page_size(h, l, 1);
> +}
> +
>   static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
>   {
>   	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index f53cf2eae36a..7ff45ed27c65 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1567,7 +1567,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
>   				goto out;
>   			}
>   			if (!rma_setup && is_vrma_hpte(v)) {
> -				unsigned long psize = hpte_page_size(v, r);
> +				unsigned long psize = hpte_base_page_size(v, r);
>   				unsigned long senc = slb_pgsize_encoding(psize);
>   				unsigned long lpcr;
>   
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 87624ab5ba82..c6aca75b8376 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -839,7 +839,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
>   			 * to check against the actual page size.
>   			 */
>   			if ((v & valid) && (v & mask) == val &&
> -			    hpte_page_size(v, r) == (1ul << pshift))
> +			    hpte_base_page_size(v, r) == (1ul << pshift))
>   				/* Return with the HPTE still locked */
>   				return (hash << 3) + (i >> 1);
>   

Aneesh Kumar K.V June 13, 2014, 2:28 p.m. UTC | #2
Alexander Graf <agraf@suse.de> writes:

> On 13.06.14 09:23, Aneesh Kumar K.V wrote:
>> With guests supporting Multiple Page Size per Segment (MPSS),
>> hpte_page_size() returns the actual page size used by an HPTE. Add a
>> new function to return the base page size, and use that when comparing
>> against the page size calculated from the SLB.
>
> Why? What does this fix? Is this a bug fix, an enhancement? Don't 
> describe only what you do, but also why you do it.
>
>

This could result in page fault failures (unhandled page faults):
even though we have a valid HPTE mapping a 16MB page, because we were
comparing the actual page size against the page size calculated from
the SLB bits, kvmppc_hv_find_lock_hpte will fail and return -1. I did
not observe a failure in practice; the bug was found during a code
audit. That could be because with THP we have guest RAM backed by
hugetlbfs, so we always find the page in the host Linux page table.
This results in do_h_enter always inserting an HPTE_V_VALID entry, and
hence we might never actually end up calling kvmppc_hv_find_lock_hpte.
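
To make that failure concrete, here is a self-contained toy model of
the comparison (toy_shift[] and the explicit index parameters are
simplified stand-ins for the real mmu_psize_defs[] table and the LP-bit
decoding; they are assumptions for illustration only):

#include <stdio.h>

/* Toy page size table: index 0 = 4K, 1 = 64K, 2 = 16M. */
static const int toy_shift[] = { 12, 16, 24 };

/* Simplified stand-in for __hpte_page_size(): in the kernel, the base
 * and actual size indices are decoded from the HPTE's LP bits. */
static unsigned long toy_hpte_page_size(int base_idx, int actual_idx,
					int want_base)
{
	return 1ul << toy_shift[want_base ? base_idx : actual_idx];
}

int main(void)
{
	/* Segment: 64K base page size; HPTE: 16M actual page (MPSS). */
	int base_idx = 1, actual_idx = 2;
	unsigned long slb_size = 1ul << toy_shift[base_idx];

	if (toy_hpte_page_size(base_idx, actual_idx, 0) != slb_size)
		printf("old check: no match, find_lock_hpte returns -1\n");
	if (toy_hpte_page_size(base_idx, actual_idx, 1) == slb_size)
		printf("new check: match, HPTE is found\n");
	return 0;
}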

-aneesh

Alexander Graf June 13, 2014, 2:44 p.m. UTC | #3
On 13.06.14 16:28, Aneesh Kumar K.V wrote:
> Alexander Graf <agraf@suse.de> writes:
>
>> On 13.06.14 09:23, Aneesh Kumar K.V wrote:
>>> With guests supporting Multiple Page Size per Segment (MPSS),
>>> hpte_page_size() returns the actual page size used by an HPTE. Add a
>>> new function to return the base page size, and use that when comparing
>>> against the page size calculated from the SLB.
>> Why? What does this fix? Is this a bug fix, an enhancement? Don't
>> describe only what you do, but also why you do it.
>>
>>
> This could result in page fault failures (unhandled page faults):
> even though we have a valid HPTE mapping a 16MB page, because we were
> comparing the actual page size against the page size calculated from
> the SLB bits, kvmppc_hv_find_lock_hpte will fail and return -1. I did
> not observe a failure in practice; the bug was found during a code
> audit. That could be because with THP we have guest RAM backed by
> hugetlbfs, so we always find the page in the host Linux page table.
> This results in do_h_enter always inserting an HPTE_V_VALID entry, and
> hence we might never actually end up calling kvmppc_hv_find_lock_hpte.

So why do we need to override to the base page size for the VRMA
region? Also, I think you want to change the comment above the line in
find_lock_hpte that you're changing.


Alex

Aneesh Kumar K.V June 13, 2014, 4:43 p.m. UTC | #4
Alexander Graf <agraf@suse.de> writes:

> On 13.06.14 16:28, Aneesh Kumar K.V wrote:
>> Alexander Graf <agraf@suse.de> writes:
>>
>>> On 13.06.14 09:23, Aneesh Kumar K.V wrote:
>>>> With guests supporting Multiple Page Size per Segment (MPSS),
>>>> hpte_page_size() returns the actual page size used by an HPTE. Add a
>>>> new function to return the base page size, and use that when comparing
>>>> against the page size calculated from the SLB.
>>> Why? What does this fix? Is this a bug fix, an enhancement? Don't
>>> describe only what you do, but also why you do it.
>>>
>>>
>> This could result in page fault failures (unhandled page faults):
>> even though we have a valid HPTE mapping a 16MB page, because we were
>> comparing the actual page size against the page size calculated from
>> the SLB bits, kvmppc_hv_find_lock_hpte will fail and return -1. I did
>> not observe a failure in practice; the bug was found during a code
>> audit. That could be because with THP we have guest RAM backed by
>> hugetlbfs, so we always find the page in the host Linux page table.
>> This results in do_h_enter always inserting an HPTE_V_VALID entry, and
>> hence we might never actually end up calling kvmppc_hv_find_lock_hpte.
>
> So why do we need to override to the base page size for the VRMA region?

The SLB encoding should be derived from the base page size.
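
For illustration, a toy stand-in for slb_pgsize_encoding() (the encoding
values below are assumptions for this sketch, not taken from the kernel
headers) showing how the two sizes produce different SLB encodings for
the VRMA:

#include <stdio.h>

/* Toy SLB page size encoding; the real values live in the kernel's SLB
 * definitions and are only approximated here. */
static unsigned long toy_slb_pgsize_encoding(unsigned long psize)
{
	switch (psize) {
	case 1ul << 12: return 0x000;  /* 4K:  L=0         (assumed) */
	case 1ul << 16: return 0x110;  /* 64K: L=1, LP=01  (assumed) */
	case 1ul << 24: return 0x100;  /* 16M: L=1, LP=00  (assumed) */
	default:        return 0;
	}
}

int main(void)
{
	/* A VRMA HPTE with a 64K base and a 16M actual size (MPSS). */
	unsigned long base = 1ul << 16, actual = 1ul << 24;

	printf("senc from base size:   %#lx (what the fix uses)\n",
	       toy_slb_pgsize_encoding(base));
	printf("senc from actual size: %#lx (what the old code fed in)\n",
	       toy_slb_pgsize_encoding(actual));
	return 0;
}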

> Also, I think you want to change the comment above the line in
> find_lock_hpte that you're changing.
>

Will do that.

-aneesh


Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 34422be566ce..3d0f3fb9c6b6 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -202,8 +202,10 @@  static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	return rb;
 }
 
-static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
+					     bool is_base_size)
 {
+
 	int size, a_psize;
 	/* Look at the 8 bit LP value */
 	unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
@@ -218,14 +220,27 @@  static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 				continue;
 
 			a_psize = __hpte_actual_psize(lp, size);
-			if (a_psize != -1)
+			if (a_psize != -1) {
+				if (is_base_size)
+					return 1ul << mmu_psize_defs[size].shift;
 				return 1ul << mmu_psize_defs[a_psize].shift;
+			}
 		}
 
 	}
 	return 0;
 }
 
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+	return __hpte_page_size(h, l, 0);
+}
+
+static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
+{
+	return __hpte_page_size(h, l, 1);
+}
+
 static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
 {
 	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f53cf2eae36a..7ff45ed27c65 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1567,7 +1567,7 @@  static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 				goto out;
 			}
 			if (!rma_setup && is_vrma_hpte(v)) {
-				unsigned long psize = hpte_page_size(v, r);
+				unsigned long psize = hpte_base_page_size(v, r);
 				unsigned long senc = slb_pgsize_encoding(psize);
 				unsigned long lpcr;
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 87624ab5ba82..c6aca75b8376 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -839,7 +839,7 @@  long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 			 * to check against the actual page size.
 			 */
 			if ((v & valid) && (v & mask) == val &&
-			    hpte_page_size(v, r) == (1ul << pshift))
+			    hpte_base_page_size(v, r) == (1ul << pshift))
 				/* Return with the HPTE still locked */
 				return (hash << 3) + (i >> 1);