Patchwork [-V7,18/18] powerpc: Update tlbie/tlbiel as per ISA doc

login
register
mail settings
Submitter Aneesh Kumar K.V
Date April 28, 2013, 7:37 p.m.
Message ID <1367177859-7893-19-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/240281/
State Accepted, archived
Headers show

Comments

Aneesh Kumar K.V - April 28, 2013, 7:37 p.m.
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
multiple page size segment correctly.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)
David Gibson - April 30, 2013, 6:15 a.m.
On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
> multiple page size segment correctly.

As mentioned in previous comments, this commit message needs to give
much more detail about what precisely the existing implementation is
doing wrong.

> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
>  1 file changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
> index bb920ee..6a2aead 100644
> --- a/arch/powerpc/mm/hash_native_64.c
> +++ b/arch/powerpc/mm/hash_native_64.c
> @@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>  
>  	switch (psize) {
>  	case MMU_PAGE_4K:
> +		/* clear out bits after (52) [0....52.....63] */
> +		va &= ~((1ul << (64 - 52)) - 1);
>  		va |= ssize << 8;
> +		va |= mmu_psize_defs[apsize].sllp << 6;
>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
>  			     : "memory");
> @@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>  	default:
>  		/* We need 14 to 14 + i bits of va */
>  		penc = mmu_psize_defs[psize].penc[apsize];
> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
>  		va |= penc << 12;
>  		va |= ssize << 8;
> +		/* Add AVAL part */
> +		if (psize != apsize) {
> +			/*
> +			 * MPSS, 64K base page size and 16MB parge page size
> +			 * We don't need all the bits, but rest of the bits
> +			 * must be ignored by the processor.
> +			 * vpn cover upto 65 bits of va. (0...65) and we need
> +			 * 58..64 bits of va.

I can't understand what this comment is saying.  Why do we need to do
something different in the psize != apsize case?

> +			 */
> +			va |= (vpn & 0xfe);
> +		}
>  		va |= 1; /* L */
>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
> @@ -96,16 +110,30 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
>  
>  	switch (psize) {
>  	case MMU_PAGE_4K:
> +		/* clear out bits after(52) [0....52.....63] */
> +		va &= ~((1ul << (64 - 52)) - 1);
>  		va |= ssize << 8;
> +		va |= mmu_psize_defs[apsize].sllp << 6;
>  		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
>  			     : : "r"(va) : "memory");
>  		break;
>  	default:
>  		/* We need 14 to 14 + i bits of va */
>  		penc = mmu_psize_defs[psize].penc[apsize];
> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
>  		va |= penc << 12;
>  		va |= ssize << 8;
> +		/* Add AVAL part */
> +		if (psize != apsize) {
> +			/*
> +			 * MPSS, 64K base page size and 16MB parge page size
> +			 * We don't need all the bits, but rest of the bits
> +			 * must be ignored by the processor.
> +			 * vpn cover upto 65 bits of va. (0...65) and we need
> +			 * 58..64 bits of va.
> +			 */
> +			va |= (vpn & 0xfe);
> +		}
>  		va |= 1; /* L */
>  		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
>  			     : : "r"(va) : "memory");
Aneesh Kumar K.V - April 30, 2013, 5:21 p.m.
David Gibson <dwg@au1.ibm.com> writes:

> On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
>> multiple page size segment correctly.
>
> As mentioned in previous comments, this commit message needs to give
> much more detail about what precisely the existing implementation is
> doing wrong.
>
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
>>  1 file changed, 30 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
>> index bb920ee..6a2aead 100644
>> --- a/arch/powerpc/mm/hash_native_64.c
>> +++ b/arch/powerpc/mm/hash_native_64.c
>> @@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>>  
>>  	switch (psize) {
>>  	case MMU_PAGE_4K:
>> +		/* clear out bits after (52) [0....52.....63] */
>> +		va &= ~((1ul << (64 - 52)) - 1);
>>  		va |= ssize << 8;
>> +		va |= mmu_psize_defs[apsize].sllp << 6;
>>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
>>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
>>  			     : "memory");
>> @@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>>  	default:
>>  		/* We need 14 to 14 + i bits of va */
>>  		penc = mmu_psize_defs[psize].penc[apsize];
>> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
>> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
>>  		va |= penc << 12;
>>  		va |= ssize << 8;
>> +		/* Add AVAL part */
>> +		if (psize != apsize) {
>> +			/*
>> +			 * MPSS, 64K base page size and 16MB parge page size
>> +			 * We don't need all the bits, but rest of the bits
>> +			 * must be ignored by the processor.
>> +			 * vpn cover upto 65 bits of va. (0...65) and we need
>> +			 * 58..64 bits of va.
>
> I can't understand what this comment is saying.  Why do we need to do
> something different in the psize != apsize case?
>
>> +			 */
>> +			va |= (vpn & 0xfe);
>> +		}

That is as per ISA doc. It says if base page size == actual page size,
(RB)56:62 must be zeros, which must be ignored by the processor.
Otherwise it should be filled with the selected bits of VA as explained above.

We only support MPSS with base page size = 64K and actual page size = 16MB.

-aneesh
David Gibson - May 1, 2013, 5:26 a.m.
On Tue, Apr 30, 2013 at 10:51:00PM +0530, Aneesh Kumar K.V wrote:
> David Gibson <dwg@au1.ibm.com> writes:
> 
> > On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
> >> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> >> 
> >> Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
> >> multiple page size segment correctly.
> >
> > As mentioned in previous comments, this commit message needs to give
> > much more detail about what precisely the existing implementation is
> > doing wrong.
> >
> >> 
> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> >> ---
> >>  arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
> >>  1 file changed, 30 insertions(+), 2 deletions(-)
> >> 
> >> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
> >> index bb920ee..6a2aead 100644
> >> --- a/arch/powerpc/mm/hash_native_64.c
> >> +++ b/arch/powerpc/mm/hash_native_64.c
> >> @@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> >>  
> >>  	switch (psize) {
> >>  	case MMU_PAGE_4K:
> >> +		/* clear out bits after (52) [0....52.....63] */
> >> +		va &= ~((1ul << (64 - 52)) - 1);
> >>  		va |= ssize << 8;
> >> +		va |= mmu_psize_defs[apsize].sllp << 6;
> >>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
> >>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
> >>  			     : "memory");
> >> @@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> >>  	default:
> >>  		/* We need 14 to 14 + i bits of va */
> >>  		penc = mmu_psize_defs[psize].penc[apsize];
> >> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> >> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
> >>  		va |= penc << 12;
> >>  		va |= ssize << 8;
> >> +		/* Add AVAL part */
> >> +		if (psize != apsize) {
> >> +			/*
> >> +			 * MPSS, 64K base page size and 16MB parge page size
> >> +			 * We don't need all the bits, but rest of the bits
> >> +			 * must be ignored by the processor.
> >> +			 * vpn cover upto 65 bits of va. (0...65) and we need
> >> +			 * 58..64 bits of va.
> >
> > I can't understand what this comment is saying.  Why do we need to do
> > something different in the psize != apsize case?
> >
> >> +			 */
> >> +			va |= (vpn & 0xfe);
> >> +		}
> 
> That is as per ISA doc. It says if base page size == actual page size,
> (RB)56:62 must be zeros, which must be ignored by the processor.
> Otherwise it should be filled with the selected bits of VA as explained above.

What you've just said here makes much more sense than what's written
in the comment in the code.

> We only support MPSS with base page size = 64K and actual page size = 16MB.

Is that actually relevant to this code though?
Aneesh Kumar K.V - May 1, 2013, 7:47 a.m.
David Gibson <dwg@au1.ibm.com> writes:

> On Tue, Apr 30, 2013 at 10:51:00PM +0530, Aneesh Kumar K.V wrote:
>> David Gibson <dwg@au1.ibm.com> writes:
>> 
>> > On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
>> >> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> >> 
>> >> Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
>> >> multiple page size segment correctly.
>> >
>> > As mentioned in previous comments, this commit message needs to give
>> > much more detail about what precisely the existing implementation is
>> > doing wrong.
>> >
>> >> 
>> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> >> ---
>> >>  arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
>> >>  1 file changed, 30 insertions(+), 2 deletions(-)
>> >> 
>> >> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
>> >> index bb920ee..6a2aead 100644
>> >> --- a/arch/powerpc/mm/hash_native_64.c
>> >> +++ b/arch/powerpc/mm/hash_native_64.c
>> >> @@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>> >>  
>> >>  	switch (psize) {
>> >>  	case MMU_PAGE_4K:
>> >> +		/* clear out bits after (52) [0....52.....63] */
>> >> +		va &= ~((1ul << (64 - 52)) - 1);
>> >>  		va |= ssize << 8;
>> >> +		va |= mmu_psize_defs[apsize].sllp << 6;
>> >>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
>> >>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
>> >>  			     : "memory");
>> >> @@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>> >>  	default:
>> >>  		/* We need 14 to 14 + i bits of va */
>> >>  		penc = mmu_psize_defs[psize].penc[apsize];
>> >> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
>> >> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
>> >>  		va |= penc << 12;
>> >>  		va |= ssize << 8;
>> >> +		/* Add AVAL part */
>> >> +		if (psize != apsize) {
>> >> +			/*
>> >> +			 * MPSS, 64K base page size and 16MB parge page size
>> >> +			 * We don't need all the bits, but rest of the bits
>> >> +			 * must be ignored by the processor.
>> >> +			 * vpn cover upto 65 bits of va. (0...65) and we need
>> >> +			 * 58..64 bits of va.
>> >
>> > I can't understand what this comment is saying.  Why do we need to do
>> > something different in the psize != apsize case?
>> >
>> >> +			 */
>> >> +			va |= (vpn & 0xfe);
>> >> +		}
>> 
>> That is as per ISA doc. It says if base page size == actual page size,
>> (RB)56:62 must be zeros, which must be ignored by the processor.
>> Otherwise it should be filled with the selected bits of VA as explained above.
>
> What you've just said here makes much more sense than what's written
> in the comment in the code.
>
>> We only support MPSS with base page size = 64K and actual page size = 16MB.
>
> Is that actually relevant to this code though?

In a way yes. The number of bits we we select out of VA depends on the
base page size and actual page size. We have a math around that
documented in ISA. Now since we support only 64K and 16MB we can make it
simpler by only selecting required bits and not making it a
function. But then it is also not relevant to the code in that ISA also
state other bits in (RB)56:62 must be zero. I wanted to capture both the
details in the comment. 

-aneesh
Simon Jeons - May 1, 2013, 7:52 a.m.
On 05/01/2013 03:47 PM, Aneesh Kumar K.V wrote:
> David Gibson <dwg@au1.ibm.com> writes:
>
>> On Tue, Apr 30, 2013 at 10:51:00PM +0530, Aneesh Kumar K.V wrote:
>>> David Gibson <dwg@au1.ibm.com> writes:
>>>
>>>> On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
>>>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
>>>>> multiple page size segment correctly.
>>>> As mentioned in previous comments, this commit message needs to give
>>>> much more detail about what precisely the existing implementation is
>>>> doing wrong.
>>>>
>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>>>>> ---
>>>>>   arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
>>>>>   1 file changed, 30 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
>>>>> index bb920ee..6a2aead 100644
>>>>> --- a/arch/powerpc/mm/hash_native_64.c
>>>>> +++ b/arch/powerpc/mm/hash_native_64.c
>>>>> @@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>>>>>   
>>>>>   	switch (psize) {
>>>>>   	case MMU_PAGE_4K:
>>>>> +		/* clear out bits after (52) [0....52.....63] */
>>>>> +		va &= ~((1ul << (64 - 52)) - 1);
>>>>>   		va |= ssize << 8;
>>>>> +		va |= mmu_psize_defs[apsize].sllp << 6;
>>>>>   		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
>>>>>   			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
>>>>>   			     : "memory");
>>>>> @@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
>>>>>   	default:
>>>>>   		/* We need 14 to 14 + i bits of va */
>>>>>   		penc = mmu_psize_defs[psize].penc[apsize];
>>>>> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
>>>>> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
>>>>>   		va |= penc << 12;
>>>>>   		va |= ssize << 8;
>>>>> +		/* Add AVAL part */
>>>>> +		if (psize != apsize) {
>>>>> +			/*
>>>>> +			 * MPSS, 64K base page size and 16MB parge page size
>>>>> +			 * We don't need all the bits, but rest of the bits
>>>>> +			 * must be ignored by the processor.
>>>>> +			 * vpn cover upto 65 bits of va. (0...65) and we need
>>>>> +			 * 58..64 bits of va.
>>>> I can't understand what this comment is saying.  Why do we need to do
>>>> something different in the psize != apsize case?
>>>>
>>>>> +			 */
>>>>> +			va |= (vpn & 0xfe);
>>>>> +		}
>>> That is as per ISA doc. It says if base page size == actual page size,
>>> (RB)56:62 must be zeros, which must be ignored by the processor.
>>> Otherwise it should be filled with the selected bits of VA as explained above.
>> What you've just said here makes much more sense than what's written
>> in the comment in the code.
>>
>>> We only support MPSS with base page size = 64K and actual page size = 16MB.
>> Is that actually relevant to this code though?
> In a way yes. The number of bits we we select out of VA depends on the
> base page size and actual page size. We have a math around that
> documented in ISA. Now since we support only 64K and 16MB we can make it
> simpler by only selecting required bits and not making it a
> function. But then it is also not relevant to the code in that ISA also
> state other bits in (RB)56:62 must be zero. I wanted to capture both the
> details in the comment.

What's the benefit of ppc use hash-based page table instead of 
tree-based page table? If you said that hash is faster, could x86 change 
to it?

>
> -aneesh
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
David Gibson - May 1, 2013, 11:36 a.m.
On Wed, May 01, 2013 at 03:52:30PM +0800, Simon Jeons wrote:
> On 05/01/2013 03:47 PM, Aneesh Kumar K.V wrote:
> >David Gibson <dwg@au1.ibm.com> writes:
> >
> >>On Tue, Apr 30, 2013 at 10:51:00PM +0530, Aneesh Kumar K.V wrote:
> >>>David Gibson <dwg@au1.ibm.com> writes:
> >>>
> >>>>On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
> >>>>>From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> >>>>>
> >>>>>Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
> >>>>>multiple page size segment correctly.
> >>>>As mentioned in previous comments, this commit message needs to give
> >>>>much more detail about what precisely the existing implementation is
> >>>>doing wrong.
> >>>>
> >>>>>Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> >>>>>---
> >>>>>  arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
> >>>>>  1 file changed, 30 insertions(+), 2 deletions(-)
> >>>>>
> >>>>>diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
> >>>>>index bb920ee..6a2aead 100644
> >>>>>--- a/arch/powerpc/mm/hash_native_64.c
> >>>>>+++ b/arch/powerpc/mm/hash_native_64.c
> >>>>>@@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> >>>>>  	switch (psize) {
> >>>>>  	case MMU_PAGE_4K:
> >>>>>+		/* clear out bits after (52) [0....52.....63] */
> >>>>>+		va &= ~((1ul << (64 - 52)) - 1);
> >>>>>  		va |= ssize << 8;
> >>>>>+		va |= mmu_psize_defs[apsize].sllp << 6;
> >>>>>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
> >>>>>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
> >>>>>  			     : "memory");
> >>>>>@@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> >>>>>  	default:
> >>>>>  		/* We need 14 to 14 + i bits of va */
> >>>>>  		penc = mmu_psize_defs[psize].penc[apsize];
> >>>>>-		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> >>>>>+		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
> >>>>>  		va |= penc << 12;
> >>>>>  		va |= ssize << 8;
> >>>>>+		/* Add AVAL part */
> >>>>>+		if (psize != apsize) {
> >>>>>+			/*
> >>>>>+			 * MPSS, 64K base page size and 16MB parge page size
> >>>>>+			 * We don't need all the bits, but rest of the bits
> >>>>>+			 * must be ignored by the processor.
> >>>>>+			 * vpn cover upto 65 bits of va. (0...65) and we need
> >>>>>+			 * 58..64 bits of va.
> >>>>I can't understand what this comment is saying.  Why do we need to do
> >>>>something different in the psize != apsize case?
> >>>>
> >>>>>+			 */
> >>>>>+			va |= (vpn & 0xfe);
> >>>>>+		}
> >>>That is as per ISA doc. It says if base page size == actual page size,
> >>>(RB)56:62 must be zeros, which must be ignored by the processor.
> >>>Otherwise it should be filled with the selected bits of VA as explained above.
> >>What you've just said here makes much more sense than what's written
> >>in the comment in the code.
> >>
> >>>We only support MPSS with base page size = 64K and actual page size = 16MB.
> >>Is that actually relevant to this code though?
> >In a way yes. The number of bits we we select out of VA depends on the
> >base page size and actual page size. We have a math around that
> >documented in ISA. Now since we support only 64K and 16MB we can make it
> >simpler by only selecting required bits and not making it a
> >function. But then it is also not relevant to the code in that ISA also
> >state other bits in (RB)56:62 must be zero. I wanted to capture both the
> >details in the comment.
> 
> What's the benefit of ppc use hash-based page table instead of
> tree-based page table? If you said that hash is faster, could x86
> change to it?

Complex and debatable at best.  But in any case it's a fixed property
of the CPU, not something that can be changed in software.
David Gibson - May 2, 2013, 5:23 a.m.
On Wed, May 01, 2013 at 01:17:41PM +0530, Aneesh Kumar K.V wrote:
> David Gibson <dwg@au1.ibm.com> writes:
> 
> > On Tue, Apr 30, 2013 at 10:51:00PM +0530, Aneesh Kumar K.V wrote:
> >> David Gibson <dwg@au1.ibm.com> writes:
> >> 
> >> > On Mon, Apr 29, 2013 at 01:07:39AM +0530, Aneesh Kumar K.V wrote:
> >> >> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> >> >> 
> >> >> Encode the actual page correctly in tlbie/tlbiel. This make sure we handle
> >> >> multiple page size segment correctly.
> >> >
> >> > As mentioned in previous comments, this commit message needs to give
> >> > much more detail about what precisely the existing implementation is
> >> > doing wrong.
> >> >
> >> >> 
> >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> >> >> ---
> >> >>  arch/powerpc/mm/hash_native_64.c | 32 ++++++++++++++++++++++++++++++--
> >> >>  1 file changed, 30 insertions(+), 2 deletions(-)
> >> >> 
> >> >> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
> >> >> index bb920ee..6a2aead 100644
> >> >> --- a/arch/powerpc/mm/hash_native_64.c
> >> >> +++ b/arch/powerpc/mm/hash_native_64.c
> >> >> @@ -61,7 +61,10 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> >> >>  
> >> >>  	switch (psize) {
> >> >>  	case MMU_PAGE_4K:
> >> >> +		/* clear out bits after (52) [0....52.....63] */
> >> >> +		va &= ~((1ul << (64 - 52)) - 1);
> >> >>  		va |= ssize << 8;
> >> >> +		va |= mmu_psize_defs[apsize].sllp << 6;
> >> >>  		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
> >> >>  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
> >> >>  			     : "memory");
> >> >> @@ -69,9 +72,20 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> >> >>  	default:
> >> >>  		/* We need 14 to 14 + i bits of va */
> >> >>  		penc = mmu_psize_defs[psize].penc[apsize];
> >> >> -		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> >> >> +		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
> >> >>  		va |= penc << 12;
> >> >>  		va |= ssize << 8;
> >> >> +		/* Add AVAL part */
> >> >> +		if (psize != apsize) {
> >> >> +			/*
> >> >> +			 * MPSS, 64K base page size and 16MB parge page size
> >> >> +			 * We don't need all the bits, but rest of the bits
> >> >> +			 * must be ignored by the processor.
> >> >> +			 * vpn cover upto 65 bits of va. (0...65) and we need
> >> >> +			 * 58..64 bits of va.
> >> >
> >> > I can't understand what this comment is saying.  Why do we need to do
> >> > something different in the psize != apsize case?
> >> >
> >> >> +			 */
> >> >> +			va |= (vpn & 0xfe);
> >> >> +		}
> >> 
> >> That is as per ISA doc. It says if base page size == actual page size,
> >> (RB)56:62 must be zeros, which must be ignored by the processor.
> >> Otherwise it should be filled with the selected bits of VA as explained above.
> >
> > What you've just said here makes much more sense than what's written
> > in the comment in the code.
> >
> >> We only support MPSS with base page size = 64K and actual page size = 16MB.
> >
> > Is that actually relevant to this code though?
> 
> In a way yes. The number of bits we we select out of VA depends on the
> base page size and actual page size. We have a math around that
> documented in ISA. Now since we support only 64K and 16MB we can make it
> simpler by only selecting required bits and not making it a
> function. But then it is also not relevant to the code in that ISA also
> state other bits in (RB)56:62 must be zero. I wanted to capture both the
> details in the comment. 

Urgh, so the code assumings just that combination, but has no
assertion or check that its actually the case.  Very fragile.

Even though we don't do other MPSS combinations yet, I'd much prefer
to see the tlbie code get the encoding correct in all cases.

Patch

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index bb920ee..6a2aead 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -61,7 +61,10 @@  static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 
 	switch (psize) {
 	case MMU_PAGE_4K:
+		/* clear out bits after (52) [0....52.....63] */
+		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
+		va |= mmu_psize_defs[apsize].sllp << 6;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
@@ -69,9 +72,20 @@  static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 	default:
 		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc[apsize];
-		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
 		va |= penc << 12;
 		va |= ssize << 8;
+		/* Add AVAL part */
+		if (psize != apsize) {
+			/*
+			 * MPSS, 64K base page size and 16MB parge page size
+			 * We don't need all the bits, but rest of the bits
+			 * must be ignored by the processor.
+			 * vpn cover upto 65 bits of va. (0...65) and we need
+			 * 58..64 bits of va.
+			 */
+			va |= (vpn & 0xfe);
+		}
 		va |= 1; /* L */
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -96,16 +110,30 @@  static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 
 	switch (psize) {
 	case MMU_PAGE_4K:
+		/* clear out bits after(52) [0....52.....63] */
+		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
+		va |= mmu_psize_defs[apsize].sllp << 6;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
 		break;
 	default:
 		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc[apsize];
-		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
 		va |= penc << 12;
 		va |= ssize << 8;
+		/* Add AVAL part */
+		if (psize != apsize) {
+			/*
+			 * MPSS, 64K base page size and 16MB parge page size
+			 * We don't need all the bits, but rest of the bits
+			 * must be ignored by the processor.
+			 * vpn cover upto 65 bits of va. (0...65) and we need
+			 * 58..64 bits of va.
+			 */
+			va |= (vpn & 0xfe);
+		}
 		va |= 1; /* L */
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
 			     : : "r"(va) : "memory");