Patchwork [-V5,21/25] powerpc: Handle hugepage in perf callchain

login
register
mail settings
Submitter Aneesh Kumar K.V
Date April 4, 2013, 5:57 a.m.
Message ID <1365055083-31956-22-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/233645/
State Changes Requested, archived
Delegated to: Michael Ellerman
Headers show

Comments

Aneesh Kumar K.V - April 4, 2013, 5:57 a.m.
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/perf/callchain.c |   32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)
David Gibson - April 12, 2013, 1:34 a.m.
On Thu, Apr 04, 2013 at 11:27:59AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/perf/callchain.c |   32 +++++++++++++++++++++-----------
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
> index 578cac7..99262ce 100644
> --- a/arch/powerpc/perf/callchain.c
> +++ b/arch/powerpc/perf/callchain.c
> @@ -115,7 +115,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
>  {
>  	pgd_t *pgdir;
>  	pte_t *ptep, pte;
> -	unsigned shift;
> +	unsigned shift, hugepage;
>  	unsigned long addr = (unsigned long) ptr;
>  	unsigned long offset;
>  	unsigned long pfn;
> @@ -125,20 +125,30 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
>  	if (!pgdir)
>  		return -EFAULT;
>  
> -	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
> +	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, &hugepage);

So, this patch pretty much demonstrates that your earlier patch adding
the optional hugepage argument and making the existing callers pass
NULL was broken.

Any code which calls this function and doesn't use and handle the
hugepage return value is horribly broken, so permitting the hugepage
parameter to be optional is itself broken.

I think instead you need to have an early patch that replaces
find_linux_pte_or_hugepte with a new, more abstracted interface, so
that code using it will remain correct when hugepage PMDs become
possible.

>  	if (!shift)
>  		shift = PAGE_SHIFT;
>  
> -	/* align address to page boundary */
> -	offset = addr & ((1UL << shift) - 1);
> -	addr -= offset;
> -
> -	if (ptep == NULL)
> -		return -EFAULT;
> -	pte = *ptep;
> -	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
> +	if (!ptep)
>  		return -EFAULT;
> -	pfn = pte_pfn(pte);
> +
> +	if (hugepage) {
> +		pmd_t pmd = *(pmd_t *)ptep;
> +		shift = mmu_psize_defs[MMU_PAGE_16M].shift;
> +		offset = addr & ((1UL << shift) - 1);
> +
> +		if (!pmd_large(pmd) || !(pmd_val(pmd) & PMD_HUGE_USER))
> +			return -EFAULT;
> +		pfn = pmd_pfn(pmd);
> +	} else {
> +		offset = addr & ((1UL << shift) - 1);
> +
> +		pte = *ptep;
> +		if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
> +			return -EFAULT;
> +		pfn = pte_pfn(pte);
> +	}
> +
>  	if (!page_is_ram(pfn))
>  		return -EFAULT;
>
Aneesh Kumar K.V - April 12, 2013, 5:05 a.m.
David Gibson <dwg@au1.ibm.com> writes:

> On Thu, Apr 04, 2013 at 11:27:59AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/perf/callchain.c |   32 +++++++++++++++++++++-----------
>>  1 file changed, 21 insertions(+), 11 deletions(-)
>> 
>> diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
>> index 578cac7..99262ce 100644
>> --- a/arch/powerpc/perf/callchain.c
>> +++ b/arch/powerpc/perf/callchain.c
>> @@ -115,7 +115,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
>>  {
>>  	pgd_t *pgdir;
>>  	pte_t *ptep, pte;
>> -	unsigned shift;
>> +	unsigned shift, hugepage;
>>  	unsigned long addr = (unsigned long) ptr;
>>  	unsigned long offset;
>>  	unsigned long pfn;
>> @@ -125,20 +125,30 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
>>  	if (!pgdir)
>>  		return -EFAULT;
>>  
>> -	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
>> +	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, &hugepage);
>
> So, this patch pretty much demonstrates that your earlier patch adding
> the optional hugepage argument and making the existing callers pass
> NULL was broken.
>
> Any code which calls this function and doesn't use and handle the
> hugepage return value is horribly broken, so permitting the hugepage
> parameter to be optional is itself broken.
>
> I think instead you need to have an early patch that replaces
> find_linux_pte_or_hugepte with a new, more abstracted interface, so
> that code using it will remain correct when hugepage PMDs become
> possible.


The entire thing could have been simple if we supported only one
hugepage size (this is what sparc ended up doing). I guess we don't want
to do that. Also we want to support 16MB and 16GB, which mean we need
hugepd for 16GB at PGD level. My goal was to keep the hugetlb related
code for both 16MB and 16GB similar and consider THP huge page in a
different bucket.

Let me look at again how best I can simplify find_linux_pte_or_hugepte

-aneehs

Patch

diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 578cac7..99262ce 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -115,7 +115,7 @@  static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
 {
 	pgd_t *pgdir;
 	pte_t *ptep, pte;
-	unsigned shift;
+	unsigned shift, hugepage;
 	unsigned long addr = (unsigned long) ptr;
 	unsigned long offset;
 	unsigned long pfn;
@@ -125,20 +125,30 @@  static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
 	if (!pgdir)
 		return -EFAULT;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
+	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, &hugepage);
 	if (!shift)
 		shift = PAGE_SHIFT;
 
-	/* align address to page boundary */
-	offset = addr & ((1UL << shift) - 1);
-	addr -= offset;
-
-	if (ptep == NULL)
-		return -EFAULT;
-	pte = *ptep;
-	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+	if (!ptep)
 		return -EFAULT;
-	pfn = pte_pfn(pte);
+
+	if (hugepage) {
+		pmd_t pmd = *(pmd_t *)ptep;
+		shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+		offset = addr & ((1UL << shift) - 1);
+
+		if (!pmd_large(pmd) || !(pmd_val(pmd) & PMD_HUGE_USER))
+			return -EFAULT;
+		pfn = pmd_pfn(pmd);
+	} else {
+		offset = addr & ((1UL << shift) - 1);
+
+		pte = *ptep;
+		if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+			return -EFAULT;
+		pfn = pte_pfn(pte);
+	}
+
 	if (!page_is_ram(pfn))
 		return -EFAULT;