diff mbox

[RFC,4/9] powerpc/mm: Split huge_pte_alloc function for BOOK3S 64K

Message ID 1457525450-4262-4-git-send-email-khandual@linux.vnet.ibm.com (mailing list archive)
State RFC
Headers show

Commit Message

Anshuman Khandual March 9, 2016, 12:10 p.m. UTC
From: root <root@ltcalpine2-lp8.aus.stglabs.ibm.com>

Currently the 'huge_pte_alloc' function has two versions, one for the
BOOK3S and the other one for the BOOK3E platforms. This change splits
the BOOK3S version into two parts, one for the 4K page size based
implementation and the other one for the 64K page sized implementation.
This change is one of the prerequisites towards enabling GENERAL_HUGETLB
implementation for BOOK3S 64K based huge pages.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hugetlbpage.c | 67 +++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 24 deletions(-)

Comments

Aneesh Kumar K.V March 9, 2016, 7:55 p.m. UTC | #1
Anshuman Khandual <khandual@linux.vnet.ibm.com> writes:

> [ text/plain ]
> From: root <root@ltcalpine2-lp8.aus.stglabs.ibm.com>
>
> Currently the 'huge_pte_alloc' function has two versions, one for the
> BOOK3S and the other one for the BOOK3E platforms. This change splits
> the BOOK3S version into two parts, one for the 4K page size based
> implementation and the other one for the 64K page sized implementation.
> This change is one of the prerequisites towards enabling GENERAL_HUGETLB
> implementation for BOOK3S 64K based huge pages.

I really wish we reduce #ifdefs in C code and start splitting hash
and nonhash code out where ever we can. 

What we really want here is a book3s version and in book3s version use
powerpc specific huge_pte_alloc only if GENERAL_HUGETLB was not defined.
Don't limit it to 64k linux page size. We should select between powerpc
specific implementation and generic code using GENERAL_HUGETLB define.


>
> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> ---
>  arch/powerpc/mm/hugetlbpage.c | 67 +++++++++++++++++++++++++++----------------
>  1 file changed, 43 insertions(+), 24 deletions(-)
>
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 744e24b..a49c6ae 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -59,6 +59,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
>  	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
>  }
>
> +#if !defined(CONFIG_PPC_64K_PAGES) || !defined(CONFIG_PPC_BOOK3S_64)
>  static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
>  			   unsigned long address, unsigned pdshift, unsigned pshift)
>  {
> @@ -117,6 +118,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
>  	spin_unlock(&mm->page_table_lock);
>  	return 0;
>  }
> +#endif /* !defined(CONFIG_PPC_64K_PAGES) || !defined(CONFIG_PPC_BOOK3S_64) */
>
>  /*
>   * These macros define how to determine which level of the page table holds
> @@ -131,6 +133,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
>  #endif
>
>  #ifdef CONFIG_PPC_BOOK3S_64
> +#ifdef CONFIG_PPC_4K_PAGES
>  /*
>   * At this point we do the placement change only for BOOK3S 64. This would
>   * possibly work on other subarchs.
> @@ -146,32 +149,23 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
>
>  	addr &= ~(sz-1);
>  	pg = pgd_offset(mm, addr);
> -
> -	if (pshift == PGDIR_SHIFT)
> -		/* 16GB huge page */
> -		return (pte_t *) pg;
> -	else if (pshift > PUD_SHIFT)
> -		/*
> -		 * We need to use hugepd table
> -		 */
> +	if (pshift > PUD_SHIFT) {
>  		hpdp = (hugepd_t *)pg;
> -	else {
> -		pdshift = PUD_SHIFT;
> -		pu = pud_alloc(mm, pg, addr);
> -		if (pshift == PUD_SHIFT)
> -			return (pte_t *)pu;
> -		else if (pshift > PMD_SHIFT)
> -			hpdp = (hugepd_t *)pu;
> -		else {
> -			pdshift = PMD_SHIFT;
> -			pm = pmd_alloc(mm, pu, addr);
> -			if (pshift == PMD_SHIFT)
> -				/* 16MB hugepage */
> -				return (pte_t *)pm;
> -			else
> -				hpdp = (hugepd_t *)pm;
> -		}
> +		goto hugepd_search;
> +	}
> +
> +	pdshift = PUD_SHIFT;
> +	pu = pud_alloc(mm, pg, addr);
> +	if (pshift > PMD_SHIFT) {
> +		hpdp = (hugepd_t *)pu;
> +		goto hugepd_search;
>  	}
> +
> +	pdshift = PMD_SHIFT;
> +	pm = pmd_alloc(mm, pu, addr);
> +	hpdp = (hugepd_t *)pm;
> +
> +hugepd_search:
>  	if (!hpdp)
>  		return NULL;
>
> @@ -184,6 +178,31 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
>  }
>
>  #else
> +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
> +{
> +	pgd_t *pg;
> +	pud_t *pu;
> +	pmd_t *pm;
> +	unsigned pshift = __ffs(sz);
> +
> +	addr &= ~(sz-1);
> +	pg = pgd_offset(mm, addr);
> +
> +	if (pshift == PGDIR_SHIFT)	/* 16GB Huge Page */
> +		return (pte_t *)pg;
> +
> +	pu = pud_alloc(mm, pg, addr);	/* NA, skipped */
> +	if (pshift == PUD_SHIFT)
> +		return (pte_t *)pu;
> +
> +	pm = pmd_alloc(mm, pu, addr);	/* 16MB Huge Page */
> +	if (pshift == PMD_SHIFT)
> +		return (pte_t *)pm;
> +
> +	return NULL;
> +}
> +#endif
> +#else
>
>  pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
>  {
> -- 
> 2.1.0
Anshuman Khandual March 10, 2016, 5:33 a.m. UTC | #2
On 03/10/2016 01:25 AM, Aneesh Kumar K.V wrote:
> Anshuman Khandual <khandual@linux.vnet.ibm.com> writes:
> 
>> > [ text/plain ]
>> > From: root <root@ltcalpine2-lp8.aus.stglabs.ibm.com>
>> >
>> > Currently the 'huge_pte_alloc' function has two versions, one for the
>> > BOOK3S and the other one for the BOOK3E platforms. This change splits
>> > the BOOK3S version into two parts, one for the 4K page size based
>> > implementation and the other one for the 64K page sized implementation.
>> > This change is one of the prerequisites towards enabling GENERAL_HUGETLB
>> > implementation for BOOK3S 64K based huge pages.
> I really wish we reduce #ifdefs in C code and start splitting hash
> and nonhash code out where ever we can.

Okay but here we are only dealing with 64K and 4K configs inside book3s.
I guess it covers both hash and no hash implementations. Not sure if I
got it correctly.

> 
> What we really want here is a book3s version and in book3s version use
> powerpc specific huge_pte_alloc only if GENERAL_HUGETLB was not defined.

got it.

> Don't limit it to 64k linux page size. We should select between powerpc
> specific implementation and generic code using GENERAL_HUGETLB define.

Got it. will try.
diff mbox

Patch

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 744e24b..a49c6ae 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -59,6 +59,7 @@  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
 }
 
+#if !defined(CONFIG_PPC_64K_PAGES) || !defined(CONFIG_PPC_BOOK3S_64)
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			   unsigned long address, unsigned pdshift, unsigned pshift)
 {
@@ -117,6 +118,7 @@  static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
+#endif /* !defined(CONFIG_PPC_64K_PAGES) || !defined(CONFIG_PPC_BOOK3S_64) */
 
 /*
  * These macros define how to determine which level of the page table holds
@@ -131,6 +133,7 @@  static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_4K_PAGES
 /*
  * At this point we do the placement change only for BOOK3S 64. This would
  * possibly work on other subarchs.
@@ -146,32 +149,23 @@  pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 
 	addr &= ~(sz-1);
 	pg = pgd_offset(mm, addr);
-
-	if (pshift == PGDIR_SHIFT)
-		/* 16GB huge page */
-		return (pte_t *) pg;
-	else if (pshift > PUD_SHIFT)
-		/*
-		 * We need to use hugepd table
-		 */
+	if (pshift > PUD_SHIFT) {
 		hpdp = (hugepd_t *)pg;
-	else {
-		pdshift = PUD_SHIFT;
-		pu = pud_alloc(mm, pg, addr);
-		if (pshift == PUD_SHIFT)
-			return (pte_t *)pu;
-		else if (pshift > PMD_SHIFT)
-			hpdp = (hugepd_t *)pu;
-		else {
-			pdshift = PMD_SHIFT;
-			pm = pmd_alloc(mm, pu, addr);
-			if (pshift == PMD_SHIFT)
-				/* 16MB hugepage */
-				return (pte_t *)pm;
-			else
-				hpdp = (hugepd_t *)pm;
-		}
+		goto hugepd_search;
+	}
+
+	pdshift = PUD_SHIFT;
+	pu = pud_alloc(mm, pg, addr);
+	if (pshift > PMD_SHIFT) {
+		hpdp = (hugepd_t *)pu;
+		goto hugepd_search;
 	}
+
+	pdshift = PMD_SHIFT;
+	pm = pmd_alloc(mm, pu, addr);
+	hpdp = (hugepd_t *)pm;
+
+hugepd_search:
 	if (!hpdp)
 		return NULL;
 
@@ -184,6 +178,31 @@  pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 }
 
 #else
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	unsigned pshift = __ffs(sz);
+
+	addr &= ~(sz-1);
+	pg = pgd_offset(mm, addr);
+
+	if (pshift == PGDIR_SHIFT)	/* 16GB Huge Page */
+		return (pte_t *)pg;
+
+	pu = pud_alloc(mm, pg, addr);	/* NA, skipped */
+	if (pshift == PUD_SHIFT)
+		return (pte_t *)pu;
+
+	pm = pmd_alloc(mm, pu, addr);	/* 16MB Huge Page */
+	if (pshift == PMD_SHIFT)
+		return (pte_t *)pm;
+
+	return NULL;
+}
+#endif
+#else
 
 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {