
[02/10] mm/hugetlb: Add PGD based implementation awareness

Message ID 1460007464-26726-3-git-send-email-khandual@linux.vnet.ibm.com (mailing list archive)
State Not Applicable

Commit Message

Anshuman Khandual April 7, 2016, 5:37 a.m. UTC
Currently, functions enabled by the ARCH_WANT_GENERAL_HUGETLB config,
such as 'huge_pte_alloc' and 'huge_pte_offset', don't take into account
a HugeTLB page implementation at the PGD level. This is also true for
functions like 'follow_page_mask', which is called from the move_pages()
system call. This lack of PGD level huge page support prevents some
architectures from using these generic HugeTLB functions.

This change adds the required PGD based implementation awareness; with
that, more architectures like POWER, which implements 16GB pages at the
PGD level along with 16MB pages at the PMD level, can now use the
ARCH_WANT_GENERAL_HUGETLB config option.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 include/linux/hugetlb.h |  3 +++
 mm/gup.c                |  6 ++++++
 mm/hugetlb.c            | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+)
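
[Editorial note: as a minimal illustration of what the patch assumes, the
generic code maps a huge page size to the page table level that holds its
entry. hugepage_level() below is a hypothetical helper, not part of this
patch; PGDIR_SIZE/PUD_SIZE/PMD_SIZE are per-architecture, and the 16GB and
16MB figures are the POWER examples from the commit message.]

static inline int hugepage_level(unsigned long sz)
{
	if (sz == PGDIR_SIZE)		/* e.g. 16GB pages on POWER */
		return 1;		/* entry sits directly in the PGD */
	if (sz == PUD_SIZE)
		return 2;		/* entry sits in a PUD */
	if (sz == PMD_SIZE)		/* e.g. 16MB pages on POWER */
		return 3;		/* entry sits in a PMD */
	return 0;			/* not a huge page size handled here */
}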

Comments

Balbir Singh April 7, 2016, 9:04 a.m. UTC | #1
On 07/04/16 15:37, Anshuman Khandual wrote:
> Currently, functions enabled by the ARCH_WANT_GENERAL_HUGETLB config,
> such as 'huge_pte_alloc' and 'huge_pte_offset', don't take into account
> a HugeTLB page implementation at the PGD level. This is also true for
> functions like 'follow_page_mask', which is called from the move_pages()
> system call. This lack of PGD level huge page support prevents some
> architectures from using these generic HugeTLB functions.
> 

From what I know of move_pages(), it will always call follow_page_mask()
with FOLL_GET (I could be wrong here), and the implementation below
returns NULL for follow_huge_pgd().
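
[Editorial note: for context, the move_pages() path resolves each user
address roughly as below; this is a paraphrased sketch of
do_move_page_to_node_array() in mm/migrate.c of that era, not a verbatim
quote. follow_page() is a thin wrapper around follow_page_mask(), so the
FOLL_GET flag does reach the new PGD branch.]

	/* For each page the user asked to move: */
	page = follow_page(vma, pp->addr, FOLL_GET | FOLL_SPLIT);

	err = PTR_ERR(page);
	if (IS_ERR(page))
		goto set_status;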

> This change adds the required PGD based implementation awareness; with
> that, more architectures like POWER, which implements 16GB pages at the
> PGD level along with 16MB pages at the PMD level, can now use the
> ARCH_WANT_GENERAL_HUGETLB config option.
> 
> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> ---
>  include/linux/hugetlb.h |  3 +++
>  mm/gup.c                |  6 ++++++
>  mm/hugetlb.c            | 20 ++++++++++++++++++++
>  3 files changed, 29 insertions(+)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 7d953c2..71832e1 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -115,6 +115,8 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
>  				pmd_t *pmd, int flags);
>  struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
>  				pud_t *pud, int flags);
> +struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
> +				pgd_t *pgd, int flags);
>  int pmd_huge(pmd_t pmd);
>  int pud_huge(pud_t pmd);
>  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
> @@ -143,6 +145,7 @@ static inline void hugetlb_show_meminfo(void)
>  }
>  #define follow_huge_pmd(mm, addr, pmd, flags)	NULL
>  #define follow_huge_pud(mm, addr, pud, flags)	NULL
> +#define follow_huge_pgd(mm, addr, pgd, flags)	NULL
>  #define prepare_hugepage_range(file, addr, len)	(-EINVAL)
>  #define pmd_huge(x)	0
>  #define pud_huge(x)	0
> diff --git a/mm/gup.c b/mm/gup.c
> index fb87aea..9bac78c 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -234,6 +234,12 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
>  	pgd = pgd_offset(mm, address);
>  	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
>  		return no_page_table(vma, flags);
> +	if (pgd_huge(*pgd) && vma->vm_flags & VM_HUGETLB) {
> +		page = follow_huge_pgd(mm, address, pgd, flags);
> +		if (page)
> +			return page;
> +		return no_page_table(vma, flags);
This will return NULL as well?
> +	}
>  
>  	pud = pud_offset(pgd, address);
>  	if (pud_none(*pud))
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 19d0d08..5ea3158 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -4250,6 +4250,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>  	pte_t *pte = NULL;
>  
>  	pgd = pgd_offset(mm, addr);
> +	if (sz == PGDIR_SIZE) {
> +		pte = (pte_t *)pgd;
> +		goto huge_pgd;
> +	}
> +

No allocation for a pgd slot - right?

>  	pud = pud_alloc(mm, pgd, addr);
>  	if (pud) {
>  		if (sz == PUD_SIZE) {
> @@ -4262,6 +4267,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>  				pte = (pte_t *)pmd_alloc(mm, pud, addr);
>  		}
>  	}
> +
> +huge_pgd:
>  	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
>  
>  	return pte;
> @@ -4275,6 +4282,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
>  
>  	pgd = pgd_offset(mm, addr);
>  	if (pgd_present(*pgd)) {
> +		if (pgd_huge(*pgd))
> +			return (pte_t *)pgd;
>  		pud = pud_offset(pgd, addr);
>  		if (pud_present(*pud)) {
>  			if (pud_huge(*pud))
> @@ -4343,6 +4352,17 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
>  	return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
>  }
>  
> +struct page * __weak
> +follow_huge_pgd(struct mm_struct *mm, unsigned long address,
> +		pgd_t *pgd, int flags)
> +{
> +	if (flags & FOLL_GET)
> +		return NULL;
> +
> +	return pte_page(*(pte_t *)pgd) +
> +				((address & ~PGDIR_MASK) >> PAGE_SHIFT);
> +}
> +
>  #ifdef CONFIG_MEMORY_FAILURE
>  
>  /*
>
Anshuman Khandual April 11, 2016, 5:25 a.m. UTC | #2
On 04/07/2016 02:34 PM, Balbir Singh wrote:
> 
> 
> On 07/04/16 15:37, Anshuman Khandual wrote:
>> Currently, functions enabled by the ARCH_WANT_GENERAL_HUGETLB config,
>> such as 'huge_pte_alloc' and 'huge_pte_offset', don't take into account
>> a HugeTLB page implementation at the PGD level. This is also true for
>> functions like 'follow_page_mask', which is called from the move_pages()
>> system call. This lack of PGD level huge page support prevents some
>> architectures from using these generic HugeTLB functions.
>>
> 
> From what I know of move_pages(), it will always call follow_page_mask()
> with FOLL_GET (I could be wrong here), and the implementation below
> returns NULL for follow_huge_pgd().

You are right. This patch makes the ARCH_WANT_GENERAL_HUGETLB functions
aware of the PGD implementation so that we can do all transactions on
16GB pages using these functions instead of the present arch overrides.
But that also requires follow_page_mask() changes for every access path
to the page other than the migrate_pages() usage.

But yes, we don't support migrate_pages() on PGD based pages yet, hence
it just returns NULL in that case. Maybe the commit message needs to
reflect this.

> 
>> This change adds the required PGD based implementation awareness; with
>> that, more architectures like POWER, which implements 16GB pages at the
>> PGD level along with 16MB pages at the PMD level, can now use the
>> ARCH_WANT_GENERAL_HUGETLB config option.
>>
>> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
>> ---
>>  include/linux/hugetlb.h |  3 +++
>>  mm/gup.c                |  6 ++++++
>>  mm/hugetlb.c            | 20 ++++++++++++++++++++
>>  3 files changed, 29 insertions(+)
>>
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 7d953c2..71832e1 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -115,6 +115,8 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
>>  				pmd_t *pmd, int flags);
>>  struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
>>  				pud_t *pud, int flags);
>> +struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
>> +				pgd_t *pgd, int flags);
>>  int pmd_huge(pmd_t pmd);
>>  int pud_huge(pud_t pmd);
>>  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>> @@ -143,6 +145,7 @@ static inline void hugetlb_show_meminfo(void)
>>  }
>>  #define follow_huge_pmd(mm, addr, pmd, flags)	NULL
>>  #define follow_huge_pud(mm, addr, pud, flags)	NULL
>> +#define follow_huge_pgd(mm, addr, pgd, flags)	NULL
>>  #define prepare_hugepage_range(file, addr, len)	(-EINVAL)
>>  #define pmd_huge(x)	0
>>  #define pud_huge(x)	0
>> diff --git a/mm/gup.c b/mm/gup.c
>> index fb87aea..9bac78c 100644
>> --- a/mm/gup.c
>> +++ b/mm/gup.c
>> @@ -234,6 +234,12 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
>>  	pgd = pgd_offset(mm, address);
>>  	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
>>  		return no_page_table(vma, flags);
>> +	if (pgd_huge(*pgd) && vma->vm_flags & VM_HUGETLB) {
>> +		page = follow_huge_pgd(mm, address, pgd, flags);
>> +		if (page)
>> +			return page;
>> +		return no_page_table(vma, flags);
> This will return NULL as well?

That's right: no_page_table() returns NULL for FOLL_GET when we fall
through after failing on follow_huge_pgd().
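
[Editorial note: for reference, no_page_table() in mm/gup.c around this
time reads as below, so a FOLL_GET caller that falls through indeed sees
NULL; only FOLL_DUMP on an untouched hole gets an error instead.]

static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, don't allocate page tables; return an
	 * error so get_dump_page() leaves a hole in the dump.
	 */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}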

>> +	}
>>  
>>  	pud = pud_offset(pgd, address);
>>  	if (pud_none(*pud))
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 19d0d08..5ea3158 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -4250,6 +4250,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>>  	pte_t *pte = NULL;
>>  
>>  	pgd = pgd_offset(mm, addr);
>> +	if (sz == PGDIR_SIZE) {
>> +		pte = (pte_t *)pgd;
>> +		goto huge_pgd;
>> +	}
>> +
> 
> No allocation for a pgd slot - right?

No, it's already allocated for the mm during creation.
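
[Editorial note: the allocation referred to here happens when the mm is
created; kernel/fork.c allocates the top-level table via mm_alloc_pgd(),
roughly as below.]

static inline int mm_alloc_pgd(struct mm_struct *mm)
{
	/* Every mm gets its PGD up front, so no slot allocation later. */
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}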
Anshuman Khandual April 11, 2016, 6:10 a.m. UTC | #3
On 04/11/2016 10:55 AM, Anshuman Khandual wrote:
> On 04/07/2016 02:34 PM, Balbir Singh wrote:
>>
>> On 07/04/16 15:37, Anshuman Khandual wrote:
>>> Currently, functions enabled by the ARCH_WANT_GENERAL_HUGETLB config,
>>> such as 'huge_pte_alloc' and 'huge_pte_offset', don't take into account
>>> a HugeTLB page implementation at the PGD level. This is also true for
>>> functions like 'follow_page_mask', which is called from the move_pages()
>>> system call. This lack of PGD level huge page support prevents some
>>> architectures from using these generic HugeTLB functions.
>>>
>> From what I know of move_pages(), it will always call follow_page_mask()
>> with FOLL_GET (I could be wrong here), and the implementation below
>> returns NULL for follow_huge_pgd().
> You are right. This patch makes the ARCH_WANT_GENERAL_HUGETLB functions
> aware of the PGD implementation so that we can do all transactions on
> 16GB pages using these functions instead of the present arch overrides.
> But that also requires follow_page_mask() changes for every access path
> to the page other than the migrate_pages() usage.
> 
> But yes, we don't support migrate_pages() on PGD based pages yet, hence
> it just returns NULL in that case. Maybe the commit message needs to
> reflect this.

The next commit actually changes the follow_huge_pud|pgd() functions to
support FOLL_GET and PGD based huge page migration.
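
[Editorial note: a minimal sketch of what a FOLL_GET capable
follow_huge_pgd() could look like, mirroring how follow_huge_pmd() was
taught to take a reference; this is an assumption about that next commit,
not its contents, and it ignores the locking and migration entry handling
a real version would need.]

struct page * __weak
follow_huge_pgd(struct mm_struct *mm, unsigned long address,
		pgd_t *pgd, int flags)
{
	struct page *page;

	page = pte_page(*(pte_t *)pgd) +
			((address & ~PGDIR_MASK) >> PAGE_SHIFT);
	if (flags & FOLL_GET)
		get_page(page);		/* hold a reference for the caller */
	return page;
}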

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7d953c2..71832e1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -115,6 +115,8 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 				pmd_t *pmd, int flags);
 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int flags);
+struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
+				pgd_t *pgd, int flags);
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pmd);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
@@ -143,6 +145,7 @@ static inline void hugetlb_show_meminfo(void)
 }
 #define follow_huge_pmd(mm, addr, pmd, flags)	NULL
 #define follow_huge_pud(mm, addr, pud, flags)	NULL
+#define follow_huge_pgd(mm, addr, pgd, flags)	NULL
 #define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
 #define pud_huge(x)	0
diff --git a/mm/gup.c b/mm/gup.c
index fb87aea..9bac78c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -234,6 +234,12 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return no_page_table(vma, flags);
+	if (pgd_huge(*pgd) && vma->vm_flags & VM_HUGETLB) {
+		page = follow_huge_pgd(mm, address, pgd, flags);
+		if (page)
+			return page;
+		return no_page_table(vma, flags);
+	}
 
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 19d0d08..5ea3158 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4250,6 +4250,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
+	if (sz == PGDIR_SIZE) {
+		pte = (pte_t *)pgd;
+		goto huge_pgd;
+	}
+
 	pud = pud_alloc(mm, pgd, addr);
 	if (pud) {
 		if (sz == PUD_SIZE) {
@@ -4262,6 +4267,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 				pte = (pte_t *)pmd_alloc(mm, pud, addr);
 		}
 	}
+
+huge_pgd:
 	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
 	return pte;
@@ -4275,6 +4282,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd_present(*pgd)) {
+		if (pgd_huge(*pgd))
+			return (pte_t *)pgd;
 		pud = pud_offset(pgd, addr);
 		if (pud_present(*pud)) {
 			if (pud_huge(*pud))
@@ -4343,6 +4352,17 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
 }
 
+struct page * __weak
+follow_huge_pgd(struct mm_struct *mm, unsigned long address,
+		pgd_t *pgd, int flags)
+{
+	if (flags & FOLL_GET)
+		return NULL;
+
+	return pte_page(*(pte_t *)pgd) +
+				((address & ~PGDIR_MASK) >> PAGE_SHIFT);
+}
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /*