Message ID | 1452582968-22669-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Superseded |
On Tue, 12 Jan 2016 12:45:38 +0530 "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote: > This is needed so that we can support both hash and radix page table > using single kernel. Radix kernel uses a 4 level table. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > --- > arch/powerpc/Kconfig | 1 + > arch/powerpc/include/asm/book3s/64/hash-4k.h | 33 > +-------------------------- > arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +++++++++------- > arch/powerpc/include/asm/book3s/64/hash.h | 8 +++++++ > arch/powerpc/include/asm/book3s/64/pgtable.h | 25 > +++++++++++++++++++- arch/powerpc/include/asm/pgalloc-64.h | > 24 ++++++++++++++++--- arch/powerpc/include/asm/pgtable-types.h > | 13 +++++++---- arch/powerpc/mm/init_64.c | 21 > ++++++++++++----- 8 files changed, 90 insertions(+), 55 deletions(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 378f1127ca98..618afea4c9fc 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -303,6 +303,7 @@ config ZONE_DMA32 snip > - > #define PTE_INDEX_SIZE 8 > -#define PMD_INDEX_SIZE 10 > -#define PUD_INDEX_SIZE 0 > +#define PMD_INDEX_SIZE 5 > +#define PUD_INDEX_SIZE 5 > #define PGD_INDEX_SIZE 12 > OK, so PMD index split from 10 to 5 and 5 to PMD/PUD? What is the plan for huge pages, I saw you mentioned it was a TODO > #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) > #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) > +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) > #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) > > /* With 4k base page size, hugepage PTEs go at the PMD level */ > @@ -20,8 +19,13 @@ > #define PMD_SIZE (1UL << PMD_SHIFT) > #define PMD_MASK (~(PMD_SIZE-1)) > > +/* PUD_SHIFT determines what a third-level page table entry can map > */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) > +#define PUD_SIZE (1UL << PUD_SHIFT) > +#define PUD_MASK (~(PUD_SIZE-1)) > + > /* PGDIR_SHIFT determines what a third-level page table entry can > map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) > +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) > #define PGDIR_SIZE (1UL << PGDIR_SHIFT) > #define PGDIR_MASK (~(PGDIR_SIZE-1)) > > @@ -61,6 +65,8 @@ > #define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) > /* Bits to mask out from a PGD/PUD to get to the PMD page */ The comment looks like it applied to PMD and not PUD. > #define PUD_MASKED_BITS 0x1ff Given that PUD is now 5 bits, this should be 0x1f? > +/* FIXME!! check this */ > +#define PGD_MASKED_BITS 0 > PGD_MASKED_BITS is 0? 
Shouldn't it be 0xfe > #ifndef __ASSEMBLY__ > > @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, > unsigned long index); #else > #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) > #endif > +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) > #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) > > -#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) > -#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) > - > #ifdef CONFIG_HUGETLB_PAGE > /* > * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can > have diff --git a/arch/powerpc/include/asm/book3s/64/hash.h > b/arch/powerpc/include/asm/book3s/64/hash.h index > f46974d0134a..9ff1e056acef 100644 --- > a/arch/powerpc/include/asm/book3s/64/hash.h +++ > b/arch/powerpc/include/asm/book3s/64/hash.h @@ -226,6 +226,7 @@ > #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) > > #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & > (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >> > (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address) > (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define > pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) > @@ -354,8 +355,15 @@ static inline void __ptep_set_access_flags(pte_t > *ptep, pte_t entry) :"cc"); > } > > +static inline int pgd_bad(pgd_t pgd) > +{ > + return (pgd_val(pgd) == 0); > +} > + > #define __HAVE_ARCH_PTE_SAME > #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & > ~_PAGE_HPTEFLAGS) == 0) +#define pgd_page_vaddr(pgd) > (pgd_val(pgd) & ~PGD_MASKED_BITS) + > > /* Generic accessors to PTE bits */ > static inline int pte_write(pte_t pte) > { return !!(pte_val(pte) & _PAGE_RW);} diff --git > a/arch/powerpc/include/asm/book3s/64/pgtable.h > b/arch/powerpc/include/asm/book3s/64/pgtable.h index > e7162dba987e..8f639401c7ba 100644 --- > a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ > b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -111,6 +111,26 @@ > static inline void pgd_set(pgd_t *pgdp, unsigned long val) *pgdp = > __pgd(val); } > +static inline void pgd_clear(pgd_t *pgdp) > +{ > + *pgdp = __pgd(0); > +} > + > +#define pgd_none(pgd) (!pgd_val(pgd)) > +#define pgd_present(pgd) (!pgd_none(pgd)) > + > +static inline pte_t pgd_pte(pgd_t pgd) > +{ > + return __pte(pgd_val(pgd)); > +} > + > +static inline pgd_t pte_pgd(pte_t pte) > +{ > + return __pgd(pte_val(pte)); > +} > + > +extern struct page *pgd_page(pgd_t pgd); > + > /* > * Find an entry in a page-table-directory. We combine the address > region > * (the high order N bits) and the pgd portion of the address. 
> @@ -118,9 +138,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned > long val) > #define pgd_offset(mm, address) ((mm)->pgd + > pgd_index(address)) > +#define pud_offset(pgdp, addr) \ > + (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) > #define pmd_offset(pudp,addr) \ > (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) > - > #define pte_offset_kernel(dir,addr) \ > (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) > > @@ -135,6 +156,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned > long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, > pte_val(e)) #define pmd_ERROR(e) \ > pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, > pmd_val(e)) +#define pud_ERROR(e) \ > + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, > pud_val(e)) #define pgd_ERROR(e) \ > pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, > pgd_val(e)) > diff --git a/arch/powerpc/include/asm/pgalloc-64.h > b/arch/powerpc/include/asm/pgalloc-64.h index > 69ef28a81733..014489a619d0 100644 --- > a/arch/powerpc/include/asm/pgalloc-64.h +++ > b/arch/powerpc/include/asm/pgalloc-64.h @@ -171,7 +171,25 @@ extern > void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int > shift); extern void __tlb_remove_table(void *_table); #endif > > -#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned > long)pmd) +#ifndef __PAGETABLE_PUD_FOLDED > +/* book3s 64 is 4 level page table */ > +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) > +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned > long addr) +{ > + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), > + GFP_KERNEL|__GFP_REPEAT); > +} > + > +static inline void pud_free(struct mm_struct *mm, pud_t *pud) > +{ > + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); > +} > +#endif > + > +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, > pmd_t *pmd) +{ > + pud_set(pud, (unsigned long)pmd); > +} > > static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t > *pmd, pte_t *pte) > @@ -233,11 +251,11 @@ static inline void pmd_free(struct mm_struct > *mm, pmd_t *pmd) > #define __pmd_free_tlb(tlb, pmd, addr) \ > pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) > -#ifndef CONFIG_PPC_64K_PAGES > +#ifndef __PAGETABLE_PUD_FOLDED > #define __pud_free_tlb(tlb, pud, addr) \ > pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) > > -#endif /* CONFIG_PPC_64K_PAGES */ > +#endif /* __PAGETABLE_PUD_FOLDED */ > > #define check_pgt_cache() do { } while (0) > > diff --git a/arch/powerpc/include/asm/pgtable-types.h > b/arch/powerpc/include/asm/pgtable-types.h index > 71487e1ca638..43140f8b0592 100644 --- > a/arch/powerpc/include/asm/pgtable-types.h +++ > b/arch/powerpc/include/asm/pgtable-types.h @@ -21,15 +21,18 @@ static > inline unsigned long pmd_val(pmd_t x) return x.pmd; > } > > -/* PUD level exusts only on 4k pages */ > -#ifndef CONFIG_PPC_64K_PAGES > +/* > + * 64 bit hash always use 4 level table. Everybody else use 4 level > + * only for 4K page size. 
> + */ > +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) > typedef struct { unsigned long pud; } pud_t; > #define __pud(x) ((pud_t) { (x) }) > static inline unsigned long pud_val(pud_t x) > { > return x.pud; > } > -#endif /* !CONFIG_PPC_64K_PAGES */ > +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ > #endif /* CONFIG_PPC64 */ > > /* PGD level */ > @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd) > return pmd; > } > > -#ifndef CONFIG_PPC_64K_PAGES > +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) > typedef unsigned long pud_t; > #define __pud(x) (x) > static inline unsigned long pud_val(pud_t pud) > { > return pud; > } > -#endif /* !CONFIG_PPC_64K_PAGES */ > +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ > #endif /* CONFIG_PPC64 */ > > typedef unsigned long pgd_t; > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c > index 379a6a90644b..8ce1ec24d573 100644 > --- a/arch/powerpc/mm/init_64.c > +++ b/arch/powerpc/mm/init_64.c > @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr) > memset(addr, 0, PGD_TABLE_SIZE); > } > > +static void pud_ctor(void *addr) > +{ > + memset(addr, 0, PUD_TABLE_SIZE); > +} > + > static void pmd_ctor(void *addr) > { > memset(addr, 0, PMD_TABLE_SIZE); > @@ -138,14 +143,18 @@ void pgtable_cache_init(void) > { > pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); > pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); > + /* > + * In all current configs, when the PUD index exists it's the > + * same size as either the pgd or pmd index except with THP > enabled > + * on book3s 64 > + */ > + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) > + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); > + > if (!PGT_CACHE(PGD_INDEX_SIZE) > || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable > caches"); > - /* In all current configs, when the PUD index exists it's the > - * same size as either the pgd or pmd index. Verify that the > - * initialization above has also created a PUD cache. This > - * will need re-examiniation if we add new possibilities for > - * the pagetable layout. */ > - BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); > + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) > + panic("Couldn't allocate pud pgtable caches"); > } > > #ifdef CONFIG_SPARSEMEM_VMEMMAP
On 12/01/16 18:15, Aneesh Kumar K.V wrote: > This is needed so that we can support both hash and radix page table > using single kernel. Radix kernel uses a 4 level table. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > --- > arch/powerpc/Kconfig | 1 + > arch/powerpc/include/asm/book3s/64/hash-4k.h | 33 +-------------------------- > arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +++++++++------- > arch/powerpc/include/asm/book3s/64/hash.h | 8 +++++++ > arch/powerpc/include/asm/book3s/64/pgtable.h | 25 +++++++++++++++++++- > arch/powerpc/include/asm/pgalloc-64.h | 24 ++++++++++++++++--- > arch/powerpc/include/asm/pgtable-types.h | 13 +++++++---- > arch/powerpc/mm/init_64.c | 21 ++++++++++++----- > 8 files changed, 90 insertions(+), 55 deletions(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 378f1127ca98..618afea4c9fc 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -303,6 +303,7 @@ config ZONE_DMA32 > config PGTABLE_LEVELS > int > default 2 if !PPC64 > + default 4 if PPC_BOOK3S_64 > default 3 if PPC_64K_PAGES > default 4 > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h > index ea0414d6659e..c78f5928001b 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h > +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h > @@ -57,39 +57,8 @@ > #define _PAGE_4K_PFN 0 > #ifndef __ASSEMBLY__ > /* > - * 4-level page tables related bits > + * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() > */ > - > -#define pgd_none(pgd) (!pgd_val(pgd)) > -#define pgd_bad(pgd) (pgd_val(pgd) == 0) > -#define pgd_present(pgd) (pgd_val(pgd) != 0) > -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) > - > -static inline void pgd_clear(pgd_t *pgdp) > -{ > - *pgdp = __pgd(0); > -} > - > -static inline pte_t pgd_pte(pgd_t pgd) > -{ > - return __pte(pgd_val(pgd)); > -} > - > -static inline pgd_t pte_pgd(pte_t pte) > -{ > - return __pgd(pte_val(pte)); > -} > -extern struct page *pgd_page(pgd_t pgd); > - > -#define pud_offset(pgdp, addr) \ > - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ > - (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) > - > -#define pud_ERROR(e) \ > - pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) > - > -/* > - * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ > #define remap_4k_pfn(vma, addr, pfn, prot) \ > remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h > index 849bbec80f7b..5c9392b71a6b 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h > +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h > @@ -1,15 +1,14 @@ > #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H > #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H > > -#include <asm-generic/pgtable-nopud.h> > - > #define PTE_INDEX_SIZE 8 > -#define PMD_INDEX_SIZE 10 > -#define PUD_INDEX_SIZE 0 > +#define PMD_INDEX_SIZE 5 > +#define PUD_INDEX_SIZE 5 > #define PGD_INDEX_SIZE 12 10 splits to 5 and 5 for PMD/PUD? Does this impact huge page? 
> > #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) > #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) > +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) > #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) > > /* With 4k base page size, hugepage PTEs go at the PMD level */ > @@ -20,8 +19,13 @@ > #define PMD_SIZE (1UL << PMD_SHIFT) > #define PMD_MASK (~(PMD_SIZE-1)) > > +/* PUD_SHIFT determines what a third-level page table entry can map */ > +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) > +#define PUD_SIZE (1UL << PUD_SHIFT) > +#define PUD_MASK (~(PUD_SIZE-1)) > + > /* PGDIR_SHIFT determines what a third-level page table entry can map */ > -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) > +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) > #define PGDIR_SIZE (1UL << PGDIR_SHIFT) > #define PGDIR_MASK (~(PGDIR_SIZE-1)) > > @@ -61,6 +65,8 @@ > #define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) > /* Bits to mask out from a PGD/PUD to get to the PMD page */ > #define PUD_MASKED_BITS 0x1ff > +/* FIXME!! check this */ Shouldn't PUD_MASKED_BITS be 0x1f? > +#define PGD_MASKED_BITS 0 > 0? > #ifndef __ASSEMBLY__ > > @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); > #else > #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) > #endif > +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) > #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) > > -#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) > -#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) > - > #ifdef CONFIG_HUGETLB_PAGE > /* > * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > index f46974d0134a..9ff1e056acef 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash.h > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > @@ -226,6 +226,7 @@ > #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) > > #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) > +#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) > #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) > #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) > > @@ -354,8 +355,15 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) > :"cc"); > } > > +static inline int pgd_bad(pgd_t pgd) > +{ > + return (pgd_val(pgd) == 0); > +} > + > #define __HAVE_ARCH_PTE_SAME > #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) > +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) > + > > /* Generic accessors to PTE bits */ > static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > index e7162dba987e..8f639401c7ba 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -111,6 +111,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) > *pgdp = __pgd(val); > } > > +static inline void pgd_clear(pgd_t *pgdp) > +{ > + *pgdp = __pgd(0); > +} > + > +#define pgd_none(pgd) (!pgd_val(pgd)) > +#define pgd_present(pgd) (!pgd_none(pgd)) > + > +static inline pte_t pgd_pte(pgd_t pgd) > +{ > + return __pte(pgd_val(pgd)); > +} > + > +static inline pgd_t pte_pgd(pte_t pte) > +{ > + return __pgd(pte_val(pte)); > +} > + > +extern struct page *pgd_page(pgd_t pgd); > + > /* > * Find an entry in a page-table-directory. 
We combine the address region > * (the high order N bits) and the pgd portion of the address. > @@ -118,9 +138,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) > > #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) > > +#define pud_offset(pgdp, addr) \ > + (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) > #define pmd_offset(pudp,addr) \ > (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) > - > #define pte_offset_kernel(dir,addr) \ > (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) > > @@ -135,6 +156,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) > pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) > #define pmd_ERROR(e) \ > pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) > +#define pud_ERROR(e) \ > + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) > #define pgd_ERROR(e) \ > pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) > > diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h > index 69ef28a81733..014489a619d0 100644 > --- a/arch/powerpc/include/asm/pgalloc-64.h > +++ b/arch/powerpc/include/asm/pgalloc-64.h > @@ -171,7 +171,25 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); > extern void __tlb_remove_table(void *_table); > #endif > > -#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) > +#ifndef __PAGETABLE_PUD_FOLDED > +/* book3s 64 is 4 level page table */ > +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) > +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > +{ > + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), > + GFP_KERNEL|__GFP_REPEAT); > +} > + > +static inline void pud_free(struct mm_struct *mm, pud_t *pud) > +{ > + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); > +} > +#endif > + > +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) > +{ > + pud_set(pud, (unsigned long)pmd); > +} > > static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, > pte_t *pte) > @@ -233,11 +251,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) > > #define __pmd_free_tlb(tlb, pmd, addr) \ > pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) > -#ifndef CONFIG_PPC_64K_PAGES > +#ifndef __PAGETABLE_PUD_FOLDED > #define __pud_free_tlb(tlb, pud, addr) \ > pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) > > -#endif /* CONFIG_PPC_64K_PAGES */ > +#endif /* __PAGETABLE_PUD_FOLDED */ > > #define check_pgt_cache() do { } while (0) > > diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h > index 71487e1ca638..43140f8b0592 100644 > --- a/arch/powerpc/include/asm/pgtable-types.h > +++ b/arch/powerpc/include/asm/pgtable-types.h > @@ -21,15 +21,18 @@ static inline unsigned long pmd_val(pmd_t x) > return x.pmd; > } > > -/* PUD level exusts only on 4k pages */ > -#ifndef CONFIG_PPC_64K_PAGES > +/* > + * 64 bit hash always use 4 level table. Everybody else use 4 level > + * only for 4K page size. 
> + */ > +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) > typedef struct { unsigned long pud; } pud_t; > #define __pud(x) ((pud_t) { (x) }) > static inline unsigned long pud_val(pud_t x) > { > return x.pud; > } > -#endif /* !CONFIG_PPC_64K_PAGES */ > +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ > #endif /* CONFIG_PPC64 */ > > /* PGD level */ > @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd) > return pmd; > } > > -#ifndef CONFIG_PPC_64K_PAGES > +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) > typedef unsigned long pud_t; > #define __pud(x) (x) > static inline unsigned long pud_val(pud_t pud) > { > return pud; > } > -#endif /* !CONFIG_PPC_64K_PAGES */ > +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ > #endif /* CONFIG_PPC64 */ > > typedef unsigned long pgd_t; > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c > index 379a6a90644b..8ce1ec24d573 100644 > --- a/arch/powerpc/mm/init_64.c > +++ b/arch/powerpc/mm/init_64.c > @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr) > memset(addr, 0, PGD_TABLE_SIZE); > } > > +static void pud_ctor(void *addr) > +{ > + memset(addr, 0, PUD_TABLE_SIZE); > +} > + > static void pmd_ctor(void *addr) > { > memset(addr, 0, PMD_TABLE_SIZE); > @@ -138,14 +143,18 @@ void pgtable_cache_init(void) > { > pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); > pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); > + /* > + * In all current configs, when the PUD index exists it's the > + * same size as either the pgd or pmd index except with THP enabled > + * on book3s 64 > + */ > + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) > + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); > + > if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) > panic("Couldn't allocate pgtable caches"); > - /* In all current configs, when the PUD index exists it's the > - * same size as either the pgd or pmd index. Verify that the > - * initialization above has also created a PUD cache. This > - * will need re-examiniation if we add new possibilities for > - * the pagetable layout. */ > - BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); > + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) > + panic("Couldn't allocate pud pgtable caches"); > } > > #ifdef CONFIG_SPARSEMEM_VMEMMAP
Balbir Singh <bsingharora@gmail.com> writes:

> On 12/01/16 18:15, Aneesh Kumar K.V wrote:
>> This is needed so that we can support both hash and radix page table
>> using single kernel. Radix kernel uses a 4 level table.
>>
>> .....
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> index 849bbec80f7b..5c9392b71a6b 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> @@ -1,15 +1,14 @@
>>  #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
>>  #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
>>
>> -#include <asm-generic/pgtable-nopud.h>
>> -
>>  #define PTE_INDEX_SIZE  8
>> -#define PMD_INDEX_SIZE  10
>> -#define PUD_INDEX_SIZE  0
>> +#define PMD_INDEX_SIZE  5
>> +#define PUD_INDEX_SIZE  5
>>  #define PGD_INDEX_SIZE  12
>
> 10 splits to 5 and 5 for PMD/PUD? Does this impact huge page?

Nope. We have huge pages at the top level and at the PMD level (16G and 16M).

>>  #define PTRS_PER_PTE   (1 << PTE_INDEX_SIZE)
>>  #define PTRS_PER_PMD   (1 << PMD_INDEX_SIZE)
>> +#define PTRS_PER_PUD   (1 << PUD_INDEX_SIZE)
>>  #define PTRS_PER_PGD   (1 << PGD_INDEX_SIZE)
>>
>>  /* With 4k base page size, hugepage PTEs go at the PMD level */
>> @@ -20,8 +19,13 @@
>>  #define PMD_SIZE       (1UL << PMD_SHIFT)
>>  #define PMD_MASK       (~(PMD_SIZE-1))
>>
>> +/* PUD_SHIFT determines what a third-level page table entry can map */
>> +#define PUD_SHIFT      (PMD_SHIFT + PMD_INDEX_SIZE)
>> +#define PUD_SIZE       (1UL << PUD_SHIFT)
>> +#define PUD_MASK       (~(PUD_SIZE-1))
>> +
>>  /* PGDIR_SHIFT determines what a third-level page table entry can map */
>> -#define PGDIR_SHIFT    (PMD_SHIFT + PMD_INDEX_SIZE)
>> +#define PGDIR_SHIFT    (PUD_SHIFT + PUD_INDEX_SIZE)
>>  #define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
>>  #define PGDIR_MASK     (~(PGDIR_SIZE-1))
>>
>> @@ -61,6 +65,8 @@
>>  #define PMD_MASKED_BITS        (PTE_FRAG_SIZE - 1)
>>  /* Bits to mask out from a PGD/PUD to get to the PMD page */
>>  #define PUD_MASKED_BITS        0x1ff
>> +/* FIXME!! check this */
>
> Shouldn't PUD_MASKED_BITS be 0x1f?
>
>> +#define PGD_MASKED_BITS        0
>>
> 0?

The MASKED_BITS need to be cleaned up, hence the FIXME!! Linux page tables
are aligned differently, and I didn't want to clean that up in this series.
IMHO, using #defines like the above instead of deriving the value from the
pmd table alignment is wrong. Will get to that later.

>>  #ifndef __ASSEMBLY__
>>
>> @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
>>  #else
>>  #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
>>  #endif
>> +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
>>  #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
>>
>> -#define pgd_pte(pgd)   (pud_pte(((pud_t){ pgd })))
>> -#define pte_pgd(pte)   ((pgd_t)pte_pud(pte))
>> -
>>  #ifdef CONFIG_HUGETLB_PAGE

-aneesh
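A quick arithmetic check of the new 8/5/5/12 split may help here. The small userspace program below is illustrative only and is not part of the patch; PAGE_SHIFT = 16 is assumed because this is the 64K base page case. It shows that the split keeps the same 46-bit address range as the old 8/10/12 layout, and that the shifts line up with the 16M (PMD level) and 16G (top level) huge page sizes mentioned in the answer above.

/* Illustrative only -- a tiny userspace program, not kernel code.
 * Index sizes are copied from the new hash-64k.h. */
#include <stdio.h>

#define PAGE_SHIFT      16      /* 64K base page */
#define PTE_INDEX_SIZE   8
#define PMD_INDEX_SIZE   5
#define PUD_INDEX_SIZE   5
#define PGD_INDEX_SIZE  12

int main(void)
{
        int pmd_shift   = PAGE_SHIFT + PTE_INDEX_SIZE;  /* 24 -> 16M, PMD hugepage */
        int pud_shift   = pmd_shift + PMD_INDEX_SIZE;   /* 29 -> 512M */
        int pgdir_shift = pud_shift + PUD_INDEX_SIZE;   /* 34 -> 16G, top-level hugepage */
        int va_bits     = pgdir_shift + PGD_INDEX_SIZE; /* 46, same as the old 8/10/12 split */

        printf("PMD_SHIFT=%d PUD_SHIFT=%d PGDIR_SHIFT=%d\n",
               pmd_shift, pud_shift, pgdir_shift);
        printf("addressable: %d bits = %llu TB\n",
               va_bits, (1ULL << va_bits) >> 40);
        return 0;
}

The 46-bit total matches the 64TB range the hash MMU already mapped, so the split does not change the covered address space; it only makes room for a real PUD level.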
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 378f1127ca98..618afea4c9fc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -303,6 +303,7 @@ config ZONE_DMA32 config PGTABLE_LEVELS int default 2 if !PPC64 + default 4 if PPC_BOOK3S_64 default 3 if PPC_64K_PAGES default 4 diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index ea0414d6659e..c78f5928001b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -57,39 +57,8 @@ #define _PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ /* - * 4-level page tables related bits + * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ - -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) - -static inline void pgd_clear(pgd_t *pgdp) -{ - *pgdp = __pgd(0); -} - -static inline pte_t pgd_pte(pgd_t pgd) -{ - return __pte(pgd_val(pgd)); -} - -static inline pgd_t pte_pgd(pte_t pte) -{ - return __pgd(pte_val(pte)); -} -extern struct page *pgd_page(pgd_t pgd); - -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ - (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) - -#define pud_ERROR(e) \ - pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) - -/* - * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ #define remap_4k_pfn(vma, addr, pfn, prot) \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 849bbec80f7b..5c9392b71a6b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -1,15 +1,14 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#include <asm-generic/pgtable-nopud.h> - #define PTE_INDEX_SIZE 8 -#define PMD_INDEX_SIZE 10 -#define PUD_INDEX_SIZE 0 +#define PMD_INDEX_SIZE 5 +#define PUD_INDEX_SIZE 5 #define PGD_INDEX_SIZE 12 #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* With 4k base page size, hugepage PTEs go at the PMD level */ @@ -20,8 +19,13 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + /* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -61,6 +65,8 @@ #define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff +/* FIXME!! 
check this */ +#define PGD_MASKED_BITS 0 #ifndef __ASSEMBLY__ @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); #else #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) -#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) -#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) - #ifdef CONFIG_HUGETLB_PAGE /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index f46974d0134a..9ff1e056acef 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -226,6 +226,7 @@ #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) @@ -354,8 +355,15 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) :"cc"); } +static inline int pgd_bad(pgd_t pgd) +{ + return (pgd_val(pgd) == 0); +} + #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) + /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index e7162dba987e..8f639401c7ba 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -111,6 +111,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) *pgdp = __pgd(val); } +static inline void pgd_clear(pgd_t *pgdp) +{ + *pgdp = __pgd(0); +} + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_present(pgd) (!pgd_none(pgd)) + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} + +extern struct page *pgd_page(pgd_t pgd); + /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. 
@@ -118,9 +138,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) #define pmd_offset(pudp,addr) \ (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) - #define pte_offset_kernel(dir,addr) \ (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) @@ -135,6 +156,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 69ef28a81733..014489a619d0 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -171,7 +171,25 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); extern void __tlb_remove_table(void *_table); #endif -#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) +#ifndef __PAGETABLE_PUD_FOLDED +/* book3s 64 is 4 level page table */ +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); +} +#endif + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, (unsigned long)pmd); +} static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -233,11 +251,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) -#ifndef CONFIG_PPC_64K_PAGES +#ifndef __PAGETABLE_PUD_FOLDED #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#endif /* CONFIG_PPC_64K_PAGES */ +#endif /* __PAGETABLE_PUD_FOLDED */ #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 71487e1ca638..43140f8b0592 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -21,15 +21,18 @@ static inline unsigned long pmd_val(pmd_t x) return x.pmd; } -/* PUD level exusts only on 4k pages */ -#ifndef CONFIG_PPC_64K_PAGES +/* + * 64 bit hash always use 4 level table. Everybody else use 4 level + * only for 4K page size. 
+ */ +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) typedef struct { unsigned long pud; } pud_t; #define __pud(x) ((pud_t) { (x) }) static inline unsigned long pud_val(pud_t x) { return x.pud; } -#endif /* !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd) return pmd; } -#ifndef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) typedef unsigned long pud_t; #define __pud(x) (x) static inline unsigned long pud_val(pud_t pud) { return pud; } -#endif /* !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ typedef unsigned long pgd_t; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 379a6a90644b..8ce1ec24d573 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr) memset(addr, 0, PGD_TABLE_SIZE); } +static void pud_ctor(void *addr) +{ + memset(addr, 0, PUD_TABLE_SIZE); +} + static void pmd_ctor(void *addr) { memset(addr, 0, PMD_TABLE_SIZE); @@ -138,14 +143,18 @@ void pgtable_cache_init(void) { pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + /* + * In all current configs, when the PUD index exists it's the + * same size as either the pgd or pmd index except with THP enabled + * on book3s 64 + */ + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable caches"); - /* In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index. Verify that the - * initialization above has also created a PUD cache. This - * will need re-examiniation if we add new possibilities for - * the pagetable layout. */ - BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + panic("Couldn't allocate pud pgtable caches"); } #ifdef CONFIG_SPARSEMEM_VMEMMAP
This is needed so that we can support both hash and radix page tables
using a single kernel. The radix kernel uses a 4-level table.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 33 +--------------------------
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +++++++++-------
 arch/powerpc/include/asm/book3s/64/hash.h     |  8 +++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 25 +++++++++++++++++++-
 arch/powerpc/include/asm/pgalloc-64.h         | 24 ++++++++++++++++---
 arch/powerpc/include/asm/pgtable-types.h      | 13 +++++++----
 arch/powerpc/mm/init_64.c                     | 21 ++++++++++++-----
 8 files changed, 90 insertions(+), 55 deletions(-)
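The net effect of the patch is that book3s 64 walks a full four-level tree even with 64K pages. A minimal, hypothetical walker using only accessors that exist after this patch (pgd_offset, pud_offset, pmd_offset, pte_offset_kernel) might look like the sketch below; the function name is made up, error handling is reduced to NULL returns, and huge pages at the PGD and PMD levels are deliberately ignored.

/* Sketch only, not from the patch: walk the four-level layout for a
 * mapped address in the given mm. */
static pte_t *lookup_pte_4level(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;

	pgdp = pgd_offset(mm, addr);
	if (pgd_none(*pgdp))
		return NULL;
	/* pud_offset()/pud_index() are what this patch adds for book3s 64 */
	pudp = pud_offset(pgdp, addr);
	if (pud_none(*pudp))
		return NULL;
	pmdp = pmd_offset(pudp, addr);
	if (pmd_none(*pmdp))
		return NULL;
	return pte_offset_kernel(pmdp, addr);
}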