Patchwork powerpc: add 16K/64K pages support for the 44x PPC32 architectures.

login
register
mail settings
Submitter Ilya Yanok
Date Dec. 11, 2008, 1:55 a.m.
Message ID <1228960541-11407-1-git-send-email-yanok@emcraft.com>
Download mbox | patch
Permalink /patch/13389/
State Accepted, archived
Commit ca9153a3a2a7556d091dfe080e42b0e67881fff6
Delegated to: Benjamin Herrenschmidt
Headers show

Comments

Ilya Yanok - Dec. 11, 2008, 1:55 a.m.
This patch adds support for page sizes bigger than 4K (16K/64K) on
PPC 44x.
The PGDIR table is much smaller than a page in the case of 16K/64K
pages (512 and 32 bytes respectively), so we allocate the PGDIR with
kzalloc() instead of __get_free_pages().
A PTE table covers a rather big memory area in the case of 16K/64K
pages (32MB and 512MB respectively), so we can easily put FIXMAP and
PKMAP in the area covered by one PTE table.

Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Vladimir Panfilov <pvr@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
---
 arch/powerpc/Kconfig                   |   58 ++++++++++++++++++++++++--------
 arch/powerpc/include/asm/highmem.h     |   19 +++++++++-
 arch/powerpc/include/asm/mmu-44x.h     |   17 +++++++++
 arch/powerpc/include/asm/page.h        |   13 ++++---
 arch/powerpc/include/asm/page_32.h     |    7 +++-
 arch/powerpc/kernel/asm-offsets.c      |    4 ++
 arch/powerpc/kernel/head_44x.S         |   23 ++++++++-----
 arch/powerpc/kernel/misc_32.S          |   12 +++---
 arch/powerpc/mm/pgtable_32.c           |   23 ++++++++-----
 arch/powerpc/platforms/Kconfig.cputype |    2 +-
 10 files changed, 130 insertions(+), 48 deletions(-)
Josh Boyer - Dec. 17, 2008, 7:56 p.m.
On Thu, Dec 11, 2008 at 04:55:41AM +0300, Ilya Yanok wrote:
>This patch adds support for page sizes bigger than 4K (16K/64K) on
>PPC 44x.
>PGDIR table is much smaller than page in case of 16K/64K pages (512
>and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of
>__get_free_pages().
>PTE table covers rather big memory area in case of 16K/64K pages
>(32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in
>area covered by one PTE table.
>
>Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
>Signed-off-by: Vladimir Panfilov <pvr@emcraft.com>
>Signed-off-by: Ilya Yanok <yanok@emcraft.com>

I tested this a bit today on a Bamboo board.  Overall, it functioned
well enough to not crash :).  Note that I also included Hollis'
memory size alignment patch which is required.

The code looks pretty clean now.  I think if we're going to include
this patch it should go in now.

Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>

>---
> arch/powerpc/Kconfig                   |   58 ++++++++++++++++++++++++--------
> arch/powerpc/include/asm/highmem.h     |   19 +++++++++-
> arch/powerpc/include/asm/mmu-44x.h     |   17 +++++++++
> arch/powerpc/include/asm/page.h        |   13 ++++---
> arch/powerpc/include/asm/page_32.h     |    7 +++-
> arch/powerpc/kernel/asm-offsets.c      |    4 ++
> arch/powerpc/kernel/head_44x.S         |   23 ++++++++-----
> arch/powerpc/kernel/misc_32.S          |   12 +++---
> arch/powerpc/mm/pgtable_32.c           |   23 ++++++++-----
> arch/powerpc/platforms/Kconfig.cputype |    2 +-
> 10 files changed, 130 insertions(+), 48 deletions(-)
>
>diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>index 525c13a..cd8ff7c 100644
>--- a/arch/powerpc/Kconfig
>+++ b/arch/powerpc/Kconfig
>@@ -401,23 +401,53 @@ config PPC_HAS_HASH_64K
> 	depends on PPC64
> 	default n
>
>-config PPC_64K_PAGES
>-	bool "64k page size"
>-	depends on PPC64
>-	select PPC_HAS_HASH_64K
>+choice
>+	prompt "Page size"
>+	default PPC_4K_PAGES
> 	help
>-	  This option changes the kernel logical page size to 64k. On machines
>-	  without processor support for 64k pages, the kernel will simulate
>-	  them by loading each individual 4k page on demand transparently,
>-	  while on hardware with such support, it will be used to map
>-	  normal application pages.
>+	  Select the kernel logical page size. Increasing the page size
>+	  will reduce software overhead at each page boundary, allow
>+	  hardware prefetch mechanisms to be more effective, and allow
>+	  larger dma transfers increasing IO efficiency and reducing
>+	  overhead. However the utilization of memory will increase.
>+	  For example, each cached file will using a multiple of the
>+	  page size to hold its contents and the difference between the
>+	  end of file and the end of page is wasted.
>+
>+	  Some dedicated systems, such as software raid serving with
>+	  accelerated calculations, have shown significant increases.
>+
>+	  If you configure a 64 bit kernel for 64k pages but the
>+	  processor does not support them, then the kernel will simulate
>+	  them with 4k pages, loading them on demand, but with the
>+	  reduced software overhead and larger internal fragmentation.
>+	  For the 32 bit kernel, a large page option will not be offered
>+	  unless it is supported by the configured processor.
>+
>+	  If unsure, choose 4K_PAGES.
>+
>+config PPC_4K_PAGES
>+	bool "4k page size"
>+
>+config PPC_16K_PAGES
>+	bool "16k page size" if 44x
>+
>+config PPC_64K_PAGES
>+	bool "64k page size" if 44x || PPC_STD_MMU_64
>+	select PPC_HAS_HASH_64K if PPC_STD_MMU_64
>+
>+endchoice
>
> config FORCE_MAX_ZONEORDER
> 	int "Maximum zone order"
>-	range 9 64 if PPC_64K_PAGES
>-	default "9" if PPC_64K_PAGES
>-	range 13 64 if PPC64 && !PPC_64K_PAGES
>-	default "13" if PPC64 && !PPC_64K_PAGES
>+	range 9 64 if PPC_STD_MMU_64 && PPC_64K_PAGES
>+	default "9" if PPC_STD_MMU_64 && PPC_64K_PAGES
>+	range 13 64 if PPC_STD_MMU_64 && !PPC_64K_PAGES
>+	default "13" if PPC_STD_MMU_64 && !PPC_64K_PAGES
>+	range 9 64 if PPC_STD_MMU_32 && PPC_16K_PAGES
>+	default "9" if PPC_STD_MMU_32 && PPC_16K_PAGES
>+	range 7 64 if PPC_STD_MMU_32 && PPC_64K_PAGES
>+	default "7" if PPC_STD_MMU_32 && PPC_64K_PAGES
> 	range 11 64
> 	default "11"
> 	help
>@@ -437,7 +467,7 @@ config FORCE_MAX_ZONEORDER
>
> config PPC_SUBPAGE_PROT
> 	bool "Support setting protections for 4k subpages"
>-	depends on PPC_64K_PAGES
>+	depends on PPC_STD_MMU_64 && PPC_64K_PAGES
> 	help
> 	  This option adds support for a system call to allow user programs
> 	  to set access permissions (read/write, readonly, or no access)
>diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
>index 91c5895..7d6bb37 100644
>--- a/arch/powerpc/include/asm/highmem.h
>+++ b/arch/powerpc/include/asm/highmem.h
>@@ -38,9 +38,24 @@ extern pte_t *pkmap_page_table;
>  * easily, subsequent pte tables have to be allocated in one physical
>  * chunk of RAM.
>  */
>-#define LAST_PKMAP 	(1 << PTE_SHIFT)
>-#define LAST_PKMAP_MASK (LAST_PKMAP-1)
>+/*
>+ * We use one full pte table with 4K pages. And with 16K/64K pages pte
>+ * table covers enough memory (32MB and 512MB resp.) that both FIXMAP
>+ * and PKMAP can be placed in single pte table. We use 1024 pages for
>+ * PKMAP in case of 16K/64K pages.
>+ */
>+#ifdef CONFIG_PPC_4K_PAGES
>+#define PKMAP_ORDER	PTE_SHIFT
>+#else
>+#define PKMAP_ORDER	10
>+#endif
>+#define LAST_PKMAP	(1 << PKMAP_ORDER)
>+#ifndef CONFIG_PPC_4K_PAGES
>+#define PKMAP_BASE	(FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1))
>+#else
> #define PKMAP_BASE	((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
>+#endif
>+#define LAST_PKMAP_MASK	(LAST_PKMAP-1)
> #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
> #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
>
>diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
>index a825524..73e1909 100644
>--- a/arch/powerpc/include/asm/mmu-44x.h
>+++ b/arch/powerpc/include/asm/mmu-44x.h
>@@ -4,6 +4,8 @@
>  * PPC440 support
>  */
>
>+#include <asm/page.h>
>+
> #define PPC44x_MMUCR_TID	0x000000ff
> #define PPC44x_MMUCR_STS	0x00010000
>
>@@ -73,4 +75,19 @@ typedef struct {
> /* Size of the TLBs used for pinning in lowmem */
> #define PPC_PIN_SIZE	(1 << 28)	/* 256M */
>
>+#if (PAGE_SHIFT == 12)
>+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
>+#elif (PAGE_SHIFT == 14)
>+#define PPC44x_TLBE_SIZE	PPC44x_TLB_16K
>+#elif (PAGE_SHIFT == 16)
>+#define PPC44x_TLBE_SIZE	PPC44x_TLB_64K
>+#else
>+#error "Unsupported PAGE_SIZE"
>+#endif
>+
>+#define PPC44x_PGD_OFF_SHIFT	(32 - PGDIR_SHIFT + PGD_T_LOG2)
>+#define PPC44x_PGD_OFF_MASK_BIT	(PGDIR_SHIFT - PGD_T_LOG2)
>+#define PPC44x_PTE_ADD_SHIFT	(32 - PGDIR_SHIFT + PTE_SHIFT + PTE_T_LOG2)
>+#define PPC44x_PTE_ADD_MASK_BIT	(32 - PTE_T_LOG2 - PTE_SHIFT)
>+
> #endif /* _ASM_POWERPC_MMU_44X_H_ */
>diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
>index c0b8d4a..197d569 100644
>--- a/arch/powerpc/include/asm/page.h
>+++ b/arch/powerpc/include/asm/page.h
>@@ -19,12 +19,15 @@
> #include <asm/kdump.h>
>
> /*
>- * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
>+ * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages
>+ * on PPC44x). For PPC64 we support either 4K or 64K software
>  * page size. When using 64K pages however, whether we are really supporting
>  * 64K pages in HW or not is irrelevant to those definitions.
>  */
>-#ifdef CONFIG_PPC_64K_PAGES
>+#if defined(CONFIG_PPC_64K_PAGES)
> #define PAGE_SHIFT		16
>+#elif defined(CONFIG_PPC_16K_PAGES)
>+#define PAGE_SHIFT		14
> #else
> #define PAGE_SHIFT		12
> #endif
>@@ -151,7 +154,7 @@ typedef struct { pte_basic_t pte; } pte_t;
> /* 64k pages additionally define a bigger "real PTE" type that gathers
>  * the "second half" part of the PTE for pseudo 64k pages
>  */
>-#ifdef CONFIG_PPC_64K_PAGES
>+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
> typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
> #else
> typedef struct { pte_t pte; } real_pte_t;
>@@ -191,10 +194,10 @@ typedef pte_basic_t pte_t;
> #define pte_val(x)	(x)
> #define __pte(x)	(x)
>
>-#ifdef CONFIG_PPC_64K_PAGES
>+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
> typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
> #else
>-typedef unsigned long real_pte_t;
>+typedef pte_t real_pte_t;
> #endif
>
>
>diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
>index d77072a..1458d95 100644
>--- a/arch/powerpc/include/asm/page_32.h
>+++ b/arch/powerpc/include/asm/page_32.h
>@@ -19,6 +19,8 @@
> #define PTE_FLAGS_OFFSET	0
> #endif
>
>+#define PTE_SHIFT	(PAGE_SHIFT - PTE_T_LOG2)	/* full page */
>+
> #ifndef __ASSEMBLY__
> /*
>  * The basic type of a PTE - 64 bits for those CPUs with > 32 bit
>@@ -26,10 +28,8 @@
>  */
> #ifdef CONFIG_PTE_64BIT
> typedef unsigned long long pte_basic_t;
>-#define PTE_SHIFT	(PAGE_SHIFT - 3)	/* 512 ptes per page */
> #else
> typedef unsigned long pte_basic_t;
>-#define PTE_SHIFT	(PAGE_SHIFT - 2)	/* 1024 ptes per page */
> #endif
>
> struct page;
>@@ -39,6 +39,9 @@ extern void copy_page(void *to, void *from);
>
> #include <asm-generic/page.h>
>
>+#define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
>+#define PTE_T_LOG2	(__builtin_ffs(sizeof(pte_t)) - 1)
>+
> #endif /* __ASSEMBLY__ */
>
> #endif /* _ASM_POWERPC_PAGE_32_H */
>diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
>index 75c5dd0..0142318 100644
>--- a/arch/powerpc/kernel/asm-offsets.c
>+++ b/arch/powerpc/kernel/asm-offsets.c
>@@ -378,6 +378,10 @@ int main(void)
> 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
> 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
> #endif
>+#ifdef CONFIG_44x
>+	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
>+	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
>+#endif
>
> 	return 0;
> }
>diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
>index f3a1ea9..3bf6bd1 100644
>--- a/arch/powerpc/kernel/head_44x.S
>+++ b/arch/powerpc/kernel/head_44x.S
>@@ -391,12 +391,14 @@ interrupt_base:
> 	rlwimi	r13,r12,10,30,30
>
> 	/* Load the PTE */
>-	rlwinm 	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
>+	/* Compute pgdir/pmd offset */
>+	rlwinm  r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
> 	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
> 	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
> 	beq	2f			/* Bail if no table */
>
>-	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
>+	/* Compute pte address */
>+	rlwimi  r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
> 	lwz	r11, 0(r12)		/* Get high word of pte entry */
> 	lwz	r12, 4(r12)		/* Get low word of pte entry */
>
>@@ -485,12 +487,14 @@ tlb_44x_patch_hwater_D:
> 	/* Make up the required permissions */
> 	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC
>
>-	rlwinm	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
>+	/* Compute pgdir/pmd offset */
>+	rlwinm 	r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
> 	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
> 	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
> 	beq	2f			/* Bail if no table */
>
>-	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
>+	/* Compute pte address */
>+	rlwimi	r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
> 	lwz	r11, 0(r12)		/* Get high word of pte entry */
> 	lwz	r12, 4(r12)		/* Get low word of pte entry */
>
>@@ -554,15 +558,16 @@ tlb_44x_patch_hwater_I:
>  */
> finish_tlb_load:
> 	/* Combine RPN & ERPN an write WS 0 */
>-	rlwimi	r11,r12,0,0,19
>+	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
> 	tlbwe	r11,r13,PPC44x_TLB_XLAT
>
> 	/*
> 	 * Create WS1. This is the faulting address (EPN),
> 	 * page size, and valid flag.
> 	 */
>-	li	r11,PPC44x_TLB_VALID | PPC44x_TLB_4K
>-	rlwimi	r10,r11,0,20,31			/* Insert valid and page size*/
>+	li	r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
>+	/* Insert valid and page size */
>+	rlwimi	r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31
> 	tlbwe	r10,r13,PPC44x_TLB_PAGEID	/* Write PAGEID */
>
> 	/* And WS 2 */
>@@ -634,12 +639,12 @@ _GLOBAL(set_context)
>  * goes at the beginning of the data segment, which is page-aligned.
>  */
> 	.data
>-	.align	12
>+	.align	PAGE_SHIFT
> 	.globl	sdata
> sdata:
> 	.globl	empty_zero_page
> empty_zero_page:
>-	.space	4096
>+	.space	PAGE_SIZE
>
> /*
>  * To support >32-bit physical addresses, we use an 8KB pgdir.
>diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
>index bdc8b0e..950b708 100644
>--- a/arch/powerpc/kernel/misc_32.S
>+++ b/arch/powerpc/kernel/misc_32.S
>@@ -647,8 +647,8 @@ _GLOBAL(__flush_dcache_icache)
> BEGIN_FTR_SECTION
> 	blr
> END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
>-	rlwinm	r3,r3,0,0,19			/* Get page base address */
>-	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
>+	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
>+	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
> 	mtctr	r4
> 	mr	r6,r3
> 0:	dcbst	0,r3				/* Write line to ram */
>@@ -688,8 +688,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
> 	rlwinm	r0,r10,0,28,26			/* clear DR */
> 	mtmsr	r0
> 	isync
>-	rlwinm	r3,r3,0,0,19			/* Get page base address */
>-	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
>+	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
>+	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
> 	mtctr	r4
> 	mr	r6,r3
> 0:	dcbst	0,r3				/* Write line to ram */
>@@ -713,7 +713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
>  * void clear_pages(void *page, int order) ;
>  */
> _GLOBAL(clear_pages)
>-	li	r0,4096/L1_CACHE_BYTES
>+	li	r0,PAGE_SIZE/L1_CACHE_BYTES
> 	slw	r0,r0,r4
> 	mtctr	r0
> #ifdef CONFIG_8xx
>@@ -771,7 +771,7 @@ _GLOBAL(copy_page)
> 	dcbt	r5,r4
> 	li	r11,L1_CACHE_BYTES+4
> #endif /* MAX_COPY_PREFETCH */
>-	li	r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
>+	li	r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
> 	crclr	4*cr0+eq
> 2:
> 	mtctr	r0
>diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
>index c31d6d2..f966a5e 100644
>--- a/arch/powerpc/mm/pgtable_32.c
>+++ b/arch/powerpc/mm/pgtable_32.c
>@@ -72,24 +72,29 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
> #define p_mapped_by_tlbcam(x)	(0UL)
> #endif /* HAVE_TLBCAM */
>
>-#ifdef CONFIG_PTE_64BIT
>-/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. */
>-#define PGDIR_ORDER	1
>-#else
>-#define PGDIR_ORDER	0
>-#endif
>+#define PGDIR_ORDER	(32 + PGD_T_LOG2 - PGDIR_SHIFT)
>
> pgd_t *pgd_alloc(struct mm_struct *mm)
> {
> 	pgd_t *ret;
>
>-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
>+	/* pgdir take page or two with 4K pages and a page fraction otherwise */
>+#ifndef CONFIG_PPC_4K_PAGES
>+	ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
>+#else
>+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
>+			PGDIR_ORDER - PAGE_SHIFT);
>+#endif
> 	return ret;
> }
>
> void pgd_free(struct mm_struct *mm, pgd_t *pgd)
> {
>-	free_pages((unsigned long)pgd, PGDIR_ORDER);
>+#ifndef CONFIG_PPC_4K_PAGES
>+	kfree((void *)pgd);
>+#else
>+	free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
>+#endif
> }
>
> __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
>@@ -400,7 +405,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
> #endif /* CONFIG_DEBUG_PAGEALLOC */
>
> static int fixmaps;
>-unsigned long FIXADDR_TOP = 0xfffff000;
>+unsigned long FIXADDR_TOP = (-PAGE_SIZE);
> EXPORT_SYMBOL(FIXADDR_TOP);
>
> void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
>diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
>index 548efa5..51098bc 100644
>--- a/arch/powerpc/platforms/Kconfig.cputype
>+++ b/arch/powerpc/platforms/Kconfig.cputype
>@@ -204,7 +204,7 @@ config PPC_STD_MMU_32
>
> config PPC_MM_SLICES
> 	bool
>-	default y if HUGETLB_PAGE || PPC_64K_PAGES
>+	default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES)
> 	default n
>
> config VIRT_CPU_ACCOUNTING
>-- 
>1.5.6.1
>
>_______________________________________________
>Linuxppc-dev mailing list
>Linuxppc-dev@ozlabs.org
>https://ozlabs.org/mailman/listinfo/linuxppc-dev
Josh Boyer - Dec. 24, 2008, 5:03 p.m.
On Wed, Dec 17, 2008 at 02:56:07PM -0500, Josh Boyer wrote:
>On Thu, Dec 11, 2008 at 04:55:41AM +0300, Ilya Yanok wrote:
>>This patch adds support for page sizes bigger than 4K (16K/64K) on
>>PPC 44x.
>>PGDIR table is much smaller than page in case of 16K/64K pages (512
>>and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of
>>__get_free_pages().
>>PTE table covers rather big memory area in case of 16K/64K pages
>>(32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in
>>area covered by one PTE table.
>>
>>Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
>>Signed-off-by: Vladimir Panfilov <pvr@emcraft.com>
>>Signed-off-by: Ilya Yanok <yanok@emcraft.com>
>
>I tested this a bit today on a Bamboo board.  Overall, it functioned
>well enough to not crash :).  Note that I also included Hollis'
>memory size alignment patch which is required.
>
>The code looks pretty clean now.  I think if we're going to include
>this patch it should go in now.
>
>Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>

Ben, Paul,

What else is needed to get this patch included?

josh
Benjamin Herrenschmidt - Dec. 26, 2008, 9:22 p.m.
> >The code looks pretty clean now.  I think if we're going to include
> >this patch it should go in now.
> >
> >Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
> 
> Ben, Paul,
> 
> What else is needed to get this patch included?

Can you remind us of the pre-req?

Cheers,
Ben.
Josh Boyer - Dec. 27, 2008, 12:05 p.m.
On Sat, Dec 27, 2008 at 08:22:55AM +1100, Benjamin Herrenschmidt wrote:
>
>> >The code looks pretty clean now.  I think if we're going to include
>> >this patch it should go in now.
>> >
>> >Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
>> 
>> Ben, Paul,
>> 
>> What else is needed to get this patch included?
>
>Can you remind us the pre-req ?

http://patchwork.ozlabs.org/patch/10951/

josh

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 525c13a..cd8ff7c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -401,23 +401,53 @@  config PPC_HAS_HASH_64K
 	depends on PPC64
 	default n
 
-config PPC_64K_PAGES
-	bool "64k page size"
-	depends on PPC64
-	select PPC_HAS_HASH_64K
+choice
+	prompt "Page size"
+	default PPC_4K_PAGES
 	help
-	  This option changes the kernel logical page size to 64k. On machines
-	  without processor support for 64k pages, the kernel will simulate
-	  them by loading each individual 4k page on demand transparently,
-	  while on hardware with such support, it will be used to map
-	  normal application pages.
+	  Select the kernel logical page size. Increasing the page size
+	  will reduce software overhead at each page boundary, allow
+	  hardware prefetch mechanisms to be more effective, and allow
+	  larger dma transfers increasing IO efficiency and reducing
+	  overhead. However the utilization of memory will increase.
+	  For example, each cached file will using a multiple of the
+	  page size to hold its contents and the difference between the
+	  end of file and the end of page is wasted.
+
+	  Some dedicated systems, such as software raid serving with
+	  accelerated calculations, have shown significant increases.
+
+	  If you configure a 64 bit kernel for 64k pages but the
+	  processor does not support them, then the kernel will simulate
+	  them with 4k pages, loading them on demand, but with the
+	  reduced software overhead and larger internal fragmentation.
+	  For the 32 bit kernel, a large page option will not be offered
+	  unless it is supported by the configured processor.
+
+	  If unsure, choose 4K_PAGES.
+
+config PPC_4K_PAGES
+	bool "4k page size"
+
+config PPC_16K_PAGES
+	bool "16k page size" if 44x
+
+config PPC_64K_PAGES
+	bool "64k page size" if 44x || PPC_STD_MMU_64
+	select PPC_HAS_HASH_64K if PPC_STD_MMU_64
+
+endchoice
 
 config FORCE_MAX_ZONEORDER
 	int "Maximum zone order"
-	range 9 64 if PPC_64K_PAGES
-	default "9" if PPC_64K_PAGES
-	range 13 64 if PPC64 && !PPC_64K_PAGES
-	default "13" if PPC64 && !PPC_64K_PAGES
+	range 9 64 if PPC_STD_MMU_64 && PPC_64K_PAGES
+	default "9" if PPC_STD_MMU_64 && PPC_64K_PAGES
+	range 13 64 if PPC_STD_MMU_64 && !PPC_64K_PAGES
+	default "13" if PPC_STD_MMU_64 && !PPC_64K_PAGES
+	range 9 64 if PPC_STD_MMU_32 && PPC_16K_PAGES
+	default "9" if PPC_STD_MMU_32 && PPC_16K_PAGES
+	range 7 64 if PPC_STD_MMU_32 && PPC_64K_PAGES
+	default "7" if PPC_STD_MMU_32 && PPC_64K_PAGES
 	range 11 64
 	default "11"
 	help
@@ -437,7 +467,7 @@  config FORCE_MAX_ZONEORDER
 
 config PPC_SUBPAGE_PROT
 	bool "Support setting protections for 4k subpages"
-	depends on PPC_64K_PAGES
+	depends on PPC_STD_MMU_64 && PPC_64K_PAGES
 	help
 	  This option adds support for a system call to allow user programs
 	  to set access permissions (read/write, readonly, or no access)
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index 91c5895..7d6bb37 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -38,9 +38,24 @@  extern pte_t *pkmap_page_table;
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
-#define LAST_PKMAP 	(1 << PTE_SHIFT)
-#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+/*
+ * We use one full pte table with 4K pages. And with 16K/64K pages pte
+ * table covers enough memory (32MB and 512MB resp.) that both FIXMAP
+ * and PKMAP can be placed in single pte table. We use 1024 pages for
+ * PKMAP in case of 16K/64K pages.
+ */
+#ifdef CONFIG_PPC_4K_PAGES
+#define PKMAP_ORDER	PTE_SHIFT
+#else
+#define PKMAP_ORDER	10
+#endif
+#define LAST_PKMAP	(1 << PKMAP_ORDER)
+#ifndef CONFIG_PPC_4K_PAGES
+#define PKMAP_BASE	(FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1))
+#else
 #define PKMAP_BASE	((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
+#endif
+#define LAST_PKMAP_MASK	(LAST_PKMAP-1)
 #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index a825524..73e1909 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -4,6 +4,8 @@ 
  * PPC440 support
  */
 
+#include <asm/page.h>
+
 #define PPC44x_MMUCR_TID	0x000000ff
 #define PPC44x_MMUCR_STS	0x00010000
 
@@ -73,4 +75,19 @@  typedef struct {
 /* Size of the TLBs used for pinning in lowmem */
 #define PPC_PIN_SIZE	(1 << 28)	/* 256M */
 
+#if (PAGE_SHIFT == 12)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#elif (PAGE_SHIFT == 14)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_16K
+#elif (PAGE_SHIFT == 16)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_64K
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define PPC44x_PGD_OFF_SHIFT	(32 - PGDIR_SHIFT + PGD_T_LOG2)
+#define PPC44x_PGD_OFF_MASK_BIT	(PGDIR_SHIFT - PGD_T_LOG2)
+#define PPC44x_PTE_ADD_SHIFT	(32 - PGDIR_SHIFT + PTE_SHIFT + PTE_T_LOG2)
+#define PPC44x_PTE_ADD_MASK_BIT	(32 - PTE_T_LOG2 - PTE_SHIFT)
+
 #endif /* _ASM_POWERPC_MMU_44X_H_ */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c0b8d4a..197d569 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -19,12 +19,15 @@ 
 #include <asm/kdump.h>
 
 /*
- * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
+ * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages
+ * on PPC44x). For PPC64 we support either 4K or 64K software
  * page size. When using 64K pages however, whether we are really supporting
  * 64K pages in HW or not is irrelevant to those definitions.
  */
-#ifdef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_64K_PAGES)
 #define PAGE_SHIFT		16
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define PAGE_SHIFT		14
 #else
 #define PAGE_SHIFT		12
 #endif
@@ -151,7 +154,7 @@  typedef struct { pte_basic_t pte; } pte_t;
 /* 64k pages additionally define a bigger "real PTE" type that gathers
  * the "second half" part of the PTE for pseudo 64k pages
  */
-#ifdef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
 #else
 typedef struct { pte_t pte; } real_pte_t;
@@ -191,10 +194,10 @@  typedef pte_basic_t pte_t;
 #define pte_val(x)	(x)
 #define __pte(x)	(x)
 
-#ifdef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
 #else
-typedef unsigned long real_pte_t;
+typedef pte_t real_pte_t;
 #endif
 
 
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index d77072a..1458d95 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -19,6 +19,8 @@ 
 #define PTE_FLAGS_OFFSET	0
 #endif
 
+#define PTE_SHIFT	(PAGE_SHIFT - PTE_T_LOG2)	/* full page */
+
 #ifndef __ASSEMBLY__
 /*
  * The basic type of a PTE - 64 bits for those CPUs with > 32 bit
@@ -26,10 +28,8 @@ 
  */
 #ifdef CONFIG_PTE_64BIT
 typedef unsigned long long pte_basic_t;
-#define PTE_SHIFT	(PAGE_SHIFT - 3)	/* 512 ptes per page */
 #else
 typedef unsigned long pte_basic_t;
-#define PTE_SHIFT	(PAGE_SHIFT - 2)	/* 1024 ptes per page */
 #endif
 
 struct page;
@@ -39,6 +39,9 @@  extern void copy_page(void *to, void *from);
 
 #include <asm-generic/page.h>
 
+#define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
+#define PTE_T_LOG2	(__builtin_ffs(sizeof(pte_t)) - 1)
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PAGE_32_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 75c5dd0..0142318 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -378,6 +378,10 @@  int main(void)
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 #endif
+#ifdef CONFIG_44x
+	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
+	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
+#endif
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index f3a1ea9..3bf6bd1 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -391,12 +391,14 @@  interrupt_base:
 	rlwimi	r13,r12,10,30,30
 
 	/* Load the PTE */
-	rlwinm 	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
 	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
 	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
 	beq	2f			/* Bail if no table */
 
-	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
+	/* Compute pte address */
+	rlwimi  r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
 	lwz	r11, 0(r12)		/* Get high word of pte entry */
 	lwz	r12, 4(r12)		/* Get low word of pte entry */
 
@@ -485,12 +487,14 @@  tlb_44x_patch_hwater_D:
 	/* Make up the required permissions */
 	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC
 
-	rlwinm	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
+	/* Compute pgdir/pmd offset */
+	rlwinm 	r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
 	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
 	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
 	beq	2f			/* Bail if no table */
 
-	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
+	/* Compute pte address */
+	rlwimi	r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
 	lwz	r11, 0(r12)		/* Get high word of pte entry */
 	lwz	r12, 4(r12)		/* Get low word of pte entry */
 
@@ -554,15 +558,16 @@  tlb_44x_patch_hwater_I:
  */
 finish_tlb_load:
 	/* Combine RPN & ERPN an write WS 0 */
-	rlwimi	r11,r12,0,0,19
+	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
 	tlbwe	r11,r13,PPC44x_TLB_XLAT
 
 	/*
 	 * Create WS1. This is the faulting address (EPN),
 	 * page size, and valid flag.
 	 */
-	li	r11,PPC44x_TLB_VALID | PPC44x_TLB_4K
-	rlwimi	r10,r11,0,20,31			/* Insert valid and page size*/
+	li	r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
+	/* Insert valid and page size */
+	rlwimi	r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31
 	tlbwe	r10,r13,PPC44x_TLB_PAGEID	/* Write PAGEID */
 
 	/* And WS 2 */
@@ -634,12 +639,12 @@  _GLOBAL(set_context)
  * goes at the beginning of the data segment, which is page-aligned.
  */
 	.data
-	.align	12
+	.align	PAGE_SHIFT
 	.globl	sdata
 sdata:
 	.globl	empty_zero_page
 empty_zero_page:
-	.space	4096
+	.space	PAGE_SIZE
 
 /*
  * To support >32-bit physical addresses, we use an 8KB pgdir.
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index bdc8b0e..950b708 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -647,8 +647,8 @@  _GLOBAL(__flush_dcache_icache)
 BEGIN_FTR_SECTION
 	blr
 END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-	rlwinm	r3,r3,0,0,19			/* Get page base address */
-	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
+	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
+	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
 	mtctr	r4
 	mr	r6,r3
 0:	dcbst	0,r3				/* Write line to ram */
@@ -688,8 +688,8 @@  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 	rlwinm	r0,r10,0,28,26			/* clear DR */
 	mtmsr	r0
 	isync
-	rlwinm	r3,r3,0,0,19			/* Get page base address */
-	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
+	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
+	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
 	mtctr	r4
 	mr	r6,r3
 0:	dcbst	0,r3				/* Write line to ram */
@@ -713,7 +713,7 @@  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
  * void clear_pages(void *page, int order) ;
  */
 _GLOBAL(clear_pages)
-	li	r0,4096/L1_CACHE_BYTES
+	li	r0,PAGE_SIZE/L1_CACHE_BYTES
 	slw	r0,r0,r4
 	mtctr	r0
 #ifdef CONFIG_8xx
@@ -771,7 +771,7 @@  _GLOBAL(copy_page)
 	dcbt	r5,r4
 	li	r11,L1_CACHE_BYTES+4
 #endif /* MAX_COPY_PREFETCH */
-	li	r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+	li	r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
 	crclr	4*cr0+eq
 2:
 	mtctr	r0
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c31d6d2..f966a5e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -72,24 +72,29 @@  extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
 #define p_mapped_by_tlbcam(x)	(0UL)
 #endif /* HAVE_TLBCAM */
 
-#ifdef CONFIG_PTE_64BIT
-/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. */
-#define PGDIR_ORDER	1
-#else
-#define PGDIR_ORDER	0
-#endif
+#define PGDIR_ORDER	(32 + PGD_T_LOG2 - PGDIR_SHIFT)
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	/* pgdir take page or two with 4K pages and a page fraction otherwise */
+#ifndef CONFIG_PPC_4K_PAGES
+	ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
+#else
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
+			PGDIR_ORDER - PAGE_SHIFT);
+#endif
 	return ret;
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	free_pages((unsigned long)pgd, PGDIR_ORDER);
+#ifndef CONFIG_PPC_4K_PAGES
+	kfree((void *)pgd);
+#else
+	free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
+#endif
 }
 
 __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -400,7 +405,7 @@  void kernel_map_pages(struct page *page, int numpages, int enable)
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
 static int fixmaps;
-unsigned long FIXADDR_TOP = 0xfffff000;
+unsigned long FIXADDR_TOP = (-PAGE_SIZE);
 EXPORT_SYMBOL(FIXADDR_TOP);
 
 void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 548efa5..51098bc 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -204,7 +204,7 @@  config PPC_STD_MMU_32
 
 config PPC_MM_SLICES
 	bool
-	default y if HUGETLB_PAGE || PPC_64K_PAGES
+	default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES)
 	default n
 
 config VIRT_CPU_ACCOUNTING