Patchwork powerpc: add support for PAGE_SIZEs greater than 4KB for

login
register
mail settings
Submitter Ilya Yanok
Date Sept. 10, 2008, 9:53 p.m.
Message ID <1221083587-8091-2-git-send-email-yanok@emcraft.com>
Download mbox | patch
Permalink /patch/232/
State Changes Requested
Headers show

Comments

Ilya Yanok - Sept. 10, 2008, 9:53 p.m.
This patch adds support for page sizes bigger than 4KB (16KB/64KB/256KB) on
PPC 44x.

Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
prodyut hazarika - Sept. 11, 2008, 4:57 p.m.
I was planning to post a similar patch. Good that you already posted
it :-) I will try to finish off similar patch for 40x processors.

>
> +choice
> +       prompt "Page size"
> +       depends on 44x && PPC32
> +       default PPC32_4K_PAGES
> +       help
> +         The PAGE_SIZE definition. Increasing the page size may
> +         improve the system performance in some dedicated cases.
> +         If unsure, set it to 4 KB.
> +
You should mention an example of such dedicated cases (e.g. RAID).
I think this help text should also mention that for a 256KB page size you
will need a special version of binutils, since the ELF standard
only mentions page sizes up to 64KB.

> -#ifdef CONFIG_PPC_64K_PAGES
> +#if defined(CONFIG_PPC32_256K_PAGES)
> +#define PAGE_SHIFT             18
> +#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
>  #define PAGE_SHIFT             16
> +#elif defined(CONFIG_PPC32_16K_PAGES)
> +#define PAGE_SHIFT             14
>  #else
>  #define PAGE_SHIFT             12
>  #endif

Why should the new defines be inside CONFIG_PPC_64K_PAGES? The
definition CONFIG_PPC_64K_PAGES is repeated.
Shouldn't these defines be like this:
#if defined(CONFIG_PPC32_256K_PAGES)
#define PAGE_SHIFT             18
#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
#define PAGE_SHIFT             16
#elif defined(CONFIG_PPC32_16K_PAGES)
#define PAGE_SHIFT             14
#else
#define PAGE_SHIFT             12
#endif

> +#elif (PAGE_SHIFT == 14)
> +/*
> + * PAGE_SIZE  16K
> + * PAGE_SHIFT 14
> + * PTE_SHIFT  11
> + * PMD_SHIFT  25
> + */
> +#define PPC44x_TLBE_SIZE       PPC44x_TLB_16K
> +#define PPC44x_PGD_OFF_SH      9  /*(32 - PMD_SHIFT + 2)*/
> +#define PPC44x_PGD_OFF_M1      23 /*(PMD_SHIFT - 2)*/
> +#define PPC44x_PTE_ADD_SH      21 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
> +#define PPC44x_PTE_ADD_M1      18 /*32 - 3 - PTE_SHIFT*/
> +#define PPC44x_RPN_M2          17 /*31 - PAGE_SHIFT*/

Please change PPC44x_PGD_OFF_SH to PPC44x_PGD_OFF_SHIFT. SH sounds
very confusing. I don't like the M1 and M2 names either. Change
PPC44x_RPN_M2 to PPC44x_RPN_MASK. Change M1 to MASK in
PPC44x_PGD_OFF_M1 and PPC44x_PTE_ADD_M1.
Is there any way a define like
#define PPC44x_PGD_OFF_SH      (32 - PMD_SHIFT + 2)
can be used in an assembly file? If so, we can avoid repeating the defines.

I think these 44x-specific defines should go to asm/mmu-44x.h, since I
am planning to post a patch for 40x. For those processors, the defines
below will change to:
#define PPC44x_PTE_ADD_SH      (32 - PMD_SHIFT + PTE_SHIFT + 2)
#define PPC44x_PTE_ADD_M1      (32 - 2 - PTE_SHIFT)
Since these defines are not generic, they should be put in the MMU-specific
header file rather than in a new header file. When 40x
processors are supported, the corresponding defines can go to
include/asm/mmu-40x.h

> +#elif (PAGE_SHIFT == 18)
> +/*
> + * PAGE_SIZE  256K
> + * PAGE_SHIFT 18
> + * PTE_SHIFT  11
> + * PMD_SHIFT  29
> + */
> +#define PPC44x_TLBE_SIZE       PPC44x_TLB_256K
> +#define PPC44x_PGD_OFF_SH      5  /*(32 - PMD_SHIFT + 2)*/
> +#define PPC44x_PGD_OFF_M1      27 /*(PMD_SHIFT - 2)*/
> +#define PPC44x_PTE_ADD_SH      17 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
> +#define PPC44x_PTE_ADD_M1      18 /*32 - 3 - PTE_SHIFT*/
> +#define PPC44x_RPN_M2          13 /*31 - PAGE_SHIFT*/

For the 256KB page size, I cannot understand why PTE_SHIFT is 11. Since
each PTE entry is 8 bytes, PTE_SHIFT should have been 15. But then
there would be no bits left in the effective address for the 1st-level
PGDIR offset. On what basis was a PTE_SHIFT of 11 chosen? This overflow
problem happens only for the 256KB page size.
Yuri Tikhonov - Sept. 11, 2008, 6:15 p.m.
Hello,

On Thursday, September 11, 2008 you wrote:

> I was planning to post a similar patch. Good that you already posted
> it :-) I will try to finish off similar patch for 40x processors.

>>
>> +choice
>> +       prompt "Page size"
>> +       depends on 44x && PPC32
>> +       default PPC32_4K_PAGES
>> +       help
>> +         The PAGE_SIZE definition. Increasing the page size may
>> +         improve the system performance in some dedicated cases.
>> +         If unsure, set it to 4 KB.
>> +
> You should mention an example of dedicated cases (eg. RAID).

ACK.

> I think this help should mention that for page size 256KB, you will
> need to have a special version of binutils, since the ELF standard
> mentions page sizes only upto 64KB.

 Right. We use ELDK-4.2 for compiling applications to be run on a kernel
with a 256K PAGE_SIZE. This toolchain includes the necessary changes to
ELF_MAXPAGESIZE in binutils/bfd/elf32-ppc.c.

>> -#ifdef CONFIG_PPC_64K_PAGES
>> +#if defined(CONFIG_PPC32_256K_PAGES)
>> +#define PAGE_SHIFT             18
>> +#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
>>  #define PAGE_SHIFT             16
>> +#elif defined(CONFIG_PPC32_16K_PAGES)
>> +#define PAGE_SHIFT             14
>>  #else
>>  #define PAGE_SHIFT             12
>>  #endif

> Why should the new defines be inside CONFIG_PPC_64K_PAGES? The
> definition CONFIG_PPC_64K_PAGES is repeated.

 We decided to introduce a new CONFIG_PPC32_64K_PAGES option to
distinguish between using 64K pages on PPC32 and on PPC64, so PAGE_SHIFT
will be defined as 16 either when the CONFIG_PPC_64K_PAGES option is set
on some PPC64 platform, or when the CONFIG_PPC32_64K_PAGES option is set
on some ppc44x PPC32 platform.

> Shouldn't these defines be like this:
> #if defined(CONFIG_PPC32_256K_PAGES)
> #define PAGE_SHIFT             18
> #elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
> #define PAGE_SHIFT             16
> #elif defined(CONFIG_PPC32_16K_PAGES)
> #define PAGE_SHIFT             14
> #else
> #define PAGE_SHIFT             12
> #endif

 Admittedly, I don't see the difference between your version and
Ilya's. Am I missing something?

>> +#elif (PAGE_SHIFT == 14)
>> +/*
>> + * PAGE_SIZE  16K
>> + * PAGE_SHIFT 14
>> + * PTE_SHIFT  11
>> + * PMD_SHIFT  25
>> + */
>> +#define PPC44x_TLBE_SIZE       PPC44x_TLB_16K
>> +#define PPC44x_PGD_OFF_SH      9  /*(32 - PMD_SHIFT + 2)*/
>> +#define PPC44x_PGD_OFF_M1      23 /*(PMD_SHIFT - 2)*/
>> +#define PPC44x_PTE_ADD_SH      21 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
>> +#define PPC44x_PTE_ADD_M1      18 /*32 - 3 - PTE_SHIFT*/
>> +#define PPC44x_RPN_M2          17 /*31 - PAGE_SHIFT*/

> Please change PPC44x_PGD_OFF_SH to PPC44x_PGD_OFF_SHIFT. SH sounds
> very confusing. I don't like the MI and M2 names too. Change
> PPC44x_RPN_M2 to PPC44x_RPN_MASK. Change M1 to MASK in
> PPC44x_PGD_OFF_M1 and PPC44x_PTE_ADD_M1 .
> Is there no way a define like
> #define PPC44x_PGD_OFF_SH      (32 - PMD_SHIFT + 2)
> be used in assembly file. If yes, we can avoid repeating the defines.

> I think these 44x specific defines should go to asm/mmu-44x.h since I
> am planning to post a patch for 40x. For those processors, the defines
> below will changes as:
> #define PPC44x_PTE_ADD_SH      (32 - PMD_SHIFT + PTE_SHIFT + 2)
> #define PPC44x_PTE_ADD_M1      (32 - 2 - PTE_SHIFT)
> Since these defines are not generic, they should be put in the mmu
> specific header file rather than adding a new header file. When 40x
> processors are supported, the corresponding defines can go to
> include/asm/mmu-40x.h

>> +#elif (PAGE_SHIFT == 18)
>> +/*
>> + * PAGE_SIZE  256K
>> + * PAGE_SHIFT 18
>> + * PTE_SHIFT  11
>> + * PMD_SHIFT  29
>> + */
>> +#define PPC44x_TLBE_SIZE       PPC44x_TLB_256K
>> +#define PPC44x_PGD_OFF_SH      5  /*(32 - PMD_SHIFT + 2)*/
>> +#define PPC44x_PGD_OFF_M1      27 /*(PMD_SHIFT - 2)*/
>> +#define PPC44x_PTE_ADD_SH      17 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
>> +#define PPC44x_PTE_ADD_M1      18 /*32 - 3 - PTE_SHIFT*/
>> +#define PPC44x_RPN_M2          13 /*31 - PAGE_SHIFT*/

> For 256KB page size, I cannot understand why PTE_SHIFT is 11. Since
> each PTE entry is 8 byte, PTE_SHIFT should have been 15. But then
> there would be no bits in the Effective address for the 1st level
> PGDIR offset. On what basis PTE_SHIFT of 11 is chosen? This overflow
> problem happens only for 256KB page size.

 We have to use a smaller PTE area in the address to free some bits for
the PGDIR part. I guess the only impact of this approach is inefficient
usage of the memory pages allocated for PTE tables: with a PTE_SHIFT of 11
we use only 1/16 of each page holding PTEs.

 Regards, Yuri

 --
 Yuri Tikhonov, Senior Software Engineer
 Emcraft Systems, www.emcraft.com
Ilya Yanok - Sept. 11, 2008, 6:28 p.m.
Hi,

prodyut hazarika wrote:
>> +choice
>> +       prompt "Page size"
>> +       depends on 44x && PPC32
>> +       default PPC32_4K_PAGES
>> +       help
>> +         The PAGE_SIZE definition. Increasing the page size may
>> +         improve the system performance in some dedicated cases.
>> +         If unsure, set it to 4 KB.
>> +
>>     
> You should mention an example of dedicated cases (eg. RAID).
> I think this help should mention that for page size 256KB, you will
> need to have a special version of binutils, since the ELF standard
> mentions page sizes only upto 64KB.
>   

Agreed.

>> -#ifdef CONFIG_PPC_64K_PAGES
>> +#if defined(CONFIG_PPC32_256K_PAGES)
>> +#define PAGE_SHIFT             18
>> +#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
>>  #define PAGE_SHIFT             16
>> +#elif defined(CONFIG_PPC32_16K_PAGES)
>> +#define PAGE_SHIFT             14
>>  #else
>>  #define PAGE_SHIFT             12
>>  #endif
>>     
>
> Why should the new defines be inside CONFIG_PPC_64K_PAGES? The
>   

I think you missed first '-' on the first line.

> definition CONFIG_PPC_64K_PAGES is repeated.
> Shouldn't these defines be like this:
> #if defined(CONFIG_PPC32_256K_PAGES)
> #define PAGE_SHIFT             18
> #elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
> #define PAGE_SHIFT             16
> #elif defined(CONFIG_PPC32_16K_PAGES)
> #define PAGE_SHIFT             14
> #else
> #define PAGE_SHIFT             12
> #endif
>   

And they do actually :)

> Please change PPC44x_PGD_OFF_SH to PPC44x_PGD_OFF_SHIFT. SH sounds
> very confusing. I don't like the MI and M2 names too. Change
> PPC44x_RPN_M2 to PPC44x_RPN_MASK. Change M1 to MASK in
> PPC44x_PGD_OFF_M1 and PPC44x_PTE_ADD_M1 .
>   

Agreed.

> Is there no way a define like
> #define PPC44x_PGD_OFF_SH      (32 - PMD_SHIFT + 2)
> be used in assembly file. If yes, we can avoid repeating the defines.
>   

We can use defines like this; the problem is that PMD_SHIFT and PTE_SHIFT
are declared inside #ifndef __ASSEMBLY__

> I think these 44x specific defines should go to asm/mmu-44x.h since I
>   

Agreed.

> For 256KB page size, I cannot understand why PTE_SHIFT is 11. Since
> each PTE entry is 8 byte, PTE_SHIFT should have been 15. But then
> there would be no bits in the Effective address for the 1st level
> PGDIR offset. On what basis PTE_SHIFT of 11 is chosen? This overflow
> problem happens only for 256KB page size.
>   

I think Yuri has commented on this already.

Any comments on the issues mentioned in introductory message?

Regards, Ilya.
prodyut hazarika - Sept. 11, 2008, 6:38 p.m.
>
> I think you missed first '-' on the first line.
>
I was not too careful :-)


>> I think these 44x specific defines should go to asm/mmu-44x.h since I
>>
>
> Agreed.
>
It would be great to have user-friendly names.
Also moving to the mmu-4xx specific header files hides the changes to 4xx files.


>> For 256KB page size, I cannot understand why PTE_SHIFT is 11. Since
>> each PTE entry is 8 byte, PTE_SHIFT should have been 15. But then
>> there would be no bits in the Effective address for the 1st level
>> PGDIR offset. On what basis PTE_SHIFT of 11 is chosen? This overflow
>> problem happens only for 256KB page size.
>>
>
> I think Yuri has commented on this already.
>
Thanks.

In file arch/powerpc/mm/pgtable_32.c, we have:

#ifdef CONFIG_PTE_64BIT
/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
#define PGDIR_ORDER     1
#else
#define PGDIR_ORDER     0
#endif
pgd_t *pgd_alloc(struct mm_struct *mm)
{
        pgd_t *ret;

        ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
        return ret;
}

Thus, we allocate 2 pages for the PGD on 44x processors. This is needed
only for 4K pages.
With 64K or 256K pages we are not even using a whole page for the PGD, so
there is no point in wasting an additional 64K or 256K page.

Change this to:
#ifdef CONFIG_PTE_64BIT
#if (PAGE_SHIFT == 12)
/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
#define PGDIR_ORDER     1
#else
#define PGDIR_ORDER     0
#endif
#else
#define PGDIR_ORDER     0
#endif
prodyut hazarika - Sept. 11, 2008, 6:53 p.m.
>
> Any comments on the issues mentioned in introductory message?
>

I am reviewing the changes more thoroughly. I will give additional
comments within the next 2-3 days.
I am working on putting similar support for 405EX (Kilauea board), but
having some issues.
I will post the patch after I am done with it.

Also, it would be great if you could point me to the changes that are
necessary when rebuilding binutils.
I would like to test the 256KB changes on my Canyonlands board. I have
got 16KB/64KB working.

Regards,
Prodyut Hazarika
Josh Boyer - Sept. 11, 2008, 8:09 p.m.
On Thu, Sep 11, 2008 at 10:15:07PM +0400, Yuri Tikhonov wrote:
>> I think this help should mention that for page size 256KB, you will
>> need to have a special version of binutils, since the ELF standard
>> mentions page sizes only upto 64KB.
>
> Right. We use ELDK-4.2 for compiling applications to be run on 256K
>PAGE_SIZE kernel. This toolchain includes necessary changes for
>ELF_MAXPAGESIZE in binutils/bfd/elf32-ppc.c.

Ok, but not everyone does.  And I think setting the page size to this
should be harder, maybe even dependent upon CONFIG_BROKEN.

I need to look over the patch a bit more, but some of the comments you've
already gotten seem valid.

josh
Ilya Yanok - Sept. 11, 2008, 9:51 p.m.
Hi,

prodyut hazarika wrote:
> Also, it would be great if you could point me what changes are
> necessary to recompile the binutils.
> I would like to test the 256KB changes on my Canyonlands board. I have
> got 16KB/64KB working.
>   

I think this should be enough:

--- binutils-2.16.1/ld/emulparams/elf32ppc.sh.orig    2007-08-21 14:18:56.000000000 +0200
+++ binutils-2.16.1/ld/emulparams/elf32ppc.sh    2007-08-21 14:19:42.000000000 +0200
@@ -8,7 +8,7 @@ GENERATE_PIE_SCRIPT=yes
 SCRIPT_NAME=elf
 OUTPUT_FORMAT="elf32-powerpc"
 TEXT_START_ADDR=0x01800000
-MAXPAGESIZE=0x10000
+MAXPAGESIZE=0x40000
 COMMONPAGESIZE=0x1000
 ARCH=powerpc:common
 MACHINE=
--- binutils-2.16.1/bfd/elf32-ppc.c.orig    2007-09-04 13:11:29.000000000 +0200
+++ binutils-2.16.1/bfd/elf32-ppc.c    2007-09-04 13:10:25.000000000 +0200
@@ -6197,7 +6197,7 @@
 #ifdef __QNXTARGET__
 #define ELF_MAXPAGESIZE        0x1000
 #else
-#define ELF_MAXPAGESIZE        0x10000
+#define ELF_MAXPAGESIZE        0x40000
 #endif
 #define ELF_MINPAGESIZE        0x1000
 #define elf_info_to_howto    ppc_elf_info_to_howto

And you need to rebuild the whole RFS with the patched binutils, of course.

Regards, Ilya.
prodyut hazarika - Sept. 11, 2008, 10:37 p.m.
>        /*
>         * Create WS1. This is the faulting address (EPN),
>         * page size, and valid flag.
>         */
> -       li      r11,PPC44x_TLB_VALID | PPC44x_TLB_4K
> +       li      r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
>        rlwimi  r10,r11,0,20,31                 /* Insert valid and page size*/
>        tlbwe   r10,r13,PPC44x_TLB_PAGEID       /* Write PAGEID */
>

Change
>        rlwimi  r10,r11,0,20,31                 /* Insert valid and page size*/
to
>        rlwimi  r10,r11,0,PPC44x_PTE_ADD_M1,31                 /* Insert valid and page size*/
Ilya Yanok - Sept. 11, 2008, 10:44 p.m.
Hi,

prodyut hazarika wrote:
> In file arch/powerpc/mm/pgtable_32.c, we have:
>
> #ifdef CONFIG_PTE_64BIT
> /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
> #define PGDIR_ORDER     1
> #else
> #define PGDIR_ORDER     0
> #endif
> pgd_t *pgd_alloc(struct mm_struct *mm)
> {
>         pgd_t *ret;
>
>         ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
>         return ret;
> }
>
> Thus, we allocate 2 pages for 44x processors for PGD. This is needed
> only for 4K page.
> We are anyway not using the whole 64K or 256K page for the PGD. So
> there is no point to waste an additional 64K or 256KB page
>   

OK. I'm not sure I'm right, but I think the 16K case doesn't need a second
page either. (PGDIR_SHIFT=25, so sizeof(pgd_t)<<(32-PGDIR_SHIFT) < 16KB)

> Change this to:
> #ifdef CONFIG_PTE_64BIT
> #if (PAGE_SHIFT == 12)
>   

I think #ifdef CONFIG_PTE_64BIT is a little bit confusing here...  
Actually PGDIR_ORDER  should be something like max(32 + 2 - PGDIR_SHIFT 
- PAGE_SHIFT, 0)

> /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
> #define PGDIR_ORDER     1
> #else
> #define PGDIR_ORDER     0
> #endif
> #else
> #define PGDIR_ORDER     0
> #endif
>   

Yuri, any comments?

Regards, Ilya.
Yuri Tikhonov - Sept. 11, 2008, 11:20 p.m.
Hello Prodyut,

Thanks for your comments. Some answers below.

On Friday, September 12, 2008 you wrote:

>>        /*
>>         * Create WS1. This is the faulting address (EPN),
>>         * page size, and valid flag.
>>         */
>> -       li      r11,PPC44x_TLB_VALID | PPC44x_TLB_4K
>> +       li      r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
>>        rlwimi  r10,r11,0,20,31                 /* Insert valid and page size*/
>>        tlbwe   r10,r13,PPC44x_TLB_PAGEID       /* Write PAGEID */
>>

> Change
>>        rlwimi  r10,r11,0,20,31                 /* Insert valid and page size*/
> to
>>        rlwimi  r10,r11,0,PPC44x_PTE_ADD_M1,31                 /* Insert valid and page size*/

 Agree. We'll fix this.

 I guess this works for us because we used a large EPN mask here,
which covered more bits of the EPN field of the TLB entries than
was required for the 16/64/256K PAGE_SIZE cases:

TLB Word 0 / bits 0..21:   EPN (Effective Page Number) [from 4 to 22 bits]
TLB Word 0 / bit 22 :      V (Valid bit) [1 bit]
TLB Word 0 / bits 24..27 : SIZE (Page Size) [4 bits]

 Thus, doing 'rlwimi' we masked our V/SIZE bits and cleared EPN for
all 4/16/64/256K PAGE_SIZE cases.

 Regards, Yuri

 --
 Yuri Tikhonov, Senior Software Engineer
 Emcraft Systems, www.emcraft.com
Ilya Yanok - Sept. 11, 2008, 11:38 p.m.
Hello Josh,

Josh Boyer wrote:
> Ok, but not everyone does.  And I think setting the page size to this
> should be harder, maybe even dependent upon CONFIG_BROKEN.
>   

Well, we are violating the ELF standard here... CONFIG_BROKEN seems
appropriate to me.

> I need to look over the patch a bit more, but some of the comments you've
> already gotten seem valid.
>   

I'll address them and post updated patch in a few days.

Regards, Ilya.
Yuri Tikhonov - Sept. 11, 2008, 11:52 p.m.
Hi Ilya,

On Friday, September 12, 2008 you wrote:

> Hi,

> prodyut hazarika wrote:
>> In file arch/powerpc/mm/pgtable_32.c, we have:
>>
>> #ifdef CONFIG_PTE_64BIT
>> /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
>> #define PGDIR_ORDER     1
>> #else
>> #define PGDIR_ORDER     0
>> #endif
>> pgd_t *pgd_alloc(struct mm_struct *mm)
>> {
>>         pgd_t *ret;
>>
>>         ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
>>         return ret;
>> }
>>
>> Thus, we allocate 2 pages for 44x processors for PGD. This is needed
>> only for 4K page.
>> We are anyway not using the whole 64K or 256K page for the PGD. So
>> there is no point to waste an additional 64K or 256KB page
>>   

> Ok. Not sure I'm right but I think 16K case doesn't need second page 
> too. (PGDIR_SHIFT=25, so sizeof(pgd_t)<<(32-PGDIR_SHIFT) < 16KB)

 ACK, there is no need for a second page when working with 16K pages.
Prodyut's approach addresses this too, but ...

>> Change this to:
>> #ifdef CONFIG_PTE_64BIT
>> #if (PAGE_SHIFT == 12)
>>   

> I think #ifdef CONFIG_PTE_64BIT is a little bit confusing here...  
> Actually PGDIR_ORDER  should be something like max(32 + 2 - PGDIR_SHIFT
> - PAGE_SHIFT, 0)

>> /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
>> #define PGDIR_ORDER     1
>> #else
>> #define PGDIR_ORDER     0
>> #endif
>> #else
>> #define PGDIR_ORDER     0
>> #endif
>>   

> Yuri, any comments?

 ... as for me, I like your approach more.

 Regards, Yuri

 --
 Yuri Tikhonov, Senior Software Engineer
 Emcraft Systems, www.emcraft.com
Josh Boyer - Sept. 12, 2008, 12:47 a.m.
On Fri, 12 Sep 2008 03:38:39 +0400
Ilya Yanok <yanok@emcraft.com> wrote:

> Hello Josh,
> 
> Josh Boyer wrote:
> > Ok, but not everyone does.  And I think setting the page size to this
> > should be harder, maybe even dependent upon CONFIG_BROKEN.
> >   
> 
> Well, we are violating ELF standard here... CONFIG_BROKEN seems to be 
> adequate for me.

Right, that was my thinking as well.  You can add an explanation for
that in the help text.

josh
David Gibson - Sept. 12, 2008, 3:48 a.m.
On Thu, Sep 11, 2008 at 01:53:06AM +0400, Ilya Yanok wrote:
> This patch adds support for page sizes bigger than 4KB (16KB/64KB/256KB) on
> PPC 44x.

[snip]
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 587da5e..ca93157 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -413,6 +413,29 @@ config PPC_64K_PAGES
>  	  while on hardware with such support, it will be used to map
>  	  normal application pages.
>  
> +choice
> +	prompt "Page size"
> +	depends on 44x && PPC32
> +	default PPC32_4K_PAGES
> +	help
> +	  The PAGE_SIZE definition. Increasing the page size may
> +	  improve the system performance in some dedicated cases.
> +	  If unsure, set it to 4 KB.
> +
> +config PPC32_4K_PAGES
> +	bool "4k page size"
> +
> +config PPC32_16K_PAGES
> +	bool "16k page size"
> +
> +config PPC32_64K_PAGES
> +	bool "64k page size"
> +
> +config PPC32_256K_PAGES
> +	bool "256k page size"
> +

I don't see any reason to have a separate set of config options for 32
and 64-bit.  Just make it one choice, but only have the individual
pagesize options enabled on machines that support them.

[snip]
> index e088545..1de90b4 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -15,12 +15,17 @@
>  #include <asm/types.h>
>  
>  /*
> - * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
> + * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
> + * on PPC44x). For PPC64 we support either 4K or 64K software
>   * page size. When using 64K pages however, whether we are really supporting
>   * 64K pages in HW or not is irrelevant to those definitions.
>   */
> -#ifdef CONFIG_PPC_64K_PAGES
> +#if defined(CONFIG_PPC32_256K_PAGES)
> +#define PAGE_SHIFT		18
> +#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
>  #define PAGE_SHIFT		16
> +#elif defined(CONFIG_PPC32_16K_PAGES)
> +#define PAGE_SHIFT		14
>  #else
>  #define PAGE_SHIFT		12
>  #endif
> @@ -140,11 +145,19 @@ typedef struct { pte_basic_t pte; } pte_t;
>  /* 64k pages additionally define a bigger "real PTE" type that gathers
>   * the "second half" part of the PTE for pseudo 64k pages
>   */
> +#ifdef CONFIG_PPC64
>  #ifdef CONFIG_PPC_64K_PAGES
>  typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
>  #else
>  typedef struct { pte_t pte; } real_pte_t;
>  #endif
> +#else
> +#ifdef CONFIG_PPC32_4K_PAGES
> +typedef struct { pte_t pte; } real_pte_t;
> +#else
> +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;

I don't think you should need a real_pte_t type for the 32-bit
implementation.  It's just there because of how we implement
64k granularity page allocation on hardware that only does 4k
translations.

[snip]
> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
> index ebfae53..d176270 100644
> --- a/arch/powerpc/include/asm/page_32.h
> +++ b/arch/powerpc/include/asm/page_32.h
> @@ -20,7 +20,11 @@
>   */
>  #ifdef CONFIG_PTE_64BIT
>  typedef unsigned long long pte_basic_t;
> +#ifdef CONFIG_PPC32_256K_PAGES
> +#define PTE_SHIFT	(PAGE_SHIFT - 7)

This doesn't look right.  You should be eliding one of the levels of
page table if you don't need it, rather than leaving the bottom level
PTE page largely empty.

[snip]
> +#if (PAGE_SHIFT == 12)
> +/*
> + * PAGE_SIZE  4K
> + * PAGE_SHIFT 12
> + * PTE_SHIFT   9
> + * PMD_SHIFT  21
> + */
> +#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
> +#define PPC44x_PGD_OFF_SH	13 /*(32 - PMD_SHIFT + 2)*/
> +#define PPC44x_PGD_OFF_M1	19 /*(PMD_SHIFT - 2)*/
> +#define PPC44x_PTE_ADD_SH	23 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
> +#define PPC44x_PTE_ADD_M1	20 /*32 - 3 - PTE_SHIFT*/
> +#define PPC44x_RPN_M2		19 /*31 - PAGE_SHIFT*/

Uh.. you have the formulae for these things right there in the
comments, so why aren't you using those and avoiding this nasty
multiway ifdef...

[snip]
> diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
> index 9665a26..4e7cd1f 100644
> --- a/arch/powerpc/include/asm/thread_info.h
> +++ b/arch/powerpc/include/asm/thread_info.h
> @@ -15,8 +15,12 @@
>  #ifdef CONFIG_PPC64
>  #define THREAD_SHIFT		14
>  #else
> +#if defined(CONFIG_PPC32_256K_PAGES)
> +#define THREAD_SHIFT		15

Hrm.. more peculiar special cases for 256K pages.  I think it might be
clearer if you split the patch into one which supports page sizes up
to 64k, then another that does the extra hacks for 256k pages.

[snip]
> @@ -391,12 +392,14 @@ interrupt_base:
>  	rlwimi	r13,r12,10,30,30
>  
>  	/* Load the PTE */
> -	rlwinm 	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
> +	/* Compute pgdir/pmd offset */
> +	rlwinm  r12, r10, PPC44x_PGD_OFF_SH, PPC44x_PGD_OFF_M1, 29

I agree with others that these constants need better names.  Or even
derive the values from PMD_SHIFT or whatnot right here inline, rather
than defining special constants.

[snip]
> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
> index fce2df9..4f802df 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -20,7 +20,9 @@
>  	beq	1f;							     \
>  	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
>  	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
> -	addi	r1,r1,THREAD_SIZE;					     \
> +	lis	r11,THREAD_SIZE@h;					     \
> +	ori	r11,r11,THREAD_SIZE@l;					     \
> +	add	r1,r1,r11;						     \

It would be nice if we could avoid the extra instruction here when the
page size isn't big enough to require it.

>  1:	subi	r1,r1,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
>  	mr	r11,r1;							     \
>  	stw	r10,_CCR(r11);          /* save various registers	   */\
> @@ -112,7 +114,8 @@
>  	andi.	r10,r10,MSR_PR;						     \
>  	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
>  	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
> -	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD;	/* allocate stack frame    */\
> +	addis	r11,r11,EXC_LVL_FRAME_OVERHEAD@ha; /* allocate stack frame */\
> +	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD@l;  /* allocate stack frame */\

And here.
Benjamin Herrenschmidt - Sept. 13, 2008, 5:46 p.m.
> [snip]
> > diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
> > index fce2df9..4f802df 100644
> > --- a/arch/powerpc/kernel/head_booke.h
> > +++ b/arch/powerpc/kernel/head_booke.h
> > @@ -20,7 +20,9 @@
> >  	beq	1f;							     \
> >  	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
> >  	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
> > -	addi	r1,r1,THREAD_SIZE;					     \
> > +	lis	r11,THREAD_SIZE@h;					     \
> > +	ori	r11,r11,THREAD_SIZE@l;					     \
> > +	add	r1,r1,r11;						     \
> 
> It would be nice if we could avoid the extra instruction here when the
> page sizes isn't big enough to require it.

As a matter of fact, I don't see why THREAD_SIZE should ever need that,
there is no reason to change the kernel stack size.

> >  1:	subi	r1,r1,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
> >  	mr	r11,r1;							     \
> >  	stw	r10,_CCR(r11);          /* save various registers	   */\
> > @@ -112,7 +114,8 @@
> >  	andi.	r10,r10,MSR_PR;						     \
> >  	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
> >  	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
> > -	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD;	/* allocate stack frame    */\
> > +	addis	r11,r11,EXC_LVL_FRAME_OVERHEAD@ha; /* allocate stack frame */\
> > +	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD@l;  /* allocate stack frame */\
> 
> And here.

Same comment. That size doesn't need to change and can remain safely
below 32k

Ben.
Benjamin Herrenschmidt - Sept. 13, 2008, 5:49 p.m.
On Thu, 2008-09-11 at 09:57 -0700, prodyut hazarika wrote:

> You should mention an example of dedicated cases (eg. RAID).
> I think this help should mention that for page size 256KB, you will
> need to have a special version of binutils, since the ELF standard
> mentions page sizes only upto 64KB.

He should also put a BIG FAT warning with 256K pages. Those will -not-
work with most existing userspace. The maximum page size supported by
the current userspace ABI is 64K as this is the alignment requirement of
our toolchain for the various program sections.

(Note that uClibc last I looked was also still broken with >4K pages but
that shouldn't be a big deal to fix).

Ben.
Josh Boyer - Sept. 13, 2008, 11:37 p.m.
On Sat, 13 Sep 2008 10:49:13 -0700
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> On Thu, 2008-09-11 at 09:57 -0700, prodyut hazarika wrote:
> 
> > You should mention an example of dedicated cases (eg. RAID).
> > I think this help should mention that for page size 256KB, you will
> > need to have a special version of binutils, since the ELF standard
> > mentions page sizes only upto 64KB.
> 
> He should also put a BIG FAT warning with 256K pages. Those will -not-
> work with most existing userspace. The maximum page size supported by
> the current userspace ABI is 64K as this is the alignment requirement of
> our toolchain for the various program sections.

I think that's been noted like 5 times now :)

josh
Ilya Yanok - Sept. 26, 2008, 11:35 p.m.
Hello David,

David Gibson wrote:
> I don't see any reason to have a separate set of config options for 32
> and 64-bit.  Just make the once choice, but only have the individual
> pagesize options enabled on machines that support them.
>   

Well, I can see some. First, on PPC64 the kernel emulates 64K pages on
hardware that can't do them, and we are not going to do such emulation
on PPC32 for now. Second, CONFIG_PPC_64K_PAGES selects PPC_HAS_HASH_64K, and
our code has nothing to do with that. And last but not least, we don't use
PPC64 kernels at the moment, so we just tried not to break something we
can't test. But if everybody thinks that having a single option is a good
idea, I'll do it that way.

> I don't think you should need a real_pte_t type for the 32-bit
> implementation.  It's just there because of how we implement
> 64k granularity page allocation on hardware that only does 4k
> translations.
>   

You are right. Thanks.

>> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
>> index ebfae53..d176270 100644
>> --- a/arch/powerpc/include/asm/page_32.h
>> +++ b/arch/powerpc/include/asm/page_32.h
>> @@ -20,7 +20,11 @@
>>   */
>>  #ifdef CONFIG_PTE_64BIT
>>  typedef unsigned long long pte_basic_t;
>> +#ifdef CONFIG_PPC32_256K_PAGES
>> +#define PTE_SHIFT	(PAGE_SHIFT - 7)
>>     
>
> This doesn't look right.  You should be eliding one of the levels of
> page table if you don't need it, rather than leaving the bottom level
> PTE page largely empty.
>   

Hm... We have only two levels really so if we elide one there will be 
only one left. Not sure if the kernel can work with this...

>> +#if (PAGE_SHIFT == 12)
>> +/*
>> + * PAGE_SIZE  4K
>> + * PAGE_SHIFT 12
>> + * PTE_SHIFT   9
>> + * PMD_SHIFT  21
>> + */
>> +#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
>> +#define PPC44x_PGD_OFF_SH	13 /*(32 - PMD_SHIFT + 2)*/
>> +#define PPC44x_PGD_OFF_M1	19 /*(PMD_SHIFT - 2)*/
>> +#define PPC44x_PTE_ADD_SH	23 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
>> +#define PPC44x_PTE_ADD_M1	20 /*32 - 3 - PTE_SHIFT*/
>> +#define PPC44x_RPN_M2		19 /*31 - PAGE_SHIFT*/
>>     
>
> Uh.. you have the formulae for these things right there in the
> comments, so why aren't you using those and avoiding this nasty
> multiway ifdef...
>   

We need to get PMD_SHIFT and friends out of #ifndef __ASSEMBLY__ for 
that. And some of them are under include/asm-generic so patch becomes 
not powerpc-specific...

>> diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
>> index 9665a26..4e7cd1f 100644
>> --- a/arch/powerpc/include/asm/thread_info.h
>> +++ b/arch/powerpc/include/asm/thread_info.h
>> @@ -15,8 +15,12 @@
>>  #ifdef CONFIG_PPC64
>>  #define THREAD_SHIFT		14
>>  #else
>> +#if defined(CONFIG_PPC32_256K_PAGES)
>> +#define THREAD_SHIFT		15
>>     
>
> Hrm.. more peculiar special cases for 256K pages.  I think it might be
> clearer if you split the patch into one which supports page sizes up
> to 64k, then another that does the extra hacks for 256k pages.
>   

Agreed.

>> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
>> index fce2df9..4f802df 100644
>> --- a/arch/powerpc/kernel/head_booke.h
>> +++ b/arch/powerpc/kernel/head_booke.h
>> @@ -20,7 +20,9 @@
>>  	beq	1f;							     \
>>  	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
>>  	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
>> -	addi	r1,r1,THREAD_SIZE;					     \
>> +	lis	r11,THREAD_SIZE@h;					     \
>> +	ori	r11,r11,THREAD_SIZE@l;					     \
>> +	add	r1,r1,r11;						     \
>>     
>
> It would be nice if we could avoid the extra instruction here when the
> page size isn't big enough to require it.
>   

Ok. This is going to go to 256K-dirty-hacks.patch anyway.

Regards, Ilya.
Ilya Yanok - Sept. 26, 2008, 11:43 p.m.
Hello Benjamin,

Benjamin Herrenschmidt wrote:
>> [snip]
>>     
>>> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
>>> index fce2df9..4f802df 100644
>>> --- a/arch/powerpc/kernel/head_booke.h
>>> +++ b/arch/powerpc/kernel/head_booke.h
>>> @@ -20,7 +20,9 @@
>>>  	beq	1f;							     \
>>>  	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
>>>  	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
>>> -	addi	r1,r1,THREAD_SIZE;					     \
>>> +	lis	r11,THREAD_SIZE@h;					     \
>>> +	ori	r11,r11,THREAD_SIZE@l;					     \
>>> +	add	r1,r1,r11;						     \
>>>       
>> It would be nice if we could avoid the extra instruction here when the
>> page size isn't big enough to require it.
>>     
>
> As a matter of fact, I don't see why THREAD_SIZE should ever need that,
> there is no reason to change the kernel stack size.
>   

Well, this was introduced because of that:

kernel/fork.c [179]:

 max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

With THREAD_SIZE=8K we will get mempages divided by zero if PAGE_SIZE is 
bigger than 64K. (Well, another reason not to use >64K pages). Not sure 
this is the right way to fix it.

Regards, Ilya.
David Gibson - Sept. 29, 2008, 2:58 a.m.
On Sat, Sep 27, 2008 at 03:35:27AM +0400, Ilya Yanok wrote:
> Hello David,
>
> David Gibson wrote:
>> I don't see any reason to have a separate set of config options for 32
>> and 64-bit.  Just make the one choice, but only have the individual
>> pagesize options enabled on machines that support them.
>
> Well. I can see some. First, on PPC64 kernel emulates 64K pages on  
> hardware that can't do it and we are not going to do such an emulation  
> on PPC32 now.

So?

> Then CONFIG_PPC_64K_PAGES selects PPC_HAS_HASH_64K and our  
> code has nothing to do with it. 

Well, obviously the generic 64K option wouldn't select
PPC_HAS_HASH_64K.  That would be dependent on both 64K_PAGES and
PPC64.

> And last but not least, we don't use  
> PPC64 kernels for now so we just tried not to break something we can't  
> test. But if everybody thinks that having a single option is a good idea  
> I'll do it that way.

Hrm, well that has something to be said for it.  But it's not hard to
at least build a ppc64 kernel to test if you've broken that.

>> I don't think you should need a real_pte_t type for the 32-bit
>> implementation.  It's just there because of how we implement
>> 64k granularity page allocation on hardware that only does 4k
>> translations.
>
> You are right. Thanks.
>
>>> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
>>> index ebfae53..d176270 100644
>>> --- a/arch/powerpc/include/asm/page_32.h
>>> +++ b/arch/powerpc/include/asm/page_32.h
>>> @@ -20,7 +20,11 @@
>>>   */
>>>  #ifdef CONFIG_PTE_64BIT
>>>  typedef unsigned long long pte_basic_t;
>>> +#ifdef CONFIG_PPC32_256K_PAGES
>>> +#define PTE_SHIFT	(PAGE_SHIFT - 7)
>>>     
>>
>> This doesn't look right.  You should be eliding one of the levels of
>> page table if you don't need it, rather than leaving the bottom level
>> PTE page largely empty.
>
> Hm... We have only two levels really so if we elide one there will be  
> only one left. Not sure if the kernel can work with this...

Ah.. that's a point.  But again this is a 256K specific hack, so we
can worry about it later.

>>> +#if (PAGE_SHIFT == 12)
>>> +/*
>>> + * PAGE_SIZE  4K
>>> + * PAGE_SHIFT 12
>>> + * PTE_SHIFT   9
>>> + * PMD_SHIFT  21
>>> + */
>>> +#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
>>> +#define PPC44x_PGD_OFF_SH	13 /*(32 - PMD_SHIFT + 2)*/
>>> +#define PPC44x_PGD_OFF_M1	19 /*(PMD_SHIFT - 2)*/
>>> +#define PPC44x_PTE_ADD_SH	23 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
>>> +#define PPC44x_PTE_ADD_M1	20 /*32 - 3 - PTE_SHIFT*/
>>> +#define PPC44x_RPN_M2		19 /*31 - PAGE_SHIFT*/
>>>     
>>
>> Uh.. you have the formulae for these things right there in the
>> comments, so why aren't you using those and avoiding this nasty
>> multiway ifdef...
>
> We need to get PMD_SHIFT and friends out of #ifndef __ASSEMBLY__ for  
> that. And some of them are under include/asm-generic so patch becomes  
> not powerpc-specific...

So use arch/powerpc/kernel/asm-offsets.c, that's what it's for.

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 587da5e..ca93157 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -413,6 +413,29 @@  config PPC_64K_PAGES
 	  while on hardware with such support, it will be used to map
 	  normal application pages.
 
+choice
+	prompt "Page size"
+	depends on 44x && PPC32
+	default PPC32_4K_PAGES
+	help
+	  The PAGE_SIZE definition. Increasing the page size may
+	  improve the system performance in some dedicated cases.
+	  If unsure, set it to 4 KB.
+
+config PPC32_4K_PAGES
+	bool "4k page size"
+
+config PPC32_16K_PAGES
+	bool "16k page size"
+
+config PPC32_64K_PAGES
+	bool "64k page size"
+
+config PPC32_256K_PAGES
+	bool "256k page size"
+
+endchoice
+
 config FORCE_MAX_ZONEORDER
 	int "Maximum zone order"
 	default "9" if PPC_64K_PAGES
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index 5d99b64..1aec96d 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -38,9 +38,15 @@  extern pte_t *pkmap_page_table;
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
+#if defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC32_256K_PAGES)
+#define PKMAP_ORDER	(27 - PAGE_SHIFT)
+#define LAST_PKMAP	(1 << PKMAP_ORDER)
+#define PKMAP_BASE	(FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1))
+#else
 #define LAST_PKMAP 	(1 << PTE_SHIFT)
-#define LAST_PKMAP_MASK (LAST_PKMAP-1)
 #define PKMAP_BASE	((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
+#endif
+#define LAST_PKMAP_MASK	(LAST_PKMAP-1)
 #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e088545..1de90b4 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -15,12 +15,17 @@ 
 #include <asm/types.h>
 
 /*
- * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
+ * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
+ * on PPC44x). For PPC64 we support either 4K or 64K software
  * page size. When using 64K pages however, whether we are really supporting
  * 64K pages in HW or not is irrelevant to those definitions.
  */
-#ifdef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC32_256K_PAGES)
+#define PAGE_SHIFT		18
+#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES)
 #define PAGE_SHIFT		16
+#elif defined(CONFIG_PPC32_16K_PAGES)
+#define PAGE_SHIFT		14
 #else
 #define PAGE_SHIFT		12
 #endif
@@ -140,11 +145,19 @@  typedef struct { pte_basic_t pte; } pte_t;
 /* 64k pages additionally define a bigger "real PTE" type that gathers
  * the "second half" part of the PTE for pseudo 64k pages
  */
+#ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC_64K_PAGES
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
 #else
 typedef struct { pte_t pte; } real_pte_t;
 #endif
+#else
+#ifdef CONFIG_PPC32_4K_PAGES
+typedef struct { pte_t pte; } real_pte_t;
+#else
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#endif
+#endif /* !CONFIG_PPC64 */
 
 /* PMD level */
 #ifdef CONFIG_PPC64
@@ -180,12 +193,19 @@  typedef pte_basic_t pte_t;
 #define pte_val(x)	(x)
 #define __pte(x)	(x)
 
+#ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC_64K_PAGES
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
 #else
 typedef unsigned long real_pte_t;
 #endif
-
+#else
+#ifdef CONFIG_PPC32_4K_PAGES
+typedef unsigned long real_pte_t;
+#else
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#endif
+#endif /* !PPC64 */
 
 #ifdef CONFIG_PPC64
 typedef unsigned long pmd_t;
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index ebfae53..d176270 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -20,7 +20,11 @@ 
  */
 #ifdef CONFIG_PTE_64BIT
 typedef unsigned long long pte_basic_t;
+#ifdef CONFIG_PPC32_256K_PAGES
+#define PTE_SHIFT	(PAGE_SHIFT - 7)
+#else
 #define PTE_SHIFT	(PAGE_SHIFT - 3)	/* 512 ptes per page */
+#endif
 #else
 typedef unsigned long pte_basic_t;
 #define PTE_SHIFT	(PAGE_SHIFT - 2)	/* 1024 ptes per page */
diff --git a/arch/powerpc/include/asm/ppc_page_asm.h b/arch/powerpc/include/asm/ppc_page_asm.h
new file mode 100644
index 0000000..e1250fa
--- /dev/null
+++ b/arch/powerpc/include/asm/ppc_page_asm.h
@@ -0,0 +1,75 @@ 
+/*
+ * arch/powerpc/include/asm/ppc_page_asm.h
+ *
+ * 2007 (C) DENX Software Engineering.
+ *
+ *  This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ *
+ *  The page definitions used in the asm files head_44x.S and misc_32.S.
+ * PAGE_SIZEs of 4K, 16K, 64K and 256K are handled; PPC44x only.
+ *
+ */
+#ifndef PPC_PAGE_ASM_H
+#define PPC_PAGE_ASM_H
+
+#include <asm/page.h>
+
+#if (PAGE_SHIFT == 12)
+/*
+ * PAGE_SIZE  4K
+ * PAGE_SHIFT 12
+ * PTE_SHIFT   9
+ * PMD_SHIFT  21
+ */
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#define PPC44x_PGD_OFF_SH	13 /*(32 - PMD_SHIFT + 2)*/
+#define PPC44x_PGD_OFF_M1	19 /*(PMD_SHIFT - 2)*/
+#define PPC44x_PTE_ADD_SH	23 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
+#define PPC44x_PTE_ADD_M1	20 /*32 - 3 - PTE_SHIFT*/
+#define PPC44x_RPN_M2		19 /*31 - PAGE_SHIFT*/
+#elif (PAGE_SHIFT == 14)
+/*
+ * PAGE_SIZE  16K
+ * PAGE_SHIFT 14
+ * PTE_SHIFT  11
+ * PMD_SHIFT  25
+ */
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_16K
+#define PPC44x_PGD_OFF_SH	9  /*(32 - PMD_SHIFT + 2)*/
+#define PPC44x_PGD_OFF_M1	23 /*(PMD_SHIFT - 2)*/
+#define PPC44x_PTE_ADD_SH	21 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
+#define PPC44x_PTE_ADD_M1	18 /*32 - 3 - PTE_SHIFT*/
+#define PPC44x_RPN_M2		17 /*31 - PAGE_SHIFT*/
+#elif (PAGE_SHIFT == 16)
+/*
+ * PAGE_SIZE  64K
+ * PAGE_SHIFT 16
+ * PTE_SHIFT  13
+ * PMD_SHIFT  29
+ */
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_64K
+#define PPC44x_PGD_OFF_SH	5  /*(32 - PMD_SHIFT + 2)*/
+#define PPC44x_PGD_OFF_M1	27 /*(PMD_SHIFT - 2)*/
+#define PPC44x_PTE_ADD_SH	19 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
+#define PPC44x_PTE_ADD_M1	16 /*32 - 3 - PTE_SHIFT*/
+#define PPC44x_RPN_M2		15 /*31 - PAGE_SHIFT*/
+#elif (PAGE_SHIFT == 18)
+/*
+ * PAGE_SIZE  256K
+ * PAGE_SHIFT 18
+ * PTE_SHIFT  11
+ * PMD_SHIFT  29
+ */
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_256K
+#define PPC44x_PGD_OFF_SH	5  /*(32 - PMD_SHIFT + 2)*/
+#define PPC44x_PGD_OFF_M1	27 /*(PMD_SHIFT - 2)*/
+#define PPC44x_PTE_ADD_SH	17 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/
+#define PPC44x_PTE_ADD_M1	18 /*32 - 3 - PTE_SHIFT*/
+#define PPC44x_RPN_M2		13 /*31 - PAGE_SHIFT*/
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 9665a26..4e7cd1f 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -15,8 +15,12 @@ 
 #ifdef CONFIG_PPC64
 #define THREAD_SHIFT		14
 #else
+#if defined(CONFIG_PPC32_256K_PAGES)
+#define THREAD_SHIFT		15
+#else
 #define THREAD_SHIFT		13
 #endif
+#endif
 
 #define THREAD_SIZE		(1 << THREAD_SHIFT)
 
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index f3a1ea9..c0a99a4 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -36,6 +36,7 @@ 
 #include <asm/thread_info.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/ppc_page_asm.h>
 #include "head_booke.h"
 
 
@@ -391,12 +392,14 @@  interrupt_base:
 	rlwimi	r13,r12,10,30,30
 
 	/* Load the PTE */
-	rlwinm 	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12, r10, PPC44x_PGD_OFF_SH, PPC44x_PGD_OFF_M1, 29
 	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
 	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
 	beq	2f			/* Bail if no table */
 
-	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
+	/* Compute pte address */
+	rlwimi  r12, r10, PPC44x_PTE_ADD_SH, PPC44x_PTE_ADD_M1, 28
 	lwz	r11, 0(r12)		/* Get high word of pte entry */
 	lwz	r12, 4(r12)		/* Get low word of pte entry */
 
@@ -485,12 +488,14 @@  tlb_44x_patch_hwater_D:
 	/* Make up the required permissions */
 	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC
 
-	rlwinm	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
+	/* Compute pgdir/pmd offset */
+	rlwinm 	r12, r10, PPC44x_PGD_OFF_SH, PPC44x_PGD_OFF_M1, 29
 	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
 	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
 	beq	2f			/* Bail if no table */
 
-	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
+	/* Compute pte address */
+	rlwimi	r12, r10, PPC44x_PTE_ADD_SH, PPC44x_PTE_ADD_M1, 28
 	lwz	r11, 0(r12)		/* Get high word of pte entry */
 	lwz	r12, 4(r12)		/* Get low word of pte entry */
 
@@ -554,14 +559,14 @@  tlb_44x_patch_hwater_I:
  */
 finish_tlb_load:
 	/* Combine RPN & ERPN an write WS 0 */
-	rlwimi	r11,r12,0,0,19
+	rlwimi	r11,r12,0,0,PPC44x_RPN_M2
 	tlbwe	r11,r13,PPC44x_TLB_XLAT
 
 	/*
 	 * Create WS1. This is the faulting address (EPN),
 	 * page size, and valid flag.
 	 */
-	li	r11,PPC44x_TLB_VALID | PPC44x_TLB_4K
+	li	r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
 	rlwimi	r10,r11,0,20,31			/* Insert valid and page size*/
 	tlbwe	r10,r13,PPC44x_TLB_PAGEID	/* Write PAGEID */
 
@@ -634,12 +639,12 @@  _GLOBAL(set_context)
  * goes at the beginning of the data segment, which is page-aligned.
  */
 	.data
-	.align	12
+	.align	PAGE_SHIFT
 	.globl	sdata
 sdata:
 	.globl	empty_zero_page
 empty_zero_page:
-	.space	4096
+	.space	PAGE_SIZE
 
 /*
  * To support >32-bit physical addresses, we use an 8KB pgdir.
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index fce2df9..4f802df 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -20,7 +20,9 @@ 
 	beq	1f;							     \
 	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
 	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
-	addi	r1,r1,THREAD_SIZE;					     \
+	lis	r11,THREAD_SIZE@h;					     \
+	ori	r11,r11,THREAD_SIZE@l;					     \
+	add	r1,r1,r11;						     \
 1:	subi	r1,r1,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
 	mr	r11,r1;							     \
 	stw	r10,_CCR(r11);          /* save various registers	   */\
@@ -112,7 +114,8 @@ 
 	andi.	r10,r10,MSR_PR;						     \
 	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
 	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
-	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD;	/* allocate stack frame    */\
+	addis	r11,r11,EXC_LVL_FRAME_OVERHEAD@ha; /* allocate stack frame */\
+	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD@l;  /* allocate stack frame */\
 	beq	1f;							     \
 	/* COMING FROM USER MODE */					     \
 	stw	r9,_CCR(r11);		/* save CR			   */\
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7a6dfbc..97463ba 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -29,6 +29,7 @@ 
 #include <asm/asm-offsets.h>
 #include <asm/processor.h>
 #include <asm/kexec.h>
+#include <asm/ppc_page_asm.h>
 
 	.text
 
@@ -589,8 +590,8 @@  _GLOBAL(__flush_dcache_icache)
 BEGIN_FTR_SECTION
 	blr
 END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-	rlwinm	r3,r3,0,0,19			/* Get page base address */
-	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
+	rlwinm	r3,r3,0,0,PPC44x_RPN_M2		/* Get page base address */
+	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
 	mtctr	r4
 	mr	r6,r3
 0:	dcbst	0,r3				/* Write line to ram */
@@ -630,8 +631,8 @@  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 	rlwinm	r0,r10,0,28,26			/* clear DR */
 	mtmsr	r0
 	isync
-	rlwinm	r3,r3,0,0,19			/* Get page base address */
-	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
+	rlwinm	r3,r3,0,0,PPC44x_RPN_M2		/* Get page base address */
+	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
 	mtctr	r4
 	mr	r6,r3
 0:	dcbst	0,r3				/* Write line to ram */
@@ -655,7 +656,7 @@  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
  * void clear_pages(void *page, int order) ;
  */
 _GLOBAL(clear_pages)
-	li	r0,4096/L1_CACHE_BYTES
+	li	r0,PAGE_SIZE/L1_CACHE_BYTES
 	slw	r0,r0,r4
 	mtctr	r0
 #ifdef CONFIG_8xx
@@ -713,7 +714,7 @@  _GLOBAL(copy_page)
 	dcbt	r5,r4
 	li	r11,L1_CACHE_BYTES+4
 #endif /* MAX_COPY_PREFETCH */
-	li	r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+	li	r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
 	crclr	4*cr0+eq
 2:
 	mtctr	r0
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 2001abd..efaf46a 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -400,7 +400,7 @@  void kernel_map_pages(struct page *page, int numpages, int enable)
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
 static int fixmaps;
-unsigned long FIXADDR_TOP = 0xfffff000;
+unsigned long FIXADDR_TOP = (-PAGE_SIZE);
 EXPORT_SYMBOL(FIXADDR_TOP);
 
 void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)