Patchwork [1/2] powerpc: enable the relocatable support for the fsl booke 32bit kernel

login
register
mail settings
Submitter Kevin Hao
Date June 27, 2013, 2 a.m.
Message ID <1372298434-20220-2-git-send-email-haokexin@gmail.com>
Download mbox | patch
Permalink /patch/254926/
State Superseded
Headers show

Comments

Kevin Hao - June 27, 2013, 2 a.m.
This is based on the code in head_44x.S. Since we always align to
256M before mapping PAGE_OFFSET for a relocatable kernel, we also
change the initial TLB mapping to 256M in size.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
 arch/powerpc/Kconfig                          |  2 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  8 ++-
 arch/powerpc/kernel/head_fsl_booke.S          | 92 +++++++++++++++++++++++++--
 3 files changed, 94 insertions(+), 8 deletions(-)
Kevin Hao - June 28, 2013, 1:36 a.m.
On Thu, Jun 27, 2013 at 02:58:34PM -0500, Scott Wood wrote:
> On 06/26/2013 09:00:33 PM, Kevin Hao wrote:
> >This is based on the codes in the head_44x.S. Since we always align to
> >256M before mapping the PAGE_OFFSET for a relocatable kernel, we also
> >change the init tlb map to 256M size.
> 
> Why 256M?

For two reasons:
  1. This is a page size which both e500v1 and e500v2 support.
  2. Since we always use 0xc0000000 as PAGE_OFFSET, 256M is the
     maximum alignment value we can use for this virtual address.

> 
> This tightens the alignment requirement for dynamic memstart.

Yes. But since RELOCATABLE is a superset of DYNAMIC_MEMSTART, we can always
use RELOCATABLE instead of DYNAMIC_MEMSTART for fsl booke boards in any case.
So DYNAMIC_MEMSTART will not seem so useful after we enable this feature.

>  And
> what about boards with less than 256 MiB of RAM?

It should be fine. We just create the mapping in the TLB. The MM still uses
the real size of memory.

> 
> >@@ -176,6 +176,8 @@ skpinv:	addi	r6,r6,1				/* Increment */
> > /* 7. Jump to KERNELBASE mapping */
> > 	lis	r6,(KERNELBASE & ~0xfff)@h
> > 	ori	r6,r6,(KERNELBASE & ~0xfff)@l
> >+	rlwinm	r7,r25,0,4,31
> >+	add	r6,r7,r6
> 
> Please consider using the more readable form of rlwinm/rlwimi:
> 
> 	rlwinm	r7,r25,0,0x0fffffff

Sure.

> 
> > #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
> > /*
> >diff --git a/arch/powerpc/kernel/head_fsl_booke.S
> >b/arch/powerpc/kernel/head_fsl_booke.S
> >index d10a7ca..c3b4c8e53 100644
> >--- a/arch/powerpc/kernel/head_fsl_booke.S
> >+++ b/arch/powerpc/kernel/head_fsl_booke.S
> >@@ -83,10 +83,43 @@ _ENTRY(_start);
> > 	andc	r31,r20,r18		/* r31 = page base */
> > 	or	r31,r31,r19		/* r31 = devtree phys addr */
> > 	mfspr	r30,SPRN_MAS7
> >-
> >-	li	r25,0			/* phys kernel start (low) */
> > 	li	r24,0			/* CPU number */
> >-	li	r23,0			/* phys kernel start (high) */
> >+
> >+#ifdef CONFIG_RELOCATABLE
> >+	bl	0f				/* Get our runtime address */
> >+0:	mflr	r3				/* Make it accessible */
> >+	addis	r3,r3,(_stext - 0b)@ha
> >+	addi	r3,r3,(_stext - 0b)@l 	/* Get our current runtime base */
> >+
> >+	/* Translate _stext address to physical, save in r23/r25 */
> >+	tlbsx	0,r3			/* must succeed */
> >+
> >+	mfspr	r16,SPRN_MAS1
> >+	mfspr	r20,SPRN_MAS3
> >+	rlwinm	r17,r16,25,0x1f		/* r17 = log2(page size) */
> >+	li	r18,1024
> >+	slw	r18,r18,r17		/* r18 = page size */
> >+	addi	r18,r18,-1
> >+	and	r19,r3,r18		/* r19 = page offset */
> >+	andc	r25,r20,r18		/* r25 = page base */
> >+	or	r25,r25,r19		/* r25 = _stext phys addr */
> >+	mfspr	r23,SPRN_MAS7
> 
> This duplicates the code for finding the device tree physical
> address... maybe factor it out into a function?

Sure.

> 
> >@@ -197,7 +230,58 @@ _ENTRY(__early_start)
> >
> > 	bl	early_init
> >
> >-#ifdef CONFIG_DYNAMIC_MEMSTART
> >+#ifdef CONFIG_RELOCATABLE
> >+	/*
> >+	 * Relocatable kernel support based on processing of dynamic
> >+	 * relocation entries.
> >+	 *
> >+	 * r25/r23 will contain RPN/ERPN for the start address of memory
> 
> The start of memory or the start of the kernel?

Should be the start of the kernel. Will fix the comment.

> 
> >+	 */
> >+	lis	r3,kernstart_addr@ha
> >+	la	r3,kernstart_addr@l(r3)
> >+
> >+#ifdef CONFIG_PHYS_64BIT
> >+	stw	r23,0(r3)
> >+	stw	r25,4(r3)
> >+#else
> >+	stw	r25,0(r3)
> >+#endif
> 
> This part looks the same for relocatable and dynamic memstart -- can
> you avoid duplicating?

OK.

> 
> >+	/*
> >+	 * Compute the virt_phys_offset :
> >+	 * virt_phys_offset = stext.run - kernstart_addr
> >+	 *
> >+	 * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr &
> >0xfffffff)
> >+	 * When we relocate, we have :
> >+	 *
> >+	 *	(kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff)
> >+	 *
> >+	 * hence:
> >+	 *  virt_phys_offset = (KERNELBASE & ~0xfffffff) -
> >+	 *                              (kernstart_addr & ~0xfffffff)
> >+	 *
> >+	 */
> >+
> >+	/* KERNELBASE&~0xfffffff => (r4,r5) */
> 
> >+	li	r4, 0		/* higer 32bit */
> >+	lis	r5,KERNELBASE@h
> 
> Please be consistent with whitespace.

Fixed.

Thanks,
Kevin

> 
> -Scott
Scott Wood - June 28, 2013, 1:52 a.m.
On 06/27/2013 08:36:37 PM, Kevin Hao wrote:
> On Thu, Jun 27, 2013 at 02:58:34PM -0500, Scott Wood wrote:
> > On 06/26/2013 09:00:33 PM, Kevin Hao wrote:
> > >This is based on the codes in the head_44x.S. Since we always  
> align to
> > >256M before mapping the PAGE_OFFSET for a relocatable kernel, we  
> also
> > >change the init tlb map to 256M size.
> >
> > Why 256M?
> 
> For two reasons:
>   1. This is the size which both e500v1 and e500v2 support.
>   2. Since we always use the PAGE_OFFSET as 0xc0000000, the 256M is
>      max alignment value we can use for this virtual address.

0xc0000000 is 1G-aligned, so I don't see why 256M is the maximum (after  
verifying that enough memory is present with the right alignment, of  
course).  The TLB1 savings would probably not be enough to justify  
figuring that out, though.

-Scott
Kevin Hao - June 30, 2013, 7:33 a.m.
On Thu, Jun 27, 2013 at 08:47:27PM -0500, Scott Wood wrote:
> On 06/27/2013 08:36:37 PM, Kevin Hao wrote:
> >On Thu, Jun 27, 2013 at 02:58:34PM -0500, Scott Wood wrote:
> >> On 06/26/2013 09:00:33 PM, Kevin Hao wrote:
> >> >This is based on the codes in the head_44x.S. Since we always
> >align to
> >> >256M before mapping the PAGE_OFFSET for a relocatable kernel,
> >we also
> >> >change the init tlb map to 256M size.
> >>
> >> Why 256M?
> >
> >For two reasons:
> >  1. This is the size which both e500v1 and e500v2 support.
> >  2. Since we always use the PAGE_OFFSET as 0xc0000000, the 256M is
> >     max alignment value we can use for this virtual address.
> 
> Is there any reason why 64M won't continue to work here?

Yes. In general we would map the 0 ~ 256M memory region in the first
tlb1 entry. If we align to 64M, the relocatable kernel would not work
if loaded at 64M or above. For example, if we load a relocatable kernel
at 64M, we will relocate it as:
	__pa(PAGE_OFFSET) = 0x4000000

But in map_mem_in_cams function, it will create a memory map as:
	__pa(PAGE_OFFSET) = 0x0

The kernel will definitely not work in this case.
	
> 
> >> This tightens the alignment requirement for dynamic memstart.
> >
> >Yes. But since RELOCATABLE is a superset of DYNAMIC_MEMSTART, we
> >can always
> >use RELOCATABLE instead of DYNAMIC_MEMSTART for fsl booke board in
> >any cases.
> 
> The extra flexibility of RELOCATABLE may help some use cases, but
> you'd still require the entire 256M naturally aligned region
> containing the kernel to be present and owned by this instance of
> Linux.
> 
> >So DYNAMIC_MEMSTART will seem not so useful after we enable this
> >feature.
> 
> Then why doesn't this patch remove it?

According to the Kconfig it is still used by 44x. And maybe someone
still wants to use this relocation method.

> 
> >>  And
> >> what about boards with less than 256 MiB of RAM?
> >
> >It should be fine. We just create the map in the tlb. The MM still use
> >the real size of memory.
> 
> No, you must not map anything that is not present with a mapping
> that is executable and/or not guarded, or you could get speculative
> accesses to who-knows-what.

Yes, there may be speculative access in this case.

>  Even if RAM is present there but owned
> by some other entity, you could be creating illegal aliases if that
> other entity mapped it cache-inhibited or similar.

Fair enough. So it seems error-prone to map this 256M memory region
blindly. But if we don't do this, it seems that we have to relocate twice.
The first time we just align to a predefined value (64M for example), and
then parse the device tree to get the real memstart_addr. After that we
relocate the kernel to the real start address. It seems a little
complicated. Do you have any better ideas?

Thanks,
Kevin


> 
> -Scott
Kevin Hao - June 30, 2013, 7:34 a.m.
On Thu, Jun 27, 2013 at 08:52:20PM -0500, Scott Wood wrote:
> On 06/27/2013 08:36:37 PM, Kevin Hao wrote:
> >On Thu, Jun 27, 2013 at 02:58:34PM -0500, Scott Wood wrote:
> >> On 06/26/2013 09:00:33 PM, Kevin Hao wrote:
> >> >This is based on the codes in the head_44x.S. Since we always
> >align to
> >> >256M before mapping the PAGE_OFFSET for a relocatable kernel,
> >we also
> >> >change the init tlb map to 256M size.
> >>
> >> Why 256M?
> >
> >For two reasons:
> >  1. This is the size which both e500v1 and e500v2 support.
> >  2. Since we always use the PAGE_OFFSET as 0xc0000000, the 256M is
> >     max alignment value we can use for this virtual address.
> 
> 0xc0000000 is 1G-aligned, so I don't see why 256M is the maximum
> (after verifying that enough memory is present with the right
> alignment, of course).  The TLB1 savings would probably not be
> enough to justify figuring that out, though.

Sorry, I didn't make myself clear. Yes, 0xc0000000 is 1G-aligned.
For a 32bit kernel we only have a 1G memory region for the kernel space,
but we can't use all of it for the direct map. So we always set
__max_low_memory to 0x30000000. And the e500 core doesn't support a
512M page size. So 256M is the max page size we can use for a
32bit kernel. And since we always cover the boot code in the first
tlb entry, we then use 256M as the alignment value for the
relocatable kernel.

Thanks,
Kevin

> 
> -Scott
Kevin Hao - July 2, 2013, 3:24 a.m.
On Mon, Jul 01, 2013 at 07:30:45PM -0500, Scott Wood wrote:
> On 06/30/2013 02:33:10 AM, Kevin Hao wrote:
> >On Thu, Jun 27, 2013 at 08:47:27PM -0500, Scott Wood wrote:
> >> On 06/27/2013 08:36:37 PM, Kevin Hao wrote:
> >> >On Thu, Jun 27, 2013 at 02:58:34PM -0500, Scott Wood wrote:
> >> >> On 06/26/2013 09:00:33 PM, Kevin Hao wrote:
> >> >> >This is based on the codes in the head_44x.S. Since we always
> >> >align to
> >> >> >256M before mapping the PAGE_OFFSET for a relocatable kernel,
> >> >we also
> >> >> >change the init tlb map to 256M size.
> >> >>
> >> >> Why 256M?
> >> >
> >> >For two reasons:
> >> >  1. This is the size which both e500v1 and e500v2 support.
> >> >  2. Since we always use the PAGE_OFFSET as 0xc0000000, the 256M is
> >> >     max alignment value we can use for this virtual address.
> >>
> >> Is there any reason why 64M won't continue to work here?
> >
> >Yes. In general we would map the 0 ~ 256M memory region in the first
> >tlb1 entry. If we align to 64M, the relocatable kernel would not work
> >if loaded above 64M memory. For example, if we load a relocatable
> >kernel
> >at 64M memory, we will relocate it as:
> >	__pa(PAGE_OFFSET) = 0x4000000
> >
> >But in map_mem_in_cams function, it will create a memory map as:
> >	__pa(PAGE_OFFSET) = 0x0
> >
> >The kernel will definitely not work in this case.
> 
> That's a problem with map_mem_in_cams(), as discussed in the thread
> on other patch.  Perhaps fully solving those problems is not
> worthwhile at this time, but we should at least be able to determine
> the TLB size automatically based on the alignment of the address
> you're trying to map.  64M would be used unless (address & (256M -
> 1)) >= 64M.  I hope we can continue to assume the kernel won't cross
> a 64M boundary.

No. The problem is we don't know the physical address of the start of
lowmem at boot. So we have to align to a physical address (phys1) blindly
and map PAGE_OFFSET from there. Then once we get the physical address
(phys2) of the start of lowmem from the device tree later, we will map
PAGE_OFFSET to the start of lowmem. If phys1 is not equal to phys2,
we get a problem. The reasons that we can't keep PAGE_OFFSET mapped to
phys1 in map_mem_in_cams() are:
  * if phys1 > phys2, then this will waste the memory between phys2 ~ phys1.
  * if phys1 < phys2, then we map a memory region which doesn't belong to
    this kernel.
 
> 
> >> >> This tightens the alignment requirement for dynamic memstart.
> >> >
> >> >Yes. But since RELOCATABLE is a superset of DYNAMIC_MEMSTART, we
> >> >can always
> >> >use RELOCATABLE instead of DYNAMIC_MEMSTART for fsl booke board in
> >> >any cases.
> >>
> >> The extra flexibility of RELOCATABLE may help some use cases, but
> >> you'd still require the entire 256M naturally aligned region
> >> containing the kernel to be present and owned by this instance of
> >> Linux.
> >>
> >> >So DYNAMIC_MEMSTART will seem not so useful after we enable this
> >> >feature.
> >>
> >> Then why doesn't this patch remove it?
> >
> >According to the Kconfig it is still used by 44x.
> 
> RELOCATABLE appears to be supported on 44x, and is what CRASH_DUMP
> uses on 44x.

The kdump kernel on 44x also uses the RELOCATABLE method.

> 
> >And maybe someone still want to use this relocation method.
> 
> Then you don't get to dismiss claims that you're changing
> DYNAMIC_MEMSTART alignment requirements by saying that RELOCATABLE
> is a strict superset. :-)  Given the requirement that the kernel be
> in the first TLB entry, though, using RELOCATABLE rather than
> DYNAMIC_MEMSTART doesn't fix the alignment problem.
> 
> I don't think it makes sense to keep both mechanisms around unless
> there's some obvious reason to prefer DYNAMIC_MEMSTART.

DYNAMIC_MEMSTART can still be used for cases such as an AMP kernel. It does
have a smaller footprint than RELOCATABLE and also doesn't have the overhead
of the relocation. So I don't want to drop it in a rush.

Thanks,
Kevin

> 
> >> >>  And
> >> >> what about boards with less than 256 MiB of RAM?
> >> >
> >> >It should be fine. We just create the map in the tlb. The MM
> >still use
> >> >the real size of memory.
> >>
> >> No, you must not map anything that is not present with a mapping
> >> that is executable and/or not guarded, or you could get speculative
> >> accesses to who-knows-what.
> >
> >Yes, there may be speculative access in this case.
> >
> >>  Even if RAM is present there but owned
> >> by some other entity, you could be creating illegal aliases if that
> >> other entity mapped it cache-inhibited or similar.
> >
> >Fair enough. So it seems error prone if we map this 256M memory region
> >blindly. But if we don't do this, it seems that we have to do
> >twice relocation.
> >The first time we just align to a predefined value (64M for
> >example), and
> >then parse the device tree and get the real memstart_addr. After
> >that we
> >should relocate the kernel to the real start address. It seems a
> >little
> >complicated. Do you have any better ideas?
> 
> This seems like the proper way to address it, assuming we're
> unwilling to map the kernel image somewhere other than the normal
> lowmem mapping (and I think we're unwilling, given how tight the
> address space is on 32-bit, and the intrusiveness of the change).
> The dynamic determination of 64M versus 256M could be an acceptable
> alternative though, if we're OK with not supporting arbitrary
> relocatable scenarios, but just those that are either needed by
> kdump, or supported by current kernels (with DYNAMIC_MEMSTART, or
> just starting at zero with less than 256M of RAM).  If we go that
> route, the limitations should be documented.
> 
> -Scott
Kevin Hao - July 3, 2013, 3 a.m.
On Tue, Jul 02, 2013 at 05:39:18PM -0500, Scott Wood wrote:
> On 07/01/2013 10:24:47 PM, Kevin Hao wrote:
> >On Mon, Jul 01, 2013 at 07:30:45PM -0500, Scott Wood wrote:
> >> On 06/30/2013 02:33:10 AM, Kevin Hao wrote:
> >> >On Thu, Jun 27, 2013 at 08:47:27PM -0500, Scott Wood wrote:
> >> >> On 06/27/2013 08:36:37 PM, Kevin Hao wrote:
> >> >> >On Thu, Jun 27, 2013 at 02:58:34PM -0500, Scott Wood wrote:
> >> >> >> On 06/26/2013 09:00:33 PM, Kevin Hao wrote:
> >> >> >> >This is based on the codes in the head_44x.S. Since we always
> >> >> >align to
> >> >> >> >256M before mapping the PAGE_OFFSET for a relocatable kernel,
> >> >> >we also
> >> >> >> >change the init tlb map to 256M size.
> >> >> >>
> >> >> >> Why 256M?
> >> >> >
> >> >> >For two reasons:
> >> >> >  1. This is the size which both e500v1 and e500v2 support.
> >> >> >  2. Since we always use the PAGE_OFFSET as 0xc0000000, the
> >256M is
> >> >> >     max alignment value we can use for this virtual address.
> >> >>
> >> >> Is there any reason why 64M won't continue to work here?
> >> >
> >> >Yes. In general we would map the 0 ~ 256M memory region in the
> >first
> >> >tlb1 entry. If we align to 64M, the relocatable kernel would
> >not work
> >> >if loaded above 64M memory. For example, if we load a relocatable
> >> >kernel
> >> >at 64M memory, we will relocate it as:
> >> >	__pa(PAGE_OFFSET) = 0x4000000
> >> >
> >> >But in map_mem_in_cams function, it will create a memory map as:
> >> >	__pa(PAGE_OFFSET) = 0x0
> >> >
> >> >The kernel will definitely not work in this case.
> >>
> >> That's a problem with map_mem_in_cams(), as discussed in the thread
> >> on other patch.  Perhaps fully solving those problems is not
> >> worthwhile at this time, but we should at least be able to determine
> >> the TLB size automatically based on the alignment of the address
> >> you're trying to map.  64M would be used unless (address & (256M -
> >> 1)) >= 64M.  I hope we can continue to assume the kernel won't cross
> >> a 64M boundary.
> >
> >No. The problem is we don't know the physical address of the start of
> >lowmem at booting. So we have to align to physical address (phys1)
> >blindly
> >and map the PAGE_OFFSET from there. Then once we get the physical
> >address
> >(phys2) of the start of lowmem from the device tree later, we will
> >map the
> >PAGE_OFFSET to the start of lowmem. If the phys1 is not equal to
> >phys2,
> >we get a problem.
> 
> How would you get phys1 != phys2, unless the kernel begins in a
> 256M-aligned region other than the first (which you said is already
> not supported)?

Yes, this is the only case in which phys1 != phys2 if we align to 256M.
I plan to also fix this in the next version.

> 
> If (phys1 & (256M - 1)) < 64M, then you'd get the same phys2
> regardless of whether you align it to 64M or 256M.
> Otherwise, we use a 256M page which is what you're already doing.

Yes, you are right. I am just trying to say we will run into problems
when loading a kernel between 64M ~ 256M if we don't align to 256M.

> 
> >> >And maybe someone still want to use this relocation method.
> >>
> >> Then you don't get to dismiss claims that you're changing
> >> DYNAMIC_MEMSTART alignment requirements by saying that RELOCATABLE
> >> is a strict superset. :-)  Given the requirement that the kernel be
> >> in the first TLB entry, though, using RELOCATABLE rather than
> >> DYNAMIC_MEMSTART doesn't fix the alignment problem.
> >>
> >> I don't think it makes sense to keep both mechanisms around unless
> >> there's some obvious reason to prefer DYNAMIC_MEMSTART.
> >
> >The DYNAMIC_MEMSTART still can be used for such as AMP kernel. It
> >does have
> >a more small footprint than RELOCATABLE and also doesn't have the
> >overhead
> >of the relocation. So I don't want to drop it in a rush.
> 
> How much overhead (space and time) is this really?

The following are the additional sections present when relocatable is
enabled for a p2020rdb board.
   section        size
  .dynsym       000007f0
  .dynstr       00000926
  .dynamic      00000080
  .hash         00000388
  .interp       00000011
  .rela.dyn     00215250

The time for the relocation is about 32ms on a p2020rdb board.

> 
> It will keep the code (and especially the diff) simpler to have this
> replace DYNAMIC_MEMSTART rather than add to it.

OK. If you think that the above overhead is acceptable, I can drop the
DYNAMIC_MEMSTART in the next version.

Thanks,
Kevin
> 
> -Scott
Scott Wood - July 3, 2013, 8:38 p.m.
On 07/02/2013 10:00:44 PM, Kevin Hao wrote:
> On Tue, Jul 02, 2013 at 05:39:18PM -0500, Scott Wood wrote:
> > How much overhead (space and time) is this really?
> 
> The following is the additional sections when relocatable is enabled  
> for
> a p2020rdb board.
>    section        size
>   .dynsym       000007f0
>   .dynstr       00000926
>   .dynamic      00000080
>   .hash         00000388
>   .interp       00000011
>   .rela.dyn     00215250
> 
> The time for the relocation is about 32ms on a p2020rdb board.

Hmm... more relocations than I expected.  What percentage is this of  
the total image size?

-Scott
Kevin Hao - July 4, 2013, 1:08 a.m.
On Wed, Jul 03, 2013 at 03:38:27PM -0500, Scott Wood wrote:
> On 07/02/2013 10:00:44 PM, Kevin Hao wrote:
> >On Tue, Jul 02, 2013 at 05:39:18PM -0500, Scott Wood wrote:
> >> How much overhead (space and time) is this really?
> >
> >The following is the additional sections when relocatable is
> >enabled for
> >a p2020rdb board.
> >   section        size
> >  .dynsym       000007f0
> >  .dynstr       00000926
> >  .dynamic      00000080
> >  .hash         00000388
> >  .interp       00000011
> >  .rela.dyn     00215250
> >
> >The time for the relocation is about 32ms on a p2020rdb board.
> 
> Hmm... more relocations than I expected.  What percentage is this of
> the total image size?

The size of vmlinux.bin is about 10M. The relocation section is about
20% of that. But look on the bright side: all the relocation
data is in the init section and should be discarded at runtime. :-)

Thanks,
Kevin

> 
> -Scott
Scott Wood - July 8, 2013, 4:48 p.m.
On 07/03/2013 08:08:18 PM, Kevin Hao wrote:
> On Wed, Jul 03, 2013 at 03:38:27PM -0500, Scott Wood wrote:
> > On 07/02/2013 10:00:44 PM, Kevin Hao wrote:
> > >On Tue, Jul 02, 2013 at 05:39:18PM -0500, Scott Wood wrote:
> > >> How much overhead (space and time) is this really?
> > >
> > >The following is the additional sections when relocatable is
> > >enabled for
> > >a p2020rdb board.
> > >   section        size
> > >  .dynsym       000007f0
> > >  .dynstr       00000926
> > >  .dynamic      00000080
> > >  .hash         00000388
> > >  .interp       00000011
> > >  .rela.dyn     00215250
> > >
> > >The time for the relocation is about 32ms on a p2020rdb board.
> >
> > Hmm... more relocations than I expected.  What percentage is this of
> > the total image size?
> 
> The size of vmlinux.bin is about 10M. The percentage of the relocation
> section is about 20%. But look on the bright side of thing, all the  
> relocation
> stuff are in init section and should be discarded at runtime. :-)

That doesn't reduce the space it takes up in flash...

-Scott
Kevin Hao - July 9, 2013, 1:26 a.m.
On Mon, Jul 08, 2013 at 11:48:25AM -0500, Scott Wood wrote:
> On 07/03/2013 08:08:18 PM, Kevin Hao wrote:
> >On Wed, Jul 03, 2013 at 03:38:27PM -0500, Scott Wood wrote:
> >> On 07/02/2013 10:00:44 PM, Kevin Hao wrote:
> >> >On Tue, Jul 02, 2013 at 05:39:18PM -0500, Scott Wood wrote:
> >> >> How much overhead (space and time) is this really?
> >> >
> >> >The following is the additional sections when relocatable is
> >> >enabled for
> >> >a p2020rdb board.
> >> >   section        size
> >> >  .dynsym       000007f0
> >> >  .dynstr       00000926
> >> >  .dynamic      00000080
> >> >  .hash         00000388
> >> >  .interp       00000011
> >> >  .rela.dyn     00215250
> >> >
> >> >The time for the relocation is about 32ms on a p2020rdb board.
> >>
> >> Hmm... more relocations than I expected.  What percentage is this of
> >> the total image size?
> >
> >The size of vmlinux.bin is about 10M. The percentage of the relocation
> >section is about 20%. But look on the bright side of thing, all
> >the relocation
> >stuff are in init section and should be discarded at runtime. :-)
> 
> That doesn't reduce the space it takes up in flash...

Yes. But since we always use a compressed uImage kernel in flash,
the increase in image size would shrink to about 500K.

Thanks,
Kevin

> 
> -Scott

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c33e3ad..9eb97ac 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -866,7 +866,7 @@  config DYNAMIC_MEMSTART
 
 config RELOCATABLE
 	bool "Build a relocatable kernel"
-	depends on ADVANCED_OPTIONS && FLATMEM && 44x
+	depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE)
 	select NONSTATIC_KERNEL
 	help
 	  This builds a kernel image that is capable of running at the
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79b..32a4b38 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -165,10 +165,10 @@  skpinv:	addi	r6,r6,1				/* Increment */
 	lis	r6,0x1000		/* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
 	mtspr	SPRN_MAS0,r6
 	lis	r6,(MAS1_VALID|MAS1_IPROT)@h
-	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l
 	mtspr	SPRN_MAS1,r6
-	lis	r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h
-	ori	r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l
+	lis	r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_256M, M_IF_SMP)@h
+	ori	r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_256M, M_IF_SMP)@l
 	mtspr	SPRN_MAS2,r6
 	mtspr	SPRN_MAS3,r8
 	tlbwe
@@ -176,6 +176,8 @@  skpinv:	addi	r6,r6,1				/* Increment */
 /* 7. Jump to KERNELBASE mapping */
 	lis	r6,(KERNELBASE & ~0xfff)@h
 	ori	r6,r6,(KERNELBASE & ~0xfff)@l
+	rlwinm	r7,r25,0,4,31
+	add	r6,r7,r6
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index d10a7ca..c3b4c8e53 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -83,10 +83,43 @@  _ENTRY(_start);
 	andc	r31,r20,r18		/* r31 = page base */
 	or	r31,r31,r19		/* r31 = devtree phys addr */
 	mfspr	r30,SPRN_MAS7
-
-	li	r25,0			/* phys kernel start (low) */
 	li	r24,0			/* CPU number */
-	li	r23,0			/* phys kernel start (high) */
+
+#ifdef CONFIG_RELOCATABLE
+	bl	0f				/* Get our runtime address */
+0:	mflr	r3				/* Make it accessible */
+	addis	r3,r3,(_stext - 0b)@ha
+	addi	r3,r3,(_stext - 0b)@l 	/* Get our current runtime base */
+
+	/* Translate _stext address to physical, save in r23/r25 */
+	tlbsx	0,r3			/* must succeed */
+
+	mfspr	r16,SPRN_MAS1
+	mfspr	r20,SPRN_MAS3
+	rlwinm	r17,r16,25,0x1f		/* r17 = log2(page size) */
+	li	r18,1024
+	slw	r18,r18,r17		/* r18 = page size */
+	addi	r18,r18,-1
+	and	r19,r3,r18		/* r19 = page offset */
+	andc	r25,r20,r18		/* r25 = page base */
+	or	r25,r25,r19		/* r25 = _stext phys addr */
+	mfspr	r23,SPRN_MAS7
+
+	/*
+	 * We have the runtime (virtual) address of our base.
+	 * We calculate our shift of offset from a 256M page.
+	 * We could map the 256M page we belong to at PAGE_OFFSET and
+	 * get going from there.
+	 */
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	rlwinm	r6,r25,0,4,31			/* r6 = PHYS_START % 256M */
+	rlwinm	r5,r4,0,4,31			/* r5 = KERNELBASE % 256M */
+	subf	r3,r5,r6			/* r3 = r6 - r5 */
+	add	r3,r4,r3			/* Required Virutal Address */
+
+	bl	relocate
+#endif
 
 /* We try to not make any assumptions about how the boot loader
  * setup or used the TLBs.  We invalidate all mappings from the
@@ -197,7 +230,58 @@  _ENTRY(__early_start)
 
 	bl	early_init
 
-#ifdef CONFIG_DYNAMIC_MEMSTART
+#ifdef CONFIG_RELOCATABLE
+	/*
+	 * Relocatable kernel support based on processing of dynamic
+	 * relocation entries.
+	 *
+	 * r25/r23 will contain RPN/ERPN for the start address of the kernel
+	 */
+	lis	r3,kernstart_addr@ha
+	la	r3,kernstart_addr@l(r3)
+
+#ifdef CONFIG_PHYS_64BIT
+	stw	r23,0(r3)
+	stw	r25,4(r3)
+#else
+	stw	r25,0(r3)
+#endif
+
+	/*
+	 * Compute the virt_phys_offset :
+	 * virt_phys_offset = stext.run - kernstart_addr
+	 *
+	 * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff)
+	 * When we relocate, we have :
+	 *
+	 *	(kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff)
+	 *
+	 * hence:
+	 *  virt_phys_offset = (KERNELBASE & ~0xfffffff) -
+	 *                              (kernstart_addr & ~0xfffffff)
+	 *
+	 */
+
+	/* KERNELBASE&~0xfffffff => (r4,r5) */
+	li	r4, 0		/* higer 32bit */
+	lis	r5,KERNELBASE@h
+	rlwinm	r5,r5,0,0,3	/* Align to 256M, lower 32bit */
+
+	rlwinm	r7,r25,0,0,3
+	/*
+	 * 64bit subtraction.
+	 */
+	subfc	r5,r7,r5
+	subfe	r4,r23,r4
+
+	/* Store virt_phys_offset */
+	lis	r3,virt_phys_offset@ha
+	la	r3,virt_phys_offset@l(r3)
+
+	stw	r4,0(r3)
+	stw	r5,4(r3)
+
+#elif defined(CONFIG_DYNAMIC_MEMSTART)
 	lis	r3,kernstart_addr@ha
 	la	r3,kernstart_addr@l(r3)
 #ifdef CONFIG_PHYS_64BIT