diff mbox

[U-Boot,v2,4/4] arm: factorize relocate_code routine

Message ID 1368525030-5162-5-git-send-email-albert.u.boot@aribaud.net
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Commit Message

Albert ARIBAUD May 14, 2013, 9:50 a.m. UTC
Replace all relocate_code routines from ARM start.S files
with a single instance in file arch/arm/lib/relocate.S.
For PXA, this requires moving the dcache unlocking code
from within relocate_code into c_runtime_cpu_setup.

Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
---
Changes in v2:
- use ENTRY / ENDPROC macros
- preserve PXA25X dcache unlocking

 arch/arm/cpu/arm1136/start.S   |   76 ---------------------------
 arch/arm/cpu/arm1176/start.S   |   76 ---------------------------
 arch/arm/cpu/arm720t/start.S   |   76 ---------------------------
 arch/arm/cpu/arm920t/start.S   |   75 ---------------------------
 arch/arm/cpu/arm925t/start.S   |   76 ---------------------------
 arch/arm/cpu/arm926ejs/start.S |   76 ---------------------------
 arch/arm/cpu/arm946es/start.S  |   76 ---------------------------
 arch/arm/cpu/arm_intcm/start.S |   76 ---------------------------
 arch/arm/cpu/armv7/start.S     |   76 ---------------------------
 arch/arm/cpu/ixp/start.S       |   76 ---------------------------
 arch/arm/cpu/pxa/start.S       |   92 ++++-----------------------------
 arch/arm/cpu/s3c44b0/start.S   |   76 ---------------------------
 arch/arm/cpu/sa1100/start.S    |   76 ---------------------------
 arch/arm/lib/Makefile          |    1 +
 arch/arm/lib/relocate.S        |  111 ++++++++++++++++++++++++++++++++++++++++
 15 files changed, 121 insertions(+), 994 deletions(-)
 create mode 100644 arch/arm/lib/relocate.S

Comments

Benoît Thébaudeau May 14, 2013, 4:01 p.m. UTC | #1
Hi Albert,

On Tuesday, May 14, 2013 11:50:30 AM, Albert ARIBAUD wrote:
> Replace all relocate_code routines from ARM start.S files
> with a single instance in file arch/arm/lib/relocate.S.
> For PXA, this requires moving the dcache unlocking code
> from within relocate_code into c_runtime_cpu_setup.
> 
> Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>

[...]

> diff --git a/arch/arm/cpu/pxa/start.S b/arch/arm/cpu/pxa/start.S
> index 595778a..f9947de 100644
> --- a/arch/arm/cpu/pxa/start.S
> +++ b/arch/arm/cpu/pxa/start.S
> @@ -167,93 +167,19 @@ reset:
>  	bl	_main
>  
>  /*------------------------------------------------------------------------------*/
> -#ifndef CONFIG_SPL_BUILD
> -/*
> - * void relocate_code(addr_moni)
> - *
> - * This function relocates the monitor code.
> - */
> -	.globl	relocate_code
> -relocate_code:
> -	mov	r6, r0	/* save addr of destination */
> -
> -/* Disable the Dcache RAM lock for stack now */
> -#ifdef	CONFIG_CPU_PXA25X
> -	mov	r12, lr
> -	bl	cpu_init_crit
> -	mov	lr, r12
> -#endif
> -
> -	adr	r0, _start
> -	subs	r9, r6, r0		/* r9 <- relocation offset */
> -	beq	relocate_done		/* skip relocation */
> -	mov	r1, r6			/* r1 <- scratch for copy_loop */
> -	ldr	r3, _image_copy_end_ofs
> -	add	r2, r0, r3		/* r2 <- source end address	    */
> -
> -copy_loop:
> -	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
> -	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
> -	cmp	r0, r2			/* until source end address [r2]    */
> -	blo	copy_loop
> -
> -	/*
> -	 * fix .rel.dyn relocations
> -	 */
> -	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
> -	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
> -	add	r10, r10, r0		/* r10 <- sym table in FLASH */
> -	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
> -	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
> -	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
> -	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
> -fixloop:
> -	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
> -	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
> -	ldr	r1, [r2, #4]
> -	and	r7, r1, #0xff
> -	cmp	r7, #23			/* relative fixup? */
> -	beq	fixrel
> -	cmp	r7, #2			/* absolute fixup? */
> -	beq	fixabs
> -	/* ignore unknown type of fixup */
> -	b	fixnext
> -fixabs:
> -	/* absolute fix: set location to (offset) symbol value */
> -	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
> -	add	r1, r10, r1		/* r1 <- address of symbol in table */
> -	ldr	r1, [r1, #4]		/* r1 <- symbol value */
> -	add	r1, r1, r9		/* r1 <- relocated sym addr */
> -	b	fixnext
> -fixrel:
> -	/* relative fix: increase location by offset */
> -	ldr	r1, [r0]
> -	add	r1, r1, r9
> -fixnext:
> -	str	r1, [r0]
> -	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
> -	cmp	r2, r3
> -	blo	fixloop
> -
> -relocate_done:
> -
> -	bx	lr
> -
> -_image_copy_end_ofs:
> -	.word __image_copy_end - _start
> -_rel_dyn_start_ofs:
> -	.word __rel_dyn_start - _start
> -_rel_dyn_end_ofs:
> -	.word __rel_dyn_end - _start
> -_dynsym_start_ofs:
> -	.word __dynsym_start - _start
> -
> -#endif
>  
>  	.globl	c_runtime_cpu_setup
>  c_runtime_cpu_setup:
>  
> -	bx	lr
> +	/*
> +	 * Unlock (actually, disable) the cache now that board_init_f
> +	 * is done. We could do this earlier but we would need to add
> +	 * a new C runtime hook, whereas c_runtime_cpu_setup already
> +	 * exists.
> +	 * As this routine is just a call to cpu_init_crit, let us
> +	 * tail-optimize and do a simple branch here.
> +	 */
> +	b	cpu_init_crit

Shouldn't the "#ifdef CONFIG_CPU_PXA25X" from the original code be kept here?

>  
>  /*
>   *************************************************************************

[...]

> diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
> new file mode 100644
> index 0000000..9e91fae
> --- /dev/null
> +++ b/arch/arm/lib/relocate.S
> @@ -0,0 +1,111 @@
> +/*
> + *  relocate - common relocation function for ARM U-Boot
> + *
> + *  Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#include <linux/linkage.h>
> +
> +/*
> + * These are defined in the board-specific linker script.
> + * Subtracting _start from them lets the linker put their
> + * relative position in the executable instead of leaving
> + * them null.
> + */

This comment is obsolete. It should either be removed or updated.

> +
> +/*
> + * void relocate_code(addr_moni)
> + *
> + * This function relocates the monitor code.
> + */
> +ENTRY(relocate_code)
> +	mov	r6, r0	/* save addr of destination */
> +
> +	ldr	r0, =_start

If you are avoiding the "ldr =" construct below, why do you use it here? You
could "ldr r0, _TEXT_BASE".

> +	subs	r9, r6, r0		/* r9 <- relocation offset */
> +	beq	relocate_done		/* skip relocation */
> +	mov	r1, r6			/* r1 <- scratch for copy loop */
> +	ldr	r3, _image_copy_end_ofs
> +	add	r2, r0, r3		/* r2 <- source end address	    */

Adding _start to a relocate_code-relative _image_copy_end_ofs?!

> +
> +copy_loop:
> +	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
> +	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
> +	cmp	r0, r2			/* until source end address [r2]    */
> +	blo	copy_loop
> +
> +	/*
> +	 * fix .rel.dyn relocations
> +	 */
> +	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
> +	add	r10, r10, r9		/* r10 <- sym table in FLASH */
> +	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
> +	add	r2, r2, r9		/* r2 <- rel dyn start in FLASH */
> +	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
> +	add	r3, r3, r9		/* r3 <- rel dyn end in FLASH */

This is mixing relocate_code-relative offsets with the relocation offset!

> +fixloop:
> +	ldr	r0, [r2]		/* r0 <- SRC location to fix up */
> +	add	r0, r0, r9		/* r0 <- DST location to fix up */
> +	ldr	r1, [r2, #4]
> +	and	r7, r1, #0xff
> +	cmp	r7, #23			/* relative fixup? */
> +	beq	fixrel
> +	cmp	r7, #2			/* absolute fixup? */
> +	beq	fixabs
> +	/* ignore unknown type of fixup */
> +	b	fixnext
> +fixabs:
> +	/* absolute fix: set location to (offset) symbol value */
> +	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
> +	add	r1, r10, r1		/* r1 <- address of symbol in table */
> +	ldr	r1, [r1, #4]		/* r1 <- symbol value */
> +	add	r1, r1, r9		/* r1 <- relocated sym addr */
> +	b	fixnext
> +fixrel:
> +	/* relative fix: increase location by offset */
> +	ldr	r1, [r0]
> +	add	r1, r1, r9
> +fixnext:
> +	str	r1, [r0]
> +	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
> +	cmp	r2, r3
> +	blo	fixloop
> +
> +relocate_done:
> +
> +	/* ARMv4- don't know bx lr but the assembler fails to see that */
> +
> +#ifdef __ARM_ARCH_4__
> +        mov        pc, lr
> +#else
> +        bx        lr
> +#endif
> +
> +_image_copy_end_ofs:
> +	.word __image_copy_end - relocate_code
> +_rel_dyn_start_ofs:
> +	.word __rel_dyn_start - relocate_code
> +_rel_dyn_end_ofs:
> +	.word __rel_dyn_end - relocate_code
> +_dynsym_start_ofs:
> +	.word __dynsym_start - relocate_code
> +
> +ENDPROC(relocate_code)
> --
> 1.7.10.4

Best regards,
Benoît
Albert ARIBAUD May 14, 2013, 4:32 p.m. UTC | #2
Hi Benoît,

On Tue, 14 May 2013 18:01:50 +0200 (CEST), Benoît Thébaudeau
<benoit.thebaudeau@advansee.com> wrote:

> Hi Albert,

> >  	.globl	c_runtime_cpu_setup
> >  c_runtime_cpu_setup:
> >  
> > -	bx	lr
> > +	/*
> > +	 * Unlock (actually, disable) the cache now that board_init_f
> > +	 * is done. We could do this earlier but we would need to add
> > +	 * a new C runtime hook, whereas c_runtime_cpu_setup already
> > +	 * exists.
> > +	 * As this routine is just a call to cpu_init_crit, let us
> > +	 * tail-optimize and do a simple branch here.
> > +	 */
> > +	b	cpu_init_crit
> 
> Shouldn't the "#ifdef CONFIG_CPU_PXA25X" from the original code be kept here?

Yes, it should indeed. Thanks for spotting this!

> Best regards,
> Benoît

Amicalement,
Benoît Thébaudeau May 14, 2013, 5:17 p.m. UTC | #3
Hi Albert,

On Tuesday, May 14, 2013 6:32:08 PM, Albert ARIBAUD wrote:
> Hi Benoît,
> 
> On Tue, 14 May 2013 18:01:50 +0200 (CEST), Benoît Thébaudeau
> <benoit.thebaudeau@advansee.com> wrote:
> 
> > Hi Albert,
> 
> > >  	.globl	c_runtime_cpu_setup
> > >  c_runtime_cpu_setup:
> > >  
> > > -	bx	lr
> > > +	/*
> > > +	 * Unlock (actually, disable) the cache now that board_init_f
> > > +	 * is done. We could do this earlier but we would need to add
> > > +	 * a new C runtime hook, whereas c_runtime_cpu_setup already
> > > +	 * exists.
> > > +	 * As this routine is just a call to cpu_init_crit, let us
> > > +	 * tail-optimize and do a simple branch here.
> > > +	 */
> > > +	b	cpu_init_crit
> > 
> > Shouldn't the "#ifdef CONFIG_CPU_PXA25X" from the original code be kept
> > here?
> 
> Yes, it should indeed. Thanks for spotting this!

Have you seen the end of my notes for relocate.S? After all, keeping for later
your patches moving to compiler-generated symbols seems to complicate things and
to be more error-prone.

Best regards,
Benoît
Benoît Thébaudeau May 14, 2013, 6:49 p.m. UTC | #4
Hi Albert,

On Tuesday, May 14, 2013 8:51:58 PM, Albert ARIBAUD wrote:
> Hi Benoît,
> 
> On Tue, 14 May 2013 19:17:46 +0200 (CEST), Benoît Thébaudeau
> <benoit.thebaudeau@advansee.com> wrote:
> 
> > Hi Albert,
> > 
> > On Tuesday, May 14, 2013 6:32:08 PM, Albert ARIBAUD wrote:
> > > Hi Benoît,
> > > 
> > > On Tue, 14 May 2013 18:01:50 +0200 (CEST), Benoît Thébaudeau
> > > <benoit.thebaudeau@advansee.com> wrote:
> > > 
> > > > Hi Albert,
> > > 
> > > > >  	.globl	c_runtime_cpu_setup
> > > > >  c_runtime_cpu_setup:
> > > > >  
> > > > > -	bx	lr
> > > > > +	/*
> > > > > +	 * Unlock (actually, disable) the cache now that board_init_f
> > > > > +	 * is done. We could do this earlier but we would need to add
> > > > > +	 * a new C runtime hook, whereas c_runtime_cpu_setup already
> > > > > +	 * exists.
> > > > > +	 * As this routine is just a call to cpu_init_crit, let us
> > > > > +	 * tail-optimize and do a simple branch here.
> > > > > +	 */
> > > > > +	b	cpu_init_crit
> > > > 
> > > > Shouldn't the "#ifdef CONFIG_CPU_PXA25X" from the original code be kept
> > > > here?
> > > 
> > > Yes, it should indeed. Thanks for spotting this!
> > 
> > Have you seen the end of my notes for relocate.S? After all, keeping for
> > later
> > your patches moving to compiler-generated symbols seems to complicate
> > things and
> > to be more error-prone.
> 
> (that's unrelated to the PXA25X stuff above, right?)

Yes.

> I don't think deferring the compiler-generated symbols patches
> complicates things, as anyway, they would only apply after this
> patch (so as to apply only on a single instance of relocate_code) so
> there's no way they could simplify things occurring before them.
> 
> OTOH, they do simplify the code of relocate_code. That's why I put them
> in a separate series dealing with optimizing relocate_code. I'll post
> this second series right away, and let people decide whether both
> series should be merged in a single one.

Yes, this is what I meant.

Best regards,
Benoît
Albert ARIBAUD May 14, 2013, 6:51 p.m. UTC | #5
Hi Benoît,

On Tue, 14 May 2013 19:17:46 +0200 (CEST), Benoît Thébaudeau
<benoit.thebaudeau@advansee.com> wrote:

> Hi Albert,
> 
> On Tuesday, May 14, 2013 6:32:08 PM, Albert ARIBAUD wrote:
> > Hi Benoît,
> > 
> > On Tue, 14 May 2013 18:01:50 +0200 (CEST), Benoît Thébaudeau
> > <benoit.thebaudeau@advansee.com> wrote:
> > 
> > > Hi Albert,
> > 
> > > >  	.globl	c_runtime_cpu_setup
> > > >  c_runtime_cpu_setup:
> > > >  
> > > > -	bx	lr
> > > > +	/*
> > > > +	 * Unlock (actually, disable) the cache now that board_init_f
> > > > +	 * is done. We could do this earlier but we would need to add
> > > > +	 * a new C runtime hook, whereas c_runtime_cpu_setup already
> > > > +	 * exists.
> > > > +	 * As this routine is just a call to cpu_init_crit, let us
> > > > +	 * tail-optimize and do a simple branch here.
> > > > +	 */
> > > > +	b	cpu_init_crit
> > > 
> > > Shouldn't the "#ifdef CONFIG_CPU_PXA25X" from the original code be kept
> > > here?
> > 
> > Yes, it should indeed. Thanks for spotting this!
> 
> Have you seen the end of my notes for relocate.S? After all, keeping for later
> your patches moving to compiler-generated symbols seems to complicate things and
> to be more error-prone.

(that's unrelated to the PXA25X stuff above, right?)

I don't think deferring the compiler-generated symbols patches
complicates things, as anyway, they would only apply after this
patch (so as to apply only on a single instance of relocate_code) so
there's no way they could simplify things occurring before them.

OTOH, they do simplify the code of relocate_code. That's why I put them
in a separate series dealing with optimizing relocate_code. I'll post
this second series right away, and let people decide whether both
series should be merged in a single one.

> Best regards,
> Benoît

Amicalement,
Albert ARIBAUD May 15, 2013, 7:31 a.m. UTC | #6
Hi Benoît,

(I had indeed missed remarks in your reply; apologies for this)

On Tue, 14 May 2013 18:01:50 +0200 (CEST), Benoît Thébaudeau
<benoit.thebaudeau@advansee.com> wrote:

> Hi Albert,

> > +/*
> > + * These are defined in the board-specific linker script.
> > + * Subtracting _start from them lets the linker put their
> > + * relative position in the executable instead of leaving
> > + * them null.
> > + */  
> 
> This comment is obsolete. It should either be removed or updated.

Correct.

> > +
> > +/*
> > + * void relocate_code(addr_moni)
> > + *
> > + * This function relocates the monitor code.
> > + */
> > +ENTRY(relocate_code)
> > +	mov	r6, r0	/* save addr of destination */
> > +
> > +	ldr	r0, =_start  
> 
> If you are avoiding the "ldr =" construct below, why do you use it
> here? You could "ldr r0, _TEXT_BASE".

_start is defined in a compilation unit, not in the linker file.
References to _start such as the one above are therefore correct before
as well as after relocation.

> > +	subs	r9, r6, r0		/* r9 <- relocation offset */
> > +	beq	relocate_done		/* skip relocation */
> > +	mov	r1, r6			/* r1 <- scratch for copy loop */
> > +	ldr	r3, _image_copy_end_ofs
> > +	add	r2, r0, r3		/* r2 <- source end address	    */
> 
> Adding _start to a relocate_code-relative _image_copy_end_ofs?!

You're correct. For some reason I did not complete the rewrite here. :(

I'd made the offsets relative to relocate_code in order to avoid
the linker issues with subtracting symbols not defined in the current
compilation unit.

Then I should add =relocate_code to r3, not =_start, and also -- as r9
is not the right offset here -- compute r7 as the delta between the
link-time =_start and the run-time relocate_code (r7 becomes useless
once R10, r2 and r3 are fixed).

I'd run-tested tested each commit but apparently this bug did not
cause any immediately visible issue. This time I'll step-test it.

> > +	/*
> > +	 * fix .rel.dyn relocations
> > +	 */
> > +	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
> > +	add	r10, r10, r9		/* r10 <- sym table in FLASH */
> > +	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
> > +	add	r2, r2, r9		/* r2 <- rel dyn start in FLASH */
> > +	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
> > +	add	r3, r3, r9		/* r3 <- rel dyn end in FLASH */
> 
> This is mixing relocate_code-relative offsets with the relocation offset!

Correct, that's where r7 kicks in to replace r9.

Again, apologies for missing this.

> Best regards,
> Benoît

Amicalement,
Albert ARIBAUD May 15, 2013, 8:30 a.m. UTC | #7
On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
<albert.u.boot@aribaud.net> wrote:

> Then I should add =relocate_code to r3, not =_start, and also -- as r9
> is not the right offset here -- compute r7 as the delta between the
> link-time =_start and the run-time relocate_code (r7 becomes useless
> once R10, r2 and r3 are fixed).

I badly need a coffee break... Make this "r7 equal to =relocate_code",
simply.

Amicalement,
Benoît Thébaudeau May 15, 2013, 4:36 p.m. UTC | #8
Hi Albert,

On Wednesday, May 15, 2013 10:30:00 AM, Albert ARIBAUD wrote:
> On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
> <albert.u.boot@aribaud.net> wrote:
> 
> > Then I should add =relocate_code to r3, not =_start, and also -- as r9
> > is not the right offset here -- compute r7 as the delta between the
> > link-time =_start and the run-time relocate_code (r7 becomes useless
> > once R10, r2 and r3 are fixed).
> 
> I badly need a coffee break... Make this "r7 equal to =relocate_code",
> simply.

Or "adr r7, relocate_code" for the same result in order to avoid using the
literal pool when useless.

Best regards,
Benoît
Albert ARIBAUD May 15, 2013, 5:55 p.m. UTC | #9
Hi Benoît,

On Wed, 15 May 2013 18:36:05 +0200 (CEST), Benoît Thébaudeau
<benoit.thebaudeau@advansee.com> wrote:

> Hi Albert,
> 
> On Wednesday, May 15, 2013 10:30:00 AM, Albert ARIBAUD wrote:
> > On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
> > <albert.u.boot@aribaud.net> wrote:
> > 
> > > Then I should add =relocate_code to r3, not =_start, and also -- as r9
> > > is not the right offset here -- compute r7 as the delta between the
> > > link-time =_start and the run-time relocate_code (r7 becomes useless
> > > once R10, r2 and r3 are fixed).
> > 
> > I badly need a coffee break... Make this "r7 equal to =relocate_code",
> > simply.
> 
> Or "adr r7, relocate_code" for the same result in order to avoid using the
> literal pool when useless.

Correct. That'll be in v3.

> Best regards,
> Benoît

Amicalement,
Benoît Thébaudeau May 16, 2013, 2:28 p.m. UTC | #10
Hi Albert,

On Thursday, May 16, 2013 4:29:49 PM, Albert ARIBAUD wrote:
> Hi Benoît,
> 
> On Wed, 15 May 2013 19:55:31 +0200, Albert ARIBAUD
> <albert.u.boot@aribaud.net> wrote:
> 
> > Hi Benoît,
> > 
> > On Wed, 15 May 2013 18:36:05 +0200 (CEST), Benoît Thébaudeau
> > <benoit.thebaudeau@advansee.com> wrote:
> > 
> > > Hi Albert,
> > > 
> > > On Wednesday, May 15, 2013 10:30:00 AM, Albert ARIBAUD wrote:
> > > > On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
> > > > <albert.u.boot@aribaud.net> wrote:
> > > > 
> > > > > Then I should add =relocate_code to r3, not =_start, and also -- as
> > > > > r9
> > > > > is not the right offset here -- compute r7 as the delta between the
> > > > > link-time =_start and the run-time relocate_code (r7 becomes useless
> > > > > once R10, r2 and r3 are fixed).
> > > > 
> > > > I badly need a coffee break... Make this "r7 equal to =relocate_code",
> > > > simply.
> > > 
> > > Or "adr r7, relocate_code" for the same result in order to avoid using
> > > the
> > > literal pool when useless.
> > 
> > Correct. That'll be in v3.
> 
> Sorry, I missed this change in V3 and left the ldr. Functionally it
> does not have any impact, since as already mentioned, this code always
> runs at the link-time location. You may want Want me to fix it anyway?

I have not yet looked at your V3, but yes, I'd prefer.

Best regards,
Benoît
Albert ARIBAUD May 16, 2013, 2:29 p.m. UTC | #11
Hi Benoît,

On Wed, 15 May 2013 19:55:31 +0200, Albert ARIBAUD
<albert.u.boot@aribaud.net> wrote:

> Hi Benoît,
> 
> On Wed, 15 May 2013 18:36:05 +0200 (CEST), Benoît Thébaudeau
> <benoit.thebaudeau@advansee.com> wrote:
> 
> > Hi Albert,
> > 
> > On Wednesday, May 15, 2013 10:30:00 AM, Albert ARIBAUD wrote:
> > > On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
> > > <albert.u.boot@aribaud.net> wrote:
> > > 
> > > > Then I should add =relocate_code to r3, not =_start, and also -- as r9
> > > > is not the right offset here -- compute r7 as the delta between the
> > > > link-time =_start and the run-time relocate_code (r7 becomes useless
> > > > once R10, r2 and r3 are fixed).
> > > 
> > > I badly need a coffee break... Make this "r7 equal to =relocate_code",
> > > simply.
> > 
> > Or "adr r7, relocate_code" for the same result in order to avoid using the
> > literal pool when useless.
> 
> Correct. That'll be in v3.

Sorry, I missed this change in V3 and left the ldr. Functionally it
does not have any impact, since as already mentioned, this code always
runs at the link-time location. You may want Want me to fix it anyway?

Amicalement,
Albert ARIBAUD May 16, 2013, 2:52 p.m. UTC | #12
Hi Benoît,

On Thu, 16 May 2013 16:28:55 +0200 (CEST), Benoît Thébaudeau
<benoit.thebaudeau@advansee.com> wrote:

> Hi Albert,
> 
> On Thursday, May 16, 2013 4:29:49 PM, Albert ARIBAUD wrote:
> > Hi Benoît,
> > 
> > On Wed, 15 May 2013 19:55:31 +0200, Albert ARIBAUD
> > <albert.u.boot@aribaud.net> wrote:
> > 
> > > Hi Benoît,
> > > 
> > > On Wed, 15 May 2013 18:36:05 +0200 (CEST), Benoît Thébaudeau
> > > <benoit.thebaudeau@advansee.com> wrote:
> > > 
> > > > Hi Albert,
> > > > 
> > > > On Wednesday, May 15, 2013 10:30:00 AM, Albert ARIBAUD wrote:
> > > > > On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
> > > > > <albert.u.boot@aribaud.net> wrote:
> > > > > 
> > > > > > Then I should add =relocate_code to r3, not =_start, and also -- as
> > > > > > r9
> > > > > > is not the right offset here -- compute r7 as the delta between the
> > > > > > link-time =_start and the run-time relocate_code (r7 becomes useless
> > > > > > once R10, r2 and r3 are fixed).
> > > > > 
> > > > > I badly need a coffee break... Make this "r7 equal to =relocate_code",
> > > > > simply.
> > > > 
> > > > Or "adr r7, relocate_code" for the same result in order to avoid using
> > > > the
> > > > literal pool when useless.
> > > 
> > > Correct. That'll be in v3.
> > 
> > Sorry, I missed this change in V3 and left the ldr. Functionally it
> > does not have any impact, since as already mentioned, this code always
> > runs at the link-time location. You may want Want me to fix it anyway?
> 
> I have not yet looked at your V3, but yes, I'd prefer.

Alright, then, V4 it is.

> Best regards,
> Benoît

Amicalement,
Albert ARIBAUD May 16, 2013, 2:56 p.m. UTC | #13
On Thu, 16 May 2013 16:52:38 +0200, Albert ARIBAUD
<albert.u.boot@aribaud.net> wrote:

> Hi Benoît,
> 
> On Thu, 16 May 2013 16:28:55 +0200 (CEST), Benoît Thébaudeau
> <benoit.thebaudeau@advansee.com> wrote:
> 
> > Hi Albert,
> > 
> > On Thursday, May 16, 2013 4:29:49 PM, Albert ARIBAUD wrote:
> > > Hi Benoît,
> > > 
> > > On Wed, 15 May 2013 19:55:31 +0200, Albert ARIBAUD
> > > <albert.u.boot@aribaud.net> wrote:
> > > 
> > > > Hi Benoît,
> > > > 
> > > > On Wed, 15 May 2013 18:36:05 +0200 (CEST), Benoît Thébaudeau
> > > > <benoit.thebaudeau@advansee.com> wrote:
> > > > 
> > > > > Hi Albert,
> > > > > 
> > > > > On Wednesday, May 15, 2013 10:30:00 AM, Albert ARIBAUD wrote:
> > > > > > On Wed, 15 May 2013 09:31:37 +0200, Albert ARIBAUD
> > > > > > <albert.u.boot@aribaud.net> wrote:
> > > > > > 
> > > > > > > Then I should add =relocate_code to r3, not =_start, and also -- as
> > > > > > > r9
> > > > > > > is not the right offset here -- compute r7 as the delta between the
> > > > > > > link-time =_start and the run-time relocate_code (r7 becomes useless
> > > > > > > once R10, r2 and r3 are fixed).
> > > > > > 
> > > > > > I badly need a coffee break... Make this "r7 equal to =relocate_code",
> > > > > > simply.
> > > > > 
> > > > > Or "adr r7, relocate_code" for the same result in order to avoid using
> > > > > the
> > > > > literal pool when useless.
> > > > 
> > > > Correct. That'll be in v3.
> > > 
> > > Sorry, I missed this change in V3 and left the ldr. Functionally it
> > > does not have any impact, since as already mentioned, this code always
> > > runs at the link-time location. You may want Want me to fix it anyway?
> > 
> > I have not yet looked at your V3, but yes, I'd prefer.
> 
> Alright, then, V4 it is.

Hmm -- I'd actually made the ldr to adr fix in V3, so no V4 for now, V3
is fine.

Amicalement,
diff mbox

Patch

diff --git a/arch/arm/cpu/arm1136/start.S b/arch/arm/cpu/arm1136/start.S
index 1eec2e0..edf249d 100644
--- a/arch/arm/cpu/arm1136/start.S
+++ b/arch/arm/cpu/arm1136/start.S
@@ -169,82 +169,6 @@  next:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm1176/start.S b/arch/arm/cpu/arm1176/start.S
index 3c2a52c..65292bc 100644
--- a/arch/arm/cpu/arm1176/start.S
+++ b/arch/arm/cpu/arm1176/start.S
@@ -221,82 +221,6 @@  skip_tcmdisable:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm720t/start.S b/arch/arm/cpu/arm720t/start.S
index 983f8ad..a0444de 100644
--- a/arch/arm/cpu/arm720t/start.S
+++ b/arch/arm/cpu/arm720t/start.S
@@ -151,82 +151,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	mov	pc, lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm920t/start.S b/arch/arm/cpu/arm920t/start.S
index 889329f..3232065 100644
--- a/arch/arm/cpu/arm920t/start.S
+++ b/arch/arm/cpu/arm920t/start.S
@@ -190,81 +190,6 @@  copyex:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-relocate_done:
-
-	mov	pc, lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm925t/start.S b/arch/arm/cpu/arm925t/start.S
index 64e8ae5..97eb276 100644
--- a/arch/arm/cpu/arm925t/start.S
+++ b/arch/arm/cpu/arm925t/start.S
@@ -180,82 +180,6 @@  poll1:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	mov	pc, lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm926ejs/start.S b/arch/arm/cpu/arm926ejs/start.S
index d5c4ab2..5fc8e04 100644
--- a/arch/arm/cpu/arm926ejs/start.S
+++ b/arch/arm/cpu/arm926ejs/start.S
@@ -186,82 +186,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm946es/start.S b/arch/arm/cpu/arm946es/start.S
index 2fc730c..e9d0c34 100644
--- a/arch/arm/cpu/arm946es/start.S
+++ b/arch/arm/cpu/arm946es/start.S
@@ -155,82 +155,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	mov	pc, lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm_intcm/start.S b/arch/arm/cpu/arm_intcm/start.S
index c483b53..8dfd919 100644
--- a/arch/arm/cpu/arm_intcm/start.S
+++ b/arch/arm/cpu/arm_intcm/start.S
@@ -151,82 +151,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index 3ade510..8e9cb19 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -163,82 +163,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-ENTRY(relocate_code)
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-ENDPROC(relocate_code)
-
-#endif
-
 ENTRY(c_runtime_cpu_setup)
 /*
  * If I-cache is enabled invalidate it
diff --git a/arch/arm/cpu/ixp/start.S b/arch/arm/cpu/ixp/start.S
index 8458f45..46cba0c 100644
--- a/arch/arm/cpu/ixp/start.S
+++ b/arch/arm/cpu/ixp/start.S
@@ -253,82 +253,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/pxa/start.S b/arch/arm/cpu/pxa/start.S
index 595778a..f9947de 100644
--- a/arch/arm/cpu/pxa/start.S
+++ b/arch/arm/cpu/pxa/start.S
@@ -167,93 +167,19 @@  reset:
 	bl	_main
 
 /*------------------------------------------------------------------------------*/
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-/* Disable the Dcache RAM lock for stack now */
-#ifdef	CONFIG_CPU_PXA25X
-	mov	r12, lr
-	bl	cpu_init_crit
-	mov	lr, r12
-#endif
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
 
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
-	bx	lr
+	/*
+	 * Unlock (actually, disable) the cache now that board_init_f
+	 * is done. We could do this earlier but we would need to add
+	 * a new C runtime hook, whereas c_runtime_cpu_setup already
+	 * exists.
+	 * As this routine is just a call to cpu_init_crit, let us
+	 * tail-optimize and do a simple branch here.
+	 */
+	b	cpu_init_crit
 
 /*
  *************************************************************************
diff --git a/arch/arm/cpu/s3c44b0/start.S b/arch/arm/cpu/s3c44b0/start.S
index 33a0a81..78183fc 100644
--- a/arch/arm/cpu/s3c44b0/start.S
+++ b/arch/arm/cpu/s3c44b0/start.S
@@ -136,82 +136,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/sa1100/start.S b/arch/arm/cpu/sa1100/start.S
index e0c45d6..30d5a90 100644
--- a/arch/arm/cpu/sa1100/start.S
+++ b/arch/arm/cpu/sa1100/start.S
@@ -140,82 +140,6 @@  reset:
 
 /*------------------------------------------------------------------------------*/
 
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	mov	pc, lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 6ae161a..30e3290 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -39,6 +39,7 @@  GLCOBJS	+= div0.o
 SOBJS-y += crt0.o
 
 ifndef CONFIG_SPL_BUILD
+SOBJS-y += relocate.o
 ifndef CONFIG_SYS_GENERIC_BOARD
 COBJS-y	+= board.o
 endif
diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
new file mode 100644
index 0000000..9e91fae
--- /dev/null
+++ b/arch/arm/lib/relocate.S
@@ -0,0 +1,111 @@ 
+/*
+ *  relocate - common relocation function for ARM U-Boot
+ *
+ *  Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * These are defined in the board-specific linker script.
+ * Subtracting _start from them lets the linker put their
+ * relative position in the executable instead of leaving
+ * them null.
+ */
+
+/*
+ * void relocate_code(addr_moni)
+ *
+ * This function relocates the monitor code.
+ */
+ENTRY(relocate_code)
+	mov	r6, r0	/* save addr of destination */
+
+	ldr	r0, =_start
+	subs	r9, r6, r0		/* r9 <- relocation offset */
+	beq	relocate_done		/* skip relocation */
+	mov	r1, r6			/* r1 <- scratch for copy loop */
+	ldr	r3, _image_copy_end_ofs
+	add	r2, r0, r3		/* r2 <- source end address	    */
+
+copy_loop:
+	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
+	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
+	cmp	r0, r2			/* until source end address [r2]    */
+	blo	copy_loop
+
+	/*
+	 * fix .rel.dyn relocations
+	 */
+	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
+	add	r10, r10, r9		/* r10 <- sym table in FLASH */
+	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
+	add	r2, r2, r9		/* r2 <- rel dyn start in FLASH */
+	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
+	add	r3, r3, r9		/* r3 <- rel dyn end in FLASH */
+fixloop:
+	ldr	r0, [r2]		/* r0 <- SRC location to fix up */
+	add	r0, r0, r9		/* r0 <- DST location to fix up */
+	ldr	r1, [r2, #4]
+	and	r7, r1, #0xff
+	cmp	r7, #23			/* relative fixup? */
+	beq	fixrel
+	cmp	r7, #2			/* absolute fixup? */
+	beq	fixabs
+	/* ignore unknown type of fixup */
+	b	fixnext
+fixabs:
+	/* absolute fix: set location to (offset) symbol value */
+	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
+	add	r1, r10, r1		/* r1 <- address of symbol in table */
+	ldr	r1, [r1, #4]		/* r1 <- symbol value */
+	add	r1, r1, r9		/* r1 <- relocated sym addr */
+	b	fixnext
+fixrel:
+	/* relative fix: increase location by offset */
+	ldr	r1, [r0]
+	add	r1, r1, r9
+fixnext:
+	str	r1, [r0]
+	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
+	cmp	r2, r3
+	blo	fixloop
+
+relocate_done:
+
+	/* ARMv4- don't know bx lr but the assembler fails to see that */
+
+#ifdef __ARM_ARCH_4__
+        mov        pc, lr
+#else
+        bx        lr
+#endif
+
+_image_copy_end_ofs:
+	.word __image_copy_end - relocate_code
+_rel_dyn_start_ofs:
+	.word __rel_dyn_start - relocate_code
+_rel_dyn_end_ofs:
+	.word __rel_dyn_end - relocate_code
+_dynsym_start_ofs:
+	.word __dynsym_start - relocate_code
+
+ENDPROC(relocate_code)