Patchwork [U-Boot,4/4] arm: factorize relocate_code routine

login
register
mail settings
Submitter Albert ARIBAUD
Date May 10, 2013, 9:56 p.m.
Message ID <1368223012-17609-5-git-send-email-albert.u.boot@aribaud.net>
Download mbox | patch
Permalink /patch/243077/
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Comments

Albert ARIBAUD - May 10, 2013, 9:56 p.m.
Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
---
 arch/arm/cpu/arm1136/start.S   |   82 --------------------------------
 arch/arm/cpu/arm1176/start.S   |   75 ------------------------------
 arch/arm/cpu/arm720t/start.S   |   82 --------------------------------
 arch/arm/cpu/arm920t/start.S   |   75 ------------------------------
 arch/arm/cpu/arm925t/start.S   |   75 ------------------------------
 arch/arm/cpu/arm926ejs/start.S |   82 --------------------------------
 arch/arm/cpu/arm946es/start.S  |   75 ------------------------------
 arch/arm/cpu/arm_intcm/start.S |   75 ------------------------------
 arch/arm/cpu/armv7/start.S     |   78 -------------------------------
 arch/arm/cpu/ixp/start.S       |   75 ------------------------------
 arch/arm/cpu/pxa/start.S       |   84 ---------------------------------
 arch/arm/cpu/s3c44b0/start.S   |   75 ------------------------------
 arch/arm/cpu/sa1100/start.S    |   75 ------------------------------
 arch/arm/lib/Makefile          |    2 +-
 arch/arm/lib/relocate.S        |  100 ++++++++++++++++++++++++++++++++++++++++
 15 files changed, 101 insertions(+), 1009 deletions(-)
 create mode 100644 arch/arm/lib/relocate.S
Benoît Thébaudeau - May 11, 2013, 2:04 a.m.
Hi Albert,

On Friday, May 10, 2013 11:56:52 PM, Albert ARIBAUD wrote:
> Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
> ---
>  arch/arm/cpu/arm1136/start.S   |   82 --------------------------------
>  arch/arm/cpu/arm1176/start.S   |   75 ------------------------------
>  arch/arm/cpu/arm720t/start.S   |   82 --------------------------------
>  arch/arm/cpu/arm920t/start.S   |   75 ------------------------------
>  arch/arm/cpu/arm925t/start.S   |   75 ------------------------------
>  arch/arm/cpu/arm926ejs/start.S |   82 --------------------------------
>  arch/arm/cpu/arm946es/start.S  |   75 ------------------------------
>  arch/arm/cpu/arm_intcm/start.S |   75 ------------------------------
>  arch/arm/cpu/armv7/start.S     |   78 -------------------------------
>  arch/arm/cpu/ixp/start.S       |   75 ------------------------------
>  arch/arm/cpu/pxa/start.S       |   84 ---------------------------------
>  arch/arm/cpu/s3c44b0/start.S   |   75 ------------------------------
>  arch/arm/cpu/sa1100/start.S    |   75 ------------------------------
>  arch/arm/lib/Makefile          |    2 +-
>  arch/arm/lib/relocate.S        |  100
>  ++++++++++++++++++++++++++++++++++++++++
>  15 files changed, 101 insertions(+), 1009 deletions(-)
>  create mode 100644 arch/arm/lib/relocate.S
> 

[...]

> diff --git a/arch/arm/cpu/pxa/start.S b/arch/arm/cpu/pxa/start.S
> index ada91a6..3078bec 100644
> --- a/arch/arm/cpu/pxa/start.S
> +++ b/arch/arm/cpu/pxa/start.S
> @@ -170,90 +170,6 @@ reset:
>  
>  	bl	_main
>  
> -/*------------------------------------------------------------------------------*/
> -#ifndef CONFIG_SPL_BUILD
> -/*
> - * void relocate_code(addr_moni)
> - *
> - * This function relocates the monitor code.
> - */
> -	.globl	relocate_code
> -relocate_code:
> -	mov	r6, r0	/* save addr of destination */
> -
> -/* Disable the Dcache RAM lock for stack now */
> -#ifdef	CONFIG_CPU_PXA25X
> -	mov	r12, lr
> -	bl	cpu_init_crit
> -	mov	lr, r12
> -#endif

What about this thing that you silently drop?

> -
> -	adr	r0, _start
> -	subs	r9, r6, r0		/* r9 <- relocation offset */
> -	beq	relocate_done		/* skip relocation */
> -	mov	r1, r6			/* r1 <- scratch for copy_loop */
> -	ldr	r3, _image_copy_end_ofs
> -	add	r2, r0, r3		/* r2 <- source end address	    */
> -
> -copy_loop:
> -	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
> -	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
> -	cmp	r0, r2			/* until source end address [r2]    */
> -	blo	copy_loop
> -
> -#ifndef CONFIG_SPL_BUILD
> -	/*
> -	 * fix .rel.dyn relocations
> -	 */
> -	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
> -	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
> -	add	r10, r10, r0		/* r10 <- sym table in FLASH */
> -	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
> -	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
> -	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
> -	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
> -fixloop:
> -	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
> -	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
> -	ldr	r1, [r2, #4]
> -	and	r7, r1, #0xff
> -	cmp	r7, #23			/* relative fixup? */
> -	beq	fixrel
> -	cmp	r7, #2			/* absolute fixup? */
> -	beq	fixabs
> -	/* ignore unknown type of fixup */
> -	b	fixnext
> -fixabs:
> -	/* absolute fix: set location to (offset) symbol value */
> -	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
> -	add	r1, r10, r1		/* r1 <- address of symbol in table */
> -	ldr	r1, [r1, #4]		/* r1 <- symbol value */
> -	add	r1, r1, r9		/* r1 <- relocated sym addr */
> -	b	fixnext
> -fixrel:
> -	/* relative fix: increase location by offset */
> -	ldr	r1, [r0]
> -	add	r1, r1, r9
> -fixnext:
> -	str	r1, [r0]
> -	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
> -	cmp	r2, r3
> -	blo	fixloop
> -#endif
> -
> -relocate_done:
> -
> -	bx	lr
> -
> -_rel_dyn_start_ofs:
> -	.word __rel_dyn_start - _start
> -_rel_dyn_end_ofs:
> -	.word __rel_dyn_end - _start
> -_dynsym_start_ofs:
> -	.word __dynsym_start - _start
> -
> -#endif
> -
>  	.globl	c_runtime_cpu_setup
>  c_runtime_cpu_setup:
>  

[...]

> diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
> new file mode 100644
> index 0000000..ce533ca
> --- /dev/null
> +++ b/arch/arm/lib/relocate.S
> @@ -0,0 +1,100 @@
> +/*
> + *  relocate - common relocation function for ARM U-Boot
> + *
> + *  Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +/*
> + * These are defined in the board-specific linker script.
> + * Subtracting _start from them lets the linker put their
> + * relative position in the executable instead of leaving
> + * them null.
> + */
> +
> +	.globl	__image_copy_start
> +	.globl	__image_copy_end
> +	.globl	__rel_dyn_start
> +	.globl	__rel_dyn_end
> +	.globl	__dynsym_start

.globl is for exporting symbols to the linker, not importing, so the lines above
should be removed.

> +
> +/*------------------------------------------------------------------------------*/
> +
> +/*
> + * void relocate_code(addr_moni)
> + *
> + * This function relocates the monitor code.
> + */
> +	.globl	relocate_code
> +relocate_code:

Or "ENTRY(relocate_code)" instead of the two lines above.

> +	mov	r6, r0	/* save addr of destination */
> +
> +	ldr	r0, =_start

This is wrong. Here, r0 will get _start link-time address, while with 'adr' in
the original code, r0 was getting _start runtime address, which is not the same
for all boards. relocate_code() is and must stay position-independent code, at
least for SPL.

E.g., for mx31pdk or tx25, the SPL is first loaded by the boot ROM in the NAND
Flash controller buffer, then executed from there. The SPL then has to use
relocate_code() to relocate itself to CONFIG_SPL_TEXT_BASE in order to free the
NFC buffer to load U-Boot from NAND Flash. This means that 'ldr r0, =_start'
would set r0 to CONFIG_SPL_TEXT_BASE, while 'adr r0, _start' sets r0 to the
address of the NFC buffer. Since the SPL calls relocate_code() with
CONFIG_SPL_TEXT_BASE, the 'ldr' choice would just result in a branch to
relocate_done below, which would abort the relocation and break the boot on
those boards.

The issue is that 'adr' requires that _start be defined in the same file and
section. That's why my patch 31/31 was using a macro to define relocate_code()
in the start.S files. Perhaps some other constructs like
'sub r0, pc, . - _start' would work, but I doubt it if _start is not in the same
file and section since this is exactly how 'adr' is translated.

> +	subs	r9, r6, r0		/* r9 <- relocation offset */
> +	beq	relocate_done		/* skip relocation */
> +	mov	r1, r6			/* r1 <- scratch for copy_loop */
> +	ldr	r2, =__image_copy_end
> +
> +copy_loop:
> +	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
> +	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
> +	cmp	r0, r2			/* until source end address [r2]    */
> +	blo	copy_loop
> +
> +#ifndef CONFIG_SPL_BUILD
> +	/*
> +	 * fix .rel.dyn relocations
> +	 */
> +	ldr	r0, =_TEXT_BASE		/* r0 <- Text base */

The value you load above into r0 then gets overwritten prior to having been
used, so you can drop this line, all the more it is wrong, loading the address
of _TEXT_BASE instead of its value.

> +	ldr	r10, =__dynsym_start	/* r10 <- sym table ofs */
> +	ldr	r2, =__rel_dyn_start	/* r2 <- rel dyn start ofs */
> +	ldr	r3, =__rel_dyn_end	/* r3 <- rel dyn end ofs */

'ofs' should be replaced with 'in FLASH' in the comments above.

Contrary to above, this construct works here because this can't be SPL code.

> +fixloop:
> +	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
> +	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
> +	ldr	r1, [r2, #4]
> +	and	r7, r1, #0xff
> +	cmp	r7, #23			/* relative fixup? */
> +	beq	fixrel
> +	cmp	r7, #2			/* absolute fixup? */
> +	beq	fixabs
> +	/* ignore unknown type of fixup */
> +	b	fixnext
> +fixabs:
> +	/* absolute fix: set location to (offset) symbol value */
> +	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
> +	add	r1, r10, r1		/* r1 <- address of symbol in table */
> +	ldr	r1, [r1, #4]		/* r1 <- symbol value */
> +	add	r1, r1, r9		/* r1 <- relocated sym addr */
> +	b	fixnext
> +fixrel:
> +	/* relative fix: increase location by offset */
> +	ldr	r1, [r0]
> +	add	r1, r1, r9
> +fixnext:
> +	str	r1, [r0]
> +	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
> +	cmp	r2, r3
> +	blo	fixloop
> +#endif
> +
> +relocate_done:
> +
> +	bx	lr

This instruction is not supported on ARMv4, even if GCC does not complain about
it (it looks like this is not even fixed by the linker, except if the
instruction was issued by GCC itself), but it is required for more recent ARM
versions for Thumb inter-working, which is enabled by some boards in U-Boot.
Hence, shouldn't the line above be replaced with:

#ifdef __ARM_ARCH_4__
        mov        pc, lr
#else
        bx        lr
#endif

Or using CONFIG_SYS_THUMB_BUILD?

"ENDPROC(relocate_code)" should be added here if you go for ENTRY() at the
beginning.

Best regards,
Benoît
Albert ARIBAUD - May 11, 2013, 7:40 a.m.
Hi Benoît,

On Sat, 11 May 2013 04:04:09 +0200 (CEST), Benoît Thébaudeau
<benoit.thebaudeau@advansee.com> wrote:

> Hi Albert,
> 
> On Friday, May 10, 2013 11:56:52 PM, Albert ARIBAUD wrote:
> > Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>

> > diff --git a/arch/arm/cpu/pxa/start.S b/arch/arm/cpu/pxa/start.S
> > index ada91a6..3078bec 100644
> > --- a/arch/arm/cpu/pxa/start.S
> > +++ b/arch/arm/cpu/pxa/start.S
> > @@ -170,90 +170,6 @@ reset:
> >  
> >  	bl	_main
> >  
> > -/*------------------------------------------------------------------------------*/
> > -#ifndef CONFIG_SPL_BUILD
> > -/*
> > - * void relocate_code(addr_moni)
> > - *
> > - * This function relocates the monitor code.
> > - */
> > -	.globl	relocate_code
> > -relocate_code:
> > -	mov	r6, r0	/* save addr of destination */
> > -
> > -/* Disable the Dcache RAM lock for stack now */
> > -#ifdef	CONFIG_CPU_PXA25X
> > -	mov	r12, lr
> > -	bl	cpu_init_crit
> > -	mov	lr, r12
> > -#endif  
> 
> What about this thing that you silently drop?

Overlook on my side, and PXA is not one of my test vehicles so I
missed it while testing. Will 'undrop' in V2.

> > diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
> > new file mode 100644
> > index 0000000..ce533ca
> > --- /dev/null
> > +++ b/arch/arm/lib/relocate.S
> > @@ -0,0 +1,100 @@
> > +/*
> > + *  relocate - common relocation function for ARM U-Boot
> > + *
> > + *  Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
> > + *
> > + * See file CREDITS for list of people who contributed to this
> > + * project.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License as
> > + * published by the Free Software Foundation; either version 2 of
> > + * the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> > + * MA 02111-1307 USA
> > + */
> > +
> > +/*
> > + * These are defined in the board-specific linker script.
> > + * Subtracting _start from them lets the linker put their
> > + * relative position in the executable instead of leaving
> > + * them null.
> > + */
> > +
> > +	.globl	__image_copy_start
> > +	.globl	__image_copy_end
> > +	.globl	__rel_dyn_start
> > +	.globl	__rel_dyn_end
> > +	.globl	__dynsym_start
> 
> .globl is for exporting symbols to the linker, not importing, so the lines above
> should be removed.

correct. Will drop in V2.

> > +
> > +/*------------------------------------------------------------------------------*/
> > +
> > +/*
> > + * void relocate_code(addr_moni)
> > + *
> > + * This function relocates the monitor code.
> > + */
> > +	.globl	relocate_code
> > +relocate_code:
> 
> Or "ENTRY(relocate_code)" instead of the two lines above.

I'd gone for just moving code around rather than cosmetic, but I can do
the cosmetic change at the end of the patch too.

> > +	mov	r6, r0	/* save addr of destination */
> > +
> > +	ldr	r0, =_start
> 
> This is wrong. Here, r0 will get _start link-time address, while with 'adr' in
> the original code, r0 was getting _start runtime address, which is not the same
> for all boards. relocate_code() is and must stay position-independent code, at
> least for SPL.
> 
> E.g., for mx31pdk or tx25, the SPL is first loaded by the boot ROM
in the NAND
> Flash controller buffer, then executed from there. The SPL then has to use
> relocate_code() to relocate itself to CONFIG_SPL_TEXT_BASE in order to free the
> NFC buffer to load U-Boot from NAND Flash. This means that 'ldr r0, =_start'
> would set r0 to CONFIG_SPL_TEXT_BASE, while 'adr r0, _start' sets r0 to the
> address of the NFC buffer. Since the SPL calls relocate_code() with
> CONFIG_SPL_TEXT_BASE, the 'ldr' choice would just result in a branch to
> relocate_done below, which would abort the relocation and break the boot on
> those boards.
> 
> The issue is that 'adr' requires that _start be defined in the same file and
> section. That's why my patch 31/31 was using a macro to define relocate_code()
> in the start.S files. Perhaps some other constructs like
> 'sub r0, pc, . - _start' would work, but I doubt it if _start is not in the same
> file and section since this is exactly how 'adr' is translated.

Alright, so -- formally, there is no assumption that code running before
relocation must be position-independent, as far as I remember. The
assumption is that it is designed to run at the link-time location, and
that it is responsible for making sure that the rest of the code is
properly relocated and thus will run at whatever location it was moved
to.

In the case you mention, SPL does not relocate itself: it only *moves*
itself, skipping the actual relocation (reference fixing part) -- it
could havdoe so with a short ad hoc loop instead of relocate_code, and
thus not impose a specific requirement on relocate_code that was not
initially there. I'd be happier with a preprocessor conditional at the
crt0.S stage to choose between relocate_code and a new, short,
move_code routine depending on whether we are building SPL or not.

That being said, I am fine with trying to make the code before
relocation as position-independent as possible anyway; let me see how
this can be done.

> > +	subs	r9, r6, r0		/* r9 <- relocation offset */
> > +	beq	relocate_done		/* skip relocation */
> > +	mov	r1, r6			/* r1 <- scratch for copy_loop */
> > +	ldr	r2, =__image_copy_end
> > +
> > +copy_loop:
> > +	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
> > +	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
> > +	cmp	r0, r2			/* until source end address [r2]    */
> > +	blo	copy_loop
> > +
> > +#ifndef CONFIG_SPL_BUILD
> > +	/*
> > +	 * fix .rel.dyn relocations
> > +	 */
> > +	ldr	r0, =_TEXT_BASE		/* r0 <- Text base */
> 
> The value you load above into r0 then gets overwritten prior to having been
> used, so you can drop this line, all the more it is wrong, loading the address
> of _TEXT_BASE instead of its value.

Correct. Will fix in V2.

> > +	ldr	r10, =__dynsym_start	/* r10 <- sym table ofs */
> > +	ldr	r2, =__rel_dyn_start	/* r2 <- rel dyn start ofs */
> > +	ldr	r3, =__rel_dyn_end	/* r3 <- rel dyn end ofs */
> 
> 'ofs' should be replaced with 'in FLASH' in the comments above.
> 
> Contrary to above, this construct works here because this can't be SPL code.

Actually, this would not work either if used by non-SPL code as SPL code
expects to use it, e.g. by loading U-Boot at another address -- the
issue is in the use scenario, not in the requirements for SPL vs.
U-Boot.

> > +relocate_done:
> > +
> > +	bx	lr
> 
> This instruction is not supported on ARMv4, even if GCC does not complain about
> it (it looks like this is not even fixed by the linker, except if the
> instruction was issued by GCC itself), but it is required for more recent ARM
> versions for Thumb inter-working, which is enabled by some boards in U-Boot.
> Hence, shouldn't the line above be replaced with:
> 
> #ifdef __ARM_ARCH_4__
>         mov        pc, lr
> #else
>         bx        lr
> #endif
> 
> Or using CONFIG_SYS_THUMB_BUILD?

I would have preferred adding --fix-v4bx to the linker flags for ARMv4
targets, but it seems to have no effect at least with some toolchains.
I'll fix the code above with the ARMv4 conditional, as it is always
correct regardless of U-boot configuration options semantics.

> "ENDPROC(relocate_code)" should be added here if you go for ENTRY() at the
> beginning.

Ok.

> Best regards,
> Benoît

Thanks for your review!

Amicalement,
Benoît Thébaudeau - May 11, 2013, 1:40 p.m.
Hi Albert,

On Saturday, May 11, 2013 9:40:02 AM, Albert ARIBAUD wrote:
> Hi Benoît,
> 
> On Sat, 11 May 2013 04:04:09 +0200 (CEST), Benoît Thébaudeau
> <benoit.thebaudeau@advansee.com> wrote:
> 
> > Hi Albert,
> > 
> > On Friday, May 10, 2013 11:56:52 PM, Albert ARIBAUD wrote:
> > > Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>

[...]

> > > +	mov	r6, r0	/* save addr of destination */
> > > +
> > > +	ldr	r0, =_start
> > 
> > This is wrong. Here, r0 will get _start link-time address, while with 'adr'
> > in
> > the original code, r0 was getting _start runtime address, which is not the
> > same
> > for all boards. relocate_code() is and must stay position-independent code,
> > at
> > least for SPL.
> > 
> > E.g., for mx31pdk or tx25, the SPL is first loaded by the boot ROM
> in the NAND
> > Flash controller buffer, then executed from there. The SPL then has to use
> > relocate_code() to relocate itself to CONFIG_SPL_TEXT_BASE in order to free
> > the
> > NFC buffer to load U-Boot from NAND Flash. This means that 'ldr r0,
> > =_start'
> > would set r0 to CONFIG_SPL_TEXT_BASE, while 'adr r0, _start' sets r0 to the
> > address of the NFC buffer. Since the SPL calls relocate_code() with
> > CONFIG_SPL_TEXT_BASE, the 'ldr' choice would just result in a branch to
> > relocate_done below, which would abort the relocation and break the boot on
> > those boards.
> > 
> > The issue is that 'adr' requires that _start be defined in the same file
> > and
> > section. That's why my patch 31/31 was using a macro to define
> > relocate_code()
> > in the start.S files. Perhaps some other constructs like
> > 'sub r0, pc, . - _start' would work, but I doubt it if _start is not in the
> > same
> > file and section since this is exactly how 'adr' is translated.
> 
> Alright, so -- formally, there is no assumption that code running before
> relocation must be position-independent, as far as I remember. The
> assumption is that it is designed to run at the link-time location, and
> that it is responsible for making sure that the rest of the code is
> properly relocated and thus will run at whatever location it was moved
> to.
> 
> In the case you mention, SPL does not relocate itself: it only *moves*
> itself, skipping the actual relocation (reference fixing part) -- it
> could havdoe so with a short ad hoc loop instead of relocate_code, and
> thus not impose a specific requirement on relocate_code that was not
> initially there. I'd be happier with a preprocessor conditional at the
> crt0.S stage to choose between relocate_code and a new, short,
> move_code routine depending on whether we are building SPL or not.
> 
> That being said, I am fine with trying to make the code before
> relocation as position-independent as possible anyway; let me see how
> this can be done.

OK, I let you try something. Otherwise, I agree with what you say above. The
original code for the non-SPL part below is not position-independent, so it is
safe to assume that only the beginning of relocate_code() (before
'#ifndef CONFIG_SPL_BUILD') should be position-independent, and only for SPL. It
is also probably true that the initial runtime address of all SPLs is known at
build time, so a new CONFIG_SPL_ could be created for that purpose and used in a
dedicated small SPL copy routine, which would also make the need for SPL move
testable at build time, and solve the 'adr' vs. 'ldr' issue.

> > > +	subs	r9, r6, r0		/* r9 <- relocation offset */
> > > +	beq	relocate_done		/* skip relocation */
> > > +	mov	r1, r6			/* r1 <- scratch for copy_loop */
> > > +	ldr	r2, =__image_copy_end
> > > +
> > > +copy_loop:
> > > +	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
> > > +	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
> > > +	cmp	r0, r2			/* until source end address [r2]    */
> > > +	blo	copy_loop
> > > +
> > > +#ifndef CONFIG_SPL_BUILD
> > > +	/*
> > > +	 * fix .rel.dyn relocations
> > > +	 */
> > > +	ldr	r0, =_TEXT_BASE		/* r0 <- Text base */
> > 
> > The value you load above into r0 then gets overwritten prior to having been
> > used, so you can drop this line, all the more it is wrong, loading the
> > address
> > of _TEXT_BASE instead of its value.
> 
> Correct. Will fix in V2.
> 
> > > +	ldr	r10, =__dynsym_start	/* r10 <- sym table ofs */
> > > +	ldr	r2, =__rel_dyn_start	/* r2 <- rel dyn start ofs */
> > > +	ldr	r3, =__rel_dyn_end	/* r3 <- rel dyn end ofs */
> > 
> > 'ofs' should be replaced with 'in FLASH' in the comments above.
> > 
> > Contrary to above, this construct works here because this can't be SPL
> > code.
> 
> Actually, this would not work either if used by non-SPL code as SPL code
> expects to use it, e.g. by loading U-Boot at another address -- the
> issue is in the use scenario, not in the requirements for SPL vs.
> U-Boot.

Right, but this new code behaves exactly in the same way as the original code,
so it is safe to assume that there is no such use scenario. This new code just
can't introduce a regression.

> > > +relocate_done:
> > > +
> > > +	bx	lr
> > 
> > This instruction is not supported on ARMv4, even if GCC does not complain
> > about
> > it (it looks like this is not even fixed by the linker, except if the
> > instruction was issued by GCC itself), but it is required for more recent
> > ARM
> > versions for Thumb inter-working, which is enabled by some boards in
> > U-Boot.
> > Hence, shouldn't the line above be replaced with:
> > 
> > #ifdef __ARM_ARCH_4__
> >         mov        pc, lr
> > #else
> >         bx        lr
> > #endif
> > 
> > Or using CONFIG_SYS_THUMB_BUILD?
> 
> I would have preferred adding --fix-v4bx to the linker flags for ARMv4
> targets, but it seems to have no effect at least with some toolchains.

Indeed.

> I'll fix the code above with the ARMv4 conditional, as it is always
> correct regardless of U-boot configuration options semantics.
> 
> > "ENDPROC(relocate_code)" should be added here if you go for ENTRY() at the
> > beginning.
> 
> Ok.
> 
> > Best regards,
> > Benoît
> 
> Thanks for your review!

You're welcome.

Best regards,
Benoît
Simon Glass - May 11, 2013, 10:13 p.m.
On Fri, May 10, 2013 at 3:56 PM, Albert ARIBAUD
<albert.u.boot@aribaud.net> wrote:
>
> Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
> ---
>  arch/arm/cpu/arm1136/start.S   |   82 --------------------------------
>  arch/arm/cpu/arm1176/start.S   |   75 ------------------------------
>  arch/arm/cpu/arm720t/start.S   |   82 --------------------------------
>  arch/arm/cpu/arm920t/start.S   |   75 ------------------------------
>  arch/arm/cpu/arm925t/start.S   |   75 ------------------------------
>  arch/arm/cpu/arm926ejs/start.S |   82 --------------------------------
>  arch/arm/cpu/arm946es/start.S  |   75 ------------------------------
>  arch/arm/cpu/arm_intcm/start.S |   75 ------------------------------
>  arch/arm/cpu/armv7/start.S     |   78 -------------------------------
>  arch/arm/cpu/ixp/start.S       |   75 ------------------------------
>  arch/arm/cpu/pxa/start.S       |   84 ---------------------------------
>  arch/arm/cpu/s3c44b0/start.S   |   75 ------------------------------
>  arch/arm/cpu/sa1100/start.S    |   75 ------------------------------
>  arch/arm/lib/Makefile          |    2 +-
>  arch/arm/lib/relocate.S        |  100 ++++++++++++++++++++++++++++++++++++++++
>  15 files changed, 101 insertions(+), 1009 deletions(-)
>  create mode 100644 arch/arm/lib/relocate.S
>

I tested this series on snow (Exynos 5250, ARMv7, with SPL) with no
apparent regressions.

Tested-by: Simon Glass <sjg@chromium.org>

Regards,
Simon

Patch

diff --git a/arch/arm/cpu/arm1136/start.S b/arch/arm/cpu/arm1136/start.S
index ab8fd56..4b3f828 100644
--- a/arch/arm/cpu/arm1136/start.S
+++ b/arch/arm/cpu/arm1136/start.S
@@ -167,88 +167,6 @@  next:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-
-#ifndef CONFIG_SPL_BUILD
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm1176/start.S b/arch/arm/cpu/arm1176/start.S
index f20da8e..6929b08 100644
--- a/arch/arm/cpu/arm1176/start.S
+++ b/arch/arm/cpu/arm1176/start.S
@@ -223,81 +223,6 @@  skip_tcmdisable:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm720t/start.S b/arch/arm/cpu/arm720t/start.S
index b85509c..afb40eb 100644
--- a/arch/arm/cpu/arm720t/start.S
+++ b/arch/arm/cpu/arm720t/start.S
@@ -149,88 +149,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	mov	pc, lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-
-#ifndef CONFIG_SPL_BUILD
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm920t/start.S b/arch/arm/cpu/arm920t/start.S
index 6250025..dbfca78 100644
--- a/arch/arm/cpu/arm920t/start.S
+++ b/arch/arm/cpu/arm920t/start.S
@@ -192,81 +192,6 @@  copyex:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	mov	pc, lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm925t/start.S b/arch/arm/cpu/arm925t/start.S
index 021e241..372b9e9 100644
--- a/arch/arm/cpu/arm925t/start.S
+++ b/arch/arm/cpu/arm925t/start.S
@@ -182,81 +182,6 @@  poll1:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	mov	pc, lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm926ejs/start.S b/arch/arm/cpu/arm926ejs/start.S
index 736361a..2ee591b 100644
--- a/arch/arm/cpu/arm926ejs/start.S
+++ b/arch/arm/cpu/arm926ejs/start.S
@@ -184,88 +184,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-
-#ifndef CONFIG_SPL_BUILD
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm946es/start.S b/arch/arm/cpu/arm946es/start.S
index 9c2b70d..e31af6f 100644
--- a/arch/arm/cpu/arm946es/start.S
+++ b/arch/arm/cpu/arm946es/start.S
@@ -157,81 +157,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	mov	pc, lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/arm_intcm/start.S b/arch/arm/cpu/arm_intcm/start.S
index 5e8c528..9cce12f 100644
--- a/arch/arm/cpu/arm_intcm/start.S
+++ b/arch/arm/cpu/arm_intcm/start.S
@@ -153,81 +153,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index 3ade510..bfdc22b 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -161,84 +161,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-ENTRY(relocate_code)
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
-	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-ENDPROC(relocate_code)
-
-#endif
-
 ENTRY(c_runtime_cpu_setup)
 /*
  * If I-cache is enabled invalidate it
diff --git a/arch/arm/cpu/ixp/start.S b/arch/arm/cpu/ixp/start.S
index 69ef8aa..f9594d0 100644
--- a/arch/arm/cpu/ixp/start.S
+++ b/arch/arm/cpu/ixp/start.S
@@ -255,81 +255,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/pxa/start.S b/arch/arm/cpu/pxa/start.S
index ada91a6..3078bec 100644
--- a/arch/arm/cpu/pxa/start.S
+++ b/arch/arm/cpu/pxa/start.S
@@ -170,90 +170,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-/* Disable the Dcache RAM lock for stack now */
-#ifdef	CONFIG_CPU_PXA25X
-	mov	r12, lr
-	bl	cpu_init_crit
-	mov	lr, r12
-#endif
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
-#endif
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/s3c44b0/start.S b/arch/arm/cpu/s3c44b0/start.S
index 7361aa2..4e1a444 100644
--- a/arch/arm/cpu/s3c44b0/start.S
+++ b/arch/arm/cpu/s3c44b0/start.S
@@ -138,81 +138,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	bx	lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/cpu/sa1100/start.S b/arch/arm/cpu/sa1100/start.S
index 8a2eafd..c79f8c9 100644
--- a/arch/arm/cpu/sa1100/start.S
+++ b/arch/arm/cpu/sa1100/start.S
@@ -142,81 +142,6 @@  reset:
 
 	bl	_main
 
-/*------------------------------------------------------------------------------*/
-
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
-
-#ifndef CONFIG_SPL_BUILD
-	/*
-	 * fix .rel.dyn relocations
-	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-#endif
-
-relocate_done:
-
-	mov	pc, lr
-
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 	.globl	c_runtime_cpu_setup
 c_runtime_cpu_setup:
 
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index ac51cc1..f8349c5 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -36,7 +36,7 @@  GLSOBJS	+= _umodsi3.o
 
 GLCOBJS	+= div0.o
 
-SOBJS-y += crt0.o
+SOBJS-y += crt0.o relocate.o
 
 ifndef CONFIG_SPL_BUILD
 ifndef CONFIG_SYS_GENERIC_BOARD
diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
new file mode 100644
index 0000000..ce533ca
--- /dev/null
+++ b/arch/arm/lib/relocate.S
@@ -0,0 +1,100 @@ 
+/*
+ *  relocate - common relocation function for ARM U-Boot
+ *
+ *  Copyright (c) 2013  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/*
+ * These are defined in the board-specific linker script.
+ * Subtracting _start from them lets the linker put their
+ * relative position in the executable instead of leaving
+ * them null.
+ */
+
+	.globl	__image_copy_start
+	.globl	__image_copy_end
+	.globl	__rel_dyn_start
+	.globl	__rel_dyn_end
+	.globl	__dynsym_start
+
+/*------------------------------------------------------------------------------*/
+
+/*
+ * void relocate_code(addr_moni)
+ *
+ * This function relocates the monitor code.
+ */
+	.globl	relocate_code
+relocate_code:
+	mov	r6, r0	/* save addr of destination */
+
+	ldr	r0, =_start
+	subs	r9, r6, r0		/* r9 <- relocation offset */
+	beq	relocate_done		/* skip relocation */
+	mov	r1, r6			/* r1 <- scratch for copy_loop */
+	ldr	r2, =__image_copy_end
+
+copy_loop:
+	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
+	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
+	cmp	r0, r2			/* until source end address [r2]    */
+	blo	copy_loop
+
+#ifndef CONFIG_SPL_BUILD
+	/*
+	 * fix .rel.dyn relocations
+	 */
+	ldr	r0, =_TEXT_BASE		/* r0 <- Text base */
+	ldr	r10, =__dynsym_start	/* r10 <- sym table ofs */
+	ldr	r2, =__rel_dyn_start	/* r2 <- rel dyn start ofs */
+	ldr	r3, =__rel_dyn_end	/* r3 <- rel dyn end ofs */
+fixloop:
+	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
+	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
+	ldr	r1, [r2, #4]
+	and	r7, r1, #0xff
+	cmp	r7, #23			/* relative fixup? */
+	beq	fixrel
+	cmp	r7, #2			/* absolute fixup? */
+	beq	fixabs
+	/* ignore unknown type of fixup */
+	b	fixnext
+fixabs:
+	/* absolute fix: set location to (offset) symbol value */
+	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
+	add	r1, r10, r1		/* r1 <- address of symbol in table */
+	ldr	r1, [r1, #4]		/* r1 <- symbol value */
+	add	r1, r1, r9		/* r1 <- relocated sym addr */
+	b	fixnext
+fixrel:
+	/* relative fix: increase location by offset */
+	ldr	r1, [r0]
+	add	r1, r1, r9
+fixnext:
+	str	r1, [r0]
+	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
+	cmp	r2, r3
+	blo	fixloop
+#endif
+
+relocate_done:
+
+	bx	lr