Patchwork [U-Boot,v2,8/9] arm: use thumb interworking returns in libgcc

login
register
mail settings
Submitter Allen Martin
Date Aug. 1, 2012, 8:32 p.m.
Message ID <1343853146-15498-9-git-send-email-amartin@nvidia.com>
Download mbox | patch
Permalink /patch/174595/
State Superseded
Headers show

Comments

Allen Martin - Aug. 1, 2012, 8:32 p.m.
If CONFIG_SYS_THUMB_BUILD is enabled, use thumb interworking return
instructions from libgcc routines, otherwise use ARM returns.

Signed-off-by: Allen Martin <amartin@nvidia.com>
---
 arch/arm/include/asm/assembler.h |   10 ++++++++++
 arch/arm/lib/_ashldi3.S          |    3 ++-
 arch/arm/lib/_ashrdi3.S          |    3 ++-
 arch/arm/lib/_divsi3.S           |   15 +++++++++++----
 arch/arm/lib/_lshrdi3.S          |    3 ++-
 arch/arm/lib/_modsi3.S           |    9 ++++++++-
 arch/arm/lib/_udivsi3.S          |    8 +++++---
 arch/arm/lib/_umodsi3.S          |   18 +++++++++++++++++-
 arch/arm/lib/memcpy.S            |   10 ++++++++++
 arch/arm/lib/memset.S            |    3 ++-
 10 files changed, 69 insertions(+), 13 deletions(-)
Aneesh V - Aug. 1, 2012, 9:11 p.m.
Hi Allen,

On Wed, Aug 1, 2012 at 1:32 PM, Allen Martin <amartin@nvidia.com> wrote:

> If CONFIG_SYS_THUMB_BUILD is enabled, use thumb interworking return
> instructions from libgcc routines, otherwise use ARM returns.
>
> Signed-off-by: Allen Martin <amartin@nvidia.com>
> ---
>  arch/arm/include/asm/assembler.h |   10 ++++++++++
>  arch/arm/lib/_ashldi3.S          |    3 ++-
>  arch/arm/lib/_ashrdi3.S          |    3 ++-
>  arch/arm/lib/_divsi3.S           |   15 +++++++++++----
>  arch/arm/lib/_lshrdi3.S          |    3 ++-
>  arch/arm/lib/_modsi3.S           |    9 ++++++++-
>  arch/arm/lib/_udivsi3.S          |    8 +++++---
>  arch/arm/lib/_umodsi3.S          |   18 +++++++++++++++++-
>  arch/arm/lib/memcpy.S            |   10 ++++++++++
>  arch/arm/lib/memset.S            |    3 ++-
>  10 files changed, 69 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm/include/asm/assembler.h
> b/arch/arm/include/asm/assembler.h
> index 5e4789b..25ece01 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -13,6 +13,7 @@
>   *  Do not include any C declarations in this file - it is included by
>   *  assembler source.
>   */
> +#include <config.h>
>
>  /*
>   * Endian independent macros for shifting bytes within registers.
> @@ -58,3 +59,12 @@
>   * Cache alligned
>   */
>  #define CALGN(code...) code
> +
> +/*
> + * return instruction
> + */
> +#ifdef CONFIG_SYS_THUMB_BUILD
> +#define RET    bx      lr
> +#else
> +#define RET    mov     pc, lr
>

Why not "bx lr" in all cases? In that case you can just replace all the
instances of
"mov pc, lr" directly by "bx lr" instead of this macro. That looks cleaner
to me.

BTW, as far as I remember when I did this originally my compiler was
compiling
all assembly functions in ARM and it was automatically converting "mov pc,
lr" to
"bx lr", where necessary. Maybe that was just my compiler and I don't
remember
the details now. Did you really face any issue with "mov pc, lr" making
wrong jumps?

best regards,
Aneesh
Allen Martin - Aug. 1, 2012, 9:55 p.m.
On Wed, Aug 01, 2012 at 02:11:48PM -0700, V, Aneesh wrote:
> Hi Allen,
> 
> On Wed, Aug 1, 2012 at 1:32 PM, Allen Martin <amartin@nvidia.com> wrote:
> If CONFIG_SYS_THUMB_BUILD is enabled, use thumb interworking return
> instructions from libgcc routines, otherwise use ARM returns.
> 
> Signed-off-by: Allen Martin <amartin@nvidia.com>
> ---
>  arch/arm/include/asm/assembler.h |   10 ++++++++++
>  arch/arm/lib/_ashldi3.S          |    3 ++-
>  arch/arm/lib/_ashrdi3.S          |    3 ++-
>  arch/arm/lib/_divsi3.S           |   15 +++++++++++----
>  arch/arm/lib/_lshrdi3.S          |    3 ++-
>  arch/arm/lib/_modsi3.S           |    9 ++++++++-
>  arch/arm/lib/_udivsi3.S          |    8 +++++---
>  arch/arm/lib/_umodsi3.S          |   18 +++++++++++++++++-
>  arch/arm/lib/memcpy.S            |   10 ++++++++++
>  arch/arm/lib/memset.S            |    3 ++-
>  10 files changed, 69 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index 5e4789b..25ece01 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -13,6 +13,7 @@
>   *  Do not include any C declarations in this file - it is included by
>   *  assembler source.
>   */
> +#include <config.h>
> 
>  /*
>   * Endian independent macros for shifting bytes within registers.
> @@ -58,3 +59,12 @@
>   * Cache alligned
>   */
>  #define CALGN(code...) code
> +
> +/*
> + * return instruction
> + */
> +#ifdef CONFIG_SYS_THUMB_BUILD
> +#define RET    bx      lr
> +#else
> +#define RET    mov     pc, lr
> 
> Why not "bx lr" in all cases? In that case you can just replace all the instances of
> "mov pc, lr" directly by "bx lr" instead of this macro. That looks cleaner to me.

I didn't want to break any older ARM architectures that don't support the
bx instruction but use this code.


> BTW, as far as I remember when I did this originally my compiler was compiling
> all assembly functions in ARM and it was automatically converting "mov pc, lr" to
> "bx lr" ,where necessary. Maybe that was just my compiler and I don't remember
> the details now. Did you really face any issue with "mov pc, lr" making wrong jumps?

Yes, without this change if I disassemble the code I'm seeing it's
still a mov instruction and it hangs.

-Allen
Aneesh V - Aug. 1, 2012, 10:15 p.m.
Hi Allen,

On Wed, Aug 1, 2012 at 2:55 PM, Allen Martin <amartin@nvidia.com> wrote:

> On Wed, Aug 01, 2012 at 02:11:48PM -0700, V, Aneesh wrote:
> > Hi Allen,
> >
> > On Wed, Aug 1, 2012 at 1:32 PM, Allen Martin <amartin@nvidia.com> wrote:
> > If CONFIG_SYS_THUMB_BUILD is enabled, use thumb interworking return
> > instructions from libgcc routines, otherwise use ARM returns.
> >
> > Signed-off-by: Allen Martin <amartin@nvidia.com>
> > ---
> >  arch/arm/include/asm/assembler.h |   10 ++++++++++
> >  arch/arm/lib/_ashldi3.S          |    3 ++-
> >  arch/arm/lib/_ashrdi3.S          |    3 ++-
> >  arch/arm/lib/_divsi3.S           |   15 +++++++++++----
> >  arch/arm/lib/_lshrdi3.S          |    3 ++-
> >  arch/arm/lib/_modsi3.S           |    9 ++++++++-
> >  arch/arm/lib/_udivsi3.S          |    8 +++++---
> >  arch/arm/lib/_umodsi3.S          |   18 +++++++++++++++++-
> >  arch/arm/lib/memcpy.S            |   10 ++++++++++
> >  arch/arm/lib/memset.S            |    3 ++-
> >  10 files changed, 69 insertions(+), 13 deletions(-)
> >
> > diff --git a/arch/arm/include/asm/assembler.h
> b/arch/arm/include/asm/assembler.h
> > index 5e4789b..25ece01 100644
> > --- a/arch/arm/include/asm/assembler.h
> > +++ b/arch/arm/include/asm/assembler.h
> > @@ -13,6 +13,7 @@
> >   *  Do not include any C declarations in this file - it is included by
> >   *  assembler source.
> >   */
> > +#include <config.h>
> >
> >  /*
> >   * Endian independent macros for shifting bytes within registers.
> > @@ -58,3 +59,12 @@
> >   * Cache alligned
> >   */
> >  #define CALGN(code...) code
> > +
> > +/*
> > + * return instruction
> > + */
> > +#ifdef CONFIG_SYS_THUMB_BUILD
> > +#define RET    bx      lr
> > +#else
> > +#define RET    mov     pc, lr
> >
> > Why not "bx lr" in all cases? In that case you can just replace all the
> instances of
> > "mov pc, lr" directly by "bx lr" instead of this macro. That looks
> cleaner to me.
>
> I didn't want to break any older ARM architectures that don't support the
> bx instruction but use this code.
>
>
Which is earlier than armv4t, right? On a quick look it didn't seem there was
anything older than that in U-Boot. But yes, it's perhaps better to be safe.


>
> > BTW, as far as I remember when I did this originally my compiler was
> compiling
> > all assembly functions in ARM and it was automatically converting "mov
> pc, lr" to
> > "bx lr" ,where necessary. Maybe that was just my compiler and I don't
> remember
> > the details now. Did you really face any issue with "mov pc, lr" making
> wrong jumps?
>
> Yes, without this change if I disassemble the code I'm seeing it's
> still a mov instruction and it hangs.
>
>
Ok.

br,
Aneesh
Allen Martin - Aug. 1, 2012, 10:28 p.m.
On Wed, Aug 01, 2012 at 03:15:45PM -0700, V, Aneesh wrote:
> > I didn't want to break any older ARM architectures that don't support the
> > bx instruction but use this code.
> 
> 
> Which is earlier than armv4t, right? On quick look it didn't seem there is anything
> older than that in u-boot. But yes, it's perhaps better to be safe.

Yes, in particular bx is available in armv4t but not armv4, and there
are architectures being compiled -march=armv4 in u-boot:

$ grep march arch/arm/cpu/*/config.mk
arch/arm/cpu/arm1136/config.mk:PLATFORM_CPPFLAGS += -march=armv5
arch/arm/cpu/arm1176/config.mk:PLATFORM_CPPFLAGS += -march=armv5t
arch/arm/cpu/arm720t/config.mk:PLATFORM_CPPFLAGS += -march=armv4
-mtune=arm7tdmi
arch/arm/cpu/arm920t/config.mk:PLATFORM_CPPFLAGS += -march=armv4
arch/arm/cpu/arm925t/config.mk:PLATFORM_CPPFLAGS += -march=armv4
arch/arm/cpu/arm926ejs/config.mk:PLATFORM_CPPFLAGS += -march=armv5te
arch/arm/cpu/arm946es/config.mk:PLATFORM_CPPFLAGS +=  -march=armv4
arch/arm/cpu/arm_intcm/config.mk:PLATFORM_CPPFLAGS +=  -march=armv4
arch/arm/cpu/armv7/config.mk:PF_CPPFLAGS_ARMV7 := $(call cc-option,
-march=armv7-a, -march=armv5)
arch/arm/cpu/ixp/config.mk:PLATFORM_CPPFLAGS += -mbig-endian
-march=armv5te -mtune=strongarm1100
arch/arm/cpu/lh7a40x/config.mk:PLATFORM_CPPFLAGS += -march=armv4
arch/arm/cpu/pxa/config.mk:PLATFORM_CPPFLAGS += -march=armv5te
-mtune=xscale
arch/arm/cpu/s3c44b0/config.mk:PLATFORM_CPPFLAGS += -march=armv4
-mtune=arm7tdmi -msoft-float
arch/arm/cpu/sa1100/config.mk:PLATFORM_CPPFLAGS += -march=armv4
-mtune=strongarm1100

Probably some of these are actually armv4t, but I don't want to touch
them :^)

-Allen

Patch

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 5e4789b..25ece01 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -13,6 +13,7 @@ 
  *  Do not include any C declarations in this file - it is included by
  *  assembler source.
  */
+#include <config.h>
 
 /*
  * Endian independent macros for shifting bytes within registers.
@@ -58,3 +59,12 @@ 
  * Cache alligned
  */
 #define CALGN(code...) code
+
+/*
+ * return instruction
+ */
+#ifdef CONFIG_SYS_THUMB_BUILD
+#define RET	bx	lr
+#else
+#define RET	mov	pc, lr
+#endif
diff --git a/arch/arm/lib/_ashldi3.S b/arch/arm/lib/_ashldi3.S
index 834ddc2..e280f26 100644
--- a/arch/arm/lib/_ashldi3.S
+++ b/arch/arm/lib/_ashldi3.S
@@ -25,6 +25,7 @@  along with this program; see the file COPYING.  If not, write to
 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 Boston, MA 02110-1301, USA.  */
 
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -45,4 +46,4 @@  __aeabi_llsl:
 	movpl	ah, al, lsl r3
 	orrmi	ah, ah, al, lsr ip
 	mov	al, al, lsl r2
-	mov	pc, lr
+	RET
diff --git a/arch/arm/lib/_ashrdi3.S b/arch/arm/lib/_ashrdi3.S
index 671ac87..3edda9f 100644
--- a/arch/arm/lib/_ashrdi3.S
+++ b/arch/arm/lib/_ashrdi3.S
@@ -25,6 +25,7 @@  along with this program; see the file COPYING.  If not, write to
 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 Boston, MA 02110-1301, USA.  */
 
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -45,4 +46,4 @@  __aeabi_lasr:
 	movpl	al, ah, asr r3
 	orrmi	al, al, ah, lsl ip
 	mov	ah, ah, asr r2
-	mov	pc, lr
+	RET
diff --git a/arch/arm/lib/_divsi3.S b/arch/arm/lib/_divsi3.S
index cfbadb2..ab59d78 100644
--- a/arch/arm/lib/_divsi3.S
+++ b/arch/arm/lib/_divsi3.S
@@ -1,3 +1,5 @@ 
+#include <config.h>
+#include <asm/assembler.h>
 
 .macro ARM_DIV_BODY dividend, divisor, result, curbit
 
@@ -116,27 +118,32 @@  __aeabi_idiv:
 
 	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	RET
 
 10:	teq	ip, r0				@ same sign ?
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	RET
 
 11:	movlo	r0, #0
 	moveq	r0, ip, asr #31
 	orreq	r0, r0, #1
-	mov	pc, lr
+	RET
 
 12:	ARM_DIV2_ORDER r1, r2
 
 	cmp	ip, #0
 	mov	r0, r3, lsr r2
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	RET
 
 Ldiv0:
 
 	str	lr, [sp, #-4]!
 	bl	__div0
 	mov	r0, #0			@ About as wrong as it could be.
+#ifdef CONFIG_SYS_THUMB_BUILD
+	ldr	lr, [sp], #4
+	bx	lr
+#else
 	ldr	pc, [sp], #4
+#endif
diff --git a/arch/arm/lib/_lshrdi3.S b/arch/arm/lib/_lshrdi3.S
index e7fa799..4d7784d 100644
--- a/arch/arm/lib/_lshrdi3.S
+++ b/arch/arm/lib/_lshrdi3.S
@@ -25,6 +25,7 @@  along with this program; see the file COPYING.  If not, write to
 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 Boston, MA 02110-1301, USA.  */
 
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -45,4 +46,4 @@  __aeabi_llsr:
 	movpl	al, ah, lsr r3
 	orrmi	al, al, ah, lsl ip
 	mov	ah, ah, lsr r2
-	mov	pc, lr
+	RET
diff --git a/arch/arm/lib/_modsi3.S b/arch/arm/lib/_modsi3.S
index 539c584..9c7ce8c 100644
--- a/arch/arm/lib/_modsi3.S
+++ b/arch/arm/lib/_modsi3.S
@@ -1,3 +1,5 @@ 
+#include <config.h>
+#include <asm/assembler.h>
 
 .macro ARM_MOD_BODY dividend, divisor, order, spare
 
@@ -88,7 +90,7 @@  __modsi3:
 
 10:	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	RET
 
 
 Ldiv0:
@@ -96,4 +98,9 @@  Ldiv0:
 	str	lr, [sp, #-4]!
 	bl	__div0
 	mov	r0, #0			@ About as wrong as it could be.
+#ifdef CONFIG_SYS_THUMB_BUILD
+	ldr	lr, [sp], #4
+	bx	lr
+#else
 	ldr	pc, [sp], #4
+#endif
diff --git a/arch/arm/lib/_udivsi3.S b/arch/arm/lib/_udivsi3.S
index 1309802..09a1664 100644
--- a/arch/arm/lib/_udivsi3.S
+++ b/arch/arm/lib/_udivsi3.S
@@ -1,3 +1,5 @@ 
+#include <asm/assembler.h>
+
 /* # 1 "libgcc1.S" */
 @ libgcc1 routines for ARM cpu.
 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
@@ -64,7 +66,7 @@  Loop3:
 	bne	Loop3
 Lgot_result:
 	mov	r0, result
-	mov	pc, lr
+	RET
 Ldiv0:
 	str	lr, [sp, #-4]!
 	bl	 __div0       (PLT)
@@ -80,7 +82,7 @@  __aeabi_uidivmod:
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	RET
 
 .globl __aeabi_idivmod
 __aeabi_idivmod:
@@ -90,4 +92,4 @@  __aeabi_idivmod:
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	RET
diff --git a/arch/arm/lib/_umodsi3.S b/arch/arm/lib/_umodsi3.S
index 8465ef0..93eadf9 100644
--- a/arch/arm/lib/_umodsi3.S
+++ b/arch/arm/lib/_umodsi3.S
@@ -1,3 +1,6 @@ 
+#include <config.h>
+#include <asm/assembler.h>
+
 /* # 1 "libgcc1.S" */
 @ libgcc1 routines for ARM cpu.
 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
@@ -19,7 +22,11 @@  curbit		.req	r3
 	beq	Ldiv0
 	mov	curbit, #1
 	cmp	dividend, divisor
+#ifdef CONFIG_SYS_THUMB_BUILD
+	bxcc	lr
+#else
 	movcc	pc, lr
+#endif
 Loop1:
 	@ Unless the divisor is very big, shift it up in multiples of
 	@ four bits, since this is the amount of unwinding in the main
@@ -66,19 +73,28 @@  Loop3:
 	@ then none of the below will match, since the bit in ip will not be
 	@ in the bottom nibble.
 	ands	overdone, overdone, #0xe0000000
+#ifdef CONFIG_SYS_THUMB_BUILD
+	bxeq	lr
+#else
 	moveq	pc, lr				@ No fixups needed
+#endif
 	tst	overdone, ip, ror #3
 	addne	dividend, dividend, divisor, lsr #3
 	tst	overdone, ip, ror #2
 	addne	dividend, dividend, divisor, lsr #2
 	tst	overdone, ip, ror #1
 	addne	dividend, dividend, divisor, lsr #1
-	mov	pc, lr
+	RET
 Ldiv0:
 	str	lr, [sp, #-4]!
 	bl	 __div0       (PLT)
 	mov	r0, #0			@ about as wrong as it could be
+#ifdef CONFIG_SYS_THUMB_BUILD
+	ldmia	sp!, {lr}
+	bx	lr
+#else
 	ldmia	sp!, {pc}
+#endif
 	.size  __umodsi3       , . -  __umodsi3
 /* # 320 "libgcc1.S" */
 /* # 421 "libgcc1.S" */
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index f655256..1e1dbd4 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -10,6 +10,7 @@ 
  *  published by the Free Software Foundation.
  */
 
+#include <config.h>
 #include <asm/assembler.h>
 
 #define W(instr)	instr
@@ -61,7 +62,11 @@ 
 memcpy:
 
 		cmp	r0, r1
+#ifdef CONFIG_SYS_THUMB_BUILD
+		bxeq	lr
+#else
 		moveq	pc, lr
+#endif
 
 		enter	r4, lr
 
@@ -149,7 +154,12 @@  memcpy:
 		str1b	r0, r4, cs, abort=21f
 		str1b	r0, ip, cs, abort=21f
 
+#ifdef CONFIG_SYS_THUMB_BUILD
+		exit	r4, lr
+		bx	lr
+#else
 		exit	r4, pc
+#endif
 
 9:		rsb	ip, ip, #4
 		cmp	ip, #2
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 0cdf895..e149c46 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -9,6 +9,7 @@ 
  *
  *  ASM optimised string functions
  */
+#include <config.h>
 #include <asm/assembler.h>
 
 	.text
@@ -123,4 +124,4 @@  memset:
 	strneb	r1, [r0], #1
 	tst	r2, #1
 	strneb	r1, [r0], #1
-	mov	pc, lr
+	RET