diff mbox series

arm: ARMv4 assembly compatibility

Message ID 20220810090446.1469742-1-saproj@gmail.com
State Changes Requested
Delegated to: Tom Rini
Headers show
Series arm: ARMv4 assembly compatibility | expand

Commit Message

Sergei Antonov Aug. 10, 2022, 9:04 a.m. UTC
There is currently a problem that U-Boot can not work on ARMv4
because assembly imlementations of memcpy() and some other functions
use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
should be used instead).

A working preprocessor-based solution to this problem is found in
arch/arm/lib/relocate.S. Move it to the "ret" macro in
arch/arm/include/asm/assembler.h and change all "bx lr" code
to "ret lr" in functions that may run on ARMv4. Linux source code
deals with this problem in the same manner.

Signed-off-by: Sergei Antonov <saproj@gmail.com>
CC: Samuel Holland <samuel@sholland.org>
CC: Ye Li <ye.li@nxp.com>
CC: Simon Glass <sjg@chromium.org>
CC: Andre Przywara <andre.przywara@arm.com>
CC: Marek Vasut <marex@denx.de>
CC: Sean Anderson <sean.anderson@seco.com>
CC: Tom Rini <trini@konsulko.com>
CC: Wolfgang Denk <wd@denx.de>
---
 arch/arm/include/asm/assembler.h |  6 ++++++
 arch/arm/lib/lib1funcs.S         |  8 ++++----
 arch/arm/lib/memcpy.S            |  6 +++---
 arch/arm/lib/relocate.S          | 10 ++--------
 arch/arm/lib/setjmp.S            |  4 ++--
 5 files changed, 17 insertions(+), 17 deletions(-)

Comments

Sergei Antonov Aug. 16, 2022, 10:14 a.m. UTC | #1
On Wed, 10 Aug 2022 at 12:05, Sergei Antonov <saproj@gmail.com> wrote:
>
> There is currently a problem that U-Boot can not work on ARMv4
> because assembly imlementations of memcpy() and some other functions
> use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
> should be used instead).

Hello! There has been no replies to this message. I hope it did not
make it into spam folders. The proposed patch allows U-Boot to run on
ARMv4 systems without breaking (or even changing) other systems.
Please, consider it.
Andre Przywara Aug. 16, 2022, 4:17 p.m. UTC | #2
On Wed, 10 Aug 2022 12:04:46 +0300
Sergei Antonov <saproj@gmail.com> wrote:

Hi,

> There is currently a problem that U-Boot can not work on ARMv4
> because assembly imlementations of memcpy() and some other functions
> use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
> should be used instead).

So there is a comment in the current code, right above what's shown here in
the first hunk, which says:
/*
 * We only support cores that support at least Thumb-1 and thus we use
 * 'bx lr'
 */
This was added by Tom in commit 431afb4ef9fe, when he removed some code
very close to what you are adding back now.

So what is the story here? This commit seems to suggest U-Boot doesn't support
even ARMv5 without "T", has this changed? There are probably other code
places which would need adjustment to run on ARMv4?

Tom, can you say why support for Thumb-less code / older architectures was
dropped? No users, I guess?

Sergei, can you say what is this for? Are you adding support for a SoC
with an ARM810 core from 1996?

Cheers,
Andre



> 
> A working preprocessor-based solution to this problem is found in
> arch/arm/lib/relocate.S. Move it to the "ret" macro in
> arch/arm/include/asm/assembler.h and change all "bx lr" code
> to "ret lr" in functions that may run on ARMv4. Linux source code
> deals with this problem in the same manner.
> 
> Signed-off-by: Sergei Antonov <saproj@gmail.com>
> CC: Samuel Holland <samuel@sholland.org>
> CC: Ye Li <ye.li@nxp.com>
> CC: Simon Glass <sjg@chromium.org>
> CC: Andre Przywara <andre.przywara@arm.com>
> CC: Marek Vasut <marex@denx.de>
> CC: Sean Anderson <sean.anderson@seco.com>
> CC: Tom Rini <trini@konsulko.com>
> CC: Wolfgang Denk <wd@denx.de>
> ---
>  arch/arm/include/asm/assembler.h |  6 ++++++
>  arch/arm/lib/lib1funcs.S         |  8 ++++----
>  arch/arm/lib/memcpy.S            |  6 +++---
>  arch/arm/lib/relocate.S          | 10 ++--------
>  arch/arm/lib/setjmp.S            |  4 ++--
>  5 files changed, 17 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index b146918586..911f7a1b49 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -63,11 +63,17 @@
>   */
>  	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
>  	.macro	ret\c, reg
> +
> +	/* ARMv4- don't know bx lr but the assembler fails to see that */
> +#ifdef __ARM_ARCH_4__
> +	mov\c	pc, \reg
> +#else
>  	.ifeqs	"\reg", "lr"
>  	bx\c	\reg
>  	.else
>  	mov\c	pc, \reg
>  	.endif
> +#endif
>  	.endm
>  	.endr
>  
> diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
> index 700eee5fbb..7ff4446dd6 100644
> --- a/arch/arm/lib/lib1funcs.S
> +++ b/arch/arm/lib/lib1funcs.S
> @@ -377,7 +377,7 @@ ENTRY(__gnu_thumb1_case_sqi)
>  	lsls	r1, r1, #1
>  	add	lr, lr, r1
>  	pop	{r1}
> -	bx	lr
> +	ret	lr
>  ENDPROC(__gnu_thumb1_case_sqi)
>  .popsection
>  
> @@ -391,7 +391,7 @@ ENTRY(__gnu_thumb1_case_uqi)
>  	lsls	r1, r1, #1
>  	add	lr, lr, r1
>  	pop	{r1}
> -	bx	lr
> +	ret	lr
>  ENDPROC(__gnu_thumb1_case_uqi)
>  .popsection
>  
> @@ -406,7 +406,7 @@ ENTRY(__gnu_thumb1_case_shi)
>  	lsls	r1, r1, #1
>  	add	lr, lr, r1
>  	pop	{r0, r1}
> -	bx	lr
> +	ret	lr
>  ENDPROC(__gnu_thumb1_case_shi)
>  .popsection
>  
> @@ -421,7 +421,7 @@ ENTRY(__gnu_thumb1_case_uhi)
>  	lsls	r1, r1, #1
>  	add	lr, lr, r1
>  	pop	{r0, r1}
> -	bx	lr
> +	ret	lr
>  ENDPROC(__gnu_thumb1_case_uhi)
>  .popsection
>  #endif
> diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
> index eee7a219ce..a1c996f94e 100644
> --- a/arch/arm/lib/memcpy.S
> +++ b/arch/arm/lib/memcpy.S
> @@ -59,7 +59,7 @@
>  #endif
>  ENTRY(memcpy)
>  		cmp	r0, r1
> -		bxeq	lr
> +		reteq	lr
>  
>  		enter	r4, lr
>  
> @@ -148,7 +148,7 @@ ENTRY(memcpy)
>  		str1b	r0, ip, cs, abort=21f
>  
>  		exit	r4, lr
> -		bx	lr
> +		ret	lr
>  
>  9:		rsb	ip, ip, #4
>  		cmp	ip, #2
> @@ -258,7 +258,7 @@ ENTRY(memcpy)
>  
>  	.macro	copy_abort_end
>  	ldmfd	sp!, {r4, lr}
> -	bx	lr
> +	ret	lr
>  	.endm
>  
>  ENDPROC(memcpy)
> diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
> index 5102bfabde..dd6f2e3bd5 100644
> --- a/arch/arm/lib/relocate.S
> +++ b/arch/arm/lib/relocate.S
> @@ -61,7 +61,7 @@ ENTRY(relocate_vectors)
>  	stmia	r1!, {r2-r8,r10}
>  #endif
>  #endif
> -	bx	lr
> +	ret	lr
>  
>  ENDPROC(relocate_vectors)
>  
> @@ -127,13 +127,7 @@ relocate_done:
>  	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
>  #endif
>  
> -	/* ARMv4- don't know bx lr but the assembler fails to see that */
> -
> -#ifdef __ARM_ARCH_4__
> -	mov	pc, lr
> -#else
> -	bx	lr
> -#endif
> +	ret	lr
>  
>  ENDPROC(relocate_code)
>  
> diff --git a/arch/arm/lib/setjmp.S b/arch/arm/lib/setjmp.S
> index 176a1d5315..2f041aeef0 100644
> --- a/arch/arm/lib/setjmp.S
> +++ b/arch/arm/lib/setjmp.S
> @@ -17,7 +17,7 @@ ENTRY(setjmp)
>  	mov  ip, sp
>  	stm  a1, {v1-v8, ip, lr}
>  	mov  a1, #0
> -	bx   lr
> +	ret  lr
>  ENDPROC(setjmp)
>  .popsection
>  
> @@ -31,6 +31,6 @@ ENTRY(longjmp)
>  	bne  1f
>  	mov  a1, #1
>  1:
> -	bx   lr
> +	ret  lr
>  ENDPROC(longjmp)
>  .popsection
Ralph Siemsen Aug. 16, 2022, 5:51 p.m. UTC | #3
On Tue, Aug 16, 2022 at 12:17 PM Andre Przywara <andre.przywara@arm.com> wrote:
>
> So what is the story here? This commit seems to suggest U-Boot doesn't support
> even ARMv5 without "T", has this changed? There are probably other code
> places which would need adjustment to run on ARMv4?

Note that gcc 6.0 and later considers armv4 (non-Thumb) support to be
deprecated. [1] [2]

[1] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
[2] https://lkml.iu.edu/hypermail/linux/kernel/1712.2/05158.html

On Wed, 10 Aug 2022 12:04:46 +0300 Sergei Antonov <saproj@gmail.com> wrote:
>
> A working preprocessor-based solution to this problem is found in
> arch/arm/lib/relocate.S. Move it to the "ret" macro in
> arch/arm/include/asm/assembler.h and change all "bx lr" code
> to "ret lr" in functions that may run on ARMv4. Linux source code
> deals with this problem in the same manner.

Just wanted to point out another option: the linker has a --fix-v4bx
flat, which performs the same conversion of "bx lr" to "mov pc lr".
Although it is kind of an unusual place for such a transformation, it
has the advantage that one does not need to fixup the source code of
every piece of code you might like to compile. I used this some years
ago [3], and it seems it still works in 2020 [4]

[3] http://lists.infradead.org/pipermail/netwinder/2017-August/000267.html
[4] https://lists.gnu.org/archive/html/bug-binutils/2020-02/msg00174.html

Ralph
Sergei Antonov Aug. 17, 2022, 8:27 a.m. UTC | #4
On Tue, 16 Aug 2022 at 19:17, Andre Przywara <andre.przywara@arm.com> wrote:
>
> On Wed, 10 Aug 2022 12:04:46 +0300
> Sergei Antonov <saproj@gmail.com> wrote:
>
> Hi,
>
> > There is currently a problem that U-Boot can not work on ARMv4
> > because assembly imlementations of memcpy() and some other functions
> > use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
> > should be used instead).
>
> So there is a comment in the current code, right above what's shown here in
> the first hunk, which says:
> /*
>  * We only support cores that support at least Thumb-1 and thus we use
>  * 'bx lr'
>  */

I did not notice it. Thanks.

> This was added by Tom in commit 431afb4ef9fe, when he removed some code
> very close to what you are adding back now.
>
> So what is the story here? This commit seems to suggest U-Boot doesn't support
> even ARMv5 without "T", has this changed? There are probably other code
> places which would need adjustment to run on ARMv4?

I can not explain why there was __ARM_ARCH_5E__ in the removed code. I
am dealing with the issue that ARMv4 ("four" without letter "t") does
not understand "bx lr" while ARMv4T does. The device on which I run
modern U-Boot has FA526 - a CPU similar to ARM810 which is ARMv4
without Thumb. Its MIDR register is 0x66015261 which means that
Architecture=1.

> Tom, can you say why support for Thumb-less code / older architectures was
> dropped? No users, I guess?
>
> Sergei, can you say what is this for? Are you adding support for a SoC
> with an ARM810 core from 1996?

Yes.
And U-Boot for this device is already working: able to boot Linux and
do TFTP stuff. My company is going to use it on hundreds of devices.
It would be great for us to not have to patch U-Boot locally and have
ARMv4-compatible assembly in the mainline.
Tom Rini Aug. 20, 2022, 7:13 p.m. UTC | #5
On Wed, Aug 17, 2022 at 11:27:39AM +0300, Sergei Antonov wrote:
> On Tue, 16 Aug 2022 at 19:17, Andre Przywara <andre.przywara@arm.com> wrote:
> >
> > On Wed, 10 Aug 2022 12:04:46 +0300
> > Sergei Antonov <saproj@gmail.com> wrote:
> >
> > Hi,
> >
> > > There is currently a problem that U-Boot can not work on ARMv4
> > > because assembly imlementations of memcpy() and some other functions
> > > use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
> > > should be used instead).
> >
> > So there is a comment in the current code, right above what's shown here in
> > the first hunk, which says:
> > /*
> >  * We only support cores that support at least Thumb-1 and thus we use
> >  * 'bx lr'
> >  */
> 
> I did not notice it. Thanks.
> 
> > This was added by Tom in commit 431afb4ef9fe, when he removed some code
> > very close to what you are adding back now.
> >
> > So what is the story here? This commit seems to suggest U-Boot doesn't support
> > even ARMv5 without "T", has this changed? There are probably other code
> > places which would need adjustment to run on ARMv4?
> 
> I can not explain why there was __ARM_ARCH_5E__ in the removed code. I
> am dealing with the issue that ARMv4 ("four" without letter "t") does
> not understand "bx lr" while ARMv4T does. The device on which I run
> modern U-Boot has FA526 - a CPU similar to ARM810 which is ARMv4
> without Thumb. Its MIDR register is 0x66015261 which means that
> Architecture=1.
> 
> > Tom, can you say why support for Thumb-less code / older architectures was
> > dropped? No users, I guess?
> >
> > Sergei, can you say what is this for? Are you adding support for a SoC
> > with an ARM810 core from 1996?
> 
> Yes.
> And U-Boot for this device is already working: able to boot Linux and
> do TFTP stuff. My company is going to use it on hundreds of devices.
> It would be great for us to not have to patch U-Boot locally and have
> ARMv4-compatible assembly in the mainline.

So, my recollection at the time was that we were / had removed all
upstream platforms that were using a core design that didn't accept "bx
lr". If you're going to upstream the rest of your platform, please do a
v2 that also updates the comment Andre pointed out. But by itself, the
general policy is that we support platforms that are upstream, and
anything else is functionally dead code.
Sergei Antonov Aug. 21, 2022, 1:39 p.m. UTC | #6
On Sat, 20 Aug 2022 at 22:13, Tom Rini <trini@konsulko.com> wrote:
> So, my recollection at the time was that we were / had removed all
> upstream platforms that were using a core design that didn't accept "bx
> lr". If you're going to upstream the rest of your platform, please do a
> v2 that also updates the comment Andre pointed out. But by itself, the
> general policy is that we support platforms that are upstream, and
> anything else is functionally dead code.

Yes, I am going to upstream the rest of my platform. It will involve
resurrecting some removed non-DM drivers and porting them to DM.
Submitting now the v2 of the patch with an updated commend.
diff mbox series

Patch

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index b146918586..911f7a1b49 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -63,11 +63,17 @@ 
  */
 	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
 	.macro	ret\c, reg
+
+	/* ARMv4- don't know bx lr but the assembler fails to see that */
+#ifdef __ARM_ARCH_4__
+	mov\c	pc, \reg
+#else
 	.ifeqs	"\reg", "lr"
 	bx\c	\reg
 	.else
 	mov\c	pc, \reg
 	.endif
+#endif
 	.endm
 	.endr
 
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 700eee5fbb..7ff4446dd6 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -377,7 +377,7 @@  ENTRY(__gnu_thumb1_case_sqi)
 	lsls	r1, r1, #1
 	add	lr, lr, r1
 	pop	{r1}
-	bx	lr
+	ret	lr
 ENDPROC(__gnu_thumb1_case_sqi)
 .popsection
 
@@ -391,7 +391,7 @@  ENTRY(__gnu_thumb1_case_uqi)
 	lsls	r1, r1, #1
 	add	lr, lr, r1
 	pop	{r1}
-	bx	lr
+	ret	lr
 ENDPROC(__gnu_thumb1_case_uqi)
 .popsection
 
@@ -406,7 +406,7 @@  ENTRY(__gnu_thumb1_case_shi)
 	lsls	r1, r1, #1
 	add	lr, lr, r1
 	pop	{r0, r1}
-	bx	lr
+	ret	lr
 ENDPROC(__gnu_thumb1_case_shi)
 .popsection
 
@@ -421,7 +421,7 @@  ENTRY(__gnu_thumb1_case_uhi)
 	lsls	r1, r1, #1
 	add	lr, lr, r1
 	pop	{r0, r1}
-	bx	lr
+	ret	lr
 ENDPROC(__gnu_thumb1_case_uhi)
 .popsection
 #endif
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index eee7a219ce..a1c996f94e 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -59,7 +59,7 @@ 
 #endif
 ENTRY(memcpy)
 		cmp	r0, r1
-		bxeq	lr
+		reteq	lr
 
 		enter	r4, lr
 
@@ -148,7 +148,7 @@  ENTRY(memcpy)
 		str1b	r0, ip, cs, abort=21f
 
 		exit	r4, lr
-		bx	lr
+		ret	lr
 
 9:		rsb	ip, ip, #4
 		cmp	ip, #2
@@ -258,7 +258,7 @@  ENTRY(memcpy)
 
 	.macro	copy_abort_end
 	ldmfd	sp!, {r4, lr}
-	bx	lr
+	ret	lr
 	.endm
 
 ENDPROC(memcpy)
diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
index 5102bfabde..dd6f2e3bd5 100644
--- a/arch/arm/lib/relocate.S
+++ b/arch/arm/lib/relocate.S
@@ -61,7 +61,7 @@  ENTRY(relocate_vectors)
 	stmia	r1!, {r2-r8,r10}
 #endif
 #endif
-	bx	lr
+	ret	lr
 
 ENDPROC(relocate_vectors)
 
@@ -127,13 +127,7 @@  relocate_done:
 	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
 #endif
 
-	/* ARMv4- don't know bx lr but the assembler fails to see that */
-
-#ifdef __ARM_ARCH_4__
-	mov	pc, lr
-#else
-	bx	lr
-#endif
+	ret	lr
 
 ENDPROC(relocate_code)
 
diff --git a/arch/arm/lib/setjmp.S b/arch/arm/lib/setjmp.S
index 176a1d5315..2f041aeef0 100644
--- a/arch/arm/lib/setjmp.S
+++ b/arch/arm/lib/setjmp.S
@@ -17,7 +17,7 @@  ENTRY(setjmp)
 	mov  ip, sp
 	stm  a1, {v1-v8, ip, lr}
 	mov  a1, #0
-	bx   lr
+	ret  lr
 ENDPROC(setjmp)
 .popsection
 
@@ -31,6 +31,6 @@  ENTRY(longjmp)
 	bne  1f
 	mov  a1, #1
 1:
-	bx   lr
+	ret  lr
 ENDPROC(longjmp)
 .popsection