diff mbox series

aarch64: fix strcpy and strnlen for big-endian

Message ID 20200515021228.64541-1-shaolexi@huawei.com
State New
Headers show
Series aarch64: fix strcpy and strnlen for big-endian | expand

Commit Message

Lexi Shao May 15, 2020, 2:12 a.m. UTC
This patch fixes the optimized implementation of strcpy and strnlen
on a big-endian arm64 machine.

The optimized method uses neon, which can process 128bit with one
instruction. On a big-endian machine, the bit order should be reversed
for the whole 128-bit double word. But with the instruction
	rev64	datav.16b, datav.16b
it reverses 64bits in the two halves rather than reversing 128bits.
There is no such instruction as rev128 to reverse the 128bits, but we
can fix this by loading the data registers accordingly.

Fixes 0237b61526e7("aarch64: Optimized implementation of strcpy") and
2911cb68ed3d("aarch64: Optimized implementation of strnlen").

Signed-off-by: Lexi Shao <shaolexi@huawei.com>
---
 sysdeps/aarch64/strcpy.S  | 5 +++++
 sysdeps/aarch64/strnlen.S | 5 +++++
 2 files changed, 10 insertions(+)

Comments

Szabolcs Nagy May 15, 2020, 10:03 a.m. UTC | #1
The 05/15/2020 10:12, Lexi Shao wrote:
> This patch fixes the optimized implementation of strcpy and strnlen
> on a big-endian arm64 machine.
> 
> The optimized method uses neon, which can process 128bit with one
> instruction. On a big-endian machine, the bit order should be reversed
> for the whole 128-bits double word. But with instuction
> 	rev64	datav.16b, datav.16b
> it reverses 64bits in the two halves rather than reverseing 128bits.
> There is no such instruction as rev128 to reverse the 128bits, but we
> can fix this by loading the data registers accordingly.
> 
> Fixes 0237b61526e7("aarch64: Optimized implementation of strcpy") and
> 2911cb68ed3d("aarch64: Optimized implementation of strnlen").
> 
> Signed-off-by: Lexi Shao <shaolexi@huawei.com>

Please add the bug reference to the title i.e.
append [BZ #25824]

note the patch was corrupted below you might want
to check if it's something on your side. (in this
case i could fix it because it was in the context)

with those fixed it's ok to commit,

Reviewed-by: Szabolcs Nagy  <szabolcs.nagy@arm.com>

if you don't have commit rights then i can commit
this for you.

> ---
>  sysdeps/aarch64/strcpy.S  | 5 +++++
>  sysdeps/aarch64/strnlen.S | 5 +++++
>  2 files changed, 10 insertions(+)
> 
> diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
> index 52c21c9..08859dd 100644
> --- a/sysdeps/aarch64/strcpy.S
> +++ b/sysdeps/aarch64/strcpy.S
> @@ -234,8 +234,13 @@ L(entry_no_page_cross):
>  #endif
>  	/* ���loc */
corrupt: ^^^^^^^^^

>  	cmeq	datav.16b, datav.16b, #0
> +#ifdef __AARCH64EB__
> +	mov	data1, datav.d[1]
> +	mov	data2, datav.d[0]
> +#else
>  	mov	data1, datav.d[0]
>  	mov	data2, datav.d[1]
> +#endif
>  	cmp	data1, 0
>  	csel	data1, data1, data2, ne
>  	mov	pos, 8
> diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
> index 5981247..086a5c7 100644
> --- a/sysdeps/aarch64/strnlen.S
> +++ b/sysdeps/aarch64/strnlen.S
> @@ -154,8 +154,13 @@ L(loop_end):
>  	   byte.  */
>  
>  	cmeq	datav.16b, datav.16b, #0
> +#ifdef __AARCH64EB__
> +	mov	data1, datav.d[1]
> +	mov	data2, datav.d[0]
> +#else
>  	mov	data1, datav.d[0]
>  	mov	data2, datav.d[1]
> +#endif
>  	cmp	data1, 0
>  	csel	data1, data1, data2, ne
>  	sub	len, src, srcin
> -- 
> 2.12.3
>
Lexi Shao May 15, 2020, 10:40 a.m. UTC | #2
The 05/15/2020 18:04, Szabolcs Nagy wrote:
>The 05/15/2020 10:12, Lexi Shao wrote:
>> This patch fixes the optimized implementation of strcpy and strnlen on
>> a big-endian arm64 machine.
>>
>> The optimized method uses neon, which can process 128bit with one
>> instruction. On a big-endian machine, the bit order should be reversed
>> for the whole 128-bits double word. But with instuction
>>      rev64   datav.16b, datav.16b
>> it reverses 64bits in the two halves rather than reverseing 128bits.
>> There is no such instruction as rev128 to reverse the 128bits, but we
>> can fix this by loading the data registers accordingly.
>>
>> Fixes 0237b61526e7("aarch64: Optimized implementation of strcpy") and
>> 2911cb68ed3d("aarch64: Optimized implementation of strnlen").
>>
>> Signed-off-by: Lexi Shao <shaolexi@huawei.com>
>
>Please add the bug reference to the title i.e.
>append [BZ #25824]
>
>note the patch was corrupted below you might want to check if it's something on your side. (in this case i could fix it because it was in the context)
>
>with those fixed it's ok to commit,
>
>Reviewed-by: Szabolcs Nagy  <szabolcs.nagy@arm.com>
>
>if you don't have commit rights then i can commit this for you.

No, I don't have commit rights. I will send out a new patch soon that fixes the corruption and the title; please push the commit for me, thanks!

>
>> ---
diff mbox series

Patch

diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 52c21c9..08859dd 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -234,8 +234,13 @@  L(entry_no_page_cross):
 #endif
 	/* calculate the loc value */
 	cmeq	datav.16b, datav.16b, #0
+#ifdef __AARCH64EB__
+	mov	data1, datav.d[1]
+	mov	data2, datav.d[0]
+#else
 	mov	data1, datav.d[0]
 	mov	data2, datav.d[1]
+#endif
 	cmp	data1, 0
 	csel	data1, data1, data2, ne
 	mov	pos, 8
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 5981247..086a5c7 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -154,8 +154,13 @@  L(loop_end):
 	   byte.  */
 
 	cmeq	datav.16b, datav.16b, #0
+#ifdef __AARCH64EB__
+	mov	data1, datav.d[1]
+	mov	data2, datav.d[0]
+#else
 	mov	data1, datav.d[0]
 	mov	data2, datav.d[1]
+#endif
 	cmp	data1, 0
 	csel	data1, data1, data2, ne
 	sub	len, src, srcin