diff mbox

ARCv2: update memset() so it could be used without double load/stores

Message ID 1437401552-30993-1-git-send-email-abrodkin@synopsys.com
State New
Headers show

Commit Message

Alexey Brodkin July 20, 2015, 2:12 p.m. UTC
From: Claudiu Zissulescu <claziss@synopsys.com>

The existing version of memset() relies on the existence of 64-bit load/stores,
but ARC HS38 may not have those instructions implemented in the SoC.

The proposed implementation checks whether the "-mno-ll64" option was passed
to gcc (for ARCv2, "-mll64" is set implicitly by default) by testing whether
__LL64__ is defined; if it is not defined, 32-bit load/stores are used instead.

Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
---
 libc/string/arc/arcv2/memset.S | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Vineet Gupta Oct. 20, 2015, 9:03 a.m. UTC | #1
On Monday 20 July 2015 07:42 PM, Alexey Brodkin wrote:
> From: Claudiu Zissulescu <claziss@synopsys.com>
> 
> Existing version of memset() relies on existence of 64-bit load/stores.
> While ARC HS38 may not have those instructions implemented in SoC.
> 
> Proposed implementation checks if "-mno-ll64" option was passed to gcc
> (for ARCv2 "-mll64" is set implicitly by default) by checking __LL64__
> definition and if it is not defined uses 32-bit load/stores.
> 
> Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>
> Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
> Cc: Vineet Gupta <vgupta@synopsys.com>


Ping! Could this patch be applied, please?

Thx,
-Vineet

> ---
>  libc/string/arc/arcv2/memset.S | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S
> index d076ad1..0918d37 100644
> --- a/libc/string/arc/arcv2/memset.S
> +++ b/libc/string/arc/arcv2/memset.S
> @@ -52,6 +52,7 @@ ENTRY(memset)
>  	lpnz	@.Lset64bytes
>  	;; LOOP START
>  	PREWRITE(r3, 64)	;Prefetch the next write location
> +#ifdef __LL64__
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
> @@ -60,16 +61,45 @@ ENTRY(memset)
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
> +#else
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +#endif
>  .Lset64bytes:
>  
>  	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
>  	lpnz	.Lset32bytes
>  	;; LOOP START
>  	prefetchw [r3, 32]	;Prefetch the next write location
> +#ifdef __LL64__
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
>  	std.ab	r4, [r3, 8]
> +#else
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +	st.ab	r4, [r3, 4]
> +#endif
>  .Lset32bytes:
>  
>  	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
>
Vineet Gupta Dec. 23, 2015, 12:33 p.m. UTC | #2
On Tuesday 20 October 2015 02:33 PM, Vineet Gupta wrote:
> On Monday 20 July 2015 07:42 PM, Alexey Brodkin wrote:
>> From: Claudiu Zissulescu <claziss@synopsys.com>
>>
>> Existing version of memset() relies on existence of 64-bit load/stores.
>> While ARC HS38 may not have those instructions implemented in SoC.
>>
>> Proposed implementation checks if "-mno-ll64" option was passed to gcc
>> (for ARCv2 "-mll64" is set implicitly by default) by checking __LL64__
>> definition and if it is not defined uses 32-bit load/stores.
>>
>> Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>
>> Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
>> Cc: Vineet Gupta <vgupta@synopsys.com>
> 
> 
> Ping ! Could this patch be applied please !

Ping !

> 
> Thx,
> -Vineet
> 
>> ---
>>  libc/string/arc/arcv2/memset.S | 30 ++++++++++++++++++++++++++++++
>>  1 file changed, 30 insertions(+)
>>
>> diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S
>> index d076ad1..0918d37 100644
>> --- a/libc/string/arc/arcv2/memset.S
>> +++ b/libc/string/arc/arcv2/memset.S
>> @@ -52,6 +52,7 @@ ENTRY(memset)
>>  	lpnz	@.Lset64bytes
>>  	;; LOOP START
>>  	PREWRITE(r3, 64)	;Prefetch the next write location
>> +#ifdef __LL64__
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>> @@ -60,16 +61,45 @@ ENTRY(memset)
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>> +#else
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +#endif
>>  .Lset64bytes:
>>  
>>  	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
>>  	lpnz	.Lset32bytes
>>  	;; LOOP START
>>  	prefetchw [r3, 32]	;Prefetch the next write location
>> +#ifdef __LL64__
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>>  	std.ab	r4, [r3, 8]
>> +#else
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +	st.ab	r4, [r3, 4]
>> +#endif
>>  .Lset32bytes:
>>  
>>  	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
>>
diff mbox

Patch

diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S
index d076ad1..0918d37 100644
--- a/libc/string/arc/arcv2/memset.S
+++ b/libc/string/arc/arcv2/memset.S
@@ -52,6 +52,7 @@  ENTRY(memset)
 	lpnz	@.Lset64bytes
 	;; LOOP START
 	PREWRITE(r3, 64)	;Prefetch the next write location
+#ifdef __LL64__
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
@@ -60,16 +61,45 @@  ENTRY(memset)
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
+#else
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+#endif
 .Lset64bytes:
 
 	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
 	lpnz	.Lset32bytes
 	;; LOOP START
 	prefetchw [r3, 32]	;Prefetch the next write location
+#ifdef __LL64__
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
+#else
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+#endif
 .Lset32bytes:
 
 	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes