diff mbox series

[v4,2/3] arm64: memset-arm64: Use simple memset when cache is disabled

Message ID 20210812144751.2563707-3-sr@denx.de
State Superseded
Delegated to: Tom Rini
Headers show
Series arm64: Add optimized memset/memcpy/memmove functions | expand

Commit Message

Stefan Roese Aug. 12, 2021, 2:47 p.m. UTC
The optimized memset uses the dc opcode, which causes problems when the
cache is disabled. This patch adds a check for whether the cache is
disabled and, if so, falls back to a very simple memset implementation.
Otherwise the optimized version is used.

Signed-off-by: Stefan Roese <sr@denx.de>

---

Changes in v4:
- Use macros instead of register names, following the optimized code
- Add zero size check

Changes in v2:
- New patch

 arch/arm/lib/memset-arm64.S | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
diff mbox series

Patch

diff --git a/arch/arm/lib/memset-arm64.S b/arch/arm/lib/memset-arm64.S
index 710f6f582cad..ee9f9a96cfe6 100644
--- a/arch/arm/lib/memset-arm64.S
+++ b/arch/arm/lib/memset-arm64.S
@@ -11,6 +11,7 @@ 
  *
  */
 
+#include <asm/macro.h>
 #include "asmdefs.h"
 
 #define dstin	x0
@@ -25,6 +26,37 @@  ENTRY (memset)
 	PTR_ARG (0)
 	SIZE_ARG (2)
 
+	/*
+	 * The optimized memset uses the dc opcode, which causes problems
+	 * when the cache is disabled. Let's check if the cache is disabled
+	 * and use a very simple memset implementation in this case. Otherwise
+	 * jump to the optimized version.
+	 */
+	switch_el x6, 3f, 2f, 1f
+3:	mrs	x6, sctlr_el3
+	b	0f
+2:	mrs	x6, sctlr_el2
+	b	0f
+1:	mrs	x6, sctlr_el1
+0:
+	tst	x6, #CR_C
+	bne	9f
+
+	/*
+	 * A very "simple" memset implementation without the use of the
+	 * dc opcode. Can be run with caches disabled.
+	 */
+	mov	x3, #0x0
+	cmp	count, x3	/* check for zero length */
+	beq	8f
+4:	strb	valw, [dstin, x3]
+	add	x3, x3, #0x1
+	cmp	count, x3
+	bne	4b
+8:	ret
+9:
+
+	/* Here the optimized memset version starts */
 	dup	v0.16B, valw
 	add	dstend, dstin, count