diff mbox series

[5/5] ARCv2: LIB: MEMCPY: fixed and optimised routine

Message ID 20190129104942.31705-6-Eugeniy.Paltsev@synopsys.com
State New
Headers show
Series introduce unaligned access under a Kconfig option | expand

Commit Message

Eugeniy Paltsev Jan. 29, 2019, 10:49 a.m. UTC
Optimise code to use efficient unaligned memory access which is
available on ARCv2. This allows us to really simplify memcpy code
and speed up the code one and a half times (in case of unaligned
source or destination).

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
---
 arch/arc/Kconfig                      |  4 +++
 arch/arc/lib/Makefile                 |  5 +++-
 arch/arc/lib/memcpy-archs-unaligned.S | 46 +++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 arch/arc/lib/memcpy-archs-unaligned.S

Comments

Vineet Gupta Jan. 29, 2019, 9:55 p.m. UTC | #1
On 1/29/19 2:49 AM, Eugeniy Paltsev wrote:
> Optimise code to use efficient unaligned memory access which is
> available on ARCv2. This allows us to really simplify memcpy code
> and speed up the code one and a half times (in case of unaligned
> source or destination).
>
> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
> ---
>  arch/arc/Kconfig                      |  4 +++
>  arch/arc/lib/Makefile                 |  5 +++-
>  arch/arc/lib/memcpy-archs-unaligned.S | 46 +++++++++++++++++++++++++++++++++++
>  3 files changed, 54 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arc/lib/memcpy-archs-unaligned.S
>
> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
> index a1d976c612a6..88f1a3205b8f 100644
> --- a/arch/arc/Kconfig
> +++ b/arch/arc/Kconfig
> @@ -396,6 +396,10 @@ config ARC_USE_UNALIGNED_MEM_ACCESS
>  	  which is disabled by default. Enable unaligned access in
>  	  hardware and use it in software.
>  
> +#dummy symbol for using in makefile
> +config ARC_NO_UNALIGNED_MEM_ACCESS
> +	def_bool !ARC_USE_UNALIGNED_MEM_ACCESS
> +

Not needed - you can use the kconfig symbols in Makefile.
See arch/arc/kernel/Makefile

>  config ARC_HAS_LL64
>  	bool "Insn: 64bit LDD/STD"
>  	help
> diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
> index b1656d156097..59cc8b61342e 100644
> --- a/arch/arc/lib/Makefile
> +++ b/arch/arc/lib/Makefile
> @@ -8,4 +8,7 @@
>  lib-y	:= strchr-700.o strcpy-700.o strlen.o memcmp.o
>  
>  lib-$(CONFIG_ISA_ARCOMPACT)	+= memcpy-700.o memset.o strcmp.o
> -lib-$(CONFIG_ISA_ARCV2)		+= memcpy-archs.o memset-archs.o strcmp-archs.o
> +lib-$(CONFIG_ISA_ARCV2)		+= memset-archs.o strcmp-archs.o
> +
> +lib-$(CONFIG_ARC_NO_UNALIGNED_MEM_ACCESS)	+= memcpy-archs.o
> +lib-$(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS)	+= memcpy-archs-unaligned.o

ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS
lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs-unaligned.o
else
lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs.o
endif

> diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S
> new file mode 100644
> index 000000000000..e09b51d4de70
> --- /dev/null
> +++ b/arch/arc/lib/memcpy-archs-unaligned.S
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +//
> +// ARCv2 memcpy implementation optimized for hardware unaligned memory access.
> +//
> +// Copyright (C) 2019 Synopsys
> +// Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
> +
> +#include <linux/linkage.h>
> +
> +#ifdef CONFIG_ARC_HAS_LL64
> +# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
> +# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
> +# define ZOLSHFT		5
> +# define ZOLAND			0x1F
> +#else
> +# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
> +# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
> +# define ZOLSHFT		4
> +# define ZOLAND			0xF
> +#endif
> +
> +ENTRY_CFI(memcpy)
> +	mov	r3, r0		; don't clobber ret val
> +
> +	lsr.f	lp_count, r2, ZOLSHFT
> +	lpnz	@.Lcopy32_64bytes
> +	;; LOOP START
> +	LOADX	(r6, r1)
> +	LOADX	(r8, r1)
> +	LOADX	(r10, r1)
> +	LOADX	(r4, r1)
> +	STOREX	(r6, r3)
> +	STOREX	(r8, r3)
> +	STOREX	(r10, r3)
> +	STOREX	(r4, r3)
> +.Lcopy32_64bytes:
> +
> +	and.f	lp_count, r2, ZOLAND ; last remaining bytes (at most ZOLAND)
> +	lpnz	@.Lcopyremainingbytes
> +	;; LOOP START
> +	ldb.ab	r5, [r1, 1]
> +	stb.ab	r5, [r3, 1]



> +.Lcopyremainingbytes:
> +
> +	j	[blink]
> +END_CFI(memcpy)
diff mbox series

Patch

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a1d976c612a6..88f1a3205b8f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -396,6 +396,10 @@  config ARC_USE_UNALIGNED_MEM_ACCESS
 	  which is disabled by default. Enable unaligned access in
 	  hardware and use it in software.
 
+#dummy symbol for using in makefile
+config ARC_NO_UNALIGNED_MEM_ACCESS
+	def_bool !ARC_USE_UNALIGNED_MEM_ACCESS
+
 config ARC_HAS_LL64
 	bool "Insn: 64bit LDD/STD"
 	help
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index b1656d156097..59cc8b61342e 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -8,4 +8,7 @@ 
 lib-y	:= strchr-700.o strcpy-700.o strlen.o memcmp.o
 
 lib-$(CONFIG_ISA_ARCOMPACT)	+= memcpy-700.o memset.o strcmp.o
-lib-$(CONFIG_ISA_ARCV2)		+= memcpy-archs.o memset-archs.o strcmp-archs.o
+lib-$(CONFIG_ISA_ARCV2)		+= memset-archs.o strcmp-archs.o
+
+lib-$(CONFIG_ARC_NO_UNALIGNED_MEM_ACCESS)	+= memcpy-archs.o
+lib-$(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS)	+= memcpy-archs-unaligned.o
diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S
new file mode 100644
index 000000000000..e09b51d4de70
--- /dev/null
+++ b/arch/arc/lib/memcpy-archs-unaligned.S
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ */
+//
+// ARCv2 memcpy implementation optimized for hardware unaligned memory access.
+//
+// Copyright (C) 2019 Synopsys
+// Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+
+#include <linux/linkage.h>
+
+#ifdef CONFIG_ARC_HAS_LL64
+# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
+# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
+# define ZOLSHFT		5
+# define ZOLAND			0x1F
+#else
+# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
+# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
+# define ZOLSHFT		4
+# define ZOLAND			0xF
+#endif
+
+ENTRY_CFI(memcpy)
+	mov	r3, r0		; don't clobber ret val
+
+	lsr.f	lp_count, r2, ZOLSHFT
+	lpnz	@.Lcopy32_64bytes
+	;; LOOP START
+	LOADX	(r6, r1)
+	LOADX	(r8, r1)
+	LOADX	(r10, r1)
+	LOADX	(r4, r1)
+	STOREX	(r6, r3)
+	STOREX	(r8, r3)
+	STOREX	(r10, r3)
+	STOREX	(r4, r3)
+.Lcopy32_64bytes:
+
+	and.f	lp_count, r2, ZOLAND ; last remaining bytes (at most ZOLAND)
+	lpnz	@.Lcopyremainingbytes
+	;; LOOP START
+	ldb.ab	r5, [r1, 1]
+	stb.ab	r5, [r3, 1]
+.Lcopyremainingbytes:
+
+	j	[blink]
+END_CFI(memcpy)