=====================================================================
length=256, char=0: 139.96 (-698.28%) 9.07 ( 48.26%) 17.53
length=257, char=0: 140.50 (-699.03%) 9.53 ( 45.80%) 17.58
length=258, char=0: 140.96 (-703.95%) 9.58 ( 45.36%) 17.53
length=259, char=0: 141.56 (-705.16%) 9.53 ( 45.79%) 17.58
length=260, char=0: 142.15 (-710.76%) 9.57 ( 45.39%) 17.53
length=261, char=0: 142.50 (-710.39%) 9.53 ( 45.78%) 17.58
length=262, char=0: 142.97 (-715.09%) 9.57 ( 45.42%) 17.54
length=263, char=0: 143.51 (-716.18%) 9.53 ( 45.80%) 17.58
length=264, char=0: 143.93 (-720.55%) 9.58 ( 45.39%) 17.54
length=265, char=0: 144.56 (-722.07%) 9.53 ( 45.80%) 17.59
length=266, char=0: 144.98 (-726.42%) 9.58 ( 45.42%) 17.54
length=267, char=0: 145.53 (-727.53%) 9.53 ( 45.80%) 17.59
length=268, char=0: 146.25 (-731.81%) 9.53 ( 45.79%) 17.58
length=269, char=0: 146.52 (-735.39%) 9.53 ( 45.66%) 17.54
length=270, char=0: 146.97 (-735.81%) 9.53 ( 45.80%) 17.58
length=271, char=0: 147.54 (-741.08%) 9.58 ( 45.38%) 17.54
length=512, char=0: 268.26 (-1307.85%) 12.06 ( 36.71%) 19.05
length=513, char=0: 268.73 (-1273.89%) 13.56 ( 30.68%) 19.56
length=514, char=0: 269.31 (-1276.89%) 13.56 ( 30.68%) 19.56
length=515, char=0: 269.73 (-1279.05%) 13.56 ( 30.68%) 19.56
length=516, char=0: 270.34 (-1282.24%) 13.56 ( 30.67%) 19.56
length=517, char=0: 270.83 (-1284.71%) 13.56 ( 30.66%) 19.56
length=518, char=0: 271.20 (-1286.54%) 13.56 ( 30.67%) 19.56
length=519, char=0: 271.67 (-1288.67%) 13.65 ( 30.24%) 19.56
length=520, char=0: 272.14 (-1291.04%) 13.65 ( 30.22%) 19.56
length=521, char=0: 272.66 (-1293.69%) 13.65 ( 30.23%) 19.56
length=522, char=0: 273.14 (-1296.13%) 13.65 ( 30.20%) 19.56
length=523, char=0: 273.64 (-1298.75%) 13.65 ( 30.23%) 19.56
length=524, char=0: 274.34 (-1302.16%) 13.66 ( 30.20%) 19.57
length=525, char=0: 274.64 (-1297.78%) 13.56 ( 30.99%) 19.65
length=526, char=0: 275.20 (-1300.04%) 13.56 ( 31.01%) 19.66
length=527, char=0: 275.66 (-1302.86%) 13.56 ( 30.99%) 19.65
length=1024, char=0: 524.46 (-2169.75%) 20.12 ( 12.92%) 23.11
length=1025, char=0: 525.14 (-2124.63%) 21.62 ( 8.40%) 23.61
length=1026, char=0: 525.59 (-2125.36%) 21.88 ( 7.37%) 23.62
length=1027, char=0: 525.98 (-2127.14%) 21.62 ( 8.46%) 23.62
length=1028, char=0: 526.68 (-2131.10%) 21.62 ( 8.42%) 23.61
length=1029, char=0: 527.10 (-2131.70%) 21.79 ( 7.73%) 23.62
length=1030, char=0: 527.54 (-2118.51%) 21.62 ( 9.10%) 23.78
length=1031, char=0: 527.98 (-2136.37%) 21.62 ( 8.43%) 23.61
length=1032, char=0: 528.70 (-2139.38%) 21.62 ( 8.43%) 23.61
length=1033, char=0: 529.25 (-2124.37%) 21.62 ( 9.11%) 23.79
length=1034, char=0: 529.48 (-2142.95%) 21.62 ( 8.43%) 23.61
length=1035, char=0: 530.11 (-2145.13%) 21.62 ( 8.44%) 23.61
length=1036, char=0: 530.76 (-2147.10%) 21.79 ( 7.73%) 23.62
length=1037, char=0: 531.03 (-2149.45%) 21.62 ( 8.42%) 23.61
length=1038, char=0: 531.64 (-2151.87%) 21.62 ( 8.42%) 23.61
length=1039, char=0: 531.99 (-2151.63%) 21.80 ( 7.75%) 23.63
* sysdeps/aarch64/memset-reg.h: New file.
* sysdeps/aarch64/memset.S: Use it.
(__memset): Rename to MEMSET macro.
[ZVA_MACRO]: Use zva_macro.
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
Add memset_generic and memset_falkor.
* sysdeps/aarch64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add memset ifuncs.
* sysdeps/aarch64/multiarch/init-arch.h (INIT_ARCH): New
local variable zva_size.
* sysdeps/aarch64/multiarch/memset.c: New file.
* sysdeps/aarch64/multiarch/memset_generic.S: New file.
* sysdeps/aarch64/multiarch/memset_falkor.S: New file.
* sysdeps/aarch64/multiarch/rtld-memset.S: New file.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.c
(DCZID_DZP_MASK): New macro.
(DCZID_BS_MASK): Likewise.
(init_cpu_features): Read and set zva_size.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h
(struct cpu_features): New member zva_size.
---
sysdeps/aarch64/memset-reg.h | 30 +++++++++++++++
sysdeps/aarch64/memset.S | 27 +++++--------
sysdeps/aarch64/multiarch/Makefile | 2 +-
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 5 +++
sysdeps/aarch64/multiarch/init-arch.h | 8 ++--
sysdeps/aarch64/multiarch/memset.c | 40 +++++++++++++++++++
sysdeps/aarch64/multiarch/memset_falkor.S | 53 ++++++++++++++++++++++++++
sysdeps/aarch64/multiarch/memset_generic.S | 27 +++++++++++++
sysdeps/aarch64/multiarch/rtld-memset.S | 23 +++++++++++
sysdeps/unix/sysv/linux/aarch64/cpu-features.c | 10 +++++
sysdeps/unix/sysv/linux/aarch64/cpu-features.h | 1 +
11 files changed, 204 insertions(+), 22 deletions(-)
create mode 100644 sysdeps/aarch64/memset-reg.h
create mode 100644 sysdeps/aarch64/multiarch/memset.c
create mode 100644 sysdeps/aarch64/multiarch/memset_falkor.S
create mode 100644 sysdeps/aarch64/multiarch/memset_generic.S
create mode 100644 sysdeps/aarch64/multiarch/rtld-memset.S
new file mode 100644
@@ -0,0 +1,30 @@
+/* Register aliases for memset to be used across implementations.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define dstin x0
+#define val x1
+#define valw w1
+#define count x2
+#define dst x3
+#define dstend x4
+#define tmp1 x5
+#define tmp1w w5
+#define tmp2 x6
+#define tmp2w w6
+#define zva_len x7
+#define zva_lenw w7
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include "memset-reg.h"
/* Assumptions:
*
@@ -24,20 +25,7 @@
*
*/
-#define dstin x0
-#define val x1
-#define valw w1
-#define count x2
-#define dst x3
-#define dstend x4
-#define tmp1 x5
-#define tmp1w w5
-#define tmp2 x6
-#define tmp2w w6
-#define zva_len x7
-#define zva_lenw w7
-
-ENTRY_ALIGN (__memset, 6)
+ENTRY_ALIGN (MEMSET, 6)
DELOUSE (0)
DELOUSE (2)
@@ -108,8 +96,11 @@ L(tail64):
stp q0, q0, [dstend, -32]
ret
- .p2align 3
L(try_zva):
+#ifdef ZVA_MACRO
+ zva_macro
+#else
+ .p2align 3
mrs tmp1, dczid_el0
tbnz tmp1w, 4, L(no_zva)
and tmp1w, tmp1w, 15
@@ -189,7 +180,7 @@ L(zva_other):
b.hs 3b
4: add count, count, zva_len
b L(tail64)
+#endif
-END (__memset)
-weak_alias (__memset, memset)
-libc_hidden_builtin_def (memset)
+END (MEMSET)
+libc_hidden_builtin_def (MEMSET)
@@ -1,4 +1,4 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
- memmove_falkor
+ memmove_falkor memset_generic memset_falkor
endif
@@ -46,6 +46,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
+ IFUNC_IMPL (i, name, memset,
+ /* Enable this on non-falkor processors too so that other cores
+ can do a comparative analysis with __memset_generic. */
+ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
return i;
}
@@ -18,6 +18,8 @@
#include <ldsodefs.h>
-#define INIT_ARCH() \
- uint64_t __attribute__((unused)) midr = \
- GLRO(dl_aarch64_cpu_features).midr_el1;
+#define INIT_ARCH() \
+ uint64_t __attribute__((unused)) midr = \
+ GLRO(dl_aarch64_cpu_features).midr_el1; \
+ unsigned __attribute__((unused)) zva_size = \
+ GLRO(dl_aarch64_cpu_features).zva_size;
new file mode 100644
@@ -0,0 +1,40 @@
+/* Multiple versions of memset. AARCH64 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+/* Redefine memset so that the compiler won't complain about the type
+ mismatch with the IFUNC selector in strong_alias, below. */
+# undef memset
+# define memset __redirect_memset
+# include <string.h>
+# include <init-arch.h>
+
+extern __typeof (__redirect_memset) __libc_memset;
+
+extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
+extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
+
+libc_ifunc (__libc_memset, (IS_FALKOR (midr) && zva_size == 64
+ ? __memset_falkor
+ : __memset_generic));
+
+# undef memset
+strong_alias (__libc_memset, memset);
+#endif
new file mode 100644
@@ -0,0 +1,53 @@
+/* Memset for falkor.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <memset-reg.h>
+
+/* Reading dczid_el0 is expensive on falkor so move it into the ifunc
+ resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to
+ use this function only when ZVA is enabled. */
+
+#if IS_IN (libc)
+.macro zva_macro
+ .p2align 4
+ /* Write the first and last 64 byte aligned block using stp rather
+ than using DC ZVA. This is faster on some cores. */
+ str q0, [dst, 16]
+ stp q0, q0, [dst, 32]
+ bic dst, dst, 63
+ stp q0, q0, [dst, 64]
+ stp q0, q0, [dst, 96]
+ sub count, dstend, dst /* Count is now 128 too large. */
+ sub count, count, 128+64+64 /* Adjust count and bias for loop. */
+ add dst, dst, 128
+1: dc zva, dst
+ add dst, dst, 64
+ subs count, count, 64
+ b.hi 1b
+ stp q0, q0, [dst, 0]
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dstend, -64]
+ stp q0, q0, [dstend, -32]
+ ret
+.endm
+
+# define ZVA_MACRO zva_macro
+# define MEMSET __memset_falkor
+# include <sysdeps/aarch64/memset.S>
+#endif
new file mode 100644
@@ -0,0 +1,27 @@
+/* Memset for aarch64, default version for internal use.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define MEMSET __memset_generic
+/* Add a hidden definition for use within libc.so. */
+# ifdef SHARED
+ .globl __GI_memset; __GI_memset = __memset_generic
+# endif
+# include <sysdeps/aarch64/memset.S>
+#endif
new file mode 100644
@@ -0,0 +1,23 @@
+/* Memset for aarch64, for the dynamic linker.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (rtld)
+# define MEMSET memset
+# include <sysdeps/aarch64/memset.S>
+#endif
@@ -20,6 +20,9 @@
#include <sys/auxv.h>
#include <elf/dl-hwcaps.h>
+#define DCZID_DZP_MASK (1 << 4)
+#define DCZID_BS_MASK (0xf)
+
#if HAVE_TUNABLES
struct cpu_list
{
@@ -72,4 +75,11 @@ init_cpu_features (struct cpu_features *cpu_features)
}
cpu_features->midr_el1 = midr;
+
+ /* Check if ZVA is enabled. */
+ unsigned dczid;
+ asm volatile ("mrs %0, dczid_el0" : "=r"(dczid));
+
+ if ((dczid & DCZID_DZP_MASK) == 0)
+ cpu_features->zva_size = 4 << (dczid & DCZID_BS_MASK);
}
@@ -47,6 +47,7 @@
struct cpu_features
{
uint64_t midr_el1;
+ unsigned zva_size;
};
#endif /* _CPU_FEATURES_AARCH64_H */