Message ID | 10c41e3f-d9ea-184d-4580-1beac97fb2dd@linaro.org |
---|---|
State | New |
Headers | show |
I was working on some of other functions and noticed the following error when I ran `make check` with the original patch (I didn't try Adhemerval's): test-canon2.c:(.text+0x33): undefined reference to `mempcpy' On Mon, May 29, 2017 at 1:34 PM, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > > On 28/05/2017 13:26, H.J. Lu wrote: >> On Thu, May 25, 2017 at 2:55 PM, Adhemerval Zanella >> <adhemerval.zanella@linaro.org> wrote: >>> >>> >>> On 25/05/2017 18:38, H.J. Lu wrote: >>>> On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote: >>>>> Ok, I'll get started then. >>>>> >>>>> Are there any general comments about the attached conversion for >>>>> memcpy? Just so I don't repeat the same wrong thing many times. >>>> >>>> You missed: >>>> >>>> /* Define multiple versions only for the definition in lib and for >>>> DSO. In static binaries we need memcpy before the initialization >>>> happened. */ >>>> #if defined SHARED && IS_IN (libc) >>>> >>>> +typedef void * (*memcpy_fn)(void *, const void *, size_t); >>>> + >>>> +extern void * __memcpy_erms(void *dest, const void *src, size_t n); >>>> +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n); >>>> +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void >>>> *src, size_t n); >>>> +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n); >>>> +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n); >>>> +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n); >>>> +extern void * __memcpy_avx_unaligned_erms(void *dest, const void >>>> *src, size_t n); >>>> +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n); >>>> +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void >>>> *src, size_t n); >>>> >>>> Please use something similar to multiarch/strstr.c: >>>> >>>> /* Redefine strstr so that the compiler won't complain about the type >>>> mismatch with the IFUNC selector in 
strong_alias, below. */ >>>> #undef strstr >>>> #define strstr __redirect_strstr >>>> #include <string.h> >>>> #undef strstr >>>> ... >>>> extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; >>>> >>>> +/* Defined in cacheinfo.c */ >>>> +extern long int __x86_shared_cache_size attribute_hidden; >>>> +extern long int __x86_shared_cache_size_half attribute_hidden; >>>> +extern long int __x86_data_cache_size attribute_hidden; >>>> +extern long int __x86_data_cache_size_half attribute_hidden; >>>> +extern long int __x86_shared_non_temporal_threshold attribute_hidden; >>> >>> It seems it will be used not only for memcpy, so I would suggest to add >>> on a common header on multiarch. >>> >>>> >>>> Remove them. >>>> static void * select_memcpy_impl(void) { >>>> + const struct cpu_features* cpu_features_struct_p = __get_cpu_features (); >>>> + >>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) { >>>> + return __memcpy_erms; >>>> + } >>>> + >>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) { >>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER)) >>>> + return __memcpy_avx512_unaligned_erms; >>>> + return __memcpy_avx512_unaligned; >>>> + } >>>> + >>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) { >>>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { >>>> + return __memcpy_avx_unaligned_erms; >>>> + >>>> + } >>>> + return __memcpy_avx_unaligned; >>>> + } >>>> + else { >>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) { >>>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { >>>> + return __memcpy_sse2_unaligned_erms; >>>> + >>>> + } >>>> + return __memcpy_sse2_unaligned; >>>> + } >>>> + else { >>>> + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) { >>>> + return __memcpy_sse2_unaligned; >>>> + >>>> + } >>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) { >>>> + return __memcpy_ssse3_back; >>>> + >>>> 
+ } >>>> + return __memcpy_ssse3; >>>> + } >>>> + } >>>> +} >>>> >>>> Please >>>> >>>> 1. Fix formatting. >>>> 2. Remove unnecessary {}. >>>> 3. Don't use "else". >>>> >>>> +void *__new_memcpy(void *dest, const void *src, size_t n) >>>> + __attribute__ ((ifunc ("select_memcpy_impl"))); >>>> >>>> Use "typeof" here. >>> >>> We have the libc_ifunc{_redirect} to handle the __attribute__ ((ifunc)) support >>> from compiler. I think you can use: >>> >>> # include <string.h> >>> >>> // extern __typeof (memcpy) __memcpy_<each supported one> attribute_hidden; >>> >>> static void *memcpy_selector (void) >>> { >>> // fill me. >>> } >>> >>> libc_ifunc_hidden (memcpy, memcpy, memcpy_selector); >>> libc_hidden_def (memcpy) >> >> Here is my take. It only covers memcpy and mempcpy. Please >> extend it to memmove as well as *_chk functions. >> > > I think we can simplify it further and use the already existent ifunc macros on > libc-symbols.h. Also, for memmove I think we can organize the code better (at > least for ifunc) and build a extra object with a more meaningful name. I used > your logic for the ifunc selection and extended for memmove as well. 
> > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 3736f54..b6179aa 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -7,6 +7,7 @@ ifeq ($(subdir),string) > sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ > strcmp-sse2-unaligned strncmp-ssse3 \ > memcmp-sse4 memcpy-ssse3 \ > + mem-impls \ > memmove-ssse3 \ > memcpy-ssse3-back \ > memmove-ssse3-back \ > diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/mem-impls.S > similarity index 52% > rename from sysdeps/x86_64/multiarch/memmove.S > rename to sysdeps/x86_64/multiarch/mem-impls.S > index 8c534e8..5e74fa0 100644 > --- a/sysdeps/x86_64/multiarch/memmove.S > +++ b/sysdeps/x86_64/multiarch/mem-impls.S > @@ -1,6 +1,5 @@ > -/* Multiple versions of memmove > - All versions must be listed in ifunc-impl-list.c. > - Copyright (C) 2016-2017 Free Software Foundation, Inc. > +/* Multiple versions of memmove, memcpy, and mempcpy. > + Copyright (C) 2017 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > @@ -17,57 +16,6 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > -#include <sysdep.h> > -#include <init-arch.h> > - > -/* Define multiple versions only for the definition in lib and for > - DSO. 
*/ > -#if IS_IN (libc) > - .text > -ENTRY(__libc_memmove) > - .type __libc_memmove, @gnu_indirect_function > - LOAD_RTLD_GLOBAL_RO_RDX > - lea __memmove_erms(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Prefer_ERMS) > - jnz 2f > - HAS_ARCH_FEATURE (Prefer_No_AVX512) > - jnz 1f > - HAS_ARCH_FEATURE (AVX512F_Usable) > - jz 1f > - lea __memmove_avx512_no_vzeroupper(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) > - jnz 2f > - lea __memmove_avx512_unaligned_erms(%rip), %RAX_LP > - HAS_CPU_FEATURE (ERMS) > - jnz 2f > - lea __memmove_avx512_unaligned(%rip), %RAX_LP > - ret > -1: lea __memmove_avx_unaligned(%rip), %RAX_LP > - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) > - jz L(Fast_Unaligned_Load) > - HAS_CPU_FEATURE (ERMS) > - jz 2f > - lea __memmove_avx_unaligned_erms(%rip), %RAX_LP > - ret > -L(Fast_Unaligned_Load): > - lea __memmove_sse2_unaligned(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) > - jz L(SSSE3) > - HAS_CPU_FEATURE (ERMS) > - jz 2f > - lea __memmove_sse2_unaligned_erms(%rip), %RAX_LP > - ret > -L(SSSE3): > - HAS_CPU_FEATURE (SSSE3) > - jz 2f > - lea __memmove_ssse3_back(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Fast_Copy_Backward) > - jnz 2f > - lea __memmove_ssse3(%rip), %RAX_LP > -2: ret > -END(__libc_memmove) > -#endif > - > #if IS_IN (libc) > # define MEMMOVE_SYMBOL(p,s) p##_sse2_##s > > @@ -82,15 +30,16 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy) > The speedup we get from using SSE2 instructions is likely eaten away > by the indirect call in the PLT. 
*/ > # define libc_hidden_builtin_def > +# else > +strong_alias (__memmove_sse2_unaligned, memmove) > # endif > -strong_alias (__libc_memmove, memmove) > #endif > > #if !defined SHARED || !IS_IN (libc) > weak_alias (__mempcpy, mempcpy) > #endif > > -#include "../memmove.S" > +#include <sysdeps/x86_64/memmove.S> > > #if defined SHARED && IS_IN (libc) > # include <shlib-compat.h> > diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S > deleted file mode 100644 > index af27703..0000000 > --- a/sysdeps/x86_64/multiarch/memcpy.S > +++ /dev/null > @@ -1,75 +0,0 @@ > -/* Multiple versions of memcpy > - All versions must be listed in ifunc-impl-list.c. > - Copyright (C) 2010-2017 Free Software Foundation, Inc. > - Contributed by Intel Corporation. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <http://www.gnu.org/licenses/>. */ > - > -#include <sysdep.h> > -#include <init-arch.h> > - > -/* Define multiple versions only for the definition in lib and for > - DSO. In static binaries we need memcpy before the initialization > - happened. 
*/ > -#if defined SHARED && IS_IN (libc) > - .text > -ENTRY(__new_memcpy) > - .type __new_memcpy, @gnu_indirect_function > - LOAD_RTLD_GLOBAL_RO_RDX > - lea __memcpy_erms(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Prefer_ERMS) > - jnz 2f > - HAS_ARCH_FEATURE (Prefer_No_AVX512) > - jnz 1f > - HAS_ARCH_FEATURE (AVX512F_Usable) > - jz 1f > - lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) > - jnz 2f > - lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP > - HAS_CPU_FEATURE (ERMS) > - jnz 2f > - lea __memcpy_avx512_unaligned(%rip), %RAX_LP > - ret > -1: lea __memcpy_avx_unaligned(%rip), %RAX_LP > - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) > - jz L(Fast_Unaligned_Load) > - HAS_CPU_FEATURE (ERMS) > - jz 2f > - lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP > - ret > -L(Fast_Unaligned_Load): > - lea __memcpy_sse2_unaligned(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) > - jz L(SSSE3) > - HAS_CPU_FEATURE (ERMS) > - jz 2f > - lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP > - ret > -L(SSSE3): > - HAS_CPU_FEATURE (SSSE3) > - jz 2f > - lea __memcpy_ssse3_back(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Fast_Copy_Backward) > - jnz 2f > - lea __memcpy_ssse3(%rip), %RAX_LP > -2: ret > -END(__new_memcpy) > - > -# undef memcpy > -# include <shlib-compat.h> > -versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); > -#endif > diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c > new file mode 100644 > index 0000000..ad1b31f > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/memcpy.c > @@ -0,0 +1,35 @@ > +/* Multiple version of memcpy. > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2017 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#if defined SHARED && IS_IN (libc) > + > +# define memcpy __redirect_memcpy > +# include <string.h> > +# undef memcpy > + > +# define SYMBOL_NAME memcpy > +# include "memifunc.h" > + > +extern __typeof (__redirect_memcpy) __new_memcpy; > + > +libc_ifunc (__new_memcpy, memcpy_ifunc_selector ()); > + > +# include <shlib-compat.h> > +versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); > +#endif > diff --git a/sysdeps/x86_64/multiarch/memifunc.h b/sysdeps/x86_64/multiarch/memifunc.h > new file mode 100644 > index 0000000..894b4a0 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/memifunc.h > @@ -0,0 +1,95 @@ > +/* Common definition for memcpy, mempcpy, and memmove implementation. > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2016-2017 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +/* memcpy, mempcpy, and memmove share the same logic for ifunc selection. */ > + > +#include <cpu-features.h> > +#include <ldsodefs.h> > + > +#define PASTER1(x,y) x ## _ ## y > +#define EVALUATOR1(x,y) PASTER1(x,y) > +#define PASTER2(x,y) __ ## x ## _ ## y > +#define EVALUATOR2(x,y) PASTER2(x,y) > + > +/* Basically set '__redirect_<symbol>' to use as type definition, > + '__<symbol>_<variant>' as the optimized implementation and > + '<symbol>_ifunc_selector' as the IFUNC selector. */ > +#define REDIRECT_NAME EVALUATOR1(__redirect, SYMBOL_NAME) > +#define OPTIMIZE(name) EVALUATOR2(SYMBOL_NAME, name) > +#define IFUNC_NAME EVALUATOR1(SYMBOL_NAME, ifunc_selector) > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE(erms) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned) > + attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned_erms) > + attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3_back) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned_erms) > + attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned) > + attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned_erms) > + attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_no_vzeroupper) > + attribute_hidden; > + > +static inline void * > +IFUNC_NAME (void) > +{ > + const struct cpu_features* 
cpu_features = __get_cpu_features (); > + > + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)) > + return OPTIMIZE(erms); > + > + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) > + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) > + { > + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > + return OPTIMIZE(avx512_no_vzeroupper); > + > + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) > + return OPTIMIZE(avx512_unaligned_erms); > + > + return OPTIMIZE(avx512_unaligned); > + } > + > + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + { > + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) > + return OPTIMIZE(avx_unaligned_erms); > + > + return OPTIMIZE(avx_unaligned); > + } > + > + if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3) > + || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy)) > + { > + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) > + return OPTIMIZE(sse2_unaligned_erms); > + > + return OPTIMIZE(sse2_unaligned); > + } > + > + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward)) > + return OPTIMIZE(ssse3_back); > + > + return OPTIMIZE(ssse3); > +} > diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c > new file mode 100644 > index 0000000..76372fc > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/memmove.c > @@ -0,0 +1,33 @@ > +/* Multiple version of memmmove. > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2016-2017 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#if defined SHARED && IS_IN (libc) > + > +# define memmove __redirect_memmove > +# include <string.h> > +# undef memmove > + > +# define SYMBOL_NAME memmove > +# include "memifunc.h" > + > +extern __typeof (__redirect_memmove) __libc_memmove; > + > +libc_ifunc (__libc_memmove, memmove_ifunc_selector ()); > +strong_alias (__libc_memmove, memmove); > +#endif > diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S > deleted file mode 100644 > index b8b2b28..0000000 > --- a/sysdeps/x86_64/multiarch/mempcpy.S > +++ /dev/null > @@ -1,73 +0,0 @@ > -/* Multiple versions of mempcpy > - All versions must be listed in ifunc-impl-list.c. > - Copyright (C) 2010-2017 Free Software Foundation, Inc. > - Contributed by Intel Corporation. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <http://www.gnu.org/licenses/>. 
*/ > - > -#include <sysdep.h> > -#include <init-arch.h> > - > -/* Define multiple versions only for the definition in lib and for > - DSO. In static binaries we need mempcpy before the initialization > - happened. */ > -#if defined SHARED && IS_IN (libc) > - .text > -ENTRY(__mempcpy) > - .type __mempcpy, @gnu_indirect_function > - LOAD_RTLD_GLOBAL_RO_RDX > - lea __mempcpy_erms(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Prefer_ERMS) > - jnz 2f > - HAS_ARCH_FEATURE (Prefer_No_AVX512) > - jnz 1f > - HAS_ARCH_FEATURE (AVX512F_Usable) > - jz 1f > - lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) > - jnz 2f > - lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP > - HAS_CPU_FEATURE (ERMS) > - jnz 2f > - lea __mempcpy_avx512_unaligned(%rip), %RAX_LP > - ret > -1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP > - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) > - jz L(Fast_Unaligned_Load) > - HAS_CPU_FEATURE (ERMS) > - jz 2f > - lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP > - ret > -L(Fast_Unaligned_Load): > - lea __mempcpy_sse2_unaligned(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) > - jz L(SSSE3) > - HAS_CPU_FEATURE (ERMS) > - jz 2f > - lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP > - ret > -L(SSSE3): > - HAS_CPU_FEATURE (SSSE3) > - jz 2f > - lea __mempcpy_ssse3_back(%rip), %RAX_LP > - HAS_ARCH_FEATURE (Fast_Copy_Backward) > - jnz 2f > - lea __mempcpy_ssse3(%rip), %RAX_LP > -2: ret > -END(__mempcpy) > - > -weak_alias (__mempcpy, mempcpy) > -#endif > diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c > new file mode 100644 > index 0000000..e59bde2 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/mempcpy.c > @@ -0,0 +1,34 @@ > +/* Multiple version of mempcpy. > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2017 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#if defined SHARED && IS_IN (libc) > + > +# define mempcpy __redirect_mempcpy > +# define __mempcpy __redirect___mempcpy > +# include <string.h> > +# undef mempcpy > +# undef __mempcpy > + > +# define SYMBOL_NAME mempcpy > +# include "memifunc.h" > + > +libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, > + mempcpy_ifunc_selector ()); > +weak_alias (__mempcpy, mempcpy) > +#endif > -- > 2.7.4
On Mon, May 29, 2017 at 5:11 PM, Erich Elsen <eriche@google.com> wrote:
> I was working on some of other functions and noticed the following
> error when I ran `make check` with the original patch (I didn't try
> Adhemerval's):
>
> test-canon2.c:(.text+0x33): undefined reference to `mempcpy'
>

You must have missed something. Please take a look at the hjl/ifunc/c branch, which converted memcpy/mempcpy/memmove to C.

H.J.
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 3736f54..b6179aa 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -7,6 +7,7 @@ ifeq ($(subdir),string) sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ strcmp-sse2-unaligned strncmp-ssse3 \ memcmp-sse4 memcpy-ssse3 \ + mem-impls \ memmove-ssse3 \ memcpy-ssse3-back \ memmove-ssse3-back \ diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/mem-impls.S similarity index 52% rename from sysdeps/x86_64/multiarch/memmove.S rename to sysdeps/x86_64/multiarch/mem-impls.S index 8c534e8..5e74fa0 100644 --- a/sysdeps/x86_64/multiarch/memmove.S +++ b/sysdeps/x86_64/multiarch/mem-impls.S @@ -1,6 +1,5 @@ -/* Multiple versions of memmove - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2016-2017 Free Software Foundation, Inc. +/* Multiple versions of memmove, memcpy, and mempcpy. + Copyright (C) 2017 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,57 +16,6 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. 
*/ -#if IS_IN (libc) - .text -ENTRY(__libc_memmove) - .type __libc_memmove, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __memmove_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 1f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 1f - lea __memmove_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __memmove_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __memmove_avx512_unaligned(%rip), %RAX_LP - ret -1: lea __memmove_avx_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz L(Fast_Unaligned_Load) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memmove_avx_unaligned_erms(%rip), %RAX_LP - ret -L(Fast_Unaligned_Load): - lea __memmove_sse2_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) - jz L(SSSE3) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memmove_sse2_unaligned_erms(%rip), %RAX_LP - ret -L(SSSE3): - HAS_CPU_FEATURE (SSSE3) - jz 2f - lea __memmove_ssse3_back(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Copy_Backward) - jnz 2f - lea __memmove_ssse3(%rip), %RAX_LP -2: ret -END(__libc_memmove) -#endif - #if IS_IN (libc) # define MEMMOVE_SYMBOL(p,s) p##_sse2_##s @@ -82,15 +30,16 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy) The speedup we get from using SSE2 instructions is likely eaten away by the indirect call in the PLT. 
*/ # define libc_hidden_builtin_def +# else +strong_alias (__memmove_sse2_unaligned, memmove) # endif -strong_alias (__libc_memmove, memmove) #endif #if !defined SHARED || !IS_IN (libc) weak_alias (__mempcpy, mempcpy) #endif -#include "../memmove.S" +#include <sysdeps/x86_64/memmove.S> #if defined SHARED && IS_IN (libc) # include <shlib-compat.h> diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S deleted file mode 100644 index af27703..0000000 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need memcpy before the initialization - happened. 
*/ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(__new_memcpy) - .type __new_memcpy, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __memcpy_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 1f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 1f - lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __memcpy_avx512_unaligned(%rip), %RAX_LP - ret -1: lea __memcpy_avx_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz L(Fast_Unaligned_Load) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP - ret -L(Fast_Unaligned_Load): - lea __memcpy_sse2_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) - jz L(SSSE3) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP - ret -L(SSSE3): - HAS_CPU_FEATURE (SSSE3) - jz 2f - lea __memcpy_ssse3_back(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Copy_Backward) - jnz 2f - lea __memcpy_ssse3(%rip), %RAX_LP -2: ret -END(__new_memcpy) - -# undef memcpy -# include <shlib-compat.h> -versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); -#endif diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c new file mode 100644 index 0000000..ad1b31f --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy.c @@ -0,0 +1,35 @@ +/* Multiple version of memcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) + +# define memcpy __redirect_memcpy +# include <string.h> +# undef memcpy + +# define SYMBOL_NAME memcpy +# include "memifunc.h" + +extern __typeof (__redirect_memcpy) __new_memcpy; + +libc_ifunc (__new_memcpy, memcpy_ifunc_selector ()); + +# include <shlib-compat.h> +versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); +#endif diff --git a/sysdeps/x86_64/multiarch/memifunc.h b/sysdeps/x86_64/multiarch/memifunc.h new file mode 100644 index 0000000..894b4a0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memifunc.h @@ -0,0 +1,95 @@ +/* Common definition for memcpy, mempcpy, and memmove implementation. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* memcpy, mempcpy, and memmove share the same logic for ifunc selection. 
*/ + +#include <cpu-features.h> +#include <ldsodefs.h> + +#define PASTER1(x,y) x ## _ ## y +#define EVALUATOR1(x,y) PASTER1(x,y) +#define PASTER2(x,y) __ ## x ## _ ## y +#define EVALUATOR2(x,y) PASTER2(x,y) + +/* Basically set '__redirect_<symbol>' to use as type definition, + '__<symbol>_<variant>' as the optimized implementation and + '<symbol>_ifunc_selector' as the IFUNC selector. */ +#define REDIRECT_NAME EVALUATOR1(__redirect, SYMBOL_NAME) +#define OPTIMIZE(name) EVALUATOR2(SYMBOL_NAME, name) +#define IFUNC_NAME EVALUATOR1(SYMBOL_NAME, ifunc_selector) + +extern __typeof (REDIRECT_NAME) OPTIMIZE(erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3_back) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_no_vzeroupper) + attribute_hidden; + +static inline void * +IFUNC_NAME (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)) + return OPTIMIZE(erms); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE(avx512_no_vzeroupper); + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE(avx512_unaligned_erms); + + return OPTIMIZE(avx512_unaligned); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + if (CPU_FEATURES_CPU_P 
(cpu_features, ERMS)) + return OPTIMIZE(avx_unaligned_erms); + + return OPTIMIZE(avx_unaligned); + } + + if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3) + || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE(sse2_unaligned_erms); + + return OPTIMIZE(sse2_unaligned); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward)) + return OPTIMIZE(ssse3_back); + + return OPTIMIZE(ssse3); +} diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c new file mode 100644 index 0000000..76372fc --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -0,0 +1,33 @@ +/* Multiple versions of memmove. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined SHARED && IS_IN (libc) + +# define memmove __redirect_memmove +# include <string.h> +# undef memmove + +# define SYMBOL_NAME memmove +# include "memifunc.h" + +extern __typeof (__redirect_memmove) __libc_memmove; + +libc_ifunc (__libc_memmove, memmove_ifunc_selector ()); +strong_alias (__libc_memmove, memmove); +#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S deleted file mode 100644 index b8b2b28..0000000 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ /dev/null @@ -1,73 +0,0 @@ -/* Multiple versions of mempcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need mempcpy before the initialization - happened. 
*/ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __mempcpy_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 1f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 1f - lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __mempcpy_avx512_unaligned(%rip), %RAX_LP - ret -1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz L(Fast_Unaligned_Load) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP - ret -L(Fast_Unaligned_Load): - lea __mempcpy_sse2_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) - jz L(SSSE3) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP - ret -L(SSSE3): - HAS_CPU_FEATURE (SSSE3) - jz 2f - lea __mempcpy_ssse3_back(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Copy_Backward) - jnz 2f - lea __mempcpy_ssse3(%rip), %RAX_LP -2: ret -END(__mempcpy) - -weak_alias (__mempcpy, mempcpy) -#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c new file mode 100644 index 0000000..e59bde2 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy.c @@ -0,0 +1,34 @@ +/* Multiple versions of mempcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) + +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# include <string.h> +# undef mempcpy +# undef __mempcpy + +# define SYMBOL_NAME mempcpy +# include "memifunc.h" + +libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, + mempcpy_ifunc_selector ()); +weak_alias (__mempcpy, mempcpy) +#endif