Message ID | 1507200204-23449-1-git-send-email-raji@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
Series | powerpc: Fix IFUNC for memrchr | expand |
On 05/10/2017 07:43, Rajalakshmi Srinivasaraghavan wrote: > Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in > ifunc list. Also handled discarding unwanted bytes for > unaligned inputs in power8 optimization. > > 2017-10-05 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> > > * sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert > back to powerpc32 file. > * sysdeps/powerpc/powerpc64/multiarch/memrchr.c > (memrchr): Add __memrchr_power8 to ifunc list. > * sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask > extra bytes for unaligned inputs. > --- > .../powerpc/powerpc64/multiarch/memrchr-ppc64.c | 15 +------------- > sysdeps/powerpc/powerpc64/multiarch/memrchr.c | 3 +++ > sysdeps/powerpc/powerpc64/power8/memrchr.S | 24 ++++++++++++++++++++++ > 3 files changed, 28 insertions(+), 14 deletions(-) > > diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c > index be24689336..e2d53ac0f4 100644 > --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c > +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c > @@ -15,17 +15,4 @@ > You should have received a copy of the GNU Lesser General Public > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > - > -#include <string.h> > - > -#define MEMRCHR __memrchr_ppc > - > -#undef weak_alias > -#define weak_alias(a, b) > - > -#undef libc_hidden_builtin_def > -#define libc_hidden_builtin_def(name) > - > -extern __typeof (memrchr) __memrchr_ppc attribute_hidden; > - > -#include <string/memrchr.c> > +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> > diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c > index fb09fdf89c..441598ace5 100644 > --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c > +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c > @@ -23,10 +23,13 @@ > > extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; > extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; > +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden; > > /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle > ifunc symbol properly. */ > libc_ifunc (__memrchr, > + (hwcap2 & PPC_FEATURE2_ARCH_2_07) > + ? __memrchr_power8 : > (hwcap & PPC_FEATURE_HAS_VSX) > ? __memrchr_power7 > : __memrchr_ppc); I think indentation is a little off here, compared to other ifunc implementations. Something like. (hwcap2 & PPC_FEATURE2_ARCH_2_07) ? __memrchr_power8 : (hwcap & PPC_FEATURE_HAS_VSX) ? __memrchr_power7 : __memrchr_ppc); > diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S > index 521b3c84a2..22b01ec69c 100644 > --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S > +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S > @@ -233,11 +233,35 @@ L(found): > #endif > addi r8, r8, 63 > sub r3, r8, r6 /* Compute final address. */ > + cmpld cr7, r3, r10 > + bgelr cr7 > + li r3, 0 > blr > > /* Found a match in last 16 bytes. */ > .align 4 > L(found_16B): > + cmpld r8, r10 /* Are we on the last QW? */ > + bge L(last) > + /* Now discard bytes before starting address. */ > + sub r9, r10, r8 > + MTVRD(v9, r9) > + vspltisb v8, 3 > + /* Mask unwanted bytes. */ > +#ifdef __LITTLE_ENDIAN__ > + lvsr v7, 0, r10 > + vperm v6, v0, v6, v7 > + vsldoi v9, v0, v9, 8 > + vsl v9, v9, v8 > + vslo v6, v6, v9 > +#else > + lvsl v7, 0, r10 > + vperm v6, v6, v0, v7 > + vsldoi v9, v0, v9, 8 > + vsl v9, v9, v8 > + vsro v6, v6, v9 > +#endif > +L(last): > /* Permute the first bit of each byte into bits 48-63. */ > VBPERMQ(v6, v6, v10) > /* Shift each component into its correct position for merging. */ Isn't already covered by a testcase? If not, could you add one to test-memrchr?
On 10/05/2017 07:18 PM, Adhemerval Zanella wrote: > > > On 05/10/2017 07:43, Rajalakshmi Srinivasaraghavan wrote: >> Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in >> ifunc list. Also handled discarding unwanted bytes for >> unaligned inputs in power8 optimization. >> >> 2017-10-05 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> >> >> * sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert >> back to powerpc32 file. >> * sysdeps/powerpc/powerpc64/multiarch/memrchr.c >> (memrchr): Add __memrchr_power8 to ifunc list. >> * sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask >> extra bytes for unaligned inputs. >> --- >> .../powerpc/powerpc64/multiarch/memrchr-ppc64.c | 15 +------------- >> sysdeps/powerpc/powerpc64/multiarch/memrchr.c | 3 +++ >> sysdeps/powerpc/powerpc64/power8/memrchr.S | 24 ++++++++++++++++++++++ >> 3 files changed, 28 insertions(+), 14 deletions(-) >> >> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c >> index be24689336..e2d53ac0f4 100644 >> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c >> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c >> @@ -15,17 +15,4 @@ >> You should have received a copy of the GNU Lesser General Public >> License along with the GNU C Library; if not, see >> <http://www.gnu.org/licenses/>. */ >> - >> -#include <string.h> >> - >> -#define MEMRCHR __memrchr_ppc >> - >> -#undef weak_alias >> -#define weak_alias(a, b) >> - >> -#undef libc_hidden_builtin_def >> -#define libc_hidden_builtin_def(name) >> - >> -extern __typeof (memrchr) __memrchr_ppc attribute_hidden; >> - >> -#include <string/memrchr.c> >> +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> >> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c >> index fb09fdf89c..441598ace5 100644 >> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c >> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c >> @@ -23,10 +23,13 @@ >> >> extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; >> extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; >> +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden; >> >> /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle >> ifunc symbol properly. */ >> libc_ifunc (__memrchr, >> + (hwcap2 & PPC_FEATURE2_ARCH_2_07) >> + ? __memrchr_power8 : >> (hwcap & PPC_FEATURE_HAS_VSX) >> ? __memrchr_power7 >> : __memrchr_ppc); > > I think indentation is a little off here, compared to other ifunc implementations. > Something like. > > (hwcap2 & PPC_FEATURE2_ARCH_2_07) > ? __memrchr_power8 : > (hwcap & PPC_FEATURE_HAS_VSX) > ? __memrchr_power7 > : __memrchr_ppc); Ack > >> diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S >> index 521b3c84a2..22b01ec69c 100644 >> --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S >> +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S >> @@ -233,11 +233,35 @@ L(found): >> #endif >> addi r8, r8, 63 >> sub r3, r8, r6 /* Compute final address. */ >> + cmpld cr7, r3, r10 >> + bgelr cr7 >> + li r3, 0 >> blr >> >> /* Found a match in last 16 bytes. */ >> .align 4 >> L(found_16B): >> + cmpld r8, r10 /* Are we on the last QW? */ >> + bge L(last) >> + /* Now discard bytes before starting address. */ >> + sub r9, r10, r8 >> + MTVRD(v9, r9) >> + vspltisb v8, 3 >> + /* Mask unwanted bytes. */ >> +#ifdef __LITTLE_ENDIAN__ >> + lvsr v7, 0, r10 >> + vperm v6, v0, v6, v7 >> + vsldoi v9, v0, v9, 8 >> + vsl v9, v9, v8 >> + vslo v6, v6, v9 >> +#else >> + lvsl v7, 0, r10 >> + vperm v6, v6, v0, v7 >> + vsldoi v9, v0, v9, 8 >> + vsl v9, v9, v8 >> + vsro v6, v6, v9 >> +#endif >> +L(last): >> /* Permute the first bit of each byte into bits 48-63. */ >> VBPERMQ(v6, v6, v10) >> /* Shift each component into its correct position for merging. */ > > Isn't already covered by a testcase? If not, could you add one to test-memrchr? Already covered in string/tester.c. > >
> Il giorno 05 ott 2017, alle ore 10:50, Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> ha scritto: > > > >> On 10/05/2017 07:18 PM, Adhemerval Zanella wrote: >>> On 05/10/2017 07:43, Rajalakshmi Srinivasaraghavan wrote: >>> Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in >>> ifunc list. Also handled discarding unwanted bytes for >>> unaligned inputs in power8 optimization. >>> >>> 2017-10-05 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> >>> >>> * sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert >>> back to powerpc32 file. >>> * sysdeps/powerpc/powerpc64/multiarch/memrchr.c >>> (memrchr): Add __memrchr_power8 to ifunc list. >>> * sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask >>> extra bytes for unaligned inputs. >>> --- >>> .../powerpc/powerpc64/multiarch/memrchr-ppc64.c | 15 +------------- >>> sysdeps/powerpc/powerpc64/multiarch/memrchr.c | 3 +++ >>> sysdeps/powerpc/powerpc64/power8/memrchr.S | 24 ++++++++++++++++++++++ >>> 3 files changed, 28 insertions(+), 14 deletions(-) >>> >>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c >>> index be24689336..e2d53ac0f4 100644 >>> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c >>> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c >>> @@ -15,17 +15,4 @@ >>> You should have received a copy of the GNU Lesser General Public >>> License along with the GNU C Library; if not, see >>> <http://www.gnu.org/licenses/>. */ >>> - >>> -#include <string.h> >>> - >>> -#define MEMRCHR __memrchr_ppc >>> - >>> -#undef weak_alias >>> -#define weak_alias(a, b) >>> - >>> -#undef libc_hidden_builtin_def >>> -#define libc_hidden_builtin_def(name) >>> - >>> -extern __typeof (memrchr) __memrchr_ppc attribute_hidden; >>> - >>> -#include <string/memrchr.c> >>> +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> >>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c >>> index fb09fdf89c..441598ace5 100644 >>> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c >>> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c >>> @@ -23,10 +23,13 @@ >>> extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; >>> extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; >>> +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden; >>> /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle >>> ifunc symbol properly. */ >>> libc_ifunc (__memrchr, >>> + (hwcap2 & PPC_FEATURE2_ARCH_2_07) >>> + ? __memrchr_power8 : >>> (hwcap & PPC_FEATURE_HAS_VSX) >>> ? __memrchr_power7 >>> : __memrchr_ppc); >> I think indentation is a little off here, compared to other ifunc implementations. >> Something like. >> (hwcap2 & PPC_FEATURE2_ARCH_2_07) >> ? __memrchr_power8 : >> (hwcap & PPC_FEATURE_HAS_VSX) >> ? __memrchr_power7 >> : __memrchr_ppc); > > Ack > >>> diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S >>> index 521b3c84a2..22b01ec69c 100644 >>> --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S >>> +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S >>> @@ -233,11 +233,35 @@ L(found): >>> #endif >>> addi r8, r8, 63 >>> sub r3, r8, r6 /* Compute final address. */ >>> + cmpld cr7, r3, r10 >>> + bgelr cr7 >>> + li r3, 0 >>> blr >>> /* Found a match in last 16 bytes. */ >>> .align 4 >>> L(found_16B): >>> + cmpld r8, r10 /* Are we on the last QW? */ >>> + bge L(last) >>> + /* Now discard bytes before starting address. */ >>> + sub r9, r10, r8 >>> + MTVRD(v9, r9) >>> + vspltisb v8, 3 >>> + /* Mask unwanted bytes. */ >>> +#ifdef __LITTLE_ENDIAN__ >>> + lvsr v7, 0, r10 >>> + vperm v6, v0, v6, v7 >>> + vsldoi v9, v0, v9, 8 >>> + vsl v9, v9, v8 >>> + vslo v6, v6, v9 >>> +#else >>> + lvsl v7, 0, r10 >>> + vperm v6, v6, v0, v7 >>> + vsldoi v9, v0, v9, 8 >>> + vsl v9, v9, v8 >>> + vsro v6, v6, v9 >>> +#endif >>> +L(last): >>> /* Permute the first bit of each byte into bits 48-63. */ >>> VBPERMQ(v6, v6, v10) >>> /* Shift each component into its correct position for merging. */ >> Isn't already covered by a testcase? If not, could you add one to test-memrchr? > > Already covered in string/tester.c. LGTM then. > -- > Thanks > Rajalakshmi S >
The commit of this patch appears to be missing an update to the ChangeLog file.
On 10/06/2017 08:31 PM, Joseph Myers wrote: > The commit of this patch appears to be missing an update to the ChangeLog > file. > Thanks for the note. Done.
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c index be24689336..e2d53ac0f4 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c @@ -15,17 +15,4 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ - -#include <string.h> - -#define MEMRCHR __memrchr_ppc - -#undef weak_alias -#define weak_alias(a, b) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -extern __typeof (memrchr) __memrchr_ppc attribute_hidden; - -#include <string/memrchr.c> +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c index fb09fdf89c..441598ace5 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c @@ -23,10 +23,13 @@ extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden; /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ libc_ifunc (__memrchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memrchr_power8 : (hwcap & PPC_FEATURE_HAS_VSX) ? __memrchr_power7 : __memrchr_ppc); diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S index 521b3c84a2..22b01ec69c 100644 --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S @@ -233,11 +233,35 @@ L(found): #endif addi r8, r8, 63 sub r3, r8, r6 /* Compute final address. */ + cmpld cr7, r3, r10 + bgelr cr7 + li r3, 0 blr /* Found a match in last 16 bytes. */ .align 4 L(found_16B): + cmpld r8, r10 /* Are we on the last QW? */ + bge L(last) + /* Now discard bytes before starting address. */ + sub r9, r10, r8 + MTVRD(v9, r9) + vspltisb v8, 3 + /* Mask unwanted bytes. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r10 + vperm v6, v0, v6, v7 + vsldoi v9, v0, v9, 8 + vsl v9, v9, v8 + vslo v6, v6, v9 +#else + lvsl v7, 0, r10 + vperm v6, v6, v0, v7 + vsldoi v9, v0, v9, 8 + vsl v9, v9, v8 + vsro v6, v6, v9 +#endif +L(last): /* Permute the first bit of each byte into bits 48-63. */ VBPERMQ(v6, v6, v10) /* Shift each component into its correct position for merging. */