Message ID | 20221129160257.1947346-1-rajis@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | powerpc64: Remove old strncmp optimization | expand |
On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote: > This patch removes the power4 strncmp optimization for powerpc64 and uses > __strncmp_ppc implementation instead. Currently, both power4 and ppc > IFUNC variants result in similar performance. What cpu was used to test performance? I'd be curious if this holds for something like the venerable ppc970. > > Tested on ppc64le with and without --disable-multi-arch flag. > --- > sysdeps/powerpc/powerpc64/multiarch/Makefile | 3 +- > .../powerpc64/multiarch/ifunc-impl-list.c | 2 - > .../powerpc64/multiarch/strncmp-power4.S | 23 -- > sysdeps/powerpc/powerpc64/multiarch/strncmp.c | 3 - > sysdeps/powerpc/powerpc64/power4/strncmp.S | 225 ------------------ > 5 files changed, 1 insertion(+), 255 deletions(-) > delete mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S > delete mode 100644 sysdeps/powerpc/powerpc64/power4/strncmp.S > > diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile > index 6f2436b660..ed25e234ba 100644 > --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile > +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile > @@ -12,8 +12,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ > strnlen-power8 strnlen-power7 strnlen-ppc64 \ > strcasecmp-power7 strcasecmp_l-power7 \ > strncase-power7 strncase_l-power7 \ > - strncmp-power8 strncmp-power7 \ > - strncmp-power4 strncmp-ppc64 \ > + strncmp-power8 strncmp-power7 strncmp-ppc64 \ > strchr-power8 strchr-power7 strchr-ppc64 \ > strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ > strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \ > diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c > index 5a3c7a5886..238f784146 100644 > --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c > @@ -171,8 +171,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __strncmp_power8) > IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_ARCH_2_06, > __strncmp_power7) > - IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4, > - __strncmp_power4) > IFUNC_IMPL_ADD (array, i, strncmp, 1, > __strncmp_ppc)) > > diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S > deleted file mode 100644 > index df6ad6ae9c..0000000000 > --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S > +++ /dev/null > @@ -1,23 +0,0 @@ > -/* Copyright (C) 2013-2022 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. */ > - > -#define STRNCMP __strncmp_power4 > - > -#undef libc_hidden_builtin_def > -#define libc_hidden_builtin_def(name) > - > -#include <sysdeps/powerpc/powerpc64/power4/strncmp.S> > diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c > index da3de73c27..7400ed3b9a 100644 > --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c > +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c > @@ -26,7 +26,6 @@ > # include "init-arch.h" > > extern __typeof (strncmp) __strncmp_ppc attribute_hidden; > -extern __typeof (strncmp) __strncmp_power4 attribute_hidden; > extern __typeof (strncmp) __strncmp_power7 attribute_hidden; > extern __typeof (strncmp) __strncmp_power8 attribute_hidden; > # ifdef __LITTLE_ENDIAN__ > @@ -46,7 +45,5 @@ libc_ifunc_redirected (__redirect_strncmp, strncmp, > ? __strncmp_power8 > : (hwcap & PPC_FEATURE_ARCH_2_06) > ? __strncmp_power7 > - : (hwcap & PPC_FEATURE_POWER4) > - ? __strncmp_power4 > : __strncmp_ppc); Minor nit, does the spacing of __strcncmp_ppc need updating here too?
On 29/11/22 19:37, Paul E Murphy via Libc-alpha wrote: > > > On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote: >> This patch removes the power4 strncmp optimization for powerpc64 and uses >> __strncmp_ppc implementation instead. Currently, both power4 and ppc >> IFUNC variants result in similar performance. > What cpu was used to test performance? I'd be curious if this holds for something like the venerable ppc970. Do we really care, specially because I am not sure how it was evaluated on power4 chip? Otherwise, LGTM.
On 11/29/22 4:37 PM, Paul E Murphy wrote: > > > On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote: >> This patch removes the power4 strncmp optimization for powerpc64 and >> uses >> __strncmp_ppc implementation instead. Currently, both power4 and ppc >> IFUNC variants result in similar performance. > What cpu was used to test performance? I'd be curious if this holds > for something like the venerable ppc970. Compared __strncmp_power4 and __strncmp_ppc benchtest numbers on POWER8 and POWER10 systems. > >> >> Tested on ppc64le with and without --disable-multi-arch flag. >> --- >> sysdeps/powerpc/powerpc64/multiarch/Makefile | 3 +- >> .../powerpc64/multiarch/ifunc-impl-list.c | 2 - >> .../powerpc64/multiarch/strncmp-power4.S | 23 -- >> sysdeps/powerpc/powerpc64/multiarch/strncmp.c | 3 - >> sysdeps/powerpc/powerpc64/power4/strncmp.S | 225 ------------------ >> 5 files changed, 1 insertion(+), 255 deletions(-) >> delete mode 100644 >> sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S >> delete mode 100644 sysdeps/powerpc/powerpc64/power4/strncmp.S >> >> diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile >> b/sysdeps/powerpc/powerpc64/multiarch/Makefile >> index 6f2436b660..ed25e234ba 100644 >> --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile >> +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile >> @@ -12,8 +12,7 @@ sysdep_routines += memcpy-power8-cached >> memcpy-power7 memcpy-a2 memcpy-power6 \ >> strnlen-power8 strnlen-power7 strnlen-ppc64 \ >> strcasecmp-power7 strcasecmp_l-power7 \ >> strncase-power7 strncase_l-power7 \ >> - strncmp-power8 strncmp-power7 \ >> - strncmp-power4 strncmp-ppc64 \ >> + strncmp-power8 strncmp-power7 strncmp-ppc64 \ >> strchr-power8 strchr-power7 strchr-ppc64 \ >> strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ >> strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \ >> diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c >> b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c >> index 5a3c7a5886..238f784146 100644 >> --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c >> +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c >> @@ -171,8 +171,6 @@ __libc_ifunc_impl_list (const char *name, struct >> libc_ifunc_impl *array, >> __strncmp_power8) >> IFUNC_IMPL_ADD (array, i, strncmp, hwcap & >> PPC_FEATURE_ARCH_2_06, >> __strncmp_power7) >> - IFUNC_IMPL_ADD (array, i, strncmp, hwcap & >> PPC_FEATURE_POWER4, >> - __strncmp_power4) >> IFUNC_IMPL_ADD (array, i, strncmp, 1, >> __strncmp_ppc)) >> >> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S >> b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S >> deleted file mode 100644 >> index df6ad6ae9c..0000000000 >> --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S >> +++ /dev/null >> @@ -1,23 +0,0 @@ >> -/* Copyright (C) 2013-2022 Free Software Foundation, Inc. >> - This file is part of the GNU C Library. >> - >> - The GNU C Library is free software; you can redistribute it and/or >> - modify it under the terms of the GNU Lesser General Public >> - License as published by the Free Software Foundation; either >> - version 2.1 of the License, or (at your option) any later version. >> - >> - The GNU C Library is distributed in the hope that it will be useful, >> - but WITHOUT ANY WARRANTY; without even the implied warranty of >> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> - Lesser General Public License for more details. >> - >> - You should have received a copy of the GNU Lesser General Public >> - License along with the GNU C Library; if not, see >> - <https://www.gnu.org/licenses/>. */ >> - >> -#define STRNCMP __strncmp_power4 >> - >> -#undef libc_hidden_builtin_def >> -#define libc_hidden_builtin_def(name) >> - >> -#include <sysdeps/powerpc/powerpc64/power4/strncmp.S> >> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c >> b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c >> index da3de73c27..7400ed3b9a 100644 >> --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c >> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c >> @@ -26,7 +26,6 @@ >> # include "init-arch.h" >> >> extern __typeof (strncmp) __strncmp_ppc attribute_hidden; >> -extern __typeof (strncmp) __strncmp_power4 attribute_hidden; >> extern __typeof (strncmp) __strncmp_power7 attribute_hidden; >> extern __typeof (strncmp) __strncmp_power8 attribute_hidden; >> # ifdef __LITTLE_ENDIAN__ >> @@ -46,7 +45,5 @@ libc_ifunc_redirected (__redirect_strncmp, strncmp, >> ? __strncmp_power8 >> : (hwcap & PPC_FEATURE_ARCH_2_06) >> ? __strncmp_power7 >> - : (hwcap & PPC_FEATURE_POWER4) >> - ? __strncmp_power4 >> : __strncmp_ppc); > Minor nit, does the spacing of __strcncmp_ppc need updating here too? Ack.
On 11/30/22 7:49 AM, Adhemerval Zanella Netto wrote: > > > On 29/11/22 19:37, Paul E Murphy via Libc-alpha wrote: >> >> >> On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote: >>> This patch removes the power4 strncmp optimization for powerpc64 and uses >>> __strncmp_ppc implementation instead. Currently, both power4 and ppc >>> IFUNC variants result in similar performance. >> What cpu was used to test performance? I'd be curious if this holds for something like the venerable ppc970. > > > Do we really care, specially because I am not sure how it was evaluated on > power4 chip? On the merits of performance, I don't think this is sufficient justification. We should be more detailed. If the justification is tidying up, this patch is OK. That should be made explicit in the commit message. It does make me wonder, when is it OK to drop cpu specific optimizations? I suspect there aren't many running a modern BE ppc distro with P7 or older.
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index 6f2436b660..ed25e234ba 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -12,8 +12,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ strnlen-power8 strnlen-power7 strnlen-ppc64 \ strcasecmp-power7 strcasecmp_l-power7 \ strncase-power7 strncase_l-power7 \ - strncmp-power8 strncmp-power7 \ - strncmp-power4 strncmp-ppc64 \ + strncmp-power8 strncmp-power7 strncmp-ppc64 \ strchr-power8 strchr-power7 strchr-ppc64 \ strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \ diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 5a3c7a5886..238f784146 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -171,8 +171,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncmp_power8) IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_ARCH_2_06, __strncmp_power7) - IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4, - __strncmp_power4) IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ppc)) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S deleted file mode 100644 index df6ad6ae9c..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (C) 2013-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#define STRNCMP __strncmp_power4 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include <sysdeps/powerpc/powerpc64/power4/strncmp.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c index da3de73c27..7400ed3b9a 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -26,7 +26,6 @@ # include "init-arch.h" extern __typeof (strncmp) __strncmp_ppc attribute_hidden; -extern __typeof (strncmp) __strncmp_power4 attribute_hidden; extern __typeof (strncmp) __strncmp_power7 attribute_hidden; extern __typeof (strncmp) __strncmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ @@ -46,7 +45,5 @@ libc_ifunc_redirected (__redirect_strncmp, strncmp, ? __strncmp_power8 : (hwcap & PPC_FEATURE_ARCH_2_06) ? __strncmp_power7 - : (hwcap & PPC_FEATURE_POWER4) - ? __strncmp_power4 : __strncmp_ppc); #endif diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S deleted file mode 100644 index 8c82518504..0000000000 --- a/sysdeps/powerpc/powerpc64/power4/strncmp.S +++ /dev/null @@ -1,225 +0,0 @@ -/* Optimized strcmp implementation for PowerPC64. - Copyright (C) 2003-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#ifndef STRNCMP -# define STRNCMP strncmp -#endif - -/* See strlen.s for comments on how the end-of-string testing works. */ - -/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ - -ENTRY_TOCLESS (STRNCMP, 4) - CALL_MCOUNT 3 - -#define rTMP2 r0 -#define rRTN r3 -#define rSTR1 r3 /* first string arg */ -#define rSTR2 r4 /* second string arg */ -#define rN r5 /* max string length */ -#define rWORD1 r6 /* current word in s1 */ -#define rWORD2 r7 /* current word in s2 */ -#define rWORD3 r10 -#define rWORD4 r11 -#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ -#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ -#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ -#define rBITDIF r11 /* bits that differ in s1 & s2 words */ -#define rTMP r12 - - dcbt 0,rSTR1 - or rTMP, rSTR2, rSTR1 - lis r7F7F, 0x7f7f - dcbt 0,rSTR2 - clrldi. rTMP, rTMP, 61 - cmpldi cr1, rN, 0 - lis rFEFE, -0x101 - bne L(unaligned) -/* We are doubleword aligned so set up for two loops. first a double word - loop, then fall into the byte loop if any residual. */ - srdi. rTMP, rN, 3 - clrldi rN, rN, 61 - addi rFEFE, rFEFE, -0x101 - addi r7F7F, r7F7F, 0x7f7f - cmpldi cr1, rN, 0 - beq L(unaligned) - - mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ - ld rWORD1, 0(rSTR1) - ld rWORD2, 0(rSTR2) - sldi rTMP, rFEFE, 32 - insrdi r7F7F, r7F7F, 32, 0 - add rFEFE, rFEFE, rTMP - b L(g1) - -L(g0): - ldu rWORD1, 8(rSTR1) - bne- cr1, L(different) - ldu rWORD2, 8(rSTR2) -L(g1): add rTMP, rFEFE, rWORD1 - nor rNEG, r7F7F, rWORD1 - bdz L(tail) - and. rTMP, rTMP, rNEG - cmpd cr1, rWORD1, rWORD2 - beq+ L(g0) - -/* OK. We've hit the end of the string. We need to be careful that - we don't compare two strings as different because of gunk beyond - the end of the strings... */ - -#ifdef __LITTLE_ENDIAN__ -L(endstring): - addi rTMP2, rTMP, -1 - beq cr1, L(equal) - andc rTMP2, rTMP2, rTMP - rldimi rTMP2, rTMP2, 1, 0 - and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ - and rWORD1, rWORD1, rTMP2 - cmpd cr1, rWORD1, rWORD2 - beq cr1, L(equal) - xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ - neg rNEG, rBITDIF - and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ - cntlzd rNEG, rNEG /* bitcount of the bit. */ - andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ - sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ - sld rWORD2, rWORD2, rNEG - xor. rBITDIF, rWORD1, rWORD2 - sub rRTN, rWORD1, rWORD2 - blt- L(highbit) - sradi rRTN, rRTN, 63 /* must return an int. */ - ori rRTN, rRTN, 1 - blr -L(equal): - li rRTN, 0 - blr - -L(different): - ld rWORD1, -8(rSTR1) - xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ - neg rNEG, rBITDIF - and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ - cntlzd rNEG, rNEG /* bitcount of the bit. */ - andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ - sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ - sld rWORD2, rWORD2, rNEG - xor. rBITDIF, rWORD1, rWORD2 - sub rRTN, rWORD1, rWORD2 - blt- L(highbit) - sradi rRTN, rRTN, 63 - ori rRTN, rRTN, 1 - blr -L(highbit): - sradi rRTN, rWORD2, 63 - ori rRTN, rRTN, 1 - blr - -#else -L(endstring): - and rTMP, r7F7F, rWORD1 - beq cr1, L(equal) - add rTMP, rTMP, r7F7F - xor. rBITDIF, rWORD1, rWORD2 - andc rNEG, rNEG, rTMP - blt- L(highbit) - cntlzd rBITDIF, rBITDIF - cntlzd rNEG, rNEG - addi rNEG, rNEG, 7 - cmpd cr1, rNEG, rBITDIF - sub rRTN, rWORD1, rWORD2 - blt- cr1, L(equal) - sradi rRTN, rRTN, 63 /* must return an int. */ - ori rRTN, rRTN, 1 - blr -L(equal): - li rRTN, 0 - blr - -L(different): - ld rWORD1, -8(rSTR1) - xor. rBITDIF, rWORD1, rWORD2 - sub rRTN, rWORD1, rWORD2 - blt- L(highbit) - sradi rRTN, rRTN, 63 - ori rRTN, rRTN, 1 - blr -L(highbit): - sradi rRTN, rWORD2, 63 - ori rRTN, rRTN, 1 - blr -#endif - -/* Oh well. In this case, we just do a byte-by-byte comparison. */ - .align 4 -L(tail): - and. rTMP, rTMP, rNEG - cmpd cr1, rWORD1, rWORD2 - bne- L(endstring) - addi rSTR1, rSTR1, 8 - bne- cr1, L(different) - addi rSTR2, rSTR2, 8 - cmpldi cr1, rN, 0 -L(unaligned): - mtctr rN /* Power4 wants mtctr 1st in dispatch group */ - ble cr1, L(ux) -L(uz): - lbz rWORD1, 0(rSTR1) - lbz rWORD2, 0(rSTR2) - .align 4 -L(u1): - cmpdi cr1, rWORD1, 0 - bdz L(u4) - cmpd rWORD1, rWORD2 - beq- cr1, L(u4) - bne- L(u4) - lbzu rWORD3, 1(rSTR1) - lbzu rWORD4, 1(rSTR2) - cmpdi cr1, rWORD3, 0 - bdz L(u3) - cmpd rWORD3, rWORD4 - beq- cr1, L(u3) - bne- L(u3) - lbzu rWORD1, 1(rSTR1) - lbzu rWORD2, 1(rSTR2) - cmpdi cr1, rWORD1, 0 - bdz L(u4) - cmpd rWORD1, rWORD2 - beq- cr1, L(u4) - bne- L(u4) - lbzu rWORD3, 1(rSTR1) - lbzu rWORD4, 1(rSTR2) - cmpdi cr1, rWORD3, 0 - bdz L(u3) - cmpd rWORD3, rWORD4 - beq- cr1, L(u3) - bne- L(u3) - lbzu rWORD1, 1(rSTR1) - lbzu rWORD2, 1(rSTR2) - b L(u1) - -L(u3): sub rRTN, rWORD3, rWORD4 - blr -L(u4): sub rRTN, rWORD1, rWORD2 - blr -L(ux): - li rRTN, 0 - blr -END (STRNCMP) -libc_hidden_builtin_def (strncmp)