From patchwork Fri Jun 26 11:51:51 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stefan Liebler X-Patchwork-Id: 488785 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 95559140273 for ; Fri, 26 Jun 2015 22:02:19 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=sourceware.org header.i=@sourceware.org header.b=tnHZ1Toh; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:cc:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=EYQTp46O2wqpNNU2Te5prlD1y8j6KMC HC/b16cofbJrcihjIkB0HhrWdASQ3MJUgZEBu5KjIJxEP0WNdrLPvuLV+9ROr1lU ZlhfGMM50ryNikJDkhdOBtkZZktYEl+l+OJ9LY3kNhBYB7VenDEtPPRtIqK5wljY dycU1BtTKRWw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:cc:subject:date:message-id:in-reply-to :references; s=default; bh=WwnHzlH8ckk5MWD+Gzc+EobMovs=; b=tnHZ1 TohJQQZSiVIRltYJc7rRI9WOJlzYLyI74HIHNkOScgLyuTD3wWTJR4Gb8M39U7g4 rfRTXsSet9+/kqeYXntC3Vx2MsGanFBFiyro9vK5Z4d5CZrtA9x8BwK4qXiWonHT CpmVTTc1ke3pzRKf+X9w+XVRkSs3CIp57v1e80= Received: (qmail 53269 invoked by alias); 26 Jun 2015 12:02:12 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 53248 invoked by uid 89); 26 Jun 2015 12:02:11 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.0 required=5.0 tests=AWL, BAYES_00, KAM_LAZY_DOMAIN_SECURITY, RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: e06smtp10.uk.ibm.com X-MailFrom: stli@linux.vnet.ibm.com X-RcptTo: libc-alpha@sourceware.org From: Stefan Liebler To: libc-alpha@sourceware.org Cc: Stefan Liebler Subject: [PATCH 26/27] S390: Optimize wmemcmp. Date: Fri, 26 Jun 2015 13:51:51 +0200 Message-Id: <1435319512-22245-27-git-send-email-stli@linux.vnet.ibm.com> In-Reply-To: <1435319512-22245-1-git-send-email-stli@linux.vnet.ibm.com> References: <1435319512-22245-1-git-send-email-stli@linux.vnet.ibm.com> X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 15062611-0041-0000-0000-000004BCB3EC This patch provides optimized version of wmemcmp with the z13 vector instructions. ChangeLog: * sysdeps/s390/multiarch/wmemcmp-c.c: New File. * sysdeps/s390/multiarch/wmemcmp-vx.S: Likewise. * sysdeps/s390/multiarch/wmemcmp.c: Likewise. * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add wmemcmp functions. * sysdeps/s390/multiarch/ifunc-impl-list-common.c (__libc_ifunc_impl_list_common): Add ifunc test for wmemcmp. * benchtests/bench-wmemcmp.c: New File. * benchtests/Makefile (wcsmbs-bench): Add wmemcmp. --- benchtests/Makefile | 2 +- benchtests/bench-wmemcmp.c | 20 +++++ sysdeps/s390/multiarch/Makefile | 3 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 3 + sysdeps/s390/multiarch/wmemcmp-c.c | 26 ++++++ sysdeps/s390/multiarch/wmemcmp-vx.S | 149 +++++++++++++++++++++++++++++++ sysdeps/s390/multiarch/wmemcmp.c | 27 ++++++ 7 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 benchtests/bench-wmemcmp.c create mode 100644 sysdeps/s390/multiarch/wmemcmp-c.c create mode 100644 sysdeps/s390/multiarch/wmemcmp-vx.S create mode 100644 sysdeps/s390/multiarch/wmemcmp.c diff --git a/benchtests/Makefile b/benchtests/Makefile index 28a5170..dc2a5df 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -38,7 +38,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ strcoll wcsmbs-bench := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \ wcscmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn \ - wmemchr wmemset + wmemchr wmemset wmemcmp string-bench-all := $(string-bench) ${wcsmbs-bench} # We have to generate locales diff --git a/benchtests/bench-wmemcmp.c b/benchtests/bench-wmemcmp.c new file mode 100644 index 0000000..8b33f89 --- /dev/null +++ b/benchtests/bench-wmemcmp.c @@ -0,0 +1,20 @@ +/* Measure wmemcmp functions. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "bench-memcmp.c" diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index eac88e0..929a545 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -38,5 +38,6 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \ wcspbrk wcspbrk-vx wcspbrk-c \ wcscspn wcscspn-vx wcscspn-c \ wmemchr wmemchr-vx wmemchr-c \ - wmemset wmemset-vx wmemset-c + wmemset wmemset-vx wmemset-c \ + wmemcmp wmemcmp-vx wmemcmp-c endif diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index 93789ac..5ea258b 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -134,6 +134,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_VX_IMPL (memccpy); IFUNC_VX_IMPL (wmemset); + + IFUNC_VX_IMPL (wmemcmp); + #endif /* HAVE_S390_VX_ASM_SUPPORT */ return i; diff --git a/sysdeps/s390/multiarch/wmemcmp-c.c b/sysdeps/s390/multiarch/wmemcmp-c.c new file mode 100644 index 0000000..7ca9f46 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp-c.c @@ -0,0 +1,26 @@ +/* Default wmemcmp implementation for S/390. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMCMP __wmemcmp_c + +# include +extern __typeof (wmemcmp) __wmemcmp_c; + +# include +#endif diff --git a/sysdeps/s390/multiarch/wmemcmp-vx.S b/sysdeps/s390/multiarch/wmemcmp-vx.S new file mode 100644 index 0000000..eabfd2b --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp-vx.S @@ -0,0 +1,149 @@ +/* Vector Optimized 32/64 bit S/390 version of wmemcmp. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) + Compare at most n characters of two wchar_t-arrays. + + Register usage: + -r0=tmp + -r1=number of blocks + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wmemcmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + lghi %r5,0 /* current_len = 0. */ + + clgijh %r4,16,.Lgt16 + +.Lremaining: + aghi %r4,-1 /* vstl needs highest index. */ + vll %v16,%r4,0(%r2) + vll %v17,%r4,0(%r3) + vfenef %v18,%v16,%v17 /* Compare not equal. */ + vlgvb %r1,%v18,7 /* Load unequal index or 16 if not found. */ + clrj %r1,%r4,12,.Lfound2 /* r1 <= r4 -> unequal within loaded + bytes. */ + +.Lend_equal: + lghi %r2,0 + br %r14 + +.Lfound: + /* vfenezf found an unequal element or zero. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r1,%v18,7 /* Extract not equal byte-index. */ +.Lfound2: + srl %r1,2 /* And convert it to character-index. */ + vlgvf %r0,%v16,0(%r1) /* Load character-values. */ + vlgvf %r1,%v17,0(%r1) + cr %r0,%r1 + je .Lend_equal + lghi %r2,1 + lghi %r1,-1 + locgrl %r2,%r1 + br %r14 + +.Lgt16: + clgijh %r4,64,.Lpreloop64 + +.Lpreloop16: + srlg %r1,%r4,4 /* Split into 16byte blocks */ +.Lloop16: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + aghi %r5,16 + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + brctg %r1,.Lloop16 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,15 /* Get remaining bytes */ + locgre %r2,%r4 + ber %r14 + la %r2,0(%r5,%r2) + la %r3,0(%r5,%r3) + j .Lremaining + +.Lpreloop64: + srlg %r1,%r4,6 /* Split into 64byte blocks */ +.Lloop64: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + + vl %v16,16(%r5,%r2) + vl %v17,16(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,32(%r5,%r2) + vl %v17,32(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,48(%r5,%r2) + vl %v17,48(%r5,%r3) + aghi %r5,64 + vfenefs %v18,%v16,%v17 + jno .Lfound + + brctg %r1,.Lloop64 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,63 /* Get remaining bytes */ + locgre %r2,%r4 + ber %r14 + clgijh %r4,16,.Lpreloop16 + la %r2,0(%r5,%r2) + la %r3,0(%r5,%r3) + j .Lremaining +END(__wmemcmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemcmp.c b/sysdeps/s390/multiarch/wmemcmp.c new file mode 100644 index 0000000..aed0ede --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp.c @@ -0,0 +1,27 @@ +/* Multiple versions of wmemcmp. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include +# include + +s390_vx_libc_ifunc2 (__wmemcmp, wmemcmp) + +#else +# include +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */