From patchwork Wed Dec 21 23:05:52 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 707996 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3tkVgV4X5Vz9t14 for ; Thu, 22 Dec 2016 10:08:06 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.b="ZIgddtPd"; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=mcQKyUO3hUhKXRXsowfi+c9iRTjU6DW ZqhqPNS8tbBk1ob4ngNlwuv6KGRDprjNEFV8LVpKBVnOGnEPmRCWmTlNch+YSGZP XzJjxh0oeP8zha4Wkr1V2BMiifsxh/ihyMucIjT8IBtyfEPp5MTb9MrGlVC1ZfuJ x1DJDfjZSzNE= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; s=default; bh=Pg/NwqWcU5/Jz+IKEb2rUEQ8LOo=; b=ZIgdd tPdYOlzKOwaKuvJQWe2m14TzTphJZJ6iahcQN1Nh1pnwC3D6yZ1FiJ8x2Gy7dlyK 5pmB6hVJPqlBG4bJnf4oBl3j46AmvGvKxi14I3CUCiwHOvLNH6cBoC/85FjvGQSZ ypYbhNP/dtaIkP7MisLdUNsG32IS41qMOd3Iuo= Received: (qmail 51843 invoked by alias); 21 Dec 2016 23:06:23 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 51600 invoked by uid 89); 21 Dec 2016 23:06:22 -0000 Authentication-Results: sourceware.org; 
auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=no version=3.3.2 spammy=0x80, caveats, 2090 X-HELO: mail-pg0-f65.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:sender:from:to:subject:date:message-id :in-reply-to:references; bh=NHY/cK5Rllv6EwzLyVIqycoAxV59nKLDWBHAXO4qZUY=; b=KFXDflZbNSdr+RIuItgSx+NQVKVPogDulBQ/ULz0JZLYFhXtUYOAYD6qQDkt1RtCJi pOPhfUIAb1aQ6+N/vnv19mxLqCzQpMfaQWkAqTrqe7PH/Ec+1+G+mCQujPLho82r0DPy 8fop5IWkafEGMiEarHhR/TocvRSbFekxXUcU37ppbuJa6MZ7bQMlCDmu0n0/xEeqxvzv bBNZndK/lz4Wd3oKZTWHi2tPQnT6txj9CyRtChs/J9rvP4L2i82fzDkvrC2tRLmF2bmY bDjtQD/RfowGi681Sp7SgLg4zvPQqXXxxJ0x4AMDo0Ntz11TdNn6oeOAr4fqWY8NDfEq QkQw== X-Gm-Message-State: AIkVDXL3WjTOQIj7E1LLOuYWKd8l0N3Ey71+DvS76aXJjVpplv+qAFYC+juyd9t/rcBDqQ== X-Received: by 10.98.152.212 with SMTP id d81mr6294827pfk.12.1482361570124; Wed, 21 Dec 2016 15:06:10 -0800 (PST) From: Richard Henderson To: libc-alpha@sourceware.org Subject: [PATCH v2 03/16] Improve generic strlen Date: Wed, 21 Dec 2016 15:05:52 -0800 Message-Id: <20161221230605.28638-4-rth@twiddle.net> In-Reply-To: <20161221230605.28638-1-rth@twiddle.net> References: <20161221230605.28638-1-rth@twiddle.net> Extract has_zero and index_first_zero tests into headers that can be tailored for the architecture. [BZ #5806] * sysdeps/generic/string-fza.h: New file. * sysdeps/generic/string-fzb.h: New file. * sysdeps/generic/string-fzi.h: New file. * sysdeps/generic/string-extbyte.h: New file. * string/strlen.c: Use them. 
--- string/strlen.c | 89 ++++++------------------ sysdeps/generic/string-extbyte.h | 35 ++++++++++ sysdeps/generic/string-fza.h | 117 +++++++++++++++++++++++++++++++ sysdeps/generic/string-fzb.h | 49 +++++++++++++ sysdeps/generic/string-fzi.h | 146 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 369 insertions(+), 67 deletions(-) create mode 100644 sysdeps/generic/string-extbyte.h create mode 100644 sysdeps/generic/string-fza.h create mode 100644 sysdeps/generic/string-fzb.h create mode 100644 sysdeps/generic/string-fzi.h diff --git a/string/strlen.c b/string/strlen.c index 4943ce2..4aa95d5 100644 --- a/string/strlen.c +++ b/string/strlen.c @@ -20,90 +20,45 @@ #include #include +#include +#include +#include #undef strlen -#ifndef STRLEN -# define STRLEN strlen +#ifdef STRLEN +# define strlen STRLEN #endif /* Return the length of the null-terminated string STR. Scan for the null terminator quickly by testing four bytes at a time. */ size_t -STRLEN (const char *str) +strlen (const char *str) { - const char *char_ptr; - const unsigned long int *longword_ptr; - unsigned long int longword, himagic, lomagic; + const char *char_ptr = str; + const op_t *word_ptr; + op_t word; + uintptr_t i, align; /* Handle the first few characters by reading one character at a time. Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = str; ((unsigned long int) char_ptr - & (sizeof (longword) - 1)) != 0; - ++char_ptr) + align = -(uintptr_t)char_ptr % sizeof(word); + for (i = 0; i < align; ++i, ++char_ptr) if (*char_ptr == '\0') return char_ptr - str; - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." 
Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - himagic = 0x80808080L; - lomagic = 0x01010101L; - if (sizeof (longword) > 4) + word_ptr = (const op_t *) char_ptr; + do { - /* 64-bit version of the magic. */ - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - himagic = ((himagic << 16) << 16) | himagic; - lomagic = ((lomagic << 16) << 16) | lomagic; + word = *word_ptr++; } - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - for (;;) - { - longword = *longword_ptr++; + while (!has_zero (word)); - if (((longword - lomagic) & ~longword & himagic) != 0) - { - /* Which of the bytes was the zero? If none of them were, it was - a misfire; continue the search. */ - - const char *cp = (const char *) (longword_ptr - 1); - - if (cp[0] == 0) - return cp - str; - if (cp[1] == 0) - return cp - str + 1; - if (cp[2] == 0) - return cp - str + 2; - if (cp[3] == 0) - return cp - str + 3; - if (sizeof (longword) > 4) - { - if (cp[4] == 0) - return cp - str + 4; - if (cp[5] == 0) - return cp - str + 5; - if (cp[6] == 0) - return cp - str + 6; - if (cp[7] == 0) - return cp - str + 7; - } - } - } + char_ptr = (const char *) (word_ptr - 1); + char_ptr += index_first_zero (word); + return char_ptr - str; } + +#ifndef STRLEN libc_hidden_builtin_def (strlen) +#endif diff --git a/sysdeps/generic/string-extbyte.h b/sysdeps/generic/string-extbyte.h new file mode 100644 index 0000000..1ccd5b3 --- /dev/null +++ b/sysdeps/generic/string-extbyte.h @@ -0,0 +1,35 @@ +/* string-extbyte.h -- function memory order byte extract. 
Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef STRING_EXTBYTE_H +#define STRING_EXTBYTE_H 1 + +#include +#include +#include + +static inline unsigned char +extractbyte (op_t x, unsigned idx) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + return x >> (idx * CHAR_BIT); + else + return x >> (sizeof (x) - 1 - idx) * CHAR_BIT; +} + +#endif /* STRING_EXTBYTE_H */ diff --git a/sysdeps/generic/string-fza.h b/sysdeps/generic/string-fza.h new file mode 100644 index 0000000..638df2e --- /dev/null +++ b/sysdeps/generic/string-fza.h @@ -0,0 +1,117 @@ +/* string-fza.h -- zero byte detection; basics. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef STRING_FZA_H +#define STRING_FZA_H 1 + +#include +#include + +/* This function returns non-zero if any byte in X is zero. + More specifically, the result has at least one bit set within the + least significant zero byte of X; bytes above it are indeterminate. */ + +static inline op_t +find_zero_low (op_t x) +{ + /* This expression comes from + https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + Subtracting 1 sets 0x80 in a byte that was 0; anding ~x clears + 0x80 in a byte that was >= 128; anding 0x80 isolates that test bit. */ + op_t lsb = (op_t)-1 / 0xff; + op_t msb = lsb << (CHAR_BIT - 1); + return (x - lsb) & ~x & msb; +} + +/* This function returns at least one bit set within every byte of X that + is zero. The result is exact in that, unlike find_zero_low, all bytes + are determinate. This is usually used for finding the index of the + most significant byte that was zero. */ + +static inline op_t +find_zero_all (op_t x) +{ + /* For each byte, find not-zero by + (0) And 0x7f so that we cannot carry between bytes, + (1) Add 0x7f so that non-zero carries into 0x80, + (2) Or in the original byte (which might have had 0x80 set). + Then invert and mask such that 0x80 is set iff that byte was zero. */ + op_t m = ((op_t)-1 / 0xff) * 0x7f; + return ~(((x & m) + m) | x | m); +} + +/* With similar caveats, identify bytes that are equal between X1 and X2. */ + +static inline op_t +find_eq_low (op_t x1, op_t x2) +{ + return find_zero_low (x1 ^ x2); +} + +static inline op_t +find_eq_all (op_t x1, op_t x2) +{ + return find_zero_all (x1 ^ x2); +} + +/* With similar caveats, identify zero bytes in X1 and bytes that are + equal between X1 and X2. 
*/ + +static inline op_t +find_zero_eq_low (op_t x1, op_t x2) +{ + op_t lsb = (op_t)-1 / 0xff; + op_t msb = lsb << (CHAR_BIT - 1); + op_t eq = x1 ^ x2; + return (((x1 - lsb) & ~x1) | ((eq - lsb) & ~eq)) & msb; +} + +static inline op_t +find_zero_eq_all (op_t x1, op_t x2) +{ + op_t m = ((op_t)-1 / 0xff) * 0x7f; + op_t eq = x1 ^ x2; + op_t c1 = ((x1 & m) + m) | x1; + op_t c2 = ((eq & m) + m) | eq; + return ~((c1 & c2) | m); +} + +/* With similar caveats, identify zero bytes in X1 and bytes that differ + between X1 and X2. X1 is masked so a carry cannot hide a zero byte. */ + +static inline op_t +find_zero_ne_low (op_t x1, op_t x2) +{ + op_t m = ((op_t)-1 / 0xff) * 0x7f; + op_t eq = x1 ^ x2; + op_t nz1 = ((x1 & m) + m) | x1; /* msb set if byte not zero */ + op_t ne2 = (eq + m) | eq; /* msb set if byte not equal */ + return (ne2 | ~nz1) & ~m; /* msb set if x1 zero or x2 not equal */ +} + +static inline op_t +find_zero_ne_all (op_t x1, op_t x2) +{ + op_t m = ((op_t)-1 / 0xff) * 0x7f; + op_t eq = x1 ^ x2; + op_t nz1 = ((x1 & m) + m) | x1; + op_t ne2 = ((eq & m) + m) | eq; + return (ne2 | ~nz1) & ~m; +} + +#endif /* STRING_FZA_H */ diff --git a/sysdeps/generic/string-fzb.h b/sysdeps/generic/string-fzb.h new file mode 100644 index 0000000..e0fc26f --- /dev/null +++ b/sysdeps/generic/string-fzb.h @@ -0,0 +1,49 @@ +/* string-fzb.h -- zero byte detection, boolean. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef STRING_FZB_H +#define STRING_FZB_H 1 + +#include +#include + +/* Determine if any byte within X is zero. This is a pure boolean test. */ + +static inline _Bool +has_zero (op_t x) +{ + return find_zero_low (x) != 0; +} + +/* Likewise, but for byte equality between X1 and X2. */ + +static inline _Bool +has_eq (op_t x1, op_t x2) +{ + return find_eq_low (x1, x2) != 0; +} + +/* Likewise, but for zeros in X1 and equal bytes between X1 and X2. */ + +static inline _Bool +has_zero_eq (op_t x1, op_t x2) +{ + return find_zero_eq_low (x1, x2) != 0; +} + +#endif /* STRING_FZB_H */ diff --git a/sysdeps/generic/string-fzi.h b/sysdeps/generic/string-fzi.h new file mode 100644 index 0000000..ea2408f --- /dev/null +++ b/sysdeps/generic/string-fzi.h @@ -0,0 +1,146 @@ +/* string-fzi.h -- zero byte detection; indexes. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef STRING_FZI_H +#define STRING_FZI_H 1 + +#include +#include +#include + +/* A subroutine for the index_zero functions. Given a non-zero test word C, + return the index of the first byte (in memory order) that is + non-zero. 
*/ + +static inline unsigned int +index_first_ (op_t c) +{ + _Static_assert (sizeof (op_t) == sizeof (long) + || sizeof (op_t) == sizeof (long long), + "Unhandled word size"); + + unsigned r; + if (__BYTE_ORDER == __LITTLE_ENDIAN) + { + if (sizeof (op_t) == sizeof (long)) + r = __builtin_ctzl (c); + else + r = __builtin_ctzll (c); + } + else + { + if (sizeof (op_t) == sizeof (long)) + r = __builtin_clzl (c); + else + r = __builtin_clzll (c); + } + return r / CHAR_BIT; +} + +/* Similarly, but return the (memory order) index of the last byte + that is non-zero. C must be non-zero: clz/ctz of 0 is undefined. */ + +static inline unsigned int +index_last_ (op_t c) +{ + _Static_assert (sizeof (op_t) == sizeof (long) + || sizeof (op_t) == sizeof (long long), + "Unhandled word size"); + + unsigned r; + if (__BYTE_ORDER == __LITTLE_ENDIAN) + { + if (sizeof (op_t) == sizeof (long)) + r = __builtin_clzl (c); + else + r = __builtin_clzll (c); + } + else + { + if (sizeof (op_t) == sizeof (long)) + r = __builtin_ctzl (c); + else + r = __builtin_ctzll (c); + } + return sizeof (op_t) - 1 - (r / CHAR_BIT); +} + +/* Given a word X that is known to contain a zero byte, return the + index of the first such within the word in memory order. */ + +static inline unsigned int +index_first_zero (op_t x) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x = find_zero_low (x); + else + x = find_zero_all (x); + return index_first_ (x); +} + +/* Similarly, but perform the search for byte equality between X1 and X2. */ + +static inline unsigned int +index_first_eq (op_t x1, op_t x2) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x1 = find_eq_low (x1, x2); + else + x1 = find_eq_all (x1, x2); + return index_first_ (x1); +} + +/* Similarly, but perform the search for zero within X1 or + equality between X1 and X2. 
*/ + +static inline unsigned int +index_first_zero_eq (op_t x1, op_t x2) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x1 = find_zero_eq_low (x1, x2); + else + x1 = find_zero_eq_all (x1, x2); + return index_first_ (x1); +} + +/* Similarly, but perform the search for zero within X1 or + inequality between X1 and X2. */ + +static inline unsigned int +index_first_zero_ne (op_t x1, op_t x2) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x1 = find_zero_ne_low (x1, x2); + else + x1 = find_zero_ne_all (x1, x2); + return index_first_ (x1); +} + +/* Similarly, but return the index of the last zero byte within X. */ + +static inline unsigned int +index_last_zero (op_t x) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x = find_zero_all (x); + else + x = find_zero_low (x); + return index_last_ (x); +} + +#endif /* STRING_FZI_H */