Message ID | 20220919195920.956393-5-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | Improve generic string routines | expand |
On Mon, Sep 19, 2022 at 1:02 PM Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> wrote: > > This patch adds generic string find and detection meant to be used in > generic vectorized string implementation. The idea is to decompose the > basic string operation so each architecture can reimplement if it > provides any specialized hardware instruction. > > The 'string-fza.h' provides zero byte detection functions (find_zero_low, > find_zero_all, find_eq_low, find_eq_all, find_zero_eq_low, find_zero_eq_all, > find_zero_ne_low, and find_zero_ne_all). They are used on both functions > provided by 'string-fzb.h' and 'string-fzi'. > > The 'string-fzb.h' provides boolean zero byte detection with the > functions: > > - has_zero: determine if any byte within a word is zero. > - has_eq: determine byte equality between two words. > - has_zero_eq: determine if any byte within a word is zero along with > byte equality between two words. > > The 'string-fzi.h' provides zero byte detection along with its positions: > > - index_first_zero: return index of first zero byte within a word. > - index_first_eq: return index of first byte different between two words. > - index_first_zero_eq: return index of first zero byte within a word or > first byte different between two words. > - index_first_zero_ne: return index of first zero byte within a word or > first byte equal between two words. > - index_last_zero: return index of last zero byte within a word. > - index_last_eq: return index of last byte different between two words. > > Co-authored-by: Richard Henderson <rth@twiddle.net> > --- > sysdeps/generic/string-extbyte.h | 37 ++++++++++ > sysdeps/generic/string-fza.h | 106 +++++++++++++++++++++++++++ > sysdeps/generic/string-fzb.h | 49 +++++++++++++ > sysdeps/generic/string-fzi.h | 120 +++++++++++++++++++++++++++++++ > 4 files changed, 312 insertions(+) > create mode 100644 sysdeps/generic/string-extbyte.h > create mode 100644 sysdeps/generic/string-fza.h > create mode 100644 sysdeps/generic/string-fzb.h > create mode 100644 sysdeps/generic/string-fzi.h > > diff --git a/sysdeps/generic/string-extbyte.h b/sysdeps/generic/string-extbyte.h > new file mode 100644 > index 0000000000..c8fecd259f > --- /dev/null > +++ b/sysdeps/generic/string-extbyte.h > @@ -0,0 +1,37 @@ > +/* Extract by from memory word. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_EXTBYTE_H > +#define _STRING_EXTBYTE_H 1 > + > +#include <limits.h> > +#include <endian.h> > +#include <string-optype.h> > + > +/* Extract the byte at index IDX from word X, with index 0 being the > + least significant byte. */ > +static inline unsigned char > +extractbyte (op_t x, unsigned int idx) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + return x >> (idx * CHAR_BIT); > + else > + return x >> (sizeof (x) - 1 - idx) * CHAR_BIT; > +} > + > +#endif /* _STRING_EXTBYTE_H */ > diff --git a/sysdeps/generic/string-fza.h b/sysdeps/generic/string-fza.h > new file mode 100644 > index 0000000000..54be34e5f0 > --- /dev/null > +++ b/sysdeps/generic/string-fza.h > @@ -0,0 +1,106 @@ > +/* Basic zero byte detection. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_FZA_H > +#define _STRING_FZA_H 1 > + > +#include <limits.h> > +#include <string-optype.h> > +#include <string-maskoff.h> > + > +/* This function returns non-zero if any byte in X is zero. > + More specifically, at least one bit set within the least significant > + byte that was zero; other bytes within the word are indeterminate. */ > +static inline op_t > +find_zero_low (op_t x) > +{ > + /* This expression comes from > + https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord > + Subtracting 1 sets 0x80 in a byte that was 0; anding ~x clears > + 0x80 in a byte that was >= 128; anding 0x80 isolates that test bit. */ > + op_t lsb = repeat_bytes (0x01); > + op_t msb = repeat_bytes (0x80); > + return (x - lsb) & ~x & msb; > +} > + > +/* This function returns at least one bit set within every byte of X that > + is zero. The result is exact in that, unlike find_zero_low, all bytes > + are determinate. This is usually used for finding the index of the > + most significant byte that was zero. */ > +static inline op_t > +find_zero_all (op_t x) > +{ > + /* For each byte, find not-zero by > + (0) And 0x7f so that we cannot carry between bytes, > + (1) Add 0x7f so that non-zero carries into 0x80, > + (2) Or in the original byte (which might have had 0x80 set). > + Then invert and mask such that 0x80 is set iff that byte was zero. */ > + op_t m = ((op_t)-1 / 0xff) * 0x7f; Use repeat_byte here? > + return ~(((x & m) + m) | x | m); > +} > + > +/* With similar caveats, identify bytes that are equal between X1 and X2. */ > +static inline op_t > +find_eq_low (op_t x1, op_t x2) > +{ > + return find_zero_low (x1 ^ x2); > +} > + > +static inline op_t > +find_eq_all (op_t x1, op_t x2) > +{ > + return find_zero_all (x1 ^ x2); > +} > + > +/* With similar caveats, identify zero bytes in X1 and bytes that are > + equal between in X1 and X2. */ > +static inline op_t > +find_zero_eq_low (op_t x1, op_t x2) > +{ > + return find_zero_low (x1) | find_zero_low (x1 ^ x2); > +} > + > +static inline op_t > +find_zero_eq_all (op_t x1, op_t x2) > +{ > + return find_zero_all (x1) | find_zero_all (x1 ^ x2); > +} > + > +/* With similar caveats, identify zero bytes in X1 and bytes that are > + not equal between in X1 and X2. */ > +static inline op_t > +find_zero_ne_low (op_t x1, op_t x2) > +{ > + op_t m = repeat_bytes (0x7f); > + op_t eq = x1 ^ x2; > + op_t nz1 = (x1 + m) | x1; /* msb set if byte not zero. */ > + op_t ne2 = (eq + m) | eq; /* msb set if byte not equal. */ > + return (ne2 | ~nz1) & ~m; /* msb set if x1 zero or x2 not equal. */ > +} > + > +static inline op_t > +find_zero_ne_all (op_t x1, op_t x2) > +{ > + op_t m = repeat_bytes (0x7f); > + op_t eq = x1 ^ x2; > + op_t nz1 = ((x1 & m) + m) | x1; > + op_t ne2 = ((eq & m) + m) | eq; > + return (ne2 | ~nz1) & ~m; > +} > + > +#endif /* _STRING_FZA_H */ > diff --git a/sysdeps/generic/string-fzb.h b/sysdeps/generic/string-fzb.h > new file mode 100644 > index 0000000000..f1c0ae0922 > --- /dev/null > +++ b/sysdeps/generic/string-fzb.h > @@ -0,0 +1,49 @@ > +/* Zero byte detection, boolean. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_FZB_H > +#define _STRING_FZB_H 1 > + > +#include <endian.h> > +#include <string-fza.h> > + > +/* Determine if any byte within X is zero. This is a pure boolean test. */ > + > +static inline _Bool > +has_zero (op_t x) > +{ > + return find_zero_low (x) != 0; > +} > + > +/* Likewise, but for byte equality between X1 and X2. */ > + > +static inline _Bool > +has_eq (op_t x1, op_t x2) > +{ > + return find_eq_low (x1, x2) != 0; > +} > + > +/* Likewise, but for zeros in X1 and equal bytes between X1 and X2. */ > + > +static inline _Bool > +has_zero_eq (op_t x1, op_t x2) > +{ > + return find_zero_eq_low (x1, x2); > +} > + > +#endif /* _STRING_FZB_H */ > diff --git a/sysdeps/generic/string-fzi.h b/sysdeps/generic/string-fzi.h > new file mode 100644 > index 0000000000..888e1b8baa > --- /dev/null > +++ b/sysdeps/generic/string-fzi.h > @@ -0,0 +1,120 @@ > +/* Zero byte detection; indexes. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_FZI_H > +#define _STRING_FZI_H 1 > + > +#include <limits.h> > +#include <endian.h> > +#include <string-fza.h> > +#include <gmp.h> > +#include <stdlib/gmp-impl.h> > +#include <stdlib/longlong.h> > + > +/* A subroutine for the index_zero functions. Given a test word C, return > + the (memory order) index of the first byte (in memory order) that is > + non-zero. */ > +static inline unsigned int > +index_first_ (op_t c) > +{ > + int r; > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + count_trailing_zeros (r, c); > + else > + count_leading_zeros (r, c); > + return r / CHAR_BIT; > +} > + > +/* Similarly, but return the (memory order) index of the last byte that is > + non-zero. */ > +static inline unsigned int > +index_last_ (op_t c) > +{ > + int r; > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + count_leading_zeros (r, c); > + else > + count_trailing_zeros (r, c); > + return sizeof (op_t) - 1 - (r / CHAR_BIT); > +} > + > +/* Given a word X that is known to contain a zero byte, return the index of > + the first such within the word in memory order. */ > +static inline unsigned int > +index_first_zero (op_t x) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x = find_zero_low (x); > + else > + x = find_zero_all (x); > + return index_first_ (x); > +} > + > +/* Similarly, but perform the search for byte equality between X1 and X2. */ > +static inline unsigned int > +index_first_eq (op_t x1, op_t x2) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x1 = find_eq_low (x1, x2); > + else > + x1 = find_eq_all (x1, x2); > + return index_first_ (x1); > +} > + > +/* Similarly, but perform the search for zero within X1 or equality between > + X1 and X2. */ > +static inline unsigned int > +index_first_zero_eq (op_t x1, op_t x2) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x1 = find_zero_eq_low (x1, x2); > + else > + x1 = find_zero_eq_all (x1, x2); > + return index_first_ (x1); > +} > + > +/* Similarly, but perform the search for zero within X1 or inequality between > + X1 and X2. */ > +static inline unsigned int > +index_first_zero_ne (op_t x1, op_t x2) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x1 = find_zero_ne_low (x1, x2); > + else > + x1 = find_zero_ne_all (x1, x2); > + return index_first_ (x1); > +} > + > +/* Similarly, but search for the last zero within X. */ > +static inline unsigned int > +index_last_zero (op_t x) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x = find_zero_all (x); > + else > + x = find_zero_low (x); > + return index_last_ (x); > +} > + > +static inline unsigned int > +index_last_eq (op_t x1, op_t x2) > +{ > + return index_last_zero (x1 ^ x2); > +} > + > +#endif /* STRING_FZI_H */ > -- > 2.34.1 >
On Mon, Sep 19, 2022 at 1:02 PM Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> wrote: > > This patch adds generic string find and detection meant to be used in > generic vectorized string implementation. The idea is to decompose the > basic string operation so each architecture can reimplement if it > provides any specialized hardware instruction. > > The 'string-fza.h' provides zero byte detection functions (find_zero_low, > find_zero_all, find_eq_low, find_eq_all, find_zero_eq_low, find_zero_eq_all, > find_zero_ne_low, and find_zero_ne_all). They are used on both functions > provided by 'string-fzb.h' and 'string-fzi'. > > The 'string-fzb.h' provides boolean zero byte detection with the > functions: > > - has_zero: determine if any byte within a word is zero. > - has_eq: determine byte equality between two words. > - has_zero_eq: determine if any byte within a word is zero along with > byte equality between two words. > > The 'string-fzi.h' provides zero byte detection along with its positions: > > - index_first_zero: return index of first zero byte within a word. > - index_first_eq: return index of first byte different between two words. > - index_first_zero_eq: return index of first zero byte within a word or > first byte different between two words. > - index_first_zero_ne: return index of first zero byte within a word or > first byte equal between two words. > - index_last_zero: return index of last zero byte within a word. > - index_last_eq: return index of last byte different between two words. > > Co-authored-by: Richard Henderson <rth@twiddle.net> > --- > sysdeps/generic/string-extbyte.h | 37 ++++++++++ > sysdeps/generic/string-fza.h | 106 +++++++++++++++++++++++++++ > sysdeps/generic/string-fzb.h | 49 +++++++++++++ > sysdeps/generic/string-fzi.h | 120 +++++++++++++++++++++++++++++++ > 4 files changed, 312 insertions(+) > create mode 100644 sysdeps/generic/string-extbyte.h > create mode 100644 sysdeps/generic/string-fza.h > create mode 100644 sysdeps/generic/string-fzb.h > create mode 100644 sysdeps/generic/string-fzi.h > > diff --git a/sysdeps/generic/string-extbyte.h b/sysdeps/generic/string-extbyte.h > new file mode 100644 > index 0000000000..c8fecd259f > --- /dev/null > +++ b/sysdeps/generic/string-extbyte.h > @@ -0,0 +1,37 @@ > +/* Extract by from memory word. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_EXTBYTE_H > +#define _STRING_EXTBYTE_H 1 > + > +#include <limits.h> > +#include <endian.h> > +#include <string-optype.h> > + > +/* Extract the byte at index IDX from word X, with index 0 being the > + least significant byte. */ > +static inline unsigned char > +extractbyte (op_t x, unsigned int idx) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + return x >> (idx * CHAR_BIT); > + else > + return x >> (sizeof (x) - 1 - idx) * CHAR_BIT; > +} > + > +#endif /* _STRING_EXTBYTE_H */ > diff --git a/sysdeps/generic/string-fza.h b/sysdeps/generic/string-fza.h > new file mode 100644 > index 0000000000..54be34e5f0 > --- /dev/null > +++ b/sysdeps/generic/string-fza.h > @@ -0,0 +1,106 @@ > +/* Basic zero byte detection. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_FZA_H > +#define _STRING_FZA_H 1 > + > +#include <limits.h> > +#include <string-optype.h> > +#include <string-maskoff.h> > + > +/* This function returns non-zero if any byte in X is zero. > + More specifically, at least one bit set within the least significant > + byte that was zero; other bytes within the word are indeterminate. */ > +static inline op_t > +find_zero_low (op_t x) > +{ > + /* This expression comes from > + https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord > + Subtracting 1 sets 0x80 in a byte that was 0; anding ~x clears > + 0x80 in a byte that was >= 128; anding 0x80 isolates that test bit. */ > + op_t lsb = repeat_bytes (0x01); > + op_t msb = repeat_bytes (0x80); > + return (x - lsb) & ~x & msb; > +} > + > +/* This function returns at least one bit set within every byte of X that > + is zero. The result is exact in that, unlike find_zero_low, all bytes > + are determinate. This is usually used for finding the index of the > + most significant byte that was zero. */ > +static inline op_t > +find_zero_all (op_t x) > +{ > + /* For each byte, find not-zero by > + (0) And 0x7f so that we cannot carry between bytes, > + (1) Add 0x7f so that non-zero carries into 0x80, > + (2) Or in the original byte (which might have had 0x80 set). > + Then invert and mask such that 0x80 is set iff that byte was zero. */ > + op_t m = ((op_t)-1 / 0xff) * 0x7f; > + return ~(((x & m) + m) | x | m); > +} > + > +/* With similar caveats, identify bytes that are equal between X1 and X2. */ > +static inline op_t > +find_eq_low (op_t x1, op_t x2) > +{ > + return find_zero_low (x1 ^ x2); > +} > + > +static inline op_t > +find_eq_all (op_t x1, op_t x2) > +{ > + return find_zero_all (x1 ^ x2); > +} > + > +/* With similar caveats, identify zero bytes in X1 and bytes that are > + equal between in X1 and X2. */ > +static inline op_t > +find_zero_eq_low (op_t x1, op_t x2) > +{ > + return find_zero_low (x1) | find_zero_low (x1 ^ x2); > +} > + > +static inline op_t > +find_zero_eq_all (op_t x1, op_t x2) > +{ > + return find_zero_all (x1) | find_zero_all (x1 ^ x2); > +} > + > +/* With similar caveats, identify zero bytes in X1 and bytes that are > + not equal between in X1 and X2. */ > +static inline op_t > +find_zero_ne_low (op_t x1, op_t x2) > +{ > + op_t m = repeat_bytes (0x7f); > + op_t eq = x1 ^ x2; > + op_t nz1 = (x1 + m) | x1; /* msb set if byte not zero. */ > + op_t ne2 = (eq + m) | eq; /* msb set if byte not equal. */ > + return (ne2 | ~nz1) & ~m; /* msb set if x1 zero or x2 not equal. */ > +} Cant this just be `(~find_zero_eq_low(x1, x2)) + 1` (seems to get better codegen) > + > +static inline op_t > +find_zero_ne_all (op_t x1, op_t x2) > +{ > + op_t m = repeat_bytes (0x7f); > + op_t eq = x1 ^ x2; > + op_t nz1 = ((x1 & m) + m) | x1; > + op_t ne2 = ((eq & m) + m) | eq; > + return (ne2 | ~nz1) & ~m; > +} > + > +#endif /* _STRING_FZA_H */ > diff --git a/sysdeps/generic/string-fzb.h b/sysdeps/generic/string-fzb.h > new file mode 100644 > index 0000000000..f1c0ae0922 > --- /dev/null > +++ b/sysdeps/generic/string-fzb.h > @@ -0,0 +1,49 @@ > +/* Zero byte detection, boolean. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_FZB_H > +#define _STRING_FZB_H 1 > + > +#include <endian.h> > +#include <string-fza.h> > + > +/* Determine if any byte within X is zero. This is a pure boolean test. */ > + > +static inline _Bool > +has_zero (op_t x) > +{ > + return find_zero_low (x) != 0; > +} > + > +/* Likewise, but for byte equality between X1 and X2. */ > + > +static inline _Bool > +has_eq (op_t x1, op_t x2) > +{ > + return find_eq_low (x1, x2) != 0; > +} > + > +/* Likewise, but for zeros in X1 and equal bytes between X1 and X2. */ > + > +static inline _Bool > +has_zero_eq (op_t x1, op_t x2) > +{ > + return find_zero_eq_low (x1, x2); > +} > + > +#endif /* _STRING_FZB_H */ > diff --git a/sysdeps/generic/string-fzi.h b/sysdeps/generic/string-fzi.h > new file mode 100644 > index 0000000000..888e1b8baa > --- /dev/null > +++ b/sysdeps/generic/string-fzi.h > @@ -0,0 +1,120 @@ > +/* Zero byte detection; indexes. Generic C version. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STRING_FZI_H > +#define _STRING_FZI_H 1 > + > +#include <limits.h> > +#include <endian.h> > +#include <string-fza.h> > +#include <gmp.h> > +#include <stdlib/gmp-impl.h> > +#include <stdlib/longlong.h> > + > +/* A subroutine for the index_zero functions. Given a test word C, return > + the (memory order) index of the first byte (in memory order) that is > + non-zero. */ > +static inline unsigned int > +index_first_ (op_t c) > +{ > + int r; > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + count_trailing_zeros (r, c); > + else > + count_leading_zeros (r, c); > + return r / CHAR_BIT; > +} > + > +/* Similarly, but return the (memory order) index of the last byte that is > + non-zero. */ > +static inline unsigned int > +index_last_ (op_t c) > +{ > + int r; > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + count_leading_zeros (r, c); > + else > + count_trailing_zeros (r, c); > + return sizeof (op_t) - 1 - (r / CHAR_BIT); > +} > + > +/* Given a word X that is known to contain a zero byte, return the index of > + the first such within the word in memory order. */ > +static inline unsigned int > +index_first_zero (op_t x) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x = find_zero_low (x); > + else > + x = find_zero_all (x); > + return index_first_ (x); > +} > + > +/* Similarly, but perform the search for byte equality between X1 and X2. */ > +static inline unsigned int > +index_first_eq (op_t x1, op_t x2) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x1 = find_eq_low (x1, x2); > + else > + x1 = find_eq_all (x1, x2); > + return index_first_ (x1); > +} > + > +/* Similarly, but perform the search for zero within X1 or equality between > + X1 and X2. */ > +static inline unsigned int > +index_first_zero_eq (op_t x1, op_t x2) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x1 = find_zero_eq_low (x1, x2); > + else > + x1 = find_zero_eq_all (x1, x2); > + return index_first_ (x1); > +} > + > +/* Similarly, but perform the search for zero within X1 or inequality between > + X1 and X2. */ > +static inline unsigned int > +index_first_zero_ne (op_t x1, op_t x2) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x1 = find_zero_ne_low (x1, x2); > + else > + x1 = find_zero_ne_all (x1, x2); > + return index_first_ (x1); > +} > + > +/* Similarly, but search for the last zero within X. */ > +static inline unsigned int > +index_last_zero (op_t x) > +{ > + if (__BYTE_ORDER == __LITTLE_ENDIAN) > + x = find_zero_all (x); > + else > + x = find_zero_low (x); > + return index_last_ (x); > +} > + > +static inline unsigned int > +index_last_eq (op_t x1, op_t x2) > +{ > + return index_last_zero (x1 ^ x2); > +} > + > +#endif /* STRING_FZI_H */ > -- > 2.34.1 >
On 05/01/23 19:53, Noah Goldstein wrote: > On Mon, Sep 19, 2022 at 1:02 PM Adhemerval Zanella via Libc-alpha > <libc-alpha@sourceware.org> wrote: >> +/* This function returns at least one bit set within every byte of X that >> + is zero. The result is exact in that, unlike find_zero_low, all bytes >> + are determinate. This is usually used for finding the index of the >> + most significant byte that was zero. */ >> +static inline op_t >> +find_zero_all (op_t x) >> +{ >> + /* For each byte, find not-zero by >> + (0) And 0x7f so that we cannot carry between bytes, >> + (1) Add 0x7f so that non-zero carries into 0x80, >> + (2) Or in the original byte (which might have had 0x80 set). >> + Then invert and mask such that 0x80 is set iff that byte was zero. */ >> + op_t m = ((op_t)-1 / 0xff) * 0x7f; > > Use repeat_byte here? Ack.
On 05/01/23 20:04, Noah Goldstein wrote: > On Mon, Sep 19, 2022 at 1:02 PM Adhemerval Zanella via Libc-alpha >> +/* With similar caveats, identify zero bytes in X1 and bytes that are >> + not equal between in X1 and X2. */ >> +static inline op_t >> +find_zero_ne_low (op_t x1, op_t x2) >> +{ >> + op_t m = repeat_bytes (0x7f); >> + op_t eq = x1 ^ x2; >> + op_t nz1 = (x1 + m) | x1; /* msb set if byte not zero. */ >> + op_t ne2 = (eq + m) | eq; /* msb set if byte not equal. */ >> + return (ne2 | ~nz1) & ~m; /* msb set if x1 zero or x2 not equal. */ >> +} > Cant this just be `(~find_zero_eq_low(x1, x2)) + 1` (seems to get > better codegen)? I think we can, I will change it in next revision.
diff --git a/sysdeps/generic/string-extbyte.h b/sysdeps/generic/string-extbyte.h new file mode 100644 index 0000000000..c8fecd259f --- /dev/null +++ b/sysdeps/generic/string-extbyte.h @@ -0,0 +1,37 @@ +/* Extract by from memory word. Generic C version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STRING_EXTBYTE_H +#define _STRING_EXTBYTE_H 1 + +#include <limits.h> +#include <endian.h> +#include <string-optype.h> + +/* Extract the byte at index IDX from word X, with index 0 being the + least significant byte. */ +static inline unsigned char +extractbyte (op_t x, unsigned int idx) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + return x >> (idx * CHAR_BIT); + else + return x >> (sizeof (x) - 1 - idx) * CHAR_BIT; +} + +#endif /* _STRING_EXTBYTE_H */ diff --git a/sysdeps/generic/string-fza.h b/sysdeps/generic/string-fza.h new file mode 100644 index 0000000000..54be34e5f0 --- /dev/null +++ b/sysdeps/generic/string-fza.h @@ -0,0 +1,106 @@ +/* Basic zero byte detection. Generic C version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STRING_FZA_H +#define _STRING_FZA_H 1 + +#include <limits.h> +#include <string-optype.h> +#include <string-maskoff.h> + +/* This function returns non-zero if any byte in X is zero. + More specifically, at least one bit set within the least significant + byte that was zero; other bytes within the word are indeterminate. */ +static inline op_t +find_zero_low (op_t x) +{ + /* This expression comes from + https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + Subtracting 1 sets 0x80 in a byte that was 0; anding ~x clears + 0x80 in a byte that was >= 128; anding 0x80 isolates that test bit. */ + op_t lsb = repeat_bytes (0x01); + op_t msb = repeat_bytes (0x80); + return (x - lsb) & ~x & msb; +} + +/* This function returns at least one bit set within every byte of X that + is zero. The result is exact in that, unlike find_zero_low, all bytes + are determinate. This is usually used for finding the index of the + most significant byte that was zero. */ +static inline op_t +find_zero_all (op_t x) +{ + /* For each byte, find not-zero by + (0) And 0x7f so that we cannot carry between bytes, + (1) Add 0x7f so that non-zero carries into 0x80, + (2) Or in the original byte (which might have had 0x80 set). + Then invert and mask such that 0x80 is set iff that byte was zero. */ + op_t m = ((op_t)-1 / 0xff) * 0x7f; + return ~(((x & m) + m) | x | m); +} + +/* With similar caveats, identify bytes that are equal between X1 and X2. */ +static inline op_t +find_eq_low (op_t x1, op_t x2) +{ + return find_zero_low (x1 ^ x2); +} + +static inline op_t +find_eq_all (op_t x1, op_t x2) +{ + return find_zero_all (x1 ^ x2); +} + +/* With similar caveats, identify zero bytes in X1 and bytes that are + equal between in X1 and X2. */ +static inline op_t +find_zero_eq_low (op_t x1, op_t x2) +{ + return find_zero_low (x1) | find_zero_low (x1 ^ x2); +} + +static inline op_t +find_zero_eq_all (op_t x1, op_t x2) +{ + return find_zero_all (x1) | find_zero_all (x1 ^ x2); +} + +/* With similar caveats, identify zero bytes in X1 and bytes that are + not equal between in X1 and X2. */ +static inline op_t +find_zero_ne_low (op_t x1, op_t x2) +{ + op_t m = repeat_bytes (0x7f); + op_t eq = x1 ^ x2; + op_t nz1 = (x1 + m) | x1; /* msb set if byte not zero. */ + op_t ne2 = (eq + m) | eq; /* msb set if byte not equal. */ + return (ne2 | ~nz1) & ~m; /* msb set if x1 zero or x2 not equal. */ +} + +static inline op_t +find_zero_ne_all (op_t x1, op_t x2) +{ + op_t m = repeat_bytes (0x7f); + op_t eq = x1 ^ x2; + op_t nz1 = ((x1 & m) + m) | x1; + op_t ne2 = ((eq & m) + m) | eq; + return (ne2 | ~nz1) & ~m; +} + +#endif /* _STRING_FZA_H */ diff --git a/sysdeps/generic/string-fzb.h b/sysdeps/generic/string-fzb.h new file mode 100644 index 0000000000..f1c0ae0922 --- /dev/null +++ b/sysdeps/generic/string-fzb.h @@ -0,0 +1,49 @@ +/* Zero byte detection, boolean. Generic C version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STRING_FZB_H +#define _STRING_FZB_H 1 + +#include <endian.h> +#include <string-fza.h> + +/* Determine if any byte within X is zero. This is a pure boolean test. */ + +static inline _Bool +has_zero (op_t x) +{ + return find_zero_low (x) != 0; +} + +/* Likewise, but for byte equality between X1 and X2. */ + +static inline _Bool +has_eq (op_t x1, op_t x2) +{ + return find_eq_low (x1, x2) != 0; +} + +/* Likewise, but for zeros in X1 and equal bytes between X1 and X2. */ + +static inline _Bool +has_zero_eq (op_t x1, op_t x2) +{ + return find_zero_eq_low (x1, x2); +} + +#endif /* _STRING_FZB_H */ diff --git a/sysdeps/generic/string-fzi.h b/sysdeps/generic/string-fzi.h new file mode 100644 index 0000000000..888e1b8baa --- /dev/null +++ b/sysdeps/generic/string-fzi.h @@ -0,0 +1,120 @@ +/* Zero byte detection; indexes. Generic C version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STRING_FZI_H +#define _STRING_FZI_H 1 + +#include <limits.h> +#include <endian.h> +#include <string-fza.h> +#include <gmp.h> +#include <stdlib/gmp-impl.h> +#include <stdlib/longlong.h> + +/* A subroutine for the index_zero functions. Given a test word C, return + the (memory order) index of the first byte (in memory order) that is + non-zero. */ +static inline unsigned int +index_first_ (op_t c) +{ + int r; + if (__BYTE_ORDER == __LITTLE_ENDIAN) + count_trailing_zeros (r, c); + else + count_leading_zeros (r, c); + return r / CHAR_BIT; +} + +/* Similarly, but return the (memory order) index of the last byte that is + non-zero. */ +static inline unsigned int +index_last_ (op_t c) +{ + int r; + if (__BYTE_ORDER == __LITTLE_ENDIAN) + count_leading_zeros (r, c); + else + count_trailing_zeros (r, c); + return sizeof (op_t) - 1 - (r / CHAR_BIT); +} + +/* Given a word X that is known to contain a zero byte, return the index of + the first such within the word in memory order. */ +static inline unsigned int +index_first_zero (op_t x) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x = find_zero_low (x); + else + x = find_zero_all (x); + return index_first_ (x); +} + +/* Similarly, but perform the search for byte equality between X1 and X2. */ +static inline unsigned int +index_first_eq (op_t x1, op_t x2) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x1 = find_eq_low (x1, x2); + else + x1 = find_eq_all (x1, x2); + return index_first_ (x1); +} + +/* Similarly, but perform the search for zero within X1 or equality between + X1 and X2. */ +static inline unsigned int +index_first_zero_eq (op_t x1, op_t x2) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x1 = find_zero_eq_low (x1, x2); + else + x1 = find_zero_eq_all (x1, x2); + return index_first_ (x1); +} + +/* Similarly, but perform the search for zero within X1 or inequality between + X1 and X2. */ +static inline unsigned int +index_first_zero_ne (op_t x1, op_t x2) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x1 = find_zero_ne_low (x1, x2); + else + x1 = find_zero_ne_all (x1, x2); + return index_first_ (x1); +} + +/* Similarly, but search for the last zero within X. */ +static inline unsigned int +index_last_zero (op_t x) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + x = find_zero_all (x); + else + x = find_zero_low (x); + return index_last_ (x); +} + +static inline unsigned int +index_last_eq (op_t x1, op_t x2) +{ + return index_last_zero (x1 ^ x2); +} + +#endif /* STRING_FZI_H */