Message ID | 20231128140137.81955-1-tirtajames45@gmail.com |
---|---|
State | New |
Headers | show |
Series | strcasestr: try to find non-alpha char in NEEDLE | expand |
On 11/28/23 09:01, James Tirta Halim wrote: > --- James, could you please clarify your copyright assignment status? Please review "2.1. Copyright FSF or disclaimer" in the contribution checklist: https://sourceware.org/glibc/wiki/Contribution%20checklist#Copyright_FSF_or_disclaimer > string/strcasestr.c | 37 ++++++++++++++++++++++++++++++------- > 1 file changed, 30 insertions(+), 7 deletions(-) > > diff --git a/string/strcasestr.c b/string/strcasestr.c > index 2f6b4f8641..65eae2f047 100644 > --- a/string/strcasestr.c > +++ b/string/strcasestr.c > @@ -54,7 +54,6 @@ > #define STRCASESTR __strcasestr > #endif > > - > /* Find the first occurrence of NEEDLE in HAYSTACK, using > case-insensitive comparison. This function gives unspecified > results in multibyte locales. */ > @@ -63,18 +62,42 @@ STRCASESTR (const char *haystack, const char *needle) > { > size_t needle_len; /* Length of NEEDLE. */ > size_t haystack_len; /* Known minimum length of HAYSTACK. */ > + const char *h, *n; > > /* Handle empty NEEDLE special case. */ > if (needle[0] == '\0') > return (char *) haystack; > > - /* Ensure HAYSTACK length is at least as long as NEEDLE length. > - Since a match may occur early on in a huge HAYSTACK, use strnlen > - and read ahead a few cachelines for improved performance. */ > - needle_len = strlen (needle); > - haystack_len = __strnlen (haystack, needle_len + 256); > - if (haystack_len < needle_len) > + /* Try to find a non-alphanumeric character in NEEDLE to pass to > + strchr() while checking if HAYSTACK is as long as NEEDLE. */ > + for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n); > + if (__glibc_unlikely (*h == '\0')) > return NULL; > + if (*n) { > + size_t shift; > + shift = n - needle; > + haystack = strchr (h + shift, *n); > + if (__glibc_unlikely (haystack == NULL)) > + return NULL; > + haystack -= shift; > + /* Check if we have an early match. 
*/ > + for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n); > + if (*n == '\0') > + return (char *)haystack; > + if (__glibc_unlikely (*h == '\0')) > + return NULL; > + if ((size_t) (n - needle) > shift) > + shift = n - needle; > + /* Since a match may occur early on in a huge HAYSTACK, use strnlen > + and read ahead a few cachelines for improved performance. */ > + needle_len = shift + strlen (needle + shift); > + haystack_len = shift + __strnlen (h + shift, 256); > + if (__glibc_unlikely (haystack_len < needle_len)) > + return NULL; > + } else { > + needle_len = n - needle; > + haystack_len = needle_len + __strnlen (haystack + needle_len, 256); > + } > > /* Perform the search. Abstract memory is considered to be an array > of 'unsigned char' values, not an array of 'char' values. See
diff --git a/string/strcasestr.c b/string/strcasestr.c index 2f6b4f8641..65eae2f047 100644 --- a/string/strcasestr.c +++ b/string/strcasestr.c @@ -54,7 +54,6 @@ #define STRCASESTR __strcasestr #endif - /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive comparison. This function gives unspecified results in multibyte locales. */ @@ -63,18 +62,42 @@ STRCASESTR (const char *haystack, const char *needle) { size_t needle_len; /* Length of NEEDLE. */ size_t haystack_len; /* Known minimum length of HAYSTACK. */ + const char *h, *n; /* Handle empty NEEDLE special case. */ if (needle[0] == '\0') return (char *) haystack; - /* Ensure HAYSTACK length is at least as long as NEEDLE length. - Since a match may occur early on in a huge HAYSTACK, use strnlen - and read ahead a few cachelines for improved performance. */ - needle_len = strlen (needle); - haystack_len = __strnlen (haystack, needle_len + 256); - if (haystack_len < needle_len) + /* Try to find a non-alphanumeric character in NEEDLE to pass to + strchr() while checking if HAYSTACK is as long as NEEDLE. */ + for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n); + if (__glibc_unlikely (*h == '\0')) return NULL; + if (*n) { + size_t shift; + shift = n - needle; + haystack = strchr (h + shift, *n); + if (__glibc_unlikely (haystack == NULL)) + return NULL; + haystack -= shift; + /* Check if we have an early match. */ + for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n); + if (*n == '\0') + return (char *)haystack; + if (__glibc_unlikely (*h == '\0')) + return NULL; + if ((size_t) (n - needle) > shift) + shift = n - needle; + /* Since a match may occur early on in a huge HAYSTACK, use strnlen + and read ahead a few cachelines for improved performance. 
*/ + needle_len = shift + strlen (needle + shift); + haystack_len = shift + __strnlen (h + shift, 256); + if (__glibc_unlikely (haystack_len < needle_len)) + return NULL; + } else { + needle_len = n - needle; + haystack_len = needle_len + __strnlen (haystack + needle_len, 256); + } /* Perform the search. Abstract memory is considered to be an array of 'unsigned char' values, not an array of 'char' values. See