Message ID | 20180814154642.09826405971F7@oldenburg.str.redhat.com |
---|---|
State | New |
Headers | show |
Series | fnmatch: Use __mbstowcs_alloc [BZ #23519] | expand |
Sorry, I posted the wrong version of the patch. Try this one. Subject: [PATCH] fnmatch: Use __mbstowcs_alloc [BZ #23519] To: libc-alpha@sourceware.org 2018-08-14 Florian Weimer <fweimer@redhat.com> [BZ #23519] * posix/fnmatch.c [HANDLE_MULTIBYTE] (fnmatch): Use __mbstowcs_alloc. diff --git a/posix/fnmatch.c b/posix/fnmatch.c index a9b762624f..1ce6c88d6a 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -35,6 +35,7 @@ #endif #ifdef _LIBC +# include <array_length.h> # include <alloca.h> #else # define alloca_account(size., var) alloca (size) @@ -322,122 +323,28 @@ fnmatch (const char *pattern, const char *string, int flags) # if HANDLE_MULTIBYTE if (__builtin_expect (MB_CUR_MAX, 1) != 1) { - mbstate_t ps; - size_t n; - const char *p; - wchar_t *wpattern_malloc = NULL; - wchar_t *wpattern; - wchar_t *wstring_malloc = NULL; - wchar_t *wstring; - size_t alloca_used = 0; + void *to_free1; + wchar_t wpattern_scratch[256]; + wchar_t *wpattern = __mbstowcs_alloc + (pattern, wpattern_scratch, array_length (wpattern_scratch), &to_free1); + if (wpattern == NULL) + return -1; - /* Convert the strings into wide characters. */ - memset (&ps, '\0', sizeof (ps)); - p = pattern; -#ifdef _LIBC - n = __strnlen (pattern, 1024); -#else - n = strlen (pattern); -#endif - if (__glibc_likely (n < 1024)) + void *to_free2; + wchar_t wstring_scratch[256]; + wchar_t *wstring = __mbstowcs_alloc + (string, wstring_scratch, array_length (wstring_scratch), &to_free2); + if (wstring == NULL) { - wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t), - alloca_used); - n = mbsrtowcs (wpattern, &p, n + 1, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - /* Something wrong. - XXX Do we have to set `errno' to something which mbsrtows hasn't - already done? */ - return -1; - if (p) - { - memset (&ps, '\0', sizeof (ps)); - goto prepare_wpattern; - } + free (to_free1); + return -1; } - else - { - prepare_wpattern: - n = mbsrtowcs (NULL, &pattern, 0, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - /* Something wrong. - XXX Do we have to set `errno' to something which mbsrtows hasn't - already done? */ - return -1; - if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t))) - { - __set_errno (ENOMEM); - return -2; - } - wpattern_malloc = wpattern - = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t)); - assert (mbsinit (&ps)); - if (wpattern == NULL) - return -2; - (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps); - } - - assert (mbsinit (&ps)); -#ifdef _LIBC - n = __strnlen (string, 1024); -#else - n = strlen (string); -#endif - p = string; - if (__glibc_likely (n < 1024)) - { - wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t), - alloca_used); - n = mbsrtowcs (wstring, &p, n + 1, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - { - /* Something wrong. - XXX Do we have to set `errno' to something which - mbsrtows hasn't already done? */ - free_return: - free (wpattern_malloc); - return -1; - } - if (p) - { - memset (&ps, '\0', sizeof (ps)); - goto prepare_wstring; - } - } - else - { - prepare_wstring: - n = mbsrtowcs (NULL, &string, 0, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - /* Something wrong. - XXX Do we have to set `errno' to something which mbsrtows hasn't - already done? */ - goto free_return; - if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t))) - { - free (wpattern_malloc); - __set_errno (ENOMEM); - return -2; - } - - wstring_malloc = wstring - = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t)); - if (wstring == NULL) - { - free (wpattern_malloc); - return -2; - } - assert (mbsinit (&ps)); - (void) mbsrtowcs (wstring, &string, n + 1, &ps); - } - - int res = internal_fnwmatch (wpattern, wstring, wstring + n, - flags & FNM_PERIOD, flags, NULL, - alloca_used); - - free (wstring_malloc); - free (wpattern_malloc); + int res = internal_fnwmatch (wpattern, + wstring, wstring + __wcslen (wstring), + flags & FNM_PERIOD, flags, NULL, 0); + free (to_free2); + free (to_free1); return res; } # endif /* mbstate_t and mbsrtowcs or _LIBC. */
On 14/08/2018 12:53, Florian Weimer wrote:
> Sorry, I posted the wrong version of the patch. Try this one.
I think we should first address which should the desirable solution for
BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
strings or 2. fallback on single-bye matching for these invalid entries.
The second option is already on gnulib, so we can either first sync with
it work to get it sync back (with the __mbstowcs_alloc). It might not be
the best option though, as noted by Rich in comment #4 (it result false
positives).
In any case, I do think a better alternative is in fact avoid make fnmatch
to go temporary wchar_t string for MB_CUR_MAX != 1 (and avoid the mbsrtowcs
call altogether). I started to work on it and it should be doable, the
main time consuming think is to adapt the algorithms to access the
collation tables.
On 08/17/2018 02:36 PM, Adhemerval Zanella wrote: > > > On 14/08/2018 12:53, Florian Weimer wrote: >> Sorry, I posted the wrong version of the patch. Try this one. > > I think we should first address which should the desirable solution for > BZ#14185: 1. reject as non-matching or give an error on invalid multibyte > strings or 2. fallback on single-bye matching for these invalid entries. > The second option is already on gnulib, so we can either first sync with > it work to get it sync back (with the __mbstowcs_alloc). It might not be > the best option though, as noted by Rich in comment #4 (it result false > positives). I don't think my proposed cleanup will interfere with these fixes at all (less code would have to be deleted), so I don't see the reason why to block it. Thanks, Florian
On 17/08/2018 09:48, Florian Weimer wrote: > On 08/17/2018 02:36 PM, Adhemerval Zanella wrote: >> >> >> On 14/08/2018 12:53, Florian Weimer wrote: >>> Sorry, I posted the wrong version of the patch. Try this one. >> >> I think we should first address which should the desirable solution for >> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte >> strings or 2. fallback on single-bye matching for these invalid entries. >> The second option is already on gnulib, so we can either first sync with >> it work to get it sync back (with the __mbstowcs_alloc). It might not be >> the best option though, as noted by Rich in comment #4 (it result false >> positives). > > I don't think my proposed cleanup will interfere with these fixes at all (less code would have to be deleted), so I don't see the reason why to block it. I am trying to avoid make it harder to sync back with gnulib, however if the idea is indeed deviate from it (with a possible different solution for BZ#14185) further cleanups might be possible on fnmatch implementation.
diff --git a/posix/fnmatch.c b/posix/fnmatch.c index a9b762624f..d2a86839c6 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -325,119 +325,28 @@ fnmatch (const char *pattern, const char *string, int flags) mbstate_t ps; size_t n; const char *p; - wchar_t *wpattern_malloc = NULL; - wchar_t *wpattern; - wchar_t *wstring_malloc = NULL; - wchar_t *wstring; - size_t alloca_used = 0; - /* Convert the strings into wide characters. */ - memset (&ps, '\0', sizeof (ps)); - p = pattern; -#ifdef _LIBC - n = __strnlen (pattern, 1024); -#else - n = strlen (pattern); -#endif - if (__glibc_likely (n < 1024)) - { - wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t), - alloca_used); - n = mbsrtowcs (wpattern, &p, n + 1, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - /* Something wrong. - XXX Do we have to set `errno' to something which mbsrtows hasn't - already done? */ - return -1; - if (p) - { - memset (&ps, '\0', sizeof (ps)); - goto prepare_wpattern; - } - } - else - { - prepare_wpattern: - n = mbsrtowcs (NULL, &pattern, 0, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - /* Something wrong. - XXX Do we have to set `errno' to something which mbsrtows hasn't - already done? */ - return -1; - if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t))) - { - __set_errno (ENOMEM); - return -2; - } - wpattern_malloc = wpattern - = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t)); - assert (mbsinit (&ps)); - if (wpattern == NULL) - return -2; - (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps); - } + void *to_free1; + wchar_t wpattern_scratch[256]; + wchar_t *wpattern = __mbstowcs_alloc + (pattern, wpattern_scratch, array_length (wpattern_scratch), &to_free1); + if (wpattern == NULL) + return -1; - assert (mbsinit (&ps)); -#ifdef _LIBC - n = __strnlen (string, 1024); -#else - n = strlen (string); -#endif - p = string; - if (__glibc_likely (n < 1024)) + void *to_free2; + wchar_t wstring_scratch[256]; + wchar_t *wstring = __mbstowcs_alloc + (string, wstring_scratch, array_length (wstring_scratch), &to_free2); + if (wstring == NULL) { - wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t), - alloca_used); - n = mbsrtowcs (wstring, &p, n + 1, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - { - /* Something wrong. - XXX Do we have to set `errno' to something which - mbsrtows hasn't already done? */ - free_return: - free (wpattern_malloc); - return -1; - } - if (p) - { - memset (&ps, '\0', sizeof (ps)); - goto prepare_wstring; - } + free (to_free1); + return -1; } - else - { - prepare_wstring: - n = mbsrtowcs (NULL, &string, 0, &ps); - if (__glibc_unlikely (n == (size_t) -1)) - /* Something wrong. - XXX Do we have to set `errno' to something which mbsrtows hasn't - already done? */ - goto free_return; - if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t))) - { - free (wpattern_malloc); - __set_errno (ENOMEM); - return -2; - } - - wstring_malloc = wstring - = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t)); - if (wstring == NULL) - { - free (wpattern_malloc); - return -2; - } - assert (mbsinit (&ps)); - (void) mbsrtowcs (wstring, &string, n + 1, &ps); - } - - int res = internal_fnwmatch (wpattern, wstring, wstring + n, - flags & FNM_PERIOD, flags, NULL, - alloca_used); - - free (wstring_malloc); - free (wpattern_malloc); + int res = internal_fnwmatch (wpattern, wstring, wstring + wcslen (wstring), + flags & FNM_PERIOD, flags, NULL, 0); + free (to_free2); + free (to_free1); return res; } # endif /* mbstate_t and mbsrtowcs or _LIBC. */