diff mbox series

fnmatch: Use __mbstowcs_alloc [BZ #23519]

Message ID 20180814154642.09826405971F7@oldenburg.str.redhat.com
State New
Headers show
Series fnmatch: Use __mbstowcs_alloc [BZ #23519] | expand

Commit Message

Florian Weimer Aug. 14, 2018, 3:46 p.m. UTC
2018-08-14  Florian Weimer  <fweimer@redhat.com>

	[BZ #23519]
	* posix/fnmatch.c [HANDLE_MULTIBYTE] (fnmatch): Use
	__mbstowcs_alloc.

Comments

Florian Weimer Aug. 14, 2018, 3:53 p.m. UTC | #1
Sorry, I posted the wrong version of the patch.  Try this one.
Subject: [PATCH] fnmatch: Use __mbstowcs_alloc [BZ #23519]
To: libc-alpha@sourceware.org

2018-08-14  Florian Weimer  <fweimer@redhat.com>

	[BZ #23519]
	* posix/fnmatch.c [HANDLE_MULTIBYTE] (fnmatch): Use
	__mbstowcs_alloc.

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index a9b762624f..1ce6c88d6a 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -35,6 +35,7 @@
 #endif
 
 #ifdef _LIBC
+# include <array_length.h>
 # include <alloca.h>
 #else
 # define alloca_account(size., var) alloca (size)
@@ -322,122 +323,28 @@ fnmatch (const char *pattern, const char *string, int flags)
 # if HANDLE_MULTIBYTE
   if (__builtin_expect (MB_CUR_MAX, 1) != 1)
     {
-      mbstate_t ps;
-      size_t n;
-      const char *p;
-      wchar_t *wpattern_malloc = NULL;
-      wchar_t *wpattern;
-      wchar_t *wstring_malloc = NULL;
-      wchar_t *wstring;
-      size_t alloca_used = 0;
+      void *to_free1;
+      wchar_t wpattern_scratch[256];
+      wchar_t *wpattern = __mbstowcs_alloc
+	(pattern, wpattern_scratch, array_length (wpattern_scratch), &to_free1);
+      if (wpattern == NULL)
+	return -1;
 
-      /* Convert the strings into wide characters.  */
-      memset (&ps, '\0', sizeof (ps));
-      p = pattern;
-#ifdef _LIBC
-      n = __strnlen (pattern, 1024);
-#else
-      n = strlen (pattern);
-#endif
-      if (__glibc_likely (n < 1024))
+      void *to_free2;
+      wchar_t wstring_scratch[256];
+      wchar_t *wstring = __mbstowcs_alloc
+	(string, wstring_scratch, array_length (wstring_scratch), &to_free2);
+      if (wstring == NULL)
 	{
-	  wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-						 alloca_used);
-	  n = mbsrtowcs (wpattern, &p, n + 1, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    return -1;
-	  if (p)
-	    {
-	      memset (&ps, '\0', sizeof (ps));
-	      goto prepare_wpattern;
-	    }
+	  free (to_free1);
+	  return -1;
 	}
-      else
-	{
-	prepare_wpattern:
-	  n = mbsrtowcs (NULL, &pattern, 0, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    return -1;
-	  if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-	    {
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
-	  wpattern_malloc = wpattern
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-	  assert (mbsinit (&ps));
-	  if (wpattern == NULL)
-	    return -2;
-	  (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
-	}
-
-      assert (mbsinit (&ps));
-#ifdef _LIBC
-      n = __strnlen (string, 1024);
-#else
-      n = strlen (string);
-#endif
-      p = string;
-      if (__glibc_likely (n < 1024))
-	{
-	  wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-						alloca_used);
-	  n = mbsrtowcs (wstring, &p, n + 1, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    {
-	      /* Something wrong.
-		 XXX Do we have to set `errno' to something which
-		 mbsrtows hasn't already done?  */
-	    free_return:
-	      free (wpattern_malloc);
-	      return -1;
-	    }
-	  if (p)
-	    {
-	      memset (&ps, '\0', sizeof (ps));
-	      goto prepare_wstring;
-	    }
-	}
-      else
-	{
-	prepare_wstring:
-	  n = mbsrtowcs (NULL, &string, 0, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    goto free_return;
-	  if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-	    {
-	      free (wpattern_malloc);
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
-
-	  wstring_malloc = wstring
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-	  if (wstring == NULL)
-	    {
-	      free (wpattern_malloc);
-	      return -2;
-	    }
-	  assert (mbsinit (&ps));
-	  (void) mbsrtowcs (wstring, &string, n + 1, &ps);
-	}
-
-      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
-				   flags & FNM_PERIOD, flags, NULL,
-				   alloca_used);
-
-      free (wstring_malloc);
-      free (wpattern_malloc);
 
+      int res = internal_fnwmatch (wpattern,
+				   wstring, wstring + __wcslen (wstring),
+				   flags & FNM_PERIOD, flags, NULL, 0);
+      free (to_free2);
+      free (to_free1);
       return res;
     }
 # endif  /* mbstate_t and mbsrtowcs or _LIBC.  */
Adhemerval Zanella Aug. 17, 2018, 12:36 p.m. UTC | #2
On 14/08/2018 12:53, Florian Weimer wrote:
> Sorry, I posted the wrong version of the patch.  Try this one.

I think we should first address which should the desirable solution for
BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
strings or 2. fallback on single-bye matching for these invalid entries.
The second option is already on gnulib, so we can either first sync with 
it work to get it sync back (with the __mbstowcs_alloc). It might not be
the best option though, as noted by Rich in comment #4 (it result false
positives).

In any case, I do think a better alternative is in fact avoid make fnmatch
to go temporary wchar_t string for MB_CUR_MAX != 1 (and avoid the mbsrtowcs
call altogether). I started to work on it and it should be doable, the
main time consuming think is to adapt the algorithms to access the
collation tables.
Florian Weimer Aug. 17, 2018, 12:48 p.m. UTC | #3
On 08/17/2018 02:36 PM, Adhemerval Zanella wrote:
> 
> 
> On 14/08/2018 12:53, Florian Weimer wrote:
>> Sorry, I posted the wrong version of the patch.  Try this one.
> 
> I think we should first address which should the desirable solution for
> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
> strings or 2. fallback on single-bye matching for these invalid entries.
> The second option is already on gnulib, so we can either first sync with
> it work to get it sync back (with the __mbstowcs_alloc). It might not be
> the best option though, as noted by Rich in comment #4 (it result false
> positives).

I don't think my proposed cleanup will interfere with these fixes at all 
(less code would have to be deleted), so I don't see the reason why to 
block it.

Thanks,
Florian
Adhemerval Zanella Aug. 17, 2018, 12:52 p.m. UTC | #4
On 17/08/2018 09:48, Florian Weimer wrote:
> On 08/17/2018 02:36 PM, Adhemerval Zanella wrote:
>>
>>
>> On 14/08/2018 12:53, Florian Weimer wrote:
>>> Sorry, I posted the wrong version of the patch.  Try this one.
>>
>> I think we should first address which should the desirable solution for
>> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
>> strings or 2. fallback on single-bye matching for these invalid entries.
>> The second option is already on gnulib, so we can either first sync with
>> it work to get it sync back (with the __mbstowcs_alloc). It might not be
>> the best option though, as noted by Rich in comment #4 (it result false
>> positives).
> 
> I don't think my proposed cleanup will interfere with these fixes at all (less code would have to be deleted), so I don't see the reason why to block it.

I am trying to avoid make it harder to sync back with gnulib, however if
the idea is indeed deviate from it (with a possible different solution
for BZ#14185) further cleanups might be possible on fnmatch implementation.
diff mbox series

Patch

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index a9b762624f..d2a86839c6 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -325,119 +325,28 @@  fnmatch (const char *pattern, const char *string, int flags)
       mbstate_t ps;
       size_t n;
       const char *p;
-      wchar_t *wpattern_malloc = NULL;
-      wchar_t *wpattern;
-      wchar_t *wstring_malloc = NULL;
-      wchar_t *wstring;
-      size_t alloca_used = 0;
 
-      /* Convert the strings into wide characters.  */
-      memset (&ps, '\0', sizeof (ps));
-      p = pattern;
-#ifdef _LIBC
-      n = __strnlen (pattern, 1024);
-#else
-      n = strlen (pattern);
-#endif
-      if (__glibc_likely (n < 1024))
-	{
-	  wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-						 alloca_used);
-	  n = mbsrtowcs (wpattern, &p, n + 1, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    return -1;
-	  if (p)
-	    {
-	      memset (&ps, '\0', sizeof (ps));
-	      goto prepare_wpattern;
-	    }
-	}
-      else
-	{
-	prepare_wpattern:
-	  n = mbsrtowcs (NULL, &pattern, 0, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    return -1;
-	  if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-	    {
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
-	  wpattern_malloc = wpattern
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-	  assert (mbsinit (&ps));
-	  if (wpattern == NULL)
-	    return -2;
-	  (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
-	}
+      void *to_free1;
+      wchar_t wpattern_scratch[256];
+      wchar_t *wpattern = __mbstowcs_alloc
+	(pattern, wpattern_scratch, array_length (wpattern_scratch), &to_free1);
+      if (wpattern == NULL)
+	return -1;
 
-      assert (mbsinit (&ps));
-#ifdef _LIBC
-      n = __strnlen (string, 1024);
-#else
-      n = strlen (string);
-#endif
-      p = string;
-      if (__glibc_likely (n < 1024))
+      void *to_free2;
+      wchar_t wstring_scratch[256];
+      wchar_t *wstring = __mbstowcs_alloc
+	(string, wstring_scratch, array_length (wstring_scratch), &to_free2);
+      if (wstring == NULL)
 	{
-	  wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-						alloca_used);
-	  n = mbsrtowcs (wstring, &p, n + 1, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    {
-	      /* Something wrong.
-		 XXX Do we have to set `errno' to something which
-		 mbsrtows hasn't already done?  */
-	    free_return:
-	      free (wpattern_malloc);
-	      return -1;
-	    }
-	  if (p)
-	    {
-	      memset (&ps, '\0', sizeof (ps));
-	      goto prepare_wstring;
-	    }
+	  free (to_free1);
+	  return -1;
 	}
-      else
-	{
-	prepare_wstring:
-	  n = mbsrtowcs (NULL, &string, 0, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    goto free_return;
-	  if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-	    {
-	      free (wpattern_malloc);
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
-
-	  wstring_malloc = wstring
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-	  if (wstring == NULL)
-	    {
-	      free (wpattern_malloc);
-	      return -2;
-	    }
-	  assert (mbsinit (&ps));
-	  (void) mbsrtowcs (wstring, &string, n + 1, &ps);
-	}
-
-      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
-				   flags & FNM_PERIOD, flags, NULL,
-				   alloca_used);
-
-      free (wstring_malloc);
-      free (wpattern_malloc);
 
+      int res = internal_fnwmatch (wpattern, wstring, wstring + wcslen (wstring),
+				   flags & FNM_PERIOD, flags, NULL, 0);
+      free (to_free2);
+      free (to_free1);
       return res;
     }
 # endif  /* mbstate_t and mbsrtowcs or _LIBC.  */