diff mbox series

ubsan: d-demangle.c:214 signed integer overflow

Message ID 20200903130116.GQ15695@bubble.grove.modra.org
State New
Headers show
Series ubsan: d-demangle.c:214 signed integer overflow | expand

Commit Message

Alan Modra Sept. 3, 2020, 1:01 p.m. UTC
Running the libiberty testsuite
./test-demangle < libiberty/testsuite/d-demangle-expected
libiberty/d-demangle.c:214:14: runtime error: signed integer overflow: 922337203 * 10 cannot be represented in type 'long int'

On looking at silencing ubsan, I found a real bug in dlang_number.
For a 32-bit long, some overflows won't be detected.  For example,
21474836480.  Why?  Well 214748364 * 10 is 0x7FFFFFF8 (no overflow so
far).  Adding 8 gives 0x80000000 (which does overflow but there is no
test for that overflow in the code).  Then multiplying 0x80000000 * 10
= 0x500000000 = 0 won't be caught by the multiplication overflow test.
The same holds for a 64-bit long using similarly crafted digit
sequences.

This patch replaces the mod 10 test with a simpler limit test, and
similarly the mod 26 test in dlang_decode_backref.

About the limit test:
  val * 10 + digit > ULONG_MAX is the condition for overflow
ie.
  val * 10 > ULONG_MAX - digit
or
  val > (ULONG_MAX - digit) / 10
or assuming the largest digit
  val > (ULONG_MAX - 9) / 10

I resisted the aesthetic appeal of simplifying this further to
  val > -10UL / 10
since -1UL for ULONG_MAX is only correct for 2's complement numbers.

Passes all the libiberty tests, on both 32-bit and 64-bit hosts.  OK
to apply?

	* d-demangle.c: Include limits.h.
	(ULONG_MAX): Provide fall-back definition.
	(dlang_number): Simplify and correct overflow test.  Only
	write *ret on returning non-NULL.
	(dlang_decode_backref): Likewise.

Comments

Iain Buclaw Sept. 3, 2020, 9:02 p.m. UTC | #1
Excerpts from Alan Modra's message of September 3, 2020 3:01 pm:
> Running the libiberty testsuite
> ./test-demangle < libiberty/testsuite/d-demangle-expected
> libiberty/d-demangle.c:214:14: runtime error: signed integer overflow: 922337203 * 10 cannot be represented in type 'long int'
> 
> On looking at silencing ubsan, I found a real bug in dlang_number.
> For a 32-bit long, some overflows won't be detected.  For example,
> 21474836480.  Why?  Well 214748364 * 10 is 0x7FFFFFF8 (no overflow so
> far).  Adding 8 gives 0x80000000 (which does overflow but there is no
> test for that overflow in the code).  Then multiplying 0x80000000 * 10
> = 0x500000000 = 0 won't be caught by the multiplication overflow test.
> The same holds for a 64-bit long using similarly crafted digit
> sequences.
> 
> This patch replaces the mod 10 test with a simpler limit test, and
> similarly the mod 26 test in dlang_decode_backref.
> 
> About the limit test:
>   val * 10 + digit > ULONG_MAX is the condition for overflow
> ie.
>   val * 10 > ULONG_MAX - digit
> or
>   val > (ULONG_MAX - digit) / 10
> or assuming the largest digit
>   val > (ULONG_MAX - 9) / 10
> 
> I resisted the aesthetic appeal of simplifying this further to
>   val > -10UL / 10
> since -1UL for ULONG_MAX is only correct for 2's complement numbers.
> 
> Passes all the libiberty tests, on both 32-bit and 64-bit hosts.  OK
> to apply?
> 

Thanks Alan, change seems reasonable, however on giving it a mull over,
I see that the largest number that dlang_number would need to be able to
handle is UINT_MAX.  These two tests which decode a wchar value are
representative of that (first is valid, second invalid).

#
--format=dlang
_D4test21__T3funVwi4294967295Z3funFNaNbNiNfZv
test.fun!('\Uffffffff').fun()
#
--format=dlang
_D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv
_D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv

I'm fine with creating a new PR and dealing with the above in a separate
change though, as it will require a few more replacements to adjust the
result parameter type to 'unsigned' or 'long long'.  

Iain.


> 	* d-demangle.c: Include limits.h.
> 	(ULONG_MAX): Provide fall-back definition.
> 	(dlang_number): Simplify and correct overflow test.  Only
> 	write *ret on returning non-NULL.
> 	(dlang_decode_backref): Likewise.
> 
> diff --git a/libiberty/d-demangle.c b/libiberty/d-demangle.c
> index f2d6946eca..59e6ae007a 100644
> --- a/libiberty/d-demangle.c
> +++ b/libiberty/d-demangle.c
> @@ -31,6 +31,9 @@ If not, see <http://www.gnu.org/licenses/>.  */
>  #ifdef HAVE_CONFIG_H
>  #include "config.h"
>  #endif
> +#ifdef HAVE_LIMITS_H
> +#include <limits.h>
> +#endif
>  
>  #include "safe-ctype.h"
>  
> @@ -45,6 +48,10 @@ If not, see <http://www.gnu.org/licenses/>.  */
>  #include <demangle.h>
>  #include "libiberty.h"
>  
> +#ifndef ULONG_MAX
> +#define	ULONG_MAX	(~0UL)
> +#endif
> +
>  /* A mini string-handling package */
>  
>  typedef struct string		/* Beware: these aren't required to be */
> @@ -207,24 +214,24 @@ dlang_number (const char *mangled, long *ret)
>    if (mangled == NULL || !ISDIGIT (*mangled))
>      return NULL;
>  
> -  (*ret) = 0;
> +  unsigned long val = 0;
>  
>    while (ISDIGIT (*mangled))
>      {
> -      (*ret) *= 10;
> -
> -      /* If an overflow occured when multiplying by ten, the result
> -	 will not be a multiple of ten.  */
> -      if ((*ret % 10) != 0)
> +      /* Check for overflow.  Yes, we return NULL here for some digits
> +	 that don't overflow "val * 10 + digit", but that doesn't
> +	 matter given the later "(long) val < 0" test.  */
> +      if (val > (ULONG_MAX - 9) / 10)
>  	return NULL;
>  
> -      (*ret) += mangled[0] - '0';
> +      val = val * 10 + mangled[0] - '0';
>        mangled++;
>      }
>  
> -  if (*mangled == '\0' || *ret < 0)
> +  if (*mangled == '\0' || (long) val < 0)
>      return NULL;
>  
> +  *ret = val;
>    return mangled;
>  }
>  
> @@ -294,24 +301,24 @@ dlang_decode_backref (const char *mangled, long *ret)
>  	    [A-Z] NumberBackRef
>  	    ^
>     */
> -  (*ret) = 0;
> +  unsigned long val = 0;
>  
>    while (ISALPHA (*mangled))
>      {
> -      (*ret) *= 26;
> +      /* Check for overflow.  */
> +      if (val > (ULONG_MAX - 25) / 26)
> +	break;
>  
> -      /* If an overflow occured when multiplying by 26, the result
> -	 will not be a multiple of 26.  */
> -      if ((*ret % 26) != 0)
> -	return NULL;
> +      val *= 26;
>  
>        if (mangled[0] >= 'a' && mangled[0] <= 'z')
>  	{
> -	  (*ret) += mangled[0] - 'a';
> +	  val += mangled[0] - 'a';
> +	  *ret = val;
>  	  return mangled + 1;
>  	}
>  
> -      (*ret) += mangled[0] - 'A';
> +      val += mangled[0] - 'A';
>        mangled++;
>      }
>  
> -- 
> Alan Modra
> Australia Development Lab, IBM
>
Alan Modra Sept. 4, 2020, 12:59 a.m. UTC | #2
On Thu, Sep 03, 2020 at 11:02:50PM +0200, Iain Buclaw wrote:
> Excerpts from Alan Modra's message of September 3, 2020 3:01 pm:
> > Running the libiberty testsuite
> > ./test-demangle < libiberty/testsuite/d-demangle-expected
> > libiberty/d-demangle.c:214:14: runtime error: signed integer overflow: 922337203 * 10 cannot be represented in type 'long int'
> > 
> > On looking at silencing ubsan, I found a real bug in dlang_number.
> > For a 32-bit long, some overflows won't be detected.  For example,
> > 21474836480.  Why?  Well 214748364 * 10 is 0x7FFFFFF8 (no overflow so
> > far).  Adding 8 gives 0x80000000 (which does overflow but there is no
> > test for that overflow in the code).  Then multiplying 0x80000000 * 10
> > = 0x500000000 = 0 won't be caught by the multiplication overflow test.
> > The same holds for a 64-bit long using similarly crafted digit
> > sequences.
> > 
> > This patch replaces the mod 10 test with a simpler limit test, and
> > similarly the mod 26 test in dlang_decode_backref.
> > 
> > About the limit test:
> >   val * 10 + digit > ULONG_MAX is the condition for overflow
> > ie.
> >   val * 10 > ULONG_MAX - digit
> > or
> >   val > (ULONG_MAX - digit) / 10
> > or assuming the largest digit
> >   val > (ULONG_MAX - 9) / 10
> > 
> > I resisted the aesthetic appeal of simplifying this further to
> >   val > -10UL / 10
> > since -1UL for ULONG_MAX is only correct for 2's complement numbers.
> > 
> > Passes all the libiberty tests, on both 32-bit and 64-bit hosts.  OK
> > to apply?
> > 
> 
> Thanks Alan, change seems reasonable, however on giving it a mull over,
> I see that the largest number that dlang_number would need to be able to
> handle is UINT_MAX.  These two tests which decode a wchar value are
> representative of that (first is valid, second invalid).
> 
> #
> --format=dlang
> _D4test21__T3funVwi4294967295Z3funFNaNbNiNfZv
> test.fun!('\Uffffffff').fun()
> #
> --format=dlang
> _D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv
> _D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv

Hmm, OK, on a 32-bit host those both won't be handled due to the
"(long) val < 0" test.

Do you really want the last one to fail on a 64-bit host, ie. not
produce "test.fun!('\U100000000').fun()"?  I'm guessing you do, but
I'd like that confirmed before posting a followup patch.

> I'm fine with creating a new PR and dealing with the above in a separate
> change though, as it will require a few more replacements to adjust the
> result parameter type to 'unsigned' or 'long long'.  

Ha!  When I wrote the original patch I changed most of the "long"
parameters and variables to "unsigned long", because it was clear from
the code (and my understanding of name mangling) that the values were
in fact always unsigned.  I also changed "int" to "size_t" where
appropriate, not that you need to handle strings larger than 2G in
length but simply that size_t is the correct type to use with string
functions, malloc, memcpy and so on.  I decided to remove all those
changes before posting as they were just tidies, I thought.
Iain Buclaw Sept. 4, 2020, 11:22 a.m. UTC | #3
Excerpts from Alan Modra's message of September 4, 2020 2:59 am:
> On Thu, Sep 03, 2020 at 11:02:50PM +0200, Iain Buclaw wrote:
>> Excerpts from Alan Modra's message of September 3, 2020 3:01 pm:
>> > Running the libiberty testsuite
>> > ./test-demangle < libiberty/testsuite/d-demangle-expected
>> > libiberty/d-demangle.c:214:14: runtime error: signed integer overflow: 922337203 * 10 cannot be represented in type 'long int'
>> > 
>> > On looking at silencing ubsan, I found a real bug in dlang_number.
>> > For a 32-bit long, some overflows won't be detected.  For example,
>> > 21474836480.  Why?  Well 214748364 * 10 is 0x7FFFFFF8 (no overflow so
>> > far).  Adding 8 gives 0x80000000 (which does overflow but there is no
>> > test for that overflow in the code).  Then multiplying 0x80000000 * 10
>> > = 0x500000000 = 0 won't be caught by the multiplication overflow test.
>> > The same holds for a 64-bit long using similarly crafted digit
>> > sequences.
>> > 
>> > This patch replaces the mod 10 test with a simpler limit test, and
>> > similarly the mod 26 test in dlang_decode_backref.
>> > 
>> > About the limit test:
>> >   val * 10 + digit > ULONG_MAX is the condition for overflow
>> > ie.
>> >   val * 10 > ULONG_MAX - digit
>> > or
>> >   val > (ULONG_MAX - digit) / 10
>> > or assuming the largest digit
>> >   val > (ULONG_MAX - 9) / 10
>> > 
>> > I resisted the aesthetic appeal of simplifying this further to
>> >   val > -10UL / 10
>> > since -1UL for ULONG_MAX is only correct for 2's complement numbers.
>> > 
>> > Passes all the libiberty tests, on both 32-bit and 64-bit hosts.  OK
>> > to apply?
>> > 
>> 
>> Thanks Alan, change seems reasonable, however on giving it a mull over,
>> I see that the largest number that dlang_number would need to be able to
>> handle is UINT_MAX.  These two tests which decode a wchar value are
>> representative of that (first is valid, second invalid).
>> 
>> #
>> --format=dlang
>> _D4test21__T3funVwi4294967295Z3funFNaNbNiNfZv
>> test.fun!('\Uffffffff').fun()
>> #
>> --format=dlang
>> _D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv
>> _D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv
> 
> Hmm, OK, on a 32-bit host those both won't be handled due to the
> "(long) val < 0" test.
> 
> Do you really want the last one to fail on a 64-bit host, ie. not
> produce "test.fun!('\U100000000').fun()"?  I'm guessing you do, but
> I'd like that confirmed before posting a followup patch.
> 

Actually I meant dchar (char32_t in C++11), and yes, it should fail on a
64-bit host, as there is no such thing as a 64-bit character type.
Though arguably, that could be handled by the caller in
dlang_parse_integer.

Looking at the places where dlang_number is used, I see:

- Get length of next symbol (unlikely to be bigger than UCHAR_MAX).
- Get length of embedded template symbol (can get very long, but rarely
  to the point where USHRT_MAX is exceeded).
- Get number of elements in an encoded tuple, string, or array literal
  (anything bigger than INT_MAX not allowed at compile-time).
- Get an encoded boolean value (0 or 1).
- Get an encoded character value (max valid value is 0x0010FFFF, but as
  dchar's an unsigned 32-bit type, might as well allow up to UINT_MAX).

So I think UINT_MAX is a reasonable artificial limit.

>> I'm fine with creating a new PR and dealing with the above in a separate
>> change though, as it will require a few more replacements to adjust the
>> result parameter type to 'unsigned' or 'long long'.  
> 
> Ha!  When I wrote the original patch I changed most of the "long"
> parameters and variables to "unsigned long", because it was clear from
> the code (and my understanding of name mangling) that the values were
> in fact always unsigned.  I also changed "int" to "size_t" where
> appropriate, not that you need to handle strings larger than 2G in
> length but simply that size_t is the correct type to use with string
> functions, malloc, memcpy and so on.  I decided to remove all those
> changes before posting as they were just tidies, I thought.
> 

Yeah, signedness is handled outside of dlang_number, so only need to
treat all numbers as unsigned for the purpose of decoding.

Apologies that you seem to have opened a can of worms here. :-)

Iain.
Alan Modra Sept. 4, 2020, 1:34 p.m. UTC | #4
So this one is on top of the previously posted patch.

	* d-demangle.c (string_need): Take a size_t n arg, and use size_t tem.
	(string_append): Use size_t n.
	(string_appendn, string_prependn): Take a size_t n arg.
	(TEMPLATE_LENGTH_UNKNOWN): Define as -1UL.
	* d-demangle.c (dlang_number): Make "ret" an unsigned long*.
	Only succeed for result of [0,4294967295UL].
	(dlang_decode_backref): Only succeed for result [1,MAX_LONG].
	(dlang_backref): Remove now unnecessary range check.
	(dlang_symbol_name_p): Likewise.
	(dlang_lname, dlang_parse_template): Take an unsigned long len
	arg.
	(dlang_symbol_backref, dlang_identifier, dlang_parse_integer),
	(dlang_parse_integer, dlang_parse_string),
	(dlang_parse_arrayliteral, dlang_parse_assocarray),
	(dlang_parse_structlit, dlang_parse_tuple),
	(dlang_template_symbol_param, dlang_template_args): Use
	unsigned long variables.
	* testsuite/d-demangle-expected: Add new tests.

diff --git a/libiberty/d-demangle.c b/libiberty/d-demangle.c
index 59e6ae007a..152f620abf 100644
--- a/libiberty/d-demangle.c
+++ b/libiberty/d-demangle.c
@@ -62,9 +62,9 @@ typedef struct string		/* Beware: these aren't required to be */
 } string;
 
 static void
-string_need (string *s, int n)
+string_need (string *s, size_t n)
 {
-  int tem;
+  size_t tem;
 
   if (s->b == NULL)
     {
@@ -75,7 +75,7 @@ string_need (string *s, int n)
       s->p = s->b = XNEWVEC (char, n);
       s->e = s->b + n;
     }
-  else if (s->e - s->p < n)
+  else if ((size_t) (s->e - s->p) < n)
     {
       tem = s->p - s->b;
       n += tem;
@@ -124,14 +124,14 @@ string_setlength (string *s, int n)
 static void
 string_append (string *p, const char *s)
 {
-  int n = strlen (s);
+  size_t n = strlen (s);
   string_need (p, n);
   memcpy (p->p, s, n);
   p->p += n;
 }
 
 static void
-string_appendn (string *p, const char *s, int n)
+string_appendn (string *p, const char *s, size_t n)
 {
   if (n != 0)
     {
@@ -142,7 +142,7 @@ string_appendn (string *p, const char *s, int n)
 }
 
 static void
-string_prependn (string *p, const char *s, int n)
+string_prependn (string *p, const char *s, size_t n)
 {
   char *q;
 
@@ -177,7 +177,7 @@ struct dlang_info
 };
 
 /* Pass as the LEN to dlang_parse_template if symbol length is not known.  */
-enum { TEMPLATE_LENGTH_UNKNOWN = -1 };
+#define TEMPLATE_LENGTH_UNKNOWN (-1UL)
 
 /* Prototypes for forward referenced functions */
 static const char *dlang_function_type (string *, const char *,
@@ -200,15 +200,16 @@ static const char *dlang_parse_tuple (string *, const char *,
 				      struct dlang_info *);
 
 static const char *dlang_parse_template (string *, const char *,
-					 struct dlang_info *, long);
+					 struct dlang_info *, unsigned long);
 
-static const char *dlang_lname (string *, const char *, long);
+static const char *dlang_lname (string *, const char *, unsigned long);
 
 
 /* Extract the number from MANGLED, and assign the result to RET.
-   Return the remaining string on success or NULL on failure.  */
+   Return the remaining string on success or NULL on failure.
+   A result larger than 4294967295UL is considered a failure.  */
 static const char *
-dlang_number (const char *mangled, long *ret)
+dlang_number (const char *mangled, unsigned long *ret)
 {
   /* Return NULL if trying to extract something that isn't a digit.  */
   if (mangled == NULL || !ISDIGIT (*mangled))
@@ -218,17 +219,17 @@ dlang_number (const char *mangled, long *ret)
 
   while (ISDIGIT (*mangled))
     {
-      /* Check for overflow.  Yes, we return NULL here for some digits
-	 that don't overflow "val * 10 + digit", but that doesn't
-	 matter given the later "(long) val < 0" test.  */
-      if (val > (ULONG_MAX - 9) / 10)
+      unsigned long digit = mangled[0] - '0';
+
+      /* Check for overflow.  */
+      if (val > (4294967295UL - digit) / 10)
 	return NULL;
 
-      val = val * 10 + mangled[0] - '0';
+      val = val * 10 + digit;
       mangled++;
     }
 
-  if (*mangled == '\0' || (long) val < 0)
+  if (*mangled == '\0')
     return NULL;
 
   *ret = val;
@@ -280,7 +281,8 @@ dlang_call_convention_p (const char *mangled)
 }
 
 /* Extract the back reference position from MANGLED, and assign the result
-   to RET.  Return the remaining string on success or NULL on failure.  */
+   to RET.  Return the remaining string on success or NULL on failure.
+   A result <= 0 is a failure.  */
 static const char *
 dlang_decode_backref (const char *mangled, long *ret)
 {
@@ -314,6 +316,8 @@ dlang_decode_backref (const char *mangled, long *ret)
       if (mangled[0] >= 'a' && mangled[0] <= 'z')
 	{
 	  val += mangled[0] - 'a';
+	  if ((long) val <= 0)
+	    break;
 	  *ret = val;
 	  return mangled + 1;
 	}
@@ -344,7 +348,7 @@ dlang_backref (const char *mangled, const char **ret, struct dlang_info *info)
   if (mangled == NULL)
     return NULL;
 
-  if (refpos <= 0 || refpos > qpos - info->s)
+  if (refpos > qpos - info->s)
     return NULL;
 
   /* Set the position of the back reference.  */
@@ -366,7 +370,7 @@ dlang_symbol_backref (string *decl, const char *mangled,
 	    ^
    */
   const char *backref;
-  long len;
+  unsigned long len;
 
   /* Get position of the back reference.  */
   mangled = dlang_backref (mangled, &backref, info);
@@ -442,7 +446,7 @@ dlang_symbol_name_p (const char *mangled, struct dlang_info *info)
     return 0;
 
   mangled = dlang_decode_backref (mangled + 1, &ret);
-  if (mangled == NULL || ret <= 0 || ret > qref - info->s)
+  if (mangled == NULL || ret > qref - info->s)
     return 0;
 
   return ISDIGIT (qref[-ret]);
@@ -992,7 +996,7 @@ dlang_type (string *decl, const char *mangled, struct dlang_info *info)
 static const char *
 dlang_identifier (string *decl, const char *mangled, struct dlang_info *info)
 {
-  long len;
+  unsigned long len;
 
   if (mangled == NULL || *mangled == '\0')
     return NULL;
@@ -1010,7 +1014,7 @@ dlang_identifier (string *decl, const char *mangled, struct dlang_info *info)
   if (endptr == NULL || len == 0)
     return NULL;
 
-  if (strlen (endptr) < (size_t) len)
+  if (strlen (endptr) < len)
     return NULL;
 
   mangled = endptr;
@@ -1027,7 +1031,7 @@ dlang_identifier (string *decl, const char *mangled, struct dlang_info *info)
    with special treatment for some magic compiler generted symbols.
    Return the remaining string on success or NULL on failure.  */
 static const char *
-dlang_lname (string *decl, const char *mangled, long len)
+dlang_lname (string *decl, const char *mangled, unsigned long len)
 {
   switch (len)
     {
@@ -1126,7 +1130,7 @@ dlang_parse_integer (string *decl, const char *mangled, char type)
       char value[20];
       int pos = sizeof(value);
       int width = 0;
-      long val;
+      unsigned long val;
 
       mangled = dlang_number (mangled, &val);
       if (mangled == NULL)
@@ -1182,7 +1186,7 @@ dlang_parse_integer (string *decl, const char *mangled, char type)
   else if (type == 'b')
     {
       /* Parse boolean value.  */
-      long val;
+      unsigned long val;
 
       mangled = dlang_number (mangled, &val);
       if (mangled == NULL)
@@ -1301,7 +1305,7 @@ static const char *
 dlang_parse_string (string *decl, const char *mangled)
 {
   char type = *mangled;
-  long len;
+  unsigned long len;
 
   mangled++;
   mangled = dlang_number (mangled, &len);
@@ -1365,7 +1369,7 @@ dlang_parse_string (string *decl, const char *mangled)
 static const char *
 dlang_parse_arrayliteral (string *decl, const char *mangled)
 {
-  long elements;
+  unsigned long elements;
 
   mangled = dlang_number (mangled, &elements);
   if (mangled == NULL)
@@ -1391,7 +1395,7 @@ dlang_parse_arrayliteral (string *decl, const char *mangled)
 static const char *
 dlang_parse_assocarray (string *decl, const char *mangled)
 {
-  long elements;
+  unsigned long elements;
 
   mangled = dlang_number (mangled, &elements);
   if (mangled == NULL)
@@ -1422,7 +1426,7 @@ dlang_parse_assocarray (string *decl, const char *mangled)
 static const char *
 dlang_parse_structlit (string *decl, const char *mangled, const char *name)
 {
-  long args;
+  unsigned long args;
 
   mangled = dlang_number (mangled, &args);
   if (mangled == NULL)
@@ -1649,7 +1653,7 @@ dlang_parse_qualified (string *decl, const char *mangled,
 static const char *
 dlang_parse_tuple (string *decl, const char *mangled, struct dlang_info *info)
 {
-  long elements;
+  unsigned long elements;
 
   mangled = dlang_number (mangled, &elements);
   if (mangled == NULL)
@@ -1684,7 +1688,7 @@ dlang_template_symbol_param (string *decl, const char *mangled,
   if (*mangled == 'Q')
     return dlang_parse_qualified (decl, mangled, info, 0);
 
-  long len;
+  unsigned long len;
   const char *endptr = dlang_number (mangled, &len);
 
   if (endptr == NULL || len == 0)
@@ -1797,12 +1801,12 @@ dlang_template_args (string *decl, const char *mangled, struct dlang_info *info)
 	}
 	case 'X': /* Externally mangled parameter.  */
 	{
-	  long len;
+	  unsigned long len;
 	  const char *endptr;
 
 	  mangled++;
 	  endptr = dlang_number (mangled, &len);
-	  if (endptr == NULL || strlen (endptr) < (size_t) len)
+	  if (endptr == NULL || strlen (endptr) < len)
 	    return NULL;
 
 	  string_appendn (decl, endptr, len);
@@ -1822,7 +1826,7 @@ dlang_template_args (string *decl, const char *mangled, struct dlang_info *info)
    Returns the remaining signature on success or NULL on failure.  */
 static const char *
 dlang_parse_template (string *decl, const char *mangled,
-		      struct dlang_info *info, long len)
+		      struct dlang_info *info, unsigned long len)
 {
   const char *start = mangled;
   string args;
@@ -1858,7 +1862,9 @@ dlang_parse_template (string *decl, const char *mangled,
   string_delete (&args);
 
   /* Check for template name length mismatch.  */
-  if (len != TEMPLATE_LENGTH_UNKNOWN && mangled && (mangled - start) != len)
+  if (len != TEMPLATE_LENGTH_UNKNOWN
+      && mangled
+      && (unsigned long) (mangled - start) != len)
     return NULL;
 
   return mangled;
diff --git a/libiberty/testsuite/d-demangle-expected b/libiberty/testsuite/d-demangle-expected
index e3f32e31d7..97bcdd5978 100644
--- a/libiberty/testsuite/d-demangle-expected
+++ b/libiberty/testsuite/d-demangle-expected
@@ -1140,6 +1140,14 @@ _D4test34__T3barVG3uw3_616263VG3wd3_646566Z1xi
 test.bar!("abc"w, "def"d).x
 #
 --format=dlang
+_D4test21__T3funVwi4294967295Z3funFNaNbNiNfZv
+test.fun!('\Uffffffff').fun()
+#
+--format=dlang
+_D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv
+_D4test21__T3funVwi4294967296Z3funFNaNbNiNfZv
+#
+--format=dlang
 _D6plugin8generateFiiZAya
 plugin.generate(int, int)
 #
Iain Buclaw Sept. 4, 2020, 4:23 p.m. UTC | #5
Excerpts from Alan Modra's message of September 4, 2020 3:34 pm:
> So this one is on top of the previously posted patch.
> 
> 	* d-demangle.c (string_need): Take a size_t n arg, and use size_t tem.
> 	(string_append): Use size_t n.
> 	(string_appendn, string_prependn): Take a size_t n arg.
> 	(TEMPLATE_LENGTH_UNKNOWN): Define as -1UL.
> 	* d-demangle.c (dlang_number): Make "ret" an unsigned long*.
> 	Only succeed for result of [0,4294967295UL].
> 	(dlang_decode_backref): Only succeed for result [1,MAX_LONG].
> 	(dlang_backref): Remove now unnecessary range check.
> 	(dlang_symbol_name_p): Likewise.
> 	(dlang_lname, dlang_parse_template): Take an unsigned long len
> 	arg.
> 	(dlang_symbol_backref, dlang_identifier, dlang_parse_integer),
> 	(dlang_parse_integer, dlang_parse_string),
> 	(dlang_parse_arrayliteral, dlang_parse_assocarray),
> 	(dlang_parse_structlit, dlang_parse_tuple),
> 	(dlang_template_symbol_param, dlang_template_args): Use
> 	unsigned long variables.
> 	* testsuite/d-demangle-expected: Add new tests.
> 
> diff --git a/libiberty/d-demangle.c b/libiberty/d-demangle.c
> index 59e6ae007a..152f620abf 100644
> --- a/libiberty/d-demangle.c
> +++ b/libiberty/d-demangle.c
> @@ -62,9 +62,9 @@ typedef struct string		/* Beware: these aren't required to be */
>  } string;
>  
>  static void
> -string_need (string *s, int n)
> +string_need (string *s, size_t n)
>  {
> -  int tem;
> +  size_t tem;
>  
>    if (s->b == NULL)
>      {
> @@ -75,7 +75,7 @@ string_need (string *s, int n)
>        s->p = s->b = XNEWVEC (char, n);
>        s->e = s->b + n;
>      }
> -  else if (s->e - s->p < n)
> +  else if ((size_t) (s->e - s->p) < n)
>      {
>        tem = s->p - s->b;
>        n += tem;
> @@ -124,14 +124,14 @@ string_setlength (string *s, int n)
>  static void
>  string_append (string *p, const char *s)
>  {
> -  int n = strlen (s);
> +  size_t n = strlen (s);
>    string_need (p, n);
>    memcpy (p->p, s, n);
>    p->p += n;
>  }
>  
>  static void
> -string_appendn (string *p, const char *s, int n)
> +string_appendn (string *p, const char *s, size_t n)
>  {
>    if (n != 0)
>      {
> @@ -142,7 +142,7 @@ string_appendn (string *p, const char *s, int n)
>  }
>  
>  static void
> -string_prependn (string *p, const char *s, int n)
> +string_prependn (string *p, const char *s, size_t n)
>  {
>    char *q;
>  
> @@ -177,7 +177,7 @@ struct dlang_info
>  };
>  
>  /* Pass as the LEN to dlang_parse_template if symbol length is not known.  */
> -enum { TEMPLATE_LENGTH_UNKNOWN = -1 };
> +#define TEMPLATE_LENGTH_UNKNOWN (-1UL)
>  
>  /* Prototypes for forward referenced functions */
>  static const char *dlang_function_type (string *, const char *,
> @@ -200,15 +200,16 @@ static const char *dlang_parse_tuple (string *, const char *,
>  				      struct dlang_info *);
>  
>  static const char *dlang_parse_template (string *, const char *,
> -					 struct dlang_info *, long);
> +					 struct dlang_info *, unsigned long);
>  
> -static const char *dlang_lname (string *, const char *, long);
> +static const char *dlang_lname (string *, const char *, unsigned long);
>  
>  
>  /* Extract the number from MANGLED, and assign the result to RET.
> -   Return the remaining string on success or NULL on failure.  */
> +   Return the remaining string on success or NULL on failure.
> +   A result larger than 4294967295UL is considered a failure.  */

If we're already using limits.h, I guess it should be fine to also add

#define UINT_MAX ((unsigned) ~0U)

I'll leave it to your judgement on that though.

Other than that, OK from me.

Iain.
Alan Modra Sept. 7, 2020, 12:56 a.m. UTC | #6
On Fri, Sep 04, 2020 at 06:23:10PM +0200, Iain Buclaw wrote:
> If we're already using limits.h, I guess it should be fine to also add
> 
> #define UINT_MAX ((unsigned) ~0U)

Yes, except that I'll use the simpler fall-back
#define UINT_MAX (~0U)

The habit of using a cast for unsigned constants dates back to K&R C
where a U suffix was not valid.  For example, from libiberty/strtol.c
#define	ULONG_MAX	((unsigned long)(~0L))

Since the code uses ISO/ANSI C features such as prototypes I think
we're OK with a U suffix.  And if there's something I'm missing then
#define UINT_MAX ((unsigned) ~0)
would be correct for K&R.

> I'll leave it to your judgement on that though.
> 
> Other than that, OK from me.

Do I need an OK from Ian too?
Iain Buclaw Sept. 7, 2020, 4:17 p.m. UTC | #7
Excerpts from Alan Modra's message of September 7, 2020 2:56 am:
> On Fri, Sep 04, 2020 at 06:23:10PM +0200, Iain Buclaw wrote:
>> If we're already using limits.h, I guess it should be fine to also add
>> 
>> #define UINT_MAX ((unsigned) ~0U)
> 
> Yes, except that I'll use the simpler fall-back
> #define UINT_MAX (~0U)
> 
> The habit of using a cast for unsigned constants dates back to K&R C
> where a U suffix was not valid.  For example, from libiberty/strtol.c
> #define	ULONG_MAX	((unsigned long)(~0L))
> 
> Since the code uses ISO/ANSI C features such as prototypes I think
> we're OK with a U suffix.  And if there's something I'm missing then
> #define UINT_MAX ((unsigned) ~0)
> would be correct for K&R.
> 
>> I'll leave it to your judgement on that though.
>> 
>> Other than that, OK from me.
> 
> Do I need an OK from Ian too?
> 

As it only touches D support files, I'd say no.

Iain
Ian Lance Taylor Sept. 7, 2020, 5:46 p.m. UTC | #8
Iain Buclaw <ibuclaw@gdcproject.org> writes:

> Excerpts from Alan Modra's message of September 7, 2020 2:56 am:
>> On Fri, Sep 04, 2020 at 06:23:10PM +0200, Iain Buclaw wrote:
>>> If we're already using limits.h, I guess it should be fine to also add
>>> 
>>> #define UINT_MAX ((unsigned) ~0U)
>> 
>> Yes, except that I'll use the simpler fall-back
>> #define UINT_MAX (~0U)
>> 
>> The habit of using a cast for unsigned constants dates back to K&R C
>> where a U suffix was not valid.  For example, from libiberty/strtol.c
>> #define	ULONG_MAX	((unsigned long)(~0L))
>> 
>> Since the code uses ISO/ANSI C features such as prototypes I think
>> we're OK with a U suffix.  And if there's something I'm missing then
>> #define UINT_MAX ((unsigned) ~0)
>> would be correct for K&R.
>> 
>>> I'll leave it to your judgement on that though.
>>> 
>>> Other than that, OK from me.
>> 
>> Do I need an OK from Ian too?
>> 
>
> As it only touches D support files, I'd say no.

I agree.

Ian
Jeff Law Nov. 13, 2020, 7:04 p.m. UTC | #9
On 9/4/20 7:34 AM, Alan Modra via Gcc-patches wrote:
> So this one is on top of the previously posted patch.
>
> 	* d-demangle.c (string_need): Take a size_t n arg, and use size_t tem.
> 	(string_append): Use size_t n.
> 	(string_appendn, string_prependn): Take a size_t n arg.
> 	(TEMPLATE_LENGTH_UNKNOWN): Define as -1UL.
> 	* d-demangle.c (dlang_number): Make "ret" an unsigned long*.
> 	Only succeed for result of [0,4294967295UL].
> 	(dlang_decode_backref): Only succeed for result [1,MAX_LONG].
> 	(dlang_backref): Remove now unnecessary range check.
> 	(dlang_symbol_name_p): Likewise.
> 	(dlang_lname, dlang_parse_template): Take an unsigned long len
> 	arg.
> 	(dlang_symbol_backref, dlang_identifier, dlang_parse_integer),
> 	(dlang_parse_integer, dlang_parse_string),
> 	(dlang_parse_arrayliteral, dlang_parse_assocarray),
> 	(dlang_parse_structlit, dlang_parse_tuple),
> 	(dlang_template_symbol_param, dlang_template_args): Use
> 	unsigned long variables.
> 	* testsuite/d-demangle-expected: Add new tests.

Explicitly leaving this to Iain.


jeff
diff mbox series

Patch

diff --git a/libiberty/d-demangle.c b/libiberty/d-demangle.c
index f2d6946eca..59e6ae007a 100644
--- a/libiberty/d-demangle.c
+++ b/libiberty/d-demangle.c
@@ -31,6 +31,9 @@  If not, see <http://www.gnu.org/licenses/>.  */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
 
 #include "safe-ctype.h"
 
@@ -45,6 +48,10 @@  If not, see <http://www.gnu.org/licenses/>.  */
 #include <demangle.h>
 #include "libiberty.h"
 
+#ifndef ULONG_MAX
+#define	ULONG_MAX	(~0UL)
+#endif
+
 /* A mini string-handling package */
 
 typedef struct string		/* Beware: these aren't required to be */
@@ -207,24 +214,24 @@  dlang_number (const char *mangled, long *ret)
   if (mangled == NULL || !ISDIGIT (*mangled))
     return NULL;
 
-  (*ret) = 0;
+  unsigned long val = 0;
 
   while (ISDIGIT (*mangled))
     {
-      (*ret) *= 10;
-
-      /* If an overflow occured when multiplying by ten, the result
-	 will not be a multiple of ten.  */
-      if ((*ret % 10) != 0)
+      /* Check for overflow.  Yes, we return NULL here for some digits
+	 that don't overflow "val * 10 + digit", but that doesn't
+	 matter given the later "(long) val < 0" test.  */
+      if (val > (ULONG_MAX - 9) / 10)
 	return NULL;
 
-      (*ret) += mangled[0] - '0';
+      val = val * 10 + mangled[0] - '0';
       mangled++;
     }
 
-  if (*mangled == '\0' || *ret < 0)
+  if (*mangled == '\0' || (long) val < 0)
     return NULL;
 
+  *ret = val;
   return mangled;
 }
 
@@ -294,24 +301,24 @@  dlang_decode_backref (const char *mangled, long *ret)
 	    [A-Z] NumberBackRef
 	    ^
    */
-  (*ret) = 0;
+  unsigned long val = 0;
 
   while (ISALPHA (*mangled))
     {
-      (*ret) *= 26;
+      /* Check for overflow.  */
+      if (val > (ULONG_MAX - 25) / 26)
+	break;
 
-      /* If an overflow occured when multiplying by 26, the result
-	 will not be a multiple of 26.  */
-      if ((*ret % 26) != 0)
-	return NULL;
+      val *= 26;
 
       if (mangled[0] >= 'a' && mangled[0] <= 'z')
 	{
-	  (*ret) += mangled[0] - 'a';
+	  val += mangled[0] - 'a';
+	  *ret = val;
 	  return mangled + 1;
 	}
 
-      (*ret) += mangled[0] - 'A';
+      val += mangled[0] - 'A';
       mangled++;
     }