diff mbox

ctype functions and signedness

Message ID 20151116155919.GA13057@thinkpad.swarthmore.edu
State New
Headers show

Commit Message

Michael McConville Nov. 16, 2015, 3:59 p.m. UTC
Hi, everyone.

While it often (usually?) isn't an issue, passing a signed char to ctype
functions is undefined. Here's the CERT entry:

	https://www.securecoding.cert.org/confluence/x/fAs

This means that we need to cast chars to unsigned char before passing
them to one of these functions.

The below patch, generated by a Coccinelle script, fixes instances of
this. It may need some minor tweaks to add line breaks and conform with
local style conventions.

Thanks,
Michael

Comments

DJ Delorie Nov. 17, 2015, 12:18 a.m. UTC | #1
> --- libiberty/pex-win32.c
> +++ /tmp/cocci-output-25924-3a75ca-pex-win32.c
> @@ -547,8 +547,8 @@ env_compare (const void *a_ptr, const vo
>  
>    do
>      {
> -      c1 = (unsigned char) tolower (*a++);
> -      c2 = (unsigned char) tolower (*b++);
> +      c1 = (unsigned char) tolower ((unsigned char)*a++);
> +      c2 = (unsigned char) tolower ((unsigned char)*b++);
>  
>        if (c1 == '=')
>          c1 = '\0';

Since the only use of a and b in this function are to pass to tolower,
changing the type of a and b to unsigned char (and updating the casts
where they're initialized) would make more sense.
Michael McConville Nov. 17, 2015, 12:24 a.m. UTC | #2
DJ Delorie wrote:
> 
> > --- libiberty/pex-win32.c
> > +++ /tmp/cocci-output-25924-3a75ca-pex-win32.c
> > @@ -547,8 +547,8 @@ env_compare (const void *a_ptr, const vo
> >  
> >    do
> >      {
> > -      c1 = (unsigned char) tolower (*a++);
> > -      c2 = (unsigned char) tolower (*b++);
> > +      c1 = (unsigned char) tolower ((unsigned char)*a++);
> > +      c2 = (unsigned char) tolower ((unsigned char)*b++);
> >  
> >        if (c1 == '=')
> >          c1 = '\0';
> 
> Since the only use of a and b in this function are to pass to tolower,
> changing the type of a and b to unsigned char (and updating the casts
> where they're initialized) would make more sense.

True. This patch was generated with an automated tool (Coccinelle). I
figured that it'd be easiest to send an initial patch and let someone
with a commit bit tweak it as needed. I've never worked with GCC before,
so I have no idea what the style conventions are.
Jeff Law Nov. 24, 2015, 11:01 p.m. UTC | #3
On 11/16/2015 08:59 AM, Michael McConville wrote:
> Hi, everyone.
>
> While it often (usually?) isn't an issue, passing a signed char to ctype
> functions is undefined. Here's the CERT entry:
>
> 	https://www.securecoding.cert.org/confluence/x/fAs
>
> This means that we need to cast chars to unsigned char before passing
> them to one of these functions.
>
> The below patch, generated by a Coccinelle script, fixes instances of
> this. It may need some minor tweaks to add line breaks and conform with
> local style conventions.
The changes to the boehm-gc directory really should go to the upstream 
project.   I wouldn't be terribly surprised if boehm-gc for GCC gets 
deprecated given the trajectory GCJ is on.



> --- gcc/testsuite/gcc.dg/charset/builtin1.c
> +++ /tmp/cocci-output-20442-6d79bd-builtin1.c
> @@ -14,9 +14,9 @@ static int strA(void) { return 'A'; }
>   int
>   main(void)
>   {
> -  if (!isdigit('1'))
> +  if (!isdigit((unsigned char)'1'))
>       abort();
> -  if (isdigit('A'))
> +  if (isdigit((unsigned char)'A'))
>       abort();
>     if (!isdigit(str1()))
>       abort();
This change actually fails regression testing.  We're using 
-fexec-charset=IBM1047 (EBCDIC).  So '1' is -15, turned into an unsigned 
char, 241 and boom, it fails isdigit.


While I'm not terribly concerned about EBCDIC within GCC itself, it does 
bring into question the changes for the testsuites & runtimes in particular.

Jeff
diff mbox

Patch

--- boehm-gc/cord/de_win.c
+++ /tmp/cocci-output-25514-78f80b-de_win.c
@@ -86,7 +86,7 @@  int APIENTRY WinMain (HINSTANCE hInstanc
    } else {
         char *p = command_line;
         
-        while (*p != 0 && !isspace(*p)) p++;
+        while (*p != 0 && !isspace((unsigned char)*p)) p++;
    	arg_file_name = CORD_to_char_star(
    			    CORD_substr(command_line, 0, p - command_line));
    }
@@ -129,7 +129,7 @@  char * plain_chars(char * text, size_t l
     register size_t i;
     
     for (i = 0; i < len; i++) {
-       if (iscntrl(text[i])) {
+       if (iscntrl((unsigned char)text[i])) {
            result[i] = ' ';
        } else {
            result[i] = text[i];
@@ -147,7 +147,7 @@  char * control_chars(char * text, size_t
     register size_t i;
     
     for (i = 0; i < len; i++) {
-       if (iscntrl(text[i])) {
+       if (iscntrl((unsigned char)text[i])) {
            result[i] = text[i] + 0x40;
        } else {
            result[i] = ' ';
--- boehm-gc/os_dep.c
+++ /tmp/cocci-output-25514-d4c0bc-os_dep.c
@@ -271,31 +271,31 @@  char *GC_parse_map_entry(char *buf_ptr,
     }
 
     p = buf_ptr;
-    while (isspace(*p)) ++p;
+    while (isspace((unsigned char)*p)) ++p;
     start_start = p;
-    GC_ASSERT(isxdigit(*start_start));
+    GC_ASSERT(isxdigit((unsigned char)*start_start));
     *start = strtoul(start_start, &endp, 16); p = endp;
     GC_ASSERT(*p=='-');
 
     ++p;
     end_start = p;
-    GC_ASSERT(isxdigit(*end_start));
+    GC_ASSERT(isxdigit((unsigned char)*end_start));
     *end = strtoul(end_start, &endp, 16); p = endp;
-    GC_ASSERT(isspace(*p));
+    GC_ASSERT(isspace((unsigned char)*p));
 
-    while (isspace(*p)) ++p;
+    while (isspace((unsigned char)*p)) ++p;
     prot_start = p;
     GC_ASSERT(*prot_start == 'r' || *prot_start == '-');
     memcpy(prot_buf, prot_start, 4);
     prot_buf[4] = '\0';
     if (prot_buf[1] == 'w') {/* we can skip the rest if it's not writable. */
 	/* Skip past protection field to offset field */
-          while (!isspace(*p)) ++p; while (isspace(*p)) ++p;
-          GC_ASSERT(isxdigit(*p));
+          while (!isspace((unsigned char)*p)) ++p; while (isspace((unsigned char)*p)) ++p;
+          GC_ASSERT(isxdigit((unsigned char)*p));
 	/* Skip past offset field, which we ignore */
-          while (!isspace(*p)) ++p; while (isspace(*p)) ++p;
+          while (!isspace((unsigned char)*p)) ++p; while (isspace((unsigned char)*p)) ++p;
 	maj_dev_start = p;
-        GC_ASSERT(isxdigit(*maj_dev_start));
+        GC_ASSERT(isxdigit((unsigned char)*maj_dev_start));
         *maj_dev = strtoul(maj_dev_start, NULL, 16);
     }
 
@@ -969,11 +969,11 @@  ptr_t GC_get_stack_base()
     /* Skip the required number of fields.  This number is hopefully	*/
     /* constant across all Linux implementations.			*/
       for (i = 0; i < STAT_SKIP; ++i) {
-	while (isspace(c)) c = stat_buf[buf_offset++];
-	while (!isspace(c)) c = stat_buf[buf_offset++];
+	while (isspace((unsigned char)c)) c = stat_buf[buf_offset++];
+	while (!isspace((unsigned char)c)) c = stat_buf[buf_offset++];
       }
-    while (isspace(c)) c = stat_buf[buf_offset++];
-    while (isdigit(c)) {
+    while (isspace((unsigned char)c)) c = stat_buf[buf_offset++];
+    while (isdigit((unsigned char)c)) {
       result *= 10;
       result += c - '0';
       c = stat_buf[buf_offset++];
--- gcc/testsuite/gcc.dg/charset/builtin1.c
+++ /tmp/cocci-output-20442-6d79bd-builtin1.c
@@ -14,9 +14,9 @@  static int strA(void) { return 'A'; }
 int
 main(void)
 {
-  if (!isdigit('1'))
+  if (!isdigit((unsigned char)'1'))
     abort();
-  if (isdigit('A'))
+  if (isdigit((unsigned char)'A'))
     abort();
   if (!isdigit(str1()))
     abort();
--- gcc/testsuite/gcc.dg/torture/pr67821.c
+++ /tmp/cocci-output-18483-79f7b5-pr67821.c
@@ -7,9 +7,9 @@  foo (const char *s)
 {
   int success = 1;
   const char *p = s + 2;
-  if (!isdigit (*p))
+  if (!isdigit ((unsigned char)*p))
     success = 0;
-  while (isdigit (*p))
+  while (isdigit ((unsigned char)*p))
     ++p;
   return success;
 }
--- gcc/ada/adaint.c
+++ /tmp/cocci-output-21151-00798e-adaint.c
@@ -1628,7 +1628,7 @@  __gnat_is_absolute_path (char *name, int
     {
       if (name[index] == ':' &&
           ((name[index + 1] == '/') ||
-           (isalpha (name[index + 1]) && index + 2 <= length &&
+           (isalpha ((unsigned char)name[index + 1]) && index + 2 <= length &&
             name[index + 2] == '/')))
         return 1;
 
--- gcc/config/darwin.c
+++ /tmp/cocci-output-11137-daf870-darwin.c
@@ -3471,7 +3471,7 @@  darwin_build_constant_cfstring (tree str
 	  int l = 0;
 
 	  for (l = 0; l < length; l++)
-	    if (!s[l] || !isascii (s[l]))
+	    if (!s[l] || !isascii ((unsigned char)s[l]))
 	      {
 		warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal",
 			 s[l] ? "non-ASCII character" : "embedded NUL");
--- libcpp/makeucnid.c
+++ /tmp/cocci-output-1465-01eaaf-makeucnid.c
@@ -81,7 +81,7 @@  read_ucnid (const char *fname)
 	fl = C11;
       else if (strcmp (line, "[C11NOSTART]\n") == 0)
 	fl = C11|N11;
-      else if (isxdigit (line[0]))
+      else if (isxdigit ((unsigned char)line[0]))
 	{
 	  char *l = line;
 	  while (*l)
@@ -89,7 +89,7 @@  read_ucnid (const char *fname)
 	      unsigned long start, end;
 	      char *endptr;
 	      start = strtoul (l, &endptr, 16);
-	      if (endptr == l || (*endptr != '-' && ! isspace (*endptr)))
+	      if (endptr == l || (*endptr != '-' && ! isspace ((unsigned char)*endptr)))
 		fail ("parsing ucnid.tab [1]");
 	      l = endptr;
 	      if (*l != '-')
@@ -100,10 +100,10 @@  read_ucnid (const char *fname)
 		  if (end < start)
 		    fail ("parsing ucnid.tab, end before start");
 		  l = endptr;
-		  if (! isspace (*l))
+		  if (! isspace ((unsigned char)*l))
 		    fail ("parsing ucnid.tab, junk after range");
 		}
-	      while (isspace (*l))
+	      while (isspace ((unsigned char)*l))
 		l++;
 	      if (end > MAX_CODE_POINT)
 		fail ("parsing ucnid.tab, end too large");
@@ -156,7 +156,7 @@  read_table (char *fname)
       } while (*l != ';');
       /* Canonical combining class; in NFC/NFKC, they must be increasing
 	 (or zero).  */
-      if (! isdigit (*++l))
+      if (! isdigit ((unsigned char)*++l))
 	fail ("parsing UnicodeData.txt, combining class not number");
       combining_value[codepoint] = strtoul (l, &l, 10);
       if (*l++ != ';')
@@ -175,11 +175,11 @@  read_table (char *fname)
 	{
 	  if (*l == ';')
 	    break;
-	  if (!isxdigit (*l))
+	  if (!isxdigit ((unsigned char)*l))
 	    fail ("parsing UnicodeData.txt, decomposition format");
 	  this_decomp[i] = strtoul (l, &l, 16);
 	  decomp_useful &= flags[this_decomp[i]];
-	  while (isspace (*l))
+	  while (isspace ((unsigned char)*l))
 	    l++;
 	}
       if (i > 2)  /* Decomposition too long.  */
--- libgfortran/runtime/environ.c
+++ /tmp/cocci-output-8814-ef5a91-environ.c
@@ -80,7 +80,7 @@  init_integer (variable * v)
     return;
 
   for (q = p; *q; q++)
-    if (!isdigit (*q) && (p != q || *q != '-'))
+    if (!isdigit ((unsigned char)*q) && (p != q || *q != '-'))
       return;
 
   *v->var = atoi (p);
@@ -99,7 +99,7 @@  init_unsigned_integer (variable * v)
     return;
 
   for (q = p; *q; q++)
-    if (!isdigit (*q))
+    if (!isdigit ((unsigned char)*q))
       return;
 
   *v->var = atoi (p);
@@ -348,7 +348,7 @@  static int
 match_integer (void)
 {
   unit_num = 0;
-  while (isdigit (*p))
+  while (isdigit ((unsigned char)*p))
     unit_num = unit_num * 10 + (*p++ - '0');
   return INTEGER;
 }
--- libgfortran/io/list_read.c
+++ /tmp/cocci-output-7054-3fc663-list_read.c
@@ -2656,7 +2656,7 @@  nml_match_name (st_parameter_dt *dtp, co
   for (i = 0; i < len; i++)
     {
       c = next_char (dtp);
-      if (c == EOF || (tolower (c) != tolower (name[i])))
+      if (c == EOF || (tolower (c) != tolower ((unsigned char)name[i])))
 	{
 	  dtp->u.p.nml_read_error = 1;
 	  break;
--- libgfortran/io/read.c
+++ /tmp/cocci-output-7054-209e45-read.c
@@ -947,7 +947,7 @@  read_f (st_parameter_dt *dtp, const fnod
 	 between "NaN" and the optional perenthesis is not permitted.  */
       while (w > 0)
 	{
-	  *out = tolower (*p);
+	  *out = tolower ((unsigned char)*p);
 	  switch (*p)
 	    {
 	    case ' ':
@@ -969,7 +969,7 @@  read_f (st_parameter_dt *dtp, const fnod
 		goto bad_float;
 	      break;
 	    default:
-	      if (!isalnum (*out))
+	      if (!isalnum ((unsigned char)*out))
 		goto bad_float;
 	    }
 	  --w;
@@ -1091,7 +1091,7 @@  exponent:
 
   if (dtp->u.p.blank_status == BLANK_UNSPECIFIED)
     {
-      while (w > 0 && isdigit (*p))
+      while (w > 0 && isdigit ((unsigned char)*p))
 	{
 	  exponent *= 10;
 	  exponent += *p - '0';
@@ -1119,7 +1119,7 @@  exponent:
 	      else
 		assert (dtp->u.p.blank_status == BLANK_NULL);
 	    }
-	  else if (!isdigit (*p))
+	  else if (!isdigit ((unsigned char)*p))
 	    goto bad_float;
 	  else
 	    {
--- libgfortran/io/format.c
+++ /tmp/cocci-output-16280-1f8fd7-format.c
@@ -194,7 +194,7 @@  next_char (format_data *fmt, int literal
 	return -1;
 
       fmt->format_string_len--;
-      c = toupper (*fmt->format_string++);
+      c = toupper ((unsigned char)*fmt->format_string++);
       fmt->error_element = c;
     }
   while ((c == ' ' || c == '\t') && !literal);
--- libgo/go/regexp/testdata/testregex.c
+++ /tmp/cocci-output-11909-272cb0-testregex.c
@@ -1267,7 +1267,7 @@  main(int argc, char** argv)
 	state.NOMATCH.rm_so = state.NOMATCH.rm_eo = -2;
 	p = unit;
 	version = (char*)id + 10;
-	while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace(*p))
+	while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace((unsigned char)*p))
 		p++;
 	*p = 0;
 	while ((p = *++argv) && *p == '-')
@@ -1452,7 +1452,7 @@  main(int argc, char** argv)
 		/* parse: */
 
 			line = p;
-			if (*p == ':' && !isspace(*(p + 1)))
+			if (*p == ':' && !isspace((unsigned char)*(p + 1)))
 			{
 				while (*++p && *p != ':');
 				if (!*p++)
@@ -1462,7 +1462,7 @@  main(int argc, char** argv)
 					continue;
 				}
 			}
-			while (isspace(*p))
+			while (isspace((unsigned char)*p))
 				p++;
 			if (*p == 0 || *p == '#' || *p == 'T')
 			{
@@ -1476,8 +1476,8 @@  main(int argc, char** argv)
 					printf("%s\n", line);
 				else if (!(test & (TEST_ACTUAL|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
 				{
-					while (*++p && !isspace(*p));
-					while (isspace(*p))
+					while (*++p && !isspace((unsigned char)*p));
+					while (isspace((unsigned char)*p))
 						p++;
 					printf("NOTE	%s\n", p);
 				}
@@ -1533,7 +1533,7 @@  main(int argc, char** argv)
 			nsub = -1;
 			for (p = spec; *p; p++)
 			{
-				if (isdigit(*p))
+				if (isdigit((unsigned char)*p))
 				{
 					nmatch = strtol(p, &p, 10);
 					if (nmatch >= elementsof(match))
--- libiberty/pex-win32.c
+++ /tmp/cocci-output-25924-3a75ca-pex-win32.c
@@ -547,8 +547,8 @@  env_compare (const void *a_ptr, const vo
 
   do
     {
-      c1 = (unsigned char) tolower (*a++);
-      c2 = (unsigned char) tolower (*b++);
+      c1 = (unsigned char) tolower ((unsigned char)*a++);
+      c2 = (unsigned char) tolower ((unsigned char)*b++);
 
       if (c1 == '=')
         c1 = '\0';
--- libobjc/gc.c
+++ /tmp/cocci-output-2585-fc246d-gc.c
@@ -68,7 +68,7 @@  __objc_gc_setup_array (GC_bitmap mask, c
 {
   int i, len = atoi (type + 1);
 
-  while (isdigit (*++type))
+  while (isdigit ((unsigned char)*++type))
     /* do nothing */;		/* skip the size of the array */
 
   switch (*type) {
--- libquadmath/printf/quadmath-printf.c
+++ /tmp/cocci-output-27535-6b9ccc-quadmath-printf.c
@@ -189,7 +189,7 @@  quadmath_snprintf (char *str, size_t siz
       ++format;
       info.width = va_arg (ap, int);
     }
-  else if (isdigit (*format))
+  else if (isdigit ((unsigned char)*format))
     /* Constant width specification.  */
     info.width = read_int (&format);
 
@@ -206,7 +206,7 @@  quadmath_snprintf (char *str, size_t siz
 
 	  info.prec = va_arg (ap, int);
 	}
-      else if (isdigit (*format))
+      else if (isdigit ((unsigned char)*format))
 	info.prec = read_int (&format);
       else
 	/* "%.?" is treated like "%.0?".  */
--- libquadmath/printf/printf_fphex.c
+++ /tmp/cocci-output-25463-ffa004-printf_fphex.c
@@ -169,7 +169,7 @@  __quadmath_printf_fphex (struct __quadma
       if (isnanq (fpnum.value))
 	{
 	  negative = fpnum.ieee.negative != 0;
-	  if (isupper (info->spec))
+	  if (isupper ((unsigned char)info->spec))
 	    {
 	      special = "NAN";
 	      wspecial = L_("NAN");
@@ -184,7 +184,7 @@  __quadmath_printf_fphex (struct __quadma
 	{
 	  if (isinfq (fpnum.value))
 	    {
-	      if (isupper (info->spec))
+	      if (isupper ((unsigned char)info->spec))
 		{
 		  special = "INF";
 		  wspecial = L_("INF");
@@ -363,7 +363,7 @@  __quadmath_printf_fphex (struct __quadma
 		  				   think about it!  */
 		  break;
 		}
-	      else if (tolower (ch) < 'f')
+	      else if (tolower ((unsigned char)ch) < 'f')
 		{
 		  ++numstr[cnt];
 		  ++wnumstr[cnt];
@@ -382,7 +382,7 @@  __quadmath_printf_fphex (struct __quadma
 		 get an overflow.  */
 	      if (leading == '9')
 		leading = info->spec;
-	      else if (tolower (leading) < 'f')
+	      else if (tolower ((unsigned char)leading) < 'f')
 		++leading;
 	      else
 		{