Patchwork Allocate extra 16 bytes for -fsanitize=address

login
register
mail settings
Submitter Jakub Jelinek
Date Nov. 23, 2012, 7:50 p.m.
Message ID <20121123195033.GG2315@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/201394/
State New
Headers show

Comments

Jakub Jelinek - Nov. 23, 2012, 7:50 p.m.
On Fri, Nov 23, 2012 at 11:33:37AM -0800, H.J. Lu wrote:
> 2012-11-21  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	PR bootstrap/55380
> 	* charset.c (_cpp_convert_input): Clear CPP_PAD_BUFFER_SIZE
> 	bytes if CLEAR_CPP_PAD_BUFFER isn't 0.  Allocate extra
> 	CPP_PAD_BUFFER_SIZE bytes and clear it if CLEAR_CPP_PAD_BUFFER
> 	isn't 0.
> 
> 	* files.c (read_file_guts): Allocate extra CPP_PAD_BUFFER_SIZE
> 	bytes.
> 
> 	* internal.h (CPP_PAD_BUFFER_SIZE): New.  Defined to 16 for
> 	-fsanitize=address if __SANITIZE_ADDRESS__ is defined.
> 	(CLEAR_CPP_PAD_BUFFER): New.

I'd say you are making this way too much complicated.
The following patch just changes those + 1 to + 16, adds memset of the 16
bytes unconditionally, but as it also fixes a thinko which IMHO affects
the most common cases (regular file, with no conversion needed), I'd say
it ought to be faster than the old version (the old version would allocate
st.st_size + 1 bytes long buffer, read st.st_size bytes into it,
call _cpp_convert_input with len st.st_size and size st.st_size (the latter
should have been st.st_size + 1, i.e. the allocated size) and thus
to.len >= to.asize was true and we called realloc with the same length
as malloc has been called originally.

2012-11-23  Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/55380
	* files.c (read_file_guts): Allocate extra 16 bytes instead of
	1 byte at the end of buf.  Pass size + 16 instead of size
	to _cpp_convert_input.
	* charset.c (_cpp_convert_input): Reallocate if there aren't
	at least 16 bytes beyond to.len in the buffer.  Clear 16 bytes
	at to.text + to.len.



	Jakub
H.J. Lu - Nov. 26, 2012, 2:50 p.m.
On Fri, Nov 23, 2012 at 11:50 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Nov 23, 2012 at 11:33:37AM -0800, H.J. Lu wrote:
>> 2012-11-21  H.J. Lu  <hongjiu.lu@intel.com>
>>
>>       PR bootstrap/55380
>>       * charset.c (_cpp_convert_input): Clear CPP_PAD_BUFFER_SIZE
>>       bytes if CLEAR_CPP_PAD_BUFFER isn't 0.  Allocate extra
>>       CPP_PAD_BUFFER_SIZE bytes and clear it if CLEAR_CPP_PAD_BUFFER
>>       isn't 0.
>>
>>       * files.c (read_file_guts): Allocate extra CPP_PAD_BUFFER_SIZE
>>       bytes.
>>
>>       * internal.h (CPP_PAD_BUFFER_SIZE): New.  Defined to 16 for
>>       -fsanitize=address if __SANITIZE_ADDRESS__ is defined.
>>       (CLEAR_CPP_PAD_BUFFER): New.
>
> I'd say you are making this way too much complicated.
> The following patch just changes those + 1 to + 16, adds memset of the 16
> bytes unconditionally, but as it also fixes a thinko which IMHO affects
> the most common cases (regular file, with no conversion needed), I'd say
> it ought to be faster than the old version (the old version would allocate
> st.st_size + 1 bytes long buffer, read st.st_size bytes into it,
> call _cpp_convert_input with len st.st_size and size st.st_size (the latter
> should have been st.st_size + 1, i.e. the allocated size) and thus
> to.len >= to.asize was true and we called realloc with the same length
> as malloc has been called originally.
>
> 2012-11-23  Jakub Jelinek  <jakub@redhat.com>
>
>         PR bootstrap/55380
>         * files.c (read_file_guts): Allocate extra 16 bytes instead of
>         1 byte at the end of buf.  Pass size + 16 instead of size
>         to _cpp_convert_input.
>         * charset.c (_cpp_convert_input): Reallocate if there aren't
>         at least 16 bytes beyond to.len in the buffer.  Clear 16 bytes
>         at to.text + to.len.
>
> --- libcpp/files.c.jj   2012-11-22 11:04:24.000000000 +0100
> +++ libcpp/files.c      2012-11-23 20:37:03.146379853 +0100
> @@ -671,7 +671,7 @@ read_file_guts (cpp_reader *pfile, _cpp_
>         the majority of C source files.  */
>      size = 8 * 1024;
>
> -  buf = XNEWVEC (uchar, size + 1);
> +  buf = XNEWVEC (uchar, size + 16);
>    total = 0;
>    while ((count = read (file->fd, buf + total, size - total)) > 0)
>      {
> @@ -682,7 +682,7 @@ read_file_guts (cpp_reader *pfile, _cpp_
>           if (regular)
>             break;
>           size *= 2;
> -         buf = XRESIZEVEC (uchar, buf, size + 1);
> +         buf = XRESIZEVEC (uchar, buf, size + 16);
>         }
>      }
>
> @@ -699,7 +699,7 @@ read_file_guts (cpp_reader *pfile, _cpp_
>
>    file->buffer = _cpp_convert_input (pfile,
>                                      CPP_OPTION (pfile, input_charset),
> -                                    buf, size, total,
> +                                    buf, size + 16, total,
>                                      &file->buffer_start,
>                                      &file->st.st_size);
>    file->buffer_valid = true;
> --- libcpp/charset.c.jj 2011-01-06 10:22:00.000000000 +0100
> +++ libcpp/charset.c    2012-11-23 20:40:39.736116642 +0100
> @@ -1,6 +1,6 @@
>  /* CPP Library - charsets
>     Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009,
> -   2010 Free Software Foundation, Inc.
> +   2010, 2012 Free Software Foundation, Inc.
>
>     Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
>
> @@ -1729,9 +1729,12 @@ _cpp_convert_input (cpp_reader *pfile, c
>      iconv_close (input_cset.cd);
>
>    /* Resize buffer if we allocated substantially too much, or if we
> -     haven't enough space for the \n-terminator.  */
> -  if (to.len + 4096 < to.asize || to.len >= to.asize)
> -    to.text = XRESIZEVEC (uchar, to.text, to.len + 1);
> +     haven't enough space for the \n-terminator or following
> +     15 bytes of padding.  */
> +  if (to.len + 4096 < to.asize || to.len + 16 > to.asize)
> +    to.text = XRESIZEVEC (uchar, to.text, to.len + 16);
> +
> +  memset (to.text + to.len, '\0', 16);
>
>    /* If the file is using old-school Mac line endings (\r only),
>       terminate with another \r, not an \n, so that we do not mistake
>
>
>         Jakub

You should also mention:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54691

in ChangeLog entry.

Thanks.
Tom Tromey - Dec. 3, 2012, 3:45 p.m.
>>>>> "Jakub" == Jakub Jelinek <jakub@redhat.com> writes:

Jakub> 2012-11-23  Jakub Jelinek  <jakub@redhat.com>
Jakub> 	PR bootstrap/55380
Jakub> 	* files.c (read_file_guts): Allocate extra 16 bytes instead of
Jakub> 	1 byte at the end of buf.  Pass size + 16 instead of size
Jakub> 	to _cpp_convert_input.
Jakub> 	* charset.c (_cpp_convert_input): Reallocate if there aren't
Jakub> 	at least 16 bytes beyond to.len in the buffer.  Clear 16 bytes
Jakub> 	at to.text + to.len.

Jakub> +  buf = XNEWVEC (uchar, size + 16);

I think the magic constant 16 could use a comment, here and elsewhere.

Otherwise ok.

Tom

Patch

--- libcpp/files.c.jj	2012-11-22 11:04:24.000000000 +0100
+++ libcpp/files.c	2012-11-23 20:37:03.146379853 +0100
@@ -671,7 +671,7 @@  read_file_guts (cpp_reader *pfile, _cpp_
        the majority of C source files.  */
     size = 8 * 1024;
 
-  buf = XNEWVEC (uchar, size + 1);
+  buf = XNEWVEC (uchar, size + 16);
   total = 0;
   while ((count = read (file->fd, buf + total, size - total)) > 0)
     {
@@ -682,7 +682,7 @@  read_file_guts (cpp_reader *pfile, _cpp_
 	  if (regular)
 	    break;
 	  size *= 2;
-	  buf = XRESIZEVEC (uchar, buf, size + 1);
+	  buf = XRESIZEVEC (uchar, buf, size + 16);
 	}
     }
 
@@ -699,7 +699,7 @@  read_file_guts (cpp_reader *pfile, _cpp_
 
   file->buffer = _cpp_convert_input (pfile,
 				     CPP_OPTION (pfile, input_charset),
-				     buf, size, total,
+				     buf, size + 16, total,
 				     &file->buffer_start,
 				     &file->st.st_size);
   file->buffer_valid = true;
--- libcpp/charset.c.jj	2011-01-06 10:22:00.000000000 +0100
+++ libcpp/charset.c	2012-11-23 20:40:39.736116642 +0100
@@ -1,6 +1,6 @@ 
 /* CPP Library - charsets
    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009,
-   2010 Free Software Foundation, Inc.
+   2010, 2012 Free Software Foundation, Inc.
 
    Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
 
@@ -1729,9 +1729,12 @@  _cpp_convert_input (cpp_reader *pfile, c
     iconv_close (input_cset.cd);
 
   /* Resize buffer if we allocated substantially too much, or if we
-     haven't enough space for the \n-terminator.  */
-  if (to.len + 4096 < to.asize || to.len >= to.asize)
-    to.text = XRESIZEVEC (uchar, to.text, to.len + 1);
+     haven't enough space for the \n-terminator or following
+     15 bytes of padding.  */
+  if (to.len + 4096 < to.asize || to.len + 16 > to.asize)
+    to.text = XRESIZEVEC (uchar, to.text, to.len + 16);
+
+  memset (to.text + to.len, '\0', 16);
 
   /* If the file is using old-school Mac line endings (\r only),
      terminate with another \r, not an \n, so that we do not mistake