diff mbox

Factorize the two read_line functions present in gcov.c and input.c

Message ID 87bo1xfcjl.fsf@redhat.com
State New
Headers show

Commit Message

Dodji Seketeli Nov. 6, 2013, 9:25 p.m. UTC
Hello,

It appeared that gcov.c and input.c both have a static function named
read_line.  I guess we ought to keep just one.

So I changed the one in input.c as it handles lines with bytes of zero
value (as of the work on PR preprocessor/58580) to make it suitable for
what the uses of read_line in gcov.c expect.  Note that the gcov.c usage
doesn't need the handling of bytes with zero value in a line, AIUI.

First, the byte count returned by get_line now contains the '\n' at
the end of the line; of course that '\n' is still replaced by the a
terminal zero.

Second, read_line ensures that the current position indicator
correctly moves to the end of the line read, as opposed to moving to
the end of the buffer that get_line fills while reading bytes in
chunk.

Third, I have added a declaration for read_line in input.h.  I
couldn't find a better place where to put this.

Patch tested and bootstrapped on x86_64-unknown-linux-gnu against
trunk.

OK to commit to trunk?

gcc/ChangeLog

	* input.h (read_line): Declare this here.
	* input.c (get_line): Make the returned length of the line contain
	the ending '\n' character.  Replace that '\n' character by a zero
	byte.
	(read_line): Make this *not* be static anymore.  Preserve the
	current file position across invocations.
---
 gcc/gcov.c  | 35 ++---------------------------------
 gcc/input.c | 31 +++++++++++++++++++++----------
 gcc/input.h |  2 ++
 3 files changed, 25 insertions(+), 43 deletions(-)

Comments

Jakub Jelinek Nov. 7, 2013, 8 a.m. UTC | #1
On Thu, Nov 07, 2013 at 07:17:19AM +0000, Nathan Sidwell wrote:
> On 11/06/13 21:25, Dodji Seketeli wrote:
> >It appeared that gcov.c and input.c both have a static function named
> >read_line.  I guess we ought to keep just one.
> 
> As a general rule, that's good.

Generally yes, but IMHO not in this case, both because of the problems you
are mentioning, because it already now needs to do quite different things
and because I think in the future the version for caret diagnostics should
do much more complicated things (caching of the lines for faster further
caret diagnostics).  Furthermore, ftell/fseek aren't something that should
be used unless strictly necessary.

	Jakub
Bernd Edlinger Nov. 7, 2013, 6:13 p.m. UTC | #2
> On 11/07/13 09:32, Dodji Seketeli wrote:
>
>> From the above, what I can say is that input.o was already linked with
>> gcov. But I think it's minimal enough to only drag libcpp and the
>> diagnostic subsystem.
>
> I disagree. While input.o was available to gcov, I don't think it was being
> pulled into the executable (if it was, I'd like to understand why). I think
> dragging in libcpp & diagnostics is a major bloat -- have you measured the
> before and after gcov text sizes?


I agree that this should be avoided. However both versions of read_line
need to be fixed eventually.

Just for the records, this is not only a theoretical issue for gcov:
The OOM error can also be reproduced in gcov.

The attached test.c has some dozends of comment lines:
//xx\0EOL

That are lines with text before and after the \0.
If compiled without -Wall, it will crash in gcov:


gcc -fprofile-arcs -ftest-coverage test.c
./a.out
gcov test
File 'test.c'
Lines executed:100.00% of 1
Creating 'test.c.gcov'

gcov: out of memory allocating 1677721600 bytes after a total of 135168 bytes


Bernd.
//00
diff mbox

Patch

diff --git a/gcc/gcov.c b/gcc/gcov.c
index 9458812..da53ebe 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -2364,37 +2364,6 @@  output_branch_count (FILE *gcov_file, int ix, const arc_t *arc)
 
 }
 
-static const char *
-read_line (FILE *file)
-{
-  static char *string;
-  static size_t string_len;
-  size_t pos = 0;
-  char *ptr;
-
-  if (!string_len)
-    {
-      string_len = 200;
-      string = XNEWVEC (char, string_len);
-    }
-
-  while ((ptr = fgets (string + pos, string_len - pos, file)))
-    {
-      size_t len = strlen (string + pos);
-
-      if (string[pos + len - 1] == '\n')
-	{
-	  string[pos + len - 1] = 0;
-	  return string;
-	}
-      pos += len;
-      string = XRESIZEVEC (char, string, string_len * 2);
-      string_len *= 2;
-    }
-      
-  return pos ? string : NULL;
-}
-
 /* Read in the source file one line at a time, and output that line to
    the gcov file preceded by its execution count and other
    information.  */
@@ -2455,7 +2424,7 @@  output_lines (FILE *gcov_file, const source_t *src)
 	}
 
       if (retval)
-	retval = read_line (source_file);
+	retval = read_line (source_file, NULL);
 
       /* For lines which don't exist in the .bb file, print '-' before
 	 the source line.  For lines which exist but were never
@@ -2503,7 +2472,7 @@  output_lines (FILE *gcov_file, const source_t *src)
      last line of code.  */
   if (retval)
     {
-      for (; (retval = read_line (source_file)); line_num++)
+      for (; (retval = read_line (source_file, NULL)); line_num++)
 	fprintf (gcov_file, "%9s:%5u:%s\n", "-", line_num, retval);
     }
 
diff --git a/gcc/input.c b/gcc/input.c
index cb3a0a0..e42103c 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -88,12 +88,12 @@  expand_location_1 (source_location loc,
 }
 
 /* This function reads a line that might contain bytes whose value is
-   zero.  It returns the number of bytes read.  The 'end-of-line'
-   character found at the end of the line is not contained in the
-   returned buffer.  Note that this function has been adapted from
-   getline() and _IO_getdelim() GNU C library functions.  It's been
-   duplicated here because the getline() function is not necessarily
-   present on all platforms.
+   zero.  It returns the number of bytes read, including the
+   end-of-line character.  That end-of-line character is replaced by a
+   zero byte in the returned buffer.  Note that this function has been
+   adapted from getline() and _IO_getdelim() GNU C library functions.
+   It's been duplicated here because the getline() function is not
+   necessarily present on all platforms.
 
    LINEPTR points to a buffer that is to contain the line read.
 
@@ -124,7 +124,7 @@  get_line (char **lineptr, size_t *n, FILE *fp)
     {
       size_t needed;
       char *t = (char*) memchr (buf, '\n', len);
-      if (t != NULL) len = (t - buf);
+      if (t != NULL) len = (t - buf) + 1;
       if (__builtin_expect (len >= SSIZE_MAX - cur_len, 0))
 	return -1;
       needed = cur_len + len + 1;
@@ -149,7 +149,7 @@  get_line (char **lineptr, size_t *n, FILE *fp)
     }
 
   if (cur_len)
-    (*lineptr)[cur_len] = '\0';
+    (*lineptr)[cur_len - 1] = '\0';
   return cur_len;
 }
 
@@ -159,14 +159,21 @@  get_line (char **lineptr, size_t *n, FILE *fp)
  *zero bytes.  Also note that the returned string is allocated in
  *static storage that is going to be re-used by subsequent invocations
  *of read_line.  */
-static const char *
+const char *
 read_line (FILE *file, int *line_length)
 {
   static char *string;
   static size_t string_len;
   int len;
+  long prev_pos;
 
+  prev_pos = ftell (file);
   len = get_line (&string, &string_len, file);
+  /* get_line tried to read STRING_LEN bytes, which is possibly
+    different from LEN bytes of the returned line.  So set the
+    position indicator of FILE to the byte following the LEN'th byte,
+    so that the new call to get_line starts reading from there.  */
+  fseek (file, prev_pos + len, SEEK_SET);
   if (line_length)
     *line_length = len;
   return len ? string : NULL;
@@ -188,7 +195,11 @@  location_get_source_line (expanded_location xloc,
   if (!stream)
     return NULL;
 
-  while ((ptr = read_line (stream, &len)) && lines < xloc.line)
+  /* We make sure not to call read_line unnecessarily because the
+     non-null result of a given call to read_line overrides the result
+     of the previous call to read_line; hence the line test that comes
+     before the call to read_line.  */
+  while (lines < xloc.line && (ptr = read_line (stream, &len)))
     {
       buffer = ptr;
       lines++;
diff --git a/gcc/input.h b/gcc/input.h
index 128e28c..d7c8a0e 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -66,4 +66,6 @@  extern location_t input_location;
 
 void dump_line_table_statistics (void);
 
+const char * read_line (FILE *, int *);
+
 #endif