diff mbox

[fortran] PR 25708 Reduce seeks when loading module files

Message ID CAO9iq9Evdz0R0ZLHuktrKjwRAF4gt2QRKjc61xYWpt3DVm0gmQ@mail.gmail.com
State New
Headers show

Commit Message

Janne Blomqvist Nov. 30, 2011, 10:04 p.m. UTC
On Wed, Nov 30, 2011 at 22:36, Mikael Morin <mikael.morin@sfr.fr> wrote:
> Hi,
>
>>
>> modseek1.diff
>>   diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
>> index 70f8565..982425d 100644
>> --- a/gcc/fortran/module.c
>> +++ b/gcc/fortran/module.c
>> @@ -1069,51 +1069,49 @@ module_unget_char (void)
>>  static void
>>  parse_string (void)
[snip]
> I think you are changing the behaviour here. Single quotes in a string are
> encoded as two single quotes in a row. So when decoding, only one of them
> should be kept.
> Honestly, it may well be dead/unused code, but it's something we don't want to
> change at this point.

Ah, I misunderstood the existing code. But this is good, as it
simplifies my patch! :)

>> @@ -1293,22 +1291,15 @@ peek_atom (void)
[snip]
> I'm certainly nitpicking here, but for me it's better to have the file
> position before the unexpected atom than after it when throwing the error in
> bad_module.
> To save the call to fgetpos, (through get_module_locus) one could save/restore
> module_column and module_line as the call to bad_module doesn't need anything
> else.

Good point. Updated patch does this.

> About the rest of the patch, it's unfortunate that it makes the code slightly
> more difficult to read [this is my nitpicking day]. I think it's better to
> optimize peek_atom: the first character is sufficient to distinguish between
> atom types, so ungetc/module_unget_char can be used, even for strings, names
> or numbers.

Aaargh! Why didn't I think of this, instead spending most of the
evening trying to get rid of further peek_atom() calls! This idea is
much simpler and fixes all peek_atom() calls in one go.

With the updated patch, the number of lseek's when compiling
aermod.f90 drop to 380000, which is a factor of 15 reduction compared
to the current trunk. And a factor of 55 compared to trunk a few days
ago before Thomas' patch.

Updated patch attached. Regtested on x86_64-unknown-linux-gnu, Ok for trunk?

2011-11-31  Janne Blomqvist  <jb@gcc.gnu.org>

	PR fortran/25708
	* module.c (parse_string): Read string into resizable array
	instead of parsing twice and seeking.
	(peek_atom): New implementation avoiding seeks.
	(require_atom): Save and set column and line explicitly for error
	handling.
diff mbox

Patch

diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
index 70f8565..4954888 100644
--- a/gcc/fortran/module.c
+++ b/gcc/fortran/module.c
@@ -1069,51 +1069,37 @@  module_unget_char (void)
 static void
 parse_string (void)
 {
-  module_locus start;
-  int len, c;
-  char *p;
-
-  get_module_locus (&start);
+  int c;
+  size_t cursz = 30;
+  size_t len = 0;
 
-  len = 0;
+  atom_string = XNEWVEC (char, cursz);
 
-  /* See how long the string is.  */
   for ( ; ; )
     {
       c = module_char ();
-      if (c == EOF)
-	bad_module ("Unexpected end of module in string constant");
 
-      if (c != '\'')
+      if (c == '\'')
 	{
-	  len++;
-	  continue;
+	  int c2 = module_char ();
+	  if (c2 != '\'')
+	    {
+	      module_unget_char ();
+	      break;
+	    }
 	}
 
-      c = module_char ();
-      if (c == '\'')
+      if (len >= cursz)
 	{
-	  len++;
-	  continue;
+	  cursz *= 2;
+	  atom_string = XRESIZEVEC (char, atom_string, cursz);
 	}
-
-      break;
+      atom_string[len] = c;
+      len++;
     }
 
-  set_module_locus (&start);
-
-  atom_string = p = XCNEWVEC (char, len + 1);
-
-  for (; len > 0; len--)
-    {
-      c = module_char ();
-      if (c == '\'')
-	module_char ();		/* Guaranteed to be another \'.  */
-      *p++ = c;
-    }
-
-  module_char ();		/* Terminating \'.  */
-  *p = '\0';			/* C-style string for debug purposes.  */
+  atom_string = XRESIZEVEC (char, atom_string, len + 1);
+  atom_string[len] = '\0'; 	/* C-style string for debug purposes.  */
 }
 
 
@@ -1279,17 +1265,100 @@  parse_atom (void)
 static atom_type
 peek_atom (void)
 {
-  module_locus m;
+  int c;
   atom_type a;
 
-  get_module_locus (&m);
+  do
+    {
+      c = module_char ();
+    }
+  while (c == ' ' || c == '\r' || c == '\n');
+
+  switch (c)
+    {
+    case '(':
+      module_unget_char ();
+      return ATOM_LPAREN;
+
+    case ')':
+      module_unget_char ();
+      return ATOM_RPAREN;
 
-  a = parse_atom ();
-  if (a == ATOM_STRING)
-    free (atom_string);
+    case '\'':
+      module_unget_char ();
+      return ATOM_STRING;
+
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      module_unget_char ();
+      return ATOM_INTEGER;
+
+    case 'a':
+    case 'b':
+    case 'c':
+    case 'd':
+    case 'e':
+    case 'f':
+    case 'g':
+    case 'h':
+    case 'i':
+    case 'j':
+    case 'k':
+    case 'l':
+    case 'm':
+    case 'n':
+    case 'o':
+    case 'p':
+    case 'q':
+    case 'r':
+    case 's':
+    case 't':
+    case 'u':
+    case 'v':
+    case 'w':
+    case 'x':
+    case 'y':
+    case 'z':
+    case 'A':
+    case 'B':
+    case 'C':
+    case 'D':
+    case 'E':
+    case 'F':
+    case 'G':
+    case 'H':
+    case 'I':
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+    case 'Q':
+    case 'R':
+    case 'S':
+    case 'T':
+    case 'U':
+    case 'V':
+    case 'W':
+    case 'X':
+    case 'Y':
+    case 'Z':
+      module_unget_char ();
+      return ATOM_NAME;
 
-  set_module_locus (&m);
-  return a;
+    default:
+      bad_module ("Bad name");
+    }
 }
 
 
@@ -1299,11 +1368,12 @@  peek_atom (void)
 static void
 require_atom (atom_type type)
 {
-  module_locus m;
   atom_type t;
   const char *p;
+  int column, line;
 
-  get_module_locus (&m);
+  column = module_column;
+  line = module_line;
 
   t = parse_atom ();
   if (t != type)
@@ -1329,7 +1399,8 @@  require_atom (atom_type type)
 	  gfc_internal_error ("require_atom(): bad atom type required");
 	}
 
-      set_module_locus (&m);
+      module_column = column;
+      module_line = line;
       bad_module (p);
     }
 }