Patchwork PR #53525 - track-macro-expansion performance regression

login
register
mail settings
Submitter Dimitrios Apostolou
Date July 8, 2012, 6:11 a.m.
Message ID <alpine.LNX.2.02.1207080847210.4288@localhost.localdomain>
Download mbox | patch
Permalink /patch/169622/
State New
Headers show

Comments

Dimitrios Apostolou - July 8, 2012, 6:11 a.m.
With the attached patches I introduce four new obstacks in struct 
cpp_reader to substitute malloc's/realloc's when expanding macros. Numbers 
have been posted in the PR, but to summarize:

before: 0.785 s or  2201 M instr
after:  0.760 s or  2108 M instr

Memory overhead is some tens kilobytes worst case. Tested on x86, no 
regressions. I think this version of the patch is OK to merge, besides 
some TODO comments (I'd appreciate opinions on them) and the following 
maybe:

IMHO the new obstack_{mark,release} functions are the ones that will be 
harder to apply, because they make gcc's obstacks even more different 
than libc's. I sent the patch to libc-alpha but the feedback I got was 
that I should first make the two obstack versions (gcc,libc) identical, 
and then try to push changes. I've noted the primary differences and plan 
on tackling this, but it's not as trivial as it seems.

So if it's not OK, where could the new obstack_{mark,release} go?


Thanks,
Dimitris

=== modified file 'libcpp/init.c'
--- libcpp/init.c	2012-04-30 11:43:43 +0000
+++ libcpp/init.c	2012-07-07 16:04:01 +0000
@@ -241,10 +241,20 @@ cpp_create_reader (enum c_lang lang, has
   /* The expression parser stack.  */
   _cpp_expand_op_stack (pfile);
 
+#define obstack_chunk_alloc     ((void *(*) (long)) xmalloc)
+#define obstack_chunk_free      ((void (*) (void *)) free)
+
   /* Initialize the buffer obstack.  */
-  _obstack_begin (&pfile->buffer_ob, 0, 0,
-		  (void *(*) (long)) xmalloc,
-		  (void (*) (void *)) free);
+  obstack_init(&pfile->buffer_ob);
+
+  /* Initialize the macro obstacks. */
+  obstack_init (&pfile->exp_ob);
+  if (CPP_OPTION (pfile, track_macro_expansion))
+    {
+      obstack_init (&pfile->virt_locs_ob);
+      obstack_init (&pfile->arg_locs_ob);
+      obstack_init (&pfile->exp_locs_ob);
+    }
 
   _cpp_init_files (pfile);
 
@@ -253,6 +263,9 @@ cpp_create_reader (enum c_lang lang, has
   return pfile;
 }
 
+#undef obstack_chunk_alloc
+#undef obstack_chunk_free
+
 /* Set the line_table entry in PFILE.  This is called after reading a
    PCH file, as the old line_table will be incorrect.  */
 void
@@ -289,6 +302,14 @@ cpp_destroy (cpp_reader *pfile)
     deps_free (pfile->deps);
   obstack_free (&pfile->buffer_ob, 0);
 
+  obstack_free (&pfile->exp_ob, 0);
+  if (CPP_OPTION (pfile, track_macro_expansion))
+    {
+      obstack_free (&pfile->virt_locs_ob, 0);
+      obstack_free (&pfile->arg_locs_ob, 0);
+      obstack_free (&pfile->exp_locs_ob, 0);
+    }
+
   _cpp_destroy_hashtable (pfile);
   _cpp_cleanup_files (pfile);
   _cpp_destroy_iconv (pfile);

=== modified file 'libcpp/internal.h'
--- libcpp/internal.h	2012-05-29 09:36:29 +0000
+++ libcpp/internal.h	2012-07-07 17:18:53 +0000
@@ -555,6 +555,13 @@ struct cpp_reader
   /* If non-null, the lexer will use this location for the next token
      instead of getting a location from the linemap.  */
   source_location *forced_token_location_p;
+
+  /* Obstacks used to speed up macro expansion and virt_locs tracking. */
+  struct obstack exp_ob;	/* for expanding macro arguments */
+  /* The rest are used only when -ftrack-macro-expansion */
+  struct obstack exp_locs_ob;	/* virt_locs of expanded macro arguments */
+  struct obstack arg_locs_ob;	/* virt_locs of macro arguments */
+  struct obstack virt_locs_ob;	/* virt locs for all other macros */
 };
 
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.

=== modified file 'libcpp/macro.c'
--- libcpp/macro.c	2012-05-29 14:53:50 +0000
+++ libcpp/macro.c	2012-07-07 18:08:44 +0000
@@ -24,10 +24,20 @@ along with this program; see the file CO
  You are forbidden to forbid anyone else to use, share and improve
  what you give them.   Help stamp out software-hoarding!  */
 
+
 #include "config.h"
 #include "system.h"
 #include "cpplib.h"
 #include "internal.h"
+#include "obstack.h"
+
+
+#define MACRO_ASSERT(EXPR)			\
+  do {						\
+    if (! (EXPR))				\
+      abort ();					\
+  } while (0)
+
 
 typedef struct macro_arg macro_arg;
 /* This structure represents the tokens of a macro argument.  These
@@ -102,13 +112,6 @@ static const cpp_token *stringify_arg (c
 static void paste_all_tokens (cpp_reader *, const cpp_token *);
 static bool paste_tokens (cpp_reader *, source_location,
 			  const cpp_token **, const cpp_token *);
-static void alloc_expanded_arg_mem (cpp_reader *, macro_arg *, size_t);
-static void ensure_expanded_arg_room (cpp_reader *, macro_arg *, size_t, size_t *);
-static void delete_macro_args (_cpp_buff*, unsigned num_args);
-static void set_arg_token (macro_arg *, const cpp_token *,
-			   source_location, size_t,
-			   enum macro_arg_token_kind,
-			   bool);
 static const source_location *get_arg_token_location (const macro_arg *,
 						      enum macro_arg_token_kind);
 static const cpp_token **arg_token_ptr_at (const macro_arg *,
@@ -276,7 +279,7 @@ _cpp_builtin_macro_text (cpp_reader *pfi
 	unsigned int len;
 	const char *name;
 	uchar *buf;
-	
+
 	if (node->value.builtin == BT_FILE)
 	  name = linemap_get_expansion_filename (pfile->line_table,
 						 pfile->line_table->highest_line);
@@ -361,7 +364,7 @@ _cpp_builtin_macro_text (cpp_reader *pfi
 	    {
 	      cpp_errno (pfile, CPP_DL_WARNING,
 			 "could not determine date and time");
-		
+
 	      pfile->date = UC"\"??? ?? ????\"";
 	      pfile->time = UC"\"??:??:??\"";
 	    }
@@ -388,7 +391,7 @@ _cpp_builtin_macro_text (cpp_reader *pfi
       sprintf ((char *) result, "%u", number);
     }
 
-  return result;      
+  return result;
 }
 
 /* Convert builtin macros like __FILE__ to a token and push it on the
@@ -734,6 +737,52 @@ _cpp_arguments_ok (cpp_reader *pfile, cp
   return false;
 }
 
+/* Returns next token after pragma (either EOF or EOL). */
+
+static const cpp_token *
+handle_pragma (cpp_reader *pfile, const cpp_token *token,
+	       _cpp_buff **pragma_buff)
+{
+  cpp_token *newtok = _cpp_temp_token (pfile);
+
+  /* CPP_PRAGMA token lives in directive_result, which will
+     be overwritten on the next directive.  */
+  *newtok = *token;
+  token = newtok;
+  do
+    {
+      if (*pragma_buff == NULL
+	  || BUFF_ROOM (*pragma_buff) < sizeof (cpp_token *))
+	{
+	  _cpp_buff *next;
+	  if (*pragma_buff == NULL)
+	    *pragma_buff = _cpp_get_buff (pfile, 32 * sizeof (cpp_token *));
+	  else
+	    {
+	      next = *pragma_buff;
+	      *pragma_buff = _cpp_get_buff (pfile,
+					    (BUFF_FRONT (*pragma_buff)
+					     - (*pragma_buff)->base) * 2);
+	      (*pragma_buff)->next = next;
+	    }
+	}
+      *(const cpp_token **) BUFF_FRONT (*pragma_buff) = token;
+      BUFF_FRONT (*pragma_buff) += sizeof (cpp_token *);
+      if (token->type == CPP_PRAGMA_EOL)
+	break;
+
+      token = cpp_get_token_1 (pfile, NULL);
+    }
+  while (token->type != CPP_EOF);
+
+  /* In deferred pragmas parsing_args and prevent_expansion
+     had been changed, reset it.  */
+  pfile->state.parsing_args = 2;
+  pfile->state.prevent_expansion = 1;
+
+  return token;
+}
+
 /* Reads and returns the arguments to a function-like macro
    invocation.  Assumes the opening parenthesis has been processed.
    If there is an error, emits an appropriate diagnostic and returns
@@ -757,7 +806,6 @@ collect_args (cpp_reader *pfile, const c
   const cpp_token *token;
   unsigned int argc;
   source_location virt_loc;
-  bool track_macro_expansion_p = CPP_OPTION (pfile, track_macro_expansion);
   unsigned num_args_alloced = 0;
 
   macro = node->value.macro;
@@ -769,6 +817,8 @@ collect_args (cpp_reader *pfile, const c
 #define DEFAULT_NUM_TOKENS_PER_MACRO_ARG 50
 #define ARG_TOKENS_EXTENT 1000
 
+  /* TODO maybe macro_args should be stored in a separate obstack growing
+     little by little? */
   buff = _cpp_get_buff (pfile, argc * (DEFAULT_NUM_TOKENS_PER_MACRO_ARG
 				       * sizeof (cpp_token *)
 				       + sizeof (macro_arg)));
@@ -781,23 +831,17 @@ collect_args (cpp_reader *pfile, const c
   /* Collect the tokens making up each argument.  We don't yet know
      how many arguments have been supplied, whether too many or too
      few.  Hence the slightly bizarre usage of "argc" and "arg".  */
-  do
+
+  do				/* for each macro argument */
     {
       unsigned int paren_depth = 0;
       unsigned int ntokens = 0;
-      unsigned virt_locs_capacity = DEFAULT_NUM_TOKENS_PER_MACRO_ARG;
       num_args_alloced++;
 
       argc++;
       arg->first = (const cpp_token **) buff->cur;
-      if (track_macro_expansion_p)
-	{
-	  virt_locs_capacity = DEFAULT_NUM_TOKENS_PER_MACRO_ARG;
-	  arg->virt_locs = XNEWVEC (source_location,
-				    virt_locs_capacity);
-	}
 
-      for (;;)
+      for (;;)			/* for each token in argument */
 	{
 	  /* Require space for 2 new tokens (including a CPP_EOF).  */
 	  if ((unsigned char *) &arg->first[ntokens + 2] > buff->limit)
@@ -807,14 +851,6 @@ collect_args (cpp_reader *pfile, const c
 					      * sizeof (cpp_token *));
 	      arg->first = (const cpp_token **) buff->cur;
 	    }
-	  if (track_macro_expansion_p
-	      && (ntokens + 2 > virt_locs_capacity))
-	    {
-	      virt_locs_capacity += ARG_TOKENS_EXTENT;
-	      arg->virt_locs = XRESIZEVEC (source_location,
-					   arg->virt_locs,
-					   virt_locs_capacity);
-	    }
 
 	  token = cpp_get_token_1 (pfile, &virt_loc);
 
@@ -844,63 +880,40 @@ collect_args (cpp_reader *pfile, const c
 	    break;
 	  else if (token->type == CPP_PRAGMA)
 	    {
-	      cpp_token *newtok = _cpp_temp_token (pfile);
-
-	      /* CPP_PRAGMA token lives in directive_result, which will
-		 be overwritten on the next directive.  */
-	      *newtok = *token;
-	      token = newtok;
-	      do
-		{
-		  if (*pragma_buff == NULL
-		      || BUFF_ROOM (*pragma_buff) < sizeof (cpp_token *))
-		    {
-		      _cpp_buff *next;
-		      if (*pragma_buff == NULL)
-			*pragma_buff
-			  = _cpp_get_buff (pfile, 32 * sizeof (cpp_token *));
-		      else
-			{
-			  next = *pragma_buff;
-			  *pragma_buff
-			    = _cpp_get_buff (pfile,
-					     (BUFF_FRONT (*pragma_buff)
-					      - (*pragma_buff)->base) * 2);
-			  (*pragma_buff)->next = next;
-			}
-		    }
-		  *(const cpp_token **) BUFF_FRONT (*pragma_buff) = token;
-		  BUFF_FRONT (*pragma_buff) += sizeof (cpp_token *);
-		  if (token->type == CPP_PRAGMA_EOL)
-		    break;
-		  token = cpp_get_token_1 (pfile, &virt_loc);
-		}
-	      while (token->type != CPP_EOF);
-
-	      /* In deferred pragmas parsing_args and prevent_expansion
-		 had been changed, reset it.  */
-	      pfile->state.parsing_args = 2;
-	      pfile->state.prevent_expansion = 1;
+	      /* Get last token after the pragma. */
+	      token = handle_pragma (pfile, token, pragma_buff);
 
 	      if (token->type == CPP_EOF)
 		break;
-	      else
-		continue;
+	      continue;
 	    }
-	  set_arg_token (arg, token, virt_loc,
-			 ntokens, MACRO_ARG_TOKEN_NORMAL,
-			 CPP_OPTION (pfile, track_macro_expansion));
+
+	  /* TODO grow an obstack for tokens here? */
+	  arg->first[ntokens] = token;
+	  if (CPP_OPTION (pfile, track_macro_expansion))
+	    XOBGROW (&pfile->arg_locs_ob, source_location, &virt_loc);
+
 	  ntokens++;
 	}
 
       /* Drop trailing padding.  */
       while (ntokens > 0 && arg->first[ntokens - 1]->type == CPP_PADDING)
+      {
 	ntokens--;
+	if (CPP_OPTION (pfile, track_macro_expansion))
+	  XOBSHRINK (&pfile->arg_locs_ob, source_location);
+      }
 
       arg->count = ntokens;
-      set_arg_token (arg, &pfile->eof, pfile->eof.src_loc,
-		     ntokens, MACRO_ARG_TOKEN_NORMAL,
-		     CPP_OPTION (pfile, track_macro_expansion));
+      arg->first[ntokens] = &pfile->eof;
+      if (CPP_OPTION (pfile, track_macro_expansion))
+	{
+	  MACRO_ASSERT (obstack_object_size (&pfile->arg_locs_ob)
+			== ntokens * sizeof (source_location));
+	  XOBGROW (&pfile->arg_locs_ob, source_location, &pfile->eof.src_loc);
+	  /* At this point the array of virt_locs is finished. */
+	  arg->virt_locs = XOBFINISH (&pfile->arg_locs_ob, source_location *);
+	}
 
       /* Terminate the argument.  Excess arguments loop back and
 	 overwrite the final legitimate argument, before failing.  */
@@ -1057,6 +1070,17 @@ enter_macro_context (cpp_reader *pfile,
 
       if (macro->fun_like)
 	{
+	  /* Mark where we start allocating so that we can free with the
+	     simple call to obstack_release() later.
+
+	     It's not enough just to do obstack_alloc/free() because in
+	     recursive calls of enter_macro_context() happen through
+	     expand_arg()->_cpp_get_token_1() and *growing* objects may be
+	     interrupted but may need to continue growing after returning. */
+	  void *arg_locs_marker = obstack_mark (&pfile->arg_locs_ob);
+	  void *expanded_locs_marker = obstack_mark (&pfile->exp_locs_ob);
+	  void *expanded_marker = obstack_mark (&pfile->exp_ob);
+
 	  _cpp_buff *buff;
 	  unsigned num_args = 0;
 
@@ -1069,11 +1093,29 @@ enter_macro_context (cpp_reader *pfile,
 	  pfile->keep_tokens--;
 	  pfile->state.prevent_expansion--;
 
+	  if (buff != NULL)
+	    {
+	      if (macro->paramc > 0)
+		replace_args (pfile, node, macro,
+			      (macro_arg *) buff->base,
+			      location);
+	      _cpp_free_buff (buff);
+	    }
+
+	  /* Free everything that was allocated in these obstacks after the
+	     obstack_mark() operation. */
+	  obstack_release (&pfile->arg_locs_ob, arg_locs_marker);
+	  obstack_release (&pfile->exp_locs_ob, expanded_locs_marker);
+	  obstack_release (&pfile->exp_ob, expanded_marker);
+
+	  /* If no opening parenthesis was found by funlike_invocation_p() or
+	     if an error happened when it called collect_args(). */
 	  if (buff == NULL)
 	    {
 	      if (CPP_WTRADITIONAL (pfile) && ! node->value.macro->syshdr)
 		cpp_warning (pfile, CPP_W_TRADITIONAL,
- "function-like macro \"%s\" must be used with arguments in traditional C",
+			     "function-like macro \"%s\" must be used "
+			     "with arguments in traditional C",
 			     NODE_NAME (node));
 
 	      if (pragma_buff)
@@ -1082,15 +1124,6 @@ enter_macro_context (cpp_reader *pfile,
 	      pfile->about_to_expand_macro_p = false;
 	      return 0;
 	    }
-
-	  if (macro->paramc > 0)
-	    replace_args (pfile, node, macro,
-			  (macro_arg *) buff->base,
-			  location);
-	  /* Free the memory used by the arguments of this
-	     function-like macro.  This memory has been allocated by
-	     funlike_invocation_p and by replace_args.  */
-	  delete_macro_args (buff, num_args);
 	}
 
       /* Disable the macro within its expansion.  */
@@ -1182,78 +1215,6 @@ enter_macro_context (cpp_reader *pfile,
   return builtin_macro (pfile, node);
 }
 
-/* De-allocate the memory used by BUFF which is an array of instances
-   of macro_arg.  NUM_ARGS is the number of instances of macro_arg
-   present in BUFF.  */
-static void
-delete_macro_args (_cpp_buff *buff, unsigned num_args)
-{
-  macro_arg *macro_args;
-  unsigned i;
-
-  if (buff == NULL)
-    return;
-
-  macro_args = (macro_arg *) buff->base;
-
-  /* Walk instances of macro_arg to free their expanded tokens as well
-     as their macro_arg::virt_locs members.  */
-  for (i = 0; i < num_args; ++i)
-    {
-      if (macro_args[i].expanded)
-	{
-	  free (macro_args[i].expanded);
-	  macro_args[i].expanded = NULL;
-	}
-      if (macro_args[i].virt_locs)
-	{
-	  free (macro_args[i].virt_locs);
-	  macro_args[i].virt_locs = NULL;
-	}
-      if (macro_args[i].expanded_virt_locs)
-	{
-	  free (macro_args[i].expanded_virt_locs);
-	  macro_args[i].expanded_virt_locs = NULL;
-	}
-    }
-  _cpp_free_buff (buff);
-}
-
-/* Set the INDEXth token of the macro argument ARG. TOKEN is the token
-   to set, LOCATION is its virtual location.  "Virtual" location means
-   the location that encodes loci across macro expansion. Otherwise
-   it has to be TOKEN->SRC_LOC.  KIND is the kind of tokens the
-   argument ARG is supposed to contain.  Note that ARG must be
-   tailored so that it has enough room to contain INDEX + 1 numbers of
-   tokens, at least.  */
-static void
-set_arg_token (macro_arg *arg, const cpp_token *token,
-	       source_location location, size_t index,
-	       enum macro_arg_token_kind kind,
-	       bool track_macro_exp_p)
-{
-  const cpp_token **token_ptr;
-  source_location *loc = NULL;
-
-  token_ptr =
-    arg_token_ptr_at (arg, index, kind,
-		      track_macro_exp_p ? &loc : NULL);
-  *token_ptr = token;
-
-  if (loc != NULL)
-    {
-#ifdef ENABLE_CHECKING
-      if (kind == MACRO_ARG_TOKEN_STRINGIFIED
-	  || !track_macro_exp_p)
-	/* We can't set the location of a stringified argument
-	   token and we can't set any location if we aren't tracking
-	   macro expansion locations.   */
-	abort ();
-#endif
-      *loc = location;
-    }
-}
-
 /* Get the pointer to the location of the argument token of the
    function-like macro argument ARG.  This function must be called
    only when we -ftrack-macro-expansion is on.  */
@@ -1289,11 +1250,11 @@ arg_token_ptr_at (const macro_arg *arg,
     case MACRO_ARG_TOKEN_NORMAL:
       tokens_ptr = arg->first;
       break;
-    case MACRO_ARG_TOKEN_STRINGIFIED:      
+    case MACRO_ARG_TOKEN_STRINGIFIED:
       tokens_ptr = (const cpp_token **) &arg->stringified;
       break;
     case MACRO_ARG_TOKEN_EXPANDED:
-	tokens_ptr = arg->expanded;
+      tokens_ptr = arg->expanded;
       break;
     }
 
@@ -1407,12 +1368,12 @@ macro_arg_token_iter_get_location (const
    want each tokens resulting from function-like macro arguments
    expansion to have a different location or not.
 
-   E.g, consider this function-like macro: 
+   E.g, consider this function-like macro:
 
         #define M(x) x - 3
 
    Then consider us "calling" it (and thus expanding it) like:
-   
+
        M(1+4)
 
    It will be expanded into:
@@ -1526,7 +1487,7 @@ replace_args (cpp_reader *pfile, cpp_has
      location that records many things like the locus of the expansion
      point as well as the original locus inside the definition of the
      macro.  This location is called a virtual location.
-     
+
      So the buffer BUFF holds a set of cpp_token*, and the buffer
      VIRT_LOCS holds the virtual locations of the tokens held by BUFF.
 
@@ -1534,7 +1495,7 @@ replace_args (cpp_reader *pfile, cpp_has
      context, when the latter is pushed.  The memory allocated to
      store the tokens and their locations is going to be freed once
      the context of macro expansion is popped.
-     
+
      As far as tokens are concerned, the memory overhead of
      -ftrack-macro-expansion is proportional to the number of
      macros that get expanded multiplied by sizeof (source_location).
@@ -1548,6 +1509,7 @@ replace_args (cpp_reader *pfile, cpp_has
      the macro expansion, copy the tokens and replace the arguments.
      This memory must be freed when the context of the macro MACRO is
      popped.  */
+  /* TODO why is total 0 sometimes? Should we return? */
   buff = tokens_buff_new (pfile, total, track_macro_exp ? &virt_locs : NULL);
 
   first = (const cpp_token **) buff->base;
@@ -1811,6 +1773,7 @@ next_context (cpp_reader *pfile)
 
   if (result == 0)
     {
+      /* TODO obstack of contexts in cpp_reader? Too much to change? */
       result = XNEW (cpp_context);
       memset (result, 0, sizeof (cpp_context));
       result->prev = pfile->context;
@@ -1923,10 +1886,10 @@ tokens_buff_new (cpp_reader *pfile, size
 		 source_location **virt_locs)
 {
   size_t tokens_size = len * sizeof (cpp_token *);
-  size_t locs_size = len * sizeof (source_location);
 
   if (virt_locs != NULL)
-    *virt_locs = XNEWVEC (source_location, locs_size);
+    /* To be freed in _cpp_pop_buffer(). */
+    *virt_locs = XOBNEWVEC (&pfile->virt_locs_ob, source_location, len);
   return _cpp_get_buff (pfile, tokens_size);
 }
 
@@ -1984,7 +1947,7 @@ tokens_buff_put_token_to (const cpp_toke
 			  source_location *virt_loc_dest,
 			  const cpp_token *token,
 			  source_location virt_loc,
-			  source_location parm_def_loc,			  
+			  source_location parm_def_loc,
 			  const struct line_map *map,
 			  unsigned int macro_token_index)
 {
@@ -2035,7 +1998,7 @@ tokens_buff_add_token (_cpp_buff *buffer
 {
   const cpp_token **result;
   source_location *virt_loc_dest = NULL;
-  unsigned token_index = 
+  unsigned token_index =
     (BUFF_FRONT (buffer) - buffer->base) / sizeof (cpp_token *);
 
   /* Abort if we pass the end the buffer.  */
@@ -2054,50 +2017,6 @@ tokens_buff_add_token (_cpp_buff *buffer
   return result;
 }
 
-/* Allocate space for the function-like macro argument ARG to store
-   the tokens resulting from the macro-expansion of the tokens that
-   make up ARG itself. That space is allocated in ARG->expanded and
-   needs to be freed using free.  */
-static void
-alloc_expanded_arg_mem (cpp_reader *pfile, macro_arg *arg, size_t capacity)
-{
-#ifdef ENABLE_CHECKING
-  if (arg->expanded != NULL
-      || arg->expanded_virt_locs != NULL)
-    abort ();
-#endif
-  arg->expanded = XNEWVEC (const cpp_token *, capacity);
-  if (CPP_OPTION (pfile, track_macro_expansion))
-    arg->expanded_virt_locs = XNEWVEC (source_location, capacity);
-
-}
-
-/* If necessary, enlarge ARG->expanded to so that it can contain SIZE
-   tokens.  */
-static void
-ensure_expanded_arg_room (cpp_reader *pfile, macro_arg *arg,
-			  size_t size, size_t *expanded_capacity)
-{
-  if (size <= *expanded_capacity)
-    return;
-
-  size *= 2;
-
-  arg->expanded =
-    XRESIZEVEC (const cpp_token *, arg->expanded, size);
-  *expanded_capacity = size;
-
-  if (CPP_OPTION (pfile, track_macro_expansion))
-    {
-      if (arg->expanded_virt_locs == NULL)
-	arg->expanded_virt_locs = XNEWVEC (source_location, size);
-      else
-	arg->expanded_virt_locs = XRESIZEVEC (source_location,
-					      arg->expanded_virt_locs,
-					      size);
-    }
-}
-
 /* Expand an argument ARG before replacing parameters in a
    function-like macro.  This works by pushing a context with the
    argument's tokens, and then expanding that into a temporary buffer
@@ -2107,9 +2026,10 @@ ensure_expanded_arg_room (cpp_reader *pf
 static void
 expand_arg (cpp_reader *pfile, macro_arg *arg)
 {
-  size_t capacity;
   bool saved_warn_trad;
   bool track_macro_exp_p = CPP_OPTION (pfile, track_macro_expansion);
+  const cpp_token *token;
+  source_location location;
 
   if (arg->count == 0
       || arg->expanded != NULL)
@@ -2119,10 +2039,6 @@ expand_arg (cpp_reader *pfile, macro_arg
   saved_warn_trad = CPP_WTRADITIONAL (pfile);
   CPP_WTRADITIONAL (pfile) = 0;
 
-  /* Loop, reading in the tokens of the argument.  */
-  capacity = 256;
-  alloc_expanded_arg_mem (pfile, arg, capacity);
-
   if (track_macro_exp_p)
     push_extended_tokens_context (pfile, NULL, NULL,
 				  arg->virt_locs,
@@ -2132,24 +2048,27 @@ expand_arg (cpp_reader *pfile, macro_arg
     push_ptoken_context (pfile, NULL, NULL,
 			 arg->first, arg->count + 1);
 
-  for (;;)
+  /* Loop, reading in the tokens of the argument.  */
+  token = cpp_get_token_1 (pfile, &location);
+  while (token->type != CPP_EOF)
     {
-      const cpp_token *token;
-      source_location location;
-
-      ensure_expanded_arg_room (pfile, arg, arg->expanded_count + 1,
-				&capacity);
-
+      XOBGROW (&pfile->exp_ob, cpp_token *, &token);
+      arg->expanded_count++;
+      if (CPP_OPTION (pfile, track_macro_expansion))
+	XOBGROW (&pfile->exp_locs_ob, source_location, &location);
       token = cpp_get_token_1 (pfile, &location);
+    }
 
-      if (token->type == CPP_EOF)
-	break;
+  MACRO_ASSERT (obstack_object_size (&pfile->exp_ob)
+  		== arg->expanded_count * sizeof (cpp_token *));
+  if (CPP_OPTION (pfile, track_macro_expansion))
+    MACRO_ASSERT (obstack_object_size (&pfile->exp_locs_ob)
+  		  == arg->expanded_count * sizeof (source_location));
 
-      set_arg_token (arg, token, location,
-		     arg->expanded_count, MACRO_ARG_TOKEN_EXPANDED,
-		     CPP_OPTION (pfile, track_macro_expansion));
-      arg->expanded_count++;
-    }
+  /* We are done growing the objects. */
+  arg->expanded = XOBFINISH (&pfile->exp_ob, const cpp_token **);
+  if (CPP_OPTION (pfile, track_macro_expansion))
+    arg->expanded_virt_locs = XOBFINISH (&pfile->exp_locs_ob, source_location *);
 
   _cpp_pop_context (pfile);
 
@@ -2209,7 +2128,7 @@ _cpp_pop_context (cpp_reader *pfile)
 	     locations of these tokens too.  */
 	  if (context->buff && mc->virt_locs)
 	    {
-	      free (mc->virt_locs);
+	      XOBDELETE (&pfile->virt_locs_ob, source_location, mc->virt_locs);
 	      mc->virt_locs = NULL;
 	    }
 	  free (mc);
@@ -2242,7 +2161,7 @@ _cpp_pop_context (cpp_reader *pfile)
     }
 
   pfile->context = context->prev;
-  /* decrease peak memory consumption by feeing the context.  */
+  /* decrease peak memory consumption by freeing the context.  */
   pfile->context->next = NULL;
   free (context);
 }
@@ -2280,7 +2199,7 @@ consume_next_token_from_context (cpp_rea
       *location = (*token)->src_loc;
       FIRST (c).token++;
     }
-  else if ((c)->tokens_kind == TOKENS_KIND_INDIRECT)		
+  else if ((c)->tokens_kind == TOKENS_KIND_INDIRECT)
     {
       *token = *FIRST (c).ptoken;
       *location = (*token)->src_loc;
@@ -2370,7 +2289,7 @@ cpp_get_token_1 (cpp_reader *pfile, sour
 	      goto out;
 	    }
 	}
-      else
+      else			/* end of context */
 	{
 	  if (pfile->context->c.macro)
 	    ++num_expanded_macros_counter;
@@ -2433,7 +2352,7 @@ cpp_get_token_1 (cpp_reader *pfile, sour
 		}
 	    }
 	  else
-	    ret = enter_macro_context (pfile, node, result, 
+	    ret = enter_macro_context (pfile, node, result,
 				       virt_loc);
 	  if (ret)
  	    {
@@ -2727,7 +2646,7 @@ _cpp_save_parameter (cpp_reader *pfile,
     }
   ((union _cpp_hashnode_value *) pfile->macro_buffer)[macro->paramc - 1]
     = node->value;
-  
+
   node->value.arg_index  = macro->paramc;
   return false;
 }
=== modified file 'include/obstack.h'
--- include/obstack.h	2011-10-22 01:35:29 +0000
+++ include/obstack.h	2012-07-07 16:04:01 +0000
@@ -538,6 +538,40 @@ __extension__								\
 
 #endif /* not __GNUC__ or not __STDC__ */
 
+
+/* The MARK operation allows you to save the state of the obstack by pushing a
+   special marker onto the stack. When you RELEASE that marker the obstack
+   returns to its previous state which means all memory allocated after the
+   marker is free'd, and most importantly if you were growing an object before
+   pushing the marker, you can now continue growing it. Needless to say that
+   the marker is destroyed if you release/free an earlier marker/object. */
+static inline void *obstack_mark (struct obstack *o)
+{
+  void **old_base, **p;
+
+  /* Rellocation may happen when growing...*/
+  obstack_blank (o, sizeof (void *));
+  /* ...so save object_base afterwards... */
+  old_base = (void **) o->object_base;
+  /* ...and save the end of the previous object to return it. */
+  p = (void **) & o->next_free [-1 * sizeof (void *)];
+
+  /* Finally make it possible to start growing new objects */
+  obstack_finish (o);
+  /* and store saved state in the new space we grew. */
+  *p = old_base;
+
+  return p;
+}
+
+static inline void obstack_release (struct obstack *o, void *p)
+{
+  void *old_base = *(void **) p;
+  obstack_free (o, p);
+  o->object_base = (char *) old_base;
+}
+
+
 #ifdef __cplusplus
 }	/* C++ */
 #endif
Dodji Seketeli - July 18, 2012, 6:58 p.m.
Hello Dimitrios,

> With the attached patches I introduce four new obstacks in struct
> cpp_reader to substitute malloc's/realloc's when expanding
> macros. Numbers have been posted in the PR, but to summarize:
> 
> before: 0.785 s or  2201 M instr
> after:  0.760 s or  2108 M instr
> 
> Memory overhead is some tens kilobytes worst case. Tested on x86, no
> regressions. I think this version of the patch is OK to merge, besides
> some TODO comments (I'd appreciate opinions on them) and the following
> maybe:

Thank you for your time and dedication.

I am not a maintainer of any kind, so I can not approve or deny your
patch.  I am just chiming in to help with a few comments and CC the
maintainers.

> IMHO the new obstack_{mark,release} functions are the ones that will
> be harder to apply, because they make gcc's obstacks even more
> different than libc's. I sent the patch to libc-alpha but the feedback
> I got was that I should first make the two obstack versions (gcc,libc)
> identical, and then try to push changes. I've noted the primary
> differences and plan on tackling this, but it's not as trivial as it
> seems.
> 
> So if it's not OK, where could the new obstack_{mark,release} go?

I am letting the maintainers reply to this one.  :-)

Please find my comments below.

For each sub-project that your patch modifies, you need to add a
GNU-Style ChangeLog.  The custom is to add it separately (e.g inline
or in attachment), not as part of the diff.  That way, applying the
patch is less likely to trigger conflicts.

> === modified file 'include/libiberty.h'
> --- include/libiberty.h	2011-09-28 19:04:30 +0000
> +++ include/libiberty.h	2012-07-07 16:04:01 +0000

[...]

> -/* Type-safe obstack allocator.  */
> +/* Type-safe obstack allocator. You must first initialize the obstack with
> +   obstack_init() or _obstack_begin(). */

Why not just using the _obstack_begin function?  Why introducing the
use of the obstack_init macro?

>  #define XOBNEW(O, T)		((T *) obstack_alloc ((O), sizeof (T)))
>  #define XOBNEWVEC(O, T, N)	((T *) obstack_alloc ((O), sizeof (T) * (N)))
>  #define XOBNEWVAR(O, T, S)	((T *) obstack_alloc ((O), (S)))
> -#define XOBFINISH(O, T)         ((T) obstack_finish ((O)))

I think this change is not needed.  You basically remove this line to
replace it with the hunk below, that comes later in the patch:

> +#define XOBFINISH(O, PT)	((PT) obstack_finish ((O)))

So I believe you could do away with the change.

> +#define XOBDELETE(O, T, P)	(obstack_free ((O), (P)))

If you are not using the T parameter in the definition of the macro,
then you might as well remove it from the macro parameters.

> +
> +#define XOBGROW(O, T, D)	obstack_grow ((O), (D), sizeof (T))
> +#define XOBGROWVEC(O, T, D, N)	obstack_grow ((O), (D), sizeof (T) * (N))
> +#define XOBSHRINK(O, T)		obstack_blank ((O), -1 * sizeof (T))
> +#define XOBSHRINKVEC(O, T, N)	obstack_blank ((O), -1 * sizeof (T) * (N))

Maybe these new macros could use some comments at least to make it
easier to figure out what these O, T, D, N parameters mean.  I
understand that it is not done that way for the existing macros, but I
guess we could use some improvement here.  :-)

[...]

> === modified file 'libcpp/init.c'
> --- libcpp/init.c	2012-04-30 11:43:43 +0000
> +++ libcpp/init.c	2012-07-07 16:04:01 +0000

Missing ChangeLog for this changes of libcpp.

> @@ -241,10 +241,20 @@ cpp_create_reader (enum c_lang lang, has
>    /* The expression parser stack.  */
>    _cpp_expand_op_stack (pfile);
>  
> +#define obstack_chunk_alloc     ((void *(*) (long)) xmalloc)
> +#define obstack_chunk_free      ((void (*) (void *)) free)
> +
>    /* Initialize the buffer obstack.  */
> -  _obstack_begin (&pfile->buffer_ob, 0, 0,
> -		  (void *(*) (long)) xmalloc,
> -		  (void (*) (void *)) free);
> +  obstack_init(&pfile->buffer_ob);

Same comment as earlier.  Why obstack_init instead of just using
_obstack_begin?

> +
> +  /* Initialize the macro obstacks. */
> +  obstack_init (&pfile->exp_ob);
> +  if (CPP_OPTION (pfile, track_macro_expansion))
> +    {
> +      obstack_init (&pfile->virt_locs_ob);
> +      obstack_init (&pfile->arg_locs_ob);
> +      obstack_init (&pfile->exp_locs_ob);
> +    }

Same comment as above.

[...]

> === modified file 'libcpp/internal.h'
> --- libcpp/internal.h	2012-05-29 09:36:29 +0000
> +++ libcpp/internal.h	2012-07-07 17:18:53 +0000

[...]

@@ -555,6 +555,13 @@ struct cpp_reader

[...]

> +  /* Obstacks used to speed up macro expansion and virt_locs tracking. */

I'd say something like:

/* Obstacks used for fast memory allocation during macro expansion and
   virtual location tracking. */

+  struct obstack exp_ob;	/* for expanding macro arguments */

I'd rather call this field args_exp_ob, to make the name more
meaningful.

+ struct obstack exp_locs_ob;	/* virt_locs of expanded macro arguments */

Likewise, I'd call this field args_exp_virt_locs_ob.

+  struct obstack virt_locs_ob;	/* virt locs for all other macros */

I'd change the comment here to:

    /* Virtual locations for tokens resulting from macro expansion.  */

Also, please make sure to add a dot at the end of the comments,
followed by two spaces before the */.

> === modified file 'libcpp/macro.c'
> --- libcpp/macro.c	2012-05-29 14:53:50 +0000
> +++ libcpp/macro.c	2012-07-07 18:08:44 +0000
@@ -24,10 +24,20 @@ along with this program; see the file CO
  You are forbidden to forbid anyone else to use, share and improve
  what you give them.   Help stamp out software-hoarding!  */
 
+

This is an unnecessary white space change.

[...]

+#define MACRO_ASSERT(EXPR)			\
+  do {						\
+    if (! (EXPR))				\
+      abort ();					\
+  } while (0)
+

I believe this macro should be guarded by an #ifdef ENABLED_CHECKING,
so that the macro expands to nothing when building the compiler with
--disable-checking.

>  static const cpp_token **arg_token_ptr_at (const macro_arg *,
> @@ -276,7 +279,7 @@ _cpp_builtin_macro_text (cpp_reader *pfi
>  	unsigned int len;
>  	const char *name;
>  	uchar *buf;
> -	
> +

This is an unnecessary white space change.  At a very least, it should
go into a separate cleanup patch.

[...]

> @@ -361,7 +364,7 @@ _cpp_builtin_macro_text (cpp_reader *pfi
>  	    {
>  	      cpp_errno (pfile, CPP_DL_WARNING,
>  			 "could not determine date and time");
> -		
> +

Likewise.

> @@ -388,7 +391,7 @@ _cpp_builtin_macro_text (cpp_reader *pfi
>        sprintf ((char *) result, "%u", number);
>      }
>  
> -  return result;      
> +  return result;
>  }

Likewise.

[...]

>  
>  /* Convert builtin macros like __FILE__ to a token and push it on the
> @@ -734,6 +737,52 @@ _cpp_arguments_ok (cpp_reader *pfile, cp
>    return false;
>  }
>  
> +/* Returns next token after pragma (either EOF or EOL). */
> +
> +static const cpp_token *
> +handle_pragma (cpp_reader *pfile, const cpp_token *token,
> +	       _cpp_buff **pragma_buff)
> +{
> +  cpp_token *newtok = _cpp_temp_token (pfile);
> +
> +  /* CPP_PRAGMA token lives in directive_result, which will
> +     be overwritten on the next directive.  */
> +  *newtok = *token;
> +  token = newtok;
> +  do
> +    {
> +      if (*pragma_buff == NULL
> +	  || BUFF_ROOM (*pragma_buff) < sizeof (cpp_token *))
> +	{
> +	  _cpp_buff *next;
> +	  if (*pragma_buff == NULL)
> +	    *pragma_buff = _cpp_get_buff (pfile, 32 * sizeof (cpp_token *));
> +	  else
> +	    {
> +	      next = *pragma_buff;
> +	      *pragma_buff = _cpp_get_buff (pfile,
> +					    (BUFF_FRONT (*pragma_buff)
> +					     - (*pragma_buff)->base) * 2);
> +	      (*pragma_buff)->next = next;
> +	    }
> +	}
> +      *(const cpp_token **) BUFF_FRONT (*pragma_buff) = token;
> +      BUFF_FRONT (*pragma_buff) += sizeof (cpp_token *);
> +      if (token->type == CPP_PRAGMA_EOL)
> +	break;
> +
> +      token = cpp_get_token_1 (pfile, NULL);
> +    }
> +  while (token->type != CPP_EOF);
> +
> +  /* In deferred pragmas parsing_args and prevent_expansion
> +     had been changed, reset it.  */
> +  pfile->state.parsing_args = 2;
> +  pfile->state.prevent_expansion = 1;
> +
> +  return token;
> +}
> +

I understand that the macro expansion code is not always simple to
grok, and I understand the pressure to simplify it.  But I think that
factorizing this new handle_pragma function, out of the hunk:

[...]

> @@ -844,63 +880,40 @@ collect_args (cpp_reader *pfile, const c
>  	    break;
>  	  else if (token->type == CPP_PRAGMA)
>  	    {
> -	      cpp_token *newtok = _cpp_temp_token (pfile);
> -
> -	      /* CPP_PRAGMA token lives in directive_result, which will
> -		 be overwritten on the next directive.  */
> -	      *newtok = *token;
> -	      token = newtok;
> -	      do
> -		{
> -		  if (*pragma_buff == NULL
> -		      || BUFF_ROOM (*pragma_buff) < sizeof (cpp_token *))
> -		    {
> -		      _cpp_buff *next;
> -		      if (*pragma_buff == NULL)
> -			*pragma_buff
> -			  = _cpp_get_buff (pfile, 32 * sizeof (cpp_token *));
> -		      else
> -			{
> -			  next = *pragma_buff;
> -			  *pragma_buff
> -			    = _cpp_get_buff (pfile,
> -					     (BUFF_FRONT (*pragma_buff)
> -					      - (*pragma_buff)->base) * 2);
> -			  (*pragma_buff)->next = next;
> -			}
> -		    }
> -		  *(const cpp_token **) BUFF_FRONT (*pragma_buff) = token;
> -		  BUFF_FRONT (*pragma_buff) += sizeof (cpp_token *);
> -		  if (token->type == CPP_PRAGMA_EOL)
> -		    break;
> -		  token = cpp_get_token_1 (pfile, &virt_loc);
> -		}
> -	      while (token->type != CPP_EOF);
> -
> -	      /* In deferred pragmas parsing_args and prevent_expansion
> -		 had been changed, reset it.  */
> -	      pfile->state.parsing_args = 2;
> -	      pfile->state.prevent_expansion = 1;
> +	      /* Get last token after the pragma. */
> +	      token = handle_pragma (pfile, token, pragma_buff);

... belongs to a different patch, which purpose would be to "cleanup"
this code, as opposed to the purpose of the current patch which is to
use obstack for virtual location tracking code.

[...]

>  /* Reads and returns the arguments to a function-like macro
>     invocation.  Assumes the opening parenthesis has been processed.
>     If there is an error, emits an appropriate diagnostic and returns
> @@ -757,7 +806,6 @@ collect_args (cpp_reader *pfile, const c
>    const cpp_token *token;
>    unsigned int argc;
>    source_location virt_loc;
> -  bool track_macro_expansion_p = CPP_OPTION (pfile, track_macro_expansion);
>    unsigned num_args_alloced = 0;
>  
>    macro = node->value.macro;
> @@ -769,6 +817,8 @@ collect_args (cpp_reader *pfile, const c
>  #define DEFAULT_NUM_TOKENS_PER_MACRO_ARG 50
>  #define ARG_TOKENS_EXTENT 1000
>  
> +  /* TODO maybe macro_args should be stored in a separate obstack growing
> +     little by little? */

Does the memory (de)allocation macro_args show up in your profile?

>    buff = _cpp_get_buff (pfile, argc * (DEFAULT_NUM_TOKENS_PER_MACRO_ARG
>  				       * sizeof (cpp_token *)
>  				       + sizeof (macro_arg)));
> @@ -781,23 +831,17 @@ collect_args (cpp_reader *pfile, const c
>    /* Collect the tokens making up each argument.  We don't yet know
>       how many arguments have been supplied, whether too many or too
>       few.  Hence the slightly bizarre usage of "argc" and "arg".  */
> -  do
> +
> +  do				/* for each macro argument */
>      {

I believe there should be a period at the end of the comment, followed
by two spaces before the "*/".  That is the GCC style of commenting.

[...]

> -      for (;;)
> +      for (;;)			/* for each token in argument */

Likewise, commenting style.

[...]

>  	      if (token->type == CPP_EOF)
>  		break;
> -	      else
> -		continue;
> +	      continue;

I think this change is unnecessary.

>  	    }
> -	  set_arg_token (arg, token, virt_loc,
> -			 ntokens, MACRO_ARG_TOKEN_NORMAL,
> -			 CPP_OPTION (pfile, track_macro_expansion));
> +
> +	  /* TODO grow an obstack for tokens here? */

Same question as earlier.  Did the (de)allocation appear in your
profile?

> +	  arg->first[ntokens] = token;
> +	  if (CPP_OPTION (pfile, track_macro_expansion))
> +	    XOBGROW (&pfile->arg_locs_ob, source_location, &virt_loc);
> +
>  	  ntokens++;
>  	}
>  
>        /* Drop trailing padding.  */
>        while (ntokens > 0 && arg->first[ntokens - 1]->type == CPP_PADDING)
> +      {

The '{' should have two space indentation wrt the column of the
'while' keyword.

>  	ntokens--;
> +	if (CPP_OPTION (pfile, track_macro_expansion))
> +	  XOBSHRINK (&pfile->arg_locs_ob, source_location);
> +      }

Likewise, indentation of the '}'.

[...]

> +	  /* At this point the array of virt_locs is finished. */

Two spaces after dot.

[...]

>        if (macro->fun_like)
>  	{
> +	  /* Mark where we start allocating so that we can free with the
> +	     simple call to obstack_release() later.
> +
> +	     It's not enough just to do obstack_alloc/free() because in
> +	     recursive calls of enter_macro_context() happen through
> +	     expand_arg()->_cpp_get_token_1() and *growing* objects may be
> +	     interrupted but may need to continue growing after returning. */

Two spaces after dot.

Also, I would amend the comment to make it clear that from here on,
subroutines like funlike_invocation_p, expand_arg, collect_args, etc,
are going to allocate memory for the expansion of macros that are
present in the arguments of the current function-like macro we are
dealing with.

That memory is in a obstack that is global to all function-like
macros.  So we need to mark where the obstack memory for the arguments
of *this* function-like macro starts, so that we can later release
that memory only, and avoid releasing/corrupting memory of the
arguments of other intermixed function-like macro calls.

[...]

> +	  void *expanded_locs_marker = obstack_mark (&pfile->exp_locs_ob);
> +	  void *arg_locs_marker = obstack_mark (&pfile->arg_locs_ob);
> +	  void *expanded_marker = obstack_mark (&pfile->exp_ob);
> +
>  	  _cpp_buff *buff;
>  	  unsigned num_args = 0;
>  
> @@ -1069,11 +1093,29 @@ enter_macro_context (cpp_reader *pfile,
>  	  pfile->keep_tokens--;
>  	  pfile->state.prevent_expansion--;
>  
> +	  if (buff != NULL)
> +	    {
> +	      if (macro->paramc > 0)
> +		replace_args (pfile, node, macro,
> +			      (macro_arg *) buff->base,
> +			      location);
> +	      _cpp_free_buff (buff);
> +	    }
> +
> +	  /* Free everything that was allocated in these obstacks after the
> +	     obstack_mark() operation. */

Two spaces after the dot.

[...]

> +	  /* If no opening parenthesis was found by funlike_invocation_p() or
> +	     if an error happened when it called collect_args(). */

Likewise.

>  	  if (buff == NULL)
>  	    {
>  	      if (CPP_WTRADITIONAL (pfile) && ! node->value.macro->syshdr)
>  		cpp_warning (pfile, CPP_W_TRADITIONAL,
> - "function-like macro \"%s\" must be used with arguments in traditional C",
> +			     "function-like macro \"%s\" must be used "
> +			     "with arguments in traditional C",
>  			     NODE_NAME (node));

This change should go in a separate cleanup patch.

[...]

>  /* Get the pointer to the location of the argument token of the
>     function-like macro argument ARG.  This function must be called
>     only when we -ftrack-macro-expansion is on.  */
> @@ -1289,11 +1250,11 @@ arg_token_ptr_at (const macro_arg *arg,
>      case MACRO_ARG_TOKEN_NORMAL:
>        tokens_ptr = arg->first;
>        break;
> -    case MACRO_ARG_TOKEN_STRINGIFIED:      
> +    case MACRO_ARG_TOKEN_STRINGIFIED:

Unnecessary white space change.

>        tokens_ptr = (const cpp_token **) &arg->stringified;
>        break;
>      case MACRO_ARG_TOKEN_EXPANDED:
> -	tokens_ptr = arg->expanded;
> +      tokens_ptr = arg->expanded;
>        break;
>      }
>  
> @@ -1407,12 +1368,12 @@ macro_arg_token_iter_get_location (const
>     want each tokens resulting from function-like macro arguments
>     expansion to have a different location or not.
>  
> -   E.g, consider this function-like macro: 
> +   E.g, consider this function-like macro:

Likewise.

>          #define M(x) x - 3
>  
>     Then consider us "calling" it (and thus expanding it) like:
> -   
> +

Likewise.

>         M(1+4)
>  
>     It will be expanded into:
> @@ -1526,7 +1487,7 @@ replace_args (cpp_reader *pfile, cpp_has
>       location that records many things like the locus of the expansion
>       point as well as the original locus inside the definition of the
>       macro.  This location is called a virtual location.
> -     
> +

Likewise.

>       So the buffer BUFF holds a set of cpp_token*, and the buffer
>       VIRT_LOCS holds the virtual locations of the tokens held by BUFF.
>  
> @@ -1534,7 +1495,7 @@ replace_args (cpp_reader *pfile, cpp_has
>       context, when the latter is pushed.  The memory allocated to
>       store the tokens and their locations is going to be freed once
>       the context of macro expansion is popped.
> -     
> +

Likewise.

>       As far as tokens are concerned, the memory overhead of
>       -ftrack-macro-expansion is proportional to the number of
>       macros that get expanded multiplied by sizeof (source_location).
> @@ -1548,6 +1509,7 @@ replace_args (cpp_reader *pfile, cpp_has
>       the macro expansion, copy the tokens and replace the arguments.
>       This memory must be freed when the context of the macro MACRO is
>       popped.  */
> +  /* TODO why is total 0 sometimes? Should we return? */

Hmmh.  Maybe it's when a function-like macro has an empty
replacement-list?  Even in that case, I don't think we should return.
It's important to keep the information "we are expanding this macro to
nothing".  So we need to push a zero-length macro expansion context.

>    buff = tokens_buff_new (pfile, total, track_macro_exp ? &virt_locs : NULL);

[...]

> @@ -1811,6 +1773,7 @@ next_context (cpp_reader *pfile)
>  
>    if (result == 0)
>      {
> +      /* TODO obstack of contexts in cpp_reader? Too much to change? */

I am afraid that will increase peak memory usage quite a bit.  The
management of the context buffers' memory didn't involve freeing
contexts.  I introduced the memory freeing for these (after measuring
and seeing that the peak memory usage for contexts was high).

But if you are brave enough and maintainers think it could be
worthwhile, maybe you could try and measure both speed and space to
see.

[...]

> @@ -1984,7 +1947,7 @@ tokens_buff_put_token_to (const cpp_toke
>  			  source_location *virt_loc_dest,
>  			  const cpp_token *token,
>  			  source_location virt_loc,
> -			  source_location parm_def_loc,			  
> +			  source_location parm_def_loc,

Unnecessary white space change.

>  			  const struct line_map *map,
>  			  unsigned int macro_token_index)
>  {
> @@ -2035,7 +1998,7 @@ tokens_buff_add_token (_cpp_buff *buffer
>  {
>    const cpp_token **result;
>    source_location *virt_loc_dest = NULL;
> -  unsigned token_index = 
> +  unsigned token_index =

Likewise.

[...]

> @@ -2280,7 +2199,7 @@ consume_next_token_from_context (cpp_rea
>        *location = (*token)->src_loc;
>        FIRST (c).token++;
>      }
> -  else if ((c)->tokens_kind == TOKENS_KIND_INDIRECT)		
> +  else if ((c)->tokens_kind == TOKENS_KIND_INDIRECT)

Unnecessary white space change.

>      {
>        *token = *FIRST (c).ptoken;
>        *location = (*token)->src_loc;
> @@ -2370,7 +2289,7 @@ cpp_get_token_1 (cpp_reader *pfile, sour
>  	      goto out;
>  	    }
>  	}
> -      else
> +      else			/* end of context */

GCC Comment style.

>  	{
>  	  if (pfile->context->c.macro)
>  	    ++num_expanded_macros_counter;
> @@ -2433,7 +2352,7 @@ cpp_get_token_1 (cpp_reader *pfile, sour
>  		}
>  	    }
>  	  else
> -	    ret = enter_macro_context (pfile, node, result, 
> +	    ret = enter_macro_context (pfile, node, result,

Unnecessary white space change.

[...]


> === modified file 'include/obstack.h'
> --- include/obstack.h	2011-10-22 01:35:29 +0000
> +++ include/obstack.h	2012-07-07 16:04:01 +0000

Missing ChangeLog

[...]

> +static inline void obstack_release (struct obstack *o, void *p)
> +{

Missing comment for this function.

> +  void *old_base = *(void **) p;
> +  obstack_free (o, p);
> +  o->object_base = (char *) old_base;
> +}
> +
> +
>  #ifdef __cplusplus
>  }	/* C++ */
>  #endif

Cheers.

Patch

=== modified file 'include/libiberty.h'
--- include/libiberty.h	2011-09-28 19:04:30 +0000

+++ include/libiberty.h	2012-07-07 16:04:01 +0000

@@ -361,12 +361,19 @@  extern unsigned int xcrc32 (const unsign

 #define XDUPVAR(T, P, S1, S2)	((T *) xmemdup ((P), (S1), (S2)))
 #define XRESIZEVAR(T, P, S)	((T *) xrealloc ((P), (S)))
 
-/* Type-safe obstack allocator.  */

+/* Type-safe obstack allocator. You must first initialize the obstack with

+   obstack_init() or _obstack_begin(). */

 
 #define XOBNEW(O, T)		((T *) obstack_alloc ((O), sizeof (T)))
 #define XOBNEWVEC(O, T, N)	((T *) obstack_alloc ((O), sizeof (T) * (N)))
 #define XOBNEWVAR(O, T, S)	((T *) obstack_alloc ((O), (S)))
-#define XOBFINISH(O, T)         ((T) obstack_finish ((O)))

+#define XOBDELETE(O, T, P)	(obstack_free ((O), (P)))

+

+#define XOBGROW(O, T, D)	obstack_grow ((O), (D), sizeof (T))

+#define XOBGROWVEC(O, T, D, N)	obstack_grow ((O), (D), sizeof (T) * (N))

+#define XOBSHRINK(O, T)		obstack_blank ((O), -1 * sizeof (T))

+#define XOBSHRINKVEC(O, T, N)	obstack_blank ((O), -1 * sizeof (T) * (N))

+#define XOBFINISH(O, PT)	((PT) obstack_finish ((O)))

 
 /* hex character manipulation routines */