diff mbox

[18/22] Track locations within string literals in tree_string

Message ID 1441916913-11547-19-git-send-email-dmalcolm@redhat.com
State New
Headers show

Commit Message

David Malcolm Sept. 10, 2015, 8:28 p.m. UTC
This patch uses the string-literal location generated in libcpp in the
previous patch, and stores it in tree_string (adding a new field there).

This hasn't been optimized.  Perhaps a single unbroken run of
1-column-per-char is the most common case, so we could bother to store
the character range info only for those string literals that are
exceptions to this rule.

The patch adds unit testing via a plugin.

Screenshot:
 https://dmalcolm.fedorapeople.org/gcc/2015-09-09/string-literals.html

gcc/c-family/ChangeLog:
	* c-lex.c (ensure_string_has_location): New function.
	(lex_string): Call ensure_string_has_location on the cpp_string;
	pass in istr.loc to the call to build_string.
	(lex_charconst): Call ensure_string_has_location on the cpp_string.

gcc/cp/ChangeLog:
	* parser.c (cp_parser_string_literal): Call init_raw on the
	str.loc.  Pass in the istr.loc to the call to build_string.

gcc/testsuite/ChangeLog:
	* gcc.dg/plugin/diagnostic-test-string-literals-1.c: New file.
	* gcc.dg/plugin/diagnostic_plugin_test_string_literals.c: New file.
	* gcc.dg/plugin/plugin.exp (plugin_test_list): Add
	diagnostic_plugin_test_string_literals.c and
	diagnostic-test-string-literals-1.c.

gcc/ChangeLog:
	* tree-core.h (struct cpp_string_location): Add forward
	declaration.
	(struct tree_string): Add cpp_string_location * field "loc".
	* tree.c: Include cpplib.h.
	(build_string): Initialize TREE_STRING_LOCATION (s) to NULL.
	(cpp_string_location_stats): New global.
	(build_string): New overload.
	* tree.h (TREE_STRING_LOCATION): New macro.
	(cpp_string_location_stats): New struct and global.
	(build_string): Add overload taking an additional
	cpp_string_location * param.

libcpp/ChangeLog:
	* charset.c (cpp_string_location::get_range_between_indices): New
	method.
	* include/cpplib.h
	(cpp_string_location::get_range_between_indices): Likewise.
---
 gcc/c-family/c-lex.c                               |  23 ++-
 gcc/cp/parser.c                                    |   4 +-
 .../plugin/diagnostic-test-string-literals-1.c     | 139 +++++++++++++
 .../diagnostic_plugin_test_string_literals.c       | 215 +++++++++++++++++++++
 gcc/testsuite/gcc.dg/plugin/plugin.exp             |   2 +
 gcc/tree-core.h                                    |   3 +
 gcc/tree.c                                         |  24 +++
 gcc/tree.h                                         |  12 ++
 libcpp/charset.c                                   |  12 ++
 libcpp/include/cpplib.h                            |   2 +
 10 files changed, 434 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
 create mode 100644 gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_string_literals.c
diff mbox

Patch

diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index f457199..6eb8fcc 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -1076,6 +1076,19 @@  interpret_fixed (const cpp_token *token, unsigned int flags)
   return value;
 }
 
+/* Give STR location info derived from SRC_LOC if it doesn't have any yet.  */
+
+static void
+ensure_string_has_location (const cpp_string *str, source_location src_loc)
+{
+  if (!str->loc.m_fragloc_array)
+    {
+      cpp_string *mutstr = const_cast <cpp_string *> (str);
+      cpp_string_location *strloc = &mutstr->loc;
+      strloc->init_raw (src_loc, mutstr->len, 1, line_table);
+    }
+}
+
 /* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
    UTF8STRING tokens into a tree, performing string constant
    concatenation.  TOK is the first of these.  VALP is the location to
@@ -1107,7 +1120,9 @@  lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
   /* Try to avoid the overhead of creating and destroying an obstack
      for the common case of just one string.  */
   cpp_string str = tok->val.str;
+  ensure_string_has_location (&str, tok->src_loc);
   cpp_string *strs = &str;
+  location_t str0_loc = tok->src_loc;
 
   /* objc_at_sign_was_seen is only used when doing Objective-C string
      concatenation.  It is 'true' if we have seen an '@' before the
@@ -1146,16 +1161,20 @@  lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
 	  else
 	    error ("unsupported non-standard concatenation of string literals");
 	}
+      /* FALLTHROUGH */
 
     case CPP_STRING:
       if (!concats)
 	{
 	  gcc_obstack_init (&str_ob);
+	  ensure_string_has_location (&str, str0_loc);
 	  obstack_grow (&str_ob, &str, sizeof (cpp_string));
 	}
 
       concats++;
+      ensure_string_has_location (&tok->val.str, tok->src_loc);
       obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
+
       if (objc_string)
 	objc_at_sign_was_seen = false;
       goto retry;
@@ -1178,7 +1197,7 @@  lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
        ? cpp_interpret_string : cpp_interpret_string_notranslate)
       (parse_in, strs, concats + 1, &istr, type))
     {
-      value = build_string (istr.len, (const char *) istr.text);
+      value = build_string (istr.len, (const char *) istr.text, &istr.loc);
       free (CONST_CAST (unsigned char *, istr.text));
     }
   else
@@ -1245,6 +1264,8 @@  lex_charconst (const cpp_token *token)
   unsigned int chars_seen;
   int unsignedp = 0;
 
+  ensure_string_has_location (&token->val.str, token->src_loc);
+
   result = cpp_interpret_charconst (parse_in, token,
 				    &chars_seen, &unsignedp);
 
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 17b7de0..62937ae 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -3716,6 +3716,7 @@  cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 
       str.text = (const unsigned char *)TREE_STRING_POINTER (string_tree);
       str.len = TREE_STRING_LENGTH (string_tree);
+      str.loc.init_raw (tok->location, str.len, 1, line_table);
       count = 1;
 
       if (curr_tok_is_userdef_p)
@@ -3742,6 +3743,7 @@  cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
 	  count++;
 	  str.text = (const unsigned char *)TREE_STRING_POINTER (string_tree);
 	  str.len = TREE_STRING_LENGTH (string_tree);
+	  str.loc.init_raw (tok->location, str.len, 1, line_table);
 
 	  if (curr_tok_is_userdef_p)
 	    {
@@ -3810,7 +3812,7 @@  cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
   if ((translate ? cpp_interpret_string : cpp_interpret_string_notranslate)
       (parse_in, strs, count, &istr, type))
     {
-      value = build_string (istr.len, (const char *)istr.text);
+      value = build_string (istr.len, (const char *)istr.text, &istr.loc);
       free (CONST_CAST (unsigned char *, istr.text));
 
       switch (type)
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
new file mode 100644
index 0000000..ef3b8fe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
@@ -0,0 +1,139 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdiagnostics-show-caret" } */
+
+/* This is a collection of unittests for ranges within string literals,
+   using diagnostic_plugin_test_string_literals, which handles
+   "__emit_string_literal_range" by generating a warning at the given
+   subset of a string literal.
+
+   The indices are 0-based.  It's easiest to verify things using string
+   literals that are runs of 0-based digits (to avoid having to count
+   characters).  */
+
+extern void __emit_string_literal_range (const char *literal,
+					 int start_idx, int end_idx);
+
+void test_simple_string_literal (void)
+{
+  __emit_string_literal_range ("0123456789", /* { dg-warning "range" } */
+			       6, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("0123456789",
+                                       ^~
+   { dg-end-multiline-output "" } */
+}
+
+void test_concatenated_string_literal (void)
+{
+  __emit_string_literal_range ("01234" "56789", /* { dg-warning "range" } */
+			       3, 6);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234" "56789",
+                                    ^~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiline_string_literal (void)
+{
+  __emit_string_literal_range ("01234" /* { dg-warning "range" } */
+                               "56789",
+                               3, 6);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234"
+                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+                                "56789",
+                                ~~~  
+   { dg-end-multiline-output "" } */
+  /* FIXME: why does the above need two trailing spaces?  */
+}
+
+/* Tests of various unicode encodings.
+
+   Digits 0 through 9 are unicode code points:
+      U+0030 DIGIT ZERO
+      ...
+      U+0039 DIGIT NINE
+   However, these are not always valid as UCN (see the comment in
+   libcpp/charset.c:_cpp_valid_ucn).
+
+   Hence we need to test UCN using an alternative unicode
+   representation of numbers; let's use Roman numerals,
+   (though these start at one, not zero):
+      U+2170 SMALL ROMAN NUMERAL ONE
+      ...
+      U+2174 SMALL ROMAN NUMERAL FIVE  ("v")
+      U+2175 SMALL ROMAN NUMERAL SIX   ("vi")
+      ...
+      U+2178 SMALL ROMAN NUMERAL NINE.  */
+
+void test_hex (void)
+{
+  /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
+     and with a space in place of digit 6, to terminate the escaped
+     hex code.  */
+  __emit_string_literal_range ("01234\x35 789", /* { dg-warning "range" } */
+			       3, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234\x35 789"
+                                    ^~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_oct (void)
+{
+  /* Digits 0-9, expressing digit 5 in ASCII as "\065"
+     and with a space in place of digit 6, to terminate the escaped
+     octal code.  */
+  __emit_string_literal_range ("01234\065 789", /* { dg-warning "range" } */
+			       3, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234\065 789"
+                                    ^~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple (void)
+{
+  /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
+     digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
+  __emit_string_literal_range ("01234"  "\x35"  "\066"  "789", /* { dg-warning "range" } */
+			       3, 8);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234"  "\x35"  "\066"  "789",
+                                    ^~~~~~~~~~~~~~~~~~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_ucn4 (void)
+{
+  /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
+     as UCN 4.  */
+  __emit_string_literal_range ("01234\u2174\u2175789", /* { dg-warning "range" } */
+			       4, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234\u2174\u2175789",
+                                     ^~~~~~~~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_ucn8 (void)
+{
+  /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.  */
+  __emit_string_literal_range ("01234\U00002174\U00002175789", /* { dg-warning "range" } */
+			       4, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range ("01234\U00002174\U00002175789",
+                                     ^~~~~~~~~~~~~~~~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_u8 (void)
+{
+  /* Digits 0-9.  */
+  __emit_string_literal_range (u8"0123456789", /* { dg-warning "range" } */
+			       4, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range (u8"0123456789",
+                                       ^~~~
+   { dg-end-multiline-output "" } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_string_literals.c b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_string_literals.c
new file mode 100644
index 0000000..c6b591e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_string_literals.c
@@ -0,0 +1,215 @@ 
+/* This plugin uses the diagnostics code to verify tracking of source code
+   locations within string literals.  */
+/* { dg-options "-O" } */
+
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "hash-table.h"
+#include "vec.h"
+#include "ggc.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "intl.h"
+#include "plugin-version.h"
+#include "diagnostic.h"
+#include "context.h"
+#include "gcc-rich-location.h"
+#include "print-tree.h"
+#include "cpplib.h"
+
+/* FIXME: temporary copy of the libcpp definition, to avoid linker issues.  */
+
+source_range
+cpp_string_location::get_range_between_indices (unsigned int start_idx,
+						unsigned int finish_idx) const
+{
+  /* This could be optimized if necessary.  */
+  source_range result;
+  result.m_start = get_loc_at_index (start_idx);
+  result.m_finish = get_range_at_index (finish_idx).m_finish;
+  return result;
+}
+
+int plugin_is_GPL_compatible;
+
+const pass_data pass_data_test_string_literals =
+{
+  GIMPLE_PASS, /* type */
+  "test_string_literals", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_test_string_literals : public gimple_opt_pass
+{
+public:
+  pass_test_string_literals(gcc::context *ctxt)
+    : gimple_opt_pass(pass_data_test_string_literals, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return true; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_test_string_literals
+
+/* If STMT is a call to FUNCNAME with NUM_ARGS args, return it, else NULL.  */
+
+static gcall *
+check_for_named_call (gimple stmt,
+		      const char *funcname, unsigned int num_args)
+{
+  gcc_assert (funcname);
+
+  gcall *call = dyn_cast <gcall *> (stmt);
+  if (!call)
+    return NULL;
+
+  tree fndecl = gimple_call_fndecl (call);
+  if (!fndecl)
+    return NULL;
+
+  if (strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname))
+    return NULL;
+
+  if (gimple_call_num_args (call) != num_args)
+    {
+      error_at (stmt->location, "expected number of args: %i (got %i)",
+		num_args, gimple_call_num_args (call));
+      return NULL;
+    }
+
+  return call;
+}
+
+static void
+emit_warning (source_range src_range)
+{
+  rich_location richloc (src_range);
+  location_range *range = richloc.get_range (0);
+  warning_at_rich_loc (&richloc, 0,
+		       "range %i:%i-%i:%i",
+		       range->m_start.line,
+		       range->m_start.column,
+		       range->m_finish.line,
+		       range->m_finish.column);
+}
+
+/* Support code for verifying that we are correctly tracking ranges
+   within string literals, for use by diagnostic-test-string-literals-*.c.  */
+
+static void
+test_string_literals (gimple stmt)
+{
+  gcall *call = check_for_named_call (stmt, "__emit_string_literal_range", 3);
+  if (!call)
+    return;
+
+#if 0
+  for (int i = 0; i < 3; i++)
+    warning_at (EXPR_LOCATION (gimple_call_arg (call, i)), 0, "arg %i", i);
+#endif
+
+  /* We expect an ADDR_EXPR with a STRING_CST inside it for the
+     initial arg.  */
+  tree t_addr_string = gimple_call_arg (call, 0);
+  if (TREE_CODE (t_addr_string) != ADDR_EXPR)
+    {
+      error_at (call->location, "string literal required for arg 1");
+      return;
+    }
+
+  tree t_string = TREE_OPERAND (t_addr_string, 0);
+  if (TREE_CODE (t_string) != STRING_CST)
+    {
+      error_at (call->location, "string literal required for arg 1");
+      return;
+    }
+
+  tree t_start_idx = gimple_call_arg (call, 1);
+  if (TREE_CODE (t_start_idx) != INTEGER_CST)
+    {
+      error_at (call->location, "integer constant required for arg 2");
+      return;
+    }
+  int start_idx = TREE_INT_CST_LOW (t_start_idx);
+
+  tree t_end_idx = gimple_call_arg (call, 2);
+  if (TREE_CODE (t_end_idx) != INTEGER_CST)
+    {
+      error_at (call->location, "integer constant required for arg 3");
+      return;
+    }
+  int end_idx = TREE_INT_CST_LOW (t_end_idx);
+
+  cpp_string_location *strloc = TREE_STRING_LOCATION (t_string);
+  gcc_assert (strloc);
+  source_range src_range
+    = strloc->get_range_between_indices (start_idx, end_idx);
+  emit_warning (src_range);
+}
+
+unsigned int
+pass_test_string_literals::execute (function *fun)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, fun)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+	gimple stmt = gsi_stmt (gsi);
+	test_string_literals (stmt);
+      }
+
+  return 0;
+}
+
+static gimple_opt_pass *
+make_pass_test_string_literals (gcc::context *ctxt)
+{
+  return new pass_test_string_literals (ctxt);
+}
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+	     struct plugin_gcc_version *version)
+{
+  struct register_pass_info pass_info;
+  const char *plugin_name = plugin_info->base_name;
+  int argc = plugin_info->argc;
+  struct plugin_argument *argv = plugin_info->argv;
+
+  if (!plugin_default_version_check (version, &gcc_version))
+    return 1;
+
+  pass_info.pass = make_pass_test_string_literals (g);
+  pass_info.reference_pass_name = "ssa";
+  pass_info.ref_pass_instance_number = 1;
+  pass_info.pos_op = PASS_POS_INSERT_AFTER;
+  register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+		     &pass_info);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index 91f6391..97d7a41 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -72,6 +72,8 @@  set plugin_test_list [list \
 	  diagnostic-test-expressions-1.c } \
     { diagnostic_plugin_show_trees.c \
 	  diagnostic-test-show-trees-1.c } \
+    { diagnostic_plugin_test_string_literals.c \
+	  diagnostic-test-string-literals-1.c } \
 ]
 
 foreach plugin_test $plugin_test_list {
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 6931ad9..7cda82f 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1166,9 +1166,12 @@  struct GTY(()) tree_fixed_cst {
   struct fixed_value * fixed_cst_ptr;
 };
 
+struct cpp_string_location;
+
 struct GTY(()) tree_string {
   struct tree_typed typed;
   int length;
+  cpp_string_location *loc;
   char str[1];
 };
 
diff --git a/gcc/tree.c b/gcc/tree.c
index d1595c2..81d1cbd 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -75,6 +75,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "print-tree.h"
 #include "ipa-utils.h"
+#include "cpplib.h"
 
 /* Tree code classes.  */
 
@@ -1931,12 +1932,35 @@  build_string (int len, const char *str)
   TREE_SET_CODE (s, STRING_CST);
   TREE_CONSTANT (s) = 1;
   TREE_STRING_LENGTH (s) = len;
+  TREE_STRING_LOCATION (s) = NULL;
   memcpy (s->string.str, str, len);
   s->string.str[len] = '\0';
 
   return s;
 }
 
+/* As above, but with per-character location information.  */
+
+struct cpp_string_location_stats cpp_string_location_stats;
+
+tree
+build_string (int len, const char *str, cpp_string_location *strloc)
+{
+  tree s = build_string (len, str);
+
+  /* Need to allocate a copy:  */
+  TREE_STRING_LOCATION (s) = ggc_alloc <cpp_string_location> ();
+  *TREE_STRING_LOCATION (s) = *strloc;
+
+  /* Maintain stats on string locations.  */
+  cpp_string_location_stats.count_all++;
+  if (strloc->trivial_p ())
+    cpp_string_location_stats.count_trivial++;
+
+  return s;
+}
+
+
 /* Return a newly constructed COMPLEX_CST node whose value is
    specified by the real and imaginary parts REAL and IMAG.
    Both REAL and IMAG should be constant nodes.  TYPE, if specified,
diff --git a/gcc/tree.h b/gcc/tree.h
index 66419d4..995937c 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -937,9 +937,20 @@  extern void omp_clause_range_check_failed (const_tree, const char *, int,
 /* In a STRING_CST */
 /* In C terms, this is sizeof, not strlen.  */
 #define TREE_STRING_LENGTH(NODE) (STRING_CST_CHECK (NODE)->string.length)
+#define TREE_STRING_LOCATION(NODE) (STRING_CST_CHECK (NODE)->string.loc)
 #define TREE_STRING_POINTER(NODE) \
   ((const char *)(STRING_CST_CHECK (NODE)->string.str))
 
+extern struct cpp_string_location_stats
+{
+  /* How many have been used to construct STRING_CST.  */
+  int count_all;
+
+  /* How many of these consisted of a single run of 1-byte-per-char
+     bytes.  */
+  int count_trivial;
+} cpp_string_location_stats;
+
 /* In a COMPLEX_CST node.  */
 #define TREE_REALPART(NODE) (COMPLEX_CST_CHECK (NODE)->complex.real)
 #define TREE_IMAGPART(NODE) (COMPLEX_CST_CHECK (NODE)->complex.imag)
@@ -3791,6 +3802,7 @@  extern tree build_minus_one_cst (tree);
 extern tree build_all_ones_cst (tree);
 extern tree build_zero_cst (tree);
 extern tree build_string (int, const char *);
+extern tree build_string (int, const char *, cpp_string_location *);
 extern tree build_tree_list_stat (tree, tree MEM_STAT_DECL);
 #define build_tree_list(t, q) build_tree_list_stat (t, q MEM_STAT_INFO)
 extern tree build_tree_list_vec_stat (const vec<tree, va_gc> *MEM_STAT_DECL);
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 3ae7916..f78cdf6 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -2028,6 +2028,18 @@  cpp_string_location::get_range_at_index (unsigned int char_idx) const
   return err;
 }
 
+/* Get the range spanning START_IDX through FINISH_IDX, both inclusive.  */
+source_range
+cpp_string_location::get_range_between_indices (unsigned int start_idx,
+						unsigned int finish_idx) const
+{
+  /* This could be optimized if necessary.  */
+  source_range result;
+  result.m_start = get_loc_at_index (start_idx);
+  result.m_finish = get_range_at_index (finish_idx).m_finish;
+  return result;
+}
+
 /* FIXME.  */
 bool
 cpp_string_location::trivial_p () const
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index a5e5df5..6023812 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -256,6 +256,8 @@  struct GTY(()) cpp_string_location {
 
   source_location get_loc_at_index (unsigned int idx) const;
   source_range get_range_at_index (unsigned int idx) const;
+  source_range get_range_between_indices (unsigned int start_idx,
+					  unsigned int finish_idx) const;
 
   void debug () const;