@@ -1076,6 +1076,19 @@ interpret_fixed (const cpp_token *token, unsigned int flags)
return value;
}
+/* Make sure that STR carries location information.  If the
+   m_fragloc_array of its location is still unset, initialize the
+   location in place via init_raw, using SRC_LOC and the length of STR.
+   STR is received as pointer-to-const, so constness is cast away for
+   the update.  */
+
+static void
+ensure_string_has_location (const cpp_string *str, source_location src_loc)
+{
+  /* Location data is already present; leave it untouched.  */
+  if (str->loc.m_fragloc_array)
+    return;
+
+  cpp_string *writable = const_cast <cpp_string *> (str);
+  writable->loc.init_raw (src_loc, writable->len, 1, line_table);
+}
+
/* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
UTF8STRING tokens into a tree, performing string constant
concatenation. TOK is the first of these. VALP is the location to
@@ -1107,7 +1120,9 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
/* Try to avoid the overhead of creating and destroying an obstack
for the common case of just one string. */
cpp_string str = tok->val.str;
+ ensure_string_has_location (&str, tok->src_loc);
cpp_string *strs = &str;
+ location_t str0_loc = tok->src_loc;
/* objc_at_sign_was_seen is only used when doing Objective-C string
concatenation. It is 'true' if we have seen an '@' before the
@@ -1146,16 +1161,20 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
else
error ("unsupported non-standard concatenation of string literals");
}
+ /* FALLTHROUGH */
case CPP_STRING:
if (!concats)
{
gcc_obstack_init (&str_ob);
+ ensure_string_has_location (&str, str0_loc);
obstack_grow (&str_ob, &str, sizeof (cpp_string));
}
concats++;
+ ensure_string_has_location (&tok->val.str, tok->src_loc);
obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
+
if (objc_string)
objc_at_sign_was_seen = false;
goto retry;
@@ -1178,7 +1197,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
? cpp_interpret_string : cpp_interpret_string_notranslate)
(parse_in, strs, concats + 1, &istr, type))
{
- value = build_string (istr.len, (const char *) istr.text);
+ value = build_string (istr.len, (const char *) istr.text, &istr.loc);
free (CONST_CAST (unsigned char *, istr.text));
}
else
@@ -1245,6 +1264,8 @@ lex_charconst (const cpp_token *token)
unsigned int chars_seen;
int unsignedp = 0;
+ ensure_string_has_location (&token->val.str, token->src_loc);
+
result = cpp_interpret_charconst (parse_in, token,
&chars_seen, &unsignedp);
@@ -3716,6 +3716,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
str.text = (const unsigned char *)TREE_STRING_POINTER (string_tree);
str.len = TREE_STRING_LENGTH (string_tree);
+ str.loc.init_raw (tok->location, str.len, 1, line_table);
count = 1;
if (curr_tok_is_userdef_p)
@@ -3742,6 +3743,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
count++;
str.text = (const unsigned char *)TREE_STRING_POINTER (string_tree);
str.len = TREE_STRING_LENGTH (string_tree);
+ str.loc.init_raw (tok->location, str.len, 1, line_table);
if (curr_tok_is_userdef_p)
{
@@ -3810,7 +3812,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok,
if ((translate ? cpp_interpret_string : cpp_interpret_string_notranslate)
(parse_in, strs, count, &istr, type))
{
- value = build_string (istr.len, (const char *)istr.text);
+ value = build_string (istr.len, (const char *)istr.text, &istr.loc);
free (CONST_CAST (unsigned char *, istr.text));
switch (type)
new file mode 100644
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdiagnostics-show-caret" } */
+
+/* This is a collection of unit tests for ranges within string literals,
+ using diagnostic_plugin_test_string_literals, which handles
+ "__emit_string_literal_range" by generating a warning at the given
+ subset of a string literal.
+
+ The indices are 0-based. It's easiest to verify things using string
+ literals that are runs of 0-based digits (to avoid having to count
+ characters). */
+
+extern void __emit_string_literal_range (const char *literal,
+ int start_idx, int end_idx);
+
+void test_simple_string_literal (void) /* One literal; indices 6-7.  */
+{
+ __emit_string_literal_range ("0123456789", /* { dg-warning "range" } */
+ 6, 7);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("0123456789",
+ ^~
+ { dg-end-multiline-output "" } */
+}
+
+void test_concatenated_string_literal (void) /* Indices 3-6 span both literals.  */
+{
+ __emit_string_literal_range ("01234" "56789", /* { dg-warning "range" } */
+ 3, 6);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234" "56789",
+ ^~~~~~~
+ { dg-end-multiline-output "" } */
+}
+
+void test_multiline_string_literal (void)
+{
+ __emit_string_literal_range ("01234" /* { dg-warning "range" } */
+ "56789",
+ 3, 6);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234"
+ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ "56789",
+ ~~~
+ { dg-end-multiline-output "" } */
+ /* FIXME: work out why the expected output above needs two trailing spaces. */
+}
+
+/* Tests of various unicode encodings.
+
+ Digits 0 through 9 are unicode code points:
+ U+0030 DIGIT ZERO
+ ...
+ U+0039 DIGIT NINE
+ However, these are not always valid as UCN (see the comment in
+ libcpp/charset.c:_cpp_valid_ucn).
+
+ Hence we need to test UCN using an alternative unicode
+ representation of numbers; let's use Roman numerals,
+ (though these start at one, not zero):
+ U+2170 SMALL ROMAN NUMERAL ONE
+ ...
+ U+2174 SMALL ROMAN NUMERAL FIVE ("v")
+ U+2175 SMALL ROMAN NUMERAL SIX ("vi")
+ ...
+ U+2178 SMALL ROMAN NUMERAL NINE. */
+
+void test_hex (void)
+{
+ /* Digits 0-9, expressing digit 5 in ASCII as the hex escape "\x35"
+ and with a space in place of digit 6, to terminate the escaped
+ hex code. The requested indices (3-7) start before the escape
+ sequence and end after it. */
+ __emit_string_literal_range ("01234\x35 789", /* { dg-warning "range" } */
+ 3, 7);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234\x35 789"
+ ^~~~~~~~
+ { dg-end-multiline-output "" } */
+}
+
+void test_oct (void)
+{
+ /* Digits 0-9, expressing digit 5 in ASCII as the octal escape "\065"
+ and with a space in place of digit 6, to terminate the escaped
+ octal code. The requested indices (3-7) start before the escape
+ sequence and end after it. */
+ __emit_string_literal_range ("01234\065 789", /* { dg-warning "range" } */
+ 3, 7);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234\065 789"
+ ^~~~~~~~
+ { dg-end-multiline-output "" } */
+}
+
+void test_multiple (void)
+{
+ /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
+ digit 6 in ASCII as octal "\066", concatenating multiple strings. */
+ __emit_string_literal_range ("01234" "\x35" "\066" "789", /* { dg-warning "range" } */
+ 3, 8);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234" "\x35" "\066" "789",
+ ^~~~~~~~~~~~~~~~~~~~~~~~
+ { dg-end-multiline-output "" } */
+}
+
+void test_ucn4 (void)
+{
+ /* Digits 0-9, with digits 5 and 6 replaced by the Roman numerals
+ U+2174 and U+2175, each written as a four-hex-digit UCN. The
+ requested indices (4-7) span both UCNs. */
+ __emit_string_literal_range ("01234\u2174\u2175789", /* { dg-warning "range" } */
+ 4, 7);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234\u2174\u2175789",
+ ^~~~~~~~~~~~~~
+ { dg-end-multiline-output "" } */
+}
+
+void test_ucn8 (void)
+{
+ /* Digits 0-9, with digits 5 and 6 replaced by the Roman numerals
+ U+2174 and U+2175, each written as an eight-hex-digit UCN. The
+ requested indices (4-7) span both UCNs. */
+ __emit_string_literal_range ("01234\U00002174\U00002175789", /* { dg-warning "range" } */
+ 4, 7);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range ("01234\U00002174\U00002175789",
+ ^~~~~~~~~~~~~~~~~~~~~~
+ { dg-end-multiline-output "" } */
+}
+
+void test_u8 (void)
+{
+ /* Digits 0-9 in a u8-prefixed string literal; indices 4-7 requested. */
+ __emit_string_literal_range (u8"0123456789", /* { dg-warning "range" } */
+ 4, 7);
+/* { dg-begin-multiline-output "" }
+ __emit_string_literal_range (u8"0123456789",
+ ^~~~
+ { dg-end-multiline-output "" } */
+}
new file mode 100644
@@ -0,0 +1,215 @@
+/* This plugin uses the diagnostics code to verify tracking of source code
+ locations within string literals. */
+/* { dg-options "-O" } */
+
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "hash-table.h"
+#include "vec.h"
+#include "ggc.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "intl.h"
+#include "plugin-version.h"
+#include "diagnostic.h"
+#include "context.h"
+#include "gcc-rich-location.h"
+#include "print-tree.h"
+#include "cpplib.h"
+
+/* FIXME: this is a copy of the real definition, hacked in here for now
+   to get around linker issues.
+
+   Return the source_range that starts at the location of START_IDX and
+   finishes at the finish of the range of FINISH_IDX.  */
+
+source_range
+cpp_string_location::get_range_between_indices (unsigned int start_idx,
+                                                unsigned int finish_idx) const
+{
+  /* Two independent lookups; this could be optimized if necessary.  */
+  const source_location first_loc = get_loc_at_index (start_idx);
+  const source_range last_range = get_range_at_index (finish_idx);
+
+  source_range span;
+  span.m_start = first_loc;
+  span.m_finish = last_range.m_finish;
+  return span;
+}
+
+int plugin_is_GPL_compatible;
+
+const pass_data pass_data_test_string_literals = /* Passed to the gimple_opt_pass constructor by pass_test_string_literals.  */
+{
+ GIMPLE_PASS, /* type */
+ "test_string_literals", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_test_string_literals : public gimple_opt_pass
+{
+public:
+ pass_test_string_literals(gcc::context *ctxt)
+ : gimple_opt_pass(pass_data_test_string_literals, ctxt)
+ {}
+
+ /* opt_pass methods: gate unconditionally returns true, so the pass is
+ never gated off; execute is defined out of line. */
+ bool gate (function *) { return true; }
+ virtual unsigned int execute (function *);
+
+}; // class pass_test_string_literals
+
+/* If STMT is a call whose function declaration is named FUNCNAME, return
+   it as a gcall *; otherwise return NULL.  A call with the right name
+   but with a number of arguments other than NUM_ARGS is reported with
+   error_at and also yields NULL.  FUNCNAME must be non-NULL.  */
+
+static gcall *
+check_for_named_call (gimple stmt,
+                      const char *funcname, unsigned int num_args)
+{
+  gcc_assert (funcname);
+
+  gcall *call = dyn_cast <gcall *> (stmt);
+  if (!call)
+    return NULL;
+
+  /* Without a function declaration there is no name to compare.  */
+  tree fndecl = gimple_call_fndecl (call);
+  if (!fndecl)
+    return NULL;
+
+  if (strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname))
+    return NULL;
+
+  if (gimple_call_num_args (call) != num_args)
+    {
+      /* Both counts are unsigned, hence %u rather than %i.  */
+      error_at (stmt->location, "expected number of args: %u (got %u)",
+                num_args, gimple_call_num_args (call));
+      return NULL;
+    }
+
+  return call;
+}
+
+/* Emit a warning at SRC_RANGE whose message gives the line and column
+   of the start and of the finish of the range.  */
+
+static void
+emit_warning (source_range src_range)
+{
+  rich_location richloc (src_range);
+
+  /* The coordinates come from range 0 of the rich_location.  */
+  location_range &first = *richloc.get_range (0);
+  warning_at_rich_loc (&richloc, 0, "range %i:%i-%i:%i",
+                       first.m_start.line, first.m_start.column,
+                       first.m_finish.line, first.m_finish.column);
+}
+
+/* Support code for verifying that we are correctly tracking ranges
+   within string literals, for use by diagnostic-test-string-literals-*.c.
+
+   If STMT is a three-argument call to __emit_string_literal_range,
+   expect its arguments to be the address of a STRING_CST followed by
+   two INTEGER_CSTs (start and end index, both within the range), and
+   emit a warning at the corresponding range within the literal.
+   Malformed calls are reported with error_at instead.  */
+
+static void
+test_string_literals (gimple stmt)
+{
+  gcall *call = check_for_named_call (stmt, "__emit_string_literal_range", 3);
+  if (!call)
+    return;
+
+#if 0
+  for (int i = 0; i < 3; i++)
+    warning_at (EXPR_LOCATION (gimple_call_arg (call, i)), 0, "arg %i", i);
+#endif
+
+  /* We expect an ADDR_EXPR with a STRING_CST inside it for the
+     initial arg.  */
+  tree t_addr_string = gimple_call_arg (call, 0);
+  if (TREE_CODE (t_addr_string) != ADDR_EXPR)
+    {
+      error_at (call->location, "string literal required for arg 1");
+      return;
+    }
+
+  tree t_string = TREE_OPERAND (t_addr_string, 0);
+  if (TREE_CODE (t_string) != STRING_CST)
+    {
+      error_at (call->location, "string literal required for arg 1");
+      return;
+    }
+
+  tree t_start_idx = gimple_call_arg (call, 1);
+  if (TREE_CODE (t_start_idx) != INTEGER_CST)
+    {
+      error_at (call->location, "integer constant required for arg 2");
+      return;
+    }
+  int start_idx = TREE_INT_CST_LOW (t_start_idx);
+
+  tree t_end_idx = gimple_call_arg (call, 2);
+  if (TREE_CODE (t_end_idx) != INTEGER_CST)
+    {
+      error_at (call->location, "integer constant required for arg 3");
+      return;
+    }
+  int end_idx = TREE_INT_CST_LOW (t_end_idx);
+
+  /* The indices are handed on as unsigned values, so a negative index
+     would silently become a huge one; an end index before the start
+     would describe an inverted range.  Reject both up front.  */
+  if (start_idx < 0 || end_idx < start_idx)
+    {
+      error_at (call->location,
+                "invalid string literal index range %i-%i",
+                start_idx, end_idx);
+      return;
+    }
+
+  /* A STRING_CST need not carry location data (the two-argument
+     build_string leaves it NULL), so report that rather than
+     asserting.  */
+  cpp_string_location *strloc = TREE_STRING_LOCATION (t_string);
+  if (!strloc)
+    {
+      error_at (call->location,
+                "string literal for arg 1 has no location information");
+      return;
+    }
+
+  source_range src_range
+    = strloc->get_range_between_indices (start_idx, end_idx);
+  emit_warning (src_range);
+}
+
+/* Run test_string_literals on every statement of every basic block of
+   FUN.  Always returns 0.  */
+
+unsigned int
+pass_test_string_literals::execute (function *fun)
+{
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator iter = gsi_start_bb (bb);
+      while (!gsi_end_p (iter))
+        {
+          test_string_literals (gsi_stmt (iter));
+          gsi_next (&iter);
+        }
+    }
+
+  return 0;
+}
+
+/* Allocate a new pass_test_string_literals for CTXT with operator new
+   and return it.  */
+
+static gimple_opt_pass *
+make_pass_test_string_literals (gcc::context *ctxt)
+{
+  pass_test_string_literals *new_pass = new pass_test_string_literals (ctxt);
+  return new_pass;
+}
+
+/* Plugin entry point.  Check VERSION against gcc_version using
+   plugin_default_version_check, then register the string-literal test
+   pass for insertion after instance 1 of the "ssa" pass, using the
+   base name from PLUGIN_INFO as the plugin name.  Return 0 on success,
+   or 1 if the version check fails.  */
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+             struct plugin_gcc_version *version)
+{
+  struct register_pass_info pass_info;
+  const char *plugin_name = plugin_info->base_name;
+
+  if (!plugin_default_version_check (version, &gcc_version))
+    return 1;
+
+  pass_info.pass = make_pass_test_string_literals (g);
+  pass_info.reference_pass_name = "ssa";
+  pass_info.ref_pass_instance_number = 1;
+  pass_info.pos_op = PASS_POS_INSERT_AFTER;
+  register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+                     &pass_info);
+
+  return 0;
+}
@@ -72,6 +72,8 @@ set plugin_test_list [list \
diagnostic-test-expressions-1.c } \
{ diagnostic_plugin_show_trees.c \
diagnostic-test-show-trees-1.c } \
+ { diagnostic_plugin_test_string_literals.c \
+ diagnostic-test-string-literals-1.c } \
]
foreach plugin_test $plugin_test_list {
@@ -1166,9 +1166,12 @@ struct GTY(()) tree_fixed_cst {
struct fixed_value * fixed_cst_ptr;
};
+struct cpp_string_location;
+
struct GTY(()) tree_string {
struct tree_typed typed;
int length;
+ cpp_string_location *loc; /* Accessed via TREE_STRING_LOCATION; NULL when no location data was supplied.  */
char str[1];
};
@@ -75,6 +75,7 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "print-tree.h"
#include "ipa-utils.h"
+#include "cpplib.h"
/* Tree code classes. */
@@ -1931,12 +1932,35 @@ build_string (int len, const char *str)
TREE_SET_CODE (s, STRING_CST);
TREE_CONSTANT (s) = 1;
TREE_STRING_LENGTH (s) = len;
+ TREE_STRING_LOCATION (s) = NULL;
memcpy (s->string.str, str, len);
s->string.str[len] = '\0';
return s;
}
+/* Counters updated by the three-argument build_string below.  */
+
+struct cpp_string_location_stats cpp_string_location_stats;
+
+/* As the two-argument build_string, but with per-character location
+   information: a GC-allocated copy of *STRLOC is attached to the new
+   STRING_CST.  If STRLOC is NULL, the result is left without location
+   information and the statistics are not updated.  */
+
+tree
+build_string (int len, const char *str, cpp_string_location *strloc)
+{
+  tree s = build_string (len, str);
+
+  /* Nothing to copy; the two-argument overload has already set the
+     location to NULL.  */
+  if (!strloc)
+    return s;
+
+  /* Need to allocate a copy.
+     NOTE(review): this presumably copies member by member, so anything
+     *STRLOC points to would be shared with the copy -- confirm that
+     such storage outlives the tree.  */
+  cpp_string_location *copy = ggc_alloc <cpp_string_location> ();
+  *copy = *strloc;
+  TREE_STRING_LOCATION (s) = copy;
+
+  /* Maintain stats on string locations.  */
+  cpp_string_location_stats.count_all++;
+  if (strloc->trivial_p ())
+    cpp_string_location_stats.count_trivial++;
+
+  return s;
+}
+
+
/* Return a newly constructed COMPLEX_CST node whose value is
specified by the real and imaginary parts REAL and IMAG.
Both REAL and IMAG should be constant nodes. TYPE, if specified,
@@ -937,9 +937,20 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
/* In a STRING_CST */
/* In C terms, this is sizeof, not strlen. */
#define TREE_STRING_LENGTH(NODE) (STRING_CST_CHECK (NODE)->string.length)
+#define TREE_STRING_LOCATION(NODE) (STRING_CST_CHECK (NODE)->string.loc)
#define TREE_STRING_POINTER(NODE) \
((const char *)(STRING_CST_CHECK (NODE)->string.str))
+extern struct cpp_string_location_stats
+{
+ /* How many cpp_string_locations have been used to construct a
+ STRING_CST (incremented by the three-argument build_string). */
+ int count_all;
+
+ /* How many of these consisted of a single run of 1-byte-per-char
+ bytes (counted when trivial_p returns true). */
+ int count_trivial;
+} cpp_string_location_stats;
+
/* In a COMPLEX_CST node. */
#define TREE_REALPART(NODE) (COMPLEX_CST_CHECK (NODE)->complex.real)
#define TREE_IMAGPART(NODE) (COMPLEX_CST_CHECK (NODE)->complex.imag)
@@ -3791,6 +3802,7 @@ extern tree build_minus_one_cst (tree);
extern tree build_all_ones_cst (tree);
extern tree build_zero_cst (tree);
extern tree build_string (int, const char *);
+extern tree build_string (int, const char *, cpp_string_location *);
extern tree build_tree_list_stat (tree, tree MEM_STAT_DECL);
#define build_tree_list(t, q) build_tree_list_stat (t, q MEM_STAT_INFO)
extern tree build_tree_list_vec_stat (const vec<tree, va_gc> *MEM_STAT_DECL);
@@ -2028,6 +2028,18 @@ cpp_string_location::get_range_at_index (unsigned int char_idx) const
return err;
}
+/* Return the source_range that starts at the location of START_IDX and
+   finishes at the finish of the range of FINISH_IDX.  FINISH_IDX is
+   within the range.  */
+source_range
+cpp_string_location::get_range_between_indices (unsigned int start_idx,
+                                                unsigned int finish_idx) const
+{
+  /* Two independent lookups; this could be optimized if necessary.  */
+  const source_location range_start = get_loc_at_index (start_idx);
+  const source_range final_char = get_range_at_index (finish_idx);
+
+  source_range combined;
+  combined.m_start = range_start;
+  combined.m_finish = final_char.m_finish;
+  return combined;
+}
+
/* FIXME. */
bool
cpp_string_location::trivial_p () const
@@ -256,6 +256,8 @@ struct GTY(()) cpp_string_location {
source_location get_loc_at_index (unsigned int idx) const;
source_range get_range_at_index (unsigned int idx) const;
+ source_range get_range_between_indices (unsigned int start_idx,
+ unsigned int finish_idx) const;
void debug () const;