@@ -1392,6 +1392,7 @@ OBJS = \
shrink-wrap.o \
simplify-rtx.o \
sparseset.o \
+ spellcheck.o \
sreal.o \
stack-ptr-mod.o \
statistics.o \
@@ -972,6 +972,7 @@ extern tree finish_label_address_expr (tree, location_t);
different implementations. Used in c-common.c. */
extern tree lookup_label (tree);
extern tree lookup_name (tree);
+extern tree lookup_name_fuzzy (tree);
extern bool lvalue_p (const_tree);
extern bool vector_targets_convertible_p (const_tree t1, const_tree t2);
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
#include "c-family/c-ada-spec.h"
#include "cilk.h"
#include "builtins.h"
+#include "spellcheck.h"
/* In grokdeclarator, distinguish syntactic contexts of declarators. */
enum decl_context
@@ -3900,6 +3901,42 @@ lookup_name_in_scope (tree name, struct c_scope *scope)
return b->decl;
return 0;
}
+
+/* Look for the closest match for NAME within the currently valid
+ scopes.
+
+ This finds the identifier with the lowest Levenshtein distance to
+ NAME. If there are multiple candidates with equal minimal distance,
+ the first one found is returned. Scopes are searched from innermost
+ outwards, and within a scope in reverse order of declaration, thus
+ benefiting candidates "near" to the current scope. */
+
+tree
+lookup_name_fuzzy (tree name)
+{
+ gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
+
+ c_binding *best_binding = NULL;
+ int best_distance = INT_MAX;
+
+ for (c_scope *scope = current_scope; scope; scope = scope->outer)
+ for (c_binding *binding = scope->bindings; binding; binding = binding->prev)
+ {
+ if (!binding->id)
+ continue;
+ int dist = levenshtein_distance (name, binding->id);
+ if (dist < best_distance)
+ {
+ best_distance = dist;
+ best_binding = binding;
+ }
+ }
+ if (best_binding)
+ return best_binding->id;
+ else
+ return NULL;
+}
+
/* Create the predefined scalar types of C,
and some nodes representing standard constants (0, 1, (void *) 0).
@@ -66,6 +66,7 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "gomp-constants.h"
#include "c-family/c-indentation.h"
+#include "spellcheck.h"
/* Initialization routine for this file. */
@@ -1543,8 +1544,14 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
|| c_parser_peek_2nd_token (parser)->type == CPP_MULT)
&& (!nested || !lookup_name (c_parser_peek_token (parser)->value)))
{
- error_at (here, "unknown type name %qE",
- c_parser_peek_token (parser)->value);
+ tree hint = lookup_name_fuzzy (c_parser_peek_token (parser)->value);
+ if (hint)
+ error_at (here, "unknown type name %qE; did you mean %qE?",
+ c_parser_peek_token (parser)->value,
+ hint);
+ else
+ error_at (here, "unknown type name %qE",
+ c_parser_peek_token (parser)->value);
/* Parse declspecs normally to get a correct pointer type, but avoid
a further "fails to be a type name" error. Refuse nested functions
@@ -7425,7 +7432,8 @@ c_parser_postfix_expression (c_parser *parser)
if (c_parser_next_token_is (parser, CPP_NAME))
{
offsetof_ref = build_component_ref
- (loc, offsetof_ref, c_parser_peek_token (parser)->value);
+ (loc, offsetof_ref, c_parser_peek_token (parser)->value,
+ &c_parser_peek_token (parser)->range);
c_parser_consume_token (parser);
while (c_parser_next_token_is (parser, CPP_DOT)
|| c_parser_next_token_is (parser,
@@ -7453,7 +7461,8 @@ c_parser_postfix_expression (c_parser *parser)
}
offsetof_ref = build_component_ref
(loc, offsetof_ref,
- c_parser_peek_token (parser)->value);
+ c_parser_peek_token (parser)->value,
+ &c_parser_peek_token (parser)->range);
c_parser_consume_token (parser);
}
else
@@ -7913,6 +7922,7 @@ c_parser_postfix_expression_after_primary (c_parser *parser,
vec<location_t> arg_loc = vNULL;
location_t start;
location_t finish;
+ source_range ident_range;
while (true)
{
@@ -8031,10 +8041,12 @@ c_parser_postfix_expression_after_primary (c_parser *parser,
expr.original_type = NULL;
return expr;
}
+ ident_range = c_parser_peek_token (parser)->range;
start = EXPR_LOCATION_RANGE (expr.value).m_start;
- finish = c_parser_peek_token (parser)->range.m_finish;
+ finish = ident_range.m_finish;
c_parser_consume_token (parser);
- expr.value = build_component_ref (op_loc, expr.value, ident);
+ expr.value = build_component_ref (op_loc, expr.value, ident,
+ &ident_range);
set_source_range (&expr.value, start, finish);
expr.original_code = ERROR_MARK;
if (TREE_CODE (expr.value) != COMPONENT_REF)
@@ -8063,14 +8075,16 @@ c_parser_postfix_expression_after_primary (c_parser *parser,
expr.original_type = NULL;
return expr;
}
+ ident_range = c_parser_peek_token (parser)->range;
start = EXPR_LOCATION_RANGE (expr.value).m_start;
- finish = c_parser_peek_token (parser)->range.m_finish;
+ finish = ident_range.m_finish;
c_parser_consume_token (parser);
expr.value = build_component_ref (op_loc,
build_indirect_ref (op_loc,
expr.value,
RO_ARROW),
- ident);
+ ident,
+ &ident_range);
set_source_range (&expr.value, start, finish);
expr.original_code = ERROR_MARK;
if (TREE_CODE (expr.value) != COMPONENT_REF)
@@ -593,7 +593,7 @@ extern struct c_expr convert_lvalue_to_rvalue (location_t, struct c_expr,
bool, bool);
extern void mark_exp_read (tree);
extern tree composite_type (tree, tree);
-extern tree build_component_ref (location_t, tree, tree);
+extern tree build_component_ref (location_t, tree, tree, source_range *);
extern tree build_array_ref (location_t, tree, tree);
extern tree build_external_ref (source_range, tree, int, tree *);
extern void pop_maybe_used (bool);
@@ -54,6 +54,8 @@ along with GCC; see the file COPYING3. If not see
#include "c-family/c-ubsan.h"
#include "cilk.h"
#include "gomp-constants.h"
+#include "gcc-rich-location.h"
+#include "spellcheck.h"
/* Possible cases of implicit bad conversions. Used to select
diagnostic messages in convert_for_assignment. */
@@ -2259,12 +2261,72 @@ lookup_field (tree type, tree component)
return tree_cons (NULL_TREE, field, NULL_TREE);
}
+/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES. */
+
+static void
+lookup_field_fuzzy_find_candidates (tree type, tree component,
+ vec<tree> *candidates)
+{
+ tree field;
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (DECL_NAME (field) == NULL_TREE
+ && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE
+ || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE))
+ {
+ lookup_field_fuzzy_find_candidates (TREE_TYPE (field),
+ component,
+ candidates);
+ }
+
+ if (DECL_NAME (field))
+ candidates->safe_push (field);
+ }
+}
+
+/* Like "lookup_field", but find the closest match, rather than
+ necessarily an exact match. */
+
+static tree
+lookup_field_fuzzy (tree type, tree component)
+{
+ gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE);
+
+ /* FIXME: move this to a unittest suite. */
+ levenshtein_distance_unit_tests ();
+
+ /* First, gather a list of candidates. */
+ auto_vec <tree> candidates;
+
+ lookup_field_fuzzy_find_candidates (type, component,
+ &candidates);
+
+ /* Now determine which is closest. */
+ int i;
+ tree field;
+ tree best_field = NULL;
+ int best_distance = INT_MAX;
+ FOR_EACH_VEC_ELT (candidates, i, field)
+ {
+ int dist = levenshtein_distance (component, DECL_NAME (field));
+ if (dist < best_distance)
+ {
+ best_distance = dist;
+ best_field = field;
+ }
+ }
+
+ return best_field;
+}
+
/* Make an expression to refer to the COMPONENT field of structure or
union value DATUM. COMPONENT is an IDENTIFIER_NODE. LOC is the
- location of the COMPONENT_REF. */
+ location of the COMPONENT_REF. IDENT_RANGE points to the source range of
+ the identifier, or is NULL. */
tree
-build_component_ref (location_t loc, tree datum, tree component)
+build_component_ref (location_t loc, tree datum, tree component,
+ source_range *ident_range)
{
tree type = TREE_TYPE (datum);
enum tree_code code = TREE_CODE (type);
@@ -2294,7 +2356,31 @@ build_component_ref (location_t loc, tree datum, tree component)
if (!field)
{
- error_at (loc, "%qT has no member named %qE", type, component);
+ if (!ident_range)
+ {
+ error_at (loc, "%qT has no member named %qE",
+ type, component);
+ return error_mark_node;
+ }
+ gcc_rich_location richloc (*ident_range);
+ if (TREE_CODE (datum) == INDIRECT_REF)
+ richloc.add_expr (TREE_OPERAND (datum, 0));
+ else
+ richloc.add_expr (datum);
+ field = lookup_field_fuzzy (type, component);
+ if (field)
+ {
+ error_at_rich_loc
+ (&richloc,
+ "%qT has no member named %qE; did you mean %qE?",
+ type, component, field);
+ /* FIXME: error recovery: should we try to keep going,
+ with "field"? (having issued an error, and hence no
+ output). */
+ }
+ else
+ error_at_rich_loc (&richloc, "%qT has no member named %qE",
+ type, component);
return error_mark_node;
}
@@ -2665,7 +2665,8 @@ objc_build_component_ref (tree datum, tree component)
return finish_class_member_access_expr (datum, component, false,
tf_warning_or_error);
#else
- return build_component_ref (input_location, datum, component);
+ return build_component_ref (input_location, datum, component,
+ NULL);
#endif
}
new file mode 100644
@@ -0,0 +1,126 @@
+/* Find near-matches for strings and identifiers.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "spellcheck.h"
+
+/* The Levenshtein distance is an "edit-distance": the minimal
+ number of one-character insertions, removals or substitutions
+ that are needed to change one string into another.
+
+ This implementation uses the Wagner-Fischer algorithm.
+
+ It is currently a naive implementation, various optimizations are
+ possible (e.g. removal of the M*N allocation in favor of a pair
+ of allocations of M and N). */
+
+static int
+levenshtein_distance (const char *s, int m,
+ const char *t, int n)
+{
+ const bool debug = false;
+
+ int *d = new int [(m + 1) * (n + 1)];
+ if (debug)
+ memset (d, 0xab, (m + 1) * (n + 1) * sizeof (int));
+
+#define D(I, J) (d[((I) * (n + 1)) + (J)])
+
+ for (int i = 0; i <= m; i++)
+ D(i, 0) = i;
+ for (int j = 0; j <= n; j++)
+ D(0, j) = j;
+
+ for (int j = 1; j <= n; j++)
+ for (int i = 1; i <= m; i++)
+ if (s[i - 1] == t[j - 1])
+ D(i, j) = D(i - 1, j - 1);
+ else
+ {
+ int deletion = D(i - 1, j ) + 1;
+ int insertion = D(i, j - 1) + 1;
+ int substitution = D(i - 1, j - 1) + 1;
+ int cheapest = MIN (deletion, insertion);
+ cheapest = MIN (cheapest, substitution);
+ D(i, j) = cheapest;
+ }
+
+ if (debug)
+ {
+ printf ("s=\"%s\" t=\"%s\"\n", s, t);
+ for (int j = 0; j <= n; j++)
+ {
+ for (int i = 0; i <= m; i++)
+ printf ("%i ", D(i, j));
+ printf ("\n");
+ }
+ }
+
+ int result = D(m, n);
+#undef D
+
+ delete[] d;
+
+ return result;
+}
+
+/* Calculate Levenshtein distance between two nil-terminated strings.
+ This exists purely for the unit tests. */
+
+int
+levenshtein_distance (const char *s, const char *t)
+{
+ return levenshtein_distance (s, strlen (s), t, strlen (t));
+}
+
+/* Unit tests for levenshtein_distance. */
+
+void
+levenshtein_distance_unit_tests (void)
+{
+ gcc_assert (levenshtein_distance ("", "nonempty") == strlen ("nonempty"));
+ gcc_assert (levenshtein_distance ("nonempty", "") == strlen ("nonempty"));
+ gcc_assert (levenshtein_distance ("saturday", "sunday") == 3);
+ gcc_assert (levenshtein_distance ("sunday", "saturday") == 3);
+ gcc_assert (levenshtein_distance ("foo", "m_foo") == 2);
+ gcc_assert (levenshtein_distance ("m_foo", "foo") == 2);
+ gcc_assert (levenshtein_distance ("hello_world", "HelloWorld") == 3);
+ gcc_assert (levenshtein_distance
+ ("the quick brown fox jumps over the lazy dog", "dog") == 40);
+ gcc_assert (levenshtein_distance
+ ("the quick brown fox jumps over the lazy dog", "fox") == 40);
+}
+
+/* Calculate Levenshtein distance between two identifiers. */
+
+int
+levenshtein_distance (tree ident_s, tree ident_t)
+{
+ gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
+ gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
+
+ return levenshtein_distance (IDENTIFIER_POINTER (ident_s),
+ IDENTIFIER_LENGTH (ident_s),
+ IDENTIFIER_POINTER (ident_t),
+ IDENTIFIER_LENGTH (ident_t));
+}
new file mode 100644
@@ -0,0 +1,32 @@
+/* Find near-matches for strings and identifiers.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SPELLCHECK_H
+#define GCC_SPELLCHECK_H
+
+extern void
+levenshtein_distance_unit_tests (void);
+
+extern int
+levenshtein_distance (const char *s, const char *t);
+
+extern int
+levenshtein_distance (tree ident_s, tree ident_t);
+
+#endif /* GCC_SPELLCHECK_H */
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-fdiagnostics-show-caret" } */
+
+struct foo
+{
+ int foo;
+ int bar;
+ int baz;
+};
+
+int test (struct foo *ptr)
+{
+ return ptr->m_bar; /* { dg-error "'struct foo' has no member named 'm_bar'; did you mean 'bar'?" } */
+
+/* { dg-begin-multiline-output "" }
+ return ptr->m_bar;
+ ~~~ ^~~~~
+ { dg-end-multiline-output "" } */
+}
+
+int test2 (void)
+{
+ struct foo instance = {};
+ return instance.m_bar; /* { dg-error "'struct foo' has no member named 'm_bar'; did you mean 'bar'?" } */
+
+/* { dg-begin-multiline-output "" }
+ return instance.m_bar;
+ ~~~~~~~~ ^~~~~
+ { dg-end-multiline-output "" } */
+}
+
+int64 foo; /* { dg-error "unknown type name 'int64'; did you mean 'int'?" } */
+/* { dg-begin-multiline-output "" }
+ int64 foo;
+ ^~~~~
+ { dg-end-multiline-output "" } */