From 3d604c7ca75e6293be5a84546b7f34bee48d3d92 Mon Sep 17 00:00:00 2001
From: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Date: Tue, 1 Dec 2015 13:55:01 +0100
Subject: [PATCH] RFC: Use Levenshtein spelling suggestions in Fortran FE (v2)
gcc/fortran/ChangeLog
2015-11-29 Bernhard Reutner-Fischer <aldot@gcc.gnu.org>
David Malcolm <dmalcolm@redhat.com>
* gfortran.h (gfc_lookup_function_fuzzy): New declaration.
* interface.c (check_interface0): Call gfc_lookup_function_fuzzy
and use it to potentially suggest a hint for misspelled names.
* resolve.c: Include spellcheck.h.
(lookup_function_fuzzy_find_candidates): New static function.
(lookup_uop_fuzzy_find_candidates): Likewise.
(lookup_uop_fuzzy): Likewise.
(resolve_operator) <INTRINSIC_USER>: Call lookup_uop_fuzzy.
(gfc_lookup_function_fuzzy): New definition.
(resolve_unknown_f): Call gfc_lookup_function_fuzzy.
* symbol.c: Include spellcheck.h.
(lookup_symbol_fuzzy_find_candidates): New static function.
(lookup_symbol_fuzzy): Likewise.
(gfc_set_default_type): Call lookup_symbol_fuzzy.
(lookup_component_fuzzy_find_candidates): New static function.
(lookup_component_fuzzy): Likewise.
(gfc_find_component): Call lookup_component_fuzzy.
gcc/ChangeLog:
David Malcolm <dmalcolm@redhat.com>
* spellcheck.c (find_closest_string): New function.
* spellcheck.h (find_closest_string): New decl.
gcc/testsuite/ChangeLog
2015-11-29 Bernhard Reutner-Fischer <aldot@gcc.gnu.org>
* gfortran.dg/spellcheck-operator.f90: New testcase.
* gfortran.dg/spellcheck-procedure.f90: New testcase.
* gfortran.dg/spellcheck-structure.f90: New testcase.
---
gcc/fortran/gfortran.h | 1 +
gcc/fortran/interface.c | 16 +++-
gcc/fortran/resolve.c | 89 +++++++++++++++++++++-
gcc/fortran/symbol.c | 83 +++++++++++++++++++-
gcc/spellcheck.c | 43 +++++++++++
gcc/spellcheck.h | 4 +
gcc/testsuite/gfortran.dg/spellcheck-operator.f90 | 30 ++++++++
gcc/testsuite/gfortran.dg/spellcheck-procedure.f90 | 41 ++++++++++
gcc/testsuite/gfortran.dg/spellcheck-structure.f90 | 35 +++++++++
9 files changed, 331 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gfortran.dg/spellcheck-operator.f90
create mode 100644 gcc/testsuite/gfortran.dg/spellcheck-procedure.f90
create mode 100644 gcc/testsuite/gfortran.dg/spellcheck-structure.f90
@@ -3085,6 +3085,7 @@ bool gfc_type_is_extensible (gfc_symbol *);
bool gfc_resolve_intrinsic (gfc_symbol *, locus *);
bool gfc_explicit_interface_required (gfc_symbol *, char *, int);
extern int gfc_do_concurrent_flag;
+const char* gfc_lookup_function_fuzzy (const char *, gfc_symtree *);
/* array.c */
@@ -1590,10 +1590,18 @@ check_interface0 (gfc_interface *p, const char *interface_name)
if (p->sym->attr.external)
gfc_error ("Procedure %qs in %s at %L has no explicit interface",
p->sym->name, interface_name, &p->sym->declared_at);
- else
- gfc_error ("Procedure %qs in %s at %L is neither function nor "
- "subroutine", p->sym->name, interface_name,
- &p->sym->declared_at);
+ else {
+ const char *guessed
+ = gfc_lookup_function_fuzzy (p->sym->name, p->sym->ns->sym_root);
+ if (guessed)
+ gfc_error ("Procedure %qs in %s at %L is neither function nor "
+ "subroutine; did you mean %qs?", p->sym->name,
+ interface_name, &p->sym->declared_at, guessed);
+ else
+ gfc_error ("Procedure %qs in %s at %L is neither function nor "
+ "subroutine", p->sym->name, interface_name,
+ &p->sym->declared_at);
+ }
return 1;
}
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
#include "data.h"
#include "target-memory.h" /* for gfc_simplify_transfer */
#include "constructor.h"
+#include "spellcheck.h"
/* Types used in equivalence statements. */
@@ -2682,6 +2683,38 @@ resolve_specific_f (gfc_expr *expr)
return true;
}
+/* Recursively append candidate SYM to CANDIDATES. */
+
+static void
+lookup_function_fuzzy_find_candidates (gfc_symtree *sym,
+ vec<const char *> *candidates)
+{
+ gfc_symtree *p;
+ for (p = sym->right; p; p = p->right)
+ {
+ lookup_function_fuzzy_find_candidates (p, candidates);
+ if (p->n.sym->ts.type != BT_UNKNOWN)
+ candidates->safe_push (p->name);
+ }
+ for (p = sym->left; p; p = p->left)
+ {
+ lookup_function_fuzzy_find_candidates (p, candidates);
+ if (p->n.sym->ts.type != BT_UNKNOWN)
+ candidates->safe_push (p->name);
+ }
+}
+
+
+/* Lookup function FN fuzzily, taking names in FUN into account. */
+
+const char*
+gfc_lookup_function_fuzzy (const char *fn, gfc_symtree *fun)
+{
+ auto_vec <const char *> candidates;
+ lookup_function_fuzzy_find_candidates (fun, &candidates);
+ return find_closest_string (fn, &candidates);
+}
+
/* Resolve a procedure call not known to be generic nor specific. */
@@ -2732,8 +2765,15 @@ set_type:
if (ts->type == BT_UNKNOWN)
{
- gfc_error ("Function %qs at %L has no IMPLICIT type",
- sym->name, &expr->where);
+ const char *guessed
+ = gfc_lookup_function_fuzzy (sym->name, sym->ns->sym_root);
+ if (guessed)
+ gfc_error ("Function %qs at %L has no IMPLICIT type"
+ "; did you mean %qs?",
+ sym->name, &expr->where, guessed);
+ else
+ gfc_error ("Function %qs at %L has no IMPLICIT type",
+ sym->name, &expr->where);
return false;
}
else
@@ -3504,6 +3544,40 @@ compare_shapes (gfc_expr *op1, gfc_expr *op2)
return t;
}
+/* Recursively append candidate UOP to CANDIDATES. */
+
+static void
+lookup_uop_fuzzy_find_candidates (gfc_symtree *uop,
+ vec<const char *> *candidates)
+{
+ gfc_symtree *p;
+ /* Not sure how to properly filter here. Use all for a start.
+ n.uop.op is NULL for empty interface operators (is that legal?) disregard
+ these as i suppose they don't make terribly sense. */
+ for (p = uop->right; p; p = p->right)
+ {
+ lookup_function_fuzzy_find_candidates (p, candidates);
+ if (p->n.uop->op != NULL)
+ candidates->safe_push (p->name);
+ }
+ for (p = uop->left; p; p = p->left)
+ {
+ lookup_function_fuzzy_find_candidates (p, candidates);
+ if (p->n.uop->op != NULL)
+ candidates->safe_push (p->name);
+ }
+}
+
+/* Lookup user-operator OP fuzzily, taking names in UOP into account. */
+
+static const char*
+lookup_uop_fuzzy (const char *op, gfc_symtree *uop)
+{
+ auto_vec <const char *> candidates;
+ lookup_uop_fuzzy_find_candidates (uop, &candidates);
+ return find_closest_string (op, &candidates);
+}
+
/* Resolve an operator expression node. This can involve replacing the
operation with a user defined function call. */
@@ -3703,7 +3777,16 @@ resolve_operator (gfc_expr *e)
case INTRINSIC_USER:
if (e->value.op.uop->op == NULL)
- sprintf (msg, _("Unknown operator '%s' at %%L"), e->value.op.uop->name);
+ {
+ const char *name = e->value.op.uop->name;
+ const char *guessed;
+ guessed = lookup_uop_fuzzy (name, e->value.op.uop->ns->uop_root);
+ if (guessed)
+ sprintf (msg, _("Unknown operator '%s' at %%L; did you mean '%s'?"),
+ name, guessed);
+ else
+ sprintf (msg, _("Unknown operator '%s' at %%L"), name);
+ }
else if (op2 == NULL)
sprintf (msg, _("Operand of user operator '%s' at %%L is %s"),
e->value.op.uop->name, gfc_typename (&op1->ts));
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see
#include "parse.h"
#include "match.h"
#include "constructor.h"
+#include "spellcheck.h"
/* Strings for all symbol attributes. We use these for dumping the
@@ -235,6 +236,39 @@ gfc_get_default_type (const char *name, gfc_namespace *ns)
}
+/* Recursively append candidate SYM to CANDIDATES. */
+
+static void
+lookup_symbol_fuzzy_find_candidates (gfc_symtree *sym,
+ vec<const char *> *candidates)
+{
+ gfc_symtree *p;
+ for (p = sym->right; p; p = p->right)
+ {
+ lookup_symbol_fuzzy_find_candidates (p, candidates);
+ if (p->n.sym->ts.type != BT_UNKNOWN)
+ candidates->safe_push (p->name);
+ }
+ for (p = sym->left; p; p = p->left)
+ {
+ lookup_symbol_fuzzy_find_candidates (p, candidates);
+ if (p->n.sym->ts.type != BT_UNKNOWN)
+ candidates->safe_push (p->name);
+ }
+}
+
+
+/* Lookup symbol SYM fuzzily, taking names in SYMBOL into account. */
+
+static const char*
+lookup_symbol_fuzzy (const char *sym, gfc_symbol *symbol)
+{
+ auto_vec <const char *> candidates;
+ lookup_symbol_fuzzy_find_candidates (symbol->ns->sym_root, &candidates);
+ return find_closest_string (sym, &candidates);
+}
+
+
/* Given a pointer to a symbol, set its type according to the first
letter of its name. Fails if the letter in question has no default
type. */
@@ -253,8 +287,15 @@ gfc_set_default_type (gfc_symbol *sym, int error_flag, gfc_namespace *ns)
{
if (error_flag && !sym->attr.untyped)
{
- gfc_error ("Symbol %qs at %L has no IMPLICIT type",
- sym->name, &sym->declared_at);
+ const char *guessed
+ = lookup_symbol_fuzzy (sym->name, sym);
+ if (guessed)
+ gfc_error ("Symbol %qs at %L has no IMPLICIT type"
+ "; did you mean %qs?",
+ sym->name, &sym->declared_at, guessed);
+ else
+ gfc_error ("Symbol %qs at %L has no IMPLICIT type",
+ sym->name, &sym->declared_at);
sym->attr.untyped = 1; /* Ensure we only give an error once. */
}
@@ -2188,6 +2229,32 @@ bad:
}
+/* Recursively append candidate COMPONENT structures to CANDIDATES. */
+
+static void
+lookup_component_fuzzy_find_candidates (gfc_component *component,
+ vec<const char *> *candidates)
+{
+ for (gfc_component *p = component; p; p = p->next)
+ {
+ if (00 && p->ts.type == BT_DERIVED)
+ /* ??? There's no (suitable) DERIVED_TYPE which would come in
+ handy throughout the frontend; Use CLASS_DATA here for brevity. */
+ lookup_component_fuzzy_find_candidates (CLASS_DATA (p), candidates);
+ candidates->safe_push (p->name);
+ }
+}
+
+/* Lookup component MEMBER fuzzily, taking names in COMPONENT into account. */
+
+static const char*
+lookup_component_fuzzy (const char *member, gfc_component *component)
+{
+ auto_vec <const char *> candidates;
+ lookup_component_fuzzy_find_candidates (component, &candidates);
+ return find_closest_string (member, &candidates);
+}
+
/* Given a derived type node and a component name, try to locate the
component structure. Returns the NULL pointer if the component is
not found or the components are private. If noaccess is set, no access
@@ -2238,8 +2305,16 @@ gfc_find_component (gfc_symbol *sym, const char *name,
}
if (p == NULL && !silent)
- gfc_error ("%qs at %C is not a member of the %qs structure",
- name, sym->name);
+ {
+ const char *guessed = lookup_component_fuzzy (name, sym->components);
+ if (guessed)
+ gfc_error ("%qs at %C is not a member of the %qs structure"
+ "; did you mean %qs?",
+ name, sym->name, guessed);
+ else
+ gfc_error ("%qs at %C is not a member of the %qs structure",
+ name, sym->name);
+ }
return p;
}
@@ -119,3 +119,46 @@ levenshtein_distance (const char *s, const char *t)
{
return levenshtein_distance (s, strlen (s), t, strlen (t));
}
+
+/* Given TARGET, a non-NULL string, and CANDIDATES, a vec of non-NULL
+ strings, determine which element within CANDIDATES has the lowest edit
+ distance to TARGET. If there are multiple elements with the
+ same minimal distance, the first in the vector wins.
+
+ If more than half of the letters were misspelled, the suggestion is
+ likely to be meaningless, so return NULL for this case. */
+
+const char *
+find_closest_string (const char *target,
+ const auto_vec<const char *> *candidates)
+{
+ gcc_assert (target);
+ gcc_assert (candidates);
+
+ int i;
+ const char *string, *best_string = NULL;
+ edit_distance_t best_distance = MAX_EDIT_DISTANCE;
+ size_t len_target = strlen (target);
+ FOR_EACH_VEC_ELT (*candidates, i, string)
+ {
+ gcc_assert (string);
+ edit_distance_t dist = levenshtein_distance (target, len_target,
+ string, strlen (string));
+ if (dist < best_distance)
+ {
+ best_distance = dist;
+ best_string = string;
+ }
+ }
+
+ /* If more than half of the letters were misspelled, the suggestion is
+ likely to be meaningless. */
+ if (best_string)
+ {
+ unsigned int cutoff = MAX (len_target, strlen (best_string)) / 2;
+ if (best_distance > cutoff)
+ return NULL;
+ }
+
+ return best_string;
+}
@@ -31,6 +31,10 @@ levenshtein_distance (const char *s, int len_s,
extern edit_distance_t
levenshtein_distance (const char *s, const char *t);
+extern const char *
+find_closest_string (const char *target,
+ const auto_vec<const char *> *candidates);
+
/* spellcheck-tree.c */
extern edit_distance_t
new file mode 100644
@@ -0,0 +1,30 @@
+! { dg-do compile }
+! test levenshtein based spelling suggestions
+
+module mymod1
+ implicit none
+ contains
+ function something_good (iarg1)
+ integer :: something_good
+ integer, intent(in) :: iarg1
+ something_good = iarg1 + 42
+ end function something_good
+end module mymod1
+
+program spellchekc
+ use mymod1
+ implicit none
+
+ interface operator (.mywrong.)
+ module procedure something_wring ! { dg-error "Procedure .something_wring. in operator interface .mywrong. at .1. is neither function nor subroutine; did you mean .something_good.\\?|User operator procedure .something_wring. at .1. must be a FUNCTION" }
+ end interface
+
+ interface operator (.mygood.)
+ module procedure something_good
+ end interface
+
+ integer :: i, j, added
+ i = 0
+ j = 0
+ added = .mygoof. j ! { dg-error "Unknown operator .mygoof. at .1.; did you mean .mygood.\\?" }
+end program spellchekc
new file mode 100644
@@ -0,0 +1,41 @@
+! { dg-do compile }
+! test levenshtein based spelling suggestions
+
+module mymod1
+ implicit none
+ contains
+ function something_good (iarg1)
+ integer :: something_good
+ integer, intent(in) :: iarg1
+ something_good = iarg1 + 42
+ end function something_good
+end module mymod1
+
+subroutine bark_unless_zero(iarg)
+ implicit none
+ integer, intent(in) :: iarg
+ if (iarg /= 0) call abort
+end subroutine bark_unless_zero
+
+function myadd(iarg1, iarg2)
+ implicit none
+ integer :: myadd
+ integer, intent(in) :: iarg1, iarg2
+ myadd = iarg1 + iarg2
+end function myadd
+
+program spellchekc
+ use mymod1
+ implicit none
+
+ integer :: i, j, myadd
+ i = 0
+ j = 0
+! I suppose this cannot be made to work, no\\?
+! call barf_unless_zero(i) ! { -dg-error "; did you mean .bark_unless_zero.\\?" }
+ j = something_goof(j) ! { dg-error "no IMPLICIT type; did you mean .something_good.\\?" }
+ j = myaddd(i, j) ! { dg-error "no IMPLICIT type; did you mean .myadd.\\?" }
+ j = mya(i, j) ! { dg-error "no IMPLICIT type; did you mean .myadd.\\?" }
+ if (j /= 42) call abort
+
+end program spellchekc
new file mode 100644
@@ -0,0 +1,35 @@
+! { dg-do compile }
+! test levenshtein based spelling suggestions
+implicit none
+
+!!!!!!!!!!!!!! structure tests !!!!!!!!!!!!!!
+type type1
+ real :: radius
+ integer :: i
+end type type1
+
+type type2
+ integer :: myint
+ type(type1) :: mytype
+end type type2
+
+type type3
+ type(type2) :: type_2
+end type type3
+type type4
+ type(type3) :: type_3
+end type type4
+
+type(type1) :: t1
+t1%radiuz = .0 ! { dg-error ".radiuz. at .1. is not a member of the .type1. structure; did you mean .radius.\\?" }
+t1%x = .0 ! { dg-error ".x. at .1. is not a member of the .type1. structure" }
+type(type2) :: t2
+t2%mytape%radius = .0 ! { dg-error ".mytape. at .1. is not a member of the .type2. structure; did you mean .mytype.\\?" }
+t2%mytype%radious = .0 ! { dg-error ".radious. at .1. is not a member of the .type1. structure; did you mean .radius.\\?" }
+type(type4) :: t4
+t4%type_3%type_2%mytype%radium = 88.0 ! { dg-error ".radium. at .1. is not a member of the .type1. structure; did you mean .radius.\\?" }
+
+!!!!!!!!!!!!!! symbol tests !!!!!!!!!!!!!!
+integer :: iarg1
+iarg2 = 1 ! { dg-error "Symbol .iarg2. at .1. has no IMPLICIT type; did you mean .iarg1.\\?" }
+end
--
1.8.5.3