diff mbox

[RFC/PATCH] More precise diagnostic locations: dynamic locations for columns vs explicit offset

Message ID CAESRpQBYDbXhHJdmG4vAKv92E94fyY_ezABqbFUK6AhU3GpBfA@mail.gmail.com
State New
Headers show

Commit Message

Manuel López-Ibáñez Aug. 30, 2014, 5:06 p.m. UTC
In some situations, we would like to point to a location which was not
encoded when tokenizing. This happens, for example, in two prominent
cases:

1) To get precise locations within strings
(https://gcc.gnu.org/PR52952) for example, for Wformat warnings.

2) In the Fortran FE, which gives quite precise location information
by tracking the characters that it wants to warn about instead of
relying on the line-map machinery.

The most straightforward way to implement this is by adding variants
of diagnostic functions that take an explicit "offset" argument and
pass this offset through the whole diagnostics machinery. This is what
I implemented in the patch format_offset.diff attached. The downside
is that we would need to add even more variants (with/without offset)
of various diagnostic functions and track the offset/no-offset cases
explicitly.

The nicer/cleaner alternative is to somehow (re)compute a single
location value from a given location plus the new offset. This is what
I implemented in patch fortran-diagnostics-part3.diff in
linemap_redo_position_for_column(). As far as I understand, this
method only works reliably if the location+offset does not jump to a
different line map, that is, if to_column < (1u <<
map->d.ordinary.column_bits). Otherwise, we may need to recompute all
successive line-maps to accommodate the new location. The best way to
do the latter (or to work-around that issue) is not clear to me at the
moment.

Thus, I am putting forward these two alternative implementations and
seeking comments/advice/help in deciding what would be the best way to
fix this key missing piece of GCC diagnostics.

Related to this, perhaps I should make a more general call for help.
Despite the heroic, constant torrent of diagnostic fixes by Paolo,
Marek and others, I have not seen much progress on the key
infrastructure issues in the roadmap
(https://gcc.gnu.org/wiki/Better_Diagnostics). We have had at least
one major item per release since GCC 4.5, but I don't see any
particular item being tackled for GCC 5.0. Are you planning to tackle
any of them?

I have a simple patch to implement Fix-it hints but it needs more
work. Unfortunately, I have very little free time to dedicate to GCC
nowadays, so I'm afraid I might not even be able to finish this in
time. Any item in that list would be a nice major feature for GCC 5.0.
Perhaps we need to ask for help in gcc/gcc-help or some other forum.

Cheers,

Manuel.

Index: gcc/tree-diagnostic.c
===================================================================
--- gcc/tree-diagnostic.c	(revision 214251)
+++ gcc/tree-diagnostic.c	(working copy)
@@ -244,11 +244,11 @@ virt_loc_aware_diagnostic_finalizer (dia
   maybe_unwind_expanded_macro_loc (context, diagnostic,
 				   diagnostic->location);
 }
 
 /* Default tree printer.   Handles declarations only.  */
-static bool
+bool
 default_tree_printer (pretty_printer *pp, text_info *text, const char *spec,
 		      int precision, bool wide, bool set_locus, bool hash)
 {
   tree t;
 
Index: gcc/fortran/gfortran.h
===================================================================
--- gcc/fortran/gfortran.h	(revision 214251)
+++ gcc/fortran/gfortran.h	(working copy)
@@ -2696,11 +2696,13 @@ void gfc_warning_cmdline (const char *gm
 
 void gfc_clear_warning (void);
 void gfc_warning_check (void);
 
 void gfc_error (const char *, ...) ATTRIBUTE_GCC_GFC(1,2);
+void gfc_error_cmdline (const char *gmsgid, ...) ATTRIBUTE_GCC_GFC(1,2);
 void gfc_error_now (const char *, ...) ATTRIBUTE_GCC_GFC(1,2);
+void gfc_error_now_2 (const char *, ...) ATTRIBUTE_GCC_GFC(1,2);
 void gfc_fatal_error (const char *, ...) ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC(1,2);
 void gfc_internal_error (const char *, ...) ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC(1,2);
 void gfc_clear_error (void);
 int gfc_error_check (void);
 int gfc_error_flag_test (void);
Index: gcc/fortran/error.c
===================================================================
--- gcc/fortran/error.c	(revision 214251)
+++ gcc/fortran/error.c	(working copy)
@@ -956,10 +956,67 @@ gfc_warning_now (const char *gmsgid, ...
     gfc_increment_error_count();
 
   buffer_flag = i;
 }
 
+/* Encode and return a source_location from a column number. The
+   source line considered is the last source line used to call
+   linemap_line_start, i.e, the last source line which a location was
+   encoded from.  */
+
+source_location
+linemap_redo_position_for_column (struct line_maps *set, source_location loc,
+                                  unsigned int to_column)
+{
+  const struct line_map * map = linemap_lookup (set, loc);
+  gcc_assert(to_column < (1u << map->d.ordinary.column_bits));
+  source_location r = loc + to_column;
+  gcc_assert (map == linemap_lookup (set, r));
+  return r;
+}
+
+bool
+default_tree_printer (pretty_printer *pp, text_info *text, const char *spec,
+		      int precision, bool wide, bool set_locus, bool hash);
+
+/* Called from output_format -- during diagnostic message processing --
+   to handle C++ specific format specifier with the following meanings:
+
+   %C  Current locus (no argument) 
+*/
+static bool
+gfc_format_decoder (pretty_printer *pp,
+		    text_info *text, const char *spec,
+		    int precision, bool wide, bool set_locus, bool verbose)
+{
+  const char *result;
+  switch (*spec)
+    {
+    case 'C':
+      {
+	result = "(1)"; 
+        gcc_assert (text->locus);
+        *text->locus = gfc_current_locus.lb->location;
+        gcc_assert (gfc_current_locus.nextc - gfc_current_locus.lb->line);
+        unsigned int c1 = gfc_current_locus.nextc - gfc_current_locus.lb->line;
+        *text->locus 
+	  = linemap_redo_position_for_column (line_table, *text->locus,
+					      c1 + 1);
+        global_dc->caret_char = '1';
+	break;
+      }
+    default:
+      /* We can get called from the gimplifier.  */
+      /* ??? Is there a way to set the formart_decoder back to this default? */
+      return default_tree_printer (pp, text, spec, precision, wide, set_locus,
+				   verbose);
+    }
+
+  pp_string (pp, result);
+  return true;
+}
+
 /* Return a malloc'd string describing a location.  The caller is
    responsible for freeing the memory.  */
 static char *
 gfc_diagnostic_build_prefix (diagnostic_context *context,
 			     const diagnostic_info *diagnostic)
@@ -1065,10 +1122,24 @@ gfc_warning_cmdline (const char *gmsgid,
 		       DK_WARNING);
   report_diagnostic (&diagnostic);
   va_end (argp);
 }
 
+/* Give an error about the command-line.  */
+
+void
+gfc_error_cmdline (const char *gmsgid, ...)
+{
+  va_list argp;
+  diagnostic_info diagnostic;
+
+  va_start (argp, gmsgid);
+  diagnostic_set_info (&diagnostic, gmsgid, &argp, UNKNOWN_LOCATION, DK_ERROR);
+  report_diagnostic (&diagnostic);
+  va_end (argp);
+}
+
 /* Clear the warning flag.  */
 
 void
 gfc_clear_warning (void)
 {
@@ -1145,10 +1216,29 @@ warning:
 
 
 /* Immediate error.  */
 
 void
+gfc_error_now_2 (const char *gmsgid, ...)
+{
+  va_list argp;
+  diagnostic_info diagnostic;
+
+  va_start (argp, gmsgid);
+  diagnostic_set_info (&diagnostic, gmsgid, &argp, UNKNOWN_LOCATION, DK_ERROR);
+  report_diagnostic (&diagnostic);
+  va_end (argp);
+
+  gfc_increment_error_count();
+
+  if (flag_fatal_errors)
+    exit (FATAL_EXIT_CODE);
+}
+
+/* Immediate error.  */
+
+void
 gfc_error_now (const char *gmsgid, ...)
 {
   va_list argp;
   int i;
 
@@ -1319,7 +1409,8 @@ gfc_errors_to_warnings (int f)
 void
 gfc_diagnostics_init (void)
 {
   diagnostic_starter (global_dc) = gfc_diagnostic_starter;
   diagnostic_finalizer (global_dc) = gfc_diagnostic_finalizer;
+  diagnostic_format_decoder (global_dc) = gfc_format_decoder;
   global_dc->caret_char = '^';
 }
Index: gcc/fortran/parse.c
===================================================================
--- gcc/fortran/parse.c	(revision 214251)
+++ gcc/fortran/parse.c	(working copy)
@@ -844,11 +844,11 @@ next_free (void)
 	  do
 	    c = gfc_next_ascii_char ();
 	  while (ISDIGIT(c));
 
 	  if (!gfc_is_whitespace (c))
-	    gfc_error_now ("Non-numeric character in statement label at %C");
+	    gfc_error_now_2 ("Non-numeric character in statement label at %C");
 
 	  return ST_NONE;
 	}
       else
 	{

Comments

Dodji Seketeli Sept. 25, 2014, 11:39 a.m. UTC | #1
Manuel López-Ibáñez <lopezibanez@gmail.com> a écrit:

> In some situations, we would like to point to a location which was not
> encoded when tokenizing. This happens, for example, in two prominent
> cases:
>
> 1) To get precise locations within strings
> (https://gcc.gnu.org/PR52952) for example, for Wformat warnings.

This feature would be very welcome indeed.

>
> 2) In the Fortran FE, which gives quite precise location information
> by tracking the characters that it wants to warn about instead of
> relying on the line-map machinery.

So with this feature, the Fortran FE would then use the then more
"generic" diagnostics machinery, right?

> The most straightforward way to implement this is by adding variants
> of diagnostic functions that take an explicit "offset" argument and
> pass this offset through the whole diagnostics machinery. This is what
> I implemented in the patch format_offset.diff attached. The downside
> is that we would need to add even more variants (with/without offset)
> of various diagnostic functions and track the offset/no-offset cases
> explicitly.

I would be inclined to go for this route at first sight because of its
conceptual simplicity, even if it might be heavy in terms of the
number of entry points to maintain for users of the diagnostics
sub-system but then ...

> The nicer/cleaner alternative is to somehow (re)compute a single
> location value from a given location plus the new offset.

... I agree with this.  It's more elegant and maintainable to go this
way.  But it might involve some hair splitting.


> This is what I implemented in patch fortran-diagnostics-part3.diff
> in linemap_redo_position_for_column(). As far as I understand, this
> method only works reliably if the location+offset does not jump to a
> different line map, that is, if to_column < (1u <<
> map->d.ordinary.column_bits). Otherwise, we may need to recompute
> all successive line-maps to accommodate the new location. The best
> way to do the latter (or to work-around that issue) is not clear to
> me at the moment.

I think it might be more involved than that.

There are two kinds of locations:

 1/ spelling locations.  They represent a real point in the source
 code.  For now, the beginning of a token.

 2/ virtual locations.  They are an abstract number, calculated in a
 convoluted way to encode the fact that a given token (rather, the
 location of that token) was e.g, possibly passed to a function-like
 macro that used it in its expansion, and that macro was expanded
 somewhere.  And from that number, we can get back to the macro into
 which it was used, expanded, where the macro was expanded and we can
 also get the original spelling location of the token we are looking
 at.

I might be maybe missing something, but if the location is not virtual
(case 1/), I *think* that in practice we are not likely to see that
location + column jumps to the "next" map, unless we are running low
on line maps space -- in which case, either columns tracking or even
line maps are turned off -- or the token we are looking at it
is *huge*.  In the later case, when we start tracking the location of
the *end* of tokens (as said in the roadmap), I think that later issue
is going to vanish because a given line map is going to be allocated big
enough to contain locations until at least the end of the last token
it "contains".

If the location is virtual (case 2/), then the "location + offset"
value you are referring to is meaningless, unfortunately.  You must
get back to to the spelling location of that token first; that is, you
have to consider "spelling_location(location) + offset", and we are
back to the first case (case 1/).


> Thus, I am putting forward these two alternative implementations and
> seeking comments/advice/help in deciding what would be the best way to
> fix this key missing piece of GCC diagnostics.

Thanks.

> Related to this, perhaps I should make a more general call for help.
> Despite the heroic, constant torrent of diagnostic fixes by Paolo,
> Marek and others, I have not seen much progress on the key
> infrastructure issues in the roadmap
> (https://gcc.gnu.org/wiki/Better_Diagnostics). We have had at least
> one major item per release since GCC 4.5, but I don't see any
> particular item being tackled for GCC 5.0. Are you planning to tackle
> any of them?

Unfortunately, it's unlikely that I'll have time to tackle any of
this.  I am quite busy on libabigail
(http://https://sourceware.org/libabigail/) in this cycle.  And it's
also important for us.  So I'd rather shoot for the next cycle.

But that shouldn't prevent interested hackers to jump in :-)

> I have a simple patch to implement Fix-it hints but it needs more
> work. Unfortunately, I have very little free time to dedicate to GCC
> nowadays, so I'm afraid I might not even be able to finish this in
> time. Any item in that list would be a nice major feature for GCC
> 5.0.

Thank you for the effort you are putting in this, despite your tight
schedule.  This is really appreciated.

> Perhaps we need to ask for help in gcc/gcc-help or some other forum.

Yes, please do so.

[...]

> [3. text/plain; fortran-diagnostics-part3.diff]

[...]

> Index: gcc/fortran/error.c
> ===================================================================
> --- gcc/fortran/error.c	(revision 214251)
> +++ gcc/fortran/error.c	(working copy)
> @@ -956,10 +956,67 @@ gfc_warning_now (const char *gmsgid, ...
>      gfc_increment_error_count();
>  
>    buffer_flag = i;
>  }
>  
> +/* Encode and return a source_location from a column number. The
> +   source line considered is the last source line used to call
> +   linemap_line_start, i.e, the last source line which a location was
> +   encoded from.  */
> +
> +source_location
> +linemap_redo_position_for_column (struct line_maps *set, source_location loc,
> +                                  unsigned int to_column)
> +{

I would put this kind of function in the line-map module, rather than
here.


> +  const struct line_map * map = linemap_lookup (set, loc);
> +  gcc_assert(to_column < (1u << map->d.ordinary.column_bits));

To write that, you must be sure that map is an ordinary map.  That is,
one that encodes non-virtual locations; IOW, that loc is a non-virtual
location.  Otherwise, you'd need to resolve loc to its associated
spelling location and get the map for that spelling location.

Also, to make sure that loc + to_column belong to this map, a better
assertion I could think of would be:

    gcc_assert(MAP_START_LOCATION(map[1]) < loc + to_column);

This is because by design, locations emitted by a given map are
smaller than MAP_START_LOCATION of the next map.  In the particular
case of 'map' being the last map of the set of maps, I guess we can
and should do away with the assert completely.

More generally, I'd rather use:

    linemap_position_for_line_and_column(map,
					 SOURCE_LINE(map, loc),
					 column)

instead of doing (loc + to_column) to get to the new location.

[...]

> +  gcc_assert (map == linemap_lookup (set, r));

OK.

> +  return r;
> +}

[...]

I hope this helps.

Cheers,
Manuel López-Ibáñez Sept. 27, 2014, 6:42 p.m. UTC | #2
On 25 September 2014 13:39, Dodji Seketeli <dodji@redhat.com> wrote:
>> 2) In the Fortran FE, which gives quite precise location information
>> by tracking the characters that it wants to warn about instead of
>> relying on the line-map machinery.
>
> So with this feature, the Fortran FE would then use the then more
> "generic" diagnostics machinery, right?

This is the plan. The benefits will be more shared code, deleting a
lot of duplicated stuff in the Fortran FE and supporting in Fortran
all the goodies of GCC (#pragmas, color, options printing, macro
unwinder). However, the Fortran FE still has a long way to go to make
use of all the features of the common diagnostics machinery.. On the
bright side, the common diagnostics machinery already supports all
features of the Fortran FE except for offset locations, multiple
locations (and multiple carets), and buffered diagnostics (well, the
diagnostics machinery does buffer, but there is no API to clear the
buffer without printing it).

I think a bunch of FE diagnostic calls could already use the common
machinery (some are already using it). Unfortunately, most Fortran
diagnostics use offset locations because the FE does not track the
locations of tokens with line-maps (I think the locations are computed
but not stored or passed down to the diagnostic functions).

The work to be done is not even technically difficult. The lack of
progress on this is due, as always, to lack of time by the people
currently working on GCC and/or lack of new contributors.

Cheers,

Manuel.
diff mbox

Patch

Index: gcc/c-family/c-format.c
===================================================================
--- gcc/c-family/c-format.c	(revision 197155)
+++ gcc/c-family/c-format.c	(working copy)
@@ -377,10 +377,11 @@  typedef struct format_wanted_type
   int format_length;
   /* The actual parameter to check against the wanted type.  */
   tree param;
   /* The argument number of that parameter.  */
   int arg_num;
+  int offset_loc;
   /* The next type to check for this format conversion, or NULL if none.  */
   struct format_wanted_type *next;
 } format_wanted_type;
 
 /* Convenience macro for format_length_info meaning unused.  */
@@ -903,10 +904,11 @@  typedef struct
   /* Number of leaves of the format argument that were unterminated
      strings.  */
   int number_unterminated;
   /* Number of leaves of the format argument that were not counted above.  */
   int number_other;
+  location_t loc;
 } format_check_results;
 
 typedef struct
 {
   format_check_results *res;
@@ -996,11 +998,11 @@  check_function_format (tree attrs, int n
     {
       if (is_attribute_p ("format", TREE_PURPOSE (a)))
 	{
 	  /* Yup; check it.  */
 	  function_format_info info;
-	  decode_format_attr (TREE_VALUE (a), &info, 1);
+	  decode_format_attr (TREE_VALUE (a), &info, /*validated=*/true);
 	  if (warn_format)
 	    {
 	      /* FIXME: Rewrite all the internal functions in this file
 		 to use the ARGARRAY directly instead of constructing this
 		 temporary list.  */
@@ -1465,10 +1467,11 @@  check_format_arg (void *ctx, tree format
   if (TREE_CODE (format_tree) != ADDR_EXPR)
     {
       res->number_non_literal++;
       return;
     }
+  res->loc = EXPR_LOCATION(format_tree);
   format_tree = TREE_OPERAND (format_tree, 0);
   if (format_types[info->format_type].flags 
       & (int) FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL)
     {
       bool objc_str = (info->format_type == gcc_objc_string_format_type);
@@ -1637,11 +1640,13 @@  check_format_info_main (format_check_res
 
       if (*format_chars++ != '%')
 	continue;
       if (*format_chars == 0)
 	{
-	  warning (OPT_Wformat_, "spurious trailing %<%%%> in format");
+	  warning_at (res->loc ? res->loc : input_location, 
+                      format_chars - orig_format_chars,
+                      OPT_Wformat_, "spurious trailing %<%%%> in format");
 	  continue;
 	}
       if (*format_chars == '%')
 	{
 	  ++format_chars;
@@ -2449,10 +2454,11 @@  format_type_warning (format_wanted_type
   const char *wanted_type_name = type->wanted_type_name;
   const char *format_start = type->format_start;
   int format_length = type->format_length;
   int pointer_count = type->pointer_count;
   int arg_num = type->arg_num;
+  int offset_loc = type->offset_loc;
 
   char *p;
   /* If ARG_TYPE is a typedef with a misleading name (for example,
      size_t but not the standard size_t expected by printf %zu), avoid
      printing the typedef name.  */
Index: gcc/diagnostic.c
===================================================================
--- gcc/diagnostic.c	(revision 197155)
+++ gcc/diagnostic.c	(working copy)
@@ -189,10 +189,11 @@  diagnostic_set_info_translated (diagnost
   diagnostic->message.format_spec = msg;
   diagnostic->location = location;
   diagnostic->override_column = 0;
   diagnostic->kind = kind;
   diagnostic->option_index = 0;
+  diagnostic->offset = 0;
 }
 
 /* Initialize DIAGNOSTIC, where the message GMSGID has not yet been
    translated.  */
 void
@@ -215,10 +216,11 @@  diagnostic_build_prefix (diagnostic_cont
 #undef DEFINE_DIAGNOSTIC_KIND
     "must-not-happen"
   };
   const char *text = _(diagnostic_kind_text[diagnostic->kind]);
   expanded_location s = expand_location_to_spelling_point (diagnostic->location);
+  s.column += diagnostic->offset;
   if (diagnostic->override_column)
     s.column = diagnostic->override_column;
   gcc_assert (diagnostic->kind < DK_LAST_DIAGNOSTIC_KIND);
 
   return
@@ -269,10 +271,11 @@  diagnostic_show_locus (diagnostic_contex
       || diagnostic->location == context->last_location)
     return;
 
   context->last_location = diagnostic->location;
   s = expand_location_to_spelling_point (diagnostic->location);
+  s.column += diagnostic->offset;
   line = location_get_source_line (s);
   if (line == NULL)
     return;
 
   max_width = context->caret_max_width;
@@ -945,10 +948,27 @@  warning_at (location_t location, int opt
   ret = report_diagnostic (&diagnostic);
   va_end (ap);
   return ret;
 }
 
+
+bool
+warning_at (location_t location, unsigned int offset, int opt, const char *gmsgid, ...)
+{
+  diagnostic_info diagnostic;
+  va_list ap;
+  bool ret;
+
+  va_start (ap, gmsgid);
+  diagnostic_set_info (&diagnostic, gmsgid, &ap, location, DK_WARNING);
+  diagnostic.option_index = opt;
+  diagnostic.offset = offset;
+  ret = report_diagnostic (&diagnostic);
+  va_end (ap);
+  return ret;
+}
+
 /* A "pedantic" warning at LOCATION: issues a warning unless
    -pedantic-errors was given on the command line, in which case it
    issues an error.  Use this for diagnostics required by the relevant
    language standard, if you have chosen not to make them errors.
 
Index: gcc/diagnostic.h
===================================================================
--- gcc/diagnostic.h	(revision 197155)
+++ gcc/diagnostic.h	(working copy)
@@ -36,10 +36,11 @@  typedef struct diagnostic_info
   void *x_data;
   /* The kind of diagnostic it is about.  */
   diagnostic_t kind;
   /* Which OPT_* directly controls this diagnostic.  */
   int option_index;
+  int offset;
 } diagnostic_info;
 
 /* Each time a diagnostic's classification is changed with a pragma,
    we record the change and the location of the change in an array of
    these structs.  */
Index: gcc/diagnostic-core.h
===================================================================
--- gcc/diagnostic-core.h	(revision 197155)
+++ gcc/diagnostic-core.h	(working copy)
@@ -58,10 +58,13 @@  extern void internal_error (const char *
      ATTRIBUTE_NORETURN;
 /* Pass one of the OPT_W* from options.h as the first parameter.  */
 extern bool warning (int, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern bool warning_at (location_t, int, const char *, ...)
     ATTRIBUTE_GCC_DIAG(3,4);
+extern bool warning_at (location_t, unsigned int, int, const char *, ...)
+    ATTRIBUTE_GCC_DIAG(4,5);
+
 extern void error (const char *, ...) ATTRIBUTE_GCC_DIAG(1,2);
 extern void error_n (location_t, int, const char *, const char *, ...)
     ATTRIBUTE_GCC_DIAG(3,5) ATTRIBUTE_GCC_DIAG(4,5);
 extern void error_at (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void fatal_error (const char *, ...) ATTRIBUTE_GCC_DIAG(1,2)