diff mbox series

[RFC] diagnostics: Add support for Unicode drawing characters

Message ID 20200723162858.GA1043@ldh-imac.local
State New
Headers show
Series [RFC] diagnostics: Add support for Unicode drawing characters | expand

Commit Message

Lewis Hyatt July 23, 2020, 4:28 p.m. UTC
Hello-

The attached patch is complete including docs, but I tagged it as RFC
because I am not sure if anyone will like it, or if the general reaction may
be closer to recoiling in horror :). Would appreciate your thoughts,
please...

Currently, if a UTF-8 locale is detected, GCC changes the quote characters
it outputs in diagnostics to Unicode directional quotes. I feel like this is
a nice touch, so I was wondering whether GCC shouldn't do more along these
lines. This patch adds support for using Unicode line drawing characters and
similar things when outputting diagnostics. There is a new option
-fdiagnostics-unicode-drawing=[auto|never|always] to control it, which
defaults to auto. "auto" will enable the feature under the same
circumstances that Unicode quotes get output, namely when the locale is
determined by gcc_init_libintl() to support UTF-8. (The new option does not
affect Unicode quote characters, which currently are not configurable and
are determined solely by the locale.)

The elements implemented are:

    * Vertical lines, e.g. those indicating labels and those separating the
      source lines from the line numbers, are changed to line drawing
      characters.

    * The diagnostic paths output by the static analyzer make use of line
      drawing characters to output smooth corners etc.

    * The squiggly underline ~~~~~ used to highlight source locations is
      changed to a double underline ═════. The main reason for this is that
      it enables a seamless "tee" character to connect the underline to a
      label line if one exists.

    * Carets (^) are changed to a slightly different character (∧). I think
      the new one is a little nicer looking, although probably not worth the
      trouble on its own. I wanted to implement the support in this patch
      because carets are harder to change than the rest of the elements
      (front ends have an interface to override them, which currently
      Fortran makes use of), so I thought it worthwhile to get this logic in
      place, so that it can easily be changed to a superior character
      in the future if one comes up. It would also be easy enough to leave
      the Unicode support in place for carets, but keep the default set to
      the plain one for now.

As an example, this diagnostic from gcc.dg/format/diagnostic-ranges.c:

diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
  196 |   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
      |                           ~^~~    ~~~~~~~~~
      |                            |          |
      |                            int        long int

would become instead:

diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
  196 │   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
      │                           ═∧══    ════╤════
      │                            │          │
      │                            int        long int

Hopefully you are viewing this in a terminal that displays it properly :), in
which case, hopefully you may find it to be an improvement?

Here is a more involved example from the analyzer:

setjmp-5.c: In function ‘outer’:
setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
   21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
      |   ^~~~~~~~~~~~~~~~~
  ‘outer’: events 1-2
    |
    |   15 | void outer (void)
    |      |      ^~~~~
    |      |      |
    |      |      (1) entry to ‘outer’
    |......
    |   19 |   inner ();
    |      |   ~~~~~~~~
    |      |   |
    |      |   (2) calling ‘inner’ from ‘outer’
    |
    +--> ‘inner’: event 3
           |
           |   10 | static void inner (void)
           |      |             ^~~~~
           |      |             |
           |      |             (3) entry to ‘inner’
           |
         ‘inner’: event 4
           |
           |   12 |   SETJMP (env);
           |      |   ^~~~~~
           |      |   |
           |      |   (4) ‘setjmp’ called here
           |
    <------+
    |
  ‘outer’: events 5-6
    |
    |   19 |   inner ();
    |      |   ^~~~~~~~
    |      |   |
    |      |   (5) returning to ‘outer’ from ‘inner’
    |   20 |
    |   21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
    |      |   ~~~~~~~~~~~~~~~~~
    |      |   |
    |      |   (6) here
    |

would become instead:

setjmp-5.c: In function ‘outer’:
setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
   21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
      │   ∧════════════════
  ‘outer’: events 1-2
    │
    │   15 │ void outer (void)
    │      │      ∧════
    │      │      │
    │      │      (1) entry to ‘outer’
    │......
    │   19 │   inner ();
    │      │   ╤═══════
    │      │   │
    │      │   (2) calling ‘inner’ from ‘outer’
    │
    └──> ‘inner’: event 3
           │
           │   10 │ static void inner (void)
           │      │             ∧════
           │      │             │
           │      │             (3) entry to ‘inner’
           │
         ‘inner’: event 4
           │
           │   12 │   SETJMP (env);
           │      │   ∧═════
           │      │   │
           │      │   (4) ‘setjmp’ called here
           │
    ┌<─────┘
    │
  ‘outer’: events 5-6
    │
    │   19 │   inner ();
    │      │   ∧═══════
    │      │   │
    │      │   (5) returning to ‘outer’ from ‘inner’
    │   20 │
    │   21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
    │      │   ╤════════════════
    │      │   │
    │      │   (6) here
    │


Although probably premature, bootstrap and regtest were done on x86-64
linux, all tests the same before/after and new tests passing:
FAIL 96 96
PASS 479090 479239
UNSUPPORTED 11946 11946
UNTESTED 194 194
XFAIL 1839 1839
XPASS 36 36

I tried to set this up as a general framework; at least, it is easy in one
place to change the characters that are used for various contexts, so that
if people like the general idea, but not some of the specifics, the patch is
easily modified for that now or in the future. Thanks for any feedback!

-Lewis
From: Lewis Hyatt <lhyatt@gmail.com>
Date: Thu, 23 Jul 2020 10:11:04 -0400
Subject: [PATCH] diagnostics: Add support for Unicode drawing characters

Adds the new option -fdiagnostics-unicode-drawing, on by default if a
UTF-8 locale is detected, which modifies diagnostics to use extended Unicode
characters, such as line-drawing characters.

gcc/ChangeLog:

	* common.opt: Add new option -fdiagnostics-unicode-drawing.
	* diagnostic-show-locus.c (struct point_state): Add new member.
	(layout::print_source_line): Support Unicode drawing feature.
	(layout::start_annotation_line): Likewise.
	(layout::print_annotation_line): Likewise.
	(layout::print_any_labels): Likewise.
	(layout::print_trailing_fixits): Likewise.
	(layout::get_state_at_point): Likewise.
	(test_one_liner_multiple_carets_and_ranges): Likewise.
	(test_one_liner_multiple_carets_and_ranges_utf8): Likewise.
	* diagnostic.c (diagnostic_drawing_init): New function.
	(diagnostic_initialize): Call the new function.
	* diagnostic.h (struct diagnostic_context): Support Unicode
	drawing feature.
	(enum diagnostics_drawing_rule): New enum for the new option.
	(diagnostic_drawing_init): Declare.
	* doc/invoke.texi: Document the new option.
	* opts.c (common_handle_option): Call diagnostic_drawing_init() to
	support the new option.
	* selftest-diagnostic.c
	(test_diagnostic_context::test_diagnostic_context): Disable
	Unicode drawing in selftests that use test_diagnostic_context.
	* tree-diagnostic-path.cc (path_summary::print): Support Unicode
	drawing feature.

gcc/fortran/ChangeLog:

	* error.c (gfc_diagnostics_init): Adapt custom carets, as they
	need to be strings rather than chars now.
	(gfc_diagnostics_finish): Likewise.

libstdc++-v3/ChangeLog:

	* testsuite/lib/libstdc++.exp: Disable Unicode drawing by default
	in the testsuite.

gcc/testsuite/ChangeLog:

	* gcc.dg/plugin/diagnostic_plugin_test_show_locus.c: Adapt custom
	carets, as they need to be strings rather than chars now.
	* lib/c-compat.exp: Handle removing Unicode drawing option from
	compilers that do not support it.
	* lib/prune.exp: Disable Unicode drawing by default
	in the testsuite.
	* gcc.dg/analyzer/setjmp-5-utf8.c: New test.
	* gcc.dg/format/diagnostic-ranges-utf8.c: New test.

Comments

Martin Sebor July 23, 2020, 8:52 p.m. UTC | #1
On 7/23/20 10:28 AM, Lewis Hyatt via Gcc-patches wrote:
> Hello-
> 
> The attached patch is complete including docs, but I tagged as RFC
> because I am not sure if anyone will like it, or if the general reaction may
> be closer to recoiling in horror :). Would appreciate your thoughts,
> please...

I don't have much of an opinion on the proposed changes but they
remind me of an enhancement I have been thinking about for a while.
I think it would be a nice touch to more finely differentiate parts
of the message text from the rest than just by highlighting it in
bold.  Specifically, I'm thinking of terms of a language grammar,
but a similar approach could be used for other elements as well.

For example, a number of diagnostic messages refer to the term
constant-expression.  A common convention used by language
standards is to render these terms in italics.  Doing the same
in GCC output would make it clear when it refers to the term of
the grammar (especially in non-hyphenated terms).  Since some
terminals support italics even without UTF-8, this enhancement
could be made independently.

Other font characteristics could be used to differentiate other
"elements" referenced in the messages, such as numerical constants
from ordinary numbers, highlight especially relevant parts of quoted
text like option arguments (for instance, the 9 in "alignment of
%qD will increase in %<-fabi-version=9%>") or attribute arguments
that cannot be underscored, or even be used in hints (e.g.,
strikethrough to denote deletion and underline for insertion).

Martin

> 
> Currently, if a UTF-8 locale is detected, GCC changes the quote characters
> it outputs in diagnostics to Unicode directional quotes. I feel like this is
> a nice touch, so I was wondering whether GCC shouldn't do more along these
> lines. This patch adds support for using Unicode line drawing characters and
> similar things when outputting diagnostics. There is a new option
> -fdiagnostics-unicode-drawing=[auto|never|always] to control it, which
> defaults to auto. "auto" will enable the feature under the same
> circumstances that Unicode quotes get output, namely when the locale is
> determined by gcc_init_libintl() to support UTF-8. (The new option does not
> affect Unicode quote characters, which currently are not configurable and
> are determined solely by the locale.)
> 
> The elements implemented are:
> 
>      * Vertical lines, e.g. those indicating labels and those separating the
>        source lines from the line numbers, are changed to line drawing
>        characters.
> 
>      * The diagnostic paths output by the static analyzer make use of line
>        drawing characters to output smooth corners etc.
> 
>      * The squiggly underline ~~~~~ used to highlight source locations is
>        changed to a double underline ═════. The main reason for this is that
>        it enables a seamless "tee" character to connect the underline to a
>        label line if one exists.
> 
>      * Carets (^) are changed to a slightly different character (∧). I think
>        the new one is a little nicer looking, although probably not worth the
>        trouble on its own. I wanted to implement the support in this patch
>        because carets are harder to change than the rest of the elements
>        (front ends have an interface to override them, which currently
>        Fortran makes use of), so I thought it worthwhile to get this logic in
>        place, so that it can easily be changed to a more superior character
>        in the future if one comes up. It would also be easy enough to leave
>        the Unicode support in place for carets, but keep the default set to
>        the plain one for now.
> 
> As an example, this diagnostic from gcc.dg/format/diagnostic-ranges.c:
> 
> diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
>    196 |   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
>        |                           ~^~~    ~~~~~~~~~
>        |                            |          |
>        |                            int        long int
> 
> would become instead:
> 
> diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
>    196 │   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
>        │                           ═∧══    ════╤════
>        │                            │          │
>        │                            int        long int
> 
> Hopefully you are viewing this in a terminal that displays it properly :), in
> which case, hopefully you may find it to be an improvement?
> 
> Here is a more involved example from the analyzer:
> 
> setjmp-5.c: In function ‘outer’:
> setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
>     21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
>        |   ^~~~~~~~~~~~~~~~~
>    ‘outer’: events 1-2
>      |
>      |   15 | void outer (void)
>      |      |      ^~~~~
>      |      |      |
>      |      |      (1) entry to ‘outer’
>      |......
>      |   19 |   inner ();
>      |      |   ~~~~~~~~
>      |      |   |
>      |      |   (2) calling ‘inner’ from ‘outer’
>      |
>      +--> ‘inner’: event 3
>             |
>             |   10 | static void inner (void)
>             |      |             ^~~~~
>             |      |             |
>             |      |             (3) entry to ‘inner’
>             |
>           ‘inner’: event 4
>             |
>             |   12 |   SETJMP (env);
>             |      |   ^~~~~~
>             |      |   |
>             |      |   (4) ‘setjmp’ called here
>             |
>      <------+
>      |
>    ‘outer’: events 5-6
>      |
>      |   19 |   inner ();
>      |      |   ^~~~~~~~
>      |      |   |
>      |      |   (5) returning to ‘outer’ from ‘inner’
>      |   20 |
>      |   21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
>      |      |   ~~~~~~~~~~~~~~~~~
>      |      |   |
>      |      |   (6) here
>      |
> 
> would become instead:
> 
> setjmp-5.c: In function ‘outer’:
> setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
>     21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
>        │   ∧════════════════
>    ‘outer’: events 1-2
>      │
>      │   15 │ void outer (void)
>      │      │      ∧════
>      │      │      │
>      │      │      (1) entry to ‘outer’
>      │......
>      │   19 │   inner ();
>      │      │   ╤═══════
>      │      │   │
>      │      │   (2) calling ‘inner’ from ‘outer’
>      │
>      └──> ‘inner’: event 3
>             │
>             │   10 │ static void inner (void)
>             │      │             ∧════
>             │      │             │
>             │      │             (3) entry to ‘inner’
>             │
>           ‘inner’: event 4
>             │
>             │   12 │   SETJMP (env);
>             │      │   ∧═════
>             │      │   │
>             │      │   (4) ‘setjmp’ called here
>             │
>      ┌<─────┘
>      │
>    ‘outer’: events 5-6
>      │
>      │   19 │   inner ();
>      │      │   ∧═══════
>      │      │   │
>      │      │   (5) returning to ‘outer’ from ‘inner’
>      │   20 │
>      │   21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
>      │      │   ╤════════════════
>      │      │   │
>      │      │   (6) here
>      │
> 
> 
> Although probably premature, bootstrap and regtest were done on x86-64
> linux, all tests the same before/after and new tests passing:
> FAIL 96 96
> PASS 479090 479239
> UNSUPPORTED 11946 11946
> UNTESTED 194 194
> XFAIL 1839 1839
> XPASS 36 36
> 
> I tried to set this up as a general framework, at least, it is easy in one
> place to change the characters that are used for various contexts, so that
> if people like the general idea, but not some of the specifics, the patch is
> easily modified for that now or in the future. Thanks for any feedback!
> 
> -Lewis
>
David Malcolm July 23, 2020, 9:47 p.m. UTC | #2
On Thu, 2020-07-23 at 12:28 -0400, Lewis Hyatt via Gcc-patches wrote:
> Hello-
> 
> The attached patch is complete including docs, but I tagged as RFC
> because I am not sure if anyone will like it, or if the general
> reaction may
> be closer to recoiling in horror :). Would appreciate your thoughts,
> please...

Thanks for working on this.  I'm interested in other people's thoughts
on this.  Various comments inline throughout below.

> Currently, if a UTF-8 locale is detected, GCC changes the quote
> characters
> it outputs in diagnostics to Unicode directional quotes. I feel like
> this is
> a nice touch, so I was wondering whether GCC shouldn't do more along
> these
> lines. This patch adds support for using Unicode line drawing
> characters and
> similar things when outputting diagnostics. There is a new option
> -fdiagnostics-unicode-drawing=[auto|never|always] to control it,
> which
> defaults to auto. "auto" will enable the feature under the same
> circumstances that Unicode quotes get output, namely when the locale
> is
> determined by gcc_init_libintl() to support UTF-8. (The new option
> does not
> affect Unicode quote characters, which currently are not configurable
> and
> are determined solely by the locale.)

FWIW when I first started experimenting with location ranges back in
2015 my first patches had box-drawing characters for underlines; you
can see this in some of the early examples here (and similar URLs from
around then):

https://dmalcolm.fedorapeople.org/gcc/2015-08-18/plugin.html
  (this also has a different approach for labeling ranges, which I
called "captions", putting them in a right margin)

https://dmalcolm.fedorapeople.org/gcc/2015-08-19/diagnostic-test-string-literals-1.html

https://dmalcolm.fedorapeople.org/gcc/2015-08-26/tree-expression-ranges.html

etc; the patch kits were:

https://gcc.gnu.org/legacy-ml/gcc-patches/2015-03/msg00837.html
https://gcc.gnu.org/pipermail/gcc-patches/2015-September/428036.html
https://gcc.gnu.org/legacy-ml/gcc-patches/2015-09/msg01696.html

In:
  https://gcc.gnu.org/legacy-ml/gcc-patches/2015-09/msg01700.html
I wrote:
> * Eliminated UTF-8/box-drawing and captions.  Captions were cute but
>   weren't "fully baked".  Without them, box-drawing isn't really
>   needed, and I think I prefer the ASCII look, with the actual
>   "caret" character, and '~' makes it easier to count characters
>   compared to a box-drawing line, in my terminal's font, at least.
>   Doing so greatly simplifies the new locus-printing code.

So I dropped the UTF-8 box drawing from that original kit for:
(a) simplicity (the original patch kit was huge in scope, covering a
bunch of ideas for diagnostics - ranges, labeling, fix-it hints,
spelling suggestions, so I wanted to reduce the scope to something
manageable)
(b) I found it easier to count characters with "~"


The thing I'm most nervous about with this patch is the potential for
introducing mojibake when people copy and paste GCC output.

For example, looking at:
https://gcc.gnu.org/legacy-ml/gcc-patches/2015-03/msg00837.html
I see mojibake where the unicode line-drawing characters in my email
are being displayed in the HTML mailing list archive via "&#xE2;" -
something has gone wrong with encoding somewhere between the copy&paste
from my terminal, the email, and the list archive.

That said, looking at your email in the archive here:
https://gcc.gnu.org/pipermail/gcc-patches/2020-July/550551.html
I don't see any mojibake.

What happens if GCC's stderr is piped into "less"?
What happens if GCC's stderr is saved in a build.log file, uploaded
somewhere, and then viewed?
etc.


> The elements implemented are:
> 
>     * Vertical lines, e.g. those indicating labels and those
> separating the
>       source lines from the line numbers, are changed to line drawing
>       characters.
> 
>     * The diagnostic paths output by the static analyzer make use of
> line
>       drawing characters to output smooth corners etc.
> 
>     * The squiggly underline ~~~~~ used to highlight source locations
> is
>       changed to a double underline ═════. The main reason for this
> is that
>       it enables a seamless "tee" character to connect the underline
> to a
>       label line if one exists.
> 
>     * Carets (^) are changed to a slightly different character (∧). I
> think
>       the new one is a little nicer looking, although probably not
> worth the
>       trouble on its own. I wanted to implement the support in this
> patch
>       because carets are harder to change than the rest of the
> elements
>       (front ends have an interface to override them, which currently
>       Fortran makes use of), so I thought it worthwhile to get this
> logic in
>       place, so that it can easily be changed to a more superior
> character
>       in the future if one comes up. It would also be easy enough to
> leave
>       the Unicode support in place for carets, but keep the default
> set to
>       the plain one for now.

Some other ideas:

* fix-it hints

* maybe have a different character for separating the line numbers as
opposed to those for labels and for showing interprocedural paths.

> As an example, this diagnostic from gcc.dg/format/diagnostic-
> ranges.c:
> 
> diagnostic-ranges.c:196:28: warning: field width specifier ‘*’
> expects argument of type ‘int’, but argument 3 has type ‘long int’ [-
> Wformat=]
>   196 |   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
>       |                           ~^~~    ~~~~~~~~~
>       |                            |          |
>       |                            int        long int
> 
> would become instead:
> 
> diagnostic-ranges.c:196:28: warning: field width specifier ‘*’
> expects argument of type ‘int’, but argument 3 has type ‘long int’ [-
> Wformat=]
>   196 │   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
>       │                           ═∧══    ════╤════
>       │                            │          │
>       │                            int        long int
> 
> Hopefully you are viewing this in a terminal that displays it
> properly :), in
> which case, hopefully you may find it to be an improvement?

I wonder if you can upload colorized examples somewhere?

e.g. using bin/gcc-color-to-html.py from our website repository:
 https://gcc.gnu.org/git/?p=gcc-wwwdocs.git;a=blob;f=bin/gcc-color-to-html.py
or one of the various ansi2html conversion scripts e.g.
  http://www.pixelbeat.org/scripts/ansi2html.sh

> Here is a more involved example from the analyzer:
> 
> setjmp-5.c: In function ‘outer’:
> setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function
> of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
>    21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called after
> enclosing function of 'setjmp' has returned" } */
>       |   ^~~~~~~~~~~~~~~~~
>   ‘outer’: events 1-2
>     |
>     |   15 | void outer (void)
>     |      |      ^~~~~
>     |      |      |
>     |      |      (1) entry to ‘outer’
>     |......
>     |   19 |   inner ();
>     |      |   ~~~~~~~~
>     |      |   |
>     |      |   (2) calling ‘inner’ from ‘outer’
>     |
>     +--> ‘inner’: event 3
>            |
>            |   10 | static void inner (void)
>            |      |             ^~~~~
>            |      |             |
>            |      |             (3) entry to ‘inner’
>            |
>          ‘inner’: event 4
>            |
>            |   12 |   SETJMP (env);
>            |      |   ^~~~~~
>            |      |   |
>            |      |   (4) ‘setjmp’ called here
>            |
>     <------+
>     |
>   ‘outer’: events 5-6
>     |
>     |   19 |   inner ();
>     |      |   ^~~~~~~~
>     |      |   |
>     |      |   (5) returning to ‘outer’ from ‘inner’
>     |   20 |
>     |   21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called
> after enclosing function of 'setjmp' has returned" } */
>     |      |   ~~~~~~~~~~~~~~~~~
>     |      |   |
>     |      |   (6) here
>     |
> 
> would become instead:
> 
> setjmp-5.c: In function ‘outer’:
> setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function
> of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
>    21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called after
> enclosing function of 'setjmp' has returned" } */
>       │   ∧════════════════
>   ‘outer’: events 1-2
>     │
>     │   15 │ void outer (void)
>     │      │      ∧════
>     │      │      │
>     │      │      (1) entry to ‘outer’
>     │......
>     │   19 │   inner ();

I wonder if there's a fancier way to express the gap in the lines if
Unicode is available?


>     │      │   ╤═══════
>     │      │   │
>     │      │   (2) calling ‘inner’ from ‘outer’
>     │
>     └──> ‘inner’: event 3
>            │
>            │   10 │ static void inner (void)
>            │      │             ∧════
>            │      │             │
>            │      │             (3) entry to ‘inner’
>            │
>          ‘inner’: event 4
>            │
>            │   12 │   SETJMP (env);
>            │      │   ∧═════
>            │      │   │
>            │      │   (4) ‘setjmp’ called here
>            │

Unrelated to this patch, but it would be nice if the analyzer inserted
an event at the function end showing the frame in "env" becoming
invalid, since that's what's pertinent to the diagnostic.

>     ┌<─────┘
>     │
>   ‘outer’: events 5-6
>     │
>     │   19 │   inner ();
>     │      │   ∧═══════
>     │      │   │
>     │      │   (5) returning to ‘outer’ from ‘inner’
>     │   20 │
>     │   21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called
> after enclosing function of 'setjmp' has returned" } */
>     │      │   ╤════════════════
>     │      │   │
>     │      │   (6) here
>     │

FWIW I experimented with using unicode circled number characters in
place of (1), (2), etc for events in diagnostic_paths but the results
looked bad in my terminal, so I stuck to the ASCII form above.

In my more adventurous moments I've been tempted to use background
colorization to show the stack pushes and pops in a flamegraph-style
way, but I suspect it would be garish and too "busy" visually.

> 
> Although probably premature, bootstrap and regtest were done on x86-
> 64
> linux, all tests the same before/after and new tests passing:
> FAIL 96 96
> PASS 479090 479239
> UNSUPPORTED 11946 11946
> UNTESTED 194 194
> XFAIL 1839 1839
> XPASS 36 36

I see the patch kit touches Fortran; was this with all frontends
enabled?  (though I guess I'm likewise being premature here)

> I tried to set this up as a general framework, at least, it is easy
> in one
> place to change the characters that are used for various contexts, so
> that
> if people like the general idea, but not some of the specifics, the
> patch is
> easily modified for that now or in the future. Thanks for any
> feedback!

Thanks again for the patch; let's see what others think.
Dave
Lewis Hyatt July 23, 2020, 11:03 p.m. UTC | #3
On Thu, Jul 23, 2020 at 05:47:28PM -0400, David Malcolm wrote:
> On Thu, 2020-07-23 at 12:28 -0400, Lewis Hyatt via Gcc-patches wrote:
> > Hello-
> > 
> > The attached patch is complete including docs, but I tagged as RFC
> > because I am not sure if anyone will like it, or if the general
> > reaction may
> > be closer to recoiling in horror :). Would appreciate your thoughts,
> > please...
> 
> Thanks for working on this.  I'm interested in other people's thoughts
> on this.  Various comments inline throughout below.
>

Thanks for the feedback! I made a few replies below as well.

> Currently, if a UTF-8 locale is detected, GCC changes the quote
> > characters
> > it outputs in diagnostics to Unicode directional quotes. I feel like
> > this is
> > a nice touch, so I was wondering whether GCC shouldn't do more along
> > these
> > lines. This patch adds support for using Unicode line drawing
> > characters and
> > similar things when outputting diagnostics. There is a new option
> > -fdiagnostics-unicode-drawing=[auto|never|always] to control it,
> > which
> > defaults to auto. "auto" will enable the feature under the same
> > circumstances that Unicode quotes get output, namely when the locale
> > is
> > determined by gcc_init_libintl() to support UTF-8. (The new option
> > does not
> > affect Unicode quote characters, which currently are not configurable
> > and
> > are determined solely by the locale.)
> 
> FWIW when I first started experimenting with location ranges back in
> 2015 my first patches had box-drawing characters for underlines; you
> can see this in some of the early examples here (and similar URLs from
> around then):
>
> https://dmalcolm.fedorapeople.org/gcc/2015-08-18/plugin.html
>   (this also has a different approach for labeling ranges, which I
> called "captions", putting them in a right margin)
> 
> https://dmalcolm.fedorapeople.org/gcc/2015-08-19/diagnostic-test-string-literals-1.html
> 
> https://dmalcolm.fedorapeople.org/gcc/2015-08-26/tree-expression-ranges.html
> 
> etc; the patch kits were:
> 
> https://gcc.gnu.org/legacy-ml/gcc-patches/2015-03/msg00837.html
> https://gcc.gnu.org/pipermail/gcc-patches/2015-September/428036.html
> https://gcc.gnu.org/legacy-ml/gcc-patches/2015-09/msg01696.html
> 
> In:
>   https://gcc.gnu.org/legacy-ml/gcc-patches/2015-09/msg01700.html
> I wrote:
> > * Eliminated UTF-8/box-drawing and captions.  Captions were cute but
> >   weren't "fully baked".  Without them, box-drawing isn't really
> >   needed, and I think I prefer the ASCII look, with the actual
> >   "caret" character, and '~' makes it easier to count characters
> >   compared to a box-drawing line, in my terminal's font, at least.
> >   Doing so greatly simplifies the new locus-printing code.
> 
> So I dropped the UTF-8 box drawing from that original kit for:
> (a) simplicity (the original patch kit was huge in scope, covering a
> bunch of ideas for diagnostics - ranges, labeling, fix-it hints,
> spelling suggestions, so I wanted to reduce the scope to something
> manageable)
> (b) I found it easier to count characters with "~"
>

Oh interesting, sorry I didn't realize this had already been considered. Well
perhaps it's a good time to revisit it anyway, if people find it appealing.

> 
> The thing I'm most nervous about with this patch is the potential for
> introducing mojibake when people copy and paste GCC output.
> 
> For example, looking at:
> https://gcc.gnu.org/legacy-ml/gcc-patches/2015-03/msg00837.html
> I see mojibake where the unicode line-drawing characters in my email
> are being displayed in the HTML mailing list archive via "&#xE2;" -
> something has gone wrong with encoding somewhere between the copy&paste
> from my terminal, the email, and the list archive.
> 
> That said, looking at your email in the archive here:
> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/550551.html
> I don't see any mojibake.
> 
> What happens if GCC's stderr is piped into "less"?
> What happens if GCC's stderr is saved in a build.log file, uploaded
> somewhere, and then viewed?
> etc.
>

Piping to less should presumably be no problem, unless the user goes out of their
way to use different locale settings there. Transporting the data is certainly
a potential area of difficulty, as is also interaction with IDEs and other
tools that want to parse the data, but such tools are at least already
handling the UTF-8 quote characters OK.

By the way, I was wondering separately what you think about adding an option
like -fplain-diagnostics or something, which would achieve basically the same
thing you get in the testsuite right now (-fno-diagnostics-show-caret
-fno-diagnostics-show-line-numbers -fdiagnostics-color=never
-fdiagnostics-urls=never) but would change as necessary whenever diagnostics
evolve. It seems rather involved currently to add a new option like
-fdiagnostics-unicode-drawing but keep the testsuite working, in addition to
adding to prune.exp and to the libstdc++.exp, you also need to update the
compat.exp so that it can figure out to pass the option only to sufficiently
new compilers. With -fplain-diagnostics, this could just be part of the code
change and the testsuite could stay the same; this may also make it easier on
IDE type utilities since they could rely on a more stable format for the
diagnostics, assuming they don't already use JSON format.

> 
> > The elements implemented are:
> > 
> >     * Vertical lines, e.g. those indicating labels and those
> > separating the
> >       source lines from the line numbers, are changed to line drawing
> >       characters.
> > 
> >     * The diagnostic paths output by the static analyzer make use of
> > line
> >       drawing characters to output smooth corners etc.
> > 
> >     * The squiggly underline ~~~~~ used to highlight source locations
> > is
> >       changed to a double underline ═════. The main reason for this
> > is that
> >       it enables a seamless "tee" character to connect the underline
> > to a
> >       label line if one exists.
> > 
> >     * Carets (^) are changed to a slightly different character (∧). I
> > think
> >       the new one is a little nicer looking, although probably not
> > worth the
> >       trouble on its own. I wanted to implement the support in this
> > patch
> >       because carets are harder to change than the rest of the
> > elements
> >       (front ends have an interface to override them, which currently
> >       Fortran makes use of), so I thought it worthwhile to get this
> > logic in
> >       place, so that it can easily be changed to a more superior
> > character
> >       in the future if one comes up. It would also be easy enough to
> > leave
> >       the Unicode support in place for carets, but keep the default
> > set to
> >       the plain one for now.
> 
> Some other ideas:
> 
> * fix-it hints
> 
> * maybe have a different character for separating the line numbers as
> opposed to those for labels and for showing interprocedural paths.
>

Something like that would be easy to add, sure, perhaps a double vertical line
instead:

diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
  196 ║   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
      ║                           ═∧══    ════╤════
      ║                            │          │
      ║                            int        long int

> > As an example, this diagnostic from gcc.dg/format/diagnostic-
> > ranges.c:
> > 
> > diagnostic-ranges.c:196:28: warning: field width specifier ‘*’
> > expects argument of type ‘int’, but argument 3 has type ‘long int’ [-
> > Wformat=]
> >   196 |   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
> >       |                           ~^~~    ~~~~~~~~~
> >       |                            |          |
> >       |                            int        long int
> > 
> > would become instead:
> > 
> > diagnostic-ranges.c:196:28: warning: field width specifier ‘*’
> > expects argument of type ‘int’, but argument 3 has type ‘long int’ [-
> > Wformat=]
> >   196 │   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
> >       │                           ═∧══    ════╤════
> >       │                            │          │
> >       │                            int        long int
> > 
> > Hopefully you are viewing this in a terminal that displays it
> > properly :), in
> > which case, hopefully you may find it to be an improvement?
> 
> I wonder if you can upload colorized examples somewhere?
> 
> e.g. using bin/gcc-color-to-html.py from our website repository:
>  https://gcc.gnu.org/git/?p=gcc-wwwdocs.git;a=blob;f=bin/gcc-color-to-html.py
> or one of the various ansi2html conversion scripts e.g.
>   http://www.pixelbeat.org/scripts/ansi2html.sh
>

Here are some .PNG screenshots, which I hope is OK for this purpose?
https://drive.google.com/file/d/1MN-_eok_gwk_hl5C8DgvQGHPCpHAn1RH/view?usp=sharing
https://drive.google.com/file/d/1siR-vh1osCvT8VB9uMrVuaS-pznswdBs/view?usp=sharing
https://drive.google.com/file/d/1xgq0F-zhXpwOi3zJyU6EZEaQL079DJD5/view?usp=sharing

...

> > 
> > Although probably premature, bootstrap and regtest were done on x86-
> > 64
> > linux, all tests the same before/after and new tests passing:
> > FAIL 96 96
> > PASS 479090 479239
> > UNSUPPORTED 11946 11946
> > UNTESTED 194 194
> > XFAIL 1839 1839
> > XPASS 36 36
> 
> I see the patch kit touches Fortran; was this with all frontends
> enabled?  (though I guess I'm likewise being premature here)
>

Yes, this was all languages except for jit.

> > I tried to set this up as a general framework, at least, it is easy
> > in one
> > place to change the characters that are used for various contexts, so
> > that
> > if people like the general idea, but not some of the specifics, the
> > patch is
> > easily modified for that now or in the future. Thanks for any
> > feedback!
> 
> Thanks again for the patch; let's see what others think.
> Dave
>

Thanks for the feedback. If there is interest in something like this, I'm
happy to implement any suggestions.

-Lewis
Richard Sandiford July 24, 2020, 1:49 p.m. UTC | #4
Lewis Hyatt via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> On Thu, Jul 23, 2020 at 05:47:28PM -0400, David Malcolm wrote:
>> On Thu, 2020-07-23 at 12:28 -0400, Lewis Hyatt via Gcc-patches wrote:
>> > Hello-
>> > 
>> > The attached patch is complete including docs, but I tagged as RFC
>> > because I am not sure if anyone will like it, or if the general
>> > reaction may
>> > be closer to recoiling in horror :). Would appreciate your thoughts,
>> > please...
>> 
>> Thanks for working on this.  I'm interested in other people's thoughts
>> on this.  Various comments inline throughout below.

+1 in favour FWIW.

> […]
> By the way, I was wondering separately what you think about adding an option
> like -fplain-diagnostics or something, which would achieve basically the same
> thing you get in the testsuite right now (-fno-diagnostics-show-caret
> -fno-diagnostics-show-line-numbers -fdiagnostics-color=never
> -fdiagnostics-urls=never) but would change as necessary whenever diagnostics
> evolve. It seems rather involved currently to add a new option like
> -fdiagnostics-unicode-drawing but keep the testsuite working, in addition to
> adding to prune.exp and to the libstdc++.exp, you also need to update the
> compat.exp so that it can figure out to pass the option only to sufficiently
> new compilers. With -fplain-diagnostics, this could just be part of the code
> change and the testsuite could stay the same; this may also make it easier on
> IDE type utilities since they could rely on a more stable format for the
> diagnostics, assuming they don't already use JSON format.

Also agree that this would be a nice feature to have.  I guess it would
act as an alias for all the -fno-* options at the point that it occurs
on the command line, so that it would be possible to use:

  -fplain-diagnostics -fthe-diagnostic-feature-i-like

> […]
>> * maybe have a different character for separating the line numbers as
>> opposed to those for labels and for showing interprocedural paths.
>>
>
> Something like that would be easy to add, sure, perhaps a double vertical line
> instead:
>
> diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
>   196 ║   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
>       ║                           ═∧══    ════╤════
>       ║                            │          │
>       ║                            int        long int

Guess it's just personal taste, but that seems a bit too busy to me.
Most diagnostics don't have interprocedural paths, and on its own,
there doesn't seem to be a specific reason to have a double line
on the left.

Thanks,
Richard
Lewis Hyatt Aug. 5, 2020, 8 p.m. UTC | #5
On Fri, Jul 24, 2020 at 02:49:36PM +0100, Richard Sandiford wrote:
> Lewis Hyatt via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > On Thu, Jul 23, 2020 at 05:47:28PM -0400, David Malcolm wrote:
> >> On Thu, 2020-07-23 at 12:28 -0400, Lewis Hyatt via Gcc-patches wrote:
> >> > Hello-
> >> > 
> >> > The attached patch is complete including docs, but I tagged as RFC
> >> > because I am not sure if anyone will like it, or if the general
> >> > reaction may
> >> > be closer to recoiling in horror :). Would appreciate your thoughts,
> >> > please...
> >> 
> >> Thanks for working on this.  I'm interested in other people's thoughts
> >> on this.  Various comments inline throughout below.
> 
> +1 in favour FWIW.
>

Thanks for the feedback!

> > […]
> > By the way, I was wondering separately what you think about adding an option
> > like -fplain-diagnostics or something, which would achieve basically the same
> > thing you get in the testsuite right now (-fno-diagnostics-show-caret
> > -fno-diagnostics-show-line-numbers -fdiagnostics-color=never
> > -fdiagnostics-urls=never) but would change as necessary whenever diagnostics
> > evolve. It seems rather involved currently to add a new option like
> > -fdiagnostics-unicode-drawing but keep the testsuite working, in addition to
> > adding to prune.exp and to the libstdc++.exp, you also need to update the
> > compat.exp so that it can figure out to pass the option only to sufficiently
> > new compilers. With -fplain-diagnostics, this could just be part of the code
> > change and the testsuite could stay the same; this may also make it easier on
> > IDE type utilities since they could rely on a more stable format for the
> > diagnostics, assuming they don't already use JSON format.
> 
> Also agree that this would be a nice feature to have.  I guess it would
> act as an alias for all the -fno-* options at the point that it occurs
> on the command line, so that it would be possible to use:
> 
>   -fplain-diagnostics -fthe-diagnostic-feature-i-like
>

Yes, that's what I was thinking. Currently the option -fdiagnostics-color
requires special handling because it applies even before it appears in the
command line (so that, say, a wrong option which appears earlier can still get a
colorized diagnostic). I was thinking the way to go would be to expand
-fplain-diagnostics into its constituents around the same place that this
special handling is done. I'll go ahead and submit a patch separately for this
sometime soon, in case it is found useful.

> > […]
> >> * maybe have a different character for separating the line numbers as
> >> opposed to those for labels and for showing interprocedural paths.
> >>
> >
> > Something like that would be easy to add, sure, perhaps a double vertical line
> > instead:
> >
> > diagnostic-ranges.c:196:28: warning: field width specifier ‘*’ expects argument of type ‘int’, but argument 3 has type ‘long int’ [-Wformat=]
> >   196 ║   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
> >       ║                           ═∧══    ════╤════
> >       ║                            │          │
> >       ║                            int        long int
> 
> Guess it's just personal taste, but that seems a bit too busy to me.
> Most diagnostics don't have interprocedural paths, and on its own,
> there doesn't seem to be a specific reason to have a double line
> on the left.

I tend to agree with this assessment as well.

-Lewis
Lewis Hyatt Nov. 11, 2020, 9:47 p.m. UTC | #6
On Thu, Jul 23, 2020 at 05:47:28PM -0400, David Malcolm wrote:
> On Thu, 2020-07-23 at 12:28 -0400, Lewis Hyatt via Gcc-patches wrote:
> > Hello-
> > 
> > The attached patch is complete including docs, but I tagged as RFC
> > because I am not sure if anyone will like it, or if the general
> > reaction may
> > be closer to recoiling in horror :). Would appreciate your thoughts,
> > please...
> 
> Thanks for working on this.  I'm interested in other people's thoughts
> on this.  Various comments inline throughout below.
> 
> > Currently, if a UTF-8 locale is detected, GCC changes the quote
> > characters
> > it outputs in diagnostics to Unicode directional quotes. I feel like
> > this is
> > a nice touch, so I was wondering whether GCC shouldn't do more along
> > these
> > lines. This patch adds support for using Unicode line drawing
> > characters and
> > similar things when outputting diagnostics. There is a new option
> > -fdiagnostics-unicode-drawing=[auto|never|always] to control it,
> > which
> > defaults to auto. "auto" will enable the feature under the same
> > circumstances that Unicode quotes get output, namely when the locale
> > is
> > determined by gcc_init_libintl() to support UTF-8. (The new option
> > does not
> > affect Unicode quote characters, which currently are not configurable
> > and
> > are determined solely by the locale.)
> 
> FWIW when I first started experimenting with location ranges back in
> 2015 my first patches had box-drawing characters for underlines; you
> can see this in some of the early examples here (and similar URLs from
> around then):
> 
> https://dmalcolm.fedorapeople.org/gcc/2015-08-18/plugin.html
>   (this also has a different approach for labeling ranges, which I
> called "captions", putting them in a right margin)
> 
> https://dmalcolm.fedorapeople.org/gcc/2015-08-19/diagnostic-test-string-literals-1.html
> 
> https://dmalcolm.fedorapeople.org/gcc/2015-08-26/tree-expression-ranges.html
> 
> etc; the patch kits were:
> 
> https://gcc.gnu.org/legacy-ml/gcc-patches/2015-03/msg00837.html
> https://gcc.gnu.org/pipermail/gcc-patches/2015-September/428036.html
> https://gcc.gnu.org/legacy-ml/gcc-patches/2015-09/msg01696.html
> 
> In:
>   https://gcc.gnu.org/legacy-ml/gcc-patches/2015-09/msg01700.html
> I wrote:
> > * Eliminated UTF-8/box-drawing and captions.  Captions were cute but
> >   weren't "fully baked".  Without them, box-drawing isn't really
> >   needed, and I think I prefer the ASCII look, with the actual
> >   "caret" character, and '~' makes it easier to count characters
> >   compared to a box-drawing line, in my terminal's font, at least.
> >   Doing so greatly simplifies the new locus-printing code.
> 
> So I dropped the UTF-8 box drawing from that original kit for:
> (a) simplicity (the original patch kit was huge in scope, covering a
> bunch of ideas for diagnostics - ranges, labeling, fix-it hints,
> spelling suggestions, so I wanted to reduce the scope to something
> manageable)
> (b) I found it easier to count characters with "~"
> 
> 
> The thing I'm most nervous about with this patch is the potential for
> introducing mojibake when people copy and paste GCC output.
> 
> For example, looking at:
> https://gcc.gnu.org/legacy-ml/gcc-patches/2015-03/msg00837.html
> I see mojibake where the unicode line-drawing characters in my email
> are being displayed in the HTML mailing list archive via "&#xE2;" -
> something has gone wrong with encoding somewhere between the copy&paste
> from my terminal, the email, and the list archive.
> 
> That said, looking at your email in the archive here:
> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/550551.html
> I don't see any mojibake.
> 
> What happens if GCC's stderr is piped into "less"?
> What happens if GCC's stderr is saved in a build.log file, uploaded
> somewhere, and then viewed?
> etc.
> 
> 
> > The elements implemented are:
> > 
> >     * Vertical lines, e.g. those indicating labels and those
> > separating the
> >       source lines from the line numbers, are changed to line drawing
> >       characters.
> > 
> >     * The diagnostic paths output by the static analyzer make use of
> > line
> >       drawing characters to output smooth corners etc.
> > 
> >     * The squiggly underline ~~~~~ used to highlight source locations
> > is
> >       changed to a double underline ═════. The main reason for this
> > is that
> >       it enables a seamless "tee" character to connect the underline
> > to a
> >       label line if one exists.
> > 
> >     * Carets (^) are changed to a slightly different character (∧). I
> > think
> >       the new one is a little nicer looking, although probably not
> > worth the
> >       trouble on its own. I wanted to implement the support in this
> > patch
> >       because carets are harder to change than the rest of the
> > elements
> >       (front ends have an interface to override them, which currently
> >       Fortran makes use of), so I thought it worthwhile to get this
> > logic in
> >       place, so that it can easily be changed to a more superior
> > character
> >       in the future if one comes up. It would also be easy enough to
> > leave
> >       the Unicode support in place for carets, but keep the default
> > set to
> >       the plain one for now.
> 
> Some other ideas:
> 
> * fix-it hints
> 
> * maybe have a different character for separating the line numbers as
> opposed to those for labels and for showing interprocedural paths.
> 
> > As an example, this diagnostic from gcc.dg/format/diagnostic-
> > ranges.c:
> > 
> > diagnostic-ranges.c:196:28: warning: field width specifier ‘*’
> > expects argument of type ‘int’, but argument 3 has type ‘long int’ [-
> > Wformat=]
> >   196 |   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
> >       |                           ~^~~    ~~~~~~~~~
> >       |                            |          |
> >       |                            int        long int
> > 
> > would become instead:
> > 
> > diagnostic-ranges.c:196:28: warning: field width specifier ‘*’
> > expects argument of type ‘int’, but argument 3 has type ‘long int’ [-
> > Wformat=]
> >   196 │   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
> >       │                           ═∧══    ════╤════
> >       │                            │          │
> >       │                            int        long int
> > 
> > Hopefully you are viewing this in a terminal that displays it
> > properly :), in
> > which case, hopefully you may find it to be an improvement?
> 
> I wonder if you can upload colorized examples somewhere?
> 
> e.g. using bin/gcc-color-to-html.py from our website repository:
>  https://gcc.gnu.org/git/?p=gcc-wwwdocs.git;a=blob;f=bin/gcc-color-to-html.py
> or one of the various ansi2html conversion scripts e.g.
>   http://www.pixelbeat.org/scripts/ansi2html.sh
> 
> > Here is a more involved example from the analyzer:
> > 
> > setjmp-5.c: In function ‘outer’:
> > setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function
> > of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
> >    21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called after
> > enclosing function of 'setjmp' has returned" } */
> >       |   ^~~~~~~~~~~~~~~~~
> >   ‘outer’: events 1-2
> >     |
> >     |   15 | void outer (void)
> >     |      |      ^~~~~
> >     |      |      |
> >     |      |      (1) entry to ‘outer’
> >     |......
> >     |   19 |   inner ();
> >     |      |   ~~~~~~~~
> >     |      |   |
> >     |      |   (2) calling ‘inner’ from ‘outer’
> >     |
> >     +--> ‘inner’: event 3
> >            |
> >            |   10 | static void inner (void)
> >            |      |             ^~~~~
> >            |      |             |
> >            |      |             (3) entry to ‘inner’
> >            |
> >          ‘inner’: event 4
> >            |
> >            |   12 |   SETJMP (env);
> >            |      |   ^~~~~~
> >            |      |   |
> >            |      |   (4) ‘setjmp’ called here
> >            |
> >     <------+
> >     |
> >   ‘outer’: events 5-6
> >     |
> >     |   19 |   inner ();
> >     |      |   ^~~~~~~~
> >     |      |   |
> >     |      |   (5) returning to ‘outer’ from ‘inner’
> >     |   20 |
> >     |   21 |   longjmp (env, 42); /* { dg-warning "'longjmp' called
> > after enclosing function of 'setjmp' has returned" } */
> >     |      |   ~~~~~~~~~~~~~~~~~
> >     |      |   |
> >     |      |   (6) here
> >     |
> > 
> > would become instead:
> > 
> > setjmp-5.c: In function ‘outer’:
> > setjmp-5.c:21:3: warning: ‘longjmp’ called after enclosing function
> > of ‘setjmp’ has returned [-Wanalyzer-stale-setjmp-buffer]
> >    21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called after
> > enclosing function of 'setjmp' has returned" } */
> >       │   ∧════════════════
> >   ‘outer’: events 1-2
> >     │
> >     │   15 │ void outer (void)
> >     │      │      ∧════
> >     │      │      │
> >     │      │      (1) entry to ‘outer’
> >     │......
> >     │   19 │   inner ();
> 
> I wonder if there's a fancier way to express the gap in the lines if
> Unicode is available?
> 
> 
> >     │      │   ╤═══════
> >     │      │   │
> >     │      │   (2) calling ‘inner’ from ‘outer’
> >     │
> >     └──> ‘inner’: event 3
> >            │
> >            │   10 │ static void inner (void)
> >            │      │             ∧════
> >            │      │             │
> >            │      │             (3) entry to ‘inner’
> >            │
> >          ‘inner’: event 4
> >            │
> >            │   12 │   SETJMP (env);
> >            │      │   ∧═════
> >            │      │   │
> >            │      │   (4) ‘setjmp’ called here
> >            │
> 
> Unrelated to this patch, but it would be nice if the analyzer inserted
> an event at the function end showing the frame in "env" becoming
> invalid, since that's what's pertinent to the diagnostic.
> 
> >     ┌<─────┘
> >     │
> >   ‘outer’: events 5-6
> >     │
> >     │   19 │   inner ();
> >     │      │   ∧═══════
> >     │      │   │
> >     │      │   (5) returning to ‘outer’ from ‘inner’
> >     │   20 │
> >     │   21 │   longjmp (env, 42); /* { dg-warning "'longjmp' called
> > after enclosing function of 'setjmp' has returned" } */
> >     │      │   ╤════════════════
> >     │      │   │
> >     │      │   (6) here
> >     │
> 
> FWIW I experimented with using unicode circled number characters in
> place of (1), (2), etc for events in diagnostic_paths but the results
> looked bad in my terminal, so I stuck to the ASCII form above.
> 
> In my more adventurous moments I've been tempted to use background
> colorization to show the stack pushes and pops in a flamegraph-style
> way, but I suspect it would be garish and too "busy" visually.
> 
> > 
> > Although probably premature, bootstrap and regtest were done on x86-
> > 64
> > linux, all tests the same before/after and new tests passing:
> > FAIL 96 96
> > PASS 479090 479239
> > UNSUPPORTED 11946 11946
> > UNTESTED 194 194
> > XFAIL 1839 1839
> > XPASS 36 36
> 
> I see the patch kit touches Fortran; was this with all frontends
> enabled?  (though I guess I'm likewise being premature here)
> 
> > I tried to set this up as a general framework, at least, it is easy
> > in one
> > place to change the characters that are used for various contexts, so
> > that
> > if people like the general idea, but not some of the specifics, the
> > patch is
> > easily modified for that now or in the future. Thanks for any
> > feedback!
> 
> Thanks again for the patch; let's see what others think.
> Dave
>

Hello-

This patch to make use of Unicode drawing characters in diagnostics
doesn't seem to have generated much interest; looks like there was one
vote in favor. I thought I might bring it up one more time since I still
think it is a potential improvement... The patch required some rebasing on
top of recent commits so I have attached an updated version here. I am
happy to work on it if you think it's worthwhile, also happy to drop it
too. Also, David, I wanted to mention that this patch conflicts a fair
amount with your recently sent HTML output patch. If it simplifies
things, and you are interested in my patch, I can wait for the HTML one to
be applied and then send this one relative to that. Thanks for taking a
look!

-Lewis
From: Lewis Hyatt <lhyatt@gmail.com>
Date: Wed, 11 Nov 2020 16:35:38 -0500
Subject: [PATCH] diagnostics: Add support for Unicode drawing characters

Adds the new option -fdiagnostics-unicode-drawing, on by default if a
UTF-8 locale is detected, which modifies diagnostics to use extended Unicode
characters, such as line-drawing characters.

gcc/ChangeLog:

	* common.opt: Add new option -fdiagnostics-unicode-drawing.
	* diagnostic-show-locus.c (struct point_state): Add new member.
	(layout::print_source_line): Support Unicode drawing feature.
	(layout::start_annotation_line): Likewise.
	(layout::print_annotation_line): Likewise.
	(layout::print_any_labels): Likewise.
	(layout::print_trailing_fixits): Likewise.
	(layout::get_state_at_point): Likewise.
	(test_one_liner_multiple_carets_and_ranges): Likewise.
	(test_one_liner_multiple_carets_and_ranges_utf8): Likewise.
	* diagnostic.c (diagnostic_drawing_init): New function.
	(diagnostic_initialize): Call the new function.
	* diagnostic.h (struct diagnostic_context): Support Unicode
	drawing feature.
	(enum diagnostics_drawing_rule): New enum for the new option.
	(diagnostic_drawing_init): Declare.
	* doc/invoke.texi: Document the new option.
	* opts-common.c (decode_cmdline_options_to_array): Add the new option
	to -fdiagnostics-plain-output handling.
	* opts.c (common_handle_option): Call diagnostic_drawing_init() to
	support the new option.
	* selftest-diagnostic.c
	(test_diagnostic_context::test_diagnostic_context): Disable
	Unicode drawing in selftests that use test_diagnostic_context.
	* tree-diagnostic-path.cc (path_summary::print): Support Unicode
	drawing feature.

gcc/fortran/ChangeLog:

	* error.c (gfc_diagnostics_init): Adapt custom carets, as they
	need to be strings rather than chars now.
	(gfc_diagnostics_finish): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.dg/plugin/diagnostic_plugin_test_show_locus.c: Adapt custom
	carets, as they need to be strings rather than chars now.
	* gcc.dg/analyzer/setjmp-5-utf8.c: New test.
	* gcc.dg/format/diagnostic-ranges-utf8.c: New test.

diff --git a/gcc/common.opt b/gcc/common.opt
index 7d0e0d9c88a..346001f2bde 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1390,6 +1390,26 @@ ftabstop=
 Common Joined RejectNegative UInteger
 -ftabstop=<number>      Distance between tab stops for column reporting.
 
+fdiagnostics-unicode-drawing
+Common Alias(fdiagnostics-unicode-drawing=,always,never)
+;
+
+fdiagnostics-unicode-drawing=
+Common Joined RejectNegative Enum(diagnostics_unicode_drawing)
+-fdiagnostics-unicode-drawing=[never|always|auto]	Use Unicode drawing characters in diagnostics.
+
+Enum
+Name(diagnostics_unicode_drawing) Type(int)
+
+EnumValue
+Enum(diagnostics_unicode_drawing) String(never) Value(DIAGNOSTICS_UNICODE_DRAWING_NO)
+
+EnumValue
+Enum(diagnostics_unicode_drawing) String(always) Value(DIAGNOSTICS_UNICODE_DRAWING_YES)
+
+EnumValue
+Enum(diagnostics_unicode_drawing) String(auto) Value(DIAGNOSTICS_UNICODE_DRAWING_AUTO)
+
 Enum
 Name(diagnostic_path_format) Type(int)
 
diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index da3c5b6a92d..a945307a71c 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -62,6 +62,7 @@ struct point_state
 {
   int range_idx;
   bool draw_caret_p;
+  bool has_label_p;
 };
 
 /* A class to inject colorization codes when printing the diagnostic locus.
@@ -1467,7 +1468,7 @@ layout::print_source_line (linenum_type row, const char *line, int line_bytes)
       int width = num_digits (row);
       for (int i = 0; i < m_linenum_width - width; i++)
 	pp_space (m_pp);
-      pp_printf (m_pp, "%i | ", row);
+      pp_printf (m_pp, "%i %s ", row, m_context->drawing.vertical);
     }
   else
     pp_space (m_pp);
@@ -1596,7 +1597,8 @@ layout::start_annotation_line (char margin_char) const
 	pp_space (m_pp);
       for (; i < m_linenum_width; i++)
 	pp_character (m_pp, margin_char);
-      pp_string (m_pp, " |");
+      pp_space (m_pp);
+      pp_string (m_pp, m_context->drawing.vertical);
     }
 }
 
@@ -1628,15 +1630,21 @@ layout::print_annotation_line (linenum_type row, const line_bounds lbounds)
 	  if (state.draw_caret_p)
 	    {
 	      /* Draw the caret.  */
-	      char caret_char;
+	      const char *caret_char;
 	      if (state.range_idx < rich_location::STATICALLY_ALLOCATED_RANGES)
 		caret_char = m_context->caret_chars[state.range_idx];
 	      else
-		caret_char = '^';
-	      pp_character (m_pp, caret_char);
+		caret_char = m_context->drawing.default_caret;
+	      pp_string (m_pp, caret_char);
 	    }
 	  else
-	    pp_character (m_pp, '~');
+	    {
+	      const char *const underline
+		= state.has_label_p
+		? m_context->drawing.tee_down2
+		: m_context->drawing.horizontal2;
+	      pp_string (m_pp, underline);
+	    }
 	}
       else
 	{
@@ -1821,7 +1829,7 @@ layout::print_any_labels (linenum_type row)
 		gcc_assert (column <= label->m_column);
 		move_to_column (&column, label->m_column, true);
 		m_colorizer.set_range (label->m_state_idx);
-		pp_character (m_pp, '|');
+		pp_string (m_pp, m_context->drawing.vertical);
 		m_colorizer.set_normal_text ();
 		column++;
 	      }
@@ -2343,7 +2351,7 @@ layout::print_trailing_fixits (linenum_type row)
 	      move_to_column (&column, start_column, true);
 	      m_colorizer.set_fixit_delete ();
 	      for (; column <= finish_column; column++)
-		pp_character (m_pp, '-');
+		pp_string (m_pp, m_context->drawing.horizontal1);
 	      m_colorizer.set_normal_text ();
 	    }
 	  /* Print the replacement text.  REPLACE also covers
@@ -2400,12 +2408,19 @@ layout::get_state_at_point (/* Inputs.  */
 	{
 	  out_state->range_idx = i;
 
-	  /* Are we at the range's caret?  is it visible? */
+	  /* If we are at the range's caret, check if it is visible and check
+	     if a label line will be output directly below this point, since
+	     that enables unicode drawing to use a nicer "tee" character to
+	     mesh seamlessly with the label line.  */
 	  out_state->draw_caret_p = false;
-	  if (range->m_range_display_kind == SHOW_RANGE_WITH_CARET
-	      && row == range->m_caret.m_line
+	  out_state->has_label_p = false;
+	  if (row == range->m_caret.m_line
 	      && column == range->m_caret.m_columns[col_unit])
-	    out_state->draw_caret_p = true;
+	    {
+	      out_state->draw_caret_p
+		= (range->m_range_display_kind == SHOW_RANGE_WITH_CARET);
+	      out_state->has_label_p = (range->m_label != NULL);
+	    }
 
 	  /* Within a multiline range, don't display any underline
 	     in any leading or trailing whitespace on a line.
@@ -2976,19 +2991,19 @@ test_one_liner_multiple_carets_and_ranges ()
     = make_location (linemap_position_for_column (line_table, 2),
 		     linemap_position_for_column (line_table, 1),
 		     linemap_position_for_column (line_table, 3));
-  dc.caret_chars[0] = 'A';
+  dc.caret_chars[0] = "A";
 
   location_t bar
     = make_location (linemap_position_for_column (line_table, 8),
 		     linemap_position_for_column (line_table, 7),
 		     linemap_position_for_column (line_table, 9));
-  dc.caret_chars[1] = 'B';
+  dc.caret_chars[1] = "B";
 
   location_t field
     = make_location (linemap_position_for_column (line_table, 13),
 		     linemap_position_for_column (line_table, 11),
 		     linemap_position_for_column (line_table, 15));
-  dc.caret_chars[2] = 'C';
+  dc.caret_chars[2] = "C";
 
   rich_location richloc (line_table, foo);
   richloc.add_range (bar, SHOW_RANGE_WITH_CARET);
@@ -3584,19 +3599,19 @@ test_one_liner_multiple_carets_and_ranges_utf8 ()
     = make_location (linemap_position_for_column (line_table, 7),
 		     linemap_position_for_column (line_table, 1),
 		     linemap_position_for_column (line_table, 8));
-  dc.caret_chars[0] = 'A';
+  dc.caret_chars[0] = "A";
 
   location_t bar
     = make_location (linemap_position_for_column (line_table, 16),
 		     linemap_position_for_column (line_table, 12),
 		     linemap_position_for_column (line_table, 17));
-  dc.caret_chars[1] = 'B';
+  dc.caret_chars[1] = "B";
 
   location_t field
     = make_location (linemap_position_for_column (line_table, 26),
 		     linemap_position_for_column (line_table, 19),
 		     linemap_position_for_column (line_table, 30));
-  dc.caret_chars[2] = 'C';
+  dc.caret_chars[2] = "C";
   rich_location richloc (line_table, foo);
   richloc.add_range (bar, SHOW_RANGE_WITH_CARET);
   richloc.add_range (field, SHOW_RANGE_WITH_CARET);
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 1b6c9845892..131b7220dcb 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -167,6 +167,14 @@ default_diagnostic_final_cb (diagnostic_context *context)
     }
 }
 
+/* Store the carets in an unnamed namespace so we can verify whether
+   or not a front-end has overridden them.  */
+namespace {
+  const char plain_caret[] = "^";
+  /* U+2227 = Logical And */
+  const char fancy_caret[] = "\xE2\x88\xA7";
+}
+
 /* Initialize the diagnostic message outputting machinery.  */
 void
 diagnostic_initialize (diagnostic_context *context, int n_opts)
@@ -187,7 +195,7 @@ diagnostic_initialize (diagnostic_context *context, int n_opts)
   context->show_caret = false;
   diagnostic_set_caret_max_width (context, pp_line_cutoff (context->printer));
   for (i = 0; i < rich_location::STATICALLY_ALLOCATED_RANGES; i++)
-    context->caret_chars[i] = '^';
+    context->caret_chars[i] = plain_caret;
   context->show_cwe = false;
   context->path_format = DPF_NONE;
   context->show_path_depths = false;
@@ -229,6 +237,7 @@ diagnostic_initialize (diagnostic_context *context, int n_opts)
   context->begin_group_cb = NULL;
   context->end_group_cb = NULL;
   context->final_cb = default_diagnostic_final_cb;
+  diagnostic_drawing_init (context, DIAGNOSTICS_UNICODE_DRAWING_AUTO);
 }
 
 /* Maybe initialize the color support. We require clients to do this
@@ -286,6 +295,83 @@ diagnostic_urls_init (diagnostic_context *context, int value /*= -1 */)
     = determine_url_format ((diagnostic_url_rule_t) value);
 }
 
+/* Initialize Unicode drawing support in CONTEXT according to RULE; in auto
+   mode, enable it only if the locale supports UTF-8.  */
+void
+diagnostic_drawing_init (diagnostic_context *context,
+			 enum diagnostics_drawing_rule rule)
+{
+  switch (rule)
+    {
+    case DIAGNOSTICS_UNICODE_DRAWING_NO:
+      context->drawing.enabled = false;
+      break;
+    case DIAGNOSTICS_UNICODE_DRAWING_YES:
+      context->drawing.enabled = true;
+      break;
+    case DIAGNOSTICS_UNICODE_DRAWING_AUTO:
+      context->drawing.enabled = locale_utf8;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  if (context->drawing.enabled)
+    {
+      /* U+2500 = Box Drawings Light Horizontal */
+      context->drawing.horizontal1 = "\xE2\x94\x80";
+
+      /* U+2550 = Box Drawings Double Horizontal */
+      context->drawing.horizontal2 = "\xE2\x95\x90";
+
+      /* U+252C = Box Drawings Light Down and Horizontal */
+      context->drawing.tee_down1 = "\xE2\x94\xAC";
+
+      /* U+2564 = Box Drawings Down Single and Horizontal Double */
+      context->drawing.tee_down2 = "\xE2\x95\xA4";
+
+      /* U+2502 = Box Drawings Light Vertical */
+      context->drawing.vertical = "\xE2\x94\x82";
+
+      /* U+250C = Box Drawings Light Down and Right */
+      context->drawing.corner_nw = "\xE2\x94\x8C";
+
+      /* U+2510 = Box Drawings Light Down and Left */
+      context->drawing.corner_ne = "\xE2\x94\x90";
+
+      /* U+2518 = Box Drawings Light Up and Left */
+      context->drawing.corner_se = "\xE2\x94\x98";
+
+      /* U+2514 = Box Drawings Light Up and Right */
+      context->drawing.corner_sw = "\xE2\x94\x94";
+
+      context->drawing.default_caret = fancy_caret;
+    }
+  else
+    {
+      context->drawing.horizontal1 = "-";
+      context->drawing.tee_down1 = "-";
+      context->drawing.horizontal2 = "~";
+      context->drawing.tee_down2 = "~";
+      context->drawing.vertical = "|";
+      context->drawing.corner_nw = "+";
+      context->drawing.corner_ne = "+";
+      context->drawing.corner_se = "+";
+      context->drawing.corner_sw = "+";
+      context->drawing.default_caret = plain_caret;
+    }
+
+  /* Configure the customizable carets, unless a front-end has already changed
+     them to something else.  */
+  for (int i = 0; i != rich_location::STATICALLY_ALLOCATED_RANGES; ++i)
+    {
+      if (context->caret_chars[i] == plain_caret
+	  || context->caret_chars[i] == fancy_caret)
+	{
+	  context->caret_chars[i] = context->drawing.default_caret;
+	}
+    }
+}
+
 /* Do any cleaning up required after the last diagnostic is emitted.  */
 
 void
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 4051601abfd..329ed555139 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -159,8 +159,9 @@ struct diagnostic_context
   /* Maximum width of the source line printed.  */
   int caret_max_width;
 
-  /* Character used for caret diagnostics.  */
-  char caret_chars[rich_location::STATICALLY_ALLOCATED_RANGES];
+  /* Characters used for caret diagnostics.  These are strings to accommodate
+     multibyte characters, but they should have display width 1.  */
+  const char *caret_chars[rich_location::STATICALLY_ALLOCATED_RANGES];
 
   /* True if we should print any CWE identifiers associated with
      diagnostics.  */
@@ -326,6 +327,23 @@ struct diagnostic_context
 
   /* Callback for final cleanup.  */
   void (*final_cb) (diagnostic_context *context);
+
+  /* Some output elements that can be made to look nicer if UTF-8 output
+     is available.  */
+  struct
+  {
+    bool enabled;
+    const char *horizontal1;
+    const char *horizontal2;
+    const char *tee_down1;
+    const char *tee_down2;
+    const char *vertical;
+    const char *corner_nw;
+    const char *corner_ne;
+    const char *corner_se;
+    const char *corner_sw;
+    const char *default_caret;
+  } drawing;
 };
 
 static inline void
@@ -396,6 +414,16 @@ diagnostic_override_option_index (diagnostic_info *info, int optidx)
 extern void diagnostic_initialize (diagnostic_context *, int);
 extern void diagnostic_color_init (diagnostic_context *, int value = -1);
 extern void diagnostic_urls_init (diagnostic_context *, int value = -1);
+
+enum diagnostics_drawing_rule
+{
+  DIAGNOSTICS_UNICODE_DRAWING_NO,
+  DIAGNOSTICS_UNICODE_DRAWING_YES,
+  DIAGNOSTICS_UNICODE_DRAWING_AUTO
+};
+extern void diagnostic_drawing_init (diagnostic_context *,
+				     enum diagnostics_drawing_rule rule);
+
 extern void diagnostic_finish (diagnostic_context *);
 extern void diagnostic_report_current_module (diagnostic_context *, location_t);
 extern void diagnostic_show_locus (diagnostic_context *,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8d0d2136831..24eadd2ec64 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -286,6 +286,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdiagnostics-plain-output @gol
 -fdiagnostics-show-location=@r{[}once@r{|}every-line@r{]}  @gol
 -fdiagnostics-color=@r{[}auto@r{|}never@r{|}always@r{]}  @gol
+-fdiagnostics-unicode-drawing=@r{[}auto@r{|}never@r{|}always@r{]}  @gol
 -fdiagnostics-urls=@r{[}auto@r{|}never@r{|}always@r{]}  @gol
 -fdiagnostics-format=@r{[}text@r{|}json@r{]}  @gol
 -fno-diagnostics-show-option  -fno-diagnostics-show-caret @gol
@@ -4425,7 +4426,8 @@ options:
 -fno-diagnostics-show-line-numbers @gol
 -fdiagnostics-color=never @gol
 -fdiagnostics-urls=never @gol
--fdiagnostics-path-format=separate-events}
+-fdiagnostics-path-format=separate-events @gol
+-fdiagnostics-unicode-drawing=never}
 In the future, if GCC changes the default appearance of its diagnostics, the
 corresponding option to disable the new behavior will be added to this list.
 
@@ -4565,6 +4567,16 @@ SGR substring for highlighting mismatching types within template
 arguments in the C++ frontend.
 @end table
 
+@item -fdiagnostics-unicode-drawing[=@var{WHEN}]
+@opindex fdiagnostics-unicode-drawing
+@opindex fno-diagnostics-unicode-drawing
+Use extended characters from the Unicode Standard (such as line drawing
+characters) to improve the appearance of diagnostics.
+
+@var{WHEN} is @samp{never}, @samp{always}, or @samp{auto}.
+@samp{auto}, the default, enables the feature provided the user's locale
+supports UTF-8.
+
 @item -fdiagnostics-urls[=@var{WHEN}]
 @opindex fdiagnostics-urls
 @cindex urls
diff --git a/gcc/fortran/error.c b/gcc/fortran/error.c
index dacc1d7ba51..68be60309ce 100644
--- a/gcc/fortran/error.c
+++ b/gcc/fortran/error.c
@@ -1532,8 +1532,8 @@ gfc_diagnostics_init (void)
   global_dc->start_span = gfc_diagnostic_start_span;
   diagnostic_finalizer (global_dc) = gfc_diagnostic_finalizer;
   diagnostic_format_decoder (global_dc) = gfc_format_decoder;
-  global_dc->caret_chars[0] = '1';
-  global_dc->caret_chars[1] = '2';
+  global_dc->caret_chars[0] = "1";
+  global_dc->caret_chars[1] = "2";
   pp_warning_buffer = new (XNEW (output_buffer)) output_buffer ();
   pp_warning_buffer->flush_p = false;
   /* pp_error_buffer is statically allocated.  This simplifies memory
@@ -1550,6 +1550,6 @@ gfc_diagnostics_finish (void)
      defaults.  */
   diagnostic_starter (global_dc) = gfc_diagnostic_starter;
   diagnostic_finalizer (global_dc) = gfc_diagnostic_finalizer;
-  global_dc->caret_chars[0] = '^';
-  global_dc->caret_chars[1] = '^';
+  global_dc->caret_chars[0] = global_dc->drawing.default_caret;
+  global_dc->caret_chars[1] = global_dc->drawing.default_caret;
 }
diff --git a/gcc/opts-common.c b/gcc/opts-common.c
index 8ec8c1ec1a8..c13904ebf50 100644
--- a/gcc/opts-common.c
+++ b/gcc/opts-common.c
@@ -1001,6 +1001,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
 	    "-fdiagnostics-color=never",
 	    "-fdiagnostics-urls=never",
 	    "-fdiagnostics-path-format=separate-events",
+	    "-fdiagnostics-unicode-drawing=never",
 	  };
 	  const int num_expanded = ARRAY_SIZE (expanded_args);
 	  opt_array_len += num_expanded - 1;
diff --git a/gcc/opts.c b/gcc/opts.c
index ac9972d9c38..24a2058f45a 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -2449,6 +2449,10 @@ common_handle_option (struct gcc_options *opts,
       diagnostic_urls_init (dc, value);
       break;
 
+    case OPT_fdiagnostics_unicode_drawing_:
+      diagnostic_drawing_init (dc, (enum diagnostics_drawing_rule)value);
+      break;
+
     case OPT_fdiagnostics_format_:
       diagnostic_output_format_init (dc,
 				     (enum diagnostics_output_format)value);
diff --git a/gcc/selftest-diagnostic.c b/gcc/selftest-diagnostic.c
index 82fddca89ab..3bce8e58048 100644
--- a/gcc/selftest-diagnostic.c
+++ b/gcc/selftest-diagnostic.c
@@ -36,6 +36,10 @@ namespace selftest {
 test_diagnostic_context::test_diagnostic_context ()
 {
   diagnostic_initialize (this, 0);
+
+  /* Disable unicode drawing to make it simpler to write selftest code.  */
+  diagnostic_drawing_init (this, DIAGNOSTICS_UNICODE_DRAWING_NO);
+
   show_caret = true;
   show_labels_p = true;
   show_column = true;
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-5-utf8.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-5-utf8.c
new file mode 100644
index 00000000000..6f65cf9a2da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-5-utf8.c
@@ -0,0 +1,71 @@
+/* { dg-additional-options "-fdiagnostics-show-line-numbers -fdiagnostics-path-format=inline-events -fdiagnostics-show-caret -fdiagnostics-unicode-drawing" } */
+
+/* This tests the same functionality as setjmp-5.c, but enables unicode drawing
+   so as to exercise those features.
+   n.b. NN line numbers do not support unicode drawing mode, so we do not
+   make use of that here.  */
+
+#include "test-setjmp.h"
+#include <stddef.h>
+#include "analyzer-decls.h"
+
+static jmp_buf env;
+
+static void inner (void)
+{
+  SETJMP (env);
+}
+
+void outer (void)
+{
+  int i;
+
+  inner ();
+
+  longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
+}
+
+/* { dg-begin-multiline-output "" }
+   25 │   longjmp (env, 42);
+      │   ∧════════════════
+  'outer': events 1-2
+    │
+    │   19 │ void outer (void)
+    │      │      ∧════
+    │      │      │
+    │      │      (1) entry to 'outer'
+    │......
+    │   23 │   inner ();
+    │      │   ╤═══════
+    │      │   │
+    │      │   (2) calling 'inner' from 'outer'
+    │
+    └──> 'inner': event 3
+           │
+           │   14 │ static void inner (void)
+           │      │             ∧════
+           │      │             │
+           │      │             (3) entry to 'inner'
+           │
+         'inner': event 4
+           │
+           │   16 │   SETJMP (env);
+           │      │   ∧═════
+           │      │   │
+           │      │   (4) 'setjmp' called here
+           │
+    ┌<─────┘
+    │
+  'outer': events 5-6
+    │
+    │   23 │   inner ();
+    │      │   ∧═══════
+    │      │   │
+    │      │   (5) returning to 'outer' from 'inner'
+    │   24 │ 
+    │   25 │   longjmp (env, 42);
+    │      │   ╤════════════════
+    │      │   │
+    │      │   (6) here
+    │
+    { dg-end-multiline-output "" } */
diff --git a/gcc/testsuite/gcc.dg/format/diagnostic-ranges-utf8.c b/gcc/testsuite/gcc.dg/format/diagnostic-ranges-utf8.c
new file mode 100644
index 00000000000..ac7638e003a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/format/diagnostic-ranges-utf8.c
@@ -0,0 +1,394 @@
+/* { dg-options "-Wformat -fdiagnostics-show-caret -fdiagnostics-unicode-drawing" } */
+
+/* This performs the same tests as diagnostic-ranges.c, but it enables unicode
+   drawing so that this can be tested.  */
+
+
+#include "format.h"
+
+void test_mismatching_types (const char *msg)
+{
+  printf("hello %i", msg);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello %i", msg);
+                 ═∧   ╤══
+                  │   │
+                  int const char *
+                 %s
+   { dg-end-multiline-output "" } */
+
+
+  printf("hello %s", 42);  /* { dg-warning "format '%s' expects argument of type 'char \\*', but argument 2 has type 'int'" } */
+/* { dg-begin-multiline-output "" }
+   printf("hello %s", 42);
+                 ═∧   ╤═
+                  │   │
+                  │   int
+                  char *
+                 %d
+   { dg-end-multiline-output "" } */
+
+  printf("hello %i", (long)0);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'long int' " } */
+/* { dg-begin-multiline-output "" }
+   printf("hello %i", (long)0);
+                 ═∧   ╤══════
+                  │   │
+                  int long int
+                 %li
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple_arguments (void)
+{
+  printf ("arg0: %i  arg1: %s arg 2: %i", /* { dg-warning "29: format '%s'" } */
+          100, 101, 102);
+/* { dg-begin-multiline-output "" }
+   printf ("arg0: %i  arg1: %s arg 2: %i",
+                            ═∧
+                             │
+                             char *
+                            %d
+           100, 101, 102);
+                ╤══           
+                │
+                int
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple_arguments_2 (int i, int j)
+{
+  printf ("arg0: %i  arg1: %s arg 2: %i", /* { dg-warning "29: format '%s'" } */
+          100, i + j, 102);
+/* { dg-begin-multiline-output "" }
+   printf ("arg0: %i  arg1: %s arg 2: %i",
+                            ═∧
+                             │
+                             char *
+                            %d
+           100, i + j, 102);
+                ══╤══         
+                  │
+                  int
+   { dg-end-multiline-output "" } */
+}
+
+void multiline_format_string (void) {
+  printf ("before the fmt specifier" /* { dg-warning "11: format '%d' expects a matching 'int' argument" } */
+/* { dg-begin-multiline-output "" }
+   printf ("before the fmt specifier"
+           ∧═════════════════════════
+   { dg-end-multiline-output "" } */
+
+          "%"
+          "d" /* { dg-message "12: format string is defined here" } */
+          "after the fmt specifier");
+
+/* { dg-begin-multiline-output "" }
+           "%"
+            ══
+           "d"
+           ═∧
+            │
+            int
+   { dg-end-multiline-output "" } */
+}
+
+void test_hex (const char *msg)
+{
+  /* "%" is \x25
+     "i" is \x69 */
+  printf("hello \x25\x69", msg);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello \x25\x69", msg);
+                 ════∧═══   ╤══
+                     │      │
+                     int    const char *
+                 \x25s
+   { dg-end-multiline-output "" } */
+}
+
+void test_oct (const char *msg)
+{
+  /* "%" is octal 045
+     "i" is octal 151.  */
+  printf("hello \045\151", msg);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello \045\151", msg);
+                 ════∧═══   ╤══
+                     │      │
+                     int    const char *
+                 \045s
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple (const char *msg)
+{
+  /* "%" is \x25 in hex
+     "i" is \151 in octal.  */
+  printf("prefix"  "\x25"  "\151"  "suffix",  /* { dg-warning "format '%i'" } */
+         msg);
+/* { dg-begin-multiline-output "" }
+   printf("prefix"  "\x25"  "\151"  "suffix",
+          ∧═══════
+          msg);
+          ╤══
+          │
+          const char *
+  { dg-end-multiline-output "" } */
+
+/* { dg-begin-multiline-output "" }
+   printf("prefix"  "\x25"  "\151"  "suffix",
+                     ════════∧═══
+                             │
+                             int
+                     \x25"  "s
+  { dg-end-multiline-output "" } */
+}
+
+void test_u8 (const char *msg)
+{
+  printf(u8"hello %i", msg);/* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+/* { dg-begin-multiline-output "" }
+   printf(u8"hello %i", msg);
+                   ═∧   ╤══
+                    │   │
+                    int const char *
+                   %s
+   { dg-end-multiline-output "" } */
+}
+
+void test_param (long long_i, long long_j)
+{
+  printf ("foo %s bar", long_i + long_j); /* { dg-warning "17: format '%s' expects argument of type 'char \\*', but argument 2 has type 'long int'" } */
+/* { dg-begin-multiline-output "" }
+   printf ("foo %s bar", long_i + long_j);
+                ═∧       ═══════╤═══════
+                 │              │
+                 char *         long int
+                %ld
+   { dg-end-multiline-output "" } */
+}
+
+void test_field_width_specifier (long l, int i1, int i2)
+{
+  printf (" %*.*d ", l, i1, i2); /* { dg-warning "14: field width specifier '\\*' expects argument of type 'int', but argument 2 has type 'long int'" } */
+/* { dg-begin-multiline-output "" }
+   printf (" %*.*d ", l, i1, i2);
+             ═∧═══    ╤
+              │       │
+              int     long int
+   { dg-end-multiline-output "" } */
+}
+
+/* PR c/72857.  */
+
+void test_field_width_specifier_2 (char *d, long foo, long bar)
+{
+  __builtin_sprintf (d, " %*ld ", foo, foo); /* { dg-warning "28: field width specifier '\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %*ld ", foo, foo);
+                           ═∧══    ╤══
+                            │      │
+                            int    long int
+   { dg-end-multiline-output "" } */
+
+  __builtin_sprintf (d, " %*ld ", foo + bar, foo); /* { dg-warning "28: field width specifier '\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
+                           ═∧══    ════╤════
+                            │          │
+                            int        long int
+   { dg-end-multiline-output "" } */
+}
+
+void test_field_precision_specifier (char *d, long foo, long bar)
+{
+  __builtin_sprintf (d, " %.*ld ", foo, foo); /* { dg-warning "29: field precision specifier '\\.\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %.*ld ", foo, foo);
+                           ══∧══    ╤══
+                             │      │
+                             int    long int
+   { dg-end-multiline-output "" } */
+
+  __builtin_sprintf (d, " %.*ld ", foo + bar, foo); /* { dg-warning "29: field precision specifier '\\.\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %.*ld ", foo + bar, foo);
+                           ══∧══    ════╤════
+                             │          │
+                             int        long int
+   { dg-end-multiline-output "" } */
+}
+
+void test_spurious_percent (void)
+{
+  printf("hello world %"); /* { dg-warning "23: spurious trailing" } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello world %");
+                       ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_empty_precision (char *s, size_t m, double d)
+{
+  strfmon (s, m, "%#.5n", d); /* { dg-warning "20: empty left precision in gnu_strfmon format" } */
+/* { dg-begin-multiline-output "" }
+   strfmon (s, m, "%#.5n", d);
+                    ∧
+   { dg-end-multiline-output "" } */
+
+  strfmon (s, m, "%#5.n", d); /* { dg-warning "22: empty precision in gnu_strfmon format" } */
+/* { dg-begin-multiline-output "" }
+   strfmon (s, m, "%#5.n", d);
+                      ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_repeated (int i)
+{
+  printf ("%++d", i); /* { dg-warning "14: repeated '\\+' flag in format" } */
+/* { dg-begin-multiline-output "" }
+   printf ("%++d", i);
+              ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_conversion_lacks_type (void)
+{
+  printf (" %h"); /* { dg-warning "14:conversion lacks type at end of format" } */
+/* { dg-begin-multiline-output "" }
+   printf (" %h");
+              ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_embedded_nul (void)
+{
+  printf (" \0 "); /* { dg-warning "13:embedded" "warning for embedded NUL" } */
+/* { dg-begin-multiline-output "" }
+   printf (" \0 ");
+             ∧═
+   { dg-end-multiline-output "" } */
+}
+
+void test_macro (const char *msg)
+{
+#define INT_FMT "%i" /* { dg-message "19: format string is defined here" } */
+  printf("hello " INT_FMT " world", msg);  /* { dg-warning "10: format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+/* { dg-begin-multiline-output "" }
+   printf("hello " INT_FMT " world", msg);
+          ∧═══════                   ╤══
+                                     │
+                                     const char *
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+ #define INT_FMT "%i"
+                  ═∧
+                   │
+                   int
+                  %s
+   { dg-end-multiline-output "" } */
+#undef INT_FMT
+}
+
+void test_macro_2 (const char *msg)
+{
+#define PRIu32 "u" /* { dg-message "17: format string is defined here" } */
+  printf("hello %" PRIu32 " world", msg);  /* { dg-warning "10: format '%u' expects argument of type 'unsigned int', but argument 2 has type 'const char \\*' " } */
+/* { dg-begin-multiline-output "" }
+   printf("hello %" PRIu32 " world", msg);
+          ∧════════                  ╤══
+                                     │
+                                     const char *
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+ #define PRIu32 "u"
+                 ∧
+                 │
+                 unsigned int
+   { dg-end-multiline-output "" } */
+#undef PRIu32
+}
+
+void test_macro_3 (const char *msg)
+{
+#define FMT_STRING "hello %i world" /* { dg-line test_macro_3_macro_line } */
+  /* { dg-warning "20: format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*'" "" { target *-*-*} .-1 } */
+  printf(FMT_STRING, msg);  /* { dg-message "10: in expansion of macro 'FMT_STRING" } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                    ∧═══════════════
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+   printf(FMT_STRING, msg);
+          ∧═════════
+   { dg-end-multiline-output "" } */
+/* { dg-message "28: format string is defined here" "" { target *-*-* } test_macro_3_macro_line } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                           ═∧
+                            │
+                            int
+                           %s
+   { dg-end-multiline-output "" } */
+#undef FMT_STRING
+}
+
+void test_macro_4 (const char *msg)
+{
+#define FMT_STRING "hello %i world" /* { dg-warning "20: format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+  printf(FMT_STRING "\n", msg);  /* { dg-message "10: in expansion of macro 'FMT_STRING" } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                    ∧═══════════════
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+   printf(FMT_STRING "\n", msg);
+          ∧═════════
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                           ═∧
+                            │
+                            int
+                           %s
+   { dg-end-multiline-output "" } */
+#undef FMT_STRING
+}
+
+void test_non_contiguous_strings (void)
+{
+  __builtin_printf(" %" "d ", 0.5); /* { dg-warning "20: format .%d. expects argument of type .int., but argument 2 has type .double." } */
+                                    /* { dg-message "26: format string is defined here" "" { target *-*-* } .-1 } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_printf(" %" "d ", 0.5);
+                    ∧═══       ╤══
+                               │
+                               double
+   { dg-end-multiline-output "" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_printf(" %" "d ", 0.5);
+                      ════∧
+                          │
+                          int
+                      %" "f
+   { dg-end-multiline-output "" } */
+}
+
+void test_const_arrays (void)
+{
+  /* TODO: ideally we'd highlight both the format string *and* the use of
+     it here.  For now, just verify that we gracefully handle this case.  */
+  const char a[] = " %d ";
+  __builtin_printf(a, 0.5); /* { dg-warning "20: format .%d. expects argument of type .int., but argument 2 has type .double." } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_printf(a, 0.5);
+                    ∧  ╤══
+                       │
+                       double
+   { dg-end-multiline-output "" } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
index 482dbda47f7..e662ec43347 100644
--- a/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
@@ -289,11 +289,11 @@ test_show_locus (function *fun)
       location_t caret_b = get_loc (line, 11);
       rich_location richloc (line_table, caret_a);
       add_range (&richloc, caret_b, caret_b, SHOW_RANGE_WITH_CARET);
-      global_dc->caret_chars[0] = 'A';
-      global_dc->caret_chars[1] = 'B';
+      global_dc->caret_chars[0] = "A";
+      global_dc->caret_chars[1] = "B";
       warning_at (&richloc, 0, "test");
-      global_dc->caret_chars[0] = '^';
-      global_dc->caret_chars[1] = '^';
+      global_dc->caret_chars[0] = global_dc->drawing.default_caret;
+      global_dc->caret_chars[1] = global_dc->drawing.default_caret;
     }
 
   /* Tests of rendering fixit hints.  */
@@ -407,11 +407,11 @@ test_show_locus (function *fun)
       location_t caret_b = get_loc (line - 1, 19);
       rich_location richloc (line_table, caret_a);
       richloc.add_range (caret_b, SHOW_RANGE_WITH_CARET);
-      global_dc->caret_chars[0] = '1';
-      global_dc->caret_chars[1] = '2';
+      global_dc->caret_chars[0] = "1";
+      global_dc->caret_chars[1] = "2";
       warning_at (&richloc, 0, "test");
-      global_dc->caret_chars[0] = '^';
-      global_dc->caret_chars[1] = '^';
+      global_dc->caret_chars[0] = global_dc->drawing.default_caret;
+      global_dc->caret_chars[1] = global_dc->drawing.default_caret;
     }
 
   /* Example of using the "%q+D" format code, which as well as printing
diff --git a/gcc/tree-diagnostic-path.cc b/gcc/tree-diagnostic-path.cc
index 164df86037e..293d9ffd10d 100644
--- a/gcc/tree-diagnostic-path.cc
+++ b/gcc/tree-diagnostic-path.cc
@@ -328,11 +328,13 @@ print_path_summary_as_text (const path_summary *ps, diagnostic_context *dc,
 	  if (range->m_stack_depth > prev_range->m_stack_depth)
 	    {
 	      /* Show pushed stack frame(s).  */
-	      const char *push_prefix = "+--> ";
 	      pp_string (pp, start_line_color);
-	      pp_string (pp, push_prefix);
+	      pp_string (pp, dc->drawing.corner_sw);
+	      pp_string (pp, dc->drawing.horizontal1);
+	      pp_string (pp, dc->drawing.horizontal1);
+	      pp_string (pp, "> ");
+	      cur_indent += 5;
 	      pp_string (pp, end_line_color);
-	      cur_indent += strlen (push_prefix);
 	    }
 	}
       if (range->m_fndecl)
@@ -354,7 +356,7 @@ print_path_summary_as_text (const path_summary *ps, diagnostic_context *dc,
       {
 	write_indent (pp, cur_indent + per_frame_indent);
 	pp_string (pp, start_line_color);
-	pp_string (pp, "|");
+	pp_string (pp, dc->drawing.vertical);
 	pp_string (pp, end_line_color);
 	pp_newline (pp);
 
@@ -364,7 +366,7 @@ print_path_summary_as_text (const path_summary *ps, diagnostic_context *dc,
 	  pretty_printer tmp_pp;
 	  write_indent (&tmp_pp, cur_indent + per_frame_indent);
 	  pp_string (&tmp_pp, start_line_color);
-	  pp_string (&tmp_pp, "|");
+	  pp_string (&tmp_pp, dc->drawing.vertical);
 	  pp_string (&tmp_pp, end_line_color);
 	  prefix = xstrdup (pp_formatted_text (&tmp_pp));
 	}
@@ -375,7 +377,7 @@ print_path_summary_as_text (const path_summary *ps, diagnostic_context *dc,
 
 	write_indent (pp, cur_indent + per_frame_indent);
 	pp_string (pp, start_line_color);
-	pp_string (pp, "|");
+	pp_string (pp, dc->drawing.vertical);
 	pp_string (pp, end_line_color);
 	pp_newline (pp);
       }
@@ -400,18 +402,23 @@ print_path_summary_as_text (const path_summary *ps, diagnostic_context *dc,
 		    = vbar_for_next_frame - per_frame_indent;
 		  write_indent (pp, vbar_for_next_frame);
 		  pp_string (pp, start_line_color);
+		  int col = indent_for_next_frame + per_frame_indent;
+		  if (dc->drawing.enabled)
+		    {
+		      pp_string (pp, dc->drawing.corner_nw);
+		      ++col;
+		    }
 		  pp_character (pp, '<');
-		  for (int i = indent_for_next_frame + per_frame_indent;
-		       i < cur_indent + per_frame_indent - 1; i++)
-		    pp_character (pp, '-');
-		  pp_character (pp, '+');
+		  for (; col < cur_indent + per_frame_indent - 1; col++)
+		    pp_string (pp, dc->drawing.horizontal1);
+		  pp_string (pp, dc->drawing.corner_se);
 		  pp_string (pp, end_line_color);
 		  pp_newline (pp);
 		  cur_indent = indent_for_next_frame;
 
 		  write_indent (pp, vbar_for_next_frame);
 		  pp_string (pp, start_line_color);
-		  pp_printf (pp, "|");
+		  pp_string (pp, dc->drawing.vertical);
 		  pp_string (pp, end_line_color);
 		  pp_newline (pp);
 		}
diff mbox series

Patch

diff --git a/gcc/common.opt b/gcc/common.opt
index a3893a4725e..1839e3471a4 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1386,6 +1386,26 @@  ftabstop=
 Common Joined RejectNegative UInteger
 -ftabstop=<number>      Distance between tab stops for column reporting.
 
+fdiagnostics-unicode-drawing
+Common Alias(fdiagnostics-unicode-drawing=,always,never)
+;
+
+fdiagnostics-unicode-drawing=
+Common Joined RejectNegative Enum(diagnostics_unicode_drawing)
+-fdiagnostics-unicode-drawing=[never|always|auto]	Use Unicode drawing characters in diagnostics.
+
+Enum
+Name(diagnostics_unicode_drawing) Type(int)
+
+EnumValue
+Enum(diagnostics_unicode_drawing) String(never) Value(DIAGNOSTICS_UNICODE_DRAWING_NO)
+
+EnumValue
+Enum(diagnostics_unicode_drawing) String(always) Value(DIAGNOSTICS_UNICODE_DRAWING_YES)
+
+EnumValue
+Enum(diagnostics_unicode_drawing) String(auto) Value(DIAGNOSTICS_UNICODE_DRAWING_AUTO)
+
 Enum
 Name(diagnostic_path_format) Type(int)
 
diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index da3c5b6a92d..a945307a71c 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -62,6 +62,7 @@  struct point_state
 {
   int range_idx;
   bool draw_caret_p;
+  bool has_label_p;
 };
 
 /* A class to inject colorization codes when printing the diagnostic locus.
@@ -1467,7 +1468,7 @@  layout::print_source_line (linenum_type row, const char *line, int line_bytes)
       int width = num_digits (row);
       for (int i = 0; i < m_linenum_width - width; i++)
 	pp_space (m_pp);
-      pp_printf (m_pp, "%i | ", row);
+      pp_printf (m_pp, "%i %s ", row, m_context->drawing.vertical);
     }
   else
     pp_space (m_pp);
@@ -1596,7 +1597,8 @@  layout::start_annotation_line (char margin_char) const
 	pp_space (m_pp);
       for (; i < m_linenum_width; i++)
 	pp_character (m_pp, margin_char);
-      pp_string (m_pp, " |");
+      pp_space (m_pp);
+      pp_string (m_pp, m_context->drawing.vertical);
     }
 }
 
@@ -1628,15 +1630,21 @@  layout::print_annotation_line (linenum_type row, const line_bounds lbounds)
 	  if (state.draw_caret_p)
 	    {
 	      /* Draw the caret.  */
-	      char caret_char;
+	      const char *caret_char;
 	      if (state.range_idx < rich_location::STATICALLY_ALLOCATED_RANGES)
 		caret_char = m_context->caret_chars[state.range_idx];
 	      else
-		caret_char = '^';
-	      pp_character (m_pp, caret_char);
+		caret_char = m_context->drawing.default_caret;
+	      pp_string (m_pp, caret_char);
 	    }
 	  else
-	    pp_character (m_pp, '~');
+	    {
+	      const char *const underline
+		= state.has_label_p
+		? m_context->drawing.tee_down2
+		: m_context->drawing.horizontal2;
+	      pp_string (m_pp, underline);
+	    }
 	}
       else
 	{
@@ -1821,7 +1829,7 @@  layout::print_any_labels (linenum_type row)
 		gcc_assert (column <= label->m_column);
 		move_to_column (&column, label->m_column, true);
 		m_colorizer.set_range (label->m_state_idx);
-		pp_character (m_pp, '|');
+		pp_string (m_pp, m_context->drawing.vertical);
 		m_colorizer.set_normal_text ();
 		column++;
 	      }
@@ -2343,7 +2351,7 @@  layout::print_trailing_fixits (linenum_type row)
 	      move_to_column (&column, start_column, true);
 	      m_colorizer.set_fixit_delete ();
 	      for (; column <= finish_column; column++)
-		pp_character (m_pp, '-');
+		pp_string (m_pp, m_context->drawing.horizontal1);
 	      m_colorizer.set_normal_text ();
 	    }
 	  /* Print the replacement text.  REPLACE also covers
@@ -2400,12 +2408,19 @@  layout::get_state_at_point (/* Inputs.  */
 	{
 	  out_state->range_idx = i;
 
-	  /* Are we at the range's caret?  is it visible? */
+	  /* If we are at the range's caret, check if it is visible and check
+	     if a label line will be output directly below this point, since
+	     that enables unicode drawing to use a nicer "tee" character to
+	     mesh seamlessly with the label line.  */
 	  out_state->draw_caret_p = false;
-	  if (range->m_range_display_kind == SHOW_RANGE_WITH_CARET
-	      && row == range->m_caret.m_line
+	  out_state->has_label_p = false;
+	  if (row == range->m_caret.m_line
 	      && column == range->m_caret.m_columns[col_unit])
-	    out_state->draw_caret_p = true;
+	    {
+	      out_state->draw_caret_p
+		= (range->m_range_display_kind == SHOW_RANGE_WITH_CARET);
+	      out_state->has_label_p = (range->m_label != NULL);
+	    }
 
 	  /* Within a multiline range, don't display any underline
 	     in any leading or trailing whitespace on a line.
@@ -2976,19 +2991,19 @@  test_one_liner_multiple_carets_and_ranges ()
     = make_location (linemap_position_for_column (line_table, 2),
 		     linemap_position_for_column (line_table, 1),
 		     linemap_position_for_column (line_table, 3));
-  dc.caret_chars[0] = 'A';
+  dc.caret_chars[0] = "A";
 
   location_t bar
     = make_location (linemap_position_for_column (line_table, 8),
 		     linemap_position_for_column (line_table, 7),
 		     linemap_position_for_column (line_table, 9));
-  dc.caret_chars[1] = 'B';
+  dc.caret_chars[1] = "B";
 
   location_t field
     = make_location (linemap_position_for_column (line_table, 13),
 		     linemap_position_for_column (line_table, 11),
 		     linemap_position_for_column (line_table, 15));
-  dc.caret_chars[2] = 'C';
+  dc.caret_chars[2] = "C";
 
   rich_location richloc (line_table, foo);
   richloc.add_range (bar, SHOW_RANGE_WITH_CARET);
@@ -3584,19 +3599,19 @@  test_one_liner_multiple_carets_and_ranges_utf8 ()
     = make_location (linemap_position_for_column (line_table, 7),
 		     linemap_position_for_column (line_table, 1),
 		     linemap_position_for_column (line_table, 8));
-  dc.caret_chars[0] = 'A';
+  dc.caret_chars[0] = "A";
 
   location_t bar
     = make_location (linemap_position_for_column (line_table, 16),
 		     linemap_position_for_column (line_table, 12),
 		     linemap_position_for_column (line_table, 17));
-  dc.caret_chars[1] = 'B';
+  dc.caret_chars[1] = "B";
 
   location_t field
     = make_location (linemap_position_for_column (line_table, 26),
 		     linemap_position_for_column (line_table, 19),
 		     linemap_position_for_column (line_table, 30));
-  dc.caret_chars[2] = 'C';
+  dc.caret_chars[2] = "C";
   rich_location richloc (line_table, foo);
   richloc.add_range (bar, SHOW_RANGE_WITH_CARET);
   richloc.add_range (field, SHOW_RANGE_WITH_CARET);
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 1b6c9845892..131b7220dcb 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -167,6 +167,14 @@  default_diagnostic_final_cb (diagnostic_context *context)
     }
 }
 
+/* Store the carets in an unnamed namespace so we can verify whether
+   or not a front-end has overridden them.  */
+namespace {
+  const char plain_caret[] = "^";
+  /* U+2227 = Logical And */
+  const char fancy_caret[] = "\xE2\x88\xA7";
+}
+
 /* Initialize the diagnostic message outputting machinery.  */
 void
 diagnostic_initialize (diagnostic_context *context, int n_opts)
@@ -187,7 +195,7 @@  diagnostic_initialize (diagnostic_context *context, int n_opts)
   context->show_caret = false;
   diagnostic_set_caret_max_width (context, pp_line_cutoff (context->printer));
   for (i = 0; i < rich_location::STATICALLY_ALLOCATED_RANGES; i++)
-    context->caret_chars[i] = '^';
+    context->caret_chars[i] = plain_caret;
   context->show_cwe = false;
   context->path_format = DPF_NONE;
   context->show_path_depths = false;
@@ -229,6 +237,7 @@  diagnostic_initialize (diagnostic_context *context, int n_opts)
   context->begin_group_cb = NULL;
   context->end_group_cb = NULL;
   context->final_cb = default_diagnostic_final_cb;
+  diagnostic_drawing_init (context, DIAGNOSTICS_UNICODE_DRAWING_AUTO);
 }
 
 /* Maybe initialize the color support. We require clients to do this
@@ -286,6 +295,83 @@  diagnostic_urls_init (diagnostic_context *context, int value /*= -1 */)
     = determine_url_format ((diagnostic_url_rule_t) value);
 }
 
+/* Initialize unicode drawing support in CONTEXT if requested, or, in auto
+   mode, if the locale supports it.  */
+void
+diagnostic_drawing_init (diagnostic_context *context,
+			 enum diagnostics_drawing_rule rule)
+{
+  switch (rule)
+    {
+    case DIAGNOSTICS_UNICODE_DRAWING_NO:
+      context->drawing.enabled = false;
+      break;
+    case DIAGNOSTICS_UNICODE_DRAWING_YES:
+      context->drawing.enabled = true;
+      break;
+    case DIAGNOSTICS_UNICODE_DRAWING_AUTO:
+      context->drawing.enabled = locale_utf8;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  if (context->drawing.enabled)
+    {
+      /* U+2500 = Box Drawings Light Horizontal */
+      context->drawing.horizontal1 = "\xE2\x94\x80";
+
+      /* U+2550 = Box Drawings Double Horizontal */
+      context->drawing.horizontal2 = "\xE2\x95\x90";
+
+      /* U+252C = Box Drawings Light Down and Horizontal */
+      context->drawing.tee_down1 = "\xE2\x94\xAC";
+
+      /* U+2564 = Box Drawings Down Single and Horizontal Double */
+      context->drawing.tee_down2 = "\xE2\x95\xA4";
+
+      /* U+2502 = Box Drawings Light Vertical */
+      context->drawing.vertical = "\xE2\x94\x82";
+
+      /* U+250C = Box Drawings Light Down and Right */
+      context->drawing.corner_nw = "\xE2\x94\x8C";
+
+      /* U+2510 = Box Drawings Light Down and Left */
+      context->drawing.corner_ne = "\xE2\x94\x90";
+
+      /* U+2518 = Box Drawings Light Up and Left */
+      context->drawing.corner_se = "\xE2\x94\x98";
+
+      /* U+2514 = Box Drawings Light Up and Right */
+      context->drawing.corner_sw = "\xE2\x94\x94";
+
+      context->drawing.default_caret = fancy_caret;
+    }
+  else
+    {
+      context->drawing.horizontal1 = "-";
+      context->drawing.tee_down1 = "-";
+      context->drawing.horizontal2 = "~";
+      context->drawing.tee_down2 = "~";
+      context->drawing.vertical = "|";
+      context->drawing.corner_nw = "+";
+      context->drawing.corner_ne = "+";
+      context->drawing.corner_se = "+";
+      context->drawing.corner_sw = "+";
+      context->drawing.default_caret = plain_caret;
+    }
+
+  /* Configure the customizable carets, unless a front-end has already changed
+     them to something else.  */
+  for (int i = 0; i != rich_location::STATICALLY_ALLOCATED_RANGES; ++i)
+    {
+      if (context->caret_chars[i] == plain_caret
+	  || context->caret_chars[i] == fancy_caret)
+	{
+	  context->caret_chars[i] = context->drawing.default_caret;
+	}
+    }
+}
+
 /* Do any cleaning up required after the last diagnostic is emitted.  */
 
 void
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 4051601abfd..329ed555139 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -159,8 +159,9 @@  struct diagnostic_context
   /* Maximum width of the source line printed.  */
   int caret_max_width;
 
-  /* Character used for caret diagnostics.  */
-  char caret_chars[rich_location::STATICALLY_ALLOCATED_RANGES];
+  /* Characters used for caret diagnostics.  These are strings to accommodate
+     multibyte characters, but they should have display width 1.  */
+  const char *caret_chars[rich_location::STATICALLY_ALLOCATED_RANGES];
 
   /* True if we should print any CWE identifiers associated with
      diagnostics.  */
@@ -326,6 +327,23 @@  struct diagnostic_context
 
   /* Callback for final cleanup.  */
   void (*final_cb) (diagnostic_context *context);
+
+  /* Some output elements that can be made to look nicer if UTF-8 output
+     is available.  */
+  struct
+  {
+    bool enabled;
+    const char *horizontal1;
+    const char *horizontal2;
+    const char *tee_down1;
+    const char *tee_down2;
+    const char *vertical;
+    const char *corner_nw;
+    const char *corner_ne;
+    const char *corner_se;
+    const char *corner_sw;
+    const char *default_caret;
+  } drawing;
 };
 
 static inline void
@@ -396,6 +414,16 @@  diagnostic_override_option_index (diagnostic_info *info, int optidx)
 extern void diagnostic_initialize (diagnostic_context *, int);
 extern void diagnostic_color_init (diagnostic_context *, int value = -1);
 extern void diagnostic_urls_init (diagnostic_context *, int value = -1);
+
+enum diagnostics_drawing_rule
+{
+  DIAGNOSTICS_UNICODE_DRAWING_NO,
+  DIAGNOSTICS_UNICODE_DRAWING_YES,
+  DIAGNOSTICS_UNICODE_DRAWING_AUTO
+};
+extern void diagnostic_drawing_init (diagnostic_context *,
+				     enum diagnostics_drawing_rule rule);
+
 extern void diagnostic_finish (diagnostic_context *);
 extern void diagnostic_report_current_module (diagnostic_context *, location_t);
 extern void diagnostic_show_locus (diagnostic_context *,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ba18e05fb1a..6ddbe556667 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -282,6 +282,7 @@  Objective-C and Objective-C++ Dialects}.
 @gccoptlist{-fmessage-length=@var{n}  @gol
 -fdiagnostics-show-location=@r{[}once@r{|}every-line@r{]}  @gol
 -fdiagnostics-color=@r{[}auto@r{|}never@r{|}always@r{]}  @gol
+-fdiagnostics-unicode-drawing=@r{[}auto@r{|}never@r{|}always@r{]}  @gol
 -fdiagnostics-urls=@r{[}auto@r{|}never@r{|}always@r{]}  @gol
 -fdiagnostics-format=@r{[}text@r{|}json@r{]}  @gol
 -fno-diagnostics-show-option  -fno-diagnostics-show-caret @gol
@@ -4423,6 +4424,16 @@  SGR substring for highlighting mismatching types within template
 arguments in the C++ frontend.
 @end table
 
+@item -fdiagnostics-unicode-drawing[=@var{WHEN}]
+@opindex fdiagnostics-unicode-drawing
+@opindex fno-diagnostics-unicode-drawing
+Use extended characters from the Unicode Standard (such as line drawing
+characters) to improve the appearance of diagnostics.
+
+@var{WHEN} is @samp{never}, @samp{always}, or @samp{auto}.
+@samp{auto}, the default, enables the feature provided the user's locale
+supports UTF-8.
+
 @item -fdiagnostics-urls[=@var{WHEN}]
 @opindex fdiagnostics-urls
 @cindex urls
diff --git a/gcc/fortran/error.c b/gcc/fortran/error.c
index dacc1d7ba51..68be60309ce 100644
--- a/gcc/fortran/error.c
+++ b/gcc/fortran/error.c
@@ -1532,8 +1532,8 @@  gfc_diagnostics_init (void)
   global_dc->start_span = gfc_diagnostic_start_span;
   diagnostic_finalizer (global_dc) = gfc_diagnostic_finalizer;
   diagnostic_format_decoder (global_dc) = gfc_format_decoder;
-  global_dc->caret_chars[0] = '1';
-  global_dc->caret_chars[1] = '2';
+  global_dc->caret_chars[0] = "1";
+  global_dc->caret_chars[1] = "2";
   pp_warning_buffer = new (XNEW (output_buffer)) output_buffer ();
   pp_warning_buffer->flush_p = false;
   /* pp_error_buffer is statically allocated.  This simplifies memory
@@ -1550,6 +1550,6 @@  gfc_diagnostics_finish (void)
      defaults.  */
   diagnostic_starter (global_dc) = gfc_diagnostic_starter;
   diagnostic_finalizer (global_dc) = gfc_diagnostic_finalizer;
-  global_dc->caret_chars[0] = '^';
-  global_dc->caret_chars[1] = '^';
+  global_dc->caret_chars[0] = global_dc->drawing.default_caret;
+  global_dc->caret_chars[1] = global_dc->drawing.default_caret;
 }
diff --git a/gcc/opts.c b/gcc/opts.c
index 499eb900643..653580e11de 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -2395,6 +2395,10 @@  common_handle_option (struct gcc_options *opts,
       diagnostic_urls_init (dc, value);
       break;
 
+    case OPT_fdiagnostics_unicode_drawing_:
+      diagnostic_drawing_init (dc, (enum diagnostics_drawing_rule)value);
+      break;
+
     case OPT_fdiagnostics_format_:
       diagnostic_output_format_init (dc,
 				     (enum diagnostics_output_format)value);
diff --git a/gcc/selftest-diagnostic.c b/gcc/selftest-diagnostic.c
index 82fddca89ab..3bce8e58048 100644
--- a/gcc/selftest-diagnostic.c
+++ b/gcc/selftest-diagnostic.c
@@ -36,6 +36,10 @@  namespace selftest {
 test_diagnostic_context::test_diagnostic_context ()
 {
   diagnostic_initialize (this, 0);
+
+  /* Disable unicode drawing to make it simpler to write selftest code.  */
+  diagnostic_drawing_init (this, DIAGNOSTICS_UNICODE_DRAWING_NO);
+
   show_caret = true;
   show_labels_p = true;
   show_column = true;
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-5-utf8.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-5-utf8.c
new file mode 100644
index 00000000000..6f65cf9a2da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-5-utf8.c
@@ -0,0 +1,71 @@ 
+/* { dg-additional-options "-fdiagnostics-show-line-numbers -fdiagnostics-path-format=inline-events -fdiagnostics-show-caret -fdiagnostics-unicode-drawing" } */
+
+/* This tests the same functionality as setjmp-5.c, but enables unicode drawing
+   so as to exercise those features.
+   n.b. NN line numbers do not support unicode drawing mode, so we do not
+   make use of that here.  */
+
+#include "test-setjmp.h"
+#include <stddef.h>
+#include "analyzer-decls.h"
+
+static jmp_buf env;
+
+static void inner (void)
+{
+  SETJMP (env);
+}
+
+void outer (void)
+{
+  int i;
+
+  inner ();
+
+  longjmp (env, 42); /* { dg-warning "'longjmp' called after enclosing function of 'setjmp' has returned" } */
+}
+
+/* { dg-begin-multiline-output "" }
+   25 │   longjmp (env, 42);
+      │   ∧════════════════
+  'outer': events 1-2
+    │
+    │   19 │ void outer (void)
+    │      │      ∧════
+    │      │      │
+    │      │      (1) entry to 'outer'
+    │......
+    │   23 │   inner ();
+    │      │   ╤═══════
+    │      │   │
+    │      │   (2) calling 'inner' from 'outer'
+    │
+    └──> 'inner': event 3
+           │
+           │   14 │ static void inner (void)
+           │      │             ∧════
+           │      │             │
+           │      │             (3) entry to 'inner'
+           │
+         'inner': event 4
+           │
+           │   16 │   SETJMP (env);
+           │      │   ∧═════
+           │      │   │
+           │      │   (4) 'setjmp' called here
+           │
+    ┌<─────┘
+    │
+  'outer': events 5-6
+    │
+    │   23 │   inner ();
+    │      │   ∧═══════
+    │      │   │
+    │      │   (5) returning to 'outer' from 'inner'
+    │   24 │ 
+    │   25 │   longjmp (env, 42);
+    │      │   ╤════════════════
+    │      │   │
+    │      │   (6) here
+    │
+    { dg-end-multiline-output "" } */
diff --git a/gcc/testsuite/gcc.dg/format/diagnostic-ranges-utf8.c b/gcc/testsuite/gcc.dg/format/diagnostic-ranges-utf8.c
new file mode 100644
index 00000000000..ac7638e003a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/format/diagnostic-ranges-utf8.c
@@ -0,0 +1,394 @@ 
+/* { dg-options "-Wformat -fdiagnostics-show-caret -fdiagnostics-unicode-drawing" } */
+
+/* This performs the same tests as diagnostic-ranges.c, but it enables unicode
+   drawing so that this can be tested.  */
+
+
+#include "format.h"
+
+void test_mismatching_types (const char *msg)
+{
+  printf("hello %i", msg);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello %i", msg);
+                 ═∧   ╤══
+                  │   │
+                  int const char *
+                 %s
+   { dg-end-multiline-output "" } */
+
+
+  printf("hello %s", 42);  /* { dg-warning "format '%s' expects argument of type 'char \\*', but argument 2 has type 'int'" } */
+/* { dg-begin-multiline-output "" }
+   printf("hello %s", 42);
+                 ═∧   ╤═
+                  │   │
+                  │   int
+                  char *
+                 %d
+   { dg-end-multiline-output "" } */
+
+  printf("hello %i", (long)0);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'long int' " } */
+/* { dg-begin-multiline-output "" }
+   printf("hello %i", (long)0);
+                 ═∧   ╤══════
+                  │   │
+                  int long int
+                 %li
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple_arguments (void)
+{
+  printf ("arg0: %i  arg1: %s arg 2: %i", /* { dg-warning "29: format '%s'" } */
+          100, 101, 102);
+/* { dg-begin-multiline-output "" }
+   printf ("arg0: %i  arg1: %s arg 2: %i",
+                            ═∧
+                             │
+                             char *
+                            %d
+           100, 101, 102);
+                ╤══           
+                │
+                int
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple_arguments_2 (int i, int j)
+{
+  printf ("arg0: %i  arg1: %s arg 2: %i", /* { dg-warning "29: format '%s'" } */
+          100, i + j, 102);
+/* { dg-begin-multiline-output "" }
+   printf ("arg0: %i  arg1: %s arg 2: %i",
+                            ═∧
+                             │
+                             char *
+                            %d
+           100, i + j, 102);
+                ══╤══         
+                  │
+                  int
+   { dg-end-multiline-output "" } */
+}
+
+void multiline_format_string (void) {
+  printf ("before the fmt specifier" /* { dg-warning "11: format '%d' expects a matching 'int' argument" } */
+/* { dg-begin-multiline-output "" }
+   printf ("before the fmt specifier"
+           ∧═════════════════════════
+   { dg-end-multiline-output "" } */
+
+          "%"
+          "d" /* { dg-message "12: format string is defined here" } */
+          "after the fmt specifier");
+
+/* { dg-begin-multiline-output "" }
+           "%"
+            ══
+           "d"
+           ═∧
+            │
+            int
+   { dg-end-multiline-output "" } */
+}
+
+void test_hex (const char *msg)
+{
+  /* "%" is \x25
+     "i" is \x69 */
+  printf("hello \x25\x69", msg);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello \x25\x69", msg);
+                 ════∧═══   ╤══
+                     │      │
+                     int    const char *
+                 \x25s
+   { dg-end-multiline-output "" } */
+}
+
+void test_oct (const char *msg)
+{
+  /* "%" is octal 045
+     "i" is octal 151.  */
+  printf("hello \045\151", msg);  /* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello \045\151", msg);
+                 ════∧═══   ╤══
+                     │      │
+                     int    const char *
+                 \045s
+   { dg-end-multiline-output "" } */
+}
+
+void test_multiple (const char *msg)
+{
+  /* "%" is \x25 in hex
+     "i" is \151 in octal.  */
+  printf("prefix"  "\x25"  "\151"  "suffix",  /* { dg-warning "format '%i'" } */
+         msg);
+/* { dg-begin-multiline-output "" }
+   printf("prefix"  "\x25"  "\151"  "suffix",
+          ∧═══════
+          msg);
+          ╤══
+          │
+          const char *
+  { dg-end-multiline-output "" } */
+
+/* { dg-begin-multiline-output "" }
+   printf("prefix"  "\x25"  "\151"  "suffix",
+                     ════════∧═══
+                             │
+                             int
+                     \x25"  "s
+  { dg-end-multiline-output "" } */
+}
+
+void test_u8 (const char *msg)
+{
+  printf(u8"hello %i", msg);/* { dg-warning "format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+/* { dg-begin-multiline-output "" }
+   printf(u8"hello %i", msg);
+                   ═∧   ╤══
+                    │   │
+                    int const char *
+                   %s
+   { dg-end-multiline-output "" } */
+}
+
+void test_param (long long_i, long long_j)
+{
+  printf ("foo %s bar", long_i + long_j); /* { dg-warning "17: format '%s' expects argument of type 'char \\*', but argument 2 has type 'long int'" } */
+/* { dg-begin-multiline-output "" }
+   printf ("foo %s bar", long_i + long_j);
+                ═∧       ═══════╤═══════
+                 │              │
+                 char *         long int
+                %ld
+   { dg-end-multiline-output "" } */
+}
+
+void test_field_width_specifier (long l, int i1, int i2)
+{
+  printf (" %*.*d ", l, i1, i2); /* { dg-warning "14: field width specifier '\\*' expects argument of type 'int', but argument 2 has type 'long int'" } */
+/* { dg-begin-multiline-output "" }
+   printf (" %*.*d ", l, i1, i2);
+             ═∧═══    ╤
+              │       │
+              int     long int
+   { dg-end-multiline-output "" } */
+}
+
+/* PR c/72857.  */
+
+void test_field_width_specifier_2 (char *d, long foo, long bar)
+{
+  __builtin_sprintf (d, " %*ld ", foo, foo); /* { dg-warning "28: field width specifier '\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %*ld ", foo, foo);
+                           ═∧══    ╤══
+                            │      │
+                            int    long int
+   { dg-end-multiline-output "" } */
+
+  __builtin_sprintf (d, " %*ld ", foo + bar, foo); /* { dg-warning "28: field width specifier '\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %*ld ", foo + bar, foo);
+                           ═∧══    ════╤════
+                            │          │
+                            int        long int
+   { dg-end-multiline-output "" } */
+}
+
+void test_field_precision_specifier (char *d, long foo, long bar)
+{
+  __builtin_sprintf (d, " %.*ld ", foo, foo); /* { dg-warning "29: field precision specifier '\\.\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %.*ld ", foo, foo);
+                           ══∧══    ╤══
+                             │      │
+                             int    long int
+   { dg-end-multiline-output "" } */
+
+  __builtin_sprintf (d, " %.*ld ", foo + bar, foo); /* { dg-warning "29: field precision specifier '\\.\\*' expects argument of type 'int', but argument 3 has type 'long int'" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_sprintf (d, " %.*ld ", foo + bar, foo);
+                           ══∧══    ════╤════
+                             │          │
+                             int        long int
+   { dg-end-multiline-output "" } */
+}
+
+void test_spurious_percent (void)
+{
+  printf("hello world %"); /* { dg-warning "23: spurious trailing" } */
+
+/* { dg-begin-multiline-output "" }
+   printf("hello world %");
+                       ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_empty_precision (char *s, size_t m, double d)
+{
+  strfmon (s, m, "%#.5n", d); /* { dg-warning "20: empty left precision in gnu_strfmon format" } */
+/* { dg-begin-multiline-output "" }
+   strfmon (s, m, "%#.5n", d);
+                    ∧
+   { dg-end-multiline-output "" } */
+
+  strfmon (s, m, "%#5.n", d); /* { dg-warning "22: empty precision in gnu_strfmon format" } */
+/* { dg-begin-multiline-output "" }
+   strfmon (s, m, "%#5.n", d);
+                      ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_repeated (int i)
+{
+  printf ("%++d", i); /* { dg-warning "14: repeated '\\+' flag in format" } */
+/* { dg-begin-multiline-output "" }
+   printf ("%++d", i);
+              ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_conversion_lacks_type (void)
+{
+  printf (" %h"); /* { dg-warning "14:conversion lacks type at end of format" } */
+/* { dg-begin-multiline-output "" }
+   printf (" %h");
+              ∧
+   { dg-end-multiline-output "" } */
+}
+
+void test_embedded_nul (void)
+{
+  printf (" \0 "); /* { dg-warning "13:embedded" "warning for embedded NUL" } */
+/* { dg-begin-multiline-output "" }
+   printf (" \0 ");
+             ∧═
+   { dg-end-multiline-output "" } */
+}
+
+void test_macro (const char *msg)
+{
+#define INT_FMT "%i" /* { dg-message "19: format string is defined here" } */
+  printf("hello " INT_FMT " world", msg);  /* { dg-warning "10: format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+/* { dg-begin-multiline-output "" }
+   printf("hello " INT_FMT " world", msg);
+          ∧═══════                   ╤══
+                                     │
+                                     const char *
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+ #define INT_FMT "%i"
+                  ═∧
+                   │
+                   int
+                  %s
+   { dg-end-multiline-output "" } */
+#undef INT_FMT
+}
+
+void test_macro_2 (const char *msg)
+{
+#define PRIu32 "u" /* { dg-message "17: format string is defined here" } */
+  printf("hello %" PRIu32 " world", msg);  /* { dg-warning "10: format '%u' expects argument of type 'unsigned int', but argument 2 has type 'const char \\*' " } */
+/* { dg-begin-multiline-output "" }
+   printf("hello %" PRIu32 " world", msg);
+          ∧════════                  ╤══
+                                     │
+                                     const char *
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+ #define PRIu32 "u"
+                 ∧
+                 │
+                 unsigned int
+   { dg-end-multiline-output "" } */
+#undef PRIu32
+}
+
+void test_macro_3 (const char *msg)
+{
+#define FMT_STRING "hello %i world" /* { dg-line test_macro_3_macro_line } */
+  /* { dg-warning "20: format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*'" "" { target *-*-*} .-1 } */
+  printf(FMT_STRING, msg);  /* { dg-message "10: in expansion of macro 'FMT_STRING" } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                    ∧═══════════════
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+   printf(FMT_STRING, msg);
+          ∧═════════
+   { dg-end-multiline-output "" } */
+/* { dg-message "28: format string is defined here" "" { target *-*-* } test_macro_3_macro_line } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                           ═∧
+                            │
+                            int
+                           %s
+   { dg-end-multiline-output "" } */
+#undef FMT_STRING
+}
+
+void test_macro_4 (const char *msg)
+{
+#define FMT_STRING "hello %i world" /* { dg-warning "20: format '%i' expects argument of type 'int', but argument 2 has type 'const char \\*' " } */
+  printf(FMT_STRING "\n", msg);  /* { dg-message "10: in expansion of macro 'FMT_STRING" } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                    ∧═══════════════
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+   printf(FMT_STRING "\n", msg);
+          ∧═════════
+   { dg-end-multiline-output "" } */
+/* { dg-begin-multiline-output "" }
+ #define FMT_STRING "hello %i world"
+                           ═∧
+                            │
+                            int
+                           %s
+   { dg-end-multiline-output "" } */
+#undef FMT_STRING
+}
+
+void test_non_contiguous_strings (void)
+{
+  __builtin_printf(" %" "d ", 0.5); /* { dg-warning "20: format .%d. expects argument of type .int., but argument 2 has type .double." } */
+                                    /* { dg-message "26: format string is defined here" "" { target *-*-* } .-1 } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_printf(" %" "d ", 0.5);
+                    ∧═══       ╤══
+                               │
+                               double
+   { dg-end-multiline-output "" } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_printf(" %" "d ", 0.5);
+                      ════∧
+                          │
+                          int
+                      %" "f
+   { dg-end-multiline-output "" } */
+}
+
+void test_const_arrays (void)
+{
+  /* TODO: ideally we'd highlight both the format string *and* the use of
+     it here.  For now, just verify that we gracefully handle this case.  */
+  const char a[] = " %d ";
+  __builtin_printf(a, 0.5); /* { dg-warning "20: format .%d. expects argument of type .int., but argument 2 has type .double." } */
+  /* { dg-begin-multiline-output "" }
+   __builtin_printf(a, 0.5);
+                    ∧  ╤══
+                       │
+                       double
+   { dg-end-multiline-output "" } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
index 482dbda47f7..e662ec43347 100644
--- a/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
@@ -289,11 +289,11 @@  test_show_locus (function *fun)
       location_t caret_b = get_loc (line, 11);
       rich_location richloc (line_table, caret_a);
       add_range (&richloc, caret_b, caret_b, SHOW_RANGE_WITH_CARET);
-      global_dc->caret_chars[0] = 'A';
-      global_dc->caret_chars[1] = 'B';
+      global_dc->caret_chars[0] = "A";
+      global_dc->caret_chars[1] = "B";
       warning_at (&richloc, 0, "test");
-      global_dc->caret_chars[0] = '^';
-      global_dc->caret_chars[1] = '^';
+      global_dc->caret_chars[0] = global_dc->drawing.default_caret;
+      global_dc->caret_chars[1] = global_dc->drawing.default_caret;
     }
 
   /* Tests of rendering fixit hints.  */
@@ -407,11 +407,11 @@  test_show_locus (function *fun)
       location_t caret_b = get_loc (line - 1, 19);
       rich_location richloc (line_table, caret_a);
       richloc.add_range (caret_b, SHOW_RANGE_WITH_CARET);
-      global_dc->caret_chars[0] = '1';
-      global_dc->caret_chars[1] = '2';
+      global_dc->caret_chars[0] = "1";
+      global_dc->caret_chars[1] = "2";
       warning_at (&richloc, 0, "test");
-      global_dc->caret_chars[0] = '^';
-      global_dc->caret_chars[1] = '^';
+      global_dc->caret_chars[0] = global_dc->drawing.default_caret;
+      global_dc->caret_chars[1] = global_dc->drawing.default_caret;
     }
 
   /* Example of using the "%q+D" format code, which as well as printing
diff --git a/gcc/testsuite/lib/c-compat.exp b/gcc/testsuite/lib/c-compat.exp
index 9493c214aea..be4f8549d5a 100644
--- a/gcc/testsuite/lib/c-compat.exp
+++ b/gcc/testsuite/lib/c-compat.exp
@@ -36,7 +36,7 @@  load_lib target-libpath.exp
 proc compat-use-alt-compiler { } {
     global GCC_UNDER_TEST ALT_CC_UNDER_TEST
     global compat_same_alt compat_alt_caret compat_alt_color compat_no_line_no
-    global compat_alt_urls
+    global compat_alt_urls compat_alt_drawing
     global TEST_ALWAYS_FLAGS
 
     # We don't need to do this if the alternate compiler is actually
@@ -52,6 +52,9 @@  proc compat-use-alt-compiler { } {
 	if { $compat_alt_urls == 0 } then {
 	    regsub -- "-fdiagnostics-urls=never" $TEST_ALWAYS_FLAGS "" TEST_ALWAYS_FLAGS
 	}
+	if { $compat_alt_drawing == 0 } then {
+	    regsub -- "-fdiagnostics-unicode-drawing=never" $TEST_ALWAYS_FLAGS "" TEST_ALWAYS_FLAGS
+	}
 	if { $compat_no_line_no == 0 } then {
 	    regsub -- "-fno-diagnostics-show-line-numbers" $TEST_ALWAYS_FLAGS "" TEST_ALWAYS_FLAGS
 	}
@@ -85,12 +88,14 @@  proc compat_setup_dfp { } {
     global compat_alt_caret
     global compat_alt_color
     global compat_alt_urls
+    global compat_alt_drawing
     global compat_no_line_no
     global TEST_ALWAYS_FLAGS compat_save_TEST_ALWAYS_FLAGS
 
     set compat_alt_caret 0
     set compat_alt_color 0
     set compat_alt_urls 0
+    set compat_alt_drawing 0
     set compat_no_line_no 0
     set compat_save_TEST_ALWAYS_FLAGS $TEST_ALWAYS_FLAGS
 
@@ -115,6 +120,10 @@  proc compat_setup_dfp { } {
 		int dummy; } "-fdiagnostics-urls=never"] != 0 } {
 	    set compat_alt_urls 1
 	}
+	if { [check_no_compiler_messages_nocache compat_alt_has_drawing object {
+		int dummy; } "-fdiagnostics-unicode-drawing=never"] != 0 } {
+	    set compat_alt_drawing 1
+	}
 	if { [check_no_compiler_messages_nocache compat_alt_has_no_line_no object {
 		int dummy; } "-fno-diagnostics-show-line-numbers"] != 0 } {
 	    set compat_no_line_no 1
diff --git a/gcc/testsuite/lib/prune.exp b/gcc/testsuite/lib/prune.exp
index 1c776249f1a..54ab417889f 100644
--- a/gcc/testsuite/lib/prune.exp
+++ b/gcc/testsuite/lib/prune.exp
@@ -21,7 +21,7 @@  load_lib multiline.exp
 if ![info exists TEST_ALWAYS_FLAGS] {
     set TEST_ALWAYS_FLAGS ""
 }
-set TEST_ALWAYS_FLAGS "-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never  -fdiagnostics-urls=never $TEST_ALWAYS_FLAGS"
+set TEST_ALWAYS_FLAGS "-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fdiagnostics-urls=never -fdiagnostics-unicode-drawing=never $TEST_ALWAYS_FLAGS"
 
 proc prune_gcc_output { text } {
     global srcdir
diff --git a/gcc/tree-diagnostic-path.cc b/gcc/tree-diagnostic-path.cc
index 82b3c2d6b6a..fe1c7bd1df4 100644
--- a/gcc/tree-diagnostic-path.cc
+++ b/gcc/tree-diagnostic-path.cc
@@ -332,11 +332,13 @@  path_summary::print (diagnostic_context *dc, bool show_depths) const
 	  if (range->m_stack_depth > prev_range->m_stack_depth)
 	    {
 	      /* Show pushed stack frame(s).  */
-	      const char *push_prefix = "+--> ";
 	      pp_string (pp, start_line_color);
-	      pp_string (pp, push_prefix);
+	      pp_string (pp, dc->drawing.corner_sw);
+	      pp_string (pp, dc->drawing.horizontal1);
+	      pp_string (pp, dc->drawing.horizontal1);
+	      pp_string (pp, "> ");
+	      cur_indent += 5;
 	      pp_string (pp, end_line_color);
-	      cur_indent += strlen (push_prefix);
 	    }
 	}
       if (range->m_fndecl)
@@ -358,7 +360,7 @@  path_summary::print (diagnostic_context *dc, bool show_depths) const
       {
 	write_indent (pp, cur_indent + per_frame_indent);
 	pp_string (pp, start_line_color);
-	pp_string (pp, "|");
+	pp_string (pp, dc->drawing.vertical);
 	pp_string (pp, end_line_color);
 	pp_newline (pp);
 
@@ -368,7 +370,7 @@  path_summary::print (diagnostic_context *dc, bool show_depths) const
 	  pretty_printer tmp_pp;
 	  write_indent (&tmp_pp, cur_indent + per_frame_indent);
 	  pp_string (&tmp_pp, start_line_color);
-	  pp_string (&tmp_pp, "|");
+	  pp_string (&tmp_pp, dc->drawing.vertical);
 	  pp_string (&tmp_pp, end_line_color);
 	  prefix = xstrdup (pp_formatted_text (&tmp_pp));
 	}
@@ -379,7 +381,7 @@  path_summary::print (diagnostic_context *dc, bool show_depths) const
 
 	write_indent (pp, cur_indent + per_frame_indent);
 	pp_string (pp, start_line_color);
-	pp_string (pp, "|");
+	pp_string (pp, dc->drawing.vertical);
 	pp_string (pp, end_line_color);
 	pp_newline (pp);
       }
@@ -405,18 +407,23 @@  path_summary::print (diagnostic_context *dc, bool show_depths) const
 		    = vbar_for_next_frame - per_frame_indent;
 		  write_indent (pp, vbar_for_next_frame);
 		  pp_string (pp, start_line_color);
+		  int col = indent_for_next_frame + per_frame_indent;
+		  if (dc->drawing.enabled)
+		    {
+		      pp_string (pp, dc->drawing.corner_nw);
+		      ++col;
+		    }
 		  pp_character (pp, '<');
-		  for (int i = indent_for_next_frame + per_frame_indent;
-		       i < cur_indent + per_frame_indent - 1; i++)
-		    pp_character (pp, '-');
-		  pp_character (pp, '+');
+		  for (; col < cur_indent + per_frame_indent - 1; col++)
+		    pp_string (pp, dc->drawing.horizontal1);
+		  pp_string (pp, dc->drawing.corner_se);
 		  pp_string (pp, end_line_color);
 		  pp_newline (pp);
 		  cur_indent = indent_for_next_frame;
 
 		  write_indent (pp, vbar_for_next_frame);
 		  pp_string (pp, start_line_color);
-		  pp_printf (pp, "|");
+		  pp_string (pp, dc->drawing.vertical);
 		  pp_string (pp, end_line_color);
 		  pp_newline (pp);
 		}
diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp b/libstdc++-v3/testsuite/lib/libstdc++.exp
index 16963f2edd5..c672e5bc06b 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -482,7 +482,7 @@  proc v3_target_compile { source dest type options } {
     global STATIC_LIBCXXFLAGS
     global tool
 
-    lappend options "additional_flags=-fno-diagnostics-show-caret -fdiagnostics-color=never -fdiagnostics-urls=never"
+    lappend options "additional_flags=-fno-diagnostics-show-caret -fdiagnostics-color=never -fdiagnostics-urls=never -fdiagnostics-unicode-drawing=never"
 
     if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
 	lappend options "libs=${gluefile}"