@@ -294,7 +294,7 @@ should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
expanded_location next_stmt_exploc = expand_location (next_stmt_loc);
expanded_location guard_exploc = expand_location (guard_loc);
- const unsigned int tab_width = cpp_opts->tabstop;
+ const unsigned int tab_width = cpp_get_tabstop ();
/* They must be in the same file. */
if (next_stmt_exploc.file != body_exploc.file)
@@ -504,12 +504,6 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
cpp_opts->track_macro_expansion = 2;
break;
- case OPT_ftabstop_:
- /* It is documented that we silently ignore silly values. */
- if (value >= 1 && value <= 100)
- cpp_opts->tabstop = value;
- break;
-
case OPT_fexec_charset_:
cpp_opts->narrow_charset = arg;
break;
@@ -1872,10 +1872,6 @@ Enum(strong_eval_order) String(some) Value(1)
EnumValue
Enum(strong_eval_order) String(all) Value(2)
-ftabstop=
-C ObjC C++ ObjC++ Joined RejectNegative UInteger
--ftabstop=<number> Distance between tab stops for column reporting.
-
ftemplate-backtrace-limit=
C++ ObjC++ Joined RejectNegative UInteger Var(template_backtrace_limit) Init(10)
Set the maximum number of template instantiation notes for a single warning or error.
@@ -1346,6 +1346,10 @@ fdiagnostics-path-format=
Common Joined RejectNegative Var(flag_diagnostics_path_format) Enum(diagnostic_path_format) Init(DPF_INLINE_EVENTS)
Specify how to print any control-flow path associated with a diagnostic.
+ftabstop=
+Common Joined RejectNegative UInteger
+-ftabstop=<number> Distance between tab stops for column reporting.
+
Enum
Name(diagnostic_path_format) Type(int)
@@ -226,22 +226,18 @@ class layout_range
/* A struct for use by layout::print_source_line for telling
layout::print_annotation_line the extents of the source line that
- it printed, so that underlines can be clipped appropriately. */
+ it printed, so that underlines can be clipped appropriately. Units
+ are 1-based display columns. */
struct line_bounds
{
- int m_first_non_ws;
- int m_last_non_ws;
+ int m_first_non_ws_disp_col;
+ int m_last_non_ws_disp_col;
- void convert_to_display_cols (char_span line)
+ line_bounds ()
{
- m_first_non_ws = cpp_byte_column_to_display_column (line.get_buffer (),
- line.length (),
- m_first_non_ws);
-
- m_last_non_ws = cpp_byte_column_to_display_column (line.get_buffer (),
- line.length (),
- m_last_non_ws);
+ m_first_non_ws_disp_col = INT_MAX;
+ m_last_non_ws_disp_col = 0;
}
};
@@ -351,8 +347,8 @@ class layout
private:
bool will_show_line_p (linenum_type row) const;
void print_leading_fixits (linenum_type row);
- void print_source_line (linenum_type row, const char *line, int line_bytes,
- line_bounds *lbounds_out);
+ line_bounds print_source_line (linenum_type row, const char *line,
+ int line_bytes);
bool should_print_annotation_line_p (linenum_type row) const;
void start_annotation_line (char margin_char = ' ') const;
void print_annotation_line (linenum_type row, const line_bounds lbounds);
@@ -1445,16 +1441,13 @@ layout::calculate_x_offset_display ()
}
/* Print line ROW of source code, potentially colorized at any ranges, and
- populate *LBOUNDS_OUT.
- LINE is the source line (not necessarily 0-terminated) and LINE_BYTES
- is its length in bytes.
- This function deals only with byte offsets, not display columns, so
- m_x_offset_display must be converted from display to byte units. In
- particular, LINE_BYTES and LBOUNDS_OUT are in bytes. */
+ return the line bounds. LINE is the source line (not necessarily
+ 0-terminated) and LINE_BYTES is its length in bytes. In order to handle both
+ colorization and tab expansion, this function tracks the line position in
+ both byte and display column units. */
-void
-layout::print_source_line (linenum_type row, const char *line, int line_bytes,
- line_bounds *lbounds_out)
+line_bounds
+layout::print_source_line (linenum_type row, const char *line, int line_bytes)
{
m_colorizer.set_normal_text ();
@@ -1469,30 +1462,29 @@ layout::print_source_line (linenum_type row, const char *line, int line_bytes,
else
pp_space (m_pp);
- /* We will stop printing the source line at any trailing whitespace, and start
- printing it as per m_x_offset_display. */
+ /* We will stop printing the source line at any trailing whitespace. */
line_bytes = get_line_bytes_without_trailing_whitespace (line,
line_bytes);
- int x_offset_bytes = 0;
- if (m_x_offset_display)
- {
- x_offset_bytes = cpp_display_column_to_byte_column (line, line_bytes,
- m_x_offset_display);
- /* In case the leading portion of the line that will be skipped over ends
- with a character with wcwidth > 1, then it is possible we skipped too
- much, so account for that by padding with spaces. */
- const int overage
- = cpp_byte_column_to_display_column (line, line_bytes, x_offset_bytes)
- - m_x_offset_display;
- for (int column = 0; column < overage; ++column)
- pp_space (m_pp);
- line += x_offset_bytes;
- }
- /* Print the line. */
- int first_non_ws = INT_MAX;
- int last_non_ws = 0;
- for (int col_byte = 1 + x_offset_bytes; col_byte <= line_bytes; col_byte++)
+ /* This object helps to keep track of which display column we are at, which is
+ necessary for computing the line bounds in display units, for doing
+ tab expansion, and for implementing m_x_offset_display. */
+ cpp_display_width_computation dw (line, line_bytes);
+
+ /* Skip the first m_x_offset_display display columns. In case the leading
+ portion that will be skipped ends with a character with wcwidth > 1, then
+ it is possible we skipped too much, so account for that by padding with
+ spaces. Note that this does the right thing too in case a tab was the last
+ character to be skipped over; the tab is effectively replaced by the
+ correct number of trailing spaces needed to offset by the desired number of
+ display columns. */
+ for (int skipped_display_cols = dw.advance_display_cols (m_x_offset_display);
+ skipped_display_cols > m_x_offset_display; --skipped_display_cols)
+ pp_space (m_pp);
+
+ /* Print the line and compute the line_bounds. */
+ line_bounds lbounds;
+ while (!dw.done ())
{
/* Assuming colorization is enabled for the caret and underline
characters, we may also colorize the associated characters
@@ -1510,7 +1502,8 @@ layout::print_source_line (linenum_type row, const char *line, int line_bytes,
{
bool in_range_p;
point_state state;
- in_range_p = get_state_at_point (row, col_byte,
+ const int start_byte_col = dw.bytes_processed () + 1;
+ in_range_p = get_state_at_point (row, start_byte_col,
0, INT_MAX,
CU_BYTES,
&state);
@@ -1519,22 +1512,44 @@ layout::print_source_line (linenum_type row, const char *line, int line_bytes,
else
m_colorizer.set_normal_text ();
}
- char c = *line;
- if (c == '\0' || c == '\t' || c == '\r')
- c = ' ';
- if (c != ' ')
+
+ /* Get the display width of the next character to be output, expanding
+ tabs and replacing some control bytes with spaces as necessary. */
+ const char *c = dw.next_byte ();
+ const int start_disp_col = dw.display_cols_processed () + 1;
+ const int this_display_width = dw.process_next_codepoint ();
+ if (*c == '\t')
+ {
+ /* The returned display width is the number of spaces into which the
+ tab should be expanded. */
+ for (int i = 0; i != this_display_width; ++i)
+ pp_space (m_pp);
+ continue;
+ }
+ if (*c == '\0' || *c == '\r')
+ {
+ /* cpp_wcwidth() promises to return 1 for all control bytes, and we
+ want to output these as a single space too, so this case is
+ actually the same as the '\t' case. */
+ gcc_assert (this_display_width == 1);
+ pp_space (m_pp);
+ continue;
+ }
+
+ /* We have a (possibly multibyte) character to output; update the line
+ bounds if it is not whitespace. */
+ if (*c != ' ')
{
- last_non_ws = col_byte;
- if (first_non_ws == INT_MAX)
- first_non_ws = col_byte;
+ lbounds.m_last_non_ws_disp_col = dw.display_cols_processed ();
+ if (lbounds.m_first_non_ws_disp_col == INT_MAX)
+ lbounds.m_first_non_ws_disp_col = start_disp_col;
}
- pp_character (m_pp, c);
- line++;
+
+ /* Output the character. */
+ while (c != dw.next_byte ()) pp_character (m_pp, *c++);
}
print_newline ();
-
- lbounds_out->m_first_non_ws = first_non_ws;
- lbounds_out->m_last_non_ws = last_non_ws;
+ return lbounds;
}
/* Determine if we should print an annotation line for ROW.
@@ -1576,14 +1591,13 @@ layout::start_annotation_line (char margin_char) const
}
/* Print a line consisting of the caret/underlines for the given
- source line. This function works with display columns, rather than byte
- counts; in particular, LBOUNDS should be in display column units. */
+ source line. */
void
layout::print_annotation_line (linenum_type row, const line_bounds lbounds)
{
int x_bound = get_x_bound_for_row (row, m_exploc.m_display_col,
- lbounds.m_last_non_ws);
+ lbounds.m_last_non_ws_disp_col);
start_annotation_line ();
pp_space (m_pp);
@@ -1593,8 +1607,8 @@ layout::print_annotation_line (linenum_type row, const line_bounds lbounds)
bool in_range_p;
point_state state;
in_range_p = get_state_at_point (row, column,
- lbounds.m_first_non_ws,
- lbounds.m_last_non_ws,
+ lbounds.m_first_non_ws_disp_col,
+ lbounds.m_last_non_ws_disp_col,
CU_DISPLAY_COLS,
&state);
if (in_range_p)
@@ -2499,15 +2513,11 @@ layout::print_line (linenum_type row)
if (!line)
return;
- line_bounds lbounds;
print_leading_fixits (row);
- print_source_line (row, line.get_buffer (), line.length (), &lbounds);
+ const line_bounds lbounds
+ = print_source_line (row, line.get_buffer (), line.length ());
if (should_print_annotation_line_p (row))
- {
- if (lbounds.m_first_non_ws != INT_MAX)
- lbounds.convert_to_display_cols (line);
- print_annotation_line (row, lbounds);
- }
+ print_annotation_line (row, lbounds);
if (m_show_labels_p)
print_any_labels (row);
print_trailing_fixits (row);
@@ -2774,6 +2784,114 @@ test_layout_x_offset_display_utf8 (const line_table_case &case_)
}
+static void
+test_layout_x_offset_display_tab (const line_table_case &case_)
+{
+ const char *content
+ = "This line is very long, so that we can use it to test the logic for "
+ "clipping long lines. Also this: `\t' is a tab that occupies 1 byte and "
+ "a variable number of display columns, starting at column #103.\n";
+
+ /* Number of bytes in the line, subtracting one to remove the newline. */
+ const int line_bytes = strlen (content) - 1;
+
+ /* The column where the tab begins. Byte or display is the same as there are
+ no multibyte characters earlier on the line. */
+ const int tab_col = 103;
+
+ /* Effective extra size of the tab beyond what a single space would have taken
+ up, indexed by tabstop. */
+ static const int num_tabstops = 11;
+ int extra_width[num_tabstops];
+ for (int tabstop = 1; tabstop != num_tabstops; ++tabstop)
+ {
+ const int this_tab_size = tabstop - (tab_col - 1) % tabstop;
+ extra_width[tabstop] = this_tab_size - 1;
+ }
+ /* Example of this calculation: if tabstop is 10, the tab starting at column
+ #103 has to expand into 8 spaces, covering columns 103-110, so that the
+ next character is at column #111. So it takes up 7 more columns than
+ a space would have taken up. */
+ ASSERT_EQ (7, extra_width[10]);
+
+ temp_source_file tmp (SELFTEST_LOCATION, ".c", content);
+ line_table_test ltt (case_);
+
+ linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
+
+ location_t line_end = linemap_position_for_column (line_table, line_bytes);
+
+ /* Don't attempt to run the tests if column data might be unavailable. */
+ if (line_end > LINE_MAP_MAX_LOCATION_WITH_COLS)
+ return;
+
+ /* Check that cpp_display_width handles the tabs as expected. */
+ char_span lspan = location_get_source_line (tmp.get_filename (), 1);
+ ASSERT_EQ ('\t', *(lspan.get_buffer () + (tab_col - 1)));
+ for (int tabstop = 1; tabstop != num_tabstops; ++tabstop)
+ {
+ ASSERT_EQ (line_bytes + extra_width[tabstop],
+ cpp_display_width (lspan.get_buffer (), lspan.length (),
+ tabstop));
+ ASSERT_EQ (line_bytes + extra_width[tabstop],
+ location_compute_display_column (expand_location (line_end),
+ tabstop));
+ }
+
+ /* Check that the tab is expanded to the expected number of spaces. */
+ const int global_tabstop = cpp_get_tabstop ();
+ rich_location richloc (line_table,
+ linemap_position_for_column (line_table,
+ tab_col + 1));
+ for (int tabstop = 1; tabstop != num_tabstops; ++tabstop)
+ {
+ cpp_set_tabstop (tabstop);
+ test_diagnostic_context dc;
+ layout test_layout (&dc, &richloc, DK_ERROR);
+ test_layout.print_line (1);
+ const char *out = pp_formatted_text (dc.printer);
+ ASSERT_EQ (NULL, strchr (out, '\t'));
+ const char *left_quote = strchr (out, '`');
+ const char *right_quote = strchr (out, '\'');
+ ASSERT_NE (NULL, left_quote);
+ ASSERT_NE (NULL, right_quote);
+ ASSERT_EQ (right_quote - left_quote, extra_width[tabstop] + 2);
+ }
+
+ /* Check that the line is offset properly and that the tab is broken up
+ into the expected number of spaces when it is the last character skipped
+ over. */
+ for (int tabstop = 1; tabstop != num_tabstops; ++tabstop)
+ {
+ cpp_set_tabstop (tabstop);
+ test_diagnostic_context dc;
+ static const int small_width = 24;
+ dc.caret_max_width = small_width - 4;
+ dc.min_margin_width = test_left_margin - test_linenum_sep + 1;
+ dc.show_line_numbers_p = true;
+ layout test_layout (&dc, &richloc, DK_ERROR);
+ test_layout.print_line (1);
+
+ /* We have arranged things so that two columns will be printed before
+ the caret. If the tab results in more than one space, this should
+ produce two spaces in the output; otherwise, it will be a single space
+ preceded by the opening quote before the tab character. */
+ const char *output1
+ = " 1 | ' is a tab that occupies 1 byte and a variable number of "
+ "display columns, starting at column #103.\n"
+ " | ^\n\n";
+ const char *output2
+ = " 1 | ` ' is a tab that occupies 1 byte and a variable number of "
+ "display columns, starting at column #103.\n"
+ " | ^\n\n";
+ const char *expected_output = (extra_width[tabstop] ? output1 : output2);
+ ASSERT_STREQ (expected_output, pp_formatted_text (dc.printer));
+ }
+
+ cpp_set_tabstop (global_tabstop);
+}
+
+
/* Verify that diagnostic_show_locus works sanely on UNKNOWN_LOCATION. */
static void
@@ -3854,6 +3972,27 @@ test_one_liner_labels_utf8 ()
}
}
+/* Make sure that colorization codes don't interrupt a multibyte
+ sequence, which would corrupt it. */
+static void
+test_one_liner_colorized_utf8 ()
+{
+ test_diagnostic_context dc;
+ dc.colorize_source_p = true;
+ diagnostic_color_init (&dc, DIAGNOSTICS_COLOR_YES);
+ const location_t pi = linemap_position_for_column (line_table, 12);
+ rich_location richloc (line_table, pi);
+ diagnostic_show_locus (&dc, &richloc, DK_ERROR);
+
+ /* In order to avoid having the test depend on exactly how the colorization
+ was effected, just confirm there are two pi characters in the output. */
+ const char *result = pp_formatted_text (dc.printer);
+ const char *null_term = result + strlen (result);
+ const char *first_pi = strstr (result, "\xcf\x80");
+ ASSERT_TRUE (first_pi && first_pi <= null_term - 2);
+ ASSERT_STR_CONTAINS (first_pi + 2, "\xcf\x80");
+}
+
/* Run the various one-liner tests. */
static void
@@ -3900,6 +4039,7 @@ test_diagnostic_show_locus_one_liner_utf8 (const line_table_case &case_)
test_one_liner_many_fixits_1_utf8 ();
test_one_liner_many_fixits_2_utf8 ();
test_one_liner_labels_utf8 ();
+ test_one_liner_colorized_utf8 ();
}
/* Verify that gcc_rich_location::add_location_if_nearby works. */
@@ -4955,6 +5095,68 @@ test_fixit_deletion_affecting_newline (const line_table_case &case_)
pp_formatted_text (dc.printer));
}
+static void
+test_tab_expansion (const line_table_case &case_)
+{
+ /* Check tab expansion in printed source lines, using a tempfile whose text
+ mixes tabs and spaces. With a tabstop of 8, the columns are as indicated:
+
+ .....................000.01111111111.22222333333 display
+ .....................123.90123456789.56789012345 columns */
+ const char *content = " \t This: `\t' is a tab.\n";
+ /* ....................000 00000011111 11111222222 byte
+ ....................123 45678901234 56789012345 columns */
+
+ const int first_non_ws_byte_col = 7;
+ const int right_quote_byte_col = 15;
+ const int last_byte_col = 25;
+ ASSERT_EQ (35, cpp_display_width (content, last_byte_col, 8));
+
+ temp_source_file tmp (SELFTEST_LOCATION, ".c", content);
+ line_table_test ltt (case_);
+ linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
+
+ /* Don't attempt to run the tests if column data might be unavailable. */
+ location_t line_end = linemap_position_for_column (line_table, last_byte_col);
+ if (line_end > LINE_MAP_MAX_LOCATION_WITH_COLS)
+ return;
+
+ /* Set the tabstop to 8 after the early return, so it is always restored. */
+ const int global_tabstop = cpp_get_tabstop ();
+ cpp_set_tabstop (8);
+
+ /* Check that the leading whitespace with mixed tabs and spaces is expanded
+ into 11 spaces. Recall that print_line() also puts one space before
+ everything. */
+ {
+ test_diagnostic_context dc;
+ rich_location richloc (line_table,
+ linemap_position_for_column (line_table,
+ first_non_ws_byte_col));
+ layout test_layout (&dc, &richloc, DK_ERROR);
+ test_layout.print_line (1);
+ ASSERT_STREQ (" This: ` ' is a tab.\n"
+ " ^\n",
+ pp_formatted_text (dc.printer));
+ }
+
+ /* Confirm the display width was tracked correctly across the internal tab
+ as well. */
+ {
+ test_diagnostic_context dc;
+ rich_location richloc (line_table,
+ linemap_position_for_column (line_table,
+ right_quote_byte_col));
+ layout test_layout (&dc, &richloc, DK_ERROR);
+ test_layout.print_line (1);
+ ASSERT_STREQ (" This: ` ' is a tab.\n"
+ " ^\n",
+ pp_formatted_text (dc.printer));
+ }
+
+ cpp_set_tabstop (global_tabstop);
+}
+
/* Verify that line numbers are correctly printed for the case of
a multiline range in which the width of the line numbers changes
(e.g. from "9" to "10"). */
@@ -5012,6 +5214,7 @@ diagnostic_show_locus_c_tests ()
test_layout_range_for_multiple_lines ();
for_each_line_table_case (test_layout_x_offset_display_utf8);
+ for_each_line_table_case (test_layout_x_offset_display_tab);
test_get_line_bytes_without_trailing_whitespace ();
@@ -5029,6 +5232,7 @@ diagnostic_show_locus_c_tests ()
for_each_line_table_case (test_fixit_insert_containing_newline_2);
for_each_line_table_case (test_fixit_replace_containing_newline);
for_each_line_table_case (test_fixit_deletion_affecting_newline);
+ for_each_line_table_case (test_tab_expansion);
test_line_numbers_multiline_range ();
}
@@ -913,7 +913,7 @@ make_location (location_t caret, source_range src_range)
source line in order to calculate the display width. If that cannot be done
for any reason, then returns the byte column as a fallback. */
int
-location_compute_display_column (expanded_location exploc)
+location_compute_display_column (expanded_location exploc, int tabstop)
{
if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
return exploc.column;
@@ -921,7 +921,7 @@ location_compute_display_column (expanded_location exploc)
/* If line is NULL, this function returns exploc.column which is the
desired fallback. */
return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
- exploc.column);
+ exploc.column, tabstop);
}
/* Dump statistics to stderr about the memory usage of the line_table
@@ -3612,8 +3612,8 @@ void test_cpp_utf8 ()
{
int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8);
ASSERT_EQ (8, w_bad);
- int w_ctrl = cpp_display_width ("\r\t\n\v\0\1", 6);
- ASSERT_EQ (6, w_ctrl);
+ int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5);
+ ASSERT_EQ (5, w_ctrl);
}
/* Verify that wcwidth of valid UTF-8 is as expected. */
@@ -3635,6 +3635,15 @@ void test_cpp_utf8 ()
ASSERT_EQ (18, w_mixed);
}
+ /* Verify that display width properly expands tabs. */
+ {
+ const char *tstr = "\tabc\td";
+ ASSERT_EQ (6, cpp_display_width (tstr, 6, 1));
+ ASSERT_EQ (10, cpp_display_width (tstr, 6, 3));
+ ASSERT_EQ (17, cpp_display_width (tstr, 6, 8));
+ ASSERT_EQ (1, cpp_display_column_to_byte_column (tstr, 6, 7, 8));
+ }
+
/* Verify that cpp_byte_column_to_display_column can go past the end,
and similar edge cases. */
{
@@ -38,7 +38,12 @@ STATIC_ASSERT (BUILTINS_LOCATION < RESERVED_LOCATION_COUNT);
extern bool is_location_from_builtin_token (location_t);
extern expanded_location expand_location (location_t);
-extern int location_compute_display_column (expanded_location);
+
+/* As with cpp_byte_column_to_display_column(), TABSTOP <= 0 means to use the
+ global default cpp_get_tabstop(), which is typically set with the
+ -ftabstop option. */
+extern int location_compute_display_column (expanded_location exploc,
+ int tabstop = 0);
/* A class capturing the bounds of a buffer, to allow for run-time
bounds-checking in a checked build. */
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see
#include "spellcheck.h"
#include "opt-suggestions.h"
#include "diagnostic-color.h"
+#include "cpplib.h"
static void set_Wstrict_aliasing (struct gcc_options *opts, int onoff);
@@ -2798,6 +2799,12 @@ common_handle_option (struct gcc_options *opts,
check_alignment_argument (loc, arg, "functions");
break;
+ case OPT_ftabstop_:
+ /* It is documented that we silently ignore silly values. */
+ if (value >= 1 && value <= 100)
+ cpp_set_tabstop (value);
+ break;
+
default:
/* If the flag was handled in a standard way, assume the lack of
processing here is intentional. */
@@ -44,12 +44,12 @@ int fn_6 (int a, int b, int c)
/* ... */
/* { dg-begin-multiline-output "" }
- if ((err = foo (b)) != 0)
- ^~
+ if ((err = foo (b)) != 0)
+ ^~
{ dg-end-multiline-output "" } */
/* { dg-begin-multiline-output "" }
- goto fail;
- ^~~~
+ goto fail;
+ ^~~~
{ dg-end-multiline-output "" } */
fail:
@@ -24,9 +24,9 @@ void test_static_assert_different_line (void)
_Static_assert(sizeof(int) >= sizeof(char), /* { dg-message "to match this '\\('" } */
"msg"; /* { dg-error "expected '\\)' before ';' token" } */
/* { dg-begin-multiline-output "" }
- "msg";
- ^
- )
+ "msg";
+ ^
+ )
{ dg-end-multiline-output "" } */
/* { dg-begin-multiline-output "" }
_Static_assert(sizeof(int) >= sizeof(char),
@@ -33,10 +33,10 @@ int test_2 (void)
~~~~~~~~~~~~~~~~
|
s
- + some_other_function ());
- ^ ~~~~~~~~~~~~~~~~~~~~~~
- |
- t
+ + some_other_function ());
+ ^ ~~~~~~~~~~~~~~~~~~~~~~
+ |
+ t
{ dg-end-multiline-output "" } */
}
@@ -288,7 +288,7 @@ int test_3 (int x, int y)
| | ~~~~~~~~~~
| | |
| | (4) ...to here
- | NN | to dereference it above
+ | NN | to dereference it above
| NN | return *ptr;
| | ~~~~
| | |
@@ -35,10 +35,10 @@ int test_2 (void)
~~~~~~~~~~~~~~~~
|
struct s
- + some_other_function ());
- ^ ~~~~~~~~~~~~~~~~~~~~~~
- |
- struct t
+ + some_other_function ());
+ ^ ~~~~~~~~~~~~~~~~~~~~~~
+ |
+ struct t
{ dg-end-multiline-output "" } */
}
@@ -540,15 +540,15 @@ void test_builtin_types_compatible_p (unsigned long i)
__emit_expression_range (0,
f (i) + __builtin_types_compatible_p (long, int)); /* { dg-warning "range" } */
/* { dg-begin-multiline-output "" }
- f (i) + __builtin_types_compatible_p (long, int));
- ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ f (i) + __builtin_types_compatible_p (long, int));
+ ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{ dg-end-multiline-output "" } */
__emit_expression_range (0,
__builtin_types_compatible_p (long, int) + f (i)); /* { dg-warning "range" } */
/* { dg-begin-multiline-output "" }
- __builtin_types_compatible_p (long, int) + f (i));
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
+ __builtin_types_compatible_p (long, int) + f (i));
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
{ dg-end-multiline-output "" } */
}
@@ -671,8 +671,8 @@ void test_multiple_ordinary_maps (void)
/* { dg-begin-multiline-output "" }
__emit_expression_range (0, foo (0,
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"));
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"));
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{ dg-end-multiline-output "" } */
/* Another expression that transitions between ordinary maps; this
@@ -685,8 +685,8 @@ void test_multiple_ordinary_maps (void)
/* { dg-begin-multiline-output "" }
__emit_expression_range (0, foo (0, "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
3456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789",
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- 0));
- ~~
+ 0));
+ ~~
{ dg-end-multiline-output "" } */
}
@@ -335,11 +335,11 @@ pr87652 (const char *stem, int counter)
/* { dg-error "unable to read substring location: unable to read source line" "" { target c } 329 } */
/* { dg-error "unable to read substring location: failed to get ordinary maps" "" { target c++ } 329 } */
/* { dg-begin-multiline-output "" }
- __emit_string_literal_range(__FILE__":%5d: " format, \
+ __emit_string_literal_range(__FILE__":%5d: " format, \
^~~~~~~~
{ dg-end-multiline-output "" { target c } } */
/* { dg-begin-multiline-output "" }
- __emit_string_literal_range(__FILE__":%5d: " format, \
+ __emit_string_literal_range(__FILE__":%5d: " format, \
^
{ dg-end-multiline-output "" { target c++ } } */
@@ -2276,49 +2276,105 @@ cpp_string_location_reader::get_next ()
return result;
}
-/* Helper for cpp_byte_column_to_display_column and its inverse. Given a
- pointer to a UTF-8-encoded character, compute its display width. *INBUFP
- points on entry to the start of the UTF-8 encoding of the character, and
- is updated to point just after the last byte of the encoding. *INBYTESLEFTP
- contains on entry the remaining size of the buffer into which *INBUFP
- points, and this is also updated accordingly. If *INBUFP does not
+/* This is normally determined by the -ftabstop option. We need to know it so
+ the display column computations below can expand tabs as well. */
+
+static int global_tabstop = 8;
+
+int
+cpp_set_tabstop (int t)
+{
+ return global_tabstop = MAX (1, t);
+}
+
+int
+cpp_get_tabstop ()
+{
+ return global_tabstop;
+}
+
+cpp_display_width_computation::
+cpp_display_width_computation (const char *data, int data_length, int tabstop) :
+ m_begin (data),
+ m_next (m_begin),
+ m_bytes_left (data_length),
+ m_tabstop (tabstop > 0 ? tabstop : global_tabstop),
+ m_display_cols (0)
+{}
+
+
+/* The main implementation function for class cpp_display_width_computation.
+ m_next points on entry to the start of the UTF-8 encoding of the next
+ character, and is updated to point just after the last byte of the encoding.
+ m_bytes_left contains on entry the remaining size of the buffer into which
+ m_next points, and this is also updated accordingly. If m_next does not
point to a valid UTF-8-encoded sequence, then it will be treated as a single
- byte with display width 1. */
+ byte with display width 1. m_display_cols is the current display column,
+ relative to which tab stops should be expanded. Returns the display width of
+ the codepoint just processed. */
-static inline int
-compute_next_display_width (const uchar **inbufp, size_t *inbytesleftp)
+int
+cpp_display_width_computation::process_next_codepoint ()
{
cppchar_t c;
- if (one_utf8_to_cppchar (inbufp, inbytesleftp, &c) != 0)
+ int next_width;
+
+ if (*m_next == '\t')
+ {
+ ++m_next;
+ --m_bytes_left;
+ next_width = m_tabstop - (m_display_cols % m_tabstop);
+ }
+ else if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c)
+ != 0)
{
/* Input is not convertible to UTF-8. This could be fine, e.g. in a
string literal, so don't complain. Just treat it as if it has a width
of one. */
- ++*inbufp;
- --*inbytesleftp;
- return 1;
+ ++m_next;
+ --m_bytes_left;
+ next_width = 1;
}
+ else
+ {
+ /* one_utf8_to_cppchar() has updated m_next and m_bytes_left for us. */
+ next_width = cpp_wcwidth (c);
+ }
+
+ m_display_cols += next_width;
+ return next_width;
+}
- /* one_utf8_to_cppchar() has updated inbufp and inbytesleftp for us. */
- return cpp_wcwidth (c);
+/* Utility to advance the byte stream by the minimum amount needed to consume
+ N display columns. Returns the number of display columns that were
+ actually skipped. This could be less than N, if there was not enough data,
+ or more than N, if the last character to be skipped had a sufficiently large
+ display width. */
+int
+cpp_display_width_computation::advance_display_cols (int n)
+{
+ const int start = m_display_cols;
+ const int target = start + n;
+ while (m_display_cols < target && !done ())
+ process_next_codepoint ();
+ return m_display_cols - start;
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
how many display columns are occupied by the first COLUMN bytes. COLUMN
may exceed DATA_LENGTH, in which case the phantom bytes at the end are
- treated as if they have display width 1. */
+ treated as if they have display width 1. Tabs are expanded to the next tab
+ stop, relative to the start of DATA. */
int
cpp_byte_column_to_display_column (const char *data, int data_length,
- int column)
+ int column, int tabstop)
{
- int display_col = 0;
- const uchar *udata = (const uchar *) data;
const int offset = MAX (0, column - data_length);
- size_t inbytesleft = column - offset;
- while (inbytesleft)
- display_col += compute_next_display_width (&udata, &inbytesleft);
- return display_col + offset;
+ cpp_display_width_computation dw (data, column - offset, tabstop);
+ while (!dw.done ())
+ dw.process_next_codepoint ();
+ return dw.display_cols_processed () + offset;
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
@@ -2328,14 +2384,11 @@ cpp_byte_column_to_display_column (const char *data, int data_length,
int
cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col)
+ int display_col, int tabstop)
{
- int column = 0;
- const uchar *udata = (const uchar *) data;
- size_t inbytesleft = data_length;
- while (column < display_col && inbytesleft)
- column += compute_next_display_width (&udata, &inbytesleft);
- return data_length - inbytesleft + MAX (0, display_col - column);
+ cpp_display_width_computation dw (data, data_length, tabstop);
+ const int avail_display = dw.advance_display_cols (display_col);
+ return dw.bytes_processed () + MAX (0, display_col - avail_display);
}
/* Our own version of wcwidth(). We don't use the actual wcwidth() in glibc,
@@ -312,9 +312,6 @@ enum cpp_normalize_level {
carries all the options visible to the command line. */
struct cpp_options
{
- /* Characters between tab stops. */
- unsigned int tabstop;
-
/* The language we're preprocessing. */
enum c_lang lang;
@@ -1320,14 +1317,48 @@ extern const char * cpp_get_userdef_suffix
(const cpp_token *);
/* In charset.c */
+
+/* A class to manage the state while converting a UTF-8 sequence to cppchar_t
+ and computing the display width one character at a time. */
+class cpp_display_width_computation {
+ public:
+ /* TABSTOP <= 0 means to use cpp_get_tabstop(). */
+ cpp_display_width_computation (const char *data, int data_length,
+ int tabstop = 0);
+ const char *next_byte () const { return m_next; }
+ int bytes_processed () const { return m_next - m_begin; }
+ int bytes_left () const { return m_bytes_left; }
+ bool done () const { return !bytes_left (); }
+ int display_cols_processed () const { return m_display_cols; }
+
+ int process_next_codepoint ();
+ int advance_display_cols (int n);
+
+ private:
+ const char *const m_begin;
+ const char *m_next;
+ size_t m_bytes_left;
+ const int m_tabstop;
+ int m_display_cols;
+};
+
+/* Convenience functions that are simple use cases for class
+ cpp_display_width_computation. Tab characters will be expanded to spaces
+ as determined by TABSTOP. If TABSTOP <= 0, the tab width is set to the
+ global default cpp_get_tabstop (), which is typically set with the
+ -ftabstop option. */
int cpp_byte_column_to_display_column (const char *data, int data_length,
- int column);
-inline int cpp_display_width (const char *data, int data_length)
+ int column, int tabstop = 0);
+inline int cpp_display_width (const char *data, int data_length,
+ int tabstop = 0)
{
- return cpp_byte_column_to_display_column (data, data_length, data_length);
+ return cpp_byte_column_to_display_column (data, data_length, data_length,
+ tabstop);
}
int cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col);
+ int display_col, int tabstop = 0);
int cpp_wcwidth (cppchar_t c);
+int cpp_set_tabstop (int t);
+int cpp_get_tabstop ();
#endif /* ! LIBCPP_CPPLIB_H */
@@ -190,7 +190,6 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
CPP_OPTION (pfile, discard_comments) = 1;
CPP_OPTION (pfile, discard_comments_in_macro_exp) = 1;
CPP_OPTION (pfile, max_include_depth) = 200;
- CPP_OPTION (pfile, tabstop) = 8;
CPP_OPTION (pfile, operator_names) = 1;
CPP_OPTION (pfile, warn_trigraphs) = 2;
CPP_OPTION (pfile, warn_endif_labels) = 1;