diff mbox series

[committed] preprocessor: Fix pp-number lexing of digit separators [PR83873, PR97604]

Message ID alpine.DEB.2.22.394.2105062320550.583406@digraph.polyomino.org.uk
State New
Headers show
Series [committed] preprocessor: Fix pp-number lexing of digit separators [PR83873, PR97604] | expand

Commit Message

Joseph Myers May 6, 2021, 11:21 p.m. UTC
When the preprocessor lexes preprocessing numbers in lex_number, it
accepts digit separators in more cases than actually permitted in
pp-numbers by the standard syntax.

One thing this accepts is adjacent digit separators; there is some
code to reject those later, but as noted in bug 83873 it fails to
cover the case of adjacent digit separators within a floating-point
exponent.  Accepting adjacent digit separators only results in a
missing diagnostic, not in valid code being rejected or being accepted
with incorrect semantics.  That is because the correct lexing in such a
case would end the pp-number before '' and have '' start the following
preprocessing tokens, and no valid preprocessing token starts with ''
while ' isn't valid on its own as a preprocessing token either.  So
this patch fixes that case by moving
the error for adjacent digit separators to lex_number (allowing a more
specific diagnostic than if '' were excluded from the pp-number
completely).

Other cases inappropriately accepted involve digit separators before
'.', 'e+', 'e-', 'p+' or 'p-' (or corresponding uppercase variants).
In those cases, as shown by the test digit-sep-pp-number.C added, this
can result in valid code being wrongly rejected as a result of too
many characters being included in the pp-number.  So this case is
fixed by terminating the pp-number at the correct character according
to the standard.  That test also covers the case where a digit
separator was followed by an identifier-nondigit that is not a
nondigit (e.g. a UCN); that case was already handled correctly.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.  Applied to 
mainline.

libcpp/
	PR c++/83873
	PR preprocessor/97604
	* lex.c (lex_number): Reject adjacent digit separators here.  Do
	not allow digit separators before '.' or an exponent with sign.
	* expr.c (cpp_classify_number): Do not check for adjacent digit
	separators here.

gcc/testsuite/
	PR c++/83873
	PR preprocessor/97604
	* g++.dg/cpp1y/digit-sep-neg-2.C,
	g++.dg/cpp1y/digit-sep-pp-number.C: New tests.
	* g++.dg/cpp1y/digit-sep-line-neg.C, g++.dg/cpp1y/digit-sep-neg.C:
	Adjust expected messages.
diff mbox series

Patch

diff --git a/gcc/testsuite/g++.dg/cpp1y/digit-sep-line-neg.C b/gcc/testsuite/g++.dg/cpp1y/digit-sep-line-neg.C
index fa3b1352109..239d0287b74 100644
--- a/gcc/testsuite/g++.dg/cpp1y/digit-sep-line-neg.C
+++ b/gcc/testsuite/g++.dg/cpp1y/digit-sep-line-neg.C
@@ -2,3 +2,4 @@ 
 // { dg-do preprocess { target c++14 } }
 
 #line 0''123 // { dg-error "is not a positive integer" }
+// { dg-error "adjacent digit separators" "adjacent" { target *-*-* } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg-2.C b/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg-2.C
new file mode 100644
index 00000000000..09393aaf838
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg-2.C
@@ -0,0 +1,4 @@ 
+// Test adjacent digit separators rejected in exponent (bug 83873).
+// { dg-do compile { target c++14 } }
+
+double d = 1.0e1''0; /* { dg-error "adjacent digit separators" } */
diff --git a/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C b/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C
index 5343e52c5a5..300fe51041b 100644
--- a/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C
+++ b/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C
@@ -16,7 +16,7 @@  main()
   unsigned u = 0b0001'0000'0000'0000'0000'0000'U; // { dg-error "digit separator outside digit sequence" }
 
   double d = 0.0;
-  d = 1'.602'176'565e-19; // { dg-error "digit separator adjacent to decimal point" }
+  d = 1'.602'176'565e-19; // { dg-warning "multi-character" }
   d = 1.'602'176'565e-19; // { dg-error "digit separator adjacent to decimal point" }
   d = 1.602''176'565e-19; // { dg-error "adjacent digit separators" }
   d = 1.602'176'565'e-19; // { dg-error "digit separator adjacent to exponent" }
@@ -29,4 +29,5 @@  main()
 
 // { dg-error "exponent has no digits" "exponent has no digits" { target *-*-* } 23 }
 // { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 15 }
+// { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 19 }
 // { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 26 }
diff --git a/gcc/testsuite/g++.dg/cpp1y/digit-sep-pp-number.C b/gcc/testsuite/g++.dg/cpp1y/digit-sep-pp-number.C
new file mode 100644
index 00000000000..9777382224d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/digit-sep-pp-number.C
@@ -0,0 +1,17 @@ 
+// Test lexing of pp-numbers does not allow digit separators that do
+// not form part of the pp-number syntax, when the code is valid with
+// correct lexing but not with too many characters accepted in the
+// pp-number (bug 97604).
+// { dg-do compile { target c++14 } }
+
+static_assert (0x0'e-0xe == 0, "signs");
+
+#define a0 '.' -
+#define acat(x) a ## x
+static_assert (acat (0'.') == 0, ".");
+
+// This case was not actually buggy.
+#define c0(x) 0
+#define b0 c0 (
+#define bcat(x) b ## x
+static_assert (bcat (0'\u00c0')) == 0, "identifier-nondigit");
diff --git a/libcpp/expr.c b/libcpp/expr.c
index dd5611dce0e..ab4a2608962 100644
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -582,11 +582,7 @@  cpp_classify_number (cpp_reader *pfile, const cpp_token *token,
 	    max_digit = c;
 	}
       else if (DIGIT_SEP (c))
-	{
-	  if (seen_digit_sep)
-	    SYNTAX_ERROR_AT (virtual_location, "adjacent digit separators");
-	  seen_digit_sep = true;
-	}
+	seen_digit_sep = true;
       else if (c == '.')
 	{
 	  if (seen_digit_sep || DIGIT_SEP (*str))
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 06bcc31c87e..9662f1b8ca2 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1548,18 +1548,28 @@  lex_number (cpp_reader *pfile, cpp_string *number,
   base = pfile->buffer->cur - 1;
   do
     {
+      const uchar *adj_digit_sep = NULL;
       cur = pfile->buffer->cur;
 
       /* N.B. ISIDNUM does not include $.  */
-      while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
-	     || VALID_SIGN (*cur, cur[-1]))
+      while (ISIDNUM (*cur)
+	     || (*cur == '.' && !DIGIT_SEP (cur[-1]))
+	     || DIGIT_SEP (*cur)
+	     || (VALID_SIGN (*cur, cur[-1]) && !DIGIT_SEP (cur[-2])))
 	{
 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
+	  /* Adjacent digit separators do not form part of the pp-number syntax.
+	     However, they can safely be diagnosed here as an error, since '' is
+	     not a valid preprocessing token.  */
+	  if (DIGIT_SEP (*cur) && DIGIT_SEP (cur[-1]) && !adj_digit_sep)
+	    adj_digit_sep = cur;
 	  cur++;
 	}
       /* A number can't end with a digit separator.  */
       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
 	--cur;
+      if (adj_digit_sep && adj_digit_sep < cur)
+	cpp_error (pfile, CPP_DL_ERROR, "adjacent digit separators");
 
       pfile->buffer->cur = cur;
     }