Patchwork Fix raw-string handling (PR preprocessor/57620)

login
register
mail settings
Submitter Jakub Jelinek
Date July 10, 2013, 11:32 p.m.
Message ID <20130710233258.GJ2475@laptop.redhat.com>
Download mbox | patch
Permalink /patch/258269/
State New
Headers show

Comments

Jakub Jelinek - July 10, 2013, 11:32 p.m.
On Wed, Jul 10, 2013 at 02:22:56AM -0400, Jason Merrill wrote:
> It seems undesirable to go from one to four separate copies of the
> note-handling code.  Could we instead handle the different states of
> prefix, body and suffix parsing in local variables and just have one
> loop over the characters/notes in the input?

So something like this instead?  This is one loop handling the phase1/phase2
reversion, with 3 phases: parsing the raw_prefix (RAW_STR_PREFIX), parsing
the actual raw string (RAW_STR) and, after a ) has been seen, trying to match
the d-chars against raw_prefix (RAW_STR_SUFFIX).  If that match is not
successful, we switch to phase RAW_STR again but keep looking for ); if one
is seen, we switch back to the RAW_STR_SUFFIX phase.

With this, make check RUNTESTFLAGS=dg.exp=raw-string* passes, but perhaps
I'll need to play with gcov and add some new testcases.  Ones I have in
mind are e.g. R"??(??)??"; which wouldn't work correctly if we didn't
stop consuming notes after seeing a ??X other than ??/, and also something
to catch the case of, say, R"?(x)??)?", because there the first char of the
trigraph matches against raw_prefix, but the second char isn't the expected
", yet the last character of the trigraph, ), should start another round of
checking against raw_prefix.

2013-07-10  Jakub Jelinek  <jakub@redhat.com>

	PR preprocessor/57620
	* lex.c (lex_raw_string): Undo phase1 and phase2 transformations
	between R" and final " rather than only in between R"del( and )del".

	* c-c++-common/raw-string-2.c (s12, u12, U12, L12): Remove.
	(main): Don't test {s,u,U,L}12.
	* c-c++-common/raw-string-13.c: New test.
	* c-c++-common/raw-string-14.c: New test.
	* c-c++-common/raw-string-15.c: New test.
	* c-c++-common/raw-string-16.c: New test.


	Jakub

Patch

--- libcpp/lex.c.jj	2013-07-10 18:50:45.229759934 +0200
+++ libcpp/lex.c	2013-07-11 01:20:41.864103963 +0200
@@ -1373,11 +1373,16 @@  static void
 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 		const uchar *cur)
 {
-  const uchar *raw_prefix;
-  unsigned int raw_prefix_len = 0;
+  uchar raw_prefix[17];
+  const uchar *orig_base;
+  unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
+  enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
+  raw_str_phase phase = RAW_STR_PREFIX;
   enum cpp_ttype type;
   size_t total_len = 0;
   _cpp_buff *first_buff = NULL, *last_buff = NULL;
+  _cpp_buff *last_seen_buff = NULL;
+  size_t last_seen_len = 0, suffix_start = 0, raw_prefix_start;
   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
 
   type = (*base == 'L' ? CPP_WSTRING :
@@ -1385,57 +1390,6 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 	  : CPP_STRING);
 
-  raw_prefix = cur + 1;
-  while (raw_prefix_len < 16)
-    {
-      switch (raw_prefix[raw_prefix_len])
-	{
-	case ' ': case '(': case ')': case '\\': case '\t':
-	case '\v': case '\f': case '\n': default:
-	  break;
-	/* Basic source charset except the above chars.  */
-	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-	case 'y': case 'z':
-	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-	case 'Y': case 'Z':
-	case '0': case '1': case '2': case '3': case '4': case '5':
-	case '6': case '7': case '8': case '9':
-	case '_': case '{': case '}': case '#': case '[': case ']':
-	case '<': case '>': case '%': case ':': case ';': case '.':
-	case '?': case '*': case '+': case '-': case '/': case '^':
-	case '&': case '|': case '~': case '!': case '=': case ',':
-	case '"': case '\'':
-	  raw_prefix_len++;
-	  continue;
-	}
-      break;
-    }
-
-  if (raw_prefix[raw_prefix_len] != '(')
-    {
-      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
-		+ 1;
-      if (raw_prefix_len == 16)
-	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
-			     "raw string delimiter longer than 16 characters");
-      else
-	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
-			     "invalid character '%c' in raw string delimiter",
-			     (int) raw_prefix[raw_prefix_len]);
-      pfile->buffer->cur = raw_prefix - 1;
-      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
-      return;
-    }
-
-  cur = raw_prefix + raw_prefix_len + 1;
-  for (;;)
-    {
 #define BUF_APPEND(STR,LEN)					\
       do {							\
 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
@@ -1443,10 +1397,16 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	total_len += (LEN);					\
       } while (0);
 
+  orig_base = base;
+  ++cur;
+  raw_prefix_start = cur - base;
+  for (;;)
+    {
       cppchar_t c;
 
       /* If we previously performed any trigraph or line splicing
-	 transformations, undo them within the body of the raw string.  */
+	 transformations, undo them in between the opening and closing
+	 double quote.  */
       while (note->pos < cur)
 	++note;
       for (; note->pos == cur; ++note)
@@ -1506,23 +1466,13 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 		      ++note;
 		      goto after_backslash;
 		    }
-		  /* The ) from ??) could be part of the suffix.  */
-		  else if (type == ')'
-			   && strncmp ((const char *) cur+1,
-				       (const char *) raw_prefix,
-				       raw_prefix_len) == 0
-			   && cur[raw_prefix_len+1] == '"')
-		    {
-		      BUF_APPEND (")", 1);
-		      base++;
-		      cur += raw_prefix_len + 2;
-		      goto break_outer_loop;
-		    }
 		  else
 		    {
 		      /* Skip the replacement character.  */
 		      base = ++cur;
 		      BUF_APPEND (&type, 1);
+		      c = type;
+		      goto check_c;
 		    }
 		}
 	      else
@@ -1532,16 +1482,160 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	}
       c = *cur++;
 
-      if (c == ')'
-	  && strncmp ((const char *) cur, (const char *) raw_prefix,
-		      raw_prefix_len) == 0
-	  && cur[raw_prefix_len] == '"')
+     check_c:
+      if (phase == RAW_STR_PREFIX)
+	{
+	  do
+	    {
+	      if (first_buff == NULL
+		  || raw_prefix_len + raw_prefix_start >= total_len)
+		{
+		  if (total_len + (cur - base)
+		      != raw_prefix_len + 1 + raw_prefix_start)
+		    break;
+		  raw_prefix[raw_prefix_len] = c;
+		}
+	      else
+		{
+		  if (last_seen_buff == NULL)
+		    last_seen_buff = first_buff;
+		  while ((size_t) (BUFF_FRONT (last_seen_buff)
+				   - last_seen_buff->base)
+			 == raw_prefix_len + raw_prefix_start - last_seen_len)
+		    {
+		      last_seen_buff = last_seen_buff->next;
+		      last_seen_len = raw_prefix_len + raw_prefix_start;
+		    }
+		  raw_prefix[raw_prefix_len]
+		    = last_seen_buff->base[raw_prefix_len + raw_prefix_start
+					   - last_seen_len];
+		}
+
+	      switch (raw_prefix[raw_prefix_len])
+		{
+		case ' ': case '(': case ')': case '\\': case '\t':
+		case '\v': case '\f': case '\n': default:
+		  break;
+		/* Basic source charset except the above chars.  */
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+		case 'y': case 'z':
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+		case 'Y': case 'Z':
+		case '0': case '1': case '2': case '3': case '4': case '5':
+		case '6': case '7': case '8': case '9':
+		case '_': case '{': case '}': case '#': case '[': case ']':
+		case '<': case '>': case '%': case ':': case ';': case '.':
+		case '?': case '*': case '+': case '-': case '/': case '^':
+		case '&': case '|': case '~': case '!': case '=': case ',':
+		case '"': case '\'':
+		  if (raw_prefix_len < 16)
+		    {
+		      raw_prefix_len++;
+		      continue;
+		    }
+		  break;
+		}
+
+	      if (raw_prefix[raw_prefix_len] != '(')
+		{
+		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
+		  if (raw_prefix_len == 16)
+		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+					 col, "raw string delimiter longer "
+					      "than 16 characters");
+		  else if (raw_prefix[raw_prefix_len] == '\n')
+		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+					 col, "invalid new-line in raw "
+					      "string delimiter");
+		  else
+		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
+					 col, "invalid character '%c' in "
+					      "raw string delimiter",
+					 (int) raw_prefix[raw_prefix_len]);
+		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
+		  create_literal (pfile, token, orig_base,
+				  raw_prefix_start - 1, CPP_OTHER);
+		  if (first_buff)
+		    _cpp_release_buff (pfile, first_buff);
+		  return;
+		}
+	      raw_prefix[raw_prefix_len] = '"';
+	      phase = RAW_STR;
+	      break;
+	    }
+	  while (1);
+	  continue;
+	}
+      else if (phase == RAW_STR_SUFFIX)
+	{
+	  while (raw_suffix_len <= raw_prefix_len
+		 || phase != RAW_STR_SUFFIX)
+	    {
+	      cppchar_t d;
+	      bool is_c = false;
+	      if (first_buff == NULL
+		  || suffix_start + raw_suffix_len >= total_len)
+		{
+		  if (total_len + (cur - base)
+		      != suffix_start + raw_suffix_len + 1)
+		    break;
+		  d = c;
+		  is_c = true;
+		}
+	      else
+		{
+		  if (last_seen_buff == NULL)
+		    last_seen_buff = first_buff;
+		  while ((size_t) (BUFF_FRONT (last_seen_buff)
+				   - last_seen_buff->base)
+			 == last_seen_len + raw_prefix_len)
+		    {
+		      last_seen_buff = last_seen_buff->next;
+		      last_seen_len += raw_prefix_len;
+		    }
+		  d = last_seen_buff->base[suffix_start + raw_suffix_len
+					   - last_seen_len];
+		}
+	      raw_suffix_len++;
+	      if (phase == RAW_STR_SUFFIX
+		  && d != raw_prefix[raw_suffix_len - 1])
+		{
+		  phase = RAW_STR;
+		  if (is_c && c == '\n')
+		    goto handle_newline;
+		}
+	      if (phase == RAW_STR && d == ')')
+		{
+		  phase = RAW_STR;
+		  suffix_start += raw_suffix_len;
+		  raw_suffix_len = 0;
+		}
+	    }
+	  if (phase == RAW_STR_SUFFIX && raw_suffix_len > raw_prefix_len)
+	    break;
+	}
+      else if (c == ')')
 	{
-	  cur += raw_prefix_len + 1;
-	  break;
+	  phase = RAW_STR_SUFFIX;
+	  last_seen_buff = last_buff;
+	  if (last_seen_buff)
+	    last_seen_len
+	      = total_len
+		- (BUFF_FRONT (last_seen_buff) - last_seen_buff->base);
+	  else
+	    last_seen_len = 0;
+	  suffix_start = total_len + (cur - base);
+	  raw_suffix_len = 0;
 	}
       else if (c == '\n')
 	{
+	 handle_newline:
 	  if (pfile->state.in_directive
 	      || (pfile->state.parsing_args
 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
@@ -1579,7 +1673,6 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
 	}
     }
- break_outer_loop:
 
   if (CPP_OPTION (pfile, user_literals))
     {
--- gcc/testsuite/c-c++-common/raw-string-2.c.jj	2010-03-30 08:56:15.000000000 +0200
+++ gcc/testsuite/c-c++-common/raw-string-2.c	2013-06-17 11:07:41.050902280 +0200
@@ -32,8 +32,6 @@  const char s08[] = u8R"(a)" R"_{}#[]<>%:
 const char s09[] = u8R"/^&|~!=,"'(a)/^&|~!=,"'" u8"(b)";
 const char s10[] = u8"(a)" u8R"0123456789abcdef(b)0123456789abcdef";
 const char s11[] = u8R"ghijklmnopqrstuv(a)ghijklmnopqrstuv" u8R"w(b)w";
-const char s12[] = R"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 const char16_t u03[] = R"-(a)-" u"(b)";
 const char16_t u04[] = "(a)" uR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -44,8 +42,6 @@  const char16_t u08[] = uR"(a)" R"_{}#[]<
 const char16_t u09[] = uR"/^&|~!=,"'(a)/^&|~!=,"'" u"(b)";
 const char16_t u10[] = u"(a)" uR"0123456789abcdef(b)0123456789abcdef";
 const char16_t u11[] = uR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" uR"w(b)w";
-const char16_t u12[] = uR"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 const char32_t U03[] = R"-(a)-" U"(b)";
 const char32_t U04[] = "(a)" UR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -56,8 +52,6 @@  const char32_t U08[] = UR"(a)" R"_{}#[]<
 const char32_t U09[] = UR"/^&|~!=,"'(a)/^&|~!=,"'" U"(b)";
 const char32_t U10[] = U"(a)" UR"0123456789abcdef(b)0123456789abcdef";
 const char32_t U11[] = UR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" UR"w(b)w";
-const char32_t U12[] = UR"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 const wchar_t L03[] = R"-(a)-" L"(b)";
 const wchar_t L04[] = "(a)" LR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -68,8 +62,6 @@  const wchar_t L08[] = LR"(a)" R"_{}#[]<>
 const wchar_t L09[] = LR"/^&|~!=,"'(a)/^&|~!=,"'" L"(b)";
 const wchar_t L10[] = L"(a)" LR"0123456789abcdef(b)0123456789abcdef";
 const wchar_t L11[] = LR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" LR"w(b)w";
-const wchar_t L12[] = LR"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 int
 main (void)
@@ -90,7 +82,6 @@  main (void)
   TEST (s09, "a(b)");
   TEST (s10, "(a)b");
   TEST (s11, "ab");
-  TEST (s12, "a");
   TEST (u03, u"a(b)");
   TEST (u04, u"(a)b");
   TEST (u05, u"ab");
@@ -100,7 +91,6 @@  main (void)
   TEST (u09, u"a(b)");
   TEST (u10, u"(a)b");
   TEST (u11, u"ab");
-  TEST (u12, u"a");
   TEST (U03, U"a(b)");
   TEST (U04, U"(a)b");
   TEST (U05, U"ab");
@@ -110,7 +100,6 @@  main (void)
   TEST (U09, U"a(b)");
   TEST (U10, U"(a)b");
   TEST (U11, U"ab");
-  TEST (U12, U"a");
   TEST (L03, L"a(b)");
   TEST (L04, L"(a)b");
   TEST (L05, L"ab");
@@ -120,6 +109,5 @@  main (void)
   TEST (L09, L"a(b)");
   TEST (L10, L"(a)b");
   TEST (L11, L"ab");
-  TEST (L12, L"a");
   return 0;
 }
--- gcc/testsuite/c-c++-common/raw-string-13.c.jj	2013-06-17 11:05:24.191769775 +0200
+++ gcc/testsuite/c-c++-common/raw-string-13.c	2013-06-17 15:03:58.661544262 +0200
@@ -0,0 +1,248 @@ 
+// PR preprocessor/57620
+// { dg-do run }
+// { dg-require-effective-target wchar }
+// { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
+// { dg-options "-std=c++11" { target c++ } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+#define R
+#define u
+#define uR
+#define U
+#define UR
+#define u8
+#define u8R
+#define L
+#define LR
+
+const char s00[] = R"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char s01[] = R"a(
+)\
+a"
+)a";
+const char s02[] = R"a(
+)a\
+"
+)a";
+const char s03[] = R"ab(
+)a\
+b"
+)ab";
+const char s04[] = R"a??/(x)a??/";
+const char s05[] = R"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char s06[] = R"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char s07[] = R"abc(??)\
+abc";)abc";
+const char s08[] = R"def(de)\
+def";)def";
+const char s09[] = R"a(??)\
+a"
+)a";
+const char s10[] = R"a(??)a\
+"
+)a";
+const char s11[] = R"ab(??)a\
+b"
+)ab";
+const char s12[] = R"a#(a#)a??=)a#";
+const char s13[] = R"a#(??)a??=??)a#";
+const char s14[] = R"??/(x)??/
+";)??/";
+const char s15[] = R"??/(??)??/
+";)??/";
+
+const char16_t u00[] = uR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char16_t u01[] = uR"a(
+)\
+a"
+)a";
+const char16_t u02[] = uR"a(
+)a\
+"
+)a";
+const char16_t u03[] = uR"ab(
+)a\
+b"
+)ab";
+const char16_t u04[] = uR"a??/(x)a??/";
+const char16_t u05[] = uR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char16_t u06[] = uR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char16_t u07[] = uR"abc(??)\
+abc";)abc";
+const char16_t u08[] = uR"def(de)\
+def";)def";
+const char16_t u09[] = uR"a(??)\
+a"
+)a";
+const char16_t u10[] = uR"a(??)a\
+"
+)a";
+const char16_t u11[] = uR"ab(??)a\
+b"
+)ab";
+const char16_t u12[] = uR"a#(a#)a??=)a#";
+const char16_t u13[] = uR"a#(??)a??=??)a#";
+const char16_t u14[] = uR"??/(x)??/
+";)??/";
+const char16_t u15[] = uR"??/(??)??/
+";)??/";
+
+const char32_t U00[] = UR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char32_t U01[] = UR"a(
+)\
+a"
+)a";
+const char32_t U02[] = UR"a(
+)a\
+"
+)a";
+const char32_t U03[] = UR"ab(
+)a\
+b"
+)ab";
+const char32_t U04[] = UR"a??/(x)a??/";
+const char32_t U05[] = UR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char32_t U06[] = UR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char32_t U07[] = UR"abc(??)\
+abc";)abc";
+const char32_t U08[] = UR"def(de)\
+def";)def";
+const char32_t U09[] = UR"a(??)\
+a"
+)a";
+const char32_t U10[] = UR"a(??)a\
+"
+)a";
+const char32_t U11[] = UR"ab(??)a\
+b"
+)ab";
+const char32_t U12[] = UR"a#(a#)a??=)a#";
+const char32_t U13[] = UR"a#(??)a??=??)a#";
+const char32_t U14[] = UR"??/(x)??/
+";)??/";
+const char32_t U15[] = UR"??/(??)??/
+";)??/";
+
+const wchar_t L00[] = LR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const wchar_t L01[] = LR"a(
+)\
+a"
+)a";
+const wchar_t L02[] = LR"a(
+)a\
+"
+)a";
+const wchar_t L03[] = LR"ab(
+)a\
+b"
+)ab";
+const wchar_t L04[] = LR"a??/(x)a??/";
+const wchar_t L05[] = LR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const wchar_t L06[] = LR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const wchar_t L07[] = LR"abc(??)\
+abc";)abc";
+const wchar_t L08[] = LR"def(de)\
+def";)def";
+const wchar_t L09[] = LR"a(??)\
+a"
+)a";
+const wchar_t L10[] = LR"a(??)a\
+"
+)a";
+const wchar_t L11[] = LR"ab(??)a\
+b"
+)ab";
+const wchar_t L12[] = LR"a#(a#)a??=)a#";
+const wchar_t L13[] = LR"a#(??)a??=??)a#";
+const wchar_t L14[] = LR"??/(x)??/
+";)??/";
+const wchar_t L15[] = LR"??/(??)??/
+";)??/";
+
+int
+main (void)
+{
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+  TEST (s00, "??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (s01, "\n)\\\na\"\n");
+  TEST (s02, "\n)a\\\n\"\n");
+  TEST (s03, "\n)a\\\nb\"\n");
+  TEST (s04, "x");
+  TEST (s05, "abc");
+  TEST (s06, "abc");
+  TEST (s07, "??"")\\\nabc\";");
+  TEST (s08, "de)\\\ndef\";");
+  TEST (s09, "??"")\\\na\"\n");
+  TEST (s10, "??"")a\\\n\"\n");
+  TEST (s11, "??"")a\\\nb\"\n");
+  TEST (s12, "a#)a??""=");
+  TEST (s13, "??"")a??""=??");
+  TEST (s14, "x)??""/\n\";");
+  TEST (s15, "??"")??""/\n\";");
+  TEST (u00, u"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (u01, u"\n)\\\na\"\n");
+  TEST (u02, u"\n)a\\\n\"\n");
+  TEST (u03, u"\n)a\\\nb\"\n");
+  TEST (u04, u"x");
+  TEST (u05, u"abc");
+  TEST (u06, u"abc");
+  TEST (u07, u"??"")\\\nabc\";");
+  TEST (u08, u"de)\\\ndef\";");
+  TEST (u09, u"??"")\\\na\"\n");
+  TEST (u10, u"??"")a\\\n\"\n");
+  TEST (u11, u"??"")a\\\nb\"\n");
+  TEST (u12, u"a#)a??""=");
+  TEST (u13, u"??"")a??""=??");
+  TEST (u14, u"x)??""/\n\";");
+  TEST (u15, u"??"")??""/\n\";");
+  TEST (U00, U"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (U01, U"\n)\\\na\"\n");
+  TEST (U02, U"\n)a\\\n\"\n");
+  TEST (U03, U"\n)a\\\nb\"\n");
+  TEST (U04, U"x");
+  TEST (U05, U"abc");
+  TEST (U06, U"abc");
+  TEST (U07, U"??"")\\\nabc\";");
+  TEST (U08, U"de)\\\ndef\";");
+  TEST (U09, U"??"")\\\na\"\n");
+  TEST (U10, U"??"")a\\\n\"\n");
+  TEST (U11, U"??"")a\\\nb\"\n");
+  TEST (U12, U"a#)a??""=");
+  TEST (U13, U"??"")a??""=??");
+  TEST (U14, U"x)??""/\n\";");
+  TEST (U15, U"??"")??""/\n\";");
+  TEST (L00, L"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (L01, L"\n)\\\na\"\n");
+  TEST (L02, L"\n)a\\\n\"\n");
+  TEST (L03, L"\n)a\\\nb\"\n");
+  TEST (L04, L"x");
+  TEST (L05, L"abc");
+  TEST (L06, L"abc");
+  TEST (L07, L"??"")\\\nabc\";");
+  TEST (L08, L"de)\\\ndef\";");
+  TEST (L09, L"??"")\\\na\"\n");
+  TEST (L10, L"??"")a\\\n\"\n");
+  TEST (L11, L"??"")a\\\nb\"\n");
+  TEST (L12, L"a#)a??""=");
+  TEST (L13, L"??"")a??""=??");
+  TEST (L14, L"x)??""/\n\";");
+  TEST (L15, L"??"")??""/\n\";");
+  return 0;
+}
--- gcc/testsuite/c-c++-common/raw-string-14.c.jj	2013-06-17 13:30:33.494179714 +0200
+++ gcc/testsuite/c-c++-common/raw-string-14.c	2013-06-17 14:47:42.520443362 +0200
@@ -0,0 +1,39 @@ 
+// PR preprocessor/57620
+// { dg-do compile }
+// { dg-options "-std=gnu99 -trigraphs" { target c } }
+// { dg-options "-std=c++11" { target c++ } }
+
+const void *s0 = R"abc\
+def()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 6 }
+	// { dg-error "stray" "stray" { target *-*-* } 6 }
+const void *s1 = R"??/
+()??/";
+	// { dg-error "invalid new-line" "invalid" { target *-*-* } 10 }
+	// { dg-error "stray" "stray" { target *-*-* } 10 }
+	// { dg-warning "missing terminating" "missing" { target *-*-* } 10 }
+	// { dg-error "missing terminating" "missing" { target *-*-* } 10 }
+const void *s2 = R"abcdefghijklmn??/(a)abcdefghijklmn???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 16 }
+	// { dg-error "stray" "stray" { target *-*-* } 16 }
+	// { dg-error "expected" "expected" { target *-*-* } 16 }
+const void *s3 = R"abcdefghijklmno??/(a)abcdefghijklmno???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 20 }
+	// { dg-error "stray" "stray" { target *-*-* } 20 }
+const void *s4 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 23 }
+	// { dg-error "stray" "stray" { target *-*-* } 23 }
+const void *s5 = R"abc\
+()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 26 }
+	// { dg-error "stray" "stray" { target *-*-* } 26 }
+const void *s6 = R"\
+()";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 30 }
+	// { dg-error "stray" "stray" { target *-*-* } 30 }
+const void *s7 = R"\
+a()a";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 34 }
+	// { dg-error "stray" "stray" { target *-*-* } 34 }
+
+int main () {}
--- gcc/testsuite/c-c++-common/raw-string-15.c.jj	2013-06-17 15:02:18.399822472 +0200
+++ gcc/testsuite/c-c++-common/raw-string-15.c	2013-06-17 15:03:47.988213062 +0200
@@ -0,0 +1,248 @@ 
+// PR preprocessor/57620
+// { dg-do run }
+// { dg-require-effective-target wchar }
+// { dg-options "-std=gnu99 -Wno-c++-compat -Wtrigraphs" { target c } }
+// { dg-options "-std=gnu++11 -Wtrigraphs" { target c++ } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+#define R
+#define u
+#define uR
+#define U
+#define UR
+#define u8
+#define u8R
+#define L
+#define LR
+
+const char s00[] = R"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char s01[] = R"a(
+)\
+a"
+)a";
+const char s02[] = R"a(
+)a\
+"
+)a";
+const char s03[] = R"ab(
+)a\
+b"
+)ab";
+const char s04[] = R"a??/(x)a??/";
+const char s05[] = R"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char s06[] = R"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char s07[] = R"abc(??)\
+abc";)abc";
+const char s08[] = R"def(de)\
+def";)def";
+const char s09[] = R"a(??)\
+a"
+)a";
+const char s10[] = R"a(??)a\
+"
+)a";
+const char s11[] = R"ab(??)a\
+b"
+)ab";
+const char s12[] = R"a#(a#)a??=)a#";
+const char s13[] = R"a#(??)a??=??)a#";
+const char s14[] = R"??/(x)??/
+";)??/";
+const char s15[] = R"??/(??)??/
+";)??/";
+
+const char16_t u00[] = uR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char16_t u01[] = uR"a(
+)\
+a"
+)a";
+const char16_t u02[] = uR"a(
+)a\
+"
+)a";
+const char16_t u03[] = uR"ab(
+)a\
+b"
+)ab";
+const char16_t u04[] = uR"a??/(x)a??/";
+const char16_t u05[] = uR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char16_t u06[] = uR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char16_t u07[] = uR"abc(??)\
+abc";)abc";
+const char16_t u08[] = uR"def(de)\
+def";)def";
+const char16_t u09[] = uR"a(??)\
+a"
+)a";
+const char16_t u10[] = uR"a(??)a\
+"
+)a";
+const char16_t u11[] = uR"ab(??)a\
+b"
+)ab";
+const char16_t u12[] = uR"a#(a#)a??=)a#";
+const char16_t u13[] = uR"a#(??)a??=??)a#";
+const char16_t u14[] = uR"??/(x)??/
+";)??/";
+const char16_t u15[] = uR"??/(??)??/
+";)??/";
+
+const char32_t U00[] = UR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char32_t U01[] = UR"a(
+)\
+a"
+)a";
+const char32_t U02[] = UR"a(
+)a\
+"
+)a";
+const char32_t U03[] = UR"ab(
+)a\
+b"
+)ab";
+const char32_t U04[] = UR"a??/(x)a??/";
+const char32_t U05[] = UR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char32_t U06[] = UR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char32_t U07[] = UR"abc(??)\
+abc";)abc";
+const char32_t U08[] = UR"def(de)\
+def";)def";
+const char32_t U09[] = UR"a(??)\
+a"
+)a";
+const char32_t U10[] = UR"a(??)a\
+"
+)a";
+const char32_t U11[] = UR"ab(??)a\
+b"
+)ab";
+const char32_t U12[] = UR"a#(a#)a??=)a#";
+const char32_t U13[] = UR"a#(??)a??=??)a#";
+const char32_t U14[] = UR"??/(x)??/
+";)??/";
+const char32_t U15[] = UR"??/(??)??/
+";)??/";
+
+const wchar_t L00[] = LR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const wchar_t L01[] = LR"a(
+)\
+a"
+)a";
+const wchar_t L02[] = LR"a(
+)a\
+"
+)a";
+const wchar_t L03[] = LR"ab(
+)a\
+b"
+)ab";
+const wchar_t L04[] = LR"a??/(x)a??/";
+const wchar_t L05[] = LR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const wchar_t L06[] = LR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const wchar_t L07[] = LR"abc(??)\
+abc";)abc";
+const wchar_t L08[] = LR"def(de)\
+def";)def";
+const wchar_t L09[] = LR"a(??)\
+a"
+)a";
+const wchar_t L10[] = LR"a(??)a\
+"
+)a";
+const wchar_t L11[] = LR"ab(??)a\
+b"
+)ab";
+const wchar_t L12[] = LR"a#(a#)a??=)a#";
+const wchar_t L13[] = LR"a#(??)a??=??)a#";
+const wchar_t L14[] = LR"??/(x)??/
+";)??/";
+const wchar_t L15[] = LR"??/(??)??/
+";)??/";
+
+int
+main (void)
+{
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+  TEST (s00, "??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (s01, "\n)\\\na\"\n");
+  TEST (s02, "\n)a\\\n\"\n");
+  TEST (s03, "\n)a\\\nb\"\n");
+  TEST (s04, "x");
+  TEST (s05, "abc");
+  TEST (s06, "abc");
+  TEST (s07, "??"")\\\nabc\";");
+  TEST (s08, "de)\\\ndef\";");
+  TEST (s09, "??"")\\\na\"\n");
+  TEST (s10, "??"")a\\\n\"\n");
+  TEST (s11, "??"")a\\\nb\"\n");
+  TEST (s12, "a#)a??""=");
+  TEST (s13, "??"")a??""=??");
+  TEST (s14, "x)??""/\n\";");
+  TEST (s15, "??"")??""/\n\";");
+  TEST (u00, u"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (u01, u"\n)\\\na\"\n");
+  TEST (u02, u"\n)a\\\n\"\n");
+  TEST (u03, u"\n)a\\\nb\"\n");
+  TEST (u04, u"x");
+  TEST (u05, u"abc");
+  TEST (u06, u"abc");
+  TEST (u07, u"??"")\\\nabc\";");
+  TEST (u08, u"de)\\\ndef\";");
+  TEST (u09, u"??"")\\\na\"\n");
+  TEST (u10, u"??"")a\\\n\"\n");
+  TEST (u11, u"??"")a\\\nb\"\n");
+  TEST (u12, u"a#)a??""=");
+  TEST (u13, u"??"")a??""=??");
+  TEST (u14, u"x)??""/\n\";");
+  TEST (u15, u"??"")??""/\n\";");
+  TEST (U00, U"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (U01, U"\n)\\\na\"\n");
+  TEST (U02, U"\n)a\\\n\"\n");
+  TEST (U03, U"\n)a\\\nb\"\n");
+  TEST (U04, U"x");
+  TEST (U05, U"abc");
+  TEST (U06, U"abc");
+  TEST (U07, U"??"")\\\nabc\";");
+  TEST (U08, U"de)\\\ndef\";");
+  TEST (U09, U"??"")\\\na\"\n");
+  TEST (U10, U"??"")a\\\n\"\n");
+  TEST (U11, U"??"")a\\\nb\"\n");
+  TEST (U12, U"a#)a??""=");
+  TEST (U13, U"??"")a??""=??");
+  TEST (U14, U"x)??""/\n\";");
+  TEST (U15, U"??"")??""/\n\";");
+  TEST (L00, L"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (L01, L"\n)\\\na\"\n");
+  TEST (L02, L"\n)a\\\n\"\n");
+  TEST (L03, L"\n)a\\\nb\"\n");
+  TEST (L04, L"x");
+  TEST (L05, L"abc");
+  TEST (L06, L"abc");
+  TEST (L07, L"??"")\\\nabc\";");
+  TEST (L08, L"de)\\\ndef\";");
+  TEST (L09, L"??"")\\\na\"\n");
+  TEST (L10, L"??"")a\\\n\"\n");
+  TEST (L11, L"??"")a\\\nb\"\n");
+  TEST (L12, L"a#)a??""=");
+  TEST (L13, L"??"")a??""=??");
+  TEST (L14, L"x)??""/\n\";");
+  TEST (L15, L"??"")??""/\n\";");
+  return 0;
+}
--- gcc/testsuite/c-c++-common/raw-string-16.c.jj	2013-06-17 15:03:16.110148070 +0200
+++ gcc/testsuite/c-c++-common/raw-string-16.c	2013-06-17 15:11:11.386376512 +0200
@@ -0,0 +1,33 @@ 
+// PR preprocessor/57620
+// { dg-do compile }
+// { dg-options "-std=gnu99 -Wtrigraphs" { target c } }
+// { dg-options "-std=gnu++11 -Wtrigraphs" { target c++ } }
+
+const void *s0 = R"abc\
+def()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 6 }
+	// { dg-error "stray" "stray" { target *-*-* } 6 }
+const void *s1 = R"abcdefghijklmn??/(a)abcdefghijklmn???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 10 }
+	// { dg-error "stray" "stray" { target *-*-* } 10 }
+
+const void *s2 = R"abcdefghijklmno??/(a)abcdefghijklmno???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 14 }
+	// { dg-error "stray" "stray" { target *-*-* } 14 }
+const void *s3 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=?"; // { dg-warning "trigraph ..= ignored" }
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 17 }
+	// { dg-error "stray" "stray" { target *-*-* } 17 }
+const void *s4 = R"abc\
+()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 20 }
+	// { dg-error "stray" "stray" { target *-*-* } 20 }
+const void *s5 = R"\
+()";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 24 }
+	// { dg-error "stray" "stray" { target *-*-* } 24 }
+const void *s6 = R"\
+a()a";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 28 }
+	// { dg-error "stray" "stray" { target *-*-* } 28 }
+
+int main () {}