diff mbox

Fix raw-string handling (PR preprocessor/57620)

Message ID 20130617162846.GB2336@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek June 17, 2013, 4:28 p.m. UTC
Hi!

lex_raw_string right now only undoes phase {1,2} transformations in between
R"delim( and )delim", while it should undo them everywhere between R" and
the final ".  The following patch implements that, and adds testsuite
coverage for that.  Bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk?

BTW, any thoughts on how to deal with the GNU backslash-whitespace-newline
extension?  We need to undo it, but we lose track of exactly what whitespace
has been seen.  Perhaps we could encode in extra line notes that there were,
e.g., 123 spaces, 1 tab, 2 spaces in between \ and new-line, or just add a
note with a pointer to an allocated buffer holding the whitespace?
I don't know how often \ whitespace \n occurs in real-world code.

2013-06-17  Jakub Jelinek  <jakub@redhat.com>

	PR preprocessor/57620
	* lex.c (lex_raw_string): Undo phase1 and phase2 transformations
	between R" and final " rather than only in between R"del( and )del".

	* c-c++-common/raw-string-2.c (s12, u12, U12, L12): Remove.
	(main): Don't test {s,u,U,L}12.
	* c-c++-common/raw-string-13.c: New test.
	* c-c++-common/raw-string-14.c: New test.
	* c-c++-common/raw-string-15.c: New test.
	* c-c++-common/raw-string-16.c: New test.


	Jakub

Comments

Jakub Jelinek June 27, 2013, 8:08 a.m. UTC | #1
On Mon, Jun 17, 2013 at 06:28:46PM +0200, Jakub Jelinek wrote:
> lex_raw_string right now only undoes phase {1,2} transformations in between
> R"delim( and )delim", while it should undo them everywhere between R" and
> the final ".  The following patch implements that, and adds testsuite
> coverage for that.  Bootstrapped/regtested on x86_64-linux and i686-linux,
> ok for trunk?

Ping.

> 2013-06-17  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR preprocessor/57620
> 	* lex.c (lex_raw_string): Undo phase1 and phase2 transformations
> 	between R" and final " rather than only in between R"del( and )del".
> 
> 	* c-c++-common/raw-string-2.c (s12, u12, U12, L12): Remove.
> 	(main): Don't test {s,u,U,L}12.
> 	* c-c++-common/raw-string-13.c: New test.
> 	* c-c++-common/raw-string-14.c: New test.
> 	* c-c++-common/raw-string-15.c: New test.
> 	* c-c++-common/raw-string-16.c: New test.

	Jakub
Jason Merrill July 10, 2013, 6:22 a.m. UTC | #2
It seems undesirable to go from one to four separate copies of the 
note-handling code.  Could we instead handle the different states of 
prefix, body and suffix parsing in local variables and just have one 
loop over the characters/notes in the input?

Jason
diff mbox

Patch

--- libcpp/lex.c.jj	2013-04-25 23:47:59.000000000 +0200
+++ libcpp/lex.c	2013-06-17 15:22:40.343049765 +0200
@@ -1346,7 +1346,8 @@  static void
 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 		const uchar *cur)
 {
-  const uchar *raw_prefix;
+  uchar raw_prefix[17];
+  const uchar *raw_prefix_start, *orig_base;
   unsigned int raw_prefix_len = 0;
   enum cpp_ttype type;
   size_t total_len = 0;
@@ -1358,9 +1359,76 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 	  : CPP_STRING);
 
-  raw_prefix = cur + 1;
-  while (raw_prefix_len < 16)
+#define BUF_APPEND(STR,LEN)					\
+      do {							\
+	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
+			&first_buff, &last_buff);		\
+	total_len += (LEN);					\
+      } while (0);
+
+  orig_base = base;
+  raw_prefix_start = cur;
+  cur++;
+  while (raw_prefix_len <= 16)
     {
+      /* If we previously performed any trigraph or line splicing
+	 transformations, undo them anywhere between R" and the final "
+	 of the raw string.  */
+      while (note->pos < cur)
+	++note;
+
+      raw_prefix[raw_prefix_len] = *cur;
+      if (note->pos == cur)
+	switch (note->type)
+	  {
+	  case '\\':
+	  case ' ':
+	    /* As neither \\ nor \n are valid d-char characters,
+	       '\\' or ' ' notes always result in invalid raw string.  */
+	    if (raw_prefix_len <= 16)
+	      raw_prefix[raw_prefix_len] = '\\';
+	    break;
+	  case 0:
+	    break;
+	  default:
+	    if (_cpp_trigraph_map[note->type])
+	      {
+		uchar type = note->type;
+		note->type = 0;
+
+		if (!CPP_OPTION (pfile, trigraphs))
+		  break;
+		if (raw_prefix_len <= 16)
+		  raw_prefix[raw_prefix_len] = '?';
+		if (raw_prefix_len < 16)
+		  raw_prefix[++raw_prefix_len] = '?';
+		if (raw_prefix_len < 16)
+		  raw_prefix[++raw_prefix_len] = type;
+		/* ??/ followed by new-line means invalid raw-string,
+		   while ??/ is fine, the new-line is not.  */
+		if (type == '/' && note[1].pos == cur)
+		  {
+		    if (note[1].type != '\\' && note[1].type != ' ')
+		      abort ();
+		    ++note;
+		    if (raw_prefix_len < 16)
+		      raw_prefix[++raw_prefix_len] = '\n';
+		  }
+		else
+		  {
+		    BUF_APPEND (base, cur - base);
+		    base = cur + 1;
+		    BUF_APPEND ("??", 2);
+		    BUF_APPEND (&type, 1);
+		  }
+	      }
+	    else
+	      abort ();
+	  }
+
+      if (raw_prefix_len == 16)
+	break;
+
       switch (raw_prefix[raw_prefix_len])
 	{
 	case ' ': case '(': case ')': case '\\': case '\t':
@@ -1385,6 +1453,7 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	case '&': case '|': case '~': case '!': case '=': case ',':
 	case '"': case '\'':
 	  raw_prefix_len++;
+	  cur++;
 	  continue;
 	}
       break;
@@ -1392,30 +1461,29 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 
   if (raw_prefix[raw_prefix_len] != '(')
     {
-      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
-		+ 1;
+      int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
       if (raw_prefix_len == 16)
 	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
 			     "raw string delimiter longer than 16 characters");
+      else if (raw_prefix[raw_prefix_len] == '\n')
+	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+			     "invalid new-line in raw string delimiter");
       else
 	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
 			     "invalid character '%c' in raw string delimiter",
 			     (int) raw_prefix[raw_prefix_len]);
-      pfile->buffer->cur = raw_prefix - 1;
-      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
+      pfile->buffer->cur = raw_prefix_start;
+      create_literal (pfile, token, orig_base, raw_prefix_start - orig_base,
+		      CPP_OTHER);
+      if (first_buff)
+	_cpp_release_buff (pfile, first_buff);
       return;
     }
+  raw_prefix[raw_prefix_len] = '"';
 
-  cur = raw_prefix + raw_prefix_len + 1;
+  cur++;
   for (;;)
     {
-#define BUF_APPEND(STR,LEN)					\
-      do {							\
-	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
-			&first_buff, &last_buff);		\
-	total_len += (LEN);					\
-      } while (0);
-
       cppchar_t c;
 
       /* If we previously performed any trigraph or line splicing
@@ -1479,23 +1547,14 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 		      ++note;
 		      goto after_backslash;
 		    }
-		  /* The ) from ??) could be part of the suffix.  */
-		  else if (type == ')'
-			   && strncmp ((const char *) cur+1,
-				       (const char *) raw_prefix,
-				       raw_prefix_len) == 0
-			   && cur[raw_prefix_len+1] == '"')
-		    {
-		      BUF_APPEND (")", 1);
-		      base++;
-		      cur += raw_prefix_len + 2;
-		      goto break_outer_loop;
-		    }
 		  else
 		    {
 		      /* Skip the replacement character.  */
 		      base = ++cur;
 		      BUF_APPEND (&type, 1);
+		      /* The ) from ??) could be part of the suffix.  */
+		      if (type == ')')
+			goto check_close_paren;
 		    }
 		}
 	      else
@@ -1505,13 +1564,103 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	}
       c = *cur++;
 
-      if (c == ')'
-	  && strncmp ((const char *) cur, (const char *) raw_prefix,
-		      raw_prefix_len) == 0
-	  && cur[raw_prefix_len] == '"')
+      if (c == ')')
 	{
-	  cur += raw_prefix_len + 1;
-	  break;
+	 check_close_paren:
+	  _cpp_line_note *tnote = note;
+	  unsigned int idx = 0;
+	  const uchar *tcur = cur;
+	  /* This might be the end of the raw string, but doesn't have to be.
+	     Do the checking in two steps: first verify whether it is the end
+	     of the raw string; if not, just continue normal processing as if
+	     ) were any other character; otherwise transform the trigraph
+	     notes into type 0 to avoid -Wtrigraphs warnings and append the
+	     trigraph sequences to the buffer.  */
+	  do
+	    {
+	      while (tnote->pos < tcur)
+		++tnote;
+	      for (; tcur && tnote->pos == tcur; ++tnote)
+		switch (tnote->type)
+		  {
+		  case '\\':
+		  case ' ':
+		    /* As neither \\ nor \n are valid d-char characters,
+		       '\\' or ' ' notes always result in invalid raw
+		       string.  */
+		    tcur = NULL;
+		    break;
+		  case 0:
+		    break;
+		  default:
+		    if (_cpp_trigraph_map[tnote->type])
+		      {
+			if (!CPP_OPTION (pfile, trigraphs))
+			  break;
+			if (raw_prefix[idx] != '?'
+			    || raw_prefix[idx + 1] != '?'
+			    || raw_prefix[idx + 2] != tnote->type)
+			  {
+			    tcur = NULL;
+			    break;
+			  }
+			idx += 3;
+			if (tnote->type == '/' && tnote[1].pos == tcur)
+			  {
+			    if (tnote[1].type != '\\' && tnote[1].type != ' ')
+			      abort ();
+			    tcur = NULL;
+			    break;
+			  }
+			tcur++;
+		      }
+		    else
+		      abort ();
+		    break;
+		  }
+	      if (tcur == NULL)
+		break;
+	      if (*tcur != raw_prefix[idx++])
+		{
+		  tcur = NULL;
+		  break;
+		}
+	      if (idx == raw_prefix_len + 1)
+		break;
+	      tcur++;
+	    }
+	  while (1);
+	  /* If tcur is non-NULL at this point, this is really the end
+	     of the raw string.  Perform second step.  */
+	  if (tcur != NULL)
+	    {
+	      for (; cur != tcur + 1; cur++)
+		{
+		  while (note->pos < cur)
+		    ++note;
+		  for (; cur && note->pos == cur; ++note)
+		    switch (note->type)
+		      {
+		      case '\\':
+		      case ' ':
+			abort ();
+		      case 0:
+			break;
+		      default:
+			if (CPP_OPTION (pfile, trigraphs))
+			  {
+			    uchar type = note->type;
+			    BUF_APPEND (base, cur - base);
+			    base = cur + 1;
+			    BUF_APPEND ("??", 2);
+			    BUF_APPEND (&type, 1);
+			  }
+			note->type = 0;
+			break;
+		      }
+		}
+	      break;
+	    }
 	}
       else if (c == '\n')
 	{
@@ -1552,7 +1701,6 @@  lex_raw_string (cpp_reader *pfile, cpp_t
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
 	}
     }
- break_outer_loop:
 
   if (CPP_OPTION (pfile, user_literals))
     {
--- gcc/testsuite/c-c++-common/raw-string-2.c.jj	2010-03-30 08:56:15.000000000 +0200
+++ gcc/testsuite/c-c++-common/raw-string-2.c	2013-06-17 11:07:41.050902280 +0200
@@ -32,8 +32,6 @@  const char s08[] = u8R"(a)" R"_{}#[]<>%:
 const char s09[] = u8R"/^&|~!=,"'(a)/^&|~!=,"'" u8"(b)";
 const char s10[] = u8"(a)" u8R"0123456789abcdef(b)0123456789abcdef";
 const char s11[] = u8R"ghijklmnopqrstuv(a)ghijklmnopqrstuv" u8R"w(b)w";
-const char s12[] = R"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 const char16_t u03[] = R"-(a)-" u"(b)";
 const char16_t u04[] = "(a)" uR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -44,8 +42,6 @@  const char16_t u08[] = uR"(a)" R"_{}#[]<
 const char16_t u09[] = uR"/^&|~!=,"'(a)/^&|~!=,"'" u"(b)";
 const char16_t u10[] = u"(a)" uR"0123456789abcdef(b)0123456789abcdef";
 const char16_t u11[] = uR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" uR"w(b)w";
-const char16_t u12[] = uR"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 const char32_t U03[] = R"-(a)-" U"(b)";
 const char32_t U04[] = "(a)" UR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -56,8 +52,6 @@  const char32_t U08[] = UR"(a)" R"_{}#[]<
 const char32_t U09[] = UR"/^&|~!=,"'(a)/^&|~!=,"'" U"(b)";
 const char32_t U10[] = U"(a)" UR"0123456789abcdef(b)0123456789abcdef";
 const char32_t U11[] = UR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" UR"w(b)w";
-const char32_t U12[] = UR"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 const wchar_t L03[] = R"-(a)-" L"(b)";
 const wchar_t L04[] = "(a)" LR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -68,8 +62,6 @@  const wchar_t L08[] = LR"(a)" R"_{}#[]<>
 const wchar_t L09[] = LR"/^&|~!=,"'(a)/^&|~!=,"'" L"(b)";
 const wchar_t L10[] = L"(a)" LR"0123456789abcdef(b)0123456789abcdef";
 const wchar_t L11[] = LR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" LR"w(b)w";
-const wchar_t L12[] = LR"??=??(??<??>??)??'??!??-\
-(a)#[{}]^|~";
 
 int
 main (void)
@@ -90,7 +82,6 @@  main (void)
   TEST (s09, "a(b)");
   TEST (s10, "(a)b");
   TEST (s11, "ab");
-  TEST (s12, "a");
   TEST (u03, u"a(b)");
   TEST (u04, u"(a)b");
   TEST (u05, u"ab");
@@ -100,7 +91,6 @@  main (void)
   TEST (u09, u"a(b)");
   TEST (u10, u"(a)b");
   TEST (u11, u"ab");
-  TEST (u12, u"a");
   TEST (U03, U"a(b)");
   TEST (U04, U"(a)b");
   TEST (U05, U"ab");
@@ -110,7 +100,6 @@  main (void)
   TEST (U09, U"a(b)");
   TEST (U10, U"(a)b");
   TEST (U11, U"ab");
-  TEST (U12, U"a");
   TEST (L03, L"a(b)");
   TEST (L04, L"(a)b");
   TEST (L05, L"ab");
@@ -120,6 +109,5 @@  main (void)
   TEST (L09, L"a(b)");
   TEST (L10, L"(a)b");
   TEST (L11, L"ab");
-  TEST (L12, L"a");
   return 0;
 }
--- gcc/testsuite/c-c++-common/raw-string-13.c.jj	2013-06-17 11:05:24.191769775 +0200
+++ gcc/testsuite/c-c++-common/raw-string-13.c	2013-06-17 15:03:58.661544262 +0200
@@ -0,0 +1,248 @@ 
+// PR preprocessor/57620
+// { dg-do run }
+// { dg-require-effective-target wchar }
+// { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
+// { dg-options "-std=c++11" { target c++ } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+#define R
+#define u
+#define uR
+#define U
+#define UR
+#define u8
+#define u8R
+#define L
+#define LR
+
+const char s00[] = R"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char s01[] = R"a(
+)\
+a"
+)a";
+const char s02[] = R"a(
+)a\
+"
+)a";
+const char s03[] = R"ab(
+)a\
+b"
+)ab";
+const char s04[] = R"a??/(x)a??/";
+const char s05[] = R"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char s06[] = R"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char s07[] = R"abc(??)\
+abc";)abc";
+const char s08[] = R"def(de)\
+def";)def";
+const char s09[] = R"a(??)\
+a"
+)a";
+const char s10[] = R"a(??)a\
+"
+)a";
+const char s11[] = R"ab(??)a\
+b"
+)ab";
+const char s12[] = R"a#(a#)a??=)a#";
+const char s13[] = R"a#(??)a??=??)a#";
+const char s14[] = R"??/(x)??/
+";)??/";
+const char s15[] = R"??/(??)??/
+";)??/";
+
+const char16_t u00[] = uR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char16_t u01[] = uR"a(
+)\
+a"
+)a";
+const char16_t u02[] = uR"a(
+)a\
+"
+)a";
+const char16_t u03[] = uR"ab(
+)a\
+b"
+)ab";
+const char16_t u04[] = uR"a??/(x)a??/";
+const char16_t u05[] = uR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char16_t u06[] = uR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char16_t u07[] = uR"abc(??)\
+abc";)abc";
+const char16_t u08[] = uR"def(de)\
+def";)def";
+const char16_t u09[] = uR"a(??)\
+a"
+)a";
+const char16_t u10[] = uR"a(??)a\
+"
+)a";
+const char16_t u11[] = uR"ab(??)a\
+b"
+)ab";
+const char16_t u12[] = uR"a#(a#)a??=)a#";
+const char16_t u13[] = uR"a#(??)a??=??)a#";
+const char16_t u14[] = uR"??/(x)??/
+";)??/";
+const char16_t u15[] = uR"??/(??)??/
+";)??/";
+
+const char32_t U00[] = UR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char32_t U01[] = UR"a(
+)\
+a"
+)a";
+const char32_t U02[] = UR"a(
+)a\
+"
+)a";
+const char32_t U03[] = UR"ab(
+)a\
+b"
+)ab";
+const char32_t U04[] = UR"a??/(x)a??/";
+const char32_t U05[] = UR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char32_t U06[] = UR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char32_t U07[] = UR"abc(??)\
+abc";)abc";
+const char32_t U08[] = UR"def(de)\
+def";)def";
+const char32_t U09[] = UR"a(??)\
+a"
+)a";
+const char32_t U10[] = UR"a(??)a\
+"
+)a";
+const char32_t U11[] = UR"ab(??)a\
+b"
+)ab";
+const char32_t U12[] = UR"a#(a#)a??=)a#";
+const char32_t U13[] = UR"a#(??)a??=??)a#";
+const char32_t U14[] = UR"??/(x)??/
+";)??/";
+const char32_t U15[] = UR"??/(??)??/
+";)??/";
+
+const wchar_t L00[] = LR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const wchar_t L01[] = LR"a(
+)\
+a"
+)a";
+const wchar_t L02[] = LR"a(
+)a\
+"
+)a";
+const wchar_t L03[] = LR"ab(
+)a\
+b"
+)ab";
+const wchar_t L04[] = LR"a??/(x)a??/";
+const wchar_t L05[] = LR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const wchar_t L06[] = LR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const wchar_t L07[] = LR"abc(??)\
+abc";)abc";
+const wchar_t L08[] = LR"def(de)\
+def";)def";
+const wchar_t L09[] = LR"a(??)\
+a"
+)a";
+const wchar_t L10[] = LR"a(??)a\
+"
+)a";
+const wchar_t L11[] = LR"ab(??)a\
+b"
+)ab";
+const wchar_t L12[] = LR"a#(a#)a??=)a#";
+const wchar_t L13[] = LR"a#(??)a??=??)a#";
+const wchar_t L14[] = LR"??/(x)??/
+";)??/";
+const wchar_t L15[] = LR"??/(??)??/
+";)??/";
+
+int
+main (void)
+{
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+  TEST (s00, "??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (s01, "\n)\\\na\"\n");
+  TEST (s02, "\n)a\\\n\"\n");
+  TEST (s03, "\n)a\\\nb\"\n");
+  TEST (s04, "x");
+  TEST (s05, "abc");
+  TEST (s06, "abc");
+  TEST (s07, "??"")\\\nabc\";");
+  TEST (s08, "de)\\\ndef\";");
+  TEST (s09, "??"")\\\na\"\n");
+  TEST (s10, "??"")a\\\n\"\n");
+  TEST (s11, "??"")a\\\nb\"\n");
+  TEST (s12, "a#)a??""=");
+  TEST (s13, "??"")a??""=??");
+  TEST (s14, "x)??""/\n\";");
+  TEST (s15, "??"")??""/\n\";");
+  TEST (u00, u"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (u01, u"\n)\\\na\"\n");
+  TEST (u02, u"\n)a\\\n\"\n");
+  TEST (u03, u"\n)a\\\nb\"\n");
+  TEST (u04, u"x");
+  TEST (u05, u"abc");
+  TEST (u06, u"abc");
+  TEST (u07, u"??"")\\\nabc\";");
+  TEST (u08, u"de)\\\ndef\";");
+  TEST (u09, u"??"")\\\na\"\n");
+  TEST (u10, u"??"")a\\\n\"\n");
+  TEST (u11, u"??"")a\\\nb\"\n");
+  TEST (u12, u"a#)a??""=");
+  TEST (u13, u"??"")a??""=??");
+  TEST (u14, u"x)??""/\n\";");
+  TEST (u15, u"??"")??""/\n\";");
+  TEST (U00, U"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (U01, U"\n)\\\na\"\n");
+  TEST (U02, U"\n)a\\\n\"\n");
+  TEST (U03, U"\n)a\\\nb\"\n");
+  TEST (U04, U"x");
+  TEST (U05, U"abc");
+  TEST (U06, U"abc");
+  TEST (U07, U"??"")\\\nabc\";");
+  TEST (U08, U"de)\\\ndef\";");
+  TEST (U09, U"??"")\\\na\"\n");
+  TEST (U10, U"??"")a\\\n\"\n");
+  TEST (U11, U"??"")a\\\nb\"\n");
+  TEST (U12, U"a#)a??""=");
+  TEST (U13, U"??"")a??""=??");
+  TEST (U14, U"x)??""/\n\";");
+  TEST (U15, U"??"")??""/\n\";");
+  TEST (L00, L"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (L01, L"\n)\\\na\"\n");
+  TEST (L02, L"\n)a\\\n\"\n");
+  TEST (L03, L"\n)a\\\nb\"\n");
+  TEST (L04, L"x");
+  TEST (L05, L"abc");
+  TEST (L06, L"abc");
+  TEST (L07, L"??"")\\\nabc\";");
+  TEST (L08, L"de)\\\ndef\";");
+  TEST (L09, L"??"")\\\na\"\n");
+  TEST (L10, L"??"")a\\\n\"\n");
+  TEST (L11, L"??"")a\\\nb\"\n");
+  TEST (L12, L"a#)a??""=");
+  TEST (L13, L"??"")a??""=??");
+  TEST (L14, L"x)??""/\n\";");
+  TEST (L15, L"??"")??""/\n\";");
+  return 0;
+}
--- gcc/testsuite/c-c++-common/raw-string-14.c.jj	2013-06-17 13:30:33.494179714 +0200
+++ gcc/testsuite/c-c++-common/raw-string-14.c	2013-06-17 14:47:42.520443362 +0200
@@ -0,0 +1,39 @@ 
+// PR preprocessor/57620
+// { dg-do compile }
+// { dg-options "-std=gnu99 -trigraphs" { target c } }
+// { dg-options "-std=c++11" { target c++ } }
+
+const void *s0 = R"abc\
+def()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 6 }
+	// { dg-error "stray" "stray" { target *-*-* } 6 }
+const void *s1 = R"??/
+()??/";
+	// { dg-error "invalid new-line" "invalid" { target *-*-* } 10 }
+	// { dg-error "stray" "stray" { target *-*-* } 10 }
+	// { dg-warning "missing terminating" "missing" { target *-*-* } 10 }
+	// { dg-error "missing terminating" "missing" { target *-*-* } 10 }
+const void *s2 = R"abcdefghijklmn??/(a)abcdefghijklmn???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 16 }
+	// { dg-error "stray" "stray" { target *-*-* } 16 }
+	// { dg-error "expected" "expected" { target *-*-* } 16 }
+const void *s3 = R"abcdefghijklmno??/(a)abcdefghijklmno???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 20 }
+	// { dg-error "stray" "stray" { target *-*-* } 20 }
+const void *s4 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 23 }
+	// { dg-error "stray" "stray" { target *-*-* } 23 }
+const void *s5 = R"abc\
+()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 26 }
+	// { dg-error "stray" "stray" { target *-*-* } 26 }
+const void *s6 = R"\
+()";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 30 }
+	// { dg-error "stray" "stray" { target *-*-* } 30 }
+const void *s7 = R"\
+a()a";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 34 }
+	// { dg-error "stray" "stray" { target *-*-* } 34 }
+
+int main () {}
--- gcc/testsuite/c-c++-common/raw-string-15.c.jj	2013-06-17 15:02:18.399822472 +0200
+++ gcc/testsuite/c-c++-common/raw-string-15.c	2013-06-17 15:03:47.988213062 +0200
@@ -0,0 +1,248 @@ 
+// PR preprocessor/57620
+// { dg-do run }
+// { dg-require-effective-target wchar }
+// { dg-options "-std=gnu99 -Wno-c++-compat -Wtrigraphs" { target c } }
+// { dg-options "-std=gnu++11 -Wtrigraphs" { target c++ } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+#define R
+#define u
+#define uR
+#define U
+#define UR
+#define u8
+#define u8R
+#define L
+#define LR
+
+const char s00[] = R"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char s01[] = R"a(
+)\
+a"
+)a";
+const char s02[] = R"a(
+)a\
+"
+)a";
+const char s03[] = R"ab(
+)a\
+b"
+)ab";
+const char s04[] = R"a??/(x)a??/";
+const char s05[] = R"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char s06[] = R"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char s07[] = R"abc(??)\
+abc";)abc";
+const char s08[] = R"def(de)\
+def";)def";
+const char s09[] = R"a(??)\
+a"
+)a";
+const char s10[] = R"a(??)a\
+"
+)a";
+const char s11[] = R"ab(??)a\
+b"
+)ab";
+const char s12[] = R"a#(a#)a??=)a#";
+const char s13[] = R"a#(??)a??=??)a#";
+const char s14[] = R"??/(x)??/
+";)??/";
+const char s15[] = R"??/(??)??/
+";)??/";
+
+const char16_t u00[] = uR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char16_t u01[] = uR"a(
+)\
+a"
+)a";
+const char16_t u02[] = uR"a(
+)a\
+"
+)a";
+const char16_t u03[] = uR"ab(
+)a\
+b"
+)ab";
+const char16_t u04[] = uR"a??/(x)a??/";
+const char16_t u05[] = uR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char16_t u06[] = uR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char16_t u07[] = uR"abc(??)\
+abc";)abc";
+const char16_t u08[] = uR"def(de)\
+def";)def";
+const char16_t u09[] = uR"a(??)\
+a"
+)a";
+const char16_t u10[] = uR"a(??)a\
+"
+)a";
+const char16_t u11[] = uR"ab(??)a\
+b"
+)ab";
+const char16_t u12[] = uR"a#(a#)a??=)a#";
+const char16_t u13[] = uR"a#(??)a??=??)a#";
+const char16_t u14[] = uR"??/(x)??/
+";)??/";
+const char16_t u15[] = uR"??/(??)??/
+";)??/";
+
+const char32_t U00[] = UR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const char32_t U01[] = UR"a(
+)\
+a"
+)a";
+const char32_t U02[] = UR"a(
+)a\
+"
+)a";
+const char32_t U03[] = UR"ab(
+)a\
+b"
+)ab";
+const char32_t U04[] = UR"a??/(x)a??/";
+const char32_t U05[] = UR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const char32_t U06[] = UR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const char32_t U07[] = UR"abc(??)\
+abc";)abc";
+const char32_t U08[] = UR"def(de)\
+def";)def";
+const char32_t U09[] = UR"a(??)\
+a"
+)a";
+const char32_t U10[] = UR"a(??)a\
+"
+)a";
+const char32_t U11[] = UR"ab(??)a\
+b"
+)ab";
+const char32_t U12[] = UR"a#(a#)a??=)a#";
+const char32_t U13[] = UR"a#(??)a??=??)a#";
+const char32_t U14[] = UR"??/(x)??/
+";)??/";
+const char32_t U15[] = UR"??/(??)??/
+";)??/";
+
+const wchar_t L00[] = LR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
+)??=??";
+const wchar_t L01[] = LR"a(
+)\
+a"
+)a";
+const wchar_t L02[] = LR"a(
+)a\
+"
+)a";
+const wchar_t L03[] = LR"ab(
+)a\
+b"
+)ab";
+const wchar_t L04[] = LR"a??/(x)a??/";
+const wchar_t L05[] = LR"abcdefghijklmn??(abc)abcdefghijklmn??";
+const wchar_t L06[] = LR"abcdefghijklm??/(abc)abcdefghijklm??/";
+const wchar_t L07[] = LR"abc(??)\
+abc";)abc";
+const wchar_t L08[] = LR"def(de)\
+def";)def";
+const wchar_t L09[] = LR"a(??)\
+a"
+)a";
+const wchar_t L10[] = LR"a(??)a\
+"
+)a";
+const wchar_t L11[] = LR"ab(??)a\
+b"
+)ab";
+const wchar_t L12[] = LR"a#(a#)a??=)a#";
+const wchar_t L13[] = LR"a#(??)a??=??)a#";
+const wchar_t L14[] = LR"??/(x)??/
+";)??/";
+const wchar_t L15[] = LR"??/(??)??/
+";)??/";
+
+int
+main (void)
+{
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+  TEST (s00, "??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (s01, "\n)\\\na\"\n");
+  TEST (s02, "\n)a\\\n\"\n");
+  TEST (s03, "\n)a\\\nb\"\n");
+  TEST (s04, "x");
+  TEST (s05, "abc");
+  TEST (s06, "abc");
+  TEST (s07, "??"")\\\nabc\";");
+  TEST (s08, "de)\\\ndef\";");
+  TEST (s09, "??"")\\\na\"\n");
+  TEST (s10, "??"")a\\\n\"\n");
+  TEST (s11, "??"")a\\\nb\"\n");
+  TEST (s12, "a#)a??""=");
+  TEST (s13, "??"")a??""=??");
+  TEST (s14, "x)??""/\n\";");
+  TEST (s15, "??"")??""/\n\";");
+  TEST (u00, u"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (u01, u"\n)\\\na\"\n");
+  TEST (u02, u"\n)a\\\n\"\n");
+  TEST (u03, u"\n)a\\\nb\"\n");
+  TEST (u04, u"x");
+  TEST (u05, u"abc");
+  TEST (u06, u"abc");
+  TEST (u07, u"??"")\\\nabc\";");
+  TEST (u08, u"de)\\\ndef\";");
+  TEST (u09, u"??"")\\\na\"\n");
+  TEST (u10, u"??"")a\\\n\"\n");
+  TEST (u11, u"??"")a\\\nb\"\n");
+  TEST (u12, u"a#)a??""=");
+  TEST (u13, u"??"")a??""=??");
+  TEST (u14, u"x)??""/\n\";");
+  TEST (u15, u"??"")??""/\n\";");
+  TEST (U00, U"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (U01, U"\n)\\\na\"\n");
+  TEST (U02, U"\n)a\\\n\"\n");
+  TEST (U03, U"\n)a\\\nb\"\n");
+  TEST (U04, U"x");
+  TEST (U05, U"abc");
+  TEST (U06, U"abc");
+  TEST (U07, U"??"")\\\nabc\";");
+  TEST (U08, U"de)\\\ndef\";");
+  TEST (U09, U"??"")\\\na\"\n");
+  TEST (U10, U"??"")a\\\n\"\n");
+  TEST (U11, U"??"")a\\\nb\"\n");
+  TEST (U12, U"a#)a??""=");
+  TEST (U13, U"??"")a??""=??");
+  TEST (U14, U"x)??""/\n\";");
+  TEST (U15, U"??"")??""/\n\";");
+  TEST (L00, L"??""<??"">??"")??""'??""!??""-\\\n(a)#[{}]^|~\";\n");
+  TEST (L01, L"\n)\\\na\"\n");
+  TEST (L02, L"\n)a\\\n\"\n");
+  TEST (L03, L"\n)a\\\nb\"\n");
+  TEST (L04, L"x");
+  TEST (L05, L"abc");
+  TEST (L06, L"abc");
+  TEST (L07, L"??"")\\\nabc\";");
+  TEST (L08, L"de)\\\ndef\";");
+  TEST (L09, L"??"")\\\na\"\n");
+  TEST (L10, L"??"")a\\\n\"\n");
+  TEST (L11, L"??"")a\\\nb\"\n");
+  TEST (L12, L"a#)a??""=");
+  TEST (L13, L"??"")a??""=??");
+  TEST (L14, L"x)??""/\n\";");
+  TEST (L15, L"??"")??""/\n\";");
+  return 0;
+}
--- gcc/testsuite/c-c++-common/raw-string-16.c.jj	2013-06-17 15:03:16.110148070 +0200
+++ gcc/testsuite/c-c++-common/raw-string-16.c	2013-06-17 15:11:11.386376512 +0200
@@ -0,0 +1,33 @@ 
+// PR preprocessor/57620
+// { dg-do compile }
+// { dg-options "-std=gnu99 -Wtrigraphs" { target c } }
+// { dg-options "-std=gnu++11 -Wtrigraphs" { target c++ } }
+
+const void *s0 = R"abc\
+def()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 6 }
+	// { dg-error "stray" "stray" { target *-*-* } 6 }
+const void *s1 = R"abcdefghijklmn??/(a)abcdefghijklmn???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 10 }
+	// { dg-error "stray" "stray" { target *-*-* } 10 }
+
+const void *s2 = R"abcdefghijklmno??/(a)abcdefghijklmno???";
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 14 }
+	// { dg-error "stray" "stray" { target *-*-* } 14 }
+const void *s3 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=?"; // { dg-warning "trigraph ..= ignored" }
+	// { dg-error "raw string delimiter longer" "longer" { target *-*-* } 17 }
+	// { dg-error "stray" "stray" { target *-*-* } 17 }
+const void *s4 = R"abc\
+()abcdef";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 20 }
+	// { dg-error "stray" "stray" { target *-*-* } 20 }
+const void *s5 = R"\
+()";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 24 }
+	// { dg-error "stray" "stray" { target *-*-* } 24 }
+const void *s6 = R"\
+a()a";
+	// { dg-error "invalid character" "invalid" { target *-*-* } 28 }
+	// { dg-error "stray" "stray" { target *-*-* } 28 }
+
+int main () {}