Patchwork [C++] DR1473 - let literal operators be defined with empty user-defined string literal

login
register
mail settings
Submitter Ed Smith-Rowland
Date June 29, 2013, 6:56 a.m.
Message ID <51CE8518.7020509@verizon.net>
Download mbox | patch
Permalink /patch/255718/
State New
Headers show

Comments

Ed Smith-Rowland - June 29, 2013, 6:56 a.m.
On 06/26/2013 05:01 PM, Jason Merrill wrote:
> On 06/26/2013 09:43 AM, Ed Smith-Rowland wrote:
>> +      if (bad_encoding_prefix)
>> +    error ("invalid encoding prefix in literal operator");
>> +      {
>> +    tree string_tree = USERDEF_LITERAL_VALUE (token->u.value);
>
> No need to open a nested block for a declaration now that we're 
> compiling as C++.
>
> Otherwise, OK.
>
> Jason
>
>
Here is the applied patch.  I was unable to remove the braces around the 
declaration because the jump to the default label would cross its 
initialization.

Also, this version has much better detection of macros for Wstring-literal 
(a test which broke with my previous patch).

libcpp:

2013-06-28  Ed Smith-Rowland  <3dw4rd@verizon.net>

	* lex.c (lex_raw_string(), lex_string()): Constrain suffixes treated
	as concatenated literal and macro to just the patterns found in
	inttypes.h; (is_macro()): New.


gcc/cp:

2013-06-28  Ed Smith-Rowland  <3dw4rd@verizon.net>

	* cp-tree.h (UDLIT_OP_ANSI_PREFIX): Remove space.
	* parser.c (cp_parser_operator()): Parse user-defined string
	literal as literal operator.


gcc/testsuite:

2013-06-28  Ed Smith-Rowland  <3dw4rd@verizon.net>

	* g++.dg/cpp0x/udlit-nospace-neg.C: Adjust.
	* g++.dg/cpp1y/udlit-enc-prefix-neg.C: New.
	* g++.dg/cpp1y/udlit-userdef-string.C: New.
	* g++.dg/cpp1y/complex_literals.h: New.

Patch

Index: gcc/cp/cp-tree.h
===================================================================
--- gcc/cp/cp-tree.h	(revision 200562)
+++ gcc/cp/cp-tree.h	(working copy)
@@ -4404,7 +4404,7 @@ 
 #define LAMBDANAME_PREFIX "__lambda"
 #define LAMBDANAME_FORMAT LAMBDANAME_PREFIX "%d"
 
-#define UDLIT_OP_ANSI_PREFIX "operator\"\" "
+#define UDLIT_OP_ANSI_PREFIX "operator\"\""
 #define UDLIT_OP_ANSI_FORMAT UDLIT_OP_ANSI_PREFIX "%s"
 #define UDLIT_OP_MANGLED_PREFIX "li"
 #define UDLIT_OP_MANGLED_FORMAT UDLIT_OP_MANGLED_PREFIX "%s"
Index: libcpp/lex.c
===================================================================
--- libcpp/lex.c	(revision 200562)
+++ libcpp/lex.c	(working copy)
@@ -1334,6 +1334,33 @@ 
   *last_buff_p = last_buff;
 }
 
+
+/* Returns true if the identifier starting at BASE names a defined macro.
+   This might not work when compiling with -save-temps,
+   or when preprocessing separately from compilation.  */
+
+static bool
+is_macro(cpp_reader *pfile, const uchar *base)
+{
+  const uchar *cur = base;
+  if (! ISIDST (*cur))
+    return false;  /* BASE does not begin an identifier.  */
+  unsigned int hash = HT_HASHSTEP (0, *cur);
+  ++cur;
+  while (ISIDNUM (*cur))  /* Hash the rest of the identifier.  */
+    {
+      hash = HT_HASHSTEP (hash, *cur);
+      ++cur;
+    }
+  hash = HT_HASHFINISH (hash, cur - base);  /* cur - base is the length.  */
+
+  cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
+					base, cur - base, hash, HT_NO_INSERT));
+
+  return !result ? false : (result->type == NT_MACRO);  /* No node: no macro.  */
+}
+
+
 /* Lexes a raw string.  The stored string contains the spelling, including
    double quotes, delimiter string, '(' and ')', any leading
    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
@@ -1556,22 +1583,18 @@ 
 
   if (CPP_OPTION (pfile, user_literals))
     {
-      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
-	 underscore is ill-formed.  Since this breaks programs using macros
-	 from inttypes.h, we generate a warning and treat the ud-suffix as a
-	 separate preprocessing token.  This approach is under discussion by
-	 the standards committee, and has been adopted as a conforming
-	 extension by other front ends such as clang.
-         A special exception is made for the suffix 's' which will be
-	 standardized as a user-defined literal suffix for strings.  */
-      if (ISALPHA (*cur) && *cur != 's')
+      /* If a string format macro, say from inttypes.h, is placed touching
+	 a string literal it could be parsed as a C++11 user-defined string
+	 literal thus breaking the program.
+	 Try to identify macros with is_macro. A warning is issued. */
+      if (is_macro (pfile, cur))
 	{
 	  /* Raise a warning, but do not consume subsequent tokens.  */
 	  if (CPP_OPTION (pfile, warn_literal_suffix))
 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
 				   token->src_loc, 0,
 				   "invalid suffix on literal; C++11 requires "
-				   "a space between literal and identifier");
+				   "a space between literal and string macro");
 	}
       /* Grab user defined literal suffix.  */
       else if (ISIDST (*cur))
@@ -1689,22 +1712,18 @@ 
 
   if (CPP_OPTION (pfile, user_literals))
     {
-      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
-	 underscore is ill-formed.  Since this breaks programs using macros
-	 from inttypes.h, we generate a warning and treat the ud-suffix as a
-	 separate preprocessing token.  This approach is under discussion by
-	 the standards committee, and has been adopted as a conforming
-	 extension by other front ends such as clang.
-         A special exception is made for the suffix 's' which will be
-	 standardized as a user-defined literal suffix for strings.  */
-      if (ISALPHA (*cur) && *cur != 's')
+      /* If a string format macro, say from inttypes.h, is placed touching
+	 a string literal it could be parsed as a C++11 user-defined string
+	 literal thus breaking the program.
+	 Try to identify macros with is_macro. A warning is issued. */
+      if (is_macro (pfile, cur))
 	{
 	  /* Raise a warning, but do not consume subsequent tokens.  */
 	  if (CPP_OPTION (pfile, warn_literal_suffix))
 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
 				   token->src_loc, 0,
 				   "invalid suffix on literal; C++11 requires "
-				   "a space between literal and identifier");
+				   "a space between literal and string macro");
 	}
       /* Grab user defined literal suffix.  */
       else if (ISIDST (*cur))
Index: gcc/cp/parser.c
===================================================================
--- gcc/cp/parser.c	(revision 200562)
+++ gcc/cp/parser.c	(working copy)
@@ -12244,6 +12244,8 @@ 
 {
   tree id = NULL_TREE;
   cp_token *token;
+  bool bad_encoding_prefix = false;
+  int string_len = 2;
 
   /* Peek at the next token.  */
   token = cp_lexer_peek_token (parser->lexer);
@@ -12443,10 +12445,20 @@ 
       cp_parser_require (parser, CPP_CLOSE_SQUARE, RT_CLOSE_SQUARE);
       return ansi_opname (ARRAY_REF);
 
+    case CPP_WSTRING:
+      string_len = 3;
+    case CPP_STRING16:
+    case CPP_STRING32:
+      string_len = 5;
+    case CPP_UTF8STRING:
+      string_len = 4;
+      bad_encoding_prefix = true;
     case CPP_STRING:
       if (cxx_dialect == cxx98)
 	maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
-      if (TREE_STRING_LENGTH (token->u.value) > 2)
+      if (bad_encoding_prefix)
+	error ("invalid encoding prefix in literal operator");
+      if (TREE_STRING_LENGTH (token->u.value) > string_len)
 	{
 	  error ("expected empty string after %<operator%> keyword");
 	  return error_mark_node;
@@ -12464,15 +12476,49 @@ 
 	      return cp_literal_operator_id (name);
 	    }
 	}
+      else if (token->type == CPP_KEYWORD)
+	{
+	  error ("unexpected keyword;"
+		 " remove space between quotes and suffix identifier");
+	  return error_mark_node;
+	}
       else
 	{
 	  error ("expected suffix identifier");
 	  return error_mark_node;
 	}
 
+    case CPP_WSTRING_USERDEF:
+      string_len = 3;
+    case CPP_STRING16_USERDEF:
+    case CPP_STRING32_USERDEF:
+      string_len = 5;
+    case CPP_UTF8STRING_USERDEF:
+      string_len = 4;
+      bad_encoding_prefix = true;
     case CPP_STRING_USERDEF:
-      error ("missing space between %<\"\"%> and suffix identifier");
-      return error_mark_node;
+      if (cxx_dialect == cxx98)
+	maybe_warn_cpp0x (CPP0X_USER_DEFINED_LITERALS);
+      if (bad_encoding_prefix)
+	error ("invalid encoding prefix in literal operator");
+      {
+	tree string_tree = USERDEF_LITERAL_VALUE (token->u.value);
+	if (TREE_STRING_LENGTH (string_tree) > string_len)
+	  {
+	    error ("expected empty string after %<operator%> keyword");
+	    return error_mark_node;
+	  }
+	id = USERDEF_LITERAL_SUFFIX_ID (token->u.value);
+	/* Consume the user-defined string literal.  */
+	cp_lexer_consume_token (parser->lexer);
+	if (id != error_mark_node)
+	  {
+	    const char *name = IDENTIFIER_POINTER (id);
+	    return cp_literal_operator_id (name);
+	  }
+	else
+	  return error_mark_node;
+      }
 
     default:
       /* Anything else is an error.  */
Index: gcc/testsuite/g++.dg/cpp0x/udlit-nospace-neg.C
===================================================================
--- gcc/testsuite/g++.dg/cpp0x/udlit-nospace-neg.C	(revision 200562)
+++ gcc/testsuite/g++.dg/cpp0x/udlit-nospace-neg.C	(working copy)
@@ -1,3 +1,5 @@ 
 // { dg-options "-std=c++0x" }
 
-float operator ""_abc(const char*); // { dg-error "missing space between|and suffix identifier" }
+float operator ""_abc(const char*);
+
+int operator""_def(long double);
Index: gcc/testsuite/g++.dg/cpp1y/udlit-enc-prefix-neg.C
===================================================================
--- gcc/testsuite/g++.dg/cpp1y/udlit-enc-prefix-neg.C	(revision 0)
+++ gcc/testsuite/g++.dg/cpp1y/udlit-enc-prefix-neg.C	(working copy)
@@ -0,0 +1,17 @@ 
+// { dg-options -std=c++1y }
+
+int
+operator L""_Ls(unsigned long long) // { dg-error "invalid encoding prefix in literal operator" }
+{ return 0; }
+
+int
+operator u""_s16(unsigned long long) // { dg-error "invalid encoding prefix in literal operator" }
+{ return 0; }
+
+int
+operator U""_s32(unsigned long long) // { dg-error "invalid encoding prefix in literal operator" }
+{ return 0; }
+
+int
+operator u8""_u8s(unsigned long long) // { dg-error "invalid encoding prefix in literal operator" }
+{ return 0; }
Index: gcc/testsuite/g++.dg/cpp1y/udlit-userdef-string.C
===================================================================
--- gcc/testsuite/g++.dg/cpp1y/udlit-userdef-string.C	(revision 0)
+++ gcc/testsuite/g++.dg/cpp1y/udlit-userdef-string.C	(working copy)
@@ -0,0 +1,7 @@ 
+// { dg-options -std=c++1y }
+
+#include "complex_literals.h"
+
+auto cx = 1.1if;
+
+auto cn = 123if;
Index: gcc/testsuite/g++.dg/cpp1y/complex_literals.h
===================================================================
--- gcc/testsuite/g++.dg/cpp1y/complex_literals.h	(revision 0)
+++ gcc/testsuite/g++.dg/cpp1y/complex_literals.h	(working copy)
@@ -0,0 +1,12 @@ 
+
+#include <complex>
+
+#pragma GCC system_header
+
+std::complex<float>
+operator""if(long double ximag)
+{ return std::complex<float>(0.0F, static_cast<float>(ximag)); }
+
+std::complex<float>
+operator""if(unsigned long long nimag)
+{ return std::complex<float>(0.0F, static_cast<float>(nimag)); }