diff mbox

[google/integration] Extend C++11 UDLs to be compatible with inttypes.h macros (issue6104051)

Message ID 20120423025401.8F9FEA0C63@xoom.chi.corp.google.com
State New
Headers show

Commit Message

Ollie Wild April 23, 2012, 2:54 a.m. UTC
Add new option, -Wreserved-user-defined-literal.

This option, which is enabled by default, causes the preprocessor to warn
when a string or character literal is followed by a ud-suffix which does
not begin with an underscore.  According to [lex.ext]p10, this is
ill-formed.

Also modifies the preprocessor to treat such ill-formed suffixes as separate
preprocessing tokens.  This is consistent with the Clang front end (see
http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and enables
backwards compatibility with code that uses formatting macros from
<inttypes.h>, as in the following code block:

  int main() {
    int64_t i64 = 123;
    printf("My int64: %"PRId64"\n", i64);
  }

Google ref b/6377711.

2012-04-22   Ollie Wild  <aaw@google.com>

	* gcc/c-family/c-common.c:
	* gcc/c-family/c-opts.c (c_common_handle_option):
	* gcc/c-family/c.opt:
	* gcc/doc/invoke.texi (struct A):
	* gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C (test):
	(main):
	* libcpp/include/cpplib.h (struct cpp_options):
	* libcpp/init.c (cpp_create_reader):
	* libcpp/lex.c (lex_raw_string):
	(lex_string):


--
This patch is available for review at http://codereview.appspot.com/6104051

Comments

Jeffrey Yasskin April 23, 2012, 3:10 a.m. UTC | #1
Could you try to get this into mainline instead of just the google
branches? In http://gcc.gnu.org/PR52538, Jonathan sounded like he'd
consider accepting it.

On Sun, Apr 22, 2012 at 7:54 PM, Ollie Wild <aaw@google.com> wrote:
> Add new option, -Wreserved-user-defined-literal.
>
> This option, which is enabled by default, causes the preprocessor to warn
> when a string or character literal is followed by a ud-suffix which does
> not begin with an underscore.  According to [lex.ext]p10, this is
> ill-formed.
>
> Also modifies the preprocessor to treat such ill-formed suffixes as separate
> preprocessing tokens.  This is consistent with the Clang front end (see
> http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and enables
> backwards compatibility with code that uses formatting macros from
> <inttypes.h>, as in the following code block:
>
>  int main() {
>    int64_t i64 = 123;
>    printf("My int64: %"PRId64"\n", i64);
>  }
>
> Google ref b/6377711.
>
> 2012-04-22   Ollie Wild  <aaw@google.com>
>
>        * gcc/c-family/c-common.c:
>        * gcc/c-family/c-opts.c (c_common_handle_option):
>        * gcc/c-family/c.opt:
>        * gcc/doc/invoke.texi (struct A):
>        * gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C (test):
>        (main):
>        * libcpp/include/cpplib.h (struct cpp_options):
>        * libcpp/init.c (cpp_create_reader):
>        * libcpp/lex.c (lex_raw_string):
>        (lex_string):
>
> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> index 1d19251..915dc25 100644
> --- a/gcc/c-family/c-common.c
> +++ b/gcc/c-family/c-common.c
> @@ -8724,6 +8724,7 @@ static const struct reason_option_codes_t option_codes[] = {
>   {CPP_W_NORMALIZE,                    OPT_Wnormalized_},
>   {CPP_W_INVALID_PCH,                  OPT_Winvalid_pch},
>   {CPP_W_WARNING_DIRECTIVE,            OPT_Wcpp},
> +  {CPP_W_RESERVED_USER_LITERALS,       OPT_Wreserved_user_defined_literal},
>   {CPP_W_NONE,                         0}
>  };
>
> diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
> index 5118928..dab6ce5 100644
> --- a/gcc/c-family/c-opts.c
> +++ b/gcc/c-family/c-opts.c
> @@ -509,6 +509,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
>          break;
>        }
>
> +    case OPT_Wreserved_user_defined_literal:
> +      cpp_opts->warn_reserved_user_literals = value;
> +      break;
> +
>     case OPT_Wreturn_type:
>       warn_return_type = value;
>       break;
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index 40ff96c..f610513 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -589,6 +589,10 @@ Wreorder
>  C++ ObjC++ Var(warn_reorder) Warning
>  Warn when the compiler reorders code
>
> +Wreserved-user-defined-literal
> +C++ ObjC++ Warning
> +Warn when a string or character literal is followed by a ud-suffix which does not begin with an underscore.
> +
>  Wreturn-type
>  C ObjC C++ ObjC++ Var(warn_return_type) Warning
>  Warn whenever a function's return type defaults to \"int\" (C), or about inconsistent return types (C++)
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 1b61e76..d425079 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -198,7 +198,7 @@ in the following sections.
>  -fvisibility-ms-compat @gol
>  -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
>  -Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
> --Wnon-virtual-dtor  -Wreorder @gol
> +-Wnon-virtual-dtor  -Wreorder -Wreserved-user-defined-literal @gol
>  -Weffc++  -Wstrict-null-sentinel @gol
>  -Wno-non-template-friend  -Wold-style-cast @gol
>  -Woverloaded-virtual  -Wno-pmf-conversions @gol
> @@ -2474,6 +2474,30 @@ struct A @{
>  The compiler will rearrange the member initializers for @samp{i}
>  and @samp{j} to match the declaration order of the members, emitting
>  a warning to that effect.  This warning is enabled by @option{-Wall}.
> +
> +@item -Wreserved-user-defined-literal @r{(C++ and Objective-C++ only)}
> +@opindex Wreserved-user-defined-literal
> +@opindex Wno-reserved-user-defined-literal
> +Warn when a string or character literal is followed by a ud-suffix which does
> +not begin with an underscore.  As a conforming extension, GCC treats such
> +suffixes as separate preprocessing tokens in order to maintain backwards
> +compatibility with code that uses formatting macros from @code{<inttypes.h>}.
> +For example:
> +
> +@smallexample
> +#define __STDC_FORMAT_MACROS
> +#include <inttypes.h>
> +#include <stdio.h>
> +
> +int main() @{
> +  int64_t i64 = 123;
> +  printf("My int64: %"PRId64"\n", i64);
> +@}
> +@end smallexample
> +
> +In this case, @code{PRId64} is treated as a separate preprocessing token.
> +
> +This warning is enabled by default.
>  @end table
>
>  The following @option{-W@dots{}} options are not affected by @option{-Wall}.
> diff --git a/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> new file mode 100644
> index 0000000..66de5c0
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-options "-std=c++0x" }
> +
> +// Make sure -Wreserved-user-defined-literal is enabled by default and
> +// triggers as expected.
> +
> +#define BAR "bar"
> +#define PLUS_ONE + 1
> +
> +#include <cstdint>
> +#include <cassert>
> +
> +
> +void
> +test()
> +{
> +  char c = '3'PLUS_ONE;          // { dg-warning "invalid suffix on literal" }
> +  char s[] = "foo"BAR;   // { dg-warning "invalid suffix on literal" }
> +
> +  assert(c == '4');
> +  assert(s[3] != '\0');
> +  assert(s[3] == 'b');
> +}
> +
> +int
> +main()
> +{
> +  test();
> +}
> diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
> index bf59d01..84cacb0 100644
> --- a/libcpp/include/cpplib.h
> +++ b/libcpp/include/cpplib.h
> @@ -427,6 +427,10 @@ struct cpp_options
>   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
>   unsigned char user_literals;
>
> +  /* Nonzero means warn when a string or character literal is followed by a
> +     ud-suffix which does not beging with an underscore.  */
> +  unsigned char warn_reserved_user_literals;
> +
>   /* Holds the name of the target (execution) character set.  */
>   const char *narrow_charset;
>
> @@ -906,7 +910,8 @@ enum {
>   CPP_W_CXX_OPERATOR_NAMES,
>   CPP_W_NORMALIZE,
>   CPP_W_INVALID_PCH,
> -  CPP_W_WARNING_DIRECTIVE
> +  CPP_W_WARNING_DIRECTIVE,
> +  CPP_W_RESERVED_USER_LITERALS
>  };
>
>  /* Output a diagnostic of some kind.  */
> diff --git a/libcpp/init.c b/libcpp/init.c
> index 5fa82ca..2688699 100644
> --- a/libcpp/init.c
> +++ b/libcpp/init.c
> @@ -175,6 +175,7 @@ cpp_create_reader (enum c_lang lang, hash_table *table,
>   CPP_OPTION (pfile, warn_variadic_macros) = 1;
>   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
>   CPP_OPTION (pfile, warn_normalize) = normalized_C;
> +  CPP_OPTION (pfile, warn_reserved_user_literals) = 1;
>
>   /* Default CPP arithmetic to something sensible for the host for the
>      benefit of dumb users like fix-header.  */
> diff --git a/libcpp/lex.c b/libcpp/lex.c
> index 0ad9660..ba8ed9e 100644
> --- a/libcpp/lex.c
> +++ b/libcpp/lex.c
> @@ -1491,14 +1491,30 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
>
>   if (CPP_OPTION (pfile, user_literals))
>     {
> +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
> +        underscore is ill-formed.  Since this breaks programs using macros
> +        from inttypes.h, we generate a warning and treat the ud-suffix as a
> +        separate preprocessing token.  This approach is under discussion by
> +        the standards committee, and has been adopted as a conforming
> +        extension by other front ends such as clang. */
> +      if (ISALPHA(*cur))
> +       {
> +         // Raise a warning, but do not consume subsequent tokens.
> +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
> +           cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
> +                                  token->src_loc, 0,
> +                                  "invalid suffix on literal; C++11 requires "
> +                                  "a space between literal and identifier");
> +       }
>       /* Grab user defined literal suffix.  */
> -      if (ISIDST (*cur))
> +      else if (*cur == '_')
>        {
>          type = cpp_userdef_string_add_type (type);
>          ++cur;
> +
> +         while (ISIDNUM (*cur))
> +           ++cur;
>        }
> -      while (ISIDNUM (*cur))
> -       ++cur;
>     }
>
>   pfile->buffer->cur = cur;
> @@ -1606,15 +1622,31 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
>
>   if (CPP_OPTION (pfile, user_literals))
>     {
> +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
> +        underscore is ill-formed.  Since this breaks programs using macros
> +        from inttypes.h, we generate a warning and treat the ud-suffix as a
> +        separate preprocessing token.  This approach is under discussion by
> +        the standards committee, and has been adopted as a conforming
> +        extension by other front ends such as clang. */
> +      if (ISALPHA(*cur))
> +       {
> +         // Raise a warning, but do not consume subsequent tokens.
> +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
> +           cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
> +                                  token->src_loc, 0,
> +                                  "invalid suffix on literal; C++11 requires "
> +                                  "a space between literal and identifier");
> +       }
>       /* Grab user defined literal suffix.  */
> -      if (ISIDST (*cur))
> +      else if (*cur == '_')
>        {
>          type = cpp_userdef_char_add_type (type);
>          type = cpp_userdef_string_add_type (type);
>           ++cur;
> +
> +         while (ISIDNUM (*cur))
> +           ++cur;
>        }
> -      while (ISIDNUM (*cur))
> -       ++cur;
>     }
>
>   pfile->buffer->cur = cur;
>
> --
> This patch is available for review at http://codereview.appspot.com/6104051
Jeffrey Yasskin April 23, 2012, 3:29 a.m. UTC | #2
Let's let the discussion _start_ before assuming it'll be protracted.
;) I don't think it'll kill us to run the next round of testing in
C++98 mode. If the patch doesn't look close to acceptance in a couple
days, I think it'll make sense to put the then-current version into
the google branches while people are agreeing about what to do in the
long run.

On Sun, Apr 22, 2012 at 8:14 PM, Ollie Wild <aaw@google.com> wrote:
> I'd like to get this into the google branches first because this is blocking
> testing.
>
> I fully expect this to result in some protracted discussion before it can be
> accepted into trunk.
>
> Ollie
>
>
> On Sun, Apr 22, 2012 at 10:10 PM, Jeffrey Yasskin <jyasskin@google.com>
> wrote:
>>
>> Could you try to get this into mainline instead of just the google
>> branches? In http://gcc.gnu.org/PR52538, Jonathan sounded like he'd
>> consider accepting it.
>>
>> On Sun, Apr 22, 2012 at 7:54 PM, Ollie Wild <aaw@google.com> wrote:
>> > Add new option, -Wreserved-user-defined-literal.
>> >
>> > This option, which is enabled by default, causes the preprocessor to
>> > warn
>> > when a string or character literal is followed by a ud-suffix which does
>> > not begin with an underscore.  According to [lex.ext]p10, this is
>> > ill-formed.
>> >
>> > Also modifies the preprocessor to treat such ill-formed suffixes as
>> > separate
>> > preprocessing tokens.  This is consistent with the Clang front end (see
>> > http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and
>> > enables
>> > backwards compatibility with code that uses formatting macros from
>> > <inttypes.h>, as in the following code block:
>> >
>> >  int main() {
>> >    int64_t i64 = 123;
>> >    printf("My int64: %"PRId64"\n", i64);
>> >  }
>> >
>> > Google ref b/6377711.
>> >
>> > 2012-04-22   Ollie Wild  <aaw@google.com>
>> >
>> >        * gcc/c-family/c-common.c:
>> >        * gcc/c-family/c-opts.c (c_common_handle_option):
>> >        * gcc/c-family/c.opt:
>> >        * gcc/doc/invoke.texi (struct A):
>> >        * gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> > (test):
>> >        (main):
>> >        * libcpp/include/cpplib.h (struct cpp_options):
>> >        * libcpp/init.c (cpp_create_reader):
>> >        * libcpp/lex.c (lex_raw_string):
>> >        (lex_string):
>> >
>> > diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
>> > index 1d19251..915dc25 100644
>> > --- a/gcc/c-family/c-common.c
>> > +++ b/gcc/c-family/c-common.c
>> > @@ -8724,6 +8724,7 @@ static const struct reason_option_codes_t
>> > option_codes[] = {
>> >   {CPP_W_NORMALIZE,                    OPT_Wnormalized_},
>> >   {CPP_W_INVALID_PCH,                  OPT_Winvalid_pch},
>> >   {CPP_W_WARNING_DIRECTIVE,            OPT_Wcpp},
>> > +  {CPP_W_RESERVED_USER_LITERALS,
>> > OPT_Wreserved_user_defined_literal},
>> >   {CPP_W_NONE,                         0}
>> >  };
>> >
>> > diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
>> > index 5118928..dab6ce5 100644
>> > --- a/gcc/c-family/c-opts.c
>> > +++ b/gcc/c-family/c-opts.c
>> > @@ -509,6 +509,10 @@ c_common_handle_option (size_t scode, const char
>> > *arg, int value,
>> >          break;
>> >        }
>> >
>> > +    case OPT_Wreserved_user_defined_literal:
>> > +      cpp_opts->warn_reserved_user_literals = value;
>> > +      break;
>> > +
>> >     case OPT_Wreturn_type:
>> >       warn_return_type = value;
>> >       break;
>> > diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
>> > index 40ff96c..f610513 100644
>> > --- a/gcc/c-family/c.opt
>> > +++ b/gcc/c-family/c.opt
>> > @@ -589,6 +589,10 @@ Wreorder
>> >  C++ ObjC++ Var(warn_reorder) Warning
>> >  Warn when the compiler reorders code
>> >
>> > +Wreserved-user-defined-literal
>> > +C++ ObjC++ Warning
>> > +Warn when a string or character literal is followed by a ud-suffix
>> > which does not begin with an underscore.
>> > +
>> >  Wreturn-type
>> >  C ObjC C++ ObjC++ Var(warn_return_type) Warning
>> >  Warn whenever a function's return type defaults to \"int\" (C), or
>> > about inconsistent return types (C++)
>> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> > index 1b61e76..d425079 100644
>> > --- a/gcc/doc/invoke.texi
>> > +++ b/gcc/doc/invoke.texi
>> > @@ -198,7 +198,7 @@ in the following sections.
>> >  -fvisibility-ms-compat @gol
>> >  -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
>> >  -Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
>> > --Wnon-virtual-dtor  -Wreorder @gol
>> > +-Wnon-virtual-dtor  -Wreorder -Wreserved-user-defined-literal @gol
>> >  -Weffc++  -Wstrict-null-sentinel @gol
>> >  -Wno-non-template-friend  -Wold-style-cast @gol
>> >  -Woverloaded-virtual  -Wno-pmf-conversions @gol
>> > @@ -2474,6 +2474,30 @@ struct A @{
>> >  The compiler will rearrange the member initializers for @samp{i}
>> >  and @samp{j} to match the declaration order of the members, emitting
>> >  a warning to that effect.  This warning is enabled by @option{-Wall}.
>> > +
>> > +@item -Wreserved-user-defined-literal @r{(C++ and Objective-C++ only)}
>> > +@opindex Wreserved-user-defined-literal
>> > +@opindex Wno-reserved-user-defined-literal
>> > +Warn when a string or character literal is followed by a ud-suffix
>> > which does
>> > +not begin with an underscore.  As a conforming extension, GCC treats
>> > such
>> > +suffixes as separate preprocessing tokens in order to maintain
>> > backwards
>> > +compatibility with code that uses formatting macros from
>> > @code{<inttypes.h>}.
>> > +For example:
>> > +
>> > +@smallexample
>> > +#define __STDC_FORMAT_MACROS
>> > +#include <inttypes.h>
>> > +#include <stdio.h>
>> > +
>> > +int main() @{
>> > +  int64_t i64 = 123;
>> > +  printf("My int64: %"PRId64"\n", i64);
>> > +@}
>> > +@end smallexample
>> > +
>> > +In this case, @code{PRId64} is treated as a separate preprocessing
>> > token.
>> > +
>> > +This warning is enabled by default.
>> >  @end table
>> >
>> >  The following @option{-W@dots{}} options are not affected by
>> > @option{-Wall}.
>> > diff --git a/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> > b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> > new file mode 100644
>> > index 0000000..66de5c0
>> > --- /dev/null
>> > +++ b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> > @@ -0,0 +1,29 @@
>> > +// { dg-do run }
>> > +// { dg-options "-std=c++0x" }
>> > +
>> > +// Make sure -Wreserved-user-defined-literal is enabled by default and
>> > +// triggers as expected.
>> > +
>> > +#define BAR "bar"
>> > +#define PLUS_ONE + 1
>> > +
>> > +#include <cstdint>
>> > +#include <cassert>
>> > +
>> > +
>> > +void
>> > +test()
>> > +{
>> > +  char c = '3'PLUS_ONE;          // { dg-warning "invalid suffix on
>> > literal" }
>> > +  char s[] = "foo"BAR;   // { dg-warning "invalid suffix on literal" }
>> > +
>> > +  assert(c == '4');
>> > +  assert(s[3] != '\0');
>> > +  assert(s[3] == 'b');
>> > +}
>> > +
>> > +int
>> > +main()
>> > +{
>> > +  test();
>> > +}
>> > diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
>> > index bf59d01..84cacb0 100644
>> > --- a/libcpp/include/cpplib.h
>> > +++ b/libcpp/include/cpplib.h
>> > @@ -427,6 +427,10 @@ struct cpp_options
>> >   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
>> >   unsigned char user_literals;
>> >
>> > +  /* Nonzero means warn when a string or character literal is followed
>> > by a
>> > +     ud-suffix which does not beging with an underscore.  */
>> > +  unsigned char warn_reserved_user_literals;
>> > +
>> >   /* Holds the name of the target (execution) character set.  */
>> >   const char *narrow_charset;
>> >
>> > @@ -906,7 +910,8 @@ enum {
>> >   CPP_W_CXX_OPERATOR_NAMES,
>> >   CPP_W_NORMALIZE,
>> >   CPP_W_INVALID_PCH,
>> > -  CPP_W_WARNING_DIRECTIVE
>> > +  CPP_W_WARNING_DIRECTIVE,
>> > +  CPP_W_RESERVED_USER_LITERALS
>> >  };
>> >
>> >  /* Output a diagnostic of some kind.  */
>> > diff --git a/libcpp/init.c b/libcpp/init.c
>> > index 5fa82ca..2688699 100644
>> > --- a/libcpp/init.c
>> > +++ b/libcpp/init.c
>> > @@ -175,6 +175,7 @@ cpp_create_reader (enum c_lang lang, hash_table
>> > *table,
>> >   CPP_OPTION (pfile, warn_variadic_macros) = 1;
>> >   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
>> >   CPP_OPTION (pfile, warn_normalize) = normalized_C;
>> > +  CPP_OPTION (pfile, warn_reserved_user_literals) = 1;
>> >
>> >   /* Default CPP arithmetic to something sensible for the host for the
>> >      benefit of dumb users like fix-header.  */
>> > diff --git a/libcpp/lex.c b/libcpp/lex.c
>> > index 0ad9660..ba8ed9e 100644
>> > --- a/libcpp/lex.c
>> > +++ b/libcpp/lex.c
>> > @@ -1491,14 +1491,30 @@ lex_raw_string (cpp_reader *pfile, cpp_token
>> > *token, const uchar *base,
>> >
>> >   if (CPP_OPTION (pfile, user_literals))
>> >     {
>> > +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with
>> > an
>> > +        underscore is ill-formed.  Since this breaks programs using
>> > macros
>> > +        from inttypes.h, we generate a warning and treat the ud-suffix
>> > as a
>> > +        separate preprocessing token.  This approach is under
>> > discussion by
>> > +        the standards committee, and has been adopted as a conforming
>> > +        extension by other front ends such as clang. */
>> > +      if (ISALPHA(*cur))
>> > +       {
>> > +         // Raise a warning, but do not consume subsequent tokens.
>> > +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
>> > +           cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
>> > +                                  token->src_loc, 0,
>> > +                                  "invalid suffix on literal; C++11
>> > requires "
>> > +                                  "a space between literal and
>> > identifier");
>> > +       }
>> >       /* Grab user defined literal suffix.  */
>> > -      if (ISIDST (*cur))
>> > +      else if (*cur == '_')
>> >        {
>> >          type = cpp_userdef_string_add_type (type);
>> >          ++cur;
>> > +
>> > +         while (ISIDNUM (*cur))
>> > +           ++cur;
>> >        }
>> > -      while (ISIDNUM (*cur))
>> > -       ++cur;
>> >     }
>> >
>> >   pfile->buffer->cur = cur;
>> > @@ -1606,15 +1622,31 @@ lex_string (cpp_reader *pfile, cpp_token *token,
>> > const uchar *base)
>> >
>> >   if (CPP_OPTION (pfile, user_literals))
>> >     {
>> > +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with
>> > an
>> > +        underscore is ill-formed.  Since this breaks programs using
>> > macros
>> > +        from inttypes.h, we generate a warning and treat the ud-suffix
>> > as a
>> > +        separate preprocessing token.  This approach is under
>> > discussion by
>> > +        the standards committee, and has been adopted as a conforming
>> > +        extension by other front ends such as clang. */
>> > +      if (ISALPHA(*cur))
>> > +       {
>> > +         // Raise a warning, but do not consume subsequent tokens.
>> > +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
>> > +           cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
>> > +                                  token->src_loc, 0,
>> > +                                  "invalid suffix on literal; C++11
>> > requires "
>> > +                                  "a space between literal and
>> > identifier");
>> > +       }
>> >       /* Grab user defined literal suffix.  */
>> > -      if (ISIDST (*cur))
>> > +      else if (*cur == '_')
>> >        {
>> >          type = cpp_userdef_char_add_type (type);
>> >          type = cpp_userdef_string_add_type (type);
>> >           ++cur;
>> > +
>> > +         while (ISIDNUM (*cur))
>> > +           ++cur;
>> >        }
>> > -      while (ISIDNUM (*cur))
>> > -       ++cur;
>> >     }
>> >
>> >   pfile->buffer->cur = cur;
>> >
>> > --
>> > This patch is available for review at
>> > http://codereview.appspot.com/6104051
>
>
Ollie Wild April 23, 2012, 3:32 a.m. UTC | #3
Okay, I'll send out a trunk patch for review now, too.

Ollie

On Sun, Apr 22, 2012 at 10:29 PM, Jeffrey Yasskin <jyasskin@google.com> wrote:
>
> Let's let the discussion _start_ before assuming it'll be protracted.
> ;) I don't think it'll kill us to run the next round of testing in
> C++98 mode. If the patch doesn't look close to acceptance in a couple
> days, I think it'll make sense to put the then-current version into
> the google branches while people are agreeing about what to do in the
> long run.
>
> On Sun, Apr 22, 2012 at 8:14 PM, Ollie Wild <aaw@google.com> wrote:
> > I'd like to get this into the google branches first because this is
> > blocking
> > testing.
> >
> > I fully expect this to result in some protracted discussion before it
> > can be
> > accepted into trunk.
> >
> > Ollie
> >
> >
> > On Sun, Apr 22, 2012 at 10:10 PM, Jeffrey Yasskin <jyasskin@google.com>
> > wrote:
> >>
> >> Could you try to get this into mainline instead of just the google
> >> branches? In http://gcc.gnu.org/PR52538, Jonathan sounded like he'd
> >> consider accepting it.
> >>
> >> On Sun, Apr 22, 2012 at 7:54 PM, Ollie Wild <aaw@google.com> wrote:
> >> > Add new option, -Wreserved-user-defined-literal.
> >> >
> >> > This option, which is enabled by default, causes the preprocessor to
> >> > warn
> >> > when a string or character literal is followed by a ud-suffix which
> >> > does
> >> > not begin with an underscore.  According to [lex.ext]p10, this is
> >> > ill-formed.
> >> >
> >> > Also modifies the preprocessor to treat such ill-formed suffixes as
> >> > separate
> >> > preprocessing tokens.  This is consistent with the Clang front end
> >> > (see
> >> > http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and
> >> > enables
> >> > backwards compatibility with code that uses formatting macros from
> >> > <inttypes.h>, as in the following code block:
> >> >
> >> >  int main() {
> >> >    int64_t i64 = 123;
> >> >    printf("My int64: %"PRId64"\n", i64);
> >> >  }
> >> >
> >> > Google ref b/6377711.
> >> >
> >> > 2012-04-22   Ollie Wild  <aaw@google.com>
> >> >
> >> >        * gcc/c-family/c-common.c:
> >> >        * gcc/c-family/c-opts.c (c_common_handle_option):
> >> >        * gcc/c-family/c.opt:
> >> >        * gcc/doc/invoke.texi (struct A):
> >> >        * gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> >> > (test):
> >> >        (main):
> >> >        * libcpp/include/cpplib.h (struct cpp_options):
> >> >        * libcpp/init.c (cpp_create_reader):
> >> >        * libcpp/lex.c (lex_raw_string):
> >> >        (lex_string):
> >> >
> >> > diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> >> > index 1d19251..915dc25 100644
> >> > --- a/gcc/c-family/c-common.c
> >> > +++ b/gcc/c-family/c-common.c
> >> > @@ -8724,6 +8724,7 @@ static const struct reason_option_codes_t
> >> > option_codes[] = {
> >> >   {CPP_W_NORMALIZE,                    OPT_Wnormalized_},
> >> >   {CPP_W_INVALID_PCH,                  OPT_Winvalid_pch},
> >> >   {CPP_W_WARNING_DIRECTIVE,            OPT_Wcpp},
> >> > +  {CPP_W_RESERVED_USER_LITERALS,
> >> > OPT_Wreserved_user_defined_literal},
> >> >   {CPP_W_NONE,                         0}
> >> >  };
> >> >
> >> > diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
> >> > index 5118928..dab6ce5 100644
> >> > --- a/gcc/c-family/c-opts.c
> >> > +++ b/gcc/c-family/c-opts.c
> >> > @@ -509,6 +509,10 @@ c_common_handle_option (size_t scode, const char
> >> > *arg, int value,
> >> >          break;
> >> >        }
> >> >
> >> > +    case OPT_Wreserved_user_defined_literal:
> >> > +      cpp_opts->warn_reserved_user_literals = value;
> >> > +      break;
> >> > +
> >> >     case OPT_Wreturn_type:
> >> >       warn_return_type = value;
> >> >       break;
> >> > diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> >> > index 40ff96c..f610513 100644
> >> > --- a/gcc/c-family/c.opt
> >> > +++ b/gcc/c-family/c.opt
> >> > @@ -589,6 +589,10 @@ Wreorder
> >> >  C++ ObjC++ Var(warn_reorder) Warning
> >> >  Warn when the compiler reorders code
> >> >
> >> > +Wreserved-user-defined-literal
> >> > +C++ ObjC++ Warning
> >> > +Warn when a string or character literal is followed by a ud-suffix
> >> > which does not begin with an underscore.
> >> > +
> >> >  Wreturn-type
> >> >  C ObjC C++ ObjC++ Var(warn_return_type) Warning
> >> >  Warn whenever a function's return type defaults to \"int\" (C), or
> >> > about inconsistent return types (C++)
> >> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> >> > index 1b61e76..d425079 100644
> >> > --- a/gcc/doc/invoke.texi
> >> > +++ b/gcc/doc/invoke.texi
> >> > @@ -198,7 +198,7 @@ in the following sections.
> >> >  -fvisibility-ms-compat @gol
> >> >  -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
> >> >  -Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
> >> > --Wnon-virtual-dtor  -Wreorder @gol
> >> > +-Wnon-virtual-dtor  -Wreorder -Wreserved-user-defined-literal @gol
> >> >  -Weffc++  -Wstrict-null-sentinel @gol
> >> >  -Wno-non-template-friend  -Wold-style-cast @gol
> >> >  -Woverloaded-virtual  -Wno-pmf-conversions @gol
> >> > @@ -2474,6 +2474,30 @@ struct A @{
> >> >  The compiler will rearrange the member initializers for @samp{i}
> >> >  and @samp{j} to match the declaration order of the members, emitting
> >> >  a warning to that effect.  This warning is enabled by
> >> > @option{-Wall}.
> >> > +
> >> > +@item -Wreserved-user-defined-literal @r{(C++ and Objective-C++
> >> > only)}
> >> > +@opindex Wreserved-user-defined-literal
> >> > +@opindex Wno-reserved-user-defined-literal
> >> > +Warn when a string or character literal is followed by a ud-suffix
> >> > which does
> >> > +not begin with an underscore.  As a conforming extension, GCC treats
> >> > such
> >> > +suffixes as separate preprocessing tokens in order to maintain
> >> > backwards
> >> > +compatibility with code that uses formatting macros from
> >> > @code{<inttypes.h>}.
> >> > +For example:
> >> > +
> >> > +@smallexample
> >> > +#define __STDC_FORMAT_MACROS
> >> > +#include <inttypes.h>
> >> > +#include <stdio.h>
> >> > +
> >> > +int main() @{
> >> > +  int64_t i64 = 123;
> >> > +  printf("My int64: %"PRId64"\n", i64);
> >> > +@}
> >> > +@end smallexample
> >> > +
> >> > +In this case, @code{PRId64} is treated as a separate preprocessing
> >> > token.
> >> > +
> >> > +This warning is enabled by default.
> >> >  @end table
> >> >
> >> >  The following @option{-W@dots{}} options are not affected by
> >> > @option{-Wall}.
> >> > diff --git
> >> > a/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> >> > b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> >> > new file mode 100644
> >> > index 0000000..66de5c0
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> >> > @@ -0,0 +1,29 @@
> >> > +// { dg-do run }
> >> > +// { dg-options "-std=c++0x" }
> >> > +
> >> > +// Make sure -Wreserved-user-defined-literal is enabled by default
> >> > and
> >> > +// triggers as expected.
> >> > +
> >> > +#define BAR "bar"
> >> > +#define PLUS_ONE + 1
> >> > +
> >> > +#include <cstdint>
> >> > +#include <cassert>
> >> > +
> >> > +
> >> > +void
> >> > +test()
> >> > +{
> >> > +  char c = '3'PLUS_ONE;          // { dg-warning "invalid suffix on
> >> > literal" }
> >> > +  char s[] = "foo"BAR;   // { dg-warning "invalid suffix on literal"
> >> > }
> >> > +
> >> > +  assert(c == '4');
> >> > +  assert(s[3] != '\0');
> >> > +  assert(s[3] == 'b');
> >> > +}
> >> > +
> >> > +int
> >> > +main()
> >> > +{
> >> > +  test();
> >> > +}
> >> > diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
> >> > index bf59d01..84cacb0 100644
> >> > --- a/libcpp/include/cpplib.h
> >> > +++ b/libcpp/include/cpplib.h
> >> > @@ -427,6 +427,10 @@ struct cpp_options
> >> >   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
> >> >   unsigned char user_literals;
> >> >
> >> > +  /* Nonzero means warn when a string or character literal is
> >> > followed
> >> > by a
> >> > +     ud-suffix which does not beging with an underscore.  */
> >> > +  unsigned char warn_reserved_user_literals;
> >> > +
> >> >   /* Holds the name of the target (execution) character set.  */
> >> >   const char *narrow_charset;
> >> >
> >> > @@ -906,7 +910,8 @@ enum {
> >> >   CPP_W_CXX_OPERATOR_NAMES,
> >> >   CPP_W_NORMALIZE,
> >> >   CPP_W_INVALID_PCH,
> >> > -  CPP_W_WARNING_DIRECTIVE
> >> > +  CPP_W_WARNING_DIRECTIVE,
> >> > +  CPP_W_RESERVED_USER_LITERALS
> >> >  };
> >> >
> >> >  /* Output a diagnostic of some kind.  */
> >> > diff --git a/libcpp/init.c b/libcpp/init.c
> >> > index 5fa82ca..2688699 100644
> >> > --- a/libcpp/init.c
> >> > +++ b/libcpp/init.c
> >> > @@ -175,6 +175,7 @@ cpp_create_reader (enum c_lang lang, hash_table
> >> > *table,
> >> >   CPP_OPTION (pfile, warn_variadic_macros) = 1;
> >> >   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
> >> >   CPP_OPTION (pfile, warn_normalize) = normalized_C;
> >> > +  CPP_OPTION (pfile, warn_reserved_user_literals) = 1;
> >> >
> >> >   /* Default CPP arithmetic to something sensible for the host for
> >> > the
> >> >      benefit of dumb users like fix-header.  */
> >> > diff --git a/libcpp/lex.c b/libcpp/lex.c
> >> > index 0ad9660..ba8ed9e 100644
> >> > --- a/libcpp/lex.c
> >> > +++ b/libcpp/lex.c
> >> > @@ -1491,14 +1491,30 @@ lex_raw_string (cpp_reader *pfile, cpp_token
> >> > *token, const uchar *base,
> >> >
> >> >   if (CPP_OPTION (pfile, user_literals))
> >> >     {
> >> > +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting
> >> > with
> >> > an
> >> > +        underscore is ill-formed.  Since this breaks programs using
> >> > macros
> >> > +        from inttypes.h, we generate a warning and treat the
> >> > ud-suffix
> >> > as a
> >> > +        separate preprocessing token.  This approach is under
> >> > discussion by
> >> > +        the standards committee, and has been adopted as a
> >> > conforming
> >> > +        extension by other front ends such as clang. */
> >> > +      if (ISALPHA(*cur))
> >> > +       {
> >> > +         // Raise a warning, but do not consume subsequent tokens.
> >> > +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
> >> > +           cpp_warning_with_line (pfile,
> >> > CPP_W_RESERVED_USER_LITERALS,
> >> > +                                  token->src_loc, 0,
> >> > +                                  "invalid suffix on literal; C++11
> >> > requires "
> >> > +                                  "a space between literal and
> >> > identifier");
> >> > +       }
> >> >       /* Grab user defined literal suffix.  */
> >> > -      if (ISIDST (*cur))
> >> > +      else if (*cur == '_')
> >> >        {
> >> >          type = cpp_userdef_string_add_type (type);
> >> >          ++cur;
> >> > +
> >> > +         while (ISIDNUM (*cur))
> >> > +           ++cur;
> >> >        }
> >> > -      while (ISIDNUM (*cur))
> >> > -       ++cur;
> >> >     }
> >> >
> >> >   pfile->buffer->cur = cur;
> >> > @@ -1606,15 +1622,31 @@ lex_string (cpp_reader *pfile, cpp_token
> >> > *token,
> >> > const uchar *base)
> >> >
> >> >   if (CPP_OPTION (pfile, user_literals))
> >> >     {
> >> > +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting
> >> > with
> >> > an
> >> > +        underscore is ill-formed.  Since this breaks programs using
> >> > macros
> >> > +        from inttypes.h, we generate a warning and treat the
> >> > ud-suffix
> >> > as a
> >> > +        separate preprocessing token.  This approach is under
> >> > discussion by
> >> > +        the standards committee, and has been adopted as a
> >> > conforming
> >> > +        extension by other front ends such as clang. */
> >> > +      if (ISALPHA(*cur))
> >> > +       {
> >> > +         // Raise a warning, but do not consume subsequent tokens.
> >> > +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
> >> > +           cpp_warning_with_line (pfile,
> >> > CPP_W_RESERVED_USER_LITERALS,
> >> > +                                  token->src_loc, 0,
> >> > +                                  "invalid suffix on literal; C++11
> >> > requires "
> >> > +                                  "a space between literal and
> >> > identifier");
> >> > +       }
> >> >       /* Grab user defined literal suffix.  */
> >> > -      if (ISIDST (*cur))
> >> > +      else if (*cur == '_')
> >> >        {
> >> >          type = cpp_userdef_char_add_type (type);
> >> >          type = cpp_userdef_string_add_type (type);
> >> >           ++cur;
> >> > +
> >> > +         while (ISIDNUM (*cur))
> >> > +           ++cur;
> >> >        }
> >> > -      while (ISIDNUM (*cur))
> >> > -       ++cur;
> >> >     }
> >> >
> >> >   pfile->buffer->cur = cur;
> >> >
> >> > --
> >> > This patch is available for review at
> >> > http://codereview.appspot.com/6104051
> >
> >
Jeffrey Yasskin April 23, 2012, 3:35 a.m. UTC | #4
Thanks!

On Sun, Apr 22, 2012 at 8:32 PM, Ollie Wild <aaw@google.com> wrote:
> Okay, I'll send out a trunk patch for review now, too.
>
> Ollie
>
> On Sun, Apr 22, 2012 at 10:29 PM, Jeffrey Yasskin <jyasskin@google.com> wrote:
>>
>> Let's let the discussion _start_ before assuming it'll be protracted.
>> ;) I don't think it'll kill us to run the next round of testing in
>> C++98 mode. If the patch doesn't look close to acceptance in a couple
>> days, I think it'll make sense to put the then-current version into
>> the google branches while people are agreeing about what to do in the
>> long run.
>>
>> On Sun, Apr 22, 2012 at 8:14 PM, Ollie Wild <aaw@google.com> wrote:
>> > I'd like to get this into the google branches first because this is
>> > blocking
>> > testing.
>> >
>> > I fully expect this to result in some protracted discussion before it
>> > can be
>> > accepted into trunk.
>> >
>> > Ollie
>> >
>> >
>> > On Sun, Apr 22, 2012 at 10:10 PM, Jeffrey Yasskin <jyasskin@google.com>
>> > wrote:
>> >>
>> >> Could you try to get this into mainline instead of just the google
>> >> branches? In http://gcc.gnu.org/PR52538, Jonathan sounded like he'd
>> >> consider accepting it.
>> >>
>> >> On Sun, Apr 22, 2012 at 7:54 PM, Ollie Wild <aaw@google.com> wrote:
>> >> > Add new option, -Wreserved-user-defined-literal.
>> >> >
>> >> > This option, which is enabled by default, causes the preprocessor to
>> >> > warn
>> >> > when a string or character literal is followed by a ud-suffix which
>> >> > does
>> >> > not begin with an underscore.  According to [lex.ext]p10, this is
>> >> > ill-formed.
>> >> >
>> >> > Also modifies the preprocessor to treat such ill-formed suffixes as
>> >> > separate
>> >> > preprocessing tokens.  This is consistent with the Clang front end
>> >> > (see
>> >> > http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and
>> >> > enables
>> >> > backwards compatibility with code that uses formatting macros from
>> >> > <inttypes.h>, as in the following code block:
>> >> >
>> >> >  int main() {
>> >> >    int64_t i64 = 123;
>> >> >    printf("My int64: %"PRId64"\n", i64);
>> >> >  }
>> >> >
>> >> > Google ref b/6377711.
>> >> >
>> >> > 2012-04-22   Ollie Wild  <aaw@google.com>
>> >> >
>> >> >        * gcc/c-family/c-common.c:
>> >> >        * gcc/c-family/c-opts.c (c_common_handle_option):
>> >> >        * gcc/c-family/c.opt:
>> >> >        * gcc/doc/invoke.texi (struct A):
>> >> >        * gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> >> > (test):
>> >> >        (main):
>> >> >        * libcpp/include/cpplib.h (struct cpp_options):
>> >> >        * libcpp/init.c (cpp_create_reader):
>> >> >        * libcpp/lex.c (lex_raw_string):
>> >> >        (lex_string):
>> >> >
>> >> > diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
>> >> > index 1d19251..915dc25 100644
>> >> > --- a/gcc/c-family/c-common.c
>> >> > +++ b/gcc/c-family/c-common.c
>> >> > @@ -8724,6 +8724,7 @@ static const struct reason_option_codes_t
>> >> > option_codes[] = {
>> >> >   {CPP_W_NORMALIZE,                    OPT_Wnormalized_},
>> >> >   {CPP_W_INVALID_PCH,                  OPT_Winvalid_pch},
>> >> >   {CPP_W_WARNING_DIRECTIVE,            OPT_Wcpp},
>> >> > +  {CPP_W_RESERVED_USER_LITERALS,
>> >> > OPT_Wreserved_user_defined_literal},
>> >> >   {CPP_W_NONE,                         0}
>> >> >  };
>> >> >
>> >> > diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
>> >> > index 5118928..dab6ce5 100644
>> >> > --- a/gcc/c-family/c-opts.c
>> >> > +++ b/gcc/c-family/c-opts.c
>> >> > @@ -509,6 +509,10 @@ c_common_handle_option (size_t scode, const char
>> >> > *arg, int value,
>> >> >          break;
>> >> >        }
>> >> >
>> >> > +    case OPT_Wreserved_user_defined_literal:
>> >> > +      cpp_opts->warn_reserved_user_literals = value;
>> >> > +      break;
>> >> > +
>> >> >     case OPT_Wreturn_type:
>> >> >       warn_return_type = value;
>> >> >       break;
>> >> > diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
>> >> > index 40ff96c..f610513 100644
>> >> > --- a/gcc/c-family/c.opt
>> >> > +++ b/gcc/c-family/c.opt
>> >> > @@ -589,6 +589,10 @@ Wreorder
>> >> >  C++ ObjC++ Var(warn_reorder) Warning
>> >> >  Warn when the compiler reorders code
>> >> >
>> >> > +Wreserved-user-defined-literal
>> >> > +C++ ObjC++ Warning
>> >> > +Warn when a string or character literal is followed by a ud-suffix
>> >> > which does not begin with an underscore.
>> >> > +
>> >> >  Wreturn-type
>> >> >  C ObjC C++ ObjC++ Var(warn_return_type) Warning
>> >> >  Warn whenever a function's return type defaults to \"int\" (C), or
>> >> > about inconsistent return types (C++)
>> >> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> >> > index 1b61e76..d425079 100644
>> >> > --- a/gcc/doc/invoke.texi
>> >> > +++ b/gcc/doc/invoke.texi
>> >> > @@ -198,7 +198,7 @@ in the following sections.
>> >> >  -fvisibility-ms-compat @gol
>> >> >  -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
>> >> >  -Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
>> >> > --Wnon-virtual-dtor  -Wreorder @gol
>> >> > +-Wnon-virtual-dtor  -Wreorder -Wreserved-user-defined-literal @gol
>> >> >  -Weffc++  -Wstrict-null-sentinel @gol
>> >> >  -Wno-non-template-friend  -Wold-style-cast @gol
>> >> >  -Woverloaded-virtual  -Wno-pmf-conversions @gol
>> >> > @@ -2474,6 +2474,30 @@ struct A @{
>> >> >  The compiler will rearrange the member initializers for @samp{i}
>> >> >  and @samp{j} to match the declaration order of the members, emitting
>> >> >  a warning to that effect.  This warning is enabled by
>> >> > @option{-Wall}.
>> >> > +
>> >> > +@item -Wreserved-user-defined-literal @r{(C++ and Objective-C++
>> >> > only)}
>> >> > +@opindex Wreserved-user-defined-literal
>> >> > +@opindex Wno-reserved-user-defined-literal
>> >> > +Warn when a string or character literal is followed by a ud-suffix
>> >> > which does
>> >> > +not begin with an underscore.  As a conforming extension, GCC treats
>> >> > such
>> >> > +suffixes as separate preprocessing tokens in order to maintain
>> >> > backwards
>> >> > +compatibility with code that uses formatting macros from
>> >> > @code{<inttypes.h>}.
>> >> > +For example:
>> >> > +
>> >> > +@smallexample
>> >> > +#define __STDC_FORMAT_MACROS
>> >> > +#include <inttypes.h>
>> >> > +#include <stdio.h>
>> >> > +
>> >> > +int main() @{
>> >> > +  int64_t i64 = 123;
>> >> > +  printf("My int64: %"PRId64"\n", i64);
>> >> > +@}
>> >> > +@end smallexample
>> >> > +
>> >> > +In this case, @code{PRId64} is treated as a separate preprocessing
>> >> > token.
>> >> > +
>> >> > +This warning is enabled by default.
>> >> >  @end table
>> >> >
>> >> >  The following @option{-W@dots{}} options are not affected by
>> >> > @option{-Wall}.
>> >> > diff --git
>> >> > a/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> >> > b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> >> > new file mode 100644
>> >> > index 0000000..66de5c0
>> >> > --- /dev/null
>> >> > +++ b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
>> >> > @@ -0,0 +1,29 @@
>> >> > +// { dg-do run }
>> >> > +// { dg-options "-std=c++0x" }
>> >> > +
>> >> > +// Make sure -Wreserved-user-defined-literal is enabled by default
>> >> > and
>> >> > +// triggers as expected.
>> >> > +
>> >> > +#define BAR "bar"
>> >> > +#define PLUS_ONE + 1
>> >> > +
>> >> > +#include <cstdint>
>> >> > +#include <cassert>
>> >> > +
>> >> > +
>> >> > +void
>> >> > +test()
>> >> > +{
>> >> > +  char c = '3'PLUS_ONE;          // { dg-warning "invalid suffix on
>> >> > literal" }
>> >> > +  char s[] = "foo"BAR;   // { dg-warning "invalid suffix on literal"
>> >> > }
>> >> > +
>> >> > +  assert(c == '4');
>> >> > +  assert(s[3] != '\0');
>> >> > +  assert(s[3] == 'b');
>> >> > +}
>> >> > +
>> >> > +int
>> >> > +main()
>> >> > +{
>> >> > +  test();
>> >> > +}
>> >> > diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
>> >> > index bf59d01..84cacb0 100644
>> >> > --- a/libcpp/include/cpplib.h
>> >> > +++ b/libcpp/include/cpplib.h
>> >> > @@ -427,6 +427,10 @@ struct cpp_options
>> >> >   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
>> >> >   unsigned char user_literals;
>> >> >
>> >> > +  /* Nonzero means warn when a string or character literal is
>> >> > followed
>> >> > by a
>> >> > +     ud-suffix which does not beging with an underscore.  */
>> >> > +  unsigned char warn_reserved_user_literals;
>> >> > +
>> >> >   /* Holds the name of the target (execution) character set.  */
>> >> >   const char *narrow_charset;
>> >> >
>> >> > @@ -906,7 +910,8 @@ enum {
>> >> >   CPP_W_CXX_OPERATOR_NAMES,
>> >> >   CPP_W_NORMALIZE,
>> >> >   CPP_W_INVALID_PCH,
>> >> > -  CPP_W_WARNING_DIRECTIVE
>> >> > +  CPP_W_WARNING_DIRECTIVE,
>> >> > +  CPP_W_RESERVED_USER_LITERALS
>> >> >  };
>> >> >
>> >> >  /* Output a diagnostic of some kind.  */
>> >> > diff --git a/libcpp/init.c b/libcpp/init.c
>> >> > index 5fa82ca..2688699 100644
>> >> > --- a/libcpp/init.c
>> >> > +++ b/libcpp/init.c
>> >> > @@ -175,6 +175,7 @@ cpp_create_reader (enum c_lang lang, hash_table
>> >> > *table,
>> >> >   CPP_OPTION (pfile, warn_variadic_macros) = 1;
>> >> >   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
>> >> >   CPP_OPTION (pfile, warn_normalize) = normalized_C;
>> >> > +  CPP_OPTION (pfile, warn_reserved_user_literals) = 1;
>> >> >
>> >> >   /* Default CPP arithmetic to something sensible for the host for
>> >> > the
>> >> >      benefit of dumb users like fix-header.  */
>> >> > diff --git a/libcpp/lex.c b/libcpp/lex.c
>> >> > index 0ad9660..ba8ed9e 100644
>> >> > --- a/libcpp/lex.c
>> >> > +++ b/libcpp/lex.c
>> >> > @@ -1491,14 +1491,30 @@ lex_raw_string (cpp_reader *pfile, cpp_token
>> >> > *token, const uchar *base,
>> >> >
>> >> >   if (CPP_OPTION (pfile, user_literals))
>> >> >     {
>> >> > +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting
>> >> > with
>> >> > an
>> >> > +        underscore is ill-formed.  Since this breaks programs using
>> >> > macros
>> >> > +        from inttypes.h, we generate a warning and treat the
>> >> > ud-suffix
>> >> > as a
>> >> > +        separate preprocessing token.  This approach is under
>> >> > discussion by
>> >> > +        the standards committee, and has been adopted as a
>> >> > conforming
>> >> > +        extension by other front ends such as clang. */
>> >> > +      if (ISALPHA(*cur))
>> >> > +       {
>> >> > +         // Raise a warning, but do not consume subsequent tokens.
>> >> > +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
>> >> > +           cpp_warning_with_line (pfile,
>> >> > CPP_W_RESERVED_USER_LITERALS,
>> >> > +                                  token->src_loc, 0,
>> >> > +                                  "invalid suffix on literal; C++11
>> >> > requires "
>> >> > +                                  "a space between literal and
>> >> > identifier");
>> >> > +       }
>> >> >       /* Grab user defined literal suffix.  */
>> >> > -      if (ISIDST (*cur))
>> >> > +      else if (*cur == '_')
>> >> >        {
>> >> >          type = cpp_userdef_string_add_type (type);
>> >> >          ++cur;
>> >> > +
>> >> > +         while (ISIDNUM (*cur))
>> >> > +           ++cur;
>> >> >        }
>> >> > -      while (ISIDNUM (*cur))
>> >> > -       ++cur;
>> >> >     }
>> >> >
>> >> >   pfile->buffer->cur = cur;
>> >> > @@ -1606,15 +1622,31 @@ lex_string (cpp_reader *pfile, cpp_token
>> >> > *token,
>> >> > const uchar *base)
>> >> >
>> >> >   if (CPP_OPTION (pfile, user_literals))
>> >> >     {
>> >> > +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting
>> >> > with
>> >> > an
>> >> > +        underscore is ill-formed.  Since this breaks programs using
>> >> > macros
>> >> > +        from inttypes.h, we generate a warning and treat the
>> >> > ud-suffix
>> >> > as a
>> >> > +        separate preprocessing token.  This approach is under
>> >> > discussion by
>> >> > +        the standards committee, and has been adopted as a
>> >> > conforming
>> >> > +        extension by other front ends such as clang. */
>> >> > +      if (ISALPHA(*cur))
>> >> > +       {
>> >> > +         // Raise a warning, but do not consume subsequent tokens.
>> >> > +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
>> >> > +           cpp_warning_with_line (pfile,
>> >> > CPP_W_RESERVED_USER_LITERALS,
>> >> > +                                  token->src_loc, 0,
>> >> > +                                  "invalid suffix on literal; C++11
>> >> > requires "
>> >> > +                                  "a space between literal and
>> >> > identifier");
>> >> > +       }
>> >> >       /* Grab user defined literal suffix.  */
>> >> > -      if (ISIDST (*cur))
>> >> > +      else if (*cur == '_')
>> >> >        {
>> >> >          type = cpp_userdef_char_add_type (type);
>> >> >          type = cpp_userdef_string_add_type (type);
>> >> >           ++cur;
>> >> > +
>> >> > +         while (ISIDNUM (*cur))
>> >> > +           ++cur;
>> >> >        }
>> >> > -      while (ISIDNUM (*cur))
>> >> > -       ++cur;
>> >> >     }
>> >> >
>> >> >   pfile->buffer->cur = cur;
>> >> >
>> >> > --
>> >> > This patch is available for review at
>> >> > http://codereview.appspot.com/6104051
>> >
>> >
Jonathan Wakely April 23, 2012, 8:11 a.m. UTC | #5
On 23 April 2012 04:10, Jeffrey Yasskin wrote:
> Could you try to get this into mainline instead of just the google
> branches? In http://gcc.gnu.org/PR52538, Jonathan sounded like he'd
> consider accepting it.

I think it's useful, but I can't approve front-end patches.
Diego Novillo April 23, 2012, 7:39 p.m. UTC | #6
On Sun, Apr 22, 2012 at 22:54, Ollie Wild <aaw@google.com> wrote:
> Add new option, -Wreserved-user-defined-literal.
>
> This option, which is enabled by default, causes the preprocessor to warn
> when a string or character literal is followed by a ud-suffix which does
> not begin with an underscore.  According to [lex.ext]p10, this is
> ill-formed.
>
> Also modifies the preprocessor to treat such ill-formed suffixes as separate
> preprocessing tokens.  This is consistent with the Clang front end (see
> http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and enables
> backwards compatibility with code that uses formatting macros from
> <inttypes.h>, as in the following code block:
>
>  int main() {
>    int64_t i64 = 123;
>    printf("My int64: %"PRId64"\n", i64);
>  }
>
> Google ref b/6377711.
>
> 2012-04-22   Ollie Wild  <aaw@google.com>
>
>        * gcc/c-family/c-common.c:
>        * gcc/c-family/c-opts.c (c_common_handle_option):
>        * gcc/c-family/c.opt:
>        * gcc/doc/invoke.texi (struct A):
>        * gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C (test):
>        (main):
>        * libcpp/include/cpplib.h (struct cpp_options):
>        * libcpp/init.c (cpp_create_reader):
>        * libcpp/lex.c (lex_raw_string):
>        (lex_string):

This would be for google/main, right?  It does not seem fit for
google/integration.


Diego.
Ollie Wild April 23, 2012, 7:53 p.m. UTC | #7
On Mon, Apr 23, 2012 at 2:39 PM, Diego Novillo <dnovillo@google.com> wrote:
>
> This would be for google/main, right?  It does not seem fit for
> google/integration.

It needs to be in google/integration because it blocks nearly all of
our code from compiling with -std=gnu++11, and there's no way to turn
this behavior off.  Without it, we can't do any google/integration
testing.

That said, I also sent out a patch for trunk (see my other gcc-patches
thread), and there have been some changes as a result of that.  I'm
currently waiting on a response from Tom Tromey regarding the libcpp
changes.

Ollie
Diego Novillo April 23, 2012, 8:51 p.m. UTC | #8
On 4/23/12 3:53 PM, Ollie Wild wrote:
> On Mon, Apr 23, 2012 at 2:39 PM, Diego Novillo<dnovillo@google.com>  wrote:
>>
>> This would be for google/main, right?  It does not seem fit for
>> google/integration.
>
> It needs to be in google/integration because it blocks nearly all of
> our code from compiling with -std=gnu++11, and there's no way to turn
> this behavior off.  Without it, we can't do any google/integration
> testing.

Ah, OK.

> That said, I also sent out a patch for trunk (see my other gcc-patches
> thread), and there have been some changes as a result of that.  I'm
> currently waiting on a response from Tom Tromey regarding the libcpp
> changes.

Great, thanks.  Patch is OK with the ChangeLog entries filled-in.  Not 
sure if you'd rather wait for the trunk commit to go in, though.  It may 
be better to put this version in google/integration and deal with the 
merge later.


Diego.
Ollie Wild April 23, 2012, 10:16 p.m. UTC | #9
On Mon, Apr 23, 2012 at 3:51 PM, Diego Novillo <dnovillo@google.com> wrote:
>
> Great, thanks.  Patch is OK with the ChangeLog entries filled-in.  Not sure if you'd rather wait for the trunk commit to go in, though.  It may be better to put this version in google/integration and deal with the merge later.

Thanks, Diego.

If the trunk change isn't approved today, I'll probably go ahead and
check in *that* patch to google/integration.  The main difference
relative to this is that the option name has been changed to
-Wliteral-suffix.  *That* part has been approved, so hopefully any
additional adjustments will be minor.

Ollie
Diego Novillo April 24, 2012, 1:12 p.m. UTC | #10
On Mon, Apr 23, 2012 at 18:16, Ollie Wild <aaw@google.com> wrote:

> If the trunk change isn't approved today, I'll probably go ahead and
> check in *that* patch to google/integration.  The main difference
> relative to this is that the option name has been changed to
> -Wliteral-suffix.  *That* part has been approved, so hopefully any
> additional adjustments will be minor.

Sounds good.  Thanks.


Diego.
diff mbox

Patch

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 1d19251..915dc25 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8724,6 +8724,7 @@  static const struct reason_option_codes_t option_codes[] = {
   {CPP_W_NORMALIZE,			OPT_Wnormalized_},
   {CPP_W_INVALID_PCH,			OPT_Winvalid_pch},
   {CPP_W_WARNING_DIRECTIVE,		OPT_Wcpp},
+  {CPP_W_RESERVED_USER_LITERALS,	OPT_Wreserved_user_defined_literal},
   {CPP_W_NONE,				0}
 };
 
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 5118928..dab6ce5 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -509,6 +509,10 @@  c_common_handle_option (size_t scode, const char *arg, int value,
 	  break;
 	}
 
+    case OPT_Wreserved_user_defined_literal:
+      cpp_opts->warn_reserved_user_literals = value;
+      break;
+
     case OPT_Wreturn_type:
       warn_return_type = value;
       break;
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 40ff96c..f610513 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -589,6 +589,10 @@  Wreorder
 C++ ObjC++ Var(warn_reorder) Warning
 Warn when the compiler reorders code
 
+Wreserved-user-defined-literal
+C++ ObjC++ Warning
+Warn when a string or character literal is followed by a ud-suffix which does not begin with an underscore.
+
 Wreturn-type
 C ObjC C++ ObjC++ Var(warn_return_type) Warning
 Warn whenever a function's return type defaults to \"int\" (C), or about inconsistent return types (C++)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1b61e76..d425079 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -198,7 +198,7 @@  in the following sections.
 -fvisibility-ms-compat @gol
 -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
 -Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
--Wnon-virtual-dtor  -Wreorder @gol
+-Wnon-virtual-dtor  -Wreorder -Wreserved-user-defined-literal @gol
 -Weffc++  -Wstrict-null-sentinel @gol
 -Wno-non-template-friend  -Wold-style-cast @gol
 -Woverloaded-virtual  -Wno-pmf-conversions @gol
@@ -2474,6 +2474,30 @@  struct A @{
 The compiler will rearrange the member initializers for @samp{i}
 and @samp{j} to match the declaration order of the members, emitting
 a warning to that effect.  This warning is enabled by @option{-Wall}.
+
+@item -Wreserved-user-defined-literal @r{(C++ and Objective-C++ only)}
+@opindex Wreserved-user-defined-literal
+@opindex Wno-reserved-user-defined-literal
+Warn when a string or character literal is followed by a ud-suffix which does
+not begin with an underscore.  As a conforming extension, GCC treats such
+suffixes as separate preprocessing tokens in order to maintain backwards
+compatibility with code that uses formatting macros from @code{<inttypes.h>}.
+For example:
+
+@smallexample
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <stdio.h>
+
+int main() @{
+  int64_t i64 = 123;
+  printf("My int64: %"PRId64"\n", i64);
+@}
+@end smallexample
+
+In this case, @code{PRId64} is treated as a separate preprocessing token.
+
+This warning is enabled by default.
 @end table
 
 The following @option{-W@dots{}} options are not affected by @option{-Wall}.
diff --git a/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
new file mode 100644
index 0000000..66de5c0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
@@ -0,0 +1,29 @@ 
+// { dg-do run }
+// { dg-options "-std=c++0x" }
+
+// Make sure -Wreserved-user-defined-literal is enabled by default and
+// triggers as expected.
+
+#define BAR "bar"
+#define PLUS_ONE + 1
+
+#include <cstdint>
+#include <cassert>
+
+
+void
+test()
+{
+  char c = '3'PLUS_ONE;	  // { dg-warning "invalid suffix on literal" }
+  char s[] = "foo"BAR;	  // { dg-warning "invalid suffix on literal" }
+
+  assert(c == '4');
+  assert(s[3] != '\0');
+  assert(s[3] == 'b');
+}
+
+int
+main()
+{
+  test();
+}
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index bf59d01..84cacb0 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -427,6 +427,10 @@  struct cpp_options
   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
   unsigned char user_literals;
 
+  /* Nonzero means warn when a string or character literal is followed by a
+     ud-suffix which does not beging with an underscore.  */
+  unsigned char warn_reserved_user_literals;
+
   /* Holds the name of the target (execution) character set.  */
   const char *narrow_charset;
 
@@ -906,7 +910,8 @@  enum {
   CPP_W_CXX_OPERATOR_NAMES,
   CPP_W_NORMALIZE,
   CPP_W_INVALID_PCH,
-  CPP_W_WARNING_DIRECTIVE
+  CPP_W_WARNING_DIRECTIVE,
+  CPP_W_RESERVED_USER_LITERALS
 };
 
 /* Output a diagnostic of some kind.  */
diff --git a/libcpp/init.c b/libcpp/init.c
index 5fa82ca..2688699 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -175,6 +175,7 @@  cpp_create_reader (enum c_lang lang, hash_table *table,
   CPP_OPTION (pfile, warn_variadic_macros) = 1;
   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
   CPP_OPTION (pfile, warn_normalize) = normalized_C;
+  CPP_OPTION (pfile, warn_reserved_user_literals) = 1;
 
   /* Default CPP arithmetic to something sensible for the host for the
      benefit of dumb users like fix-header.  */
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 0ad9660..ba8ed9e 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1491,14 +1491,30 @@  lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 
   if (CPP_OPTION (pfile, user_literals))
     {
+      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
+	 underscore is ill-formed.  Since this breaks programs using macros
+	 from inttypes.h, we generate a warning and treat the ud-suffix as a
+	 separate preprocessing token.  This approach is under discussion by
+	 the standards committee, and has been adopted as a conforming
+	 extension by other front ends such as clang. */
+      if (ISALPHA(*cur))
+	{
+	  // Raise a warning, but do not consume subsequent tokens.
+	  if (CPP_OPTION (pfile, warn_reserved_user_literals))
+	    cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
+				   token->src_loc, 0,
+				   "invalid suffix on literal; C++11 requires "
+				   "a space between literal and identifier");
+	}
       /* Grab user defined literal suffix.  */
-      if (ISIDST (*cur))
+      else if (*cur == '_')
 	{
 	  type = cpp_userdef_string_add_type (type);
 	  ++cur;
+
+	  while (ISIDNUM (*cur))
+	    ++cur;
 	}
-      while (ISIDNUM (*cur))
-	++cur;
     }
 
   pfile->buffer->cur = cur;
@@ -1606,15 +1622,31 @@  lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 
   if (CPP_OPTION (pfile, user_literals))
     {
+      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
+	 underscore is ill-formed.  Since this breaks programs using macros
+	 from inttypes.h, we generate a warning and treat the ud-suffix as a
+	 separate preprocessing token.  This approach is under discussion by
+	 the standards committee, and has been adopted as a conforming
+	 extension by other front ends such as clang. */
+      if (ISALPHA(*cur))
+	{
+	  // Raise a warning, but do not consume subsequent tokens.
+	  if (CPP_OPTION (pfile, warn_reserved_user_literals))
+	    cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
+				   token->src_loc, 0,
+				   "invalid suffix on literal; C++11 requires "
+				   "a space between literal and identifier");
+	}
       /* Grab user defined literal suffix.  */
-      if (ISIDST (*cur))
+      else if (*cur == '_')
 	{
 	  type = cpp_userdef_char_add_type (type);
 	  type = cpp_userdef_string_add_type (type);
           ++cur;
+
+	  while (ISIDNUM (*cur))
+	    ++cur;
 	}
-      while (ISIDNUM (*cur))
-	++cur;
     }
 
   pfile->buffer->cur = cur;