[1/12] implement -Wformat-diag to detect quoting and spelling issues in GCC diagnostics

Message ID	dd9370c0-9954-0ffb-b72b-1d66dd8bab94@gmail.com
State	New
Headers	show Return-Path: <gcc-patches-return-500688-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :subject:to:message-id:date:mime-version:content-type; q=dns; s= default; b=TSLKjx+tcu6ThsYuoTN+mFizPmh8ScAtRFvq/bDg6coJ4uTF6Y6jZ ZQYESEO/ejdDbQCLw22m1Z0G1bs93D26QxWeB+V1whg3YcBa4PyFyey5jIxyvl69 aVkQWuG4HahVrS/JSfbJXjdKh+urj9yNPCMzGaUxCRLiLViyOMkI4c= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org From: Martin Sebor <msebor@gmail.com> Subject: [PATCH 1/12] implement -Wformat-diag to detect quoting and spelling issues in GCC diagnostics To: gcc-patches <gcc-patches@gcc.gnu.org> Message-ID: <dd9370c0-9954-0ffb-b72b-1d66dd8bab94@gmail.com> Date: Tue, 14 May 2019 15:31:50 -0600 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.3.1 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------E85FF282E52C06466E2CDF71"
Series	detect quoting and punctuation problems in diagnostics \| expand [0/12] detect quoting and punctuation problems in diagnostics [1/12] implement -Wformat-diag to detect quoting and spelling issues in GCC diagnostics [2/12] fix diagnostic quoting/spelling in ada [3/12] fix diagnostic quoting/spelling in Brig [4/12] fix diagnostic quoting/spelling in the C front-end [5/12] fix diagnostic quoting/spelling in c-family [6/12] fix diagnostic quoting/spelling in C++ [7/12] fix diagnostic quoting/spelling in libgcc [8/12] fix diagnostic quoting/spelling in the middle-end [9/12] adjust tests to quoting/spelling diagnostics fixes [10/12] fix diagnostic quoting/spelling in D [11/12] fix diagnostic quoting/spelling issues in i386 back-end [12/12] fix diagnostic quoting/spelling issues in ObjC

gcc/c-family/ChangeLog: * c-common.h (GCC_DIAG_STYLE): Adjust. (GCC_DIAG_RAW_STYLE): New macro. c-format.c (function_format_info::format_type): Adjust type. (function_format_info::is_raw): New member. (decode_format_type): Adjust signature. Handle "raw" diag attributes. (decode_format_attr): Adjust call to decode_format_type. Avoid a redundant call to convert_format_name_to_system_name. Avoid abbreviating the word "arguments" in a diagnostic. (format_warning_substr): New function. (avoid_dollar_number): Quote dollar sign in a diagnostic. (finish_dollar_format_checking): Same. (check_format_info): Same. (struct baltoks_t): New. (maybe_diag_unbalanced_tokens, check_tokens, check_plain): New functions. (check_format_info_main): Call check_plain. Use baltoks_t. Call maybe_diag_unbalanced_tokens. (handle_format_attribute): Spell out the word "arguments" in a diagnostic. * c.opt (-Wformat-diag): New option. gcc/testsuite/ChangeLog: * gcc.dg/format/gcc_diag-11.c: New test. diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index 1cf2cae6395..9d067028416 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -39,7 +39,10 @@ framework extensions, you must include this file before diagnostic-core.h \ never after. #endif #ifndef GCC_DIAG_STYLE -#define GCC_DIAG_STYLE __gcc_cdiag__ +# define GCC_DIAG_STYLE __gcc_cdiag__ +#endif +#ifndef GCC_DIAG_RAW_STYLE +# define GCC_DIAG_RAW_STYLE __gcc_cdiag_raw__ #endif #include "diagnostic-core.h" diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c index a7f76c1c01d..372ab124661 100644 --- a/gcc/c-family/c-format.c +++ b/gcc/c-family/c-format.c @@ -52,7 +52,13 @@ enum format_type { printf_format_type, asm_fprintf_format_type, struct function_format_info { - int format_type; /* type of format (printf, scanf, etc.) */ + enum format_type format_type; /* type of format (printf, scanf, etc.) */ + /* IS_RAW is relevant only for GCC diagnostic format functions. 
+ It is set for "raw" formatting functions like pp_printf that + are not intended to produce complete diagnostics according to + GCC guidelines, and clear for others like error and warning + whose format string is checked for proper quoting and spelling. */ + bool is_raw; unsigned HOST_WIDE_INT format_num; /* number of format argument */ unsigned HOST_WIDE_INT first_arg_num; /* number of first arg (zero for varargs) */ }; @@ -65,7 +71,7 @@ static GTY(()) tree locus; static bool decode_format_attr (const_tree, tree, tree, function_format_info *, bool); -static int decode_format_type (const char *); +static format_type decode_format_type (const char *, bool * = NULL); static bool check_format_string (const_tree argument, unsigned HOST_WIDE_INT format_num, @@ -111,6 +117,32 @@ format_warning_at_char (location_t fmt_string_loc, tree format_string_cst, return warned; } + +/* Emit a warning as per format_warning_va, but construct the substring_loc + for the substring at offset (POS1, POS2 - 1) within a string constant + FORMAT_STRING_CST at FMT_STRING_LOC. */ + +ATTRIBUTE_GCC_DIAG (6,7) +static bool +format_warning_substr (location_t fmt_string_loc, tree format_string_cst, + int pos1, int pos2, int opt, const char *gmsgid, ...) +{ + va_list ap; + va_start (ap, gmsgid); + tree string_type = TREE_TYPE (format_string_cst); + + pos2 -= 1; + + substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2); + format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL, + NULL); + bool warned = diag.emit_warning_va (opt, gmsgid, &ap); + va_end (ap); + + return warned; +} + + /* Check that we have a pointer to a string suitable for use as a format. The default is to check for a char type. 
For objective-c dialects, this is extended to include references to string @@ -320,10 +352,8 @@ decode_format_attr (const_tree fntype, tree atname, tree args, { const char *p = IDENTIFIER_POINTER (format_type_id); - p = convert_format_name_to_system_name (p); + info->format_type = decode_format_type (p, &info->is_raw); - info->format_type = decode_format_type (p); - if (!c_dialect_objc () && info->format_type == gcc_objc_string_format_type) { @@ -359,7 +389,7 @@ decode_format_attr (const_tree fntype, tree atname, tree args, if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num) { gcc_assert (!validated_p); - error ("format string argument follows the args to be formatted"); + error ("format string argument follows the arguments to be formatted"); return false; } @@ -1067,27 +1097,55 @@ static void format_type_warning (const substring_loc &fmt_loc, char conversion_char); /* Decode a format type from a string, returning the type, or - format_type_error if not valid, in which case the caller should print an - error message. */ -static int -decode_format_type (const char *s) + format_type_error if not valid, in which case the caller should + print an error message. On success, when IS_RAW is non-null, set + *IS_RAW when the format type corresponds to a GCC "raw" diagnostic + formatting function and clear it otherwise. */ +static format_type +decode_format_type (const char *s, bool *is_raw /* = NULL */) { - int i; - int slen; + bool is_raw_buf; + + if (!is_raw) + is_raw = &is_raw_buf; + + *is_raw = false; s = convert_format_name_to_system_name (s); - slen = strlen (s); - for (i = 0; i < n_format_types; i++) + + size_t slen = strlen (s); + for (int i = 0; i < n_format_types; i++) { - int alen; + /* Check for a match with no underscores. */ if (!strcmp (s, format_types[i].name)) - return i; - alen = strlen (format_types[i].name); + return static_cast<format_type> (i); + + /* Check for leading and trailing underscores. 
*/ + size_t alen = strlen (format_types[i].name); if (slen == alen + 4 && s[0] == '_' && s[1] == '_' && s[slen - 1] == '_' && s[slen - 2] == '_' && !strncmp (s + 2, format_types[i].name, alen)) - return i; + return static_cast<format_type>(i); + + /* Check for the "_raw" suffix and no leading underscores. */ + if (slen == alen + 4 + && !strncmp (s, format_types[i].name, alen) + && !strcmp (s + alen, "_raw")) + { + *is_raw = true; + return static_cast<format_type>(i); + } + + /* Check for the "_raw__" suffix and leading underscores. */ + if (slen == alen + 8 && s[0] == '_' && s[1] == '_' + && !strncmp (s + 2, format_types[i].name, alen) + && !strcmp (s + 2 + alen, "_raw__")) + { + *is_raw = true; + return static_cast<format_type>(i); + } } + return format_type_error; } @@ -1350,7 +1408,8 @@ avoid_dollar_number (const char *format) format++; if (*format == '$') { - warning (OPT_Wformat_, "$ operand number used after format without operand number"); + warning (OPT_Wformat_, + "%<$%>operand number used after format without operand number"); return true; } return false; @@ -1381,7 +1440,8 @@ finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok) found_pointer_gap = true; else warning_at (res->format_string_loc, OPT_Wformat_, - "format argument %d unused before used argument %d in $-style format", + "format argument %d unused before used argument %d " + "in %<$%>-style format", i + 1, dollar_max_arg_used); } } @@ -1525,7 +1585,8 @@ check_format_info (function_format_info *info, tree params, } if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0 && res.number_other == 0) - warning_at (loc, OPT_Wformat_extra_args, "unused arguments in $-style format"); + warning_at (loc, OPT_Wformat_extra_args, + "unused arguments in %<$%>-style format"); if (res.number_empty > 0 && res.number_non_literal == 0 && res.number_other == 0) warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string", @@ -2789,6 +2850,862 @@ check_argument_type 
(const format_char_info *fci, return true; } +/* Describes "paired tokens" within the format string that are + expected to be balanced. */ + +struct baltoks_t +{ + baltoks_t (): singlequote (), doublequote () { } + + typedef auto_vec<const char *> balanced_tokens_t; + /* Vectors of pointers to opening brackets ('['), curly + brackets ('{'), quoting directives (like GCC "%<"), parentheses, + and angle brackets ('<'). Used to detect unbalanced tokens. */ + balanced_tokens_t brackets; + balanced_tokens_t curly; + balanced_tokens_t quotdirs; + balanced_tokens_t parens; + balanced_tokens_t pointy; + /* Pointer to the last opening quote. */ + const char *singlequote; + const char *doublequote; +}; + +/* Describes a keyword, operator, or other name. */ + +struct token_t +{ + const char *name; /* Keyword/operator name. */ + unsigned char len; /* Its length. */ + bool is_cxx; /* Set for C++ only names. */ +}; + +/* C/C++ operators that are expected to be quoted within the format + string. */ + +static const token_t opers[] = + { +#undef NAME +#define NAME(name) { name, sizeof name - 1, false } + + NAME ("=="), NAME ("!="), NAME (">="), NAME ("<="), NAME ("->"), + NAME ("++"), NAME ("--"), NAME ("&&"), NAME ("||"), NAME ("?:"), + NAME ("*="), NAME ("/="), NAME ("%="), NAME ("+="), NAME ("-="), + NAME ("&="), NAME ("^="), NAME ("|="), NAME ("<<="), NAME (">>="), + +#undef NAME +#define NAME(name) { name, sizeof name - 1, true } + NAME (".*"), + NAME ("->*"), + NAME ("::"), + NAME ("<=>") + }; + +/* Common C/C++ keywords that are expected to be quoted within the format + string. Keywords like auto, inline, or volatile are excluded because + they are sometimes used in common terms like /auto variables/, /inline + function/, or /volatile access/ where they should not be quoted. 
*/ + +static const token_t keywords[] = + { +#undef NAME +#define NAME(name) { name, sizeof name - 1, false } + NAME ("alignas"), + NAME ("alignof"), + NAME ("asm"), + NAME ("bool"), + NAME ("char"), + NAME ("float"), + NAME ("int"), + NAME ("long double"), + NAME ("long int"), + NAME ("long long"), + NAME ("noreturn"), + NAME ("offsetof"), + NAME ("short int"), + NAME ("signed char"), + NAME ("signed int"), + NAME ("signed short"), + NAME ("signed long"), + NAME ("unsigned char"), + NAME ("unsigned int"), + NAME ("unsigned short"), + NAME ("unsigned long"), + NAME ("sizeof"), + NAME ("typeof"), + NAME ("wchar_t"), + +#undef NAME +#define NAME(name) { name, sizeof name - 1, true } + + /* C++ only keywords and operators. */ + NAME ("char8_t"), + NAME ("char16_t"), + NAME ("char32_t"), + NAME ("catch"), + NAME ("constexpr if"), + NAME ("constexpr"), + NAME ("consteval"), + NAME ("decltype"), + NAME ("nullptr"), + NAME ("operator delete"), + NAME ("operator new"), + NAME ("typeid"), + NAME ("typeinfo") + +#undef NAME + }; + +/* Common contractions that should be avoided in favor of alternatives. */ + +static const struct +{ + const char *name; /* Contraction. */ + unsigned char len; /* Its length. */ + const char *alt; /* Preferred alternative. */ +} contrs[] = + { +#define CONTR(name, alt) { name, sizeof name - 1, alt } + + CONTR ("can't", "cannot"), + CONTR ("didn't", "did not"), + /* These are commonly abused. Avoid diagnosing them for now. + CONTR ("isn't", "is not"), + CONTR ("don't", "is not"), + */ + CONTR ("mustn't", "must not"), + CONTR ("needn't", "need not"), + CONTR ("should't", "should not"), + CONTR ("that's", "that is"), + CONTR ("there's", "there is"), + CONTR ("they're", "they are"), + CONTR ("what's", "what is"), + CONTR ("won't", "will not") + }; + +/* Check for unquoted TOKENS. 
FORMAT_STRING_LOC is the location of + the format string, FORMAT_STRING_CST the format string itself (as + a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to + the beginning of the format string and the character currently + being processed, and BALTOKS describes paired "tokens" within + the format string that are expected to be balanced. + Returns a pointer to the last processed character or null when + nothing was done. */ + +static const char* +check_tokens (const token_t *tokens, unsigned ntoks, + location_t format_string_loc, tree format_string_cst, + const char *orig_format_chars, const char *format_chars, + baltoks_t &baltoks) +{ + /* For brevity. */ + const int opt = OPT_Wformat_diag; + /* Zero-based starting position of a problem sequence. */ + int fmtchrpos = format_chars - orig_format_chars; + + /* For identifier-like "words," set to the word length. */ + unsigned wlen = 0; + /* Set for an operator, clear for an identifier/word. */ + bool is_oper = false; + bool underscore = false; + + if (format_chars[0] == '_' || ISALPHA (format_chars[0])) + { + while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen])) + { + underscore |= format_chars[wlen] == '_'; + ++wlen; + } + } + else + is_oper = true; + + bool is_cxx = c_dialect_cxx (); + + for (unsigned i = 0; i != ntoks; ++i) + { + unsigned toklen = tokens[i].len; + + if ((!is_cxx && tokens[i].is_cxx) + || toklen < wlen + || strncmp (format_chars, tokens[i].name, toklen)) + continue; + + if (toklen == 2 + && format_chars - orig_format_chars > 0 + && (TOUPPER (format_chars[-1]) == 'C' + || TOUPPER (format_chars[-1]) == 'G')) + return format_chars + toklen - 1; /* Reference to C++ or G++. */ + + if (ISPUNCT (format_chars[toklen - 1])) + { + if (format_chars[toklen - 1] == format_chars[toklen]) + return NULL; /* Operator followed by another punctuator. */ + } + else if (ISALNUM (format_chars[toklen])) + return NULL; /* Keyword prefix for a longer word. 
*/ + + if (toklen == 2 + && format_chars[0] == '-' + && format_chars[1] == '-' + && ISALNUM (format_chars[2])) + return NULL; /* Probably option like --help. */ + + /* Allow this ugly warning for the time being. */ + if (toklen == 2 + && format_chars - orig_format_chars > 6 + && !strncmp (format_chars - 7, " count >= width of ", 19)) + return format_chars + 10; + + bool is_type = strchr (tokens[i].name, ' '); + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + toklen, opt, + (is_type + ? G_("unquoted type name %<%.*s%> in format") + : (is_oper + ? G_("unquoted operator %<%.*s%> in format") + : G_("unquoted keyword %<%.*s%> in format"))), + toklen, format_chars); + return format_chars + toklen - 1; + } + + /* Diagnose unquoted __attribute__. Consider any parenthesized + argument to the attribute to avoid redundant warnings for + the double parentheses that might follow. */ + if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1)) + { + unsigned nchars = sizeof "__attribute" - 1; + while ('_' == format_chars[nchars]) + ++nchars; + + for (int i = nchars; format_chars[i]; ++i) + if (' ' != format_chars[i]) + { + nchars = i; + break; + } + + if (format_chars[nchars] == '(') + { + baltoks.parens.safe_push (format_chars + nchars); + + ++nchars; + bool close = false; + if (format_chars[nchars] == '(') + { + baltoks.parens.safe_push (format_chars + nchars); + close = true; + ++nchars; + } + for (int i = nchars; format_chars[i]; ++i) + if (')' == format_chars[i]) + { + if (baltoks.parens.length () > 0) + baltoks.parens.pop (); + nchars = i + 1; + break; + } + + if (close && format_chars[nchars] == ')') + { + if (baltoks.parens.length () > 0) + baltoks.parens.pop (); + ++nchars; + } + } + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted attribute in format"); + return format_chars + nchars - 1; + } + + /* Diagnose unquoted built-ins. 
*/ + if (format_chars[0] == '_' + && format_chars[1] == '_' + && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1) + || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1) + || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1))) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + wlen, opt, + "unquoted name of built-in function %<%.*s%> " + "in format", + wlen, format_chars); + return format_chars + wlen - 1; + } + + /* Diagnose unquoted substrings of alphanumeric characters containing + underscores. They most likely refer to identifiers and should be + quoted. */ + if (underscore) + format_warning_substr (format_string_loc, format_string_cst, + format_chars - orig_format_chars, + format_chars + wlen - orig_format_chars, + opt, + "unquoted identifier or keyword %<%.*s%> in format", + wlen, format_chars); + else + { + /* Diagnose "can not". */ + if ((format_chars == orig_format_chars + || (format_chars - orig_format_chars + && !ISALNUM (format_chars[-1]))) + && !ISALNUM (format_chars[7]) + && !strncmp (format_chars, "can not", 7)) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + 7, opt, + "%qs is not a word; use %qs instead", + "can not", "cannot"); + return format_chars + 6; + } + + /* Diagnose "arg" (short for "argument" when lazy). */ + if (!strncmp (format_chars, "arg", 3) + && (!format_chars[3] + || format_chars[3] == 's' + || ISSPACE (format_chars[3]))) + { + int n = 3 + (format_chars[3] == 's'); + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + n, opt, + "%q.*s is not a word; use %q.*s instead", + n, format_chars, + n == 3 ? 8 : 9, "arguments"); + return format_chars + n - 1; + } + + /* Diagnose "reg" (short for "register"). 
*/ + if (!strncmp (format_chars, "reg", 3) + && (!format_chars[3] + || format_chars[3] == 's' + || ISSPACE (format_chars[3]))) + { + int n = 3 + (format_chars[3] == 's'); + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + n, opt, + "%q.*s is not a word; use %q.*s instead", + n, format_chars, + n == 3 ? 8 : 9, "registers"); + return format_chars + n - 1; + } + } + + return wlen ? format_chars + wlen - 1 : NULL; +} + +/* Check plain text in a format string of a GCC diagnostic function + for common quoting, punctuation, and spelling mistakes, and issue + -Wformat-diag warnings if they are found. FORMAT_STRING_LOC is + the location of the format string, FORMAT_STRING_CST the format + string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are + pointers to the beginning of the format string and the character + currently being processed, and BALTOKS describes paired "tokens" + within the format string that are expected to be balanced. + Returns a pointer to the last processed character. */ + +static const char* +check_plain (location_t format_string_loc, tree format_string_cst, + const char *orig_format_chars, const char *format_chars, + baltoks_t &baltoks) +{ + /* For brevity. */ + const int opt = OPT_Wformat_diag; + /* Zero-based starting position of a problem sequence. */ + int fmtchrpos = format_chars - orig_format_chars; + + if (*format_chars == '%') + { + /* Diagnose %<%s%> and suggest using %qs instead. */ + if (!strncmp (format_chars, "%<%s%>", 6) + && format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + 6, opt, + "quoted %qs directive in format", "%s")) + inform (format_string_loc, "use %qs instead", "%qs"); + + return format_chars; + } + + if (baltoks.quotdirs.length ()) + { + /* Skip over all plain text within a quoting directive until + the next directive. 
*/ + while (*format_chars && '%' != *format_chars) + ++format_chars; + + return format_chars; + } + + /* The length of the problem sequence. */ + int nchars = 0; + + /* Diagnose any whitespace characters other than <space> but only + leading, trailing, and two or more consecutive <space>s. Do + this before diagnosing control characters because whitespace + is a subset of controls. */ + const char *other_than_space = NULL; + while (ISSPACE (format_chars[nchars])) + { + if (format_chars[nchars] != ' ' && !other_than_space) + other_than_space = format_chars + nchars; + ++nchars; + } + + if (nchars) + { + /* This is the most common problem: go the extra mile to describe + the problem in as much helpful detail as possible. */ + if (other_than_space) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted whitespace character %qc in format", + *other_than_space); + return format_chars + nchars - 1; + } + + if (fmtchrpos == 0) + /* Accept strings of leading spaces with no warning. */ + return format_chars + nchars - 1; + + if (!format_chars[nchars]) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + (nchars > 1 + ? G_("spurious trailing space characters " + "in format") + : G_("spurious trailing space character " + "in format"))); + return format_chars + nchars - 1; + } + + if (nchars > 1) + { + if (nchars == 2 + && orig_format_chars < format_chars + && format_chars[-1] == '.' + && format_chars[0] == ' ' + && format_chars[1] == ' ') + { + /* A period followed by two spaces. */ + if (ISUPPER (*orig_format_chars)) + { + /* If the part before the period is a capitalized + sentence check to make sure that what follows + is also capitalized. 
*/ + if (ISLOWER (format_chars[2])) + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "inconsistent capitalization in " + "format"); + } + } + else + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted sequence of %i consecutive " + "space characters in format", nchars); + return format_chars + nchars - 1; + } + + format_chars += nchars; + nchars = 0; + } + + fmtchrpos = format_chars - orig_format_chars; + + /* Diagnose any unquoted control characters other than the terminating + NUL. */ + while (format_chars[nchars] && ISCNTRL (format_chars[nchars])) + ++nchars; + + if (nchars > 1) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted control characters in format"); + return format_chars + nchars - 1; + } + if (nchars) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted control character %qc in format", + *format_chars); + return format_chars + nchars - 1; + } + + if (ISPUNCT (format_chars[0])) + if (const char *ret = check_tokens (opers, + sizeof opers / sizeof *opers, + format_string_loc, format_string_cst, + orig_format_chars, format_chars, + baltoks)) + return ret; + + if (ISALPHA (format_chars[0])) + { + if (const char *ret = check_tokens (keywords, + sizeof keywords / sizeof *keywords, + format_string_loc, format_string_cst, + orig_format_chars, format_chars, + baltoks)) + return ret; + } + + nchars = 0; + + /* Diagnose unquoted options. 
*/ + if ((format_chars == orig_format_chars + || format_chars[-1] == ' ') + && format_chars[0] == '-' + && ((format_chars[1] == '-' + && ISALPHA (format_chars[2])) + || ISALPHA (format_chars[1]))) + { + nchars = 1; + while (ISALNUM (format_chars[nchars]) + || '_' == format_chars[nchars] + || '-' == format_chars[nchars] + || '+' == format_chars[nchars]) + ++nchars; + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted option name %<%.*s%> in format", + nchars, format_chars); + return format_chars + nchars - 1; + } + + /* Diagnose leading, trailing, and two or more consecutive punctuation + characters. */ + const char *unbalanced = NULL; + while ('%' != format_chars[nchars] + && ISPUNCT (format_chars[nchars]) + && !unbalanced) + { + switch (format_chars[nchars]) + { + case '[': + baltoks.brackets.safe_push (format_chars + nchars); + break; + case '{': + baltoks.curly.safe_push (format_chars + nchars); + break; + case '(': + baltoks.parens.safe_push (format_chars + nchars); + break; + case '<': + baltoks.pointy.safe_push (format_chars + nchars); + break; + + case ']': + if (baltoks.brackets.length () > 0) + baltoks.brackets.pop (); + else + unbalanced = format_chars + nchars; + break; + case '}': + if (baltoks.curly.length () > 0) + baltoks.curly.pop (); + else + unbalanced = format_chars + nchars; + break; + case ')': + if (baltoks.parens.length () > 0) + baltoks.parens.pop (); + else + unbalanced = format_chars + nchars; + break; + case '>': + if (baltoks.pointy.length () > 0) + baltoks.pointy.pop (); + else + unbalanced = format_chars + nchars; + break; + } + + ++nchars; + } + + if (unbalanced) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unbalanced punctuation character %qc in format", + *unbalanced); + return format_chars + nchars - 1; + } + + if (nchars) + { + /* Consider any identifier that follows the pound ('#') sign + a preprocessing 
directive. */ + if (nchars == 1 + && format_chars[0] == '#' + && ISALPHA (format_chars[1])) + { + while (ISALNUM (format_chars[nchars]) + || format_chars[nchars] == '_') + ++nchars; + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted preprocessing directive %<%.*s%> " + "in format", nchars, format_chars); + return format_chars + nchars - 1; + } + + /* Diagnose a bare single quote. */ + if (nchars == 1 + && format_chars[0] == '\'' + && format_chars - orig_format_chars + && ISALPHA (format_chars[-1]) + && ISALPHA (format_chars[1])) + { + /* Diagnose a subset of contractions that are best avoided. */ + for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i) + { + const char *apos = strchr (contrs[i].name, '\''); + gcc_checking_assert (apos); + + int apoff = apos - contrs[i].name; + if (format_chars - orig_format_chars >= apoff + && !strncmp (format_chars - apoff, + contrs[i].name, contrs[i].len)) + { + if (format_warning_substr (format_string_loc, + format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "contraction %<%.*s%> in format", + contrs[i].len, contrs[i].name)) + inform (format_string_loc, + "use %qs instead", contrs[i].alt); + return format_chars + nchars - 1; + } + } + + if (format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "bare apostrophe %<'%> in format")) + inform (format_string_loc, + "if avoiding the apostrophe is not feasible, enclose " + "it in a pair of %qs and %qs directives instead", + "%<", "%>"); + return format_chars + nchars - 1; + } + + /* Diagnose a backtick (grave accent). */ + if (nchars == 1 + && format_chars[0] == '`') + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "grave accent %<`%> in format"); + return format_chars + nchars - 1; + } + + /* Diagnose a punctuation character after a space. 
*/ + if (nchars == 1 + && format_chars - orig_format_chars + && format_chars[-1] == ' ' + && strspn (format_chars, "!?:;.,") == 1) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos - 1, fmtchrpos, opt, + "space followed by punctuation character " + "%<%c%>", format_chars[0]); + return format_chars; + } + + if (nchars == 1) + { + if (!strncmp (format_chars, "\"%s\"", 4)) + { + if (format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + 4, opt, + "quoted %qs directive in format", + "%s")) + inform (format_string_loc, "if using %qs is not feasible, " + "use %qs instead", "%qs", "\"%-s\""); + } + + if (format_chars[0] == '"') + { + baltoks.doublequote = baltoks.doublequote ? NULL : format_chars; + return format_chars + nchars - 1; + } + if (format_chars[0] == '\'') + { + baltoks.singlequote = baltoks.singlequote ? NULL : format_chars; + return format_chars + nchars - 1; + } + } + + if (fmtchrpos == 0) + { + if (nchars == 1 + && format_chars[0] == '(') + ; /* Text beginning in an open parenthesis. */ + else if (nchars == 3 + && !strncmp (format_chars, "...", 3) + && format_chars[3]) + ; /* Text beginning in an ellipsis. */ + else + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + (nchars > 1 + ? G_("spurious leading punctuation " + "characters %<%.*s%> in format") + : G_("spurious leading punctuation " + "character %<%.*s%> in format")), + nchars, format_chars); + return format_chars + nchars - 1; + } + } + else if (!format_chars[nchars]) + { + if (nchars == 1 + && (format_chars[nchars - 1] == ':' + || format_chars[nchars - 1] == ')')) + ; /* Text ending in a colon or a closing parenthesis. 
*/ + else if (nchars == 1 + && ((ISUPPER (*orig_format_chars) + && format_chars[nchars - 1] == '.') + || strspn (format_chars + nchars - 1, "?])") == 1)) + ; /* Capitalized sentence terminated by a single period, + or text ending in a question mark, closing bracket, + or parenthesis. */ + else if (nchars == 2 + && format_chars - orig_format_chars > 0 + && (TOUPPER (format_chars[-1]) == 'C' + || TOUPPER (format_chars[-1]) == 'G')) + ; /* Trailing reference to C++ or G++. */ + else if (nchars == 2 + && format_chars[0] == '?' + && format_chars[1] == ')') + ; /* A question mark after a closing parenthetical note. */ + else if (nchars == 2 + && format_chars[0] == ')' + && (format_chars[1] == '?' + || format_chars[1] == ';' + || format_chars[1] == ':' + || (ISUPPER (*orig_format_chars) + && format_chars[1] == '.'))) + ; /* Closing parenthetical note followed by a question mark, + semicolon, or colon at the end of the string, or by + a period at the end of a capitalized sentence. */ + else if (nchars == 3 + && format_chars - orig_format_chars > 0 + && !strncmp (format_chars, "...", 3)) + ; /* Text ending in the ellipsis. */ + else + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + (nchars > 1 + ? G_("spurious trailing punctuation " + "characters %<%.*s%> in format") + : G_("spurious trailing punctuation " + "character %<%.*s%> in format")), + nchars, format_chars); + + return format_chars + nchars - 1; + } + else if (nchars == 2 + && format_chars - orig_format_chars > 0 + && (!strncasecmp (format_chars - 1, "c++", 3) + || !strncasecmp (format_chars - 1, "g++", 3))) + ; /* Reference to C++. */ + else if (nchars == 2 + && format_chars[0] == ')' + && (format_chars[1] == ':' + || format_chars[1] == ';' + || format_chars[1] == ',') + && format_chars[2] == ' ') + ; /* Closing parenthetical note followed by a colon, semicolon + or a comma followed by a space in the middle of the string. 
*/ + else if (nchars > 1) + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted sequence of %i consecutive " + "punctuation characters %q.*s in format", + nchars, nchars, format_chars); + return format_chars + nchars - 1; + } + + nchars = 0; + + /* Finally, diagnose any unquoted non-graph, non-punctuation characters + other than the terminating NUL. */ + while (format_chars[nchars] + && '%' != format_chars[nchars] + && !ISPUNCT (format_chars[nchars]) + && !ISGRAPH (format_chars[nchars])) + ++nchars; + + if (nchars > 1) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted non-graph characters in format"); + return format_chars + nchars - 1; + } + if (nchars) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted non-graph character %qc in format", + *format_chars); + return format_chars + nchars - 1; + } + + return format_chars; +} + +static void +maybe_diag_unbalanced_tokens (location_t format_string_loc, + const char *orig_format_chars, + tree format_string_cst, + baltoks_t &baltoks) +{ + const char *unbalanced = NULL; + + if (baltoks.brackets.length ()) + unbalanced = baltoks.brackets.pop (); + else if (baltoks.curly.length ()) + unbalanced = baltoks.curly.pop (); + else if (baltoks.parens.length ()) + unbalanced = baltoks.parens.pop (); + else if (baltoks.pointy.length ()) + unbalanced = baltoks.pointy.pop (); + + if (unbalanced) + format_warning_at_char (format_string_loc, format_string_cst, + unbalanced - orig_format_chars + 1, + OPT_Wformat_diag, + "unbalanced punctuation character %<%c%> in format", + *unbalanced); + + if (baltoks.quotdirs.length ()) + format_warning_at_char (format_string_loc, format_string_cst, + baltoks.quotdirs.pop () - orig_format_chars, + OPT_Wformat_, + "unterminated quoting directive"); + + const char *quote + = baltoks.singlequote ? 
baltoks.singlequote : baltoks.doublequote; + + if (quote) + format_warning_at_char (format_string_loc, format_string_cst, + quote - orig_format_chars + 1, + OPT_Wformat_diag, + "unterminated quote character %<%c%> in format", + *quote); +} + /* Do the main part of checking a call to a format function. FORMAT_CHARS is the NUL-terminated format string (which at this point may contain internal NUL characters); FORMAT_LENGTH is its length (excluding the @@ -2816,8 +3733,10 @@ check_format_info_main (format_check_results *res, and it didn't use $; 1 if $ formats are in use. */ int has_operand_number = -1; - /* Vector of pointers to opening quoting directives (like GCC "%<"). */ - auto_vec<const char*> quotdirs; + /* Vectors of pointers to opening quoting directives (like GCC "%<"), + opening braces, brackets, and parentheses. Used to detect unbalanced + tokens. */ + baltoks_t baltoks; /* Pointers to the most recent color directives (like GCC's "%r or %R"). A starting color directive much be terminated before the end of @@ -2828,10 +3747,26 @@ check_format_info_main (format_check_results *res, init_dollar_format_checking (info->first_arg_num, first_fillin_param); + /* In GCC diagnostic functions check plain directives (substrings within + the format string that don't start with %) for quoting and punctuations + problems. 
*/ + bool ck_plain = (!info->is_raw + && (info->format_type == gcc_diag_format_type + || info->format_type == gcc_tdiag_format_type + || info->format_type == gcc_cdiag_format_type + || info->format_type == gcc_cxxdiag_format_type)); + while (*format_chars != 0) { - if (*format_chars++ != '%') + if (ck_plain) + format_chars = check_plain (format_string_loc, + format_string_cst, + orig_format_chars, format_chars, + baltoks); + + if (*format_chars == 0 || *format_chars++ != '%') continue; + if (*format_chars == 0) { format_warning_at_char (format_string_loc, format_string_cst, @@ -2846,6 +3781,8 @@ check_format_info_main (format_check_results *res, continue; } + /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls + to ARG_PARSER members may modify the variable. */ flag_chars_t flag_chars; argument_parser arg_parser (info, format_chars, format_string_cst, orig_format_chars, format_string_loc, @@ -2908,7 +3845,7 @@ check_format_info_main (format_check_results *res, flag_chars.validate (fki, fci, flag_specs, format_chars, format_string_cst, format_string_loc, orig_format_chars, format_char, - quotdirs.length () > 0); + baltoks.quotdirs.length () > 0); const int alloc_flag = flag_chars.get_alloc_flag (fki); const bool suppressed = flag_chars.assignment_suppression_p (fki); @@ -2920,17 +3857,17 @@ check_format_info_main (format_check_results *res, if (quot_begin_p && !quot_end_p) { - if (quotdirs.length ()) + if (baltoks.quotdirs.length ()) format_warning_at_char (format_string_loc, format_string_cst, format_chars - orig_format_chars, OPT_Wformat_, "nested quoting directive"); - quotdirs.safe_push (format_chars); + baltoks.quotdirs.safe_push (format_chars); } else if (!quot_begin_p && quot_end_p) { - if (quotdirs.length ()) - quotdirs.pop (); + if (baltoks.quotdirs.length ()) + baltoks.quotdirs.pop (); else format_warning_at_char (format_string_loc, format_string_cst, format_chars - orig_format_chars, @@ -2962,7 +3899,7 @@ check_format_info_main 
(format_check_results *res, /* Diagnose directives that shouldn't appear in a quoted sequence. (They are denoted by a double quote in FLAGS2.) */ - if (quotdirs.length ()) + if (baltoks.quotdirs.length ()) { if (strchr (fci->flags2, '"')) format_warning_at_char (format_string_loc, format_string_cst, @@ -3018,10 +3955,9 @@ check_format_info_main (format_check_results *res, if (has_operand_number > 0) finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK); - if (quotdirs.length ()) - format_warning_at_char (format_string_loc, format_string_cst, - quotdirs.pop () - orig_format_chars, - OPT_Wformat_, "unterminated quoting directive"); + maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars, + format_string_cst, baltoks); + if (color_begin && !color_end) format_warning_at_char (format_string_loc, format_string_cst, color_begin - orig_format_chars, @@ -4199,7 +5135,7 @@ handle_format_attribute (tree *node, tree atname, tree args, if (arg_num != info.first_arg_num) { if (!(flags & (int) ATTR_FLAG_BUILT_IN)) - error ("args to be formatted is not %<...%>"); + error ("argument to be formatted is not %<...%>"); *no_add_attrs = true; return NULL_TREE; } diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 916cc67b453..046d489f7eb 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -562,6 +562,10 @@ Wformat-contains-nul C ObjC C++ ObjC++ Var(warn_format_contains_nul) Warning LangEnabledBy(C ObjC C++ ObjC++,Wformat=, warn_format >= 1, 0) Warn about format strings that contain NUL bytes. +Wformat-diag +C ObjC C++ ObjC++ Var(warn_format_diag) Warning LangEnabledBy(C ObjC C++ ObjC++,Wformat=, warn_format >= 1, 0) +Warn about GCC format strings with strings unsuitable for diagnostics. + Wformat-extra-args C ObjC C++ ObjC++ Var(warn_format_extra_args) Warning LangEnabledBy(C ObjC C++ ObjC++,Wformat=, warn_format >= 1, 0) Warn if passing too many arguments to a function for its format string. 
diff --git a/gcc/testsuite/gcc.dg/format/gcc_diag-11.c b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c new file mode 100644 index 00000000000..a716c8de7ef --- /dev/null +++ b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c @@ -0,0 +1,382 @@ +/* Test warnings for GCC diagnostics. + { dg-do compile } + { dg-options "-Wformat -Wformat-diag" } */ + +/* Magic identifiers must be set before the attribute is used. */ + +typedef long long __gcc_host_wide_int__; + +typedef struct location_s +{ + const char *file; + int line; +} location_t; + +union tree_node; +typedef union tree_node *tree; + +/* Define gimple as a dummy type. The typedef must be provided for + the C test to find the symbol. */ +typedef struct gimple gimple; + +/* Likewise for cgraph_node. */ +typedef struct cgraph_node cgraph_node; + +#define FORMAT(kind) __attribute__ ((format (__gcc_## kind ##__, 1, 2))) + +/* Basic formatting function like pp_format. */ +void diag (const char*, ...) FORMAT (diag); + +/* Diagnostic formatting function like error or warning declared + by the C front end. */ +void cdiag (const char*, ...) FORMAT (cdiag); + +/* Diagnostic formatting function like error or warning declared + by the middle-end or back-end. */ +void tdiag (const char*, ...) FORMAT (tdiag); + +/* Diagnostic formatting function like error or warning declared + by the C++ front-end. */ +void cxxdiag (const char*, ...) FORMAT (cxxdiag); + +void dump (const char*, ...) FORMAT (dump_printf); + +/* Verify that functions declared with the C/C++ front-end __gcc_cdiag__ + attribute detect invalid whitespace in format strings. */ + +void test_cdiag_whitespace (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + /* Verify that strings of leading spaces don't trigger a warning. 
*/ + cdiag (" a"); + cdiag (" b"); + cdiag (" c"); + cdiag ("%< %>a"); + cdiag ("%< %>a"); + cdiag ("a b"); + cdiag ("a b"); /* { dg-warning "unquoted sequence of 2 consecutive space characters" } */ + cdiag ("a "); /* { dg-warning "spurious trailing space character" } */ + cdiag ("a "); /* { dg-warning "spurious trailing space characters" } */ + cdiag ("a%< %>"); + cdiag ("a%< %>%< %>"); + cdiag ("a%< %> "); /* { dg-warning "spurious trailing space character" } */ + cdiag ("a%< %> %< %>"); /* { dg-warning "unquoted sequence of 2 consecutive space characters" } */ + + /* It's debatable whether the following two format strings should + be diagnosed. They aren't only because it's simpler that way. */ + cdiag ("a %< %>"); + cdiag ("a%< %> %< %>"); + + /* Exercise other whitespace characters. */ + cdiag ("a\fb"); /* { dg-warning "unquoted whitespace character '\\\\x0c'" } */ + cdiag ("a\nb"); /* { dg-warning "unquoted whitespace character '\\\\x0a'" } */ + cdiag ("a\rb"); /* { dg-warning "unquoted whitespace character '\\\\x0d'" } */ + cdiag ("a\vb"); /* { dg-warning "unquoted whitespace character '\\\\x0b'" } */ + + cdiag ("First sentence. And a next."); + cdiag ("First sentence. not capitalized sentence"); /* { dg-warning "inconsistent capitalization" } */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-diag" + + /* Verify that the warning can be suppressed. */ + cdiag ("\ta\b c\vb\n"); + +#pragma GCC diagnostic pop +} + + +void test_cdiag_control (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("\1"); /* { dg-warning "unquoted control character '\\\\x01'" } */ + cdiag ("a\ab"); /* { dg-warning "unquoted control character '\\\\x07'" } */ + cdiag ("a\bb"); /* { dg-warning "unquoted control character '\\\\x08'" } */ +} + + +void test_cdiag_punct (tree t, gimple *gc, int i) +{ + (void)&t; (void)&gc; + + /* Exercise the period. 
*/ + cdiag (".abc"); /* { dg-warning "spurious leading punctuation character" } */ + cdiag ("abc;"); /* { dg-warning "spurious trailing punctuation character" } */ + /* Verify that sentences that start with an uppercase letter and end + in a period are not diagnosed. */ + cdiag ("This is a full sentence."); + cdiag ("Capitalized sentence (with a parethetical note)."); + cdiag ("Not a full sentence;"); /* { dg-warning "spurious trailing punctuation character" } */ + cdiag ("Neither is this one,"); /* { dg-warning "spurious trailing punctuation character" } */ + + /* Exercise the ellipsis. */ + cdiag ("this message..."); + cdiag ("...continues here"); + cdiag ("but...not here"); /* { dg-warning "unquoted sequence of 3 consecutive punctuation characters" } */ + + /* Verify that parenthesized sentences are accepted, even the whole + message (done in the C++ front end). */ + cdiag ("null argument where non-null required (argument %i)", i); + cdiag ("null (argument %i) where non-null required", i); + cdiag ("(see what comes next)"); + + /* Verify that only a single trailing colon is accepted. */ + cdiag ("candidates are:"); + cdiag ("candidates are::"); /* { dg-warning "spurious trailing punctuation characters" } */ + + /* Exercise C++. */ + cdiag ("C++ is cool"); + cdiag ("this is c++"); + cdiag ("you can do this in C++ but not in C"); + + /* Also verify that G++ is accepted. */ + cdiag ("G++ rocks"); + cdiag ("this is accepted by g++"); + cdiag ("valid in G++ (or g++) but not in gcc"); + + /* Exercise parenthetical note followed by a colon, semicolon, + or a comma. */ + cdiag ("found a bug (here):"); + cdiag ("because of another bug (over there); fix it"); + + cdiag ("found foo (123): go look at it"); + cdiag ("missed bar (abc); will try harder next time"); + + cdiag ("expected this (or that), got something else (or who knows what)"); + + /* Exercise parenthetical note with a question mark. 
*/ + cdiag ("hmmm (did you really mean that?)"); + cdiag ("error (did you mean %<foo()%>?)"); + /* And a question mark after a parenthetical note. */ + cdiag ("did you mean this (or that)?"); + + /* But make sure unbalanced parentheses are diagnosed. */ + cdiag ("or this or the other)?"); /* { dg-warning "unbalanced punctuation character '\\\)'" } */ + + cdiag ("## Heading"); /* { dg-warning "spurious leading punctuation characters '##'" } */ + cdiag ("## %s ##", "1"); /* { dg-warning "spurious (leading|trailing) punctuation characters '##'" } */ + + cdiag ("#1 priority"); /* { dg-warning "spurious leading punctuation character '#'" } */ + cdiag ("priority #2"); + + /* Quoting. */ + cdiag ("\"quoted\""); + cdiag ("\"quoted\" string"); + cdiag ("this is a \"string in quotes\""); + cdiag ("\"missing closing quote"); /* { dg-warning "unterminated quote character '\"'" } */ + + /* PR translation/90121 - punctuation character after a space. */ + cdiag ("bad version : 1"); /* { dg-warning "space followed by punctuation character ':'" } */ + cdiag ("problem ; fix it"); /* { dg-warning "space followed by punctuation character ';'" } */ + cdiag ("End . not."); /* { dg-warning "space followed by punctuation character '.'" } */ + cdiag ("it is bad , very bad"); /* { dg-warning "space followed by punctuation character ','" } */ + cdiag ("say what ?"); /* { dg-warning "space followed by punctuation character '?'" } */ + /* But these are okay after a space. 
*/ + cdiag ("1 / 2"); + cdiag ("2 + 3"); + cdiag ("2 + 3"); +} + +void test_cdiag_punct_balance (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("f()"); /* { dg-warning "spurious trailing punctuation characters" } */ + cdiag ("g(1)"); + cdiag ("("); /* { dg-warning "spurious leading punctuation character|unbalanced" } */ + cdiag ("()"); /* { dg-warning "spurious leading punctuation characters" } */ + cdiag (")"); /* { dg-warning "unbalanced punctuation character '\\\)'" } */ + cdiag ("f()g"); /* { dg-warning "unquoted sequence of 2 consecutive punctuation characters" } */ + cdiag ("illegal operand (1)"); +} + + +void test_cdiag_nongraph (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("a\376b"); /* { dg-warning "unquoted non-graph character '\\\\xfe'" } */ + cdiag ("a\377b"); /* { dg-warning "unquoted non-graph character '\\\\xff'" } */ +} + + +void test_cdiag_attribute (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("attribute foo"); + cdiag ("this is attribute bar"); + cdiag ("bad __attribute bar"); /* { dg-warning "unquoted attribute" } */ + cdiag ("__attribute__ (foobar) bad"); /* { dg-warning "unquoted attribute" } */ + cdiag ("__attribute__ ((foobar))"); /* { dg-warning "unquoted attribute" } */ + cdiag ("__attribute__ (xxx))"); /* { dg-warning "unquoted attribute" } */ + /* { dg-warning "unbalanced punctuation character '\\\)'" "xxx" { target *-*-* } .-1 } */ + cdiag ("__attribute__ ((yyy)))"); /* { dg-warning "unquoted attribute" } */ + /* { dg-warning "unbalanced punctuation character '\\\)'" "yyy" { target *-*-* } .-1 } */ + cdiag ("__attribute__ ((zzz)"); /* { dg-warning "unquoted attribute" } */ + /* { dg-warning "unbalanced punctuation character '\\\('" "zzz" { target *-*-* } .-1 } */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-diag" + + /* Verify that the warning can be suppressed. 
*/ + cdiag ("__attribute__ ((("); + +#pragma GCC diagnostic pop +} + +void test_cdiag_builtin (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("__builtin_abort"); /* { dg-warning "unquoted name of built-in function '__builtin_abort'" } */ + cdiag ("in __builtin_trap"); /* { dg-warning "unquoted name of built-in function '__builtin_trap'" } */ + cdiag ("__builtin_xyz bites");/* { dg-warning "unquoted name of built-in function '__builtin_xyz'" } */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-diag" + + /* Verify that the warning can be suppressed. */ + cdiag ("__builtin____with____lots__of__underscores"); + +#pragma GCC diagnostic pop +} + + +void test_cdiag_option (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("%<-Wall%>"); + cdiag ("use option %<-Wextra%> to enable additinal warnings"); + + cdiag ("-O2 is fast"); /* { dg-warning "unquoted option name '-O2'" } */ + cdiag ("but -O3 is faster"); /* { dg-warning "unquoted option name '-O3'" } */ + + cdiag ("get --help"); /* { dg-warning "unquoted option name '--help'" } */ + cdiag ("enable -m32"); /* { dg-warning "unquoted option name '-m32'" } */ + cdiag ("value is -12"); + cdiag ("foo-O2"); + cdiag ("a-W"); +} + + +void test_cdiag_keyword (tree t, gimple *gc) +{ + cdiag ("malignofer or alignofus"); + cdiag ("use alignof"); /* { dg-warning "unquoted keyword 'alignof'" } */ + cdiag ("or _Alignof"); /* { dg-warning " keyword '_Alignof'" } */ + cdiag ("_Pragma too"); /* { dg-warning " keyword '_Pragma'" } */ + + cdiag ("a #error directive"); /* { dg-warning "unquoted preprocessing directive '#error'" } */ + cdiag ("#include file"); /* { dg-warning "unquoted preprocessing directive '#include'" } */ + cdiag ("but #pragma foobar"); /* { dg-warning "unquoted preprocessing directive '#pragma'" } */ + cdiag ("pragma foobar is okay"); + cdiag ("or even # pragma is fine"); + + cdiag ("an offsetof here"); /* { dg-warning "unquoted keyword 'offsetof" } */ + cdiag ("sizeof x"); /* { 
dg-warning "unquoted keyword 'sizeof" } */ + cdiag ("have typeof"); /* { dg-warning "unquoted keyword 'typeof" } */ +} + + +void test_cdiag_operator (tree t, gimple *gc) +{ + cdiag ("a == b"); /* { dg-warning "unquoted operator '=='" } */ + cdiag ("++a"); /* { dg-warning "unquoted operator '\\\+\\\+'" } */ + cdiag ("b--"); /* { dg-warning "unquoted operator '--'" } */ +} + + +void test_cdiag_type_name (tree t, gimple *gc) +{ + cdiag ("the word character should not be quoted"); + cdiag ("but char should be"); /* { dg-warning "unquoted keyword 'char'" } */ + + cdiag ("unsigned char should be quoted"); /* { dg-warning "unquoted type name 'unsigned char'" } */ + cdiag ("but unsigned character is fine"); + + cdiag ("as should int"); /* { dg-warning "unquoted keyword 'int'" } */ + cdiag ("and signed int"); /* { dg-warning "unquoted type name 'signed int'" } */ + cdiag ("and also unsigned int"); /* { dg-warning "unquoted type name 'unsigned int'" } */ + cdiag ("very long thing"); + cdiag ("use long long here"); /* { dg-warning "unquoted type name 'long long'" } */ + + cdiag ("have a floating type"); + cdiag ("found float type"); /* { dg-warning "unquoted keyword 'float'" } */ + + cdiag ("break rules"); + cdiag ("if we continue by default for a short while else do nothing"); + cdiag ("register a function for unsigned extern to void const reads"); + cdiag ("or volatile access"); +} + + +void test_cdiag_identifier (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("private _x ident"); /* { dg-warning "unquoted identifier or keyword '_x'" } */ + cdiag ("and another __y"); /* { dg-warning "unquoted identifier or keyword '__y'" } */ + cdiag ("ident z_ with trailing underscore"); /* { dg-warning "unquoted identifier or keyword 'z_'" } */ + cdiag ("v_ variable"); /* { dg-warning "unquoted identifier or keyword 'v_'" } */ + cdiag ("call foo_bar"); /* { dg-warning "unquoted identifier or keyword 'foo_bar'" } */ + cdiag ("unqoted x_y ident"); /* { dg-warning "unquoted 
identifier or keyword 'x_y'" } */ + + cdiag ("size_t type"); /* { dg-warning "unquoted identifier or keyword 'size_t'" } */ + cdiag ("bigger than INT_MAX");/* { dg-warning "unquoted identifier or keyword 'INT_MAX'" } */ + + cdiag ("quoted ident %<a_b%>"); + cdiag ("another quoted identifier %<x_%> here"); +} + + +void test_cdiag_bad_words (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cdiag ("arn't you dumb?"); /* { dg-warning "bare apostrophe ''' in format" } */ + cdiag ("you can't do that"); /* { dg-warning "contraction 'can't' in format" } */ + cdiag ("you can%'t do that");/* { dg-warning "contraction 'can%'t' in format" "FIXME" { xfail *-*-* } } */ + cdiag ("it mustn't be"); /* { dg-warning "contraction 'mustn't' in format" } */ + cdiag ("isn't that silly?"); /* { dg-warning "bare apostrophe ''' in format" } */ + + cdiag ("can not do this"); /* { dg-warning "'can not' is not a word; use 'cannot' instead" } */ + cdiag ("you can not"); /* { dg-warning "'can not' is not a word; use 'cannot' instead" } */ + + /* See PR target/90157 - aarch64: unnecessary abbreviation in diagnostic */ + cdiag ("arg 1"); /* { dg-warning "'arg' is not a word; use 'argument' instead" } */ + cdiag ("bad arg"); /* { dg-warning "'arg' is not a word; use 'argument' instead" } */ + cdiag ("two args"); /* { dg-warning "'args' is not a word; use 'arguments' instead" } */ + cdiag ("args 1 and 2"); /* { dg-warning "'args' is not a word; use 'arguments' instead" } */ + + cdiag ("reg A"); /* { dg-warning "'reg' is not a word; use 'register' instead" } */ + cdiag ("regs A and B"); /* { dg-warning "'regs' is not a word; use 'registers' instead" } */ + cdiag ("no regs"); /* { dg-warning "'regs' is not a word; use 'registers' instead" } */ +} + + +void test_cdiag_directive (tree t, gimple *gc) +{ + (void)&t; (void)&gc; + + cxxdiag ("%<%s%>", ""); /* { dg-warning "quoted '%s' directive in format" } */ + /* This was asked to be diagnosed in PR #90158 but there, the \"%s\" + is in parentheses which 
ends up getting diagnosed because of + the two consecutive punctuation characters, ( and ". */ + cdiag ("\"%s\"", ""); /* { dg-warning "quoted '%s' directive in format" } */ + + /* Make sure quoted paired tokens are not diagnosed. */ + cdiag ("%<'%>"); + cdiag ("%<\"%>"); + cdiag ("%<<%>"); + cdiag ("%<>%>"); + cdiag ("%<(%>"); + cdiag ("%<)%>"); + cdiag ("%<[%>"); + cdiag ("%<]%>"); + + cdiag ("%<'%> %<\"%> %<>%> %<<%> %<)%> %<(%> %<]%> %<[%>"); +}

[1/12] implement -Wformat-diag to detect quoting and spelling issues in GCC diagnostics

Commit Message

Comments

Patch