diff mbox

[c] better recovery from missing semicolon after declaration

Message ID 20101117152929.GX24469@nightcrawler
State New
Headers show

Commit Message

Nathan Froyd Nov. 17, 2010, 3:29 p.m. UTC
I was editing cp-tree.h and forgot a semicolon after a function
declaration, like so:

  extern void f (tree)
  extern void g (tree);

Compiling the above gave me several tens of pages of errors, none of
which had anything to do with the actual error.

This patch attempts to fix that by detecting when we've just parsed a
declaration and we're faced with what looks like the beginning of the
next declaration.  We cannot use c_parser_next_token_starts_declspecs,
as we need to cope with things like (from
c-torture/compile/20000403-1.c):

  int uname (name) struct utsname *name;

We do, however, attempt to detect when we're not parsing an old-style
declaration and rightly complain for:

  extern void g1 (int)
  struct s { int a; };

It would be better if we could give the location of the previous token,
but the C parser is not really set up to do that.

Tested on x86_64-unknown-linux-gnu.  OK to commit?

-Nathan

gcc/
	* c-parser.c (c_parser_next_token_cannot_follow_declaration):
	New function.
	(c_parser_declaration_or_fndef): Intelligently handle invalid
	tokens after a declaration.

gcc/testsuite/
	* gcc.dg/semicolon-errors.c: New test.

Comments

Joseph Myers Nov. 19, 2010, 5:53 p.m. UTC | #1
On Wed, 17 Nov 2010, Nathan Froyd wrote:

> +/* Return true if the next token from PARSER cannot follow a
> +   declaration, false otherwise.  */
> +static inline bool
> +c_parser_next_token_cannot_follow_declaration (c_parser *parser,
> +					       struct c_declarator *declarator)

The point of this function seems not to be that the token cannot follow a 
declaration - there are a great many other tokens that cannot follow a 
declaration - but that it cannot follow *and* looks like the start of the 
next declaration.

> +	case RID_STRUCT:
> +	case RID_UNION:
> +	case RID_UNSIGNED:
> +	case RID_LONG:
> +	case RID_INT128:
> +	case RID_SHORT:
> +	case RID_SIGNED:
> +	case RID_COMPLEX:
> +	case RID_INT:
> +	case RID_CHAR:
> +	case RID_FLOAT:
> +	case RID_DOUBLE:
> +	case RID_VOID:
> +	case RID_DFLOAT32:
> +	case RID_DFLOAT64:
> +	case RID_DFLOAT128:
> +	case RID_BOOL:
> +	case RID_ENUM:

What about RID_CONST, RID_VOLATILE, RID_RESTRICT, RID_FRACT, RID_ACCUM, 
RID_SAT, RID_TYPEOF, all of which appear to be in a similar situation?  
Where did this list come from?

> +	  /* We need to be careful, since K&R argument lists can begin
> +	     with these tokens.  Consult the types of the argument info;
> +	     if we actually have a type, then these tokens are invalid.
> +	     Don't bother grovelling through the whole list.  */
> +	  if (declarator->kind == cdk_pointer)
> +	    declarator = declarator->declarator;
> +	  if (declarator->kind == cdk_function)
> +	    return TYPE_P (TREE_VALUE (declarator->u.arg_info->types));
> +	  else
> +	    return true;

This is not correct.  You need to recurse all the way to see if the 
innermost declarator that isn't cdk_id or cdk_attrs is cdk_function, and 
look at that declarator if so, to find the type of the function being 
declared.  This patch breaks testcases such as:

int **f(a) int a; { return 0; }
diff mbox

Patch

diff --git a/gcc/c-parser.c b/gcc/c-parser.c
index 577528d..92d0efc 100644
--- a/gcc/c-parser.c
+++ b/gcc/c-parser.c
@@ -592,6 +592,64 @@  c_token_starts_declaration (c_token *token)
 
 static c_token *c_parser_peek_2nd_token (c_parser *parser);
 
+/* Return true if the next token from PARSER cannot follow a
+   declaration, false otherwise.  */
+static inline bool
+c_parser_next_token_cannot_follow_declaration (c_parser *parser,
+					       struct c_declarator *declarator)
+{
+  c_token *token = c_parser_peek_token (parser);
+
+  switch (token->type)
+    {
+    case CPP_KEYWORD:
+      switch (token->keyword)
+	{
+	case RID_EXTERN:
+	case RID_STATIC:
+	case RID_TYPEDEF:
+	case RID_INLINE:
+	case RID_AUTO:
+	case RID_THREAD:
+	  /* Can never follow.  */
+	  return true;
+	case RID_STRUCT:
+	case RID_UNION:
+	case RID_UNSIGNED:
+	case RID_LONG:
+	case RID_INT128:
+	case RID_SHORT:
+	case RID_SIGNED:
+	case RID_COMPLEX:
+	case RID_INT:
+	case RID_CHAR:
+	case RID_FLOAT:
+	case RID_DOUBLE:
+	case RID_VOID:
+	case RID_DFLOAT32:
+	case RID_DFLOAT64:
+	case RID_DFLOAT128:
+	case RID_BOOL:
+	case RID_ENUM:
+	  /* We need to be careful, since K&R argument lists can begin
+	     with these tokens.  Consult the types of the argument info;
+	     if we actually have a type, then these tokens are invalid.
+	     Don't bother grovelling through the whole list.  */
+	  if (declarator->kind == cdk_pointer)
+	    declarator = declarator->declarator;
+	  if (declarator->kind == cdk_function)
+	    return TYPE_P (TREE_VALUE (declarator->u.arg_info->types));
+	  else
+	    return true;
+	default:
+	  return false;
+	}
+      break;
+    default:
+      return false;
+    }
+}
+
 /* Return true if the next token from PARSER can start declaration
    specifiers, false otherwise.  */
 static inline bool
@@ -1601,6 +1659,13 @@  c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
 	      return;
 	    }
 	}
+      else if (c_parser_next_token_cannot_follow_declaration (parser,
+							      declarator))
+	{
+	  start_decl (declarator, specs, false, all_prefix_attrs);
+	  c_parser_error (parser, "expected %<;%>");
+	  return;
+	}
       else if (!fndef_ok)
 	{
 	  c_parser_error (parser, "expected %<=%>, %<,%>, %<;%>, "
diff --git a/gcc/testsuite/gcc.dg/semicolon-errors.c b/gcc/testsuite/gcc.dg/semicolon-errors.c
new file mode 100644
index 0000000..e579313
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/semicolon-errors.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */
+
+extern void f1 (int)
+extern void f2 (int);		/* { dg-error "expected" } */
+
+extern int x
+extern int z;			/* { dg-error "expected" } */
+
+extern void g1 (int)
+struct s { int a; };		/* { dg-error "expected" } */
+
+typedef unsigned int uint32
+extern int i;			/* { dg-error "expected" } */