Patchwork Better error reporting for missing semicolons after a struct definition

login
register
mail settings
Submitter Paolo Bonzini
Date Oct. 29, 2010, 3:50 p.m.
Message ID <1288367432-3422-2-git-send-email-bonzini@gnu.org>
Download mbox | patch
Permalink /patch/69606/
State New
Headers show

Comments

Paolo Bonzini - Oct. 29, 2010, 3:50 p.m.
Currently, when a struct, enum or union declaration lacks a terminating
semicolon, GCC usually goes on parsing what follows as a declspec and
prints the infamous "two or more data types in declaration specifiers"
error message.  This patch series instead makes GCC treat the declaration
separately if it includes a tagged type definition, and print a clearer
"expected ';', identifier or '('" error message.

An additional benefit is that when presented with something like

   struct f {}
   int a;

The variables will be given type "int" in the compiler instead of
"struct f", thus removing further cascading errors.

It should be possible to do something like this for C++ too, but I'm
not attempting it right now.  This patch depends on the -Wc++-compat
patch I posted because it also uses the typespec_kind field of struct
c_declspecs.

Bootstrapped and regtested x86_64-pc-linux-gnu, okay for mainline?

Paolo

2010-10-28  Paolo Bonzini  <bonzini@gnu.org>

        * c-parser.c (c_token_is_qualifier,
        c_parser_next_token_is_qualifier): New.
        (c_parser_declaration_or_fndef, c_parser_struct_declaration):
        Improve error message on specs->tagdef_seen_p.
        (c_parser_struct_or_union_specifier): Improve error recovery.
        (c_parser_declspecs): Move exit condition on C_ID_ID early.
        Reorganize exit condition for C_ID_TYPENAME/C_ID_CLASSNAME
        using c_parser_next_token_is_qualifier, and extend it to
        cover !typespec_ok in general, and also specs->tagdef_seen_p.



/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

typedef int x, y;
x y z;			/* { dg-error "" "" } */


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

typedef int x, y;
x struct f z; /* { dg-error "two types " "" } */


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

struct f {
}
int z(); /* { dg-error "expected ';', identifier or " ""  { target *-*-* }  } */


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

int f()
{
  struct f {
  }
  int z; /* { dg-error "expected ';', identifier or " "" } */
}


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

struct f {}
struct g {} /* { dg-error "expected ';', identifier or " "" } */
int f(); /* { dg-error "expected ';', identifier or " "" } */


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

struct s {
  struct f {} /* dg-warning "does not declare anything" "" } */
  struct g {} x; /* { dg-error "expected ';', identifier or " "" } */
};


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

struct s {
  struct f {}
  enum a { X } /* { dg-error "expected ';', identifier or " "" } */
  struct g {} /* { dg-error "expected identifier " "" } */
}; /* { dg-warning "no semicolon" "" } */


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

enum x { XYZ }
struct g { enum x a; }; /* { dg-error "expected ';', identifier or " "" } */

int f(struct g *x)
{
  return x->a == XYZ; /* { dg-bogus " has no member " "" } */
}


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

struct f {}
static int a, b; /* { dg-error "expected ';', identifier or " "" } */

int f()
{
	return a - b; /* { dg-bogus "invalid operands " "" } */
}


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

typedef enum a { XYZ } a; /* { dg-message "previous declaration" } */
enum a a;       /* { dg-error "redeclared" } */
struct b { enum a a : 8; };


/* { dg-do compile } */
/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */

@interface foo
struct f {}
struct g { int a; }; /* { dg-error "expected ';', identifier or " "" } */

- (struct f *) a;
- (struct g *) b;
@end

int f(struct g *x)
{
  return x->a; /* { dg-bogus " has no member " "" } */
}
Joseph S. Myers - Nov. 5, 2010, 6:04 p.m.
On Fri, 29 Oct 2010, Paolo Bonzini wrote:

> @@ -2400,6 +2469,21 @@ c_parser_struct_declaration (c_parser *p
>  	}
>        return ret;
>      }
> +
> +  /* Provide better error recovery.  Note that a type name here is valid,
> +     and will be treated as a field name.  */
> +  if (specs->typespec_kind == ctsk_tagdef
> +      && TREE_CODE (specs->type) != ENUMERAL_TYPE
> +      && c_parser_next_token_starts_declspecs (parser)
> +      && !c_parser_next_token_is (parser, CPP_NAME))
> +    {
> +      c_parser_error (parser, "expected %<;%>, identifier or %<(%>");
> +      parser->error = false;
> +      return NULL_TREE;
> +    }
> +  if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE))
> +    return NULL_TREE;

I don't follow why you are returning NULL_TREE here (or why it is OK to do 
so).  It appears to me that it will quietly return NULL_TREE if you have 
declaration specifiers with no following declarator or semicolon.

Consider the following testcase.

struct s { struct { int a; } };
int *f(struct s *p) { return &p->a; }

Your patch doesn't apply cleanly to the sources I have, either because of 
subsequent changes or because of missing prerequisites, but it appears to 
me that with your patch the parser will accept the struct s declaration 
(diagnosing the missing semicolon at end of structure with a pedwarn) but 
not actually do anything with the anonymous structure element because the 
code above only handles anonymous structs with following semicolons.  The 
current compiler gives parse errors on this code.  The *correct* behavior 
I think is neither - it should be accepted with the anonymous structure 
processed (so the check above should allow for CPP_CLOSE_BRACE as well as 
CPP_SEMICOLON), like 4.0 and earlier did with the old parser.

Otherwise the changes in this patch look correct.
Paolo Bonzini - Nov. 8, 2010, 10:11 a.m.
On 11/05/2010 07:04 PM, Joseph S. Myers wrote:
> struct s { struct { int a; } };
> int *f(struct s *p) { return &p->a; }
>
> The *correct*  behavior I think is neither - it should be accepted
> with the anonymous structure processed (so the check above should
> allow for CPP_CLOSE_BRACE as well as CPP_SEMICOLON), like 4.0 and
> earlier did with the old parser.

I agree, I'll take a look at this testcase.

Thanks for the review!

Paolo

Patch

Index: gcc/c-parser.c
===================================================================
--- gcc/c-parser.c	(branch diag-2)
+++ gcc/c-parser.c	(working copy)
@@ -506,6 +506,47 @@  c_parser_next_token_starts_typename (c_p
   return c_token_starts_typename (token);
 }
 
+/* Return true if TOKEN is a type qualifier, false otherwise.  */
+static bool
+c_token_is_qualifier (c_token *token)
+{
+  switch (token->type)
+    {
+    case CPP_NAME:
+      switch (token->id_kind)
+	{
+	case C_ID_ADDRSPACE:
+	  return true;
+	default:
+	  return false;
+	}
+    case CPP_KEYWORD:
+      switch (token->keyword)
+	{
+	case RID_CONST:
+	case RID_VOLATILE:
+	case RID_RESTRICT:
+	case RID_ATTRIBUTE:
+	  return true;
+	default:
+	  return false;
+	}
+    case CPP_LESS:
+      return false;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Return true if the next token from PARSER is a type qualifier,
+   false otherwise.  */
+static inline bool
+c_parser_next_token_is_qualifier (c_parser *parser)
+{
+  c_token *token = c_parser_peek_token (parser);
+  return c_token_is_qualifier (token);
+}
+
 /* Return true if TOKEN can start declaration specifiers, false
    otherwise.  */
 static bool
@@ -1349,6 +1390,19 @@  c_parser_declaration_or_fndef (c_parser 
       c_parser_consume_token (parser);
       return;
     }
+
+  /* Provide better error recovery.  Note that a type name here is usually
+     better diagnosed as a redeclaration.  */
+  if (empty_ok
+      && specs->typespec_kind == ctsk_tagdef
+      && c_parser_next_token_starts_declspecs (parser)
+      && !c_parser_next_token_is (parser, CPP_NAME))
+    {
+      c_parser_error (parser, "expected %<;%>, identifier or %<(%>");
+      parser->error = false;
+      shadow_tag_warned (specs, 1);
+      return;
+    }
   else if (c_dialect_objc ())
     {
       /* Prefix attributes are an error on method decls.  */
@@ -1812,13 +1866,28 @@  c_parser_declspecs (c_parser *parser, st
 {
   bool attrs_ok = start_attr_ok;
   bool seen_type = (specs->typespec_kind != ctsk_none);
-  while (c_parser_next_token_is (parser, CPP_NAME)
+  while ((c_parser_next_token_is (parser, CPP_NAME)
+	  && c_parser_peek_token (parser)->id_kind != C_ID_ID)
 	 || c_parser_next_token_is (parser, CPP_KEYWORD)
 	 || (c_dialect_objc () && c_parser_next_token_is (parser, CPP_LESS)))
     {
       struct c_typespec t;
       tree attrs;
       location_t loc = c_parser_peek_token (parser)->location;
+
+      /* If we already have seen a tagged definition, or cannot accept a
+	 type, and the next token must start a new type, exit.  Exit for
+	 TYPENAMEs after any type because they can appear as a field name.  */
+      if (!c_parser_next_token_is_qualifier (parser))
+        {
+          if (seen_type && c_parser_next_token_is (parser, CPP_NAME))
+            break;
+
+          if ((!typespec_ok || specs->typespec_kind == ctsk_tagdef)
+	      && c_parser_next_token_starts_typename (parser))
+            break;
+        }
+
       if (c_parser_next_token_is (parser, CPP_NAME))
 	{
 	  tree value = c_parser_peek_token (parser)->value;
@@ -1834,12 +1903,7 @@  c_parser_declspecs (c_parser *parser, st
 	      continue;
 	    }
 
-	  /* This finishes the specifiers unless a type name is OK, it
-	     is declared as a type name and a type name hasn't yet
-	     been seen.  */
-	  if (!typespec_ok || seen_type
-	      || (kind != C_ID_TYPENAME && kind != C_ID_CLASSNAME))
-	    break;
+	  /* Now at a C_ID_TYPENAME or C_ID_CLASSNAME.  */
 	  c_parser_consume_token (parser);
 	  seen_type = true;
 	  attrs_ok = true;
@@ -1857,7 +1921,7 @@  c_parser_declspecs (c_parser *parser, st
 	  else
 	    {
 	      tree proto = NULL_TREE;
-	      gcc_assert (c_dialect_objc ());
+	      gcc_assert (kind == C_ID_CLASSNAME && c_dialect_objc ());
 	      t.kind = ctsk_objc;
 	      if (c_parser_next_token_is (parser, CPP_LESS))
 		proto = c_parser_objc_protocol_refs (parser);
@@ -2280,12 +2344,17 @@  c_parser_struct_or_union_specifier (c_pa
 	      if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE))
 		pedwarn (c_parser_peek_token (parser)->location, 0,
 			 "no semicolon at end of struct or union");
-	      else
+	      else if (parser->error
+		       || !c_parser_next_token_starts_declspecs (parser))
 		{
 		  c_parser_error (parser, "expected %<;%>");
 		  c_parser_skip_until_found (parser, CPP_CLOSE_BRACE, NULL);
 		  break;
 		}
+
+	      /* If we come here, we have already emitted an error
+		 for an expected `;', identifier or `(', and we also
+	         recovered already.  Go on with the next field. */
 	    }
 	}
       postfix_attrs = c_parser_attributes (parser);
@@ -2400,6 +2469,21 @@  c_parser_struct_declaration (c_parser *p
 	}
       return ret;
     }
+
+  /* Provide better error recovery.  Note that a type name here is valid,
+     and will be treated as a field name.  */
+  if (specs->typespec_kind == ctsk_tagdef
+      && TREE_CODE (specs->type) != ENUMERAL_TYPE
+      && c_parser_next_token_starts_declspecs (parser)
+      && !c_parser_next_token_is (parser, CPP_NAME))
+    {
+      c_parser_error (parser, "expected %<;%>, identifier or %<(%>");
+      parser->error = false;
+      return NULL_TREE;
+    }
+  if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE))
+    return NULL_TREE;
+
   pending_xref_error ();
   prefix_attrs = specs->attrs;
   all_prefix_attrs = prefix_attrs;