diff mbox series

c++: module directive FSM

Message ID c68cea36-5e16-a9d4-1a1b-6ad42568ed40@acm.org
State New
Headers show
Series c++: module directive FSM | expand

Commit Message

Nathan Sidwell Dec. 8, 2020, 1:07 p.m. UTC
As mentioned in the preprocessor patches, there's a new kind of
preprocessor directive for modules, and it interacts with the
compiler proper, as that has to stream in header-unit macro
information (when the directive is an import that names a
header-unit).  This is that machinery.  It's an FSM that inspects the
token stream and does the minimal parsing to detect such imports.
This ends up being called from the C++ parser's tokenizer and from the
-E tokenizer (via a lang hook).  The actual module streaming is a stub
here.

         gcc/cp/
         * cp-tree.h (module_token_pre, module_token_cdtor)
         (module_token_lang): Declare.
         * lex.c: Include langhooks.h.
         (struct module_token_filter): New.
         (module_token_pre, module_token_cdtor)
         (module_token_lang): Define.
         * module.cc (get_module, preprocess_module, preprocessed_module):
         Nop stubs.
diff mbox series

Patch

diff --git i/gcc/cp/cp-tree.h w/gcc/cp/cp-tree.h
index b72069eecda..aa2b0f782fa 100644
--- i/gcc/cp/cp-tree.h
+++ w/gcc/cp/cp-tree.h
@@ -6849,6 +6849,10 @@  extern void set_identifier_kind			(tree, cp_identifier_kind);
 extern bool cxx_init				(void);
 extern void cxx_finish				(void);
 extern bool in_main_input_context		(void);
+extern uintptr_t module_token_pre (cpp_reader *, const cpp_token *, uintptr_t);
+extern uintptr_t module_token_cdtor (cpp_reader *, uintptr_t);
+extern uintptr_t module_token_lang (int type, int keyword, tree value,
+				    location_t, uintptr_t);
 
 /* in method.c */
 extern void init_method				(void);
diff --git i/gcc/cp/lex.c w/gcc/cp/lex.c
index 795f5718198..6053848535e 100644
--- i/gcc/cp/lex.c
+++ w/gcc/cp/lex.c
@@ -32,6 +32,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "c-family/c-objc.h"
 #include "gcc-rich-location.h"
 #include "cp-name-hint.h"
+#include "langhooks.h"
 
 static int interface_strcmp (const char *);
 static void init_cp_pragma (void);
@@ -380,7 +381,206 @@  interface_strcmp (const char* s)
   return 1;
 }
 
-
+/* A filter that spots module and import directives in the token stream.
+   It is a hand-coded co-routine: each token read picks the next state.  */
+
+struct module_token_filter
+{
+  enum state
+  {
+   idle,  /* Between directives.  */
+   module_first,  /* Just saw the import/module keyword.  */
+   module_cont,  /* Within the module name.  */
+   module_end,  /* Ignoring tokens up to CPP_PRAGMA_EOL.  */
+  };
+
+  enum state state : 8;  /* Where in a directive we are.  */
+  bool is_import : 1;  /* Directive began with RID__IMPORT.  */
+  bool got_export : 1;  /* RID__EXPORT was seen.  */
+  bool got_colon : 1;  /* A ':' was seen in the name.  */
+  bool want_dot : 1;  /* Last token was a name; expect '.' or ':'.  */
+
+  location_t token_loc;  /* Location of the import/module keyword.  */
+  cpp_reader *reader;  /* Handed on to preprocess_module.  */
+  module_state *module;  /* First non-null preprocess_module result.  */
+  module_state *import;  /* Name being built; reset per directive.  */
+
+  module_token_filter (cpp_reader *reader)
+    : state (idle), is_import (false),
+    got_export (false), got_colon (false), want_dot (false),
+    token_loc (UNKNOWN_LOCATION),
+    reader (reader), module (NULL), import (NULL)
+  {
+  };
+
+  /* Consume one token; the result is PT_begin_pragma if it starts a
+     directive, else 0.  No CPP_EOF in a pragma: CPP_PRAGMA_EOL ends it.  */
+  uintptr_t resume (int type, int keyword, tree value, location_t loc)
+  {
+    unsigned res = 0;  /* Stays 0 unless a directive begins here.  */
+
+    switch (state)
+      {
+      case idle:  /* Only the three directive keywords matter.  */
+	if (type == CPP_KEYWORD)
+	  switch (keyword)
+	    {
+	    default:
+	      break;
+
+	    case RID__EXPORT:
+	      got_export = true;
+	      res = lang_hooks::PT_begin_pragma;
+	      break;
+
+	    case RID__IMPORT:
+	      is_import = true;
+	      /* FALLTHRU */
+	    case RID__MODULE:
+	      state = module_first;
+	      want_dot = false;
+	      got_colon = false;
+	      token_loc = loc;
+	      import = NULL;
+	      if (!got_export)  /* Else export began the pragma.  */
+		res = lang_hooks::PT_begin_pragma;
+	      break;
+	    }
+	break;
+
+      case module_first:  /* First token after the keyword.  */
+	if (is_import && type == CPP_HEADER_NAME)
+	  {
+	    /* A header name.  The preprocessor will have already
+	       done include searching and canonicalization.  */
+	    state = module_end;
+	    goto header_unit;  /* Record it the way a name is.  */
+	  }
+	
+	if (type == CPP_PADDING || type == CPP_COMMENT)
+	  break;
+
+	state = module_cont;
+	if (type == CPP_COLON && module)
+	  {
+	    got_colon = true;
+	    import = module;  /* Leading ':': start from MODULE.  */
+	    break;
+	  }
+	/* FALLTHROUGH  */
+
+      case module_cont:  /* Names alternating with '.' or ':'.  */
+	switch (type)
+	  {
+	  case CPP_PADDING:
+	  case CPP_COMMENT:
+	    break;
+
+	  default:
+	    /* If we ever need to pay attention to attributes for
+	       header modules, more logic will be needed.  */
+	    state = module_end;
+	    break;
+
+	  case CPP_COLON:
+	    if (got_colon)  /* A second ':' ends the name.  */
+	      state = module_end;
+	    got_colon = true;
+	    /* FALLTHROUGH  */
+	  case CPP_DOT:
+	    if (!want_dot)  /* Separator without a name before it.  */
+	      state = module_end;
+	    want_dot = false;
+	    break;
+
+	  case CPP_PRAGMA_EOL:
+	    goto module_end;  /* Directive over; finish it now.  */
+
+	  case CPP_NAME:
+	    if (want_dot)
+	      {
+		/* Got name instead of [.:].  */
+		state = module_end;
+		break;
+	      }
+	  header_unit:
+	    import = get_module (value, import, got_colon);
+	    want_dot = true;  /* Next must be '.' or ':'.  */
+	    break;
+	  }
+	break;
+
+      case module_end:  /* Skip everything until the directive ends.  */
+	if (type == CPP_PRAGMA_EOL)
+	  {
+	  module_end:;
+	    /* End of the directive, handle the name (if we built one).  */
+	    if (import)
+	      if (module_state *m
+		  = preprocess_module (import, token_loc, module != NULL,
+				       is_import, got_export, reader))
+		if (!module)  /* Only the first result is kept.  */
+		  module = m;
+
+	    is_import = got_export = false;  /* Reset for the next one.  */
+	    state = idle;
+	  }
+	break;
+      }
+
+    return res;
+  }
+};
+
+/* Create the token filter, or tear down the one DATA_ refers to.  */
+
+uintptr_t
+module_token_cdtor (cpp_reader *pfile, uintptr_t data_)
+{
+  module_token_filter *live = reinterpret_cast<module_token_filter *> (data_);
+  if (live)
+    {
+      preprocessed_module (pfile);
+      delete live;
+      return 0;
+    }
+  if (!modules_p ())
+    return data_;
+  return reinterpret_cast<uintptr_t> (new module_token_filter (pfile));
+}
+
+uintptr_t
+module_token_lang (int type, int keyword, tree value, location_t loc,
+		   uintptr_t data_)
+{
+  module_token_filter *filter = reinterpret_cast<module_token_filter *> (data_);
+  return filter->resume (type, keyword, value, loc); /* DATA_ is not checked.  */
+}
+
+/* Preprocessor-side entry: classify TOK and feed it to the filter DATA_.  */
+
+uintptr_t
+module_token_pre (cpp_reader *pfile, const cpp_token *tok, uintptr_t data_)
+{
+  /* A null TOK is the request to create or destroy the filter.  */
+  if (tok == NULL)
+    return module_token_cdtor (pfile, data_);
+
+  /* Names carry their identifier, header names a string of their text.  */
+  tree arg = NULL_TREE;
+  if (tok->type == CPP_HEADER_NAME)
+    arg = build_string (tok->val.str.len, (const char *)tok->val.str.text);
+  else if (tok->type == CPP_NAME)
+    arg = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node.node));
+
+  /* A name that is a keyword is passed on as CPP_KEYWORD with its
+     RID code; every other token keeps its cpp type and RID_MAX.  */
+  bool is_kw = tok->type == CPP_NAME && IDENTIFIER_KEYWORD_P (arg);
+  int kind = is_kw ? CPP_KEYWORD : tok->type;
+  int rid = is_kw ? C_RID_CODE (arg) : RID_MAX;
+
+  return module_token_lang (kind, rid, arg, tok->src_loc, data_);
+}
 
 /* Parse a #pragma whose sole argument is a string constant.
    If OPT is true, the argument is optional.  */
diff --git i/gcc/cp/module.cc w/gcc/cp/module.cc
index f250d6c1819..91a16815811 100644
--- i/gcc/cp/module.cc
+++ w/gcc/cp/module.cc
@@ -64,3 +64,20 @@  along with GCC; see the file COPYING3.  If not see
 #include "intl.h"
 #include "langhooks.h"
 
+module_state *
+get_module (tree /*value*/, module_state * /*import*/, bool /*got_colon*/)
+{
+  return nullptr; /* Nop stub: the filter's IMPORT therefore stays null.  */
+}
+
+module_state *
+preprocess_module (module_state *, unsigned, bool, bool, bool, cpp_reader *)
+{
+  return nullptr; /* Nop stub: the filter then records no module.  */
+}
+
+void
+preprocessed_module (cpp_reader *) /* Called when the filter is torn down.  */
+{ /* Nop stub.  */
+}
+