From a1f459f8446370704695919b3131653300866ee9 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 25 May 2017 10:41:20 +0200 Subject: Implement parsing of C++ module declarations --- build2/buildfile | 1 + build2/cc/lexer.hxx | 2 +- build2/cc/parser.cxx | 194 +++++++++++++++++++++++++++++++++++++++ build2/cc/parser.hxx | 60 ++++++++++++ build2/diagnostics.cxx | 2 +- build2/diagnostics.hxx | 12 +-- build2/parser.hxx | 1 + unit-tests/cc/parser/buildfile | 17 ++++ unit-tests/cc/parser/driver.cxx | 69 ++++++++++++++ unit-tests/cc/parser/module.test | 147 +++++++++++++++++++++++++++++ 10 files changed, 497 insertions(+), 8 deletions(-) create mode 100644 build2/cc/parser.cxx create mode 100644 build2/cc/parser.hxx create mode 100644 unit-tests/cc/parser/buildfile create mode 100644 unit-tests/cc/parser/driver.cxx create mode 100644 unit-tests/cc/parser/module.test diff --git a/build2/buildfile b/build2/buildfile index 69dfc94..3a114e2 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -58,6 +58,7 @@ exe{b}: \ cc/{hxx cxx}{ link } \ cc/{hxx cxx}{ module } \ cc/{ cxx}{ msvc } \ + cc/{hxx cxx}{ parser } \ cc/{ cxx}{ pkgconfig } \ cc/{hxx cxx}{ target } \ cc/{hxx }{ types } \ diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx index 0735b45..146b579 100644 --- a/build2/cc/lexer.hxx +++ b/build2/cc/lexer.hxx @@ -141,7 +141,7 @@ namespace build2 private: const path name_; - fail_mark fail; + const fail_mark fail; }; // Diagnostics plumbing. 
We assume that any diag stream for which we can diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx new file mode 100644 index 0000000..e5079b7 --- /dev/null +++ b/build2/cc/parser.cxx @@ -0,0 +1,194 @@ +// file : build2/cc/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + using type = token_type; + + translation_unit parser:: + parse (istream& is, const path& name) + { + name_ = &name; + + lexer l (is, *name_); + l_ = &l; + + translation_unit u; + u_ = &u; + + // If the source has errors then we want the compiler to issue the + // diagnostics. However, the errors could as likely be because we are + // mis-parsing things. As a middle ground, we are going to issue + // warnings. + // + size_t bb (0); // {}-balance. + bool ex (false); // True if inside top-level export{} block. + + token t; + while (l_->next (t) != type::eos) + { + // Break to stop, continue to continue. + // + switch (t.type) + { + case type::lcbrace: + { + ++bb; + continue; + } + case type::rcbrace: + { + if (bb-- == 0) + break; // Imbalance. + + if (ex && bb == 0) + ex = false; // Closed top-level export{}. + + continue; + } + case type::identifier: + { + // Constructs we need to recognize (the last one is only not to + // confuse it with others). + // + // [export] import <module-name> [<attributes>] ; + // [export] module <module-name> [<attributes>] ; + // export { import <module-name> [<attributes>] ; } + // extern module ... + // + const string& id (t.value); + + if (bb == 0) + { + if (id == "import") + { + parse_import (t); + } + else if (id == "module") + { + parse_module (t, false); + } + else if (id == "export") + { + switch (l_->next (t)) + { + case type::lcbrace: ++bb; ex = true; break; + case type::identifier: + { + if (id == "module") + parse_module (t, true); + else if (id == "import") + parse_import (t); + + // Something else, for example, export namespace. 
+ + break; + } + default: break; + } + } + else if (id == "extern") + l_->next (t); // Skip to make sure not recognized as module. + } + else if (ex && bb == 1) + { + if (id == "import") + { + parse_import (t); + } + } + continue; + } + default: continue; + } + + break; + } + + if (bb != 0) + warn (t) << "{}-imbalance detected"; + + return u; + } + + void parser:: + parse_import (token& t) + { + // enter: import keyword + // leave: semi + + l_->next (t); // Start of name. + string n (parse_module_name (t)); + + for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + + if (t.type != type::semi) + fail (t) << "';' expected instead of " << t; + + // Ignore duplicate imports. We don't expect large numbers of imports + // so vector/linear search is probably more efficient than a set. + // + auto& is (u_->module_imports); + + if (find (is.begin (), is.end (), n) == is.end ()) + is.push_back (move (n)); + } + + void parser:: + parse_module (token& t, bool ex) + { + // enter: module keyword + // leave: semi + + l_->next (t); // Start of name. + string n (parse_module_name (t)); + + for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + + if (t.type != type::semi) + fail (t) << "';' expected instead of " << t; + + if (!u_->module_name.empty ()) + fail (t) << "multiple module declarations"; + + u_->module_name = move (n); + u_->module_interface = ex; + } + + string parser:: + parse_module_name (token& t) + { + // enter: first token of module name + // leave: token after module name + + string n; + + // <identifier>[ . 
<identifier>]* + // + for (;; l_->next (t)) + { + if (t.type != type::identifier) + fail (t) << "module name expected instead of " << t; + + n += t.value; + + if (l_->next (t) != type::dot) + break; + + n += '.'; + } + + return n; + } + } +} diff --git a/build2/cc/parser.hxx b/build2/cc/parser.hxx new file mode 100644 index 0000000..d52ddc9 --- /dev/null +++ b/build2/cc/parser.hxx @@ -0,0 +1,60 @@ +// file : build2/cc/parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUILD2_CC_PARSER_HXX +#define BUILD2_CC_PARSER_HXX + +#include +#include + +#include + +namespace build2 +{ + namespace cc + { + // Extract (currently module) information from a preprocessed C/C++ + // source. + // + struct translation_unit + { + string module_name; // If not empty, then a module unit. + bool module_interface; // If true, then module interface unit. + vector module_imports; // Imported modules. + }; + + struct token; + class lexer; + + class parser + { + public: + parser (): fail ("error", &name_), warn ("warning", &name_) {} + + translation_unit + parse (istream&, const path& name); + + private: + void + parse_import (token&); + + void + parse_module (token&, bool); + + string + parse_module_name (token&); + + private: + const path* name_; + + const fail_mark fail; + const basic_mark warn; + + lexer* l_; + translation_unit* u_; + }; + } +} + +#endif // BUILD2_CC_PARSER_HXX diff --git a/build2/diagnostics.cxx b/build2/diagnostics.cxx index 25339f7..7b5663a 100644 --- a/build2/diagnostics.cxx +++ b/build2/diagnostics.cxx @@ -94,7 +94,7 @@ namespace build2 const basic_mark error ("error"); const basic_mark warn ("warning"); const basic_mark info ("info"); - const basic_mark text (nullptr, nullptr); // No type/frame. + const basic_mark text (nullptr, nullptr, nullptr); // No type/data/frame. 
const fail_mark fail ("error"); const fail_end endf; } diff --git a/build2/diagnostics.hxx b/build2/diagnostics.hxx index 692115b..ce5f996 100644 --- a/build2/diagnostics.hxx +++ b/build2/diagnostics.hxx @@ -244,11 +244,11 @@ namespace build2 explicit basic_mark_base (const char* type, + const void* data = nullptr, diag_epilogue* epilogue = &diag_frame::apply, uint16_t (*sverb) () = &stream_verb_map, const char* mod = nullptr, - const char* name = nullptr, - const void* data = nullptr) + const char* name = nullptr) : sverb_ (sverb), type_ (type), mod_ (mod), name_ (name), data_ (data), epilogue_ (epilogue) {} @@ -300,11 +300,11 @@ namespace build2 const char* name, const void* data = nullptr) : basic_mark_base ("trace", + data, nullptr, // No diag stack. []() {return stream_verb_max;}, mod, - name, - data) {} + name) {} }; using trace_mark = butl::diag_mark; using tracer = trace_mark; @@ -317,6 +317,7 @@ namespace build2 fail_mark_base (const char* type, const void* data = nullptr) : basic_mark_base (type, + data, [](const diag_record& r) { diag_frame::apply (r); @@ -325,8 +326,7 @@ namespace build2 }, &stream_verb_map, nullptr, - nullptr, - data) {} + nullptr) {} }; using fail_mark = butl::diag_mark; diff --git a/build2/parser.hxx b/build2/parser.hxx index 85da4e5..e7afe3a 100644 --- a/build2/parser.hxx +++ b/build2/parser.hxx @@ -27,6 +27,7 @@ namespace build2 // If boot is true, then we are parsing bootstrap.build and modules // should only be bootstrapped. // + explicit parser (bool boot = false): fail ("error", &path_), boot_ (boot) {} // Issue diagnostics and throw failed in case of an error. 
diff --git a/unit-tests/cc/parser/buildfile b/unit-tests/cc/parser/buildfile new file mode 100644 index 0000000..5d20367 --- /dev/null +++ b/unit-tests/cc/parser/buildfile @@ -0,0 +1,17 @@ +# file : unit-tests/cc/parser/buildfile +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +#@@ Temporary until we get utility library support. +# +import libs = libbutl%lib{butl} +src = cc/{lexer parser} token lexer diagnostics utility variable name b-options types-parsers \ +context scope parser target operation rule prerequisite file module function \ +functions-builtin functions-path functions-process-path functions-string \ +functions-target-triplet algorithm search dump filesystem scheduler \ +config/{utility init operation module} spec + +exe{driver}: cxx{driver} ../../../build2/cxx{$src} ../../../build2/liba{b} \ +$libs test{*} + +include ../../../build2/ diff --git a/unit-tests/cc/parser/driver.cxx b/unit-tests/cc/parser/driver.cxx new file mode 100644 index 0000000..cdddaca --- /dev/null +++ b/unit-tests/cc/parser/driver.cxx @@ -0,0 +1,69 @@ +// file : unit-tests/cc/parser/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include + +#include + +using namespace std; + +namespace build2 +{ + namespace cc + { + // Usage: argv[0] [<file>] + // + int + main (int argc, char* argv[]) + { + try + { + istream* is; + const char* in; + + // Reading from file is several times faster. + // + ifdstream ifs; + if (argc > 1) + { + in = argv[1]; + ifs.open (in); + is = &ifs; + } + else + { + in = "stdin"; + cin.exceptions (istream::failbit | istream::badbit); + is = &cin; + } + + parser p; + translation_unit u (p.parse (*is, path (in))); + + for (const string& n: u.module_imports) + cout << "import " << n << ';' << endl; + + if (!u.module_name.empty ()) + cout << (u.module_interface ? 
"export " : "") + << "module " << u.module_name << ';' << endl; + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::cc::main (argc, argv); +} diff --git a/unit-tests/cc/parser/module.test b/unit-tests/cc/parser/module.test new file mode 100644 index 0000000..f85c969 --- /dev/null +++ b/unit-tests/cc/parser/module.test @@ -0,0 +1,147 @@ +# file : unit-tests/cc/parser/module.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test C++ module constructs. +# + +: import +: +$* <>EOI +import foo; +import foo.bar; +import foo.bar.baz; +EOI + +: module-implementation +: +$* <>EOI +module foo; +EOI + +: module-interface +: +$* <>EOI +export module foo; +EOI + +: export-imported +: +$* <>EOO +export import foo; +EOI +import foo; +EOO + +: export-imported-block +: +$* <>EOO +export {import foo;} + +export +{ + namespace foo + { + class c {}; + } + + template int f (); + + import bar; +} +EOI +import foo; +import bar; +EOO + +: non-module +: +$* <>EOO +import foo [[export({import})]]; +module bar [[module({module})]]; +EOI +import foo; +module bar; +EOO + +: import-duplicate +: +$* <>EOO +import foo; +import bar.baz; +import foo; +import bar . 
baz; +EOI +import foo; +import bar.baz; +EOO + +: brace-missing +: +$* <>EOE +export +{ + class foo + { + //}; + module foo; +} +EOI +stdin:8:1: warning: {}-imbalance detected +EOE + +: brace-stray +: +$* <>EOE +export +{ + class foo + { + };} +} +module foo; +EOI +stdin:6:1: warning: {}-imbalance detected +EOE + +: import-missing-name +: +$* <>EOE != 0 +import ; +EOI +stdin:1:8: error: module name expected instead of ';' +EOE + +: module-missing-name +: +$* <>EOE != 0 +module ; +EOI +stdin:1:8: error: module name expected instead of ';' +EOE + +: import-missing-semi +: +$* <>EOE != 0 +import foo +EOI +stdin:2:1: error: ';' expected instead of +EOE + +: module-missing-semi +: +$* <>EOE != 0 +export module foo +EOI +stdin:2:1: error: ';' expected instead of +EOE -- cgit v1.1