From a1f459f8446370704695919b3131653300866ee9 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Thu, 25 May 2017 10:41:20 +0200
Subject: Implement parsing of C++ module declarations

---
NOTE(review): this text was recovered from a rendering that had collapsed
the line breaks and dropped everything written in angle brackets. Line
structure was rebuilt to agree with the hunk headers and the diffstat.
Please confirm against the repository before applying:
  - the #include targets in cc/parser.cxx and cc/parser.hxx are presumed;
  - the diag_mark template arguments are presumed (trace_mark_base is not
    named anywhere in the visible hunks);
  - the module-name/attributes/identifier grammar placeholders in comments
    are presumed;
  - indentation and column alignment are presumed, so context lines may
    only match with git apply --ignore-whitespace.
Two deliberate departures from the recovered text, both in new files:
"to issues" is corrected to "to issue" in a comment, and
translation_unit::module_interface gets a default of false because
parse() otherwise returns it uninitialized when the source contains no
module declaration.

 build2/buildfile       |   1 +
 build2/cc/lexer.hxx    |   2 +-
 build2/cc/parser.cxx   | 194 +++++++++++++++++++++++++++++++++++++++++++++++++
 build2/cc/parser.hxx   |  60 +++++++++++++++
 build2/diagnostics.cxx |   2 +-
 build2/diagnostics.hxx |  12 +--
 build2/parser.hxx      |   1 +
 7 files changed, 264 insertions(+), 8 deletions(-)
 create mode 100644 build2/cc/parser.cxx
 create mode 100644 build2/cc/parser.hxx

(limited to 'build2')

diff --git a/build2/buildfile b/build2/buildfile
index 69dfc94..3a114e2 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -58,6 +58,7 @@ exe{b}: \
       cc/{hxx cxx}{ link      } \
       cc/{hxx cxx}{ module    } \
       cc/{    cxx}{ msvc      } \
+      cc/{hxx cxx}{ parser    } \
       cc/{    cxx}{ pkgconfig } \
       cc/{hxx cxx}{ target    } \
       cc/{hxx    }{ types     } \
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
index 0735b45..146b579 100644
--- a/build2/cc/lexer.hxx
+++ b/build2/cc/lexer.hxx
@@ -141,7 +141,7 @@ namespace build2
     private:
       const path name_;
 
-      fail_mark fail;
+      const fail_mark fail;
     };
 
     // Diagnostics plumbing. We assume that any diag stream for which we can
diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx
new file mode 100644
index 0000000..e5079b7
--- /dev/null
+++ b/build2/cc/parser.cxx
@@ -0,0 +1,194 @@
+// file      : build2/cc/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <build2/cc/parser.hxx>
+
+#include <build2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using type = token_type;
+
+    translation_unit parser::
+    parse (istream& is, const path& name)
+    {
+      name_ = &name;
+
+      lexer l (is, *name_);
+      l_ = &l;
+
+      translation_unit u;
+      u_ = &u;
+
+      // If the source has errors then we want the compiler to issue the
+      // diagnostics. However, the errors could as likely be because we are
+      // mis-parsing things. As a middle ground, we are going to issue
+      // warnings.
+      //
+      size_t bb (0);     // {}-balance.
+      bool   ex (false); // True if inside top-level export{} block.
+
+      token t;
+      while (l_->next (t) != type::eos)
+      {
+        // Break to stop, continue to continue.
+        //
+        switch (t.type)
+        {
+        case type::lcbrace:
+          {
+            ++bb;
+            continue;
+          }
+        case type::rcbrace:
+          {
+            if (bb-- == 0)
+              break; // Imbalance.
+
+            if (ex && bb == 0)
+              ex = false; // Closed top-level export{}.
+
+            continue;
+          }
+        case type::identifier:
+          {
+            // Constructs we need to recognize (the last one is only not to
+            // confuse it with others).
+            //
+            //           [export] import <module-name> [<attributes>] ;
+            //           [export] module <module-name> [<attributes>] ;
+            //            export { import <module-name> [<attributes>] ; }
+            //            extern module ...
+            //
+            const string& id (t.value);
+
+            if (bb == 0)
+            {
+              if (id == "import")
+              {
+                parse_import (t);
+              }
+              else if (id == "module")
+              {
+                parse_module (t, false);
+              }
+              else if (id == "export")
+              {
+                switch (l_->next (t))
+                {
+                case type::lcbrace: ++bb; ex = true; break;
+                case type::identifier:
+                  {
+                    if (id == "module")
+                      parse_module (t, true);
+                    else if (id == "import")
+                      parse_import (t);
+
+                    // Something else, for example, export namespace.
+
+                    break;
+                  }
+                default: break;
+                }
+              }
+              else if (id == "extern")
+                l_->next (t); // Skip to make sure not recognized as module.
+            }
+            else if (ex && bb == 1)
+            {
+              if (id == "import")
+              {
+                parse_import (t);
+              }
+            }
+            continue;
+          }
+        default: continue;
+        }
+
+        break;
+      }
+
+      if (bb != 0)
+        warn (t) << "{}-imbalance detected";
+
+      return u;
+    }
+
+    void parser::
+    parse_import (token& t)
+    {
+      // enter: import keyword
+      // leave: semi
+
+      l_->next (t); // Start of name.
+      string n (parse_module_name (t));
+
+      for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+      if (t.type != type::semi)
+        fail (t) << "';' expected instead of " << t;
+
+      // Ignore duplicate imports. We don't expect large numbers of imports
+      // so vector/linear search is probably more efficient than a set.
+      //
+      auto& is (u_->module_imports);
+
+      if (find (is.begin (), is.end (), n) == is.end ())
+        is.push_back (move (n));
+    }
+
+    void parser::
+    parse_module (token& t, bool ex)
+    {
+      // enter: module keyword
+      // leave: semi
+
+      l_->next (t); // Start of name.
+      string n (parse_module_name (t));
+
+      for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+      if (t.type != type::semi)
+        fail (t) << "';' expected instead of " << t;
+
+      if (!u_->module_name.empty ())
+        fail (t) << "multiple module declarations";
+
+      u_->module_name = move (n);
+      u_->module_interface = ex;
+    }
+
+    string parser::
+    parse_module_name (token& t)
+    {
+      // enter: first token of module name
+      // leave: token after module name
+
+      string n;
+
+      // <identifier>[ . <identifier>]*
+      //
+      for (;; l_->next (t))
+      {
+        if (t.type != type::identifier)
+          fail (t) << "module name expected instead of " << t;
+
+        n += t.value;
+
+        if (l_->next (t) != type::dot)
+          break;
+
+        n += '.';
+      }
+
+      return n;
+    }
+  }
+}
diff --git a/build2/cc/parser.hxx b/build2/cc/parser.hxx
new file mode 100644
index 0000000..d52ddc9
--- /dev/null
+++ b/build2/cc/parser.hxx
@@ -0,0 +1,60 @@
+// file      : build2/cc/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_CC_PARSER_HXX
+#define BUILD2_CC_PARSER_HXX
+
+#include <build2/types.hxx>
+#include <build2/utility.hxx>
+
+#include <build2/diagnostics.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Extract (currently module) information from a preprocessed C/C++
+    // source.
+    //
+    struct translation_unit
+    {
+      string module_name;              // If not empty, then a module unit.
+      bool module_interface = false;   // If true, then module interface unit.
+      vector<string> module_imports;   // Imported modules.
+    };
+
+    struct token;
+    class lexer;
+
+    class parser
+    {
+    public:
+      parser (): fail ("error", &name_), warn ("warning", &name_) {}
+
+      translation_unit
+      parse (istream&, const path& name);
+
+    private:
+      void
+      parse_import (token&);
+
+      void
+      parse_module (token&, bool);
+
+      string
+      parse_module_name (token&);
+
+    private:
+      const path* name_;
+
+      const fail_mark fail;
+      const basic_mark warn;
+
+      lexer* l_;
+      translation_unit* u_;
+    };
+  }
+}
+
+#endif // BUILD2_CC_PARSER_HXX
diff --git a/build2/diagnostics.cxx b/build2/diagnostics.cxx
index 25339f7..7b5663a 100644
--- a/build2/diagnostics.cxx
+++ b/build2/diagnostics.cxx
@@ -94,7 +94,7 @@ namespace build2
   const basic_mark error ("error");
   const basic_mark warn  ("warning");
   const basic_mark info  ("info");
-  const basic_mark text  (nullptr, nullptr); // No type/frame.
+  const basic_mark text  (nullptr, nullptr, nullptr); // No type/data/frame.
   const fail_mark  fail  ("error");
   const fail_end   endf;
 }
diff --git a/build2/diagnostics.hxx b/build2/diagnostics.hxx
index 692115b..ce5f996 100644
--- a/build2/diagnostics.hxx
+++ b/build2/diagnostics.hxx
@@ -244,11 +244,11 @@ namespace build2
 
     explicit
     basic_mark_base (const char* type,
+                     const void* data = nullptr,
                      diag_epilogue* epilogue = &diag_frame::apply,
                      uint16_t (*sverb) () = &stream_verb_map,
                      const char* mod = nullptr,
-                     const char* name = nullptr,
-                     const void* data = nullptr)
+                     const char* name = nullptr)
       : sverb_ (sverb),
         type_ (type), mod_ (mod), name_ (name), data_ (data),
         epilogue_ (epilogue) {}
@@ -300,11 +300,11 @@ namespace build2
                      const char* name,
                      const void* data = nullptr)
       : basic_mark_base ("trace",
+                         data,
                          nullptr, // No diag stack.
                          []() {return stream_verb_max;},
                          mod,
-                         name,
-                         data) {}
+                         name) {}
   };
   using trace_mark = butl::diag_mark<trace_mark_base>;
   using tracer = trace_mark;
@@ -317,6 +317,7 @@ namespace build2
     fail_mark_base (const char* type,
                     const void* data = nullptr)
       : basic_mark_base (type,
+                         data,
                          [](const diag_record& r)
                          {
                            diag_frame::apply (r);
@@ -325,8 +326,7 @@ namespace build2
                          },
                          &stream_verb_map,
                          nullptr,
-                         nullptr,
-                         data) {}
+                         nullptr) {}
   };
   using fail_mark = butl::diag_mark<fail_mark_base>;
 
diff --git a/build2/parser.hxx b/build2/parser.hxx
index 85da4e5..e7afe3a 100644
--- a/build2/parser.hxx
+++ b/build2/parser.hxx
@@ -27,6 +27,7 @@ namespace build2
     // If boot is true, then we are parsing bootstrap.build and modules
     // should only be bootstrapped.
     //
+    explicit
     parser (bool boot = false): fail ("error", &path_), boot_ (boot) {}
 
     // Issue diagnostics and throw failed in case of an error.
-- 
cgit v1.1