From 4bdf53837e010073de802070d4e6087410662d3e Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sat, 24 Aug 2019 17:41:30 +0300 Subject: Move cc build system module to separate library --- libbuild2/cc/parser.cxx | 263 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 libbuild2/cc/parser.cxx (limited to 'libbuild2/cc/parser.cxx') diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx new file mode 100644 index 0000000..179043e --- /dev/null +++ b/libbuild2/cc/parser.cxx @@ -0,0 +1,263 @@ +// file : libbuild2/cc/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + using type = token_type; + + unit parser:: + parse (ifdstream& is, const path& name) + { + lexer l (is, name); + l_ = &l; + + unit u; + u_ = &u; + + // If the source has errors then we want the compiler to issues the + // diagnostics. However, the errors could as likely be because we are + // mis-parsing things. Initially, as a middle ground, we were going to + // issue warnings. But the problem with this approach is that they are + // easy to miss. So for now we fail. And it turns out we don't mis- + // parse much. + // + size_t bb (0); // {}-balance. + + token t; + for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; ) + { + // Break to stop, continue to continue, set n to false if the + // next token already extracted. + // + n = true; + + switch (t.type) + { + case type::lcbrace: + { + ++bb; + continue; + } + case type::rcbrace: + { + if (bb-- == 0) + break; // Imbalance. + + continue; + } + case type::identifier: + { + // Constructs we need to recognize: + // + // module ; + // [export] import [] ; + // [export] import [] ; + // [export] module [] ; + // + // Additionally, when include is translated to an import, it's + // normally replaced with the special __import keyword since it + // may appear in C context. + // + const string& id (t.value); + + if (bb == 0) + { + if (id == "import" || id == "__import") + { + parse_import (t, false); + } + else if (id == "module") + { + parse_module (t, false); + } + else if (id == "export") + { + if (l_->next (t) == type::identifier) + { + if (id == "module") parse_module (t, true); + else if (id == "import") parse_import (t, true); + else n = false; // Something else (e.g., export namespace). + } + else + n = false; + } + } + continue; + } + default: continue; + } + + break; + } + + if (bb != 0) + /*warn*/ fail (t) << "{}-imbalance detected"; + + if (module_marker_ && u.module_info.name.empty ()) + fail (*module_marker_) << "module declaration expected after " + << "leading module marker"; + + checksum = l.checksum (); + return u; + } + + void parser:: + parse_import (token& t, bool ex) + { + // enter: import keyword + // leave: semi + + string un; + unit_type ut; + switch (l_->next (t)) // Start of module/header name. + { + case type::less: + case type::string: + { + un = parse_header_name (t); + ut = unit_type::module_header; + break; + } + case type::identifier: + { + un = parse_module_name (t); + ut = unit_type::module_iface; + break; + } + default: + fail (t) << "module or header name expected instead of " << t << endf; + } + + // Should be {}-balanced. + // + for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + + if (t.type != type::semi) + fail (t) << "';' expected instead of " << t; + + // For now we skip header units (see a comment on module type/info + // string serialization in compile rule for details). Note that + // currently parse_header_name() always returns empty name. + // + if (ut == unit_type::module_header) + return; + + // Ignore duplicates. We don't expect a large numbers of (direct) + // imports so vector/linear search is probably more efficient than a + // set. + // + auto& is (u_->module_info.imports); + + auto i (find_if (is.begin (), is.end (), + [&un] (const module_import& i) + { + return i.name == un; + })); + + if (i == is.end ()) + is.push_back (module_import {ut, move (un), ex, 0}); + else + i->exported = i->exported || ex; + } + + void parser:: + parse_module (token& t, bool ex) + { + // enter: module keyword + // leave: semi + + location l (get_location (t)); + + l_->next (t); + + // Handle the leading 'module;' marker (p0713). + // + // Note that we don't bother diagnosing invalid/duplicate markers + // leaving that to the compiler. + // + if (!ex && t.type == type::semi) + { + module_marker_ = move (l); + return; + } + + // Otherwise it should be the start of the module name. + // + string n (parse_module_name (t)); + + // Should be {}-balanced. + // + for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + + if (t.type != type::semi) + fail (t) << "';' expected instead of " << t; + + if (!u_->module_info.name.empty ()) + fail (l) << "multiple module declarations"; + + u_->type = ex ? unit_type::module_iface : unit_type::module_impl; + u_->module_info.name = move (n); + } + + string parser:: + parse_module_name (token& t) + { + // enter: first token of module name + // leave: token after module name + + string n; + + // [ . ]* + // + for (;; l_->next (t)) + { + if (t.type != type::identifier) + fail (t) << "module name expected instead of " << t; + + n += t.value; + + if (l_->next (t) != type::dot) + break; + + n += '.'; + } + + return n; + } + + string parser:: + parse_header_name (token& t) + { + // enter: first token of module name, either string or less + // leave: token after module name + + string n; + + // NOTE: actual name is a TODO if/when we need it. + // + if (t.type == type::string) + /*n = move (t.value)*/; + else + { + while (l_->next (t) != type::greater) + { + if (t.type == type::eos) + fail (t) << "closing '>' expected after header name" << endf; + } + } + + l_->next (t); + return n; + } + } +} -- cgit v1.1