aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/cc/parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/cc/parser.cxx')
-rw-r--r--libbuild2/cc/parser.cxx263
1 files changed, 263 insertions, 0 deletions
diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx
new file mode 100644
index 0000000..179043e
--- /dev/null
+++ b/libbuild2/cc/parser.cxx
@@ -0,0 +1,263 @@
+// file : libbuild2/cc/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/parser.hxx>
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using type = token_type;
+
+ unit parser::
+ parse (ifdstream& is, const path& name)
+ {
+ lexer l (is, name);
+ l_ = &l;
+
+ unit u;
+ u_ = &u;
+
+ // If the source has errors then we want the compiler to issues the
+ // diagnostics. However, the errors could as likely be because we are
+ // mis-parsing things. Initially, as a middle ground, we were going to
+ // issue warnings. But the problem with this approach is that they are
+ // easy to miss. So for now we fail. And it turns out we don't mis-
+ // parse much.
+ //
+ size_t bb (0); // {}-balance.
+
+ token t;
+ for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; )
+ {
+ // Break to stop, continue to continue, set n to false if the
+ // next token already extracted.
+ //
+ n = true;
+
+ switch (t.type)
+ {
+ case type::lcbrace:
+ {
+ ++bb;
+ continue;
+ }
+ case type::rcbrace:
+ {
+ if (bb-- == 0)
+ break; // Imbalance.
+
+ continue;
+ }
+ case type::identifier:
+ {
+ // Constructs we need to recognize:
+ //
+ // module ;
+ // [export] import <module-name> [<attributes>] ;
+ // [export] import <header-name> [<attributes>] ;
+ // [export] module <module-name> [<attributes>] ;
+ //
+ // Additionally, when include is translated to an import, it's
+ // normally replaced with the special __import keyword since it
+ // may appear in C context.
+ //
+ const string& id (t.value);
+
+ if (bb == 0)
+ {
+ if (id == "import" || id == "__import")
+ {
+ parse_import (t, false);
+ }
+ else if (id == "module")
+ {
+ parse_module (t, false);
+ }
+ else if (id == "export")
+ {
+ if (l_->next (t) == type::identifier)
+ {
+ if (id == "module") parse_module (t, true);
+ else if (id == "import") parse_import (t, true);
+ else n = false; // Something else (e.g., export namespace).
+ }
+ else
+ n = false;
+ }
+ }
+ continue;
+ }
+ default: continue;
+ }
+
+ break;
+ }
+
+ if (bb != 0)
+ /*warn*/ fail (t) << "{}-imbalance detected";
+
+ if (module_marker_ && u.module_info.name.empty ())
+ fail (*module_marker_) << "module declaration expected after "
+ << "leading module marker";
+
+ checksum = l.checksum ();
+ return u;
+ }
+
+ void parser::
+ parse_import (token& t, bool ex)
+ {
+ // enter: import keyword
+ // leave: semi
+
+ string un;
+ unit_type ut;
+ switch (l_->next (t)) // Start of module/header name.
+ {
+ case type::less:
+ case type::string:
+ {
+ un = parse_header_name (t);
+ ut = unit_type::module_header;
+ break;
+ }
+ case type::identifier:
+ {
+ un = parse_module_name (t);
+ ut = unit_type::module_iface;
+ break;
+ }
+ default:
+ fail (t) << "module or header name expected instead of " << t << endf;
+ }
+
+ // Should be {}-balanced.
+ //
+ for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+ if (t.type != type::semi)
+ fail (t) << "';' expected instead of " << t;
+
+ // For now we skip header units (see a comment on module type/info
+ // string serialization in compile rule for details). Note that
+ // currently parse_header_name() always returns empty name.
+ //
+ if (ut == unit_type::module_header)
+ return;
+
+ // Ignore duplicates. We don't expect a large numbers of (direct)
+ // imports so vector/linear search is probably more efficient than a
+ // set.
+ //
+ auto& is (u_->module_info.imports);
+
+ auto i (find_if (is.begin (), is.end (),
+ [&un] (const module_import& i)
+ {
+ return i.name == un;
+ }));
+
+ if (i == is.end ())
+ is.push_back (module_import {ut, move (un), ex, 0});
+ else
+ i->exported = i->exported || ex;
+ }
+
+ void parser::
+ parse_module (token& t, bool ex)
+ {
+ // enter: module keyword
+ // leave: semi
+
+ location l (get_location (t));
+
+ l_->next (t);
+
+ // Handle the leading 'module;' marker (p0713).
+ //
+ // Note that we don't bother diagnosing invalid/duplicate markers
+ // leaving that to the compiler.
+ //
+ if (!ex && t.type == type::semi)
+ {
+ module_marker_ = move (l);
+ return;
+ }
+
+ // Otherwise it should be the start of the module name.
+ //
+ string n (parse_module_name (t));
+
+ // Should be {}-balanced.
+ //
+ for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+ if (t.type != type::semi)
+ fail (t) << "';' expected instead of " << t;
+
+ if (!u_->module_info.name.empty ())
+ fail (l) << "multiple module declarations";
+
+ u_->type = ex ? unit_type::module_iface : unit_type::module_impl;
+ u_->module_info.name = move (n);
+ }
+
+ string parser::
+ parse_module_name (token& t)
+ {
+ // enter: first token of module name
+ // leave: token after module name
+
+ string n;
+
+ // <identifier>[ . <identifier>]*
+ //
+ for (;; l_->next (t))
+ {
+ if (t.type != type::identifier)
+ fail (t) << "module name expected instead of " << t;
+
+ n += t.value;
+
+ if (l_->next (t) != type::dot)
+ break;
+
+ n += '.';
+ }
+
+ return n;
+ }
+
+ string parser::
+ parse_header_name (token& t)
+ {
+ // enter: first token of module name, either string or less
+ // leave: token after module name
+
+ string n;
+
+ // NOTE: actual name is a TODO if/when we need it.
+ //
+ if (t.type == type::string)
+ /*n = move (t.value)*/;
+ else
+ {
+ while (l_->next (t) != type::greater)
+ {
+ if (t.type == type::eos)
+ fail (t) << "closing '>' expected after header name" << endf;
+ }
+ }
+
+ l_->next (t);
+ return n;
+ }
+ }
+}