aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-05-25 10:41:20 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-05-25 10:41:20 +0200
commit a1f459f8446370704695919b3131653300866ee9 (patch)
tree 8bc4670b2f9355d694e38443d99f506dd0ea9efc
parent 0cef93b4e2e9bf39b0ca542876f9ab1af6d0f01d (diff)
Implement parsing of C++ module declarations
-rw-r--r-- build2/buildfile 1
-rw-r--r-- build2/cc/lexer.hxx 2
-rw-r--r-- build2/cc/parser.cxx 194
-rw-r--r-- build2/cc/parser.hxx 60
-rw-r--r-- build2/diagnostics.cxx 2
-rw-r--r-- build2/diagnostics.hxx 12
-rw-r--r-- build2/parser.hxx 1
-rw-r--r-- unit-tests/cc/parser/buildfile 17
-rw-r--r-- unit-tests/cc/parser/driver.cxx 69
-rw-r--r-- unit-tests/cc/parser/module.test 147
10 files changed, 497 insertions, 8 deletions
diff --git a/build2/buildfile b/build2/buildfile
index 69dfc94..3a114e2 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -58,6 +58,7 @@ exe{b}: \
cc/{hxx cxx}{ link } \
cc/{hxx cxx}{ module } \
cc/{ cxx}{ msvc } \
+ cc/{hxx cxx}{ parser } \
cc/{ cxx}{ pkgconfig } \
cc/{hxx cxx}{ target } \
cc/{hxx }{ types } \
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
index 0735b45..146b579 100644
--- a/build2/cc/lexer.hxx
+++ b/build2/cc/lexer.hxx
@@ -141,7 +141,7 @@ namespace build2
private:
const path name_;
- fail_mark fail;
+ const fail_mark fail;
};
// Diagnostics plumbing. We assume that any diag stream for which we can
diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx
new file mode 100644
index 0000000..e5079b7
--- /dev/null
+++ b/build2/cc/parser.cxx
@@ -0,0 +1,194 @@
+// file : build2/cc/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/cc/parser.hxx>
+
+#include <build2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using type = token_type;
+
+ translation_unit parser::
+ parse (istream& is, const path& name)
+ {
+ name_ = &name;
+
+ lexer l (is, *name_);
+ l_ = &l;
+
+ translation_unit u;
+ u_ = &u;
+
+ // If the source has errors then we want the compiler to issue the
+ // diagnostics. However, the errors could as likely be because we are
+ // mis-parsing things. As a middle ground, we are going to issue
+ // warnings.
+ //
+ size_t bb (0); // {}-balance (current brace nesting depth).
+ bool ex (false); // True if inside top-level export{} block.
+
+ token t;
+ while (l_->next (t) != type::eos)
+ {
+ // Break out of the switch to stop parsing, continue to get the next token.
+ //
+ switch (t.type)
+ {
+ case type::lcbrace:
+ {
+ ++bb;
+ continue;
+ }
+ case type::rcbrace:
+ {
+ if (bb-- == 0)
+ break; // Imbalance (unsigned bb wraps to non-zero so the check after the loop warns).
+
+ if (ex && bb == 0)
+ ex = false; // Closed top-level export{}.
+
+ continue;
+ }
+ case type::identifier:
+ {
+ // Constructs we need to recognize (the last one is recognized only
+ // so that it is not confused with the others).
+ //
+ // [export] import <module-name> [<attributes>] ;
+ // [export] module <module-name> [<attributes>] ;
+ // export { import <module-name> [<attributes>] ; }
+ // extern module ...
+ //
+ const string& id (t.value); // Reference: reflects whatever token t currently holds.
+
+ if (bb == 0)
+ {
+ if (id == "import")
+ {
+ parse_import (t);
+ }
+ else if (id == "module")
+ {
+ parse_module (t, false);
+ }
+ else if (id == "export")
+ {
+ switch (l_->next (t))
+ {
+ case type::lcbrace: ++bb; ex = true; break;
+ case type::identifier:
+ {
+ if (id == "module") // id aliases t.value so this tests the token after export.
+ parse_module (t, true);
+ else if (id == "import")
+ parse_import (t);
+
+ // Something else, for example, export namespace.
+
+ break;
+ }
+ default: break;
+ }
+ }
+ else if (id == "extern")
+ l_->next (t); // Skip the next token so that extern module is not recognized as a module declaration.
+ }
+ else if (ex && bb == 1)
+ {
+ if (id == "import")
+ {
+ parse_import (t);
+ }
+ }
+ continue;
+ }
+ default: continue;
+ }
+
+ break; // Only reached if the switch was exited with break (stop parsing).
+ }
+
+ if (bb != 0)
+ warn (t) << "{}-imbalance detected";
+
+ return u;
+ }
+
+ void parser::
+ parse_import (token& t)
+ {
+ // enter: t is the import keyword
+ // leave: t is the terminating semicolon (fails if end of stream is hit first)
+
+ l_->next (t); // Start of name.
+ string n (parse_module_name (t)); // Leaves t at the token after the name.
+
+ for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; // Skip the rest (attributes, if any) up to the semicolon.
+
+ if (t.type != type::semi)
+ fail (t) << "';' expected instead of " << t;
+
+ // Ignore duplicate imports. We don't expect large numbers of imports
+ // so vector/linear search is probably more efficient than a set.
+ //
+ auto& is (u_->module_imports);
+
+ if (find (is.begin (), is.end (), n) == is.end ())
+ is.push_back (move (n));
+ }
+
+ void parser::
+ parse_module (token& t, bool ex)
+ {
+ // enter: t is the module keyword, ex is true if it was preceded by export
+ // leave: t is the terminating semicolon (fails if end of stream is hit first)
+
+ l_->next (t); // Start of name.
+ string n (parse_module_name (t)); // Leaves t at the token after the name.
+
+ for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; // Skip the rest (attributes, if any) up to the semicolon.
+
+ if (t.type != type::semi)
+ fail (t) << "';' expected instead of " << t;
+
+ if (!u_->module_name.empty ()) // A module declaration has already been recorded.
+ fail (t) << "multiple module declarations";
+
+ u_->module_name = move (n);
+ u_->module_interface = ex; // An exported module declaration makes this an interface unit.
+ }
+
+ string parser::
+ parse_module_name (token& t)
+ {
+ // enter: first token of module name
+ // leave: token after module name
+
+ string n; // Accumulates the dot-separated name without any whitespace.
+
+ // <identifier>[ . <identifier>]*
+ //
+ for (;; l_->next (t)) // After a dot, advance to the next name component.
+ {
+ if (t.type != type::identifier)
+ fail (t) << "module name expected instead of " << t;
+
+ n += t.value;
+
+ if (l_->next (t) != type::dot)
+ break; // Not a dot: t is the first token after the name.
+
+ n += '.';
+ }
+
+ return n;
+ }
+ }
+}
diff --git a/build2/cc/parser.hxx b/build2/cc/parser.hxx
new file mode 100644
index 0000000..d52ddc9
--- /dev/null
+++ b/build2/cc/parser.hxx
@@ -0,0 +1,60 @@
+// file : build2/cc/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_CC_PARSER_HXX
+#define BUILD2_CC_PARSER_HXX
+
+#include <build2/types.hxx>
+#include <build2/utility.hxx>
+
+#include <build2/diagnostics.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Extract (currently module) information from a preprocessed C/C++
+ // source.
+ //
+ struct translation_unit
+ {
+ string module_name; // If not empty, then a module unit.
+ bool module_interface = false; // If true, then module interface unit (stays false if no module declaration is seen).
+ vector<string> module_imports; // Imported modules (in order of first appearance, duplicates ignored).
+ };
+
+ struct token;
+ class lexer;
+
+ class parser
+ {
+ public:
+ parser (): fail ("error", &name_), warn ("warning", &name_) {} // The marks are given the address of name_, which parse() sets.
+
+ translation_unit
+ parse (istream&, const path& name); // The name is passed to the lexer and stored for the diagnostics marks.
+
+ private:
+ void
+ parse_import (token&);
+
+ void
+ parse_module (token&, bool);
+
+ string
+ parse_module_name (token&);
+
+ private:
+ const path* name_; // Set at the start of parse().
+
+ const fail_mark fail;
+ const basic_mark warn;
+
+ lexer* l_; // Points to the lexer local to parse().
+ translation_unit* u_; // Points to the result being built by parse().
+ };
+ }
+}
+
+#endif // BUILD2_CC_PARSER_HXX
diff --git a/build2/diagnostics.cxx b/build2/diagnostics.cxx
index 25339f7..7b5663a 100644
--- a/build2/diagnostics.cxx
+++ b/build2/diagnostics.cxx
@@ -94,7 +94,7 @@ namespace build2
const basic_mark error ("error");
const basic_mark warn ("warning");
const basic_mark info ("info");
- const basic_mark text (nullptr, nullptr); // No type/frame.
+ const basic_mark text (nullptr, nullptr, nullptr); // No type/data/frame.
const fail_mark fail ("error");
const fail_end endf;
}
diff --git a/build2/diagnostics.hxx b/build2/diagnostics.hxx
index 692115b..ce5f996 100644
--- a/build2/diagnostics.hxx
+++ b/build2/diagnostics.hxx
@@ -244,11 +244,11 @@ namespace build2
explicit
basic_mark_base (const char* type,
+ const void* data = nullptr,
diag_epilogue* epilogue = &diag_frame::apply,
uint16_t (*sverb) () = &stream_verb_map,
const char* mod = nullptr,
- const char* name = nullptr,
- const void* data = nullptr)
+ const char* name = nullptr)
: sverb_ (sverb),
type_ (type), mod_ (mod), name_ (name), data_ (data),
epilogue_ (epilogue) {}
@@ -300,11 +300,11 @@ namespace build2
const char* name,
const void* data = nullptr)
: basic_mark_base ("trace",
+ data,
nullptr, // No diag stack.
[]() {return stream_verb_max;},
mod,
- name,
- data) {}
+ name) {}
};
using trace_mark = butl::diag_mark<trace_mark_base>;
using tracer = trace_mark;
@@ -317,6 +317,7 @@ namespace build2
fail_mark_base (const char* type,
const void* data = nullptr)
: basic_mark_base (type,
+ data,
[](const diag_record& r)
{
diag_frame::apply (r);
@@ -325,8 +326,7 @@ namespace build2
},
&stream_verb_map,
nullptr,
- nullptr,
- data) {}
+ nullptr) {}
};
using fail_mark = butl::diag_mark<fail_mark_base>;
diff --git a/build2/parser.hxx b/build2/parser.hxx
index 85da4e5..e7afe3a 100644
--- a/build2/parser.hxx
+++ b/build2/parser.hxx
@@ -27,6 +27,7 @@ namespace build2
// If boot is true, then we are parsing bootstrap.build and modules
// should only be bootstrapped.
//
+ explicit
parser (bool boot = false): fail ("error", &path_), boot_ (boot) {}
// Issue diagnostics and throw failed in case of an error.
diff --git a/unit-tests/cc/parser/buildfile b/unit-tests/cc/parser/buildfile
new file mode 100644
index 0000000..5d20367
--- /dev/null
+++ b/unit-tests/cc/parser/buildfile
@@ -0,0 +1,17 @@
+# file : unit-tests/cc/parser/buildfile
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+#@@ Temporary until we get utility library support.
+#
+import libs = libbutl%lib{butl}
+src = cc/{lexer parser} token lexer diagnostics utility variable name b-options types-parsers \
+context scope parser target operation rule prerequisite file module function \
+functions-builtin functions-path functions-process-path functions-string \
+functions-target-triplet algorithm search dump filesystem scheduler \
+config/{utility init operation module} spec
+
+exe{driver}: cxx{driver} ../../../build2/cxx{$src} ../../../build2/liba{b} \
+$libs test{*}
+
+include ../../../build2/
diff --git a/unit-tests/cc/parser/driver.cxx b/unit-tests/cc/parser/driver.cxx
new file mode 100644
index 0000000..cdddaca
--- /dev/null
+++ b/unit-tests/cc/parser/driver.cxx
@@ -0,0 +1,69 @@
+// file : unit-tests/cc/parser/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <build2/types.hxx>
+#include <build2/utility.hxx>
+
+#include <build2/cc/parser.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Usage: argv[0] [<file>]
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ try
+ {
+ istream* is;
+ const char* in; // Input name passed to the parser.
+
+ // Reading from file is several times faster.
+ //
+ ifdstream ifs;
+ if (argc > 1)
+ {
+ in = argv[1];
+ ifs.open (in);
+ is = &ifs;
+ }
+ else
+ {
+ in = "stdin";
+ cin.exceptions (istream::failbit | istream::badbit);
+ is = &cin;
+ }
+
+ parser p;
+ translation_unit u (p.parse (*is, path (in)));
+
+ for (const string& n: u.module_imports) // Print the imports first, then the module declaration (if any).
+ cout << "import " << n << ';' << endl;
+
+ if (!u.module_name.empty ())
+ cout << (u.module_interface ? "export " : "")
+ << "module " << u.module_name << ';' << endl;
+ }
+ catch (const failed&) // Parsing failed: exit with non-zero status.
+ {
+ return 1;
+ }
+
+ return 0;
+ }
+ }
+}
+
+int
+main (int argc, char* argv[])
+{
+ return build2::cc::main (argc, argv); // Forward to the implementation in the build2::cc namespace.
+}
diff --git a/unit-tests/cc/parser/module.test b/unit-tests/cc/parser/module.test
new file mode 100644
index 0000000..f85c969
--- /dev/null
+++ b/unit-tests/cc/parser/module.test
@@ -0,0 +1,147 @@
+# file : unit-tests/cc/parser/module.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test C++ module constructs.
+#
+
+: import
+:
+$* <<EOI >>EOI
+import foo;
+import foo.bar;
+import foo.bar.baz;
+EOI
+
+: module-implementation
+:
+$* <<EOI >>EOI
+module foo;
+EOI
+
+: module-interface
+:
+$* <<EOI >>EOI
+export module foo;
+EOI
+
+: export-imported
+:
+$* <<EOI >>EOO
+export import foo;
+EOI
+import foo;
+EOO
+
+: export-imported-block
+:
+$* <<EOI >>EOO
+export {import foo;}
+
+export
+{
+ namespace foo
+ {
+ class c {};
+ }
+
+ template <typename T> int f ();
+
+ import bar;
+}
+EOI
+import foo;
+import bar;
+EOO
+
+: non-module
+:
+$* <<EOI
+#pragma import module foo;
+#pragma export module foo;
+#pragma module foo;
+extern module foo: int foo ();
+export namespace bar {int fox ();}
+EOI
+
+: attribute
+:
+$* <<EOI >>EOO
+import foo [[export({import})]];
+module bar [[module({module})]];
+EOI
+import foo;
+module bar;
+EOO
+
+: import-duplicate
+:
+$* <<EOI >>EOO
+import foo;
+import bar.baz;
+import foo;
+import bar . baz;
+EOI
+import foo;
+import bar.baz;
+EOO
+
+: brace-missing
+:
+$* <<EOI 2>>EOE
+export
+{
+ class foo
+ {
+ //};
+ module foo;
+}
+EOI
+stdin:8:1: warning: {}-imbalance detected
+EOE
+
+: brace-stray
+:
+$* <<EOI 2>>EOE
+export
+{
+ class foo
+ {
+ };}
+}
+module foo;
+EOI
+stdin:6:1: warning: {}-imbalance detected
+EOE
+
+: import-missing-name
+:
+$* <<EOI 2>>EOE != 0
+import ;
+EOI
+stdin:1:8: error: module name expected instead of ';'
+EOE
+
+: module-missing-name
+:
+$* <<EOI 2>>EOE != 0
+module ;
+EOI
+stdin:1:8: error: module name expected instead of ';'
+EOE
+
+: import-missing-semi
+:
+$* <<EOI 2>>EOE != 0
+import foo
+EOI
+stdin:2:1: error: ';' expected instead of <end of file>
+EOE
+
+: module-missing-semi
+:
+$* <<EOI 2>>EOE != 0
+export module foo
+EOI
+stdin:2:1: error: ';' expected instead of <end of file>
+EOE