From 835ed5f7080a98e9ee80ac08d5585ccdbb63fe0e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 15 Dec 2014 10:43:16 +0200 Subject: Parse directory scopes --- build/lexer.cxx | 10 +-- build/parser | 18 ++++- build/parser.cxx | 155 ++++++++++++++++++++++++++++++++---------- build/token | 29 +++----- tests/.gitignore | 1 + tests/build/lexer/driver.cxx | 15 ++-- tests/build/parser/driver.cxx | 18 +++++ 7 files changed, 174 insertions(+), 72 deletions(-) create mode 100644 tests/.gitignore diff --git a/build/lexer.cxx b/build/lexer.cxx index 9e3521a..a1aa375 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -19,7 +19,7 @@ namespace build uint64_t ln (c.line ()), cn (c.column ()); if (is_eos (c)) - return token (ln, cn); + return token (token_type::eos, ln, cn); switch (c) { @@ -27,19 +27,19 @@ namespace build // case '\n': { - return token (token_punctuation::newline, ln, cn); + return token (token_type::newline, ln, cn); } case ':': { - return token (token_punctuation::colon, ln, cn); + return token (token_type::colon, ln, cn); } case '{': { - return token (token_punctuation::lcbrace, ln, cn); + return token (token_type::lcbrace, ln, cn); } case '}': { - return token (token_punctuation::rcbrace, ln, cn); + return token (token_type::rcbrace, ln, cn); } } diff --git a/build/parser b/build/parser index 04ef00d..c487015 100644 --- a/build/parser +++ b/build/parser @@ -6,6 +6,7 @@ #define BUILD_PARSER #include +#include #include #include @@ -32,13 +33,26 @@ namespace build // Recursive descent parser. // private: + typedef std::vector names; + void - names (token&, token_type&); + parse_clause (token&, token_type&); + + names + parse_names (token& t, token_type& tt) + { + names ns; + parse_names (t, tt, ns); + return ns; + } + + void + parse_names (token&, token_type&, names&); // Utilities. // private: - void + token_type next (token&, token_type&); std::ostream& diff --git a/build/parser.cxx b/build/parser.cxx index 669ac8b..348d285 100644 --- a/build/parser.cxx +++ b/build/parser.cxx @@ -19,7 +19,6 @@ namespace build operator<< (ostream&, const token&); typedef token_type type; - typedef token_punctuation punc; void parser:: parse (istream& is, const path& p) @@ -28,31 +27,122 @@ namespace build lexer_ = &l; path_ = &p; - token t (0, 0); // eos + token t (type::eos, 0, 0); type tt; + next (t, tt); - for (next (t, tt); tt != type::eos; ) + parse_clause (t, tt); + + if (tt != type::eos) + { + error (t) << "unexpected " << t << endl; + throw parser_error (); + } + } + + void parser:: + parse_clause (token& t, token_type& tt) + { + while (tt != type::eos) { // We always start with one or more names. // - names (t, tt); + if (tt != type::name && tt != type::lcbrace) + break; // Something else. Let our caller handle that. + + names ns (parse_names (t, tt)); - if (t.is (punc::colon)) + if (tt == type::colon) { next (t, tt); - if (tt == type::name || t.is (punc::lcbrace)) - names (t, tt); + // Dependency declaration. + // + if (tt == type::name || tt == type::lcbrace) + { + names ns (parse_names (t, tt)); - if (t.is (punc::newline)) - next (t, tt); - else if (tt != type::eos) + if (tt == type::newline) + next (t, tt); + else if (tt != type::eos) + { + error (t) << "expected newline instead of " << t << endl; + throw parser_error (); + } + + continue; + } + + if (tt == type::newline) { - error (t) << "expected newline insetad of " << t << endl; - throw parser_error (); + // See if we have a directory/target scope. + // + if (next (t, tt) == type::lcbrace) + { + // Should be on its own line. + // + if (next (t, tt) != type::newline) + { + error (t) << "expected newline after '{'" << endl; + throw parser_error (); + } + + // See if this is a directory or target scope. Different + // things can appear inside depending on which it is. + // + bool dir (false); + for (const auto& n: ns) + { + if (n.back () == '/') + { + if (ns.size () != 1) + { + // @@ TODO: point to name. + // + error (t) << "multiple names in directory scope" << endl; + throw parser_error (); + } + + dir = true; + } + } + + next (t, tt); + + if (dir) + // A directory scope can contain anything that a top level can. + // + parse_clause (t, tt); + else + { + // @@ TODO: target scope. + } + + if (tt != type::rcbrace) + { + error (t) << "expected '}' instead of " << t << endl; + throw parser_error (); + } + + // Should be on its own line. + // + if (next (t, tt) == type::newline) + next (t, tt); + else if (tt != type::eos) + { + error (t) << "expected newline after '}'" << endl; + throw parser_error (); + } + } + + continue; } - continue; + if (tt == type::eos) + continue; + + error (t) << "expected newline insetad of " << t << endl; + throw parser_error (); } error (t) << "unexpected " << t << endl; @@ -61,18 +151,18 @@ namespace build } void parser:: - names (token& t, type& tt) + parse_names (token& t, type& tt, names& ns) { for (bool first (true);; first = false) { // Untyped name group, e.g., '{foo bar}'. // - if (t.is (punc::lcbrace)) + if (tt == type::lcbrace) { next (t, tt); - names (t, tt); + parse_names (t, tt, ns); - if (!t.is (punc::rcbrace)) + if (tt != type::rcbrace) { error (t) << "expected '}' instead of " << t << endl; throw parser_error (); @@ -90,9 +180,7 @@ namespace build // See if this is a type name, that is, it is followed by '{'. // - next (t, tt); - - if (t.is (punc::lcbrace)) + if (next (t, tt) == type::lcbrace) { //cout << "type: " << name << endl; @@ -101,9 +189,9 @@ namespace build // - detect nested typed name groups, e.g., 'cxx{hxx{foo}}'. // next (t, tt); - names (t, tt); + parse_names (t, tt, ns); - if (!t.is (punc::rcbrace)) + if (tt != type::rcbrace) { error (t) << "expected '}' instead of " << t << endl; throw parser_error (); @@ -115,6 +203,7 @@ namespace build // This is a target, directory, or variable name. //cout << "name: " << name << endl; + ns.push_back (name); continue; } @@ -126,11 +215,12 @@ namespace build } } - void parser:: + token_type parser:: next (token& t, token_type& tt) { t = lexer_->next (); tt = t.type (); + return tt; } ostream& parser:: @@ -147,19 +237,12 @@ namespace build { switch (t.type ()) { - case token_type::eos: os << ""; break; - case token_type::punctuation: - { - switch (t.punctuation ()) - { - case token_punctuation::newline: os << ""; break; - case token_punctuation::colon: os << "':'"; break; - case token_punctuation::lcbrace: os << "'{'"; break; - case token_punctuation::rcbrace: os << "'}'"; break; - } - break; - } - case token_type::name: os << '\'' << t.name () << '\''; break; + case token_type::eos: os << ""; break; + case token_type::newline: os << ""; break; + case token_type::colon: os << "':'"; break; + case token_type::lcbrace: os << "'{'"; break; + case token_type::rcbrace: os << "'}'"; break; + case token_type::name: os << '\'' << t.name () << '\''; break; } return os; diff --git a/build/token b/build/token index 6f4951c..9f9b2b4 100644 --- a/build/token +++ b/build/token @@ -13,8 +13,15 @@ namespace build { - enum class token_type {eos, name, punctuation}; - enum class token_punctuation {newline, colon, lcbrace, rcbrace}; + enum class token_type + { + eos, + name, + newline, + colon, + lcbrace, + rcbrace + }; class token { @@ -25,32 +32,18 @@ namespace build std::string const& name () const {assert (t_ == token_type::name); return n_;} - token_punctuation - punctuation () const {assert (t_ == token_type::punctuation); return p_;} - - bool - is (token_punctuation p) const - { - return t_ == token_type::punctuation && p_ == p; - } - std::uint64_t line () const {return l_;} std::uint64_t column () const {return c_;} public: - token (std::uint64_t l, std::uint64_t c) - : t_ (token_type::eos), l_ (l), c_ (c) {} + token (token_type t, std::uint64_t l, std::uint64_t c) + : t_ (t), l_ (l), c_ (c) {} token (std::string n, std::uint64_t l, std::uint64_t c) : t_ (token_type::name), n_ (std::move (n)), l_ (l), c_ (c) {} - token (token_punctuation p, std::uint64_t l, std::uint64_t c) - : t_ (token_type::punctuation), p_ (p), l_ (l), c_ (c) {} - private: token_type t_; - - token_punctuation p_; std::string n_; std::uint64_t l_; diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..e54525b --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +driver diff --git a/tests/build/lexer/driver.cxx b/tests/build/lexer/driver.cxx index e329b1c..739fa26 100644 --- a/tests/build/lexer/driver.cxx +++ b/tests/build/lexer/driver.cxx @@ -91,17 +91,10 @@ lex (const char* s) switch (t.type ()) { case token_type::eos: v= ""; break; - case token_type::punctuation: - { - switch (t.punctuation ()) - { - case token_punctuation::newline: v = "\n"; break; - case token_punctuation::colon: v = ":"; break; - case token_punctuation::lcbrace: v = "{"; break; - case token_punctuation::rcbrace: v = "}"; break; - } - break; - } + case token_type::newline: v = "\n"; break; + case token_type::colon: v = ":"; break; + case token_type::lcbrace: v = "{"; break; + case token_type::rcbrace: v = "}"; break; case token_type::name: v = t.name ().c_str (); break; } diff --git a/tests/build/parser/driver.cxx b/tests/build/parser/driver.cxx index 4ba589d..0b83cb0 100644 --- a/tests/build/parser/driver.cxx +++ b/tests/build/parser/driver.cxx @@ -45,6 +45,23 @@ main () assert (!parse ("{foo{:")); assert (!parse ("foo: bar:")); assert (!parse ("exe{foo:")); + + // Directory scope. + // + assert (parse ("test/:\n{\n}")); + assert (parse ("test/:\n{\n}\n")); + assert (parse ("test/:\n{\nfoo:bar\n}")); + assert (parse ("test/:\n{\nfoo:bar\n}")); + assert (parse ("test/:\n{\nmore/:\n{\n}\n}")); + assert (parse ("test/:\n{\nmore/:\n{\nfoo:{bar baz}\n}\n}")); + + assert (!parse ("test/:\n{")); + assert (!parse ("test/:\n{\n")); + assert (!parse ("test/:\n{\n:")); + assert (!parse ("test/:\n{\n} foo: bar\n")); + assert (!parse ("test/ foo:\n{\n}")); + assert (!parse ("test foo/:\n{\n}")); + assert (!parse ("test/ foo/:\n{\n}")); } ostream cnull (nullptr); @@ -56,6 +73,7 @@ parse (const char* s) is.exceptions (istream::failbit | istream::badbit); parser p (cnull); + //parser p (cerr); try { -- cgit v1.1