From 4bf42322fdd5dd7e01a3f61272bccc4a66a5585f Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Sat, 2 Apr 2016 12:28:56 +0200 Subject: Add attribute syntax infrastructure --- build2/lexer | 26 +++++---- build2/lexer.cxx | 31 ++++++---- build2/parser | 10 ++++ build2/parser.cxx | 166 ++++++++++++++++++++++++++++++++++++++++++++++-------- build2/token | 6 +- build2/token.cxx | 2 + 6 files changed, 192 insertions(+), 49 deletions(-) (limited to 'build2') diff --git a/build2/lexer b/build2/lexer index 001d52c..c856344 100644 --- a/build2/lexer +++ b/build2/lexer @@ -42,32 +42,31 @@ namespace build2 void (*processor) (token&, const lexer&) = nullptr) : char_scanner (is), fail (name), processor_ (processor), sep_ (false) { - mode_.push (lexer_mode::normal); + mode (lexer_mode::normal); } const path& name () const {return fail.name_;} - // Note: sets mode for the next token. If mode is pairs, then - // the second argument specifies the separator character. + // Note: sets mode for the next token. If mode is pairs, then the second + // argument specifies the separator character. // void - mode (lexer_mode m, char pair_separator = '=') + mode (lexer_mode m, char pair_separator = '\0') { - mode_.push (m); - pair_separator_ = pair_separator; + state_.push (state{m, pair_separator}); } // Expire the current mode early. // void - expire_mode () {mode_.pop ();} + expire_mode () {state_.pop ();} lexer_mode - mode () const {return mode_.top ();} + mode () const {return state_.top ().mode;} char - pair_separator () const {return pair_separator_;} + pair_separator () const {return state_.top ().pair_separator;} // Scanner. Note that it is ok to call next() again after getting eos. // @@ -123,8 +122,13 @@ namespace build2 void (*processor_) (token&, const lexer&); - std::stack<lexer_mode> mode_; - char pair_separator_; + struct state + { + lexer_mode mode; + char pair_separator; + }; + std::stack<state> state_; + bool sep_; // True if we skipped spaces in peek().
}; } diff --git a/build2/lexer.cxx b/build2/lexer.cxx index 795387a..ba43839 100644 --- a/build2/lexer.cxx +++ b/build2/lexer.cxx @@ -24,7 +24,7 @@ namespace build2 { // In the quoted mode we don't skip spaces. // - sep_ = mode_.top () != lexer_mode::quoted && skip_spaces (); + sep_ = state_.top ().mode != lexer_mode::quoted && skip_spaces (); xchar c (peek ()); return make_pair (eos (c) ? '\0' : char (c), sep_); } @@ -32,7 +32,8 @@ namespace build2 token lexer:: next_impl () { - lexer_mode m (mode_.top ()); + lexer_mode m (state_.top ().mode); + char ps (state_.top ().pair_separator); // For some modes we have dedicated imlementations of next(). // @@ -61,12 +62,14 @@ namespace build2 // Expire value/pairs mode at the end of the line. // if (m == lexer_mode::value || m == lexer_mode::pairs) - mode_.pop (); + state_.pop (); return token (type::newline, sep, ln, cn); } case '{': return token (type::lcbrace, sep, ln, cn); case '}': return token (type::rcbrace, sep, ln, cn); + case '[': return token (type::lsbrace, sep, ln, cn); + case ']': return token (type::rsbrace, sep, ln, cn); case '$': return token (type::dollar, sep, ln, cn); case '(': return token (type::lparen, sep, ln, cn); case ')': return token (type::rparen, sep, ln, cn); @@ -74,7 +77,7 @@ namespace build2 // Handle pair separator. // - if (m == lexer_mode::pairs && c == pair_separator_) + if (m == lexer_mode::pairs && c == ps) return token (type::pair_separator, sep, ln, cn); // The following characters are not treated as special in the @@ -129,6 +132,9 @@ namespace build2 // This mode is quite a bit like the value mode when it comes to special // characters, except that we have some of our own. // + // Note: we don't treat [ and ] as special here. Maybe can use them for + // something later. + // switch (c) { // NOTE: remember to update name() if adding new special characters. 
@@ -140,7 +146,7 @@ namespace build2 case '(': return token (type::lparen, sep, ln, cn); case ')': { - mode_.pop (); // Expire eval mode. + state_.pop (); // Expire eval mode. return token (type::rparen, sep, ln, cn); } case '=': @@ -191,7 +197,8 @@ namespace build2 uint64_t ln (c.line), cn (c.column); string lexeme; - lexer_mode m (mode_.top ()); + lexer_mode m (state_.top ().mode); + char ps (state_.top ().pair_separator); bool quoted (m == lexer_mode::quoted); for (; !eos (c); c = peek ()) @@ -200,7 +207,7 @@ namespace build2 // Handle pair separator. // - if (m == lexer_mode::pairs && c == pair_separator_) + if (m == lexer_mode::pairs && c == ps) break; // The following characters are only special in the normal and @@ -297,6 +304,8 @@ namespace build2 case '#': case '{': case '}': + case '[': + case ']': case ')': { done = true; @@ -355,14 +364,14 @@ namespace build2 get (); if (m == lexer_mode::quoted) - mode_.pop (); + state_.pop (); else { - mode_.push (lexer_mode::quoted); + mode (lexer_mode::quoted); quoted = true; } - m = mode_.top (); + m = state_.top ().mode; continue; } } @@ -384,7 +393,7 @@ namespace build2 // Expire variable mode at the end of the name. // if (m == lexer_mode::variable) - mode_.pop (); + state_.pop (); return token (lexeme, sep, quoted, ln, cn); } diff --git a/build2/parser b/build2/parser index 0e15e4c..a4b5dc5 100644 --- a/build2/parser +++ b/build2/parser @@ -96,6 +96,13 @@ namespace build2 void eval_trailer (token&, token_type&, names_type&); + // If the next token is [, parse the attribute sequence until ] storing + // it in attrs_, get the next token, verify it is not a newline or eos, + // and return true. Otherwise return false. + // + bool + attributes (token&, token_type&); + // If chunk is true, then parse the smallest but complete, name-wise, // chunk of input. Note that in this case you may still end up with // multiple names, for example, {foo bar}. 
@@ -290,6 +297,9 @@ namespace build2 target* target_; // Current target, if any. scope* scope_; // Current base scope (out_base). scope* root_; // Current root scope (out_root). + + vector<pair<string, string>> attrs_; // Current attributes, if any. + target* default_target_; names_type export_value_; diff --git a/build2/parser.cxx b/build2/parser.cxx index 4c5bd4e..663ca0c 100644 --- a/build2/parser.cxx +++ b/build2/parser.cxx @@ -95,6 +95,11 @@ namespace build2 // while (tt != type::eos) { + // Extract attributes if any. + // + location al (get_location (t, &path_)); + bool ha (attributes (t, tt)); + // We always start with one or more names. // if (tt != type::name && @@ -109,51 +114,44 @@ namespace build2 if (tt == type::name && keyword (t)) { const string& n (t.value); + void (parser::*f) (token&, token_type&) = nullptr; if (n == "print") { // @@ Is this the only place where it is valid? Probably also // in var namespace. // - print (t, tt); - continue; + f = &parser::print; } else if (n == "source") { - source (t, tt); - continue; + f = &parser::source; } else if (n == "include") { - include (t, tt); - continue; + f = &parser::include; } else if (n == "import") { - import (t, tt); - continue; + f = &parser::import; } else if (n == "export") { - export_ (t, tt); - continue; + f = &parser::export_; } else if (n == "using" || n == "using?") { - using_ (t, tt); - continue; + f = &parser::using_; } else if (n == "define") { - define (t, tt); - continue; + f = &parser::define; } else if (n == "if" || n == "if!") { - if_else (t, tt); - continue; + f = &parser::if_else; } else if (n == "else" || n == "elif" || @@ -163,10 +161,21 @@ namespace build2 // fail (t) << n << " without if"; } + + if (f != nullptr) + { + if (ha) + fail (al) << "attributes before " << n; + + (this->*f) (t, tt); + continue; + } } // ': foo' is equvalent to '{}: foo' and to 'dir{}: foo'. // + // @@ I think we should make ': foo' invalid.
+ // const location nloc (get_location (t, &path_)); names_type ns (tt != type::colon ? names (t, tt) @@ -232,8 +241,6 @@ namespace build2 bool dir (false); for (const auto& n: ns) { - // A name represents directory as an empty value. - // if (n.directory ()) { if (ns.size () != 1) @@ -251,7 +258,12 @@ namespace build2 if (dir) { - // Directory scope. Can contain anything that a top level can. + // Directory scope. + // + if (ha) + fail (al) << "attributes before directory scope"; + + // Can contain anything that a top level can. // enter_scope (move (ns[0].dir)); // Steal. clause (t, tt); @@ -259,6 +271,9 @@ namespace build2 } else { + if (ha) + fail (al) << "attributes before target scope"; + // @@ TODO: target scope. } @@ -276,13 +291,23 @@ namespace build2 } // If this is not a scope, then it is a target without any - // prerequisites. + // prerequisites. Fall through. // } // Dependency declaration or scope/target-specific variable // assignment. // + + // Will have to stash them if later support attributes on + // target/scope. + // + if (ha) + fail (al) << "attributes before target/scope"; + + al = get_location (t, &path_); + ha = attributes (t, tt); + if (tt == type::name || tt == type::lcbrace || tt == type::dollar || @@ -334,6 +359,8 @@ namespace build2 var_pool.find ( variable_name (move (pns), ploc))); + //@@ TODO: handle attrs. + // If we have multiple targets/scopes, then we save the value // tokens when parsing the first one and then replay them for // the subsequent. We have to do it this way because the value @@ -349,13 +376,15 @@ namespace build2 if (n.directory ()) { + // Scope variable. + // enter_scope (move (n.dir)); variable (t, tt, var, att); leave_scope (); } else { - // Figure out if this is a target or type/pattern specific + // Figure out if this is a target or type/pattern-specific // variable. 
// size_t p (n.value.find ('*')); @@ -418,6 +447,9 @@ namespace build2 // else { + if (ha) + fail (al) << "attributes before prerequisites"; + // Prepare the prerequisite list. // target::prerequisites_type ps; @@ -484,6 +516,8 @@ namespace build2 // if (tt == type::assign || tt == type::prepend || tt == type::append) { + //@@ TODO handle attrs. + variable (t, tt, var_pool.find (variable_name (move (ns), nloc)), tt); if (tt == type::newline) @@ -498,6 +532,9 @@ namespace build2 // if (tt == type::newline && ns.empty ()) { + if (ha) + fail (al) << "standalone attributes"; + next (t, tt); continue; } @@ -755,6 +792,12 @@ namespace build2 mode (lexer_mode::pairs, '@'); next (t, tt); + // Get attributes, if any (note that here we will go into a nested pairs + // mode). + // + location al (get_location (t, &path_)); + bool ha (attributes (t, tt)); + if (tt == type::name) { // Split the token into the variable name and value at position (p) of @@ -818,12 +861,18 @@ namespace build2 split (p); // Returned name should be empty. } } + } - if (var != nullptr) - val = at == type::assign - ? &scope_->assign (*var) - : &scope_->append (*var); + if (var != nullptr) + { + // @@ TODO handle attrs. + + val = at == type::assign + ? &scope_->assign (*var) + : &scope_->append (*var); } + else if (ha) + fail (al) << "attributes without variable"; // The rest should be a list of projects and/or targets. Parse // them as names to get variable expansion and directory prefixes. @@ -1271,6 +1320,73 @@ namespace build2 } } + bool parser:: + attributes (token& t, token_type& tt) + { + attrs_.clear (); + + if (tt != type::lsbrace) + return false; + + // Using '@' for key-value pairs would be just too ugly. Seeing that we + // control what goes into keys/values, let's use a much nicer '='. 
+ // + mode (lexer_mode::pairs, '='); + next (t, tt); + + if (tt != type::rsbrace && tt != type::newline && tt != type::eos) + { + const location l (get_location (t, &path_)); + names_type ns (names (t, tt)); + + text << '[' << ns << ']'; + + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + string k, v; + + try + { + k = convert<string> (move (*i)); + } + catch (const invalid_argument&) + { + fail (l) << "invalid attribute key '" << *i << "'"; + } + + if (i->pair) + { + try + { + v = convert<string> (move (*++i)); + } + catch (const invalid_argument&) + { + fail (l) << "invalid attribute value '" << *i << "'"; + } + } + + attrs_.emplace_back (move (k), move (v)); + } + } + + // Manually expire the pairs mode if we haven't reached newline/eos (where + // it expires automatically). + // + if (lexer_->mode () == lexer_mode::pairs) + lexer_->expire_mode (); + + if (tt != type::rsbrace) + fail (t) << "expected ']' instead of " << t; + + next (t, tt); + + if (tt == type::newline || tt == type::eos) + fail (t) << "standalone attributes"; + + return true; + } + // Parse names inside {} and handle the following "crosses" (i.e., // {a b}{x y}) if any. Return the number of names added to the list.
// diff --git a/build2/token b/build2/token index 6202f44..6695010 100644 --- a/build2/token +++ b/build2/token @@ -17,8 +17,10 @@ namespace build2 newline, pair_separator, colon, - lcbrace, - rcbrace, + lcbrace, // { + rcbrace, // } + lsbrace, // [ + rsbrace, // ] assign, // = prepend, // =+ append, // += diff --git a/build2/token.cxx b/build2/token.cxx index 7a36a2d..90aeff5 100644 --- a/build2/token.cxx +++ b/build2/token.cxx @@ -19,6 +19,8 @@ namespace build2 case token_type::colon: os << ":"; break; case token_type::lcbrace: os << "{"; break; case token_type::rcbrace: os << "}"; break; + case token_type::lsbrace: os << "["; break; + case token_type::rsbrace: os << "]"; break; case token_type::assign: os << "="; break; case token_type::prepend: os << "=+"; break; case token_type::append: os << "+="; break; -- cgit v1.1