// file : build/parser.cxx -*- C++ -*- // copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC // license : MIT; see accompanying LICENSE file #include #include // unique_ptr #include #include // move() #include // make_move_iterator() #include #include #include #include #include #include #include #include #include using namespace std; namespace build { // Output the token type and value in a format suitable for diagnostics. // ostream& operator<< (ostream&, const token&); static location get_location (const token&, const void*); typedef token_type type; // Given a target or prerequisite name, figure out its type, taking // into account extensions, trailing '/', or anything else that might // be relevant. // static const char* find_target_type (const string& n, const string* e) { // Empty name or a name ending with a directory separator // signifies a directory. // if (n.empty () || path::traits::is_separator (n.back ())) return "dir"; //@@ TODO: derive type from extension. // return "file"; } void parser:: parse (istream& is, const path& p, scope& s) { string rw (diag_relative_work (p)); path_ = &rw; lexer l (is, p.string ()); lexer_ = &l; scope_ = &s; token t (type::eos, false, 0, 0); type tt; next (t, tt); clause (t, tt); if (tt != type::eos) fail (t) << "unexpected " << t; } void parser:: clause (token& t, token_type& tt) { tracer trace ("parser::clause", &path_); while (tt != type::eos) { // We always start with one or more names. // if (tt != type::name && tt != type::lcbrace && // Untyped name group: '{foo ...' tt != type::dollar && // Variable expansion: '$foo ...' tt != type::colon) // Empty name: ': ...' break; // Something else. Let our caller handle that. // See if this is one of the keywords. // if (tt == type::name) { const string& n (t.name ()); if (n == "print") { // @@ Is this the only place where it is valid? Probably also // in var namespace. // next (t, tt); print (t, tt); continue; } else if (n == "source") { next (t, tt); source (t, tt); continue; } else if (n == "include") { next (t, tt); include (t, tt); continue; } } // ': foo' is equvalent to '{}: foo' and to 'dir{}: foo'. // names_type ns (tt != type::colon ? names (t, tt) : names_type ({name ("")})); if (tt == type::colon) { next (t, tt); if (tt == type::newline) { // See if this is a directory/target scope. // if (peek () == type::lcbrace) { next (t, tt); // Should be on its own line. // if (next (t, tt) != type::newline) fail (t) << "expected newline after {"; // See if this is a directory or target scope. Different // things can appear inside depending on which one it is. // bool dir (false); for (const auto& n: ns) { if (n.type.empty () && n.value.back () == '/') { if (ns.size () != 1) { // @@ TODO: point to name. // fail (t) << "multiple names in directory scope"; } dir = true; } } next (t, tt); if (dir) { scope& prev (*scope_); // On Win32 translate the root path to the special empty path. // Search for root_scope for details. // #ifdef _WIN32 path p (ns[0].value != "/" ? path (ns[0].value) : path ()); #else path p (ns[0].value); #endif if (p.relative ()) p = prev.path () / p; p.normalize (); scope_ = &scopes[p]; // A directory scope can contain anything that a top level can. // clause (t, tt); scope_ = &prev; } else { // @@ TODO: target scope. } if (tt != type::rcbrace) fail (t) << "expected } instead of " << t; // Should be on its own line. // if (next (t, tt) == type::newline) next (t, tt); else if (tt != type::eos) fail (t) << "expected newline after }"; continue; } // If this is not a scope, then it is a target without any // prerequisites. // } // Dependency declaration. // if (tt == type::name || tt == type::lcbrace || tt == type::dollar || tt == type::newline || tt == type::eos) { names_type pns (tt != type::newline && tt != type::eos ? names (t, tt) : names_type ()); // Prepare the prerequisite list. // target::prerequisites_type ps; ps.reserve (pns.size ()); for (auto& pn: pns) { // We need to split the path into its directory part (if any) // the name part, and the extension (if any). We cannot assume // the name part is a valid filesystem name so we will have // to do the splitting manually. // path d (pn.dir); string n; const string* e (nullptr); { string& v (pn.value); path::size_type i (path::traits::rfind_separator (v)); if (i == string::npos) n = move (v); // NOTE: steal! else { d /= path (v, i != 0 ? i : 1); // Special case: "/". n.assign (v, i + 1, string::npos); } // Handle '.' and '..'. // if (n == ".") n.clear (); else if (n == "..") { d /= path (n); n.clear (); } d.normalize (); // Extract extension. // string::size_type j (path::traits::find_extension (n)); if (j != string::npos) { e = &extension_pool.find (n.c_str () + j); n.resize (j - 1); } } // Resolve prerequisite type. // const char* tt (pn.type.empty () ? find_target_type (n, e) : pn.type.c_str ()); auto i (target_types.find (tt)); if (i == target_types.end ()) { //@@ TODO name (or better yet, type) location fail (t) << "unknown prerequisite type " << tt; } const target_type& ti (i->second); // Find or insert. // prerequisite& p ( scope_->prerequisites.insert ( ti, move (d), move (n), e, *scope_, trace).first); ps.push_back (p); } for (auto& tn: ns) { path d (tn.dir); string n; const string* e (nullptr); // The same deal as in handling prerequisites above. // { string& v (tn.value); path::size_type i (path::traits::rfind_separator (v)); if (i == string::npos) n = move (v); // NOTE: steal! else { d /= path (v, i != 0 ? i : 1); // Special case: "/". n.assign (v, i + 1, string::npos); } // Handle '.' and '..'. // if (n == ".") n.clear (); else if (n == "..") { d /= path (n); n.clear (); } if (d.empty ()) d = scope_->path (); // Already normalized. else { if (d.relative ()) d = scope_->path () / d; d.normalize (); } // Extract extension. // string::size_type j (path::traits::find_extension (n)); if (j != string::npos) { e = &extension_pool.find (n.c_str () + j); n.resize (j - 1); } } // Resolve target type. // const char* tt (tn.type.empty () ? find_target_type (n, e) : tn.type.c_str ()); auto i (target_types.find (tt)); if (i == target_types.end ()) { //@@ TODO name (or better yet, type) location fail (t) << "unknown target type " << tt; } const target_type& ti (i->second); // Find or insert. // target& t ( targets.insert ( ti, move (d), move (n), e, trace).first); t.prerequisites = ps; //@@ OPT: move if last target. if (default_target == nullptr) default_target = &t; } if (tt == type::newline) next (t, tt); else if (tt != type::eos) fail (t) << "expected newline instead of " << t; continue; } if (tt == type::eos) continue; fail (t) << "expected newline instead of " << t; } // Variable assignment. // if (tt == type::equal || tt == type::plus_equal) { bool assign (tt == type::equal); // LHS should be a single, simple name. // if (ns.size () != 1 || !ns[0].type.empty () || !ns[0].dir.empty ()) fail (t) << "variable name expected before " << t; next (t, tt); names_type vns (tt != type::newline && tt != type::eos ? names (t, tt) : names_type ()); // Enter the variable. // string name; if (ns[0].value.front () == '.') // Fully qualified name. name.assign (ns[0].value, 1, string::npos); else //@@ TODO: append namespace if any. name = move (ns[0].value); const variable& var (variable_pool.find (move (name))); if (assign) { value_ptr& val (scope_->variables[var]); if (val == nullptr) // Initialization. { val.reset (new list_value (*scope_, move (vns))); } else // Assignment. { //@@ TODO: assuming it is a list. // dynamic_cast (*val).data = move (vns); } } else { //@@ TODO: assuming it is a list. // list_value* val (dynamic_cast ((*scope_)[var])); if (val == nullptr) // Initialization. { list_value_ptr nval (new list_value (*scope_, move (vns))); scope_->variables.emplace (var, move (nval)); } else if (&val->scope != scope_) // Append to value from parent scope. { list_value_ptr nval (new list_value (*scope_, val->data)); val = nval.get (); // Append. scope_->variables.emplace (var, move (nval)); } // Append. // if (val != nullptr) val->data.insert (val->data.end (), make_move_iterator (vns.begin ()), make_move_iterator (vns.end ())); } if (tt == type::newline) next (t, tt); else if (tt != type::eos) fail (t) << "expected newline instead of " << t; continue; } fail (t) << "unexpected " << t; } } void parser:: source (token& t, token_type& tt) { tracer trace ("parser::source", &path_); // The rest should be a list of paths to buildfiles. // for (; tt != type::newline && tt != type::eos; next (t, tt)) { if (tt != type::name) fail (t) << "expected buildfile to source instead of " << t; path p (t.name ()); // If the path is relative then use the src directory corresponding // to the current directory scope. // if (p.relative ()) p = src_out (scope_->path ()) / p; ifstream ifs (p.string ()); if (!ifs.is_open ()) fail (t) << "unable to open " << p; ifs.exceptions (ifstream::failbit | ifstream::badbit); level4 ([&]{trace (t) << "entering " << p;}); string rw (diag_relative_work (p)); const string* op (path_); path_ = &rw; lexer l (ifs, p.string ()); lexer* ol (lexer_); lexer_ = &l; next (t, tt); clause (t, tt); if (tt != type::eos) fail (t) << "unexpected " << t; level4 ([&]{trace (t) << "leaving " << p;}); lexer_ = ol; path_ = op; } if (tt != type::eos) next (t, tt); // Swallow newline. } void parser:: include (token& t, token_type& tt) { tracer trace ("parser::include", &path_); // The rest should be a list of paths to buildfiles. // for (; tt != type::newline && tt != type::eos; next (t, tt)) { if (tt != type::name) fail (t) << "expected buildfile to include instead of " << t; path p (t.name ()); bool in_out (false); if (p.absolute ()) { p.normalize (); // Make sure the path is in this project. Include is only meant // to be used for intra-project inclusion. // if (!p.sub (src_root) && !(in_out = p.sub (out_root))) fail (t) << "out of project include " << p; } else { // Use the src directory corresponding to the current directory scope. // p = src_out (scope_->path ()) / p; p.normalize (); } if (!include_.insert (p).second) { level4 ([&]{trace (t) << "skipping already included " << p;}); continue; } ifstream ifs (p.string ()); if (!ifs.is_open ()) fail (t) << "unable to open " << p; ifs.exceptions (ifstream::failbit | ifstream::badbit); level4 ([&]{trace (t) << "entering " << p;}); string rw (diag_relative_work (p)); const string* op (path_); path_ = &rw; lexer l (ifs, p.string ()); lexer* ol (lexer_); lexer_ = &l; scope* os (scope_); scope_ = &scopes[(in_out ? p : out_src (p)).directory ()]; next (t, tt); clause (t, tt); if (tt != type::eos) fail (t) << "unexpected " << t; level4 ([&]{trace (t) << "leaving " << p;}); scope_ = os; lexer_ = ol; path_ = op; } if (tt != type::eos) next (t, tt); // Swallow newline. } void parser:: print (token& t, token_type& tt) { for (; tt != type::newline && tt != type::eos; next (t, tt)) cout << t; cout << endl; if (tt != type::eos) next (t, tt); // Swallow newline. } void parser:: names (token& t, type& tt, names_type& ns, const path* dp, const string* tp) { // Buffer that is used to collect the complete name in case of an // unseparated variable expansion, e.g., 'foo$bar$(baz)fox'. The // idea is to concatenate all the individual parts in this buffer // and then re-inject it into the loop as a single token. // string concat; for (bool first (true);; first = false) { // If the accumulating buffer is not empty, then we have two options: // continue accumulating or inject. We inject if the next token is // not a name or var expansion or if it is separated. // if (!concat.empty () && ((tt != type::name && tt != type::dollar) || peeked ().separated ())) { tt = type::name; t = token (move (concat), true, t.line (), t.column ()); concat.clear (); } else if (!first) next (t, tt); // Name. // if (tt == type::name) { string name (t.name ()); //@@ move? tt = peek (); // Should we accumulate? If the buffer is not empty, then // we continue accumulating (the case where we are separated // should have been handled by the injection code above). If // the next token is a var expansion and it is not separated, // then we need to start accumulating. // if (!concat.empty () || // Continue. (tt == type::dollar && !peeked ().separated ())) // Start. { concat += name; continue; } // See if this is a type name, directory prefix, or both. That is, // it is followed by '{'. // if (tt == type::lcbrace) { next (t, tt); string::size_type p (name.rfind ('/')), n (name.size () - 1); if (p != n && tp != nullptr) fail (t) << "nested type name " << name; path d1; const path* dp1 (dp); string t1; const string* tp1 (tp); if (p == string::npos) // type tp1 = &name; else if (p == n) // directory { if (dp == nullptr) d1 = path (name); else d1 = *dp / path (name); dp1 = &d1; } else // both { t1.assign (name, p + 1, n - p); if (dp == nullptr) d1 = path (name, 0, p + 1); else d1 = *dp / path (name, 0, p + 1); dp1 = &d1; tp1 = &t1; } next (t, tt); names (t, tt, ns, dp1, tp1); if (tt != type::rcbrace) fail (t) << "expected } instead of " << t; tt = peek (); continue; } ns.emplace_back ((tp != nullptr ? *tp : string ()), (dp != nullptr ? *dp : path ()), move (name)); continue; } // Untyped name group without a directory prefix, e.g., '{foo bar}'. // if (tt == type::lcbrace) { next (t, tt); names (t, tt, ns, dp, tp); if (tt != type::rcbrace) fail (t) << "expected } instead of " << t; tt = peek (); continue; } // Variable expansion. // if (tt == type::dollar) { next (t, tt); bool paren (tt == type::lparen); if (paren) next (t, tt); if (tt != type::name) fail (t) << "variable name expected instead of " << t; string n; if (t.name ().front () == '.') // Fully qualified name. n.assign (t.name (), 1, string::npos); else //@@ TODO: append namespace if any. n = t.name (); //@@ TODO: assuming it is a list. // const variable& var (variable_pool.find (move (n))); list_value* val (dynamic_cast ((*scope_)[var])); // Undefined namespaces variables are not allowed. // if (val == nullptr && var.name.find ('.') != string::npos) fail (t) << "undefined namespace variable " << var.name; if (paren) { next (t, tt); if (tt != type::rparen) fail (t) << "expected ) instead of " << t; } tt = peek (); if (val == nullptr || val->data.empty ()) continue; // Should we accumulate? If the buffer is not empty, then // we continue accumulating (the case where we are separated // should have been handled by the injection code above). If // the next token is a name or var expansion and it is not // separated, then we need to start accumulating. // if (!concat.empty () || // Continue. ((tt == type::name || tt == type::dollar) // Start. && !peeked ().separated ())) { // This should be a simple value. The token still points // to the name (or closing paren). // if (val->data.size () > 1) fail (t) << "concatenating expansion of " << var.name << " contains multiple values"; const name& n (val->data[0]); if (!n.type.empty ()) fail (t) << "concatenating expansion of " << var.name << " contains type"; if (!n.dir.empty ()) fail (t) << "concatenating expansion of " << var.name << " contains directory"; concat += n.value; text << "concat: " << concat; } else { // Copy the names from the variable into the resulting name list // while doing sensible things with the types and directories. // for (const name& n: val->data) { const path* dp1 (dp); const string* tp1 (tp); path d1; if (!n.dir.empty ()) { if (dp != nullptr) { if (n.dir.absolute ()) fail (t) << "nested absolute directory " << n.dir.string () << " in variable expansion"; d1 = *dp / n.dir; dp1 = &d1; } else dp1 = &n.dir; } if (!n.type.empty ()) { if (tp == nullptr) tp1 = &n.type; else fail (t) << "nested type name " << n.type << " in variable " << "expansion"; } ns.emplace_back ((tp1 != nullptr ? *tp1 : string ()), (dp1 != nullptr ? *dp1 : path ()), n.value); } } continue; } if (!first) break; if (tt == type::rcbrace) // Empty name, e.g., dir{}. { ns.emplace_back ((tp != nullptr ? *tp : string ()), (dp != nullptr ? *dp : path ()), ""); break; } else fail (t) << "expected name instead of " << t; } } token_type parser:: next (token& t, token_type& tt) { if (!peeked_) t = lexer_->next (); else { t = move (peek_); peeked_ = false; } tt = t.type (); return tt; } token_type parser:: peek () { if (!peeked_) { peek_ = lexer_->next (); peeked_ = true; } return peek_.type (); } static location get_location (const token& t, const void* data) { assert (data != nullptr); const string& p (**static_cast (data)); return location (p.c_str (), t.line (), t.column ()); } // Output the token type and value in a format suitable for diagnostics. // ostream& operator<< (ostream& os, const token& t) { switch (t.type ()) { case token_type::eos: os << ""; break; case token_type::newline: os << ""; break; case token_type::colon: os << ":"; break; case token_type::lcbrace: os << "{"; break; case token_type::rcbrace: os << "}"; break; case token_type::equal: os << "="; break; case token_type::plus_equal: os << "+="; break; case token_type::dollar: os << "$"; break; case token_type::lparen: os << "("; break; case token_type::rparen: os << ")"; break; case token_type::name: os << t.name (); break; } return os; } }