// file : build2/test/script/parser.cxx -*- C++ -*- // copyright : Copyright (c) 2014-2016 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #include #include using namespace std; namespace build2 { namespace test { namespace script { using type = token_type; script parser:: parse (istream& is, const path& p, target& test_t, target& script_t) { path_ = &p; lexer l (is, *path_, lexer_mode::script_line); lexer_ = &l; base_parser::lexer_ = &l; script_type r (test_t, script_t); script_ = &r; token t (type::eos, false, 0, 0); type tt; next (t, tt); script (t, tt); if (tt != type::eos) fail (t) << "unexpected " << t; return r; } void parser:: script (token& t, token_type& tt) { while (tt != type::eos) { script_line (t, tt); } } void parser:: script_line (token& t, token_type& tt) { // Parse first chunk. Keep track of whether anything in it was quoted. // names_type ns; location nl (get_location (t)); lexer_->reset_quoted (t.quoted); names (t, tt, ns, true); // See if this is a variable assignment or a test command. // if (tt == type::assign || tt == type::prepend || tt == type::append) { // We need to strike a balance between recognizing command lines // that contain the assignment operator and variable assignments. // // If we choose to treat these tokens literally (for example, if we // have several names on the LHS), then we have the reversibility // problem: we need to restore original whitespaces before and after // the assignment operator (e.g., foo=bar vs foo = bar). // // To keep things simple we will start with the following rule: if // the token after the first chunk of input is assignment, then it // must be a variable assignment. After all, command lines like this // are not expected to be common: // // $* =x // // It will also be easy to get the desired behavior with quoting: // // $* "=x" // // The only issue here is if $* above expands to a single, simple // name (e.g., an executable name) in which case it will be treated // as a variable name. One way to resolve it would be to detect // "funny" variable names and require that they be quoted (this // won't help with built-in commands; maybe we could warn if it's // the same as built-in). Note that currently we have no way of // knowing it's quoted. // // Or perhaps we should just let people learn that first assignment // needs to be quoted? // if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ()) fail (nl) << "variable name expected instead of '" << ns << "'"; variable_line (t, tt, move (ns[0].value)); } else test_line (t, tt, move (ns), move (nl)); } void parser:: variable_line (token& t, token_type& tt, string name) { type kind (tt); // Assignment kind. const variable_type& var (script_->var_pool.insert (move (name))); // We cannot reuse the value mode since it will recognize { which // we want to treat as a literal. // value rhs (variable_value (t, tt, lexer_mode::variable_line)); value& lhs (kind == type::assign ? script_->assign (var) : script_->append (var)); // @@ Need to adjust to make strings the default type. // value_attributes (&var, lhs, move (rhs), kind); } void parser:: test_line (token& t, token_type& tt, names_type ns, location nl) { // Stop recognizing variable assignments. // mode (lexer_mode::test_line); // Keep parsing chunks of the command line until we see the newline or // the exit status comparison. // strings cmd; do { // Process words that we already have. // bool q (lexer_->quoted ()); for (name& n: ns) { string s; try { s = value_traits::convert (move (n), nullptr); } catch (const invalid_argument&) { fail (nl) << "invalid string value '" << n << "'"; } // If it is a quoted chunk, then we add the word as is. Otherwise // we re-lex it. But if the word doesn't contain any interesting // characters (operators plus quotes/escapes), then no need to // re-lex. // if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) cmd.push_back (move (s)); else { // Come up with a "path" that contains both the original // location as well as the expanded string. The resulting // diagnostics will look like this: // // testscript:10:1 ('abc): unterminated single quote // path name; { string n (nl.file->string ()); n += ':'; n += to_string (nl.line); n += ':'; n += to_string (nl.column); n += ": ("; n += s; n += ')'; name = path (move (n)); } istringstream is (s); lexer lex (is, name, lexer_mode::command_line); string w; bool f (true); // In case the whole thing is empty. for (token t (lex.next ()); t.type != type::eos; t = lex.next ()) { // Note that this is not "our" token so we cannot do fail(t). // Rather we should do fail(l). // location l (build2::get_location (t, lex.name ())); // Re-lexing double-quotes will recognize $, ( inside as // tokens so we have to reverse them back. Since we don't // treat spaces as separators we can be sure we will get it // right. // switch (t.type) { case type::dollar: w += '$'; continue; case type::lparen: w += '('; continue; } // Retire the current word. We need to distinguish between // empty and non-existent (e.g., > vs >""). // if (!w.empty () || f) { cmd.push_back (move (w)); f = false; } switch (t.type) { case type::name: w = move (t.value); f = true; break; // @@ TODO // case type::pipe: case type::clean: case type::log_and: case type::log_or: case type::in_null: case type::in_string: case type::in_document: case type::out_null: case type::out_string: case type::out_document: break; } } // Don't forget the last word. // if (!w.empty () || f) cmd.push_back (move (w)); } } if (tt == type::newline || tt == type::equal || tt == type::not_equal) break; // Parse the next chunk. // ns.clear (); lexer_->reset_quoted (t.quoted); names (t, tt, ns, true); } while (true); //@@ switch mode (we no longer want to recognize command operators)? if (tt == type::equal || tt == type::not_equal) { command_exit (t, tt); } // here-document } void parser:: command_exit (token& t, token_type& tt) { // The next chunk should be the exit status. // next (t, tt); names_type ns (names (t, tt, true)); //@@ TODO: validate to be single, simple, non-empty name that // converts to integer (is exit status always non-negative). } } } }