diff options
Diffstat (limited to 'build2/test/script/parser.cxx')
-rw-r--r-- | build2/test/script/parser.cxx | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx new file mode 100644 index 0000000..aba9f9a --- /dev/null +++ b/build2/test/script/parser.cxx @@ -0,0 +1,283 @@ +// file : build2/test/script/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <build2/test/script/parser> + +#include <build2/test/script/lexer> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + using type = token_type; + + script parser:: + parse (istream& is, const path& p, target& test_t, target& script_t) + { + path_ = &p; + + lexer l (is, *path_, lexer_mode::script_line); + lexer_ = &l; + base_parser::lexer_ = &l; + + script_type r (test_t, script_t); + script_ = &r; + + token t (type::eos, false, 0, 0); + type tt; + next (t, tt); + + script (t, tt); + + if (tt != type::eos) + fail (t) << "unexpected " << t; + + return r; + } + + void parser:: + script (token& t, token_type& tt) + { + while (tt != type::eos) + { + script_line (t, tt); + } + } + + void parser:: + script_line (token& t, token_type& tt) + { + // Parse first chunk. Keep track of whether anything in it was quoted. + // + names_type ns; + location nl (get_location (t)); + lexer_->reset_quoted (t.quoted); + names (t, tt, ns, true); + + // See if this is a variable assignment or a test command. + // + if (tt == type::assign || + tt == type::prepend || + tt == type::append) + { + // We need to strike a balance between recognizing command lines + // that contain the assignment operator and variable assignments. + // + // If we choose to treat these tokens literally (for example, if we + // have several names on the LHS), then we have the reversibility + // problem: we need to restore original whitespaces before and after + // the assignment operator (e.g., foo=bar vs foo = bar). + // + // To keep things simple we will start with the following rule: if + // the token after the first chunk of input is assignment, then it + // must be a variable assignment. After all, command lines like this + // are not expected to be common: + // + // $* =x + // + // It will also be easy to get the desired behavior with quoting: + // + // $* "=x" + // + // The only issue here is if $* above expands to a single, simple + // name (e.g., an executable name) in which case it will be treated + // as a variable name. One way to resolve it would be to detect + // "funny" variable names and require that they be quoted (this + // won't help with built-in commands; maybe we could warn if it's + // the same as built-in). Note that currently we have no way of + // knowing it's quoted. + // + // Or perhaps we should just let people learn that first assignment + // needs to be quoted? + // + if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ()) + fail (nl) << "variable name expected instead of '" << ns << "'"; + + variable_line (t, tt, move (ns[0].value)); + } + else + test_line (t, tt, move (ns), move (nl)); + } + + void parser:: + variable_line (token& t, token_type& tt, string name) + { + type kind (tt); // Assignment kind. + const variable_type& var (script_->var_pool.insert (move (name))); + + // We cannot reuse the value mode since it will recognize { which + // we want to treat as a literal. + // + value rhs (variable_value (t, tt, lexer_mode::variable_line)); + + value& lhs (kind == type::assign + ? script_->assign (var) + : script_->append (var)); + + // @@ Need to adjust to make strings the default type. + // + value_attributes (&var, lhs, move (rhs), kind); + } + + void parser:: + test_line (token& t, token_type& tt, names_type ns, location nl) + { + // Stop recognizing variable assignments. + // + mode (lexer_mode::test_line); + + // Keep parsing chunks of the command line until we see the newline or + // the exit status comparison. + // + strings cmd; + + do + { + // Process words that we already have. + // + bool q (lexer_->quoted ()); + + for (name& n: ns) + { + string s; + + try + { + s = value_traits<string>::convert (move (n), nullptr); + } + catch (const invalid_argument&) + { + fail (nl) << "invalid string value '" << n << "'"; + } + + // If it is a quoted chunk, then we add the word as is. Otherwise + // we re-lex it. But if the word doesn't contain any interesting + // characters (operators plus quotes/escapes), then no need to + // re-lex. + // + if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) + cmd.push_back (move (s)); + else + { + // Come up with a "path" that contains both the original + // location as well as the expanded string. The resulting + // diagnostics will look like this: + // + // testscript:10:1 ('abc): unterminated single quote + // + path name; + { + string n (nl.file->string ()); + n += ':'; + n += to_string (nl.line); + n += ':'; + n += to_string (nl.column); + n += ": ("; + n += s; + n += ')'; + name = path (move (n)); + } + + istringstream is (s); + lexer lex (is, name, lexer_mode::command_line); + + string w; + bool f (true); // In case the whole thing is empty. + for (token t (lex.next ()); t.type != type::eos; t = lex.next ()) + { + // Note that this is not "our" token so we cannot do fail(t). + // Rather we should do fail(l). + // + location l (build2::get_location (t, lex.name ())); + + // Re-lexing double-quotes will recognize $, ( inside as + // tokens so we have to reverse them back. Since we don't + // treat spaces as separators we can be sure we will get it + // right. + // + switch (t.type) + { + case type::dollar: w += '$'; continue; + case type::lparen: w += '('; continue; + } + + // Retire the current word. We need to distinguish between + // empty and non-existent (e.g., > vs >""). + // + if (!w.empty () || f) + { + cmd.push_back (move (w)); + f = false; + } + + switch (t.type) + { + case type::name: w = move (t.value); f = true; break; + + // @@ TODO + // + case type::pipe: + case type::clean: + case type::log_and: + case type::log_or: + + case type::in_null: + case type::in_string: + case type::in_document: + + case type::out_null: + case type::out_string: + case type::out_document: + break; + } + } + + // Don't forget the last word. + // + if (!w.empty () || f) + cmd.push_back (move (w)); + } + } + + if (tt == type::newline || + tt == type::equal || + tt == type::not_equal) + break; + + // Parse the next chunk. + // + ns.clear (); + lexer_->reset_quoted (t.quoted); + names (t, tt, ns, true); + + } while (true); + + //@@ switch mode (we no longer want to recognize command operators)? + + if (tt == type::equal || tt == type::not_equal) + { + command_exit (t, tt); + } + + // here-document + } + + void parser:: + command_exit (token& t, token_type& tt) + { + // The next chunk should be the exit status. + // + next (t, tt); + names_type ns (names (t, tt, true)); + + //@@ TODO: validate to be single, simple, non-empty name that + // converts to integer (is exit status always non-negative). + } + } + } +} |