aboutsummaryrefslogtreecommitdiff
path: root/build2/test/script/parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'build2/test/script/parser.cxx')
-rw-r--r--build2/test/script/parser.cxx283
1 files changed, 283 insertions, 0 deletions
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
new file mode 100644
index 0000000..aba9f9a
--- /dev/null
+++ b/build2/test/script/parser.cxx
@@ -0,0 +1,283 @@
+// file : build2/test/script/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/test/script/parser>
+
+#include <build2/test/script/lexer>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ using type = token_type;
+
+ script parser::
+ parse (istream& is, const path& p, target& test_t, target& script_t)
+ {
+ path_ = &p;
+
+ lexer l (is, *path_, lexer_mode::script_line);
+ lexer_ = &l;
+ base_parser::lexer_ = &l;
+
+ script_type r (test_t, script_t);
+ script_ = &r;
+
+ token t (type::eos, false, 0, 0);
+ type tt;
+ next (t, tt);
+
+ script (t, tt);
+
+ if (tt != type::eos)
+ fail (t) << "unexpected " << t;
+
+ return r;
+ }
+
+ void parser::
+ script (token& t, token_type& tt)
+ {
+ while (tt != type::eos)
+ {
+ script_line (t, tt);
+ }
+ }
+
+ void parser::
+ script_line (token& t, token_type& tt)
+ {
+ // Parse first chunk. Keep track of whether anything in it was quoted.
+ //
+ names_type ns;
+ location nl (get_location (t));
+ lexer_->reset_quoted (t.quoted);
+ names (t, tt, ns, true);
+
+ // See if this is a variable assignment or a test command.
+ //
+ if (tt == type::assign ||
+ tt == type::prepend ||
+ tt == type::append)
+ {
+ // We need to strike a balance between recognizing command lines
+ // that contain the assignment operator and variable assignments.
+ //
+ // If we choose to treat these tokens literally (for example, if we
+ // have several names on the LHS), then we have the reversibility
+ // problem: we need to restore original whitespaces before and after
+ // the assignment operator (e.g., foo=bar vs foo = bar).
+ //
+ // To keep things simple we will start with the following rule: if
+ // the token after the first chunk of input is assignment, then it
+ // must be a variable assignment. After all, command lines like this
+ // are not expected to be common:
+ //
+ // $* =x
+ //
+ // It will also be easy to get the desired behavior with quoting:
+ //
+ // $* "=x"
+ //
+ // The only issue here is if $* above expands to a single, simple
+ // name (e.g., an executable name) in which case it will be treated
+ // as a variable name. One way to resolve it would be to detect
+ // "funny" variable names and require that they be quoted (this
+ // won't help with built-in commands; maybe we could warn if it's
+ // the same as built-in). Note that currently we have no way of
+ // knowing it's quoted.
+ //
+ // Or perhaps we should just let people learn that first assignment
+ // needs to be quoted?
+ //
+ if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ())
+ fail (nl) << "variable name expected instead of '" << ns << "'";
+
+ variable_line (t, tt, move (ns[0].value));
+ }
+ else
+ test_line (t, tt, move (ns), move (nl));
+ }
+
+ void parser::
+ variable_line (token& t, token_type& tt, string name)
+ {
+ type kind (tt); // Assignment kind.
+ const variable_type& var (script_->var_pool.insert (move (name)));
+
+ // We cannot reuse the value mode since it will recognize { which
+ // we want to treat as a literal.
+ //
+ value rhs (variable_value (t, tt, lexer_mode::variable_line));
+
+ value& lhs (kind == type::assign
+ ? script_->assign (var)
+ : script_->append (var));
+
+ // @@ Need to adjust to make strings the default type.
+ //
+ value_attributes (&var, lhs, move (rhs), kind);
+ }
+
+ void parser::
+ test_line (token& t, token_type& tt, names_type ns, location nl)
+ {
+ // Stop recognizing variable assignments.
+ //
+ mode (lexer_mode::test_line);
+
+ // Keep parsing chunks of the command line until we see the newline or
+ // the exit status comparison.
+ //
+ strings cmd;
+
+ do
+ {
+ // Process words that we already have.
+ //
+ bool q (lexer_->quoted ());
+
+ for (name& n: ns)
+ {
+ string s;
+
+ try
+ {
+ s = value_traits<string>::convert (move (n), nullptr);
+ }
+ catch (const invalid_argument&)
+ {
+ fail (nl) << "invalid string value '" << n << "'";
+ }
+
+ // If it is a quoted chunk, then we add the word as is. Otherwise
+ // we re-lex it. But if the word doesn't contain any interesting
+ // characters (operators plus quotes/escapes), then no need to
+ // re-lex.
+ //
+ if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+ cmd.push_back (move (s));
+ else
+ {
+ // Come up with a "path" that contains both the original
+ // location as well as the expanded string. The resulting
+ // diagnostics will look like this:
+ //
+ // testscript:10:1 ('abc): unterminated single quote
+ //
+ path name;
+ {
+ string n (nl.file->string ());
+ n += ':';
+ n += to_string (nl.line);
+ n += ':';
+ n += to_string (nl.column);
+ n += ": (";
+ n += s;
+ n += ')';
+ name = path (move (n));
+ }
+
+ istringstream is (s);
+ lexer lex (is, name, lexer_mode::command_line);
+
+ string w;
+ bool f (true); // In case the whole thing is empty.
+ for (token t (lex.next ()); t.type != type::eos; t = lex.next ())
+ {
+ // Note that this is not "our" token so we cannot do fail(t).
+ // Rather we should do fail(l).
+ //
+ location l (build2::get_location (t, lex.name ()));
+
+ // Re-lexing double-quotes will recognize $, ( inside as
+ // tokens so we have to reverse them back. Since we don't
+ // treat spaces as separators we can be sure we will get it
+ // right.
+ //
+ switch (t.type)
+ {
+ case type::dollar: w += '$'; continue;
+ case type::lparen: w += '('; continue;
+ }
+
+ // Retire the current word. We need to distinguish between
+ // empty and non-existent (e.g., > vs >"").
+ //
+ if (!w.empty () || f)
+ {
+ cmd.push_back (move (w));
+ f = false;
+ }
+
+ switch (t.type)
+ {
+ case type::name: w = move (t.value); f = true; break;
+
+ // @@ TODO
+ //
+ case type::pipe:
+ case type::clean:
+ case type::log_and:
+ case type::log_or:
+
+ case type::in_null:
+ case type::in_string:
+ case type::in_document:
+
+ case type::out_null:
+ case type::out_string:
+ case type::out_document:
+ break;
+ }
+ }
+
+ // Don't forget the last word.
+ //
+ if (!w.empty () || f)
+ cmd.push_back (move (w));
+ }
+ }
+
+ if (tt == type::newline ||
+ tt == type::equal ||
+ tt == type::not_equal)
+ break;
+
+ // Parse the next chunk.
+ //
+ ns.clear ();
+ lexer_->reset_quoted (t.quoted);
+ names (t, tt, ns, true);
+
+ } while (true);
+
+ //@@ switch mode (we no longer want to recognize command operators)?
+
+ if (tt == type::equal || tt == type::not_equal)
+ {
+ command_exit (t, tt);
+ }
+
+ // here-document
+ }
+
+ void parser::
+ command_exit (token& t, token_type& tt)
+ {
+ // The next chunk should be the exit status.
+ //
+ next (t, tt);
+ names_type ns (names (t, tt, true));
+
+ //@@ TODO: validate to be single, simple, non-empty name that
+ // converts to integer (is exit status always non-negative).
+ }
+ }
+ }
+}