1 files changed, 283 insertions, 0 deletions
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
new file mode 100644
index 0000000..aba9f9a
--- /dev/null
+++ b/build2/test/script/parser.cxx
@@ -0,0 +1,283 @@
+// file      : build2/test/script/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <build2/test/script/parser>
+
+#include <build2/test/script/lexer>
+
+using namespace std;
+
+namespace build2
+{
+  namespace test
+  {
+    namespace script
+    {
+      using type = token_type;
+
+      script parser::
+      parse (istream& is, const path& p, target& test_t, target& script_t)
+      {
+        // Set up the parsing state (path, lexer, and the script to populate).
+        //
+        path_ = &p;
+        lexer lex (is, *path_, lexer_mode::script_line);
+        base_parser::lexer_ = &lex;
+        lexer_ = &lex;
+        script_type result (test_t, script_t);
+        script_ = &result;
+
+        // Prime the parser with the first token and parse the whole script.
+        //
+        type tt;
+        token t (type::eos, false, 0, 0);
+        next (t, tt);
+        script (t, tt);
+
+        if (tt != type::eos)
+          fail (t) << "unexpected " << t;
+        return result;
+      }
+
+      void parser::
+      script (token& t, token_type& tt)
+      {
+        // Parse script lines one after another until the end of the stream.
+        //
+        while (tt != type::eos)
+          script_line (t, tt);
+      }
+
+      void parser::
+      script_line (token& t, token_type& tt)
+      {
+        // Parse the first chunk of the line. Remember its location and keep
+        // track of whether anything in it was quoted.
+        //
+        location nl (get_location (t));
+        lexer_->reset_quoted (t.quoted);
+
+        names_type ns;
+        names (t, tt, ns, true);
+
+        // Anything not followed by an assignment operator is a test command.
+        //
+        switch (tt)
+        {
+        case type::assign:
+        case type::prepend:
+        case type::append: break;
+        default: test_line (t, tt, move (ns), move (nl)); return;
+        }
+
+        // The chunk is followed by an assignment operator. Here we need to
+        // balance variable assignments against command lines that merely
+        // contain such an operator.
+        //
+        // Treating the operator literally (say, when there are several names
+        // on the LHS) runs into the reversibility problem: we would have to
+        // restore the original whitespaces around it (foo=bar vs foo = bar).
+        //
+        // So, to keep things simple, we start with this rule: an assignment
+        // token right after the first chunk always means a variable
+        // assignment. Command lines like this are not expected to be common:
+        //
+        // $* =x
+        //
+        // And the desired behavior is easy to get with quoting:
+        //
+        // $* "=x"
+        //
+        // The remaining issue is $* expanding to a single, simple name (e.g.,
+        // an executable name) which is then taken for a variable name. We
+        // could detect "funny" variable names and require quoting them (no
+        // help for built-in commands, though we could warn about a match),
+        // but currently we have no way of knowing whether a name was quoted.
+        //
+        // Or perhaps people should just learn to quote the first assignment?
+        //
+        bool valid (ns.size () == 1 && ns[0].simple () && !ns[0].empty ());
+        if (!valid)
+          fail (nl) << "variable name expected instead of '" << ns << "'";
+
+        variable_line (t, tt, move (ns[0].value));
+      }
+
+      void parser::
+      variable_line (token& t, token_type& tt, string name)
+      {
+        // Note the operator before tt is handed over to variable_value().
+        //
+        type op (tt);
+        const variable_type& v (script_->var_pool.insert (move (name)));
+
+        // Parse the value in a dedicated lexer mode: the value mode cannot
+        // be reused since it recognizes { which we want to treat literally.
+        //
+        value r (variable_value (t, tt, lexer_mode::variable_line));
+
+        value* l;
+        if (op == type::assign) l = &script_->assign (v);
+        else                    l = &script_->append (v);
+
+        value_attributes (&v, *l, move (r), op); // @@ Make strings default.
+      }
+
+      void parser::
+      test_line (token& t, token_type& tt, names_type ns, location nl)
+      {
+        // Parse a test command line; ns/nl are its first chunk and location.
+        //
+        mode (lexer_mode::test_line); // Stop recognizing assignments.
+
+        // Collect the command line words, parsing chunk after chunk until we
+        // see the newline or the exit status comparison (equal/not_equal).
+        //
+        strings cmd; // NOTE(review): filled below but not consumed yet.
+
+        do
+        {
+          // Process words that we already have (q: the chunk was quoted).
+          //
+          bool q (lexer_->quoted ());
+
+          for (name& n: ns)
+          {
+            string s; // The name converted to a string.
+
+            try
+            {
+              s = value_traits<string>::convert (move (n), nullptr);
+            }
+            catch (const invalid_argument&)
+            {
+              fail (nl) << "invalid string value '" << n << "'";
+            }
+
+            // If it is a quoted chunk, then we add the word as is. Otherwise
+            // we re-lex it. But if the word doesn't contain any interesting
+            // characters (operators plus quotes/escapes), then no need to
+            // re-lex.
+            //
+            if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+              cmd.push_back (move (s));
+            else
+            {
+              // Come up with a "path" that contains both the original
+              // location as well as the expanded string. The resulting
+              // diagnostics will look like this:
+              //
+              // testscript:10:1 ('abc): unterminated single quote
+              //
+              path name;
+              {
+                string n (nl.file->string ());
+                n += ':';
+                n += to_string (nl.line);
+                n += ':';
+                n += to_string (nl.column);
+                n += ": (";
+                n += s;
+                n += ')';
+                name = path (move (n));
+              }
+
+              istringstream is (s);
+              lexer lex (is, name, lexer_mode::command_line);
+
+              string w; // Word being accumulated.
+              bool f (true); // In case the whole thing is empty.
+              for (token t (lex.next ()); t.type != type::eos; t = lex.next ())
+              {
+                // Note that this is not "our" token so we cannot do fail(t).
+                // Rather we should do fail(l) (though nothing uses l yet).
+                //
+                location l (build2::get_location (t, lex.name ()));
+
+                // Re-lexing double-quotes will recognize $, ( inside as
+                // tokens so we have to reverse them back. Since we don't
+                // treat spaces as separators we can be sure we will get it
+                // right.
+                //
+                switch (t.type)
+                {
+                case type::dollar: w += '$'; continue;
+                case type::lparen: w += '('; continue;
+                }
+
+                // Retire the current word; f tells empty from non-existent
+                // (> vs >""). NOTE(review): f starts out true so an empty
+                // word is retired too if the very first token gets here.
+                if (!w.empty () || f)
+                {
+                  cmd.push_back (move (w));
+                  f = false;
+                }
+
+                switch (t.type)
+                {
+                case type::name: w = move (t.value); f = true; break;
+
+                  // @@ TODO
+                  //
+                case type::pipe:
+                case type::clean:
+                case type::log_and:
+                case type::log_or:
+
+                case type::in_null:
+                case type::in_string:
+                case type::in_document:
+
+                case type::out_null:
+                case type::out_string:
+                case type::out_document:
+                  break;
+                }
+              }
+
+              // Don't forget the last word.
+              //
+              if (!w.empty () || f)
+                cmd.push_back (move (w));
+            }
+          }
+
+          if (tt == type::newline ||
+              tt == type::equal   ||
+              tt == type::not_equal)
+            break;
+
+          // Parse the next chunk.
+          //
+          ns.clear ();
+          lexer_->reset_quoted (t.quoted);
+          names (t, tt, ns, true);
+
+        } while (true);
+
+        //@@ switch mode (we no longer want to recognize command operators)?
+
+        if (tt == type::equal || tt == type::not_equal)
+        {
+          command_exit (t, tt);
+        }
+
+        // @@ TODO: here-document.
+      }
+
+      void parser::
+      command_exit (token& t, token_type& tt)
+      {
+        // Skip the comparison; the next chunk should be the exit status.
+        //
+        next (t, tt);
+        names_type ns (names (t, tt, true)); // Parsed but not used yet.
+
+        //@@ TODO: validate to be single, simple, non-empty name that
+        //         converts to integer (is exit status always non-negative?).
+      }
+    }
+  }
+}