From 8187642ed2b619fc6dc6844f80d107c338c428a3 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Tue, 18 Oct 2016 13:34:26 +0200
Subject: Remove support for expanded variable names in assignment

Such cases will have to be handled with the explicit set builtin.
---
 build2/test/script/parser     |   4 +-
 build2/test/script/parser.cxx | 329 +++++++++++++++++++-----------------------
 build2/test/script/token      |   4 +-
 3 files changed, 155 insertions(+), 182 deletions(-)

(limited to 'build2')

diff --git a/build2/test/script/parser b/build2/test/script/parser
index daaa953..f9fbd98 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -46,10 +46,10 @@ namespace build2
         parse_script_line (token&, token_type&);
 
         void
-        parse_variable_line (token&, token_type&, string, location);
+        parse_variable_line (token&, token_type&);
 
         void
-        parse_test_line (token&, token_type&, names, location);
+        parse_test_line (token&, token_type&);
 
         command_exit
         parse_command_exit (token&, token_type&);
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 25f6690..67585c6 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -52,56 +52,20 @@ namespace build2
       void parser::
       parse_script_line (token& t, token_type& tt)
       {
-        // Parse first chunk. Keep track of whether anything in it was quoted.
+        // Decide whether this is a variable assignment or a command. It is a
+        // variable assignment if the first token is a word and the next is an
+        // assign/append/prepend operator. Assignment to a computed variable
+        // name must use the set builtin.
         //
-        names ns;
-        location nl (get_location (t));
-        lexer_->reset_quoted (t.quoted);
-        parse_names (t, tt, ns, true, "variable or program name");
-
-        // See if this is a variable assignment or a test command.
-        //
-        if (tt == type::assign  ||
-            tt == type::prepend ||
-            tt == type::append)
+        auto assign = [] (type t)
         {
-          // We need to strike a balance between recognizing command lines
-          // that contain the assignment operator and variable assignments.
-          //
-          // If we choose to treat these tokens literally (for example, if we
-          // have several names on the LHS), then we have the reversibility
-          // problem: we need to restore original whitespaces before and after
-          // the assignment operator (e.g., foo=bar vs foo = bar).
-          //
-          // To keep things simple we will start with the following rule: if
-          // the token after the first chunk of input is assignment, then it
-          // must be a variable assignment. After all, command lines like this
-          // are not expected to be common:
-          //
-          // $* =x
-          //
-          // It will also be easy to get the desired behavior with quoting:
-          //
-          // $* "=x"
-          //
-          // The only issue here is if $* above expands to a single, simple
-          // name (e.g., an executable name) in which case it will be treated
-          // as a variable name. One way to resolve it would be to detect
-          // "funny" variable names and require that they be quoted (this
-          // won't help with built-in commands; maybe we could warn if it's
-          // the same as built-in). Note that currently we have no way of
-          // knowing it's quoted.
-          //
-          // Or perhaps we should just let people learn that first assignment
-          // needs to be quoted?
-          //
-          if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ())
-            fail (nl) << "variable name expected instead of '" << ns << "'";
+          return t == type::assign || t == type::prepend || t == type::append;
+        };
 
-          parse_variable_line (t, tt, move (ns[0].value), move (nl));
-        }
+        if (tt == type::word && assign (peek ()))
+          parse_variable_line (t, tt);
         else
-          parse_test_line (t, tt, move (ns), move (nl));
+          parse_test_line (t, tt);
       }
 
       // Return true if the string contains only digit characters (used to
@@ -118,17 +82,22 @@ namespace build2
       }
 
       void parser::
-      parse_variable_line (token& t, token_type& tt, string name, location nl)
+      parse_variable_line (token& t, token_type& tt)
       {
+        location nl (get_location (t));
+        string name (move (t.value));
+
         // Check if we are trying to modify any of the special aliases ($*,
         // $~, $N).
         //
         if (name == "*" || name == "~" || digits (name))
           fail (nl) << "attempt to set '" << name << "' variable directly";
 
-        type kind (tt); // Assignment kind.
         const variable& var (script_->var_pool.insert (move (name)));
 
+        next (t, tt);
+        type kind (tt); // Assignment kind.
+
         // We cannot reuse the value mode since it will recognize { which
         // we want to treat as a literal.
         //
@@ -164,7 +133,7 @@ namespace build2
       }
 
       void parser::
-      parse_test_line (token& t, token_type& tt, names ns, location nl)
+      parse_test_line (token& t, token_type& tt)
       {
         // Stop recognizing variable assignments.
         //
@@ -353,130 +322,11 @@ namespace build2
         // Keep parsing chunks of the command line until we see the newline or
         // the exit status comparison.
         //
-        for (bool done (false); !done; )
-        {
-          // Process words that we already have.
-          //
-          bool q (lexer_->quoted ());
-
-          for (name& n: ns)
-          {
-            string s;
-
-            try
-            {
-              s = value_traits<string>::convert (move (n), nullptr);
-            }
-            catch (const invalid_argument&)
-            {
-              fail (nl) << "invalid string value '" << n << "'";
-            }
-
-            // If it is a quoted chunk, then we add the word as is. Otherwise
-            // we re-lex it. But if the word doesn't contain any interesting
-            // characters (operators plus quotes/escapes), then no need to
-            // re-lex.
-            //
-            if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
-              add_word (move (s), nl);
-            else
-            {
-              // Come up with a "path" that contains both the original
-              // location as well as the expanded string. The resulting
-              // diagnostics will look like this:
-              //
-              // testscript:10:1 ('abc): unterminated single quote
-              //
-              path name;
-              {
-                string n (nl.file->string ());
-                n += ':';
-                n += to_string (nl.line);
-                n += ':';
-                n += to_string (nl.column);
-                n += ": (";
-                n += s;
-                n += ')';
-                name = path (move (n));
-              }
-
-              istringstream is (s);
-              lexer lex (is, name, lexer_mode::command_line);
-
-              // Treat the first "sub-token" as always separated from what we
-              // saw earlier.
-              //
-              // Note that this is not "our" token so we cannot do fail(t).
-              // Rather we should do fail(l).
-              //
-              token t (lex.next ());
-              location l (build2::get_location (t, name));
-              t.separated = true;
-
-              string w;
-              bool f (t.type == type::eos); // If the whole thing is empty.
+        location l (get_location (t));
+        names ns; // Reuse to reduce allocations.
 
-              for (; t.type != type::eos; t = lex.next ())
-              {
-                type tt (t.type);
-                l = build2::get_location (t, name);
-
-                // Re-lexing double-quotes will recognize $, ( inside as
-                // tokens so we have to reverse them back. Since we don't
-                // treat spaces as separators we can be sure we will get it
-                // right.
-                //
-                switch (tt)
-                {
-                case type::dollar: w += '$'; continue;
-                case type::lparen: w += '('; continue;
-                }
-
-                // Retire the current word. We need to distinguish between
-                // empty and non-existent (e.g., > vs >"").
-                //
-                if (!w.empty () || f)
-                {
-                  add_word (move (w), l);
-                  f = false;
-                }
-
-                if (tt == type::word)
-                {
-                  w = move (t.value);
-                  f = true;
-                  continue;
-                }
-
-                // If this is one of the operators/separators, check that we
-                // don't have any pending locations to be filled.
-                //
-                check_pending (l);
-
-                // Note: there is another one in the outer loop below.
-                //
-                switch (tt)
-                {
-                case type::in_null:
-                case type::in_string:
-                case type::in_document:
-                case type::out_null:
-                case type::out_string:
-                case type::out_document:
-                  parse_redirect (t, l);
-                  break;
-                }
-              }
-
-              // Don't forget the last word.
-              //
-              if (!w.empty () || f)
-                add_word (move (w), l);
-            }
-          }
-
-          // See what is the next token.
-          //
+        for (bool done (false); !done; l = get_location (t))
+        {
           switch (tt)
           {
           case type::equal:
@@ -496,9 +346,9 @@ namespace build2
               // If this is one of the operators/separators, check that we
               // don't have any pending locations to be filled.
               //
-              check_pending (nl);
+              check_pending (l);
 
-              // Note: there is another one in the inner loop above.
+              // Note: there is another one in the inner loop below.
               //
               switch (tt)
               {
@@ -508,7 +358,7 @@ namespace build2
               case type::out_null:
               case type::out_string:
               case type::out_document:
-                parse_redirect (t, get_location (t));
+                parse_redirect (t, l);
                 next (t, tt);
                 break;
               }
@@ -517,12 +367,133 @@ namespace build2
             }
           default:
             {
-              // Parse the next chunk.
+              // Parse the next chunk as names to get variable expansion, etc.
+              // Note that we do it in the chunking mode to detect whether
+              // anything in each chunk is quoted.
               //
-              ns.clear ();
               lexer_->reset_quoted (t.quoted);
-              nl = get_location (t);
               parse_names (t, tt, ns, true, "command");
+
+              // Process what we got.
+              //
+              bool q (lexer_->quoted ());
+              for (name& n: ns)
+              {
+                string s;
+
+                try
+                {
+                  s = value_traits<string>::convert (move (n), nullptr);
+                }
+                catch (const invalid_argument&)
+                {
+                  fail (l) << "invalid string value '" << n << "'";
+                }
+
+                // If it is a quoted chunk, then we add the word as is.
+                // Otherwise we re-lex it. But if the word doesn't contain any
+                // interesting characters (operators plus quotes/escapes),
+                // then no need to re-lex.
+                //
+                if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+                  add_word (move (s), l);
+                else
+                {
+                  // Come up with a "path" that contains both the original
+                  // location as well as the expanded string. The resulting
+                  // diagnostics will look like this:
+                  //
+                  // testscript:10:1 ('abc): unterminated single quote
+                  //
+                  path name;
+                  {
+                    string n (l.file->string ());
+                    n += ':';
+                    n += to_string (l.line);
+                    n += ':';
+                    n += to_string (l.column);
+                    n += ": (";
+                    n += s;
+                    n += ')';
+                    name = path (move (n));
+                  }
+
+                  istringstream is (s);
+                  lexer lex (is, name, lexer_mode::command_line);
+
+                  // Treat the first "sub-token" as always separated from what
+                  // we saw earlier.
+                  //
+                  // Note that this is not "our" token so we cannot do
+                  // fail(t). Rather we should do fail(l).
+                  //
+                  token t (lex.next ());
+                  location l (build2::get_location (t, name));
+                  t.separated = true;
+
+                  string w;
+                  bool f (t.type == type::eos); // If the whole thing is empty.
+
+                  for (; t.type != type::eos; t = lex.next ())
+                  {
+                    type tt (t.type);
+                    l = build2::get_location (t, name);
+
+                    // Re-lexing double-quotes will recognize $, ( inside as
+                    // tokens so we have to reverse them back. Since we don't
+                    // treat spaces as separators we can be sure we will get
+                    // it right.
+                    //
+                    switch (tt)
+                    {
+                    case type::dollar: w += '$'; continue;
+                    case type::lparen: w += '('; continue;
+                    }
+
+                    // Retire the current word. We need to distinguish between
+                    // empty and non-existent (e.g., > vs >"").
+                    //
+                    if (!w.empty () || f)
+                    {
+                      add_word (move (w), l);
+                      f = false;
+                    }
+
+                    if (tt == type::word)
+                    {
+                      w = move (t.value);
+                      f = true;
+                      continue;
+                    }
+
+                    // If this is one of the operators/separators, check that
+                    // we don't have any pending locations to be filled.
+                    //
+                    check_pending (l);
+
+                    // Note: there is another one in the outer loop above.
+                    //
+                    switch (tt)
+                    {
+                    case type::in_null:
+                    case type::in_string:
+                    case type::in_document:
+                    case type::out_null:
+                    case type::out_string:
+                    case type::out_document:
+                      parse_redirect (t, l);
+                      break;
+                    }
+                  }
+
+                  // Don't forget the last word.
+                  //
+                  if (!w.empty () || f)
+                    add_word (move (w), l);
+                }
+              }
+
+              ns.clear ();
               break;
             }
           }
@@ -530,7 +501,7 @@ namespace build2
 
         // Verify we don't have anything pending to be filled.
         //
-        check_pending (nl);
+        check_pending (l);
 
         // While we no longer need to recognize command line operators, we
         // also don't expect a valid test trailer to contain them. So we are
diff --git a/build2/test/script/token b/build2/test/script/token
index ac035f7..e952e00 100644
--- a/build2/test/script/token
+++ b/build2/test/script/token
@@ -38,7 +38,9 @@ namespace build2
           out_document                  // <<
         };
 
-        using base_type::base_type;
+        token_type () = default;
+        token_type (value_type v): base_type (v) {}
+        token_type (base_type v): base_type (v) {}
       };
 
       void
-- 
cgit v1.1