1 files changed, 393 insertions, 154 deletions
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
index 9e92f3b..b712c21 100644
--- a/libbuild2/test/script/parser.cxx
+++ b/libbuild2/test/script/parser.cxx
@@ -293,22 +293,30 @@ namespace build2
       }
 
       // Parse a logical line (as well as scope-if since the only way to
-      // recognize it is to parse the if line).
+      // recognize it is to parse the if line), handling the flow control
+      // constructs recursively.
       //
       // If one is true then only parse one line returning an indication of
-      // whether the line ended with a semicolon. If if_line is true then this
-      // line can be an if-else construct flow control line (else, end, etc).
+      // whether the line ended with a semicolon. If the flow control
+      // construct type is specified, then this line is assumed to belong to
+      // such a construct.
       //
       bool parser::
       pre_parse_line (token& t, type& tt,
                       optional<description>& d,
                       lines* ls,
                       bool one,
-                      bool if_line)
+                      optional<line_type> fct)
       {
         // enter: next token is peeked at (type in tt)
         // leave: newline
 
+        assert (!fct                              ||
+                *fct == line_type::cmd_if         ||
+                *fct == line_type::cmd_while      ||
+                *fct == line_type::cmd_for_stream ||
+                *fct == line_type::cmd_for_args);
+
         // Note: token is only peeked at.
         //
         const location ll (get_location (peeked ()));
@@ -317,6 +325,52 @@ namespace build2
         //
         line_type lt;
         type st (type::eos); // Later, can only be set to plus or minus.
+        bool semi (false);
+
+        // Parse the command line tail, starting from the newline or the
+        // potential colon/semicolon token. Set d and semi accordingly.
+        //
+        // Note that colon and semicolon are only valid in test command lines
+        // and after 'end' in flow control constructs. Also note that we always
+        // recognize them lexically, even when they are not valid tokens per
+        // the grammar.
+        //
+        auto parse_command_tail = [&t, &tt, &st, &lt, &d, &semi, &ll, this] ()
+        {
+          if (tt != type::newline)
+          {
+            if (lt != line_type::cmd && lt != line_type::cmd_end)
+              fail (t) << "expected newline instead of " << t;
+
+            switch (st)
+            {
+            case type::plus:  fail (t) << t << " after setup command" << endf;
+            case type::minus: fail (t) << t << " after teardown command" << endf;
+            }
+          }
+
+          switch (tt)
+          {
+          case type::colon:
+            {
+              if (d)
+                fail (ll) << "both leading and trailing descriptions";
+
+              d = parse_trailing_description (t, tt);
+              break;
+            }
+          case type::semi:
+            {
+              semi = true;
+              replay_pop (); // Drop the semi from the saved tokens.
+              next (t, tt);  // Get newline.
+              break;
+            }
+          }
+
+          if (tt != type::newline)
+            fail (t) << "expected newline instead of " << t;
+        };
 
         switch (tt)
         {
@@ -364,8 +418,12 @@ namespace build2
             {
               const string& n (t.value);
 
-              if      (n == "if")  lt = line_type::cmd_if;
-              else if (n == "if!") lt = line_type::cmd_ifn;
+              // Handle the for-loop consistently with pre_parse_line_start().
+              //
+              if      (n == "if")    lt = line_type::cmd_if;
+              else if (n == "if!")   lt = line_type::cmd_ifn;
+              else if (n == "while") lt = line_type::cmd_while;
+              else if (n == "for")   lt = line_type::cmd_for_stream;
             }
 
             break;
@@ -379,8 +437,6 @@ namespace build2
 
         // Pre-parse the line keeping track of whether it ends with a semi.
         //
-        bool semi (false);
-
         line ln;
         switch (lt)
         {
@@ -407,76 +463,147 @@ namespace build2
             mode (lexer_mode::variable_line);
             parse_variable_line (t, tt);
 
+            // Note that the semicolon token is only required during
+            // pre-parsing to decide which line list the current line should
+            // go to and provides no additional semantics during the
+            // execution. Moreover, build2::script::parser::exec_lines()
+            // doesn't expect this token to be present. Thus, we just drop
+            // this token from the saved tokens.
+            //
             semi = (tt == type::semi);
 
-            if (tt == type::semi)
+            if (semi)
+            {
+              replay_pop ();
               next (t, tt);
+            }
 
             if (tt != type::newline)
               fail (t) << "expected newline instead of " << t;
 
             break;
           }
+          //
+          // See pre_parse_line_start() for details.
+          //
+        case line_type::cmd_for_args: assert (false); break;
+        case line_type::cmd_for_stream:
+          {
+            // First we need to sense the next few tokens and detect which
+            // form of the for-loop this actually is (see
+            // libbuild2/build/script/parser.cxx for details).
+            //
+            token pt (t);
+            assert (pt.type == type::word && pt.value == "for");
+
+            mode (lexer_mode::for_loop);
+            next (t, tt);
+
+            string& n (t.value);
+
+            if (tt == type::word && t.qtype == quote_type::unquoted &&
+                (n[0] == '_' || alpha (n[0]) ||     // Variable.
+                 n == "*" || n == "~" || n == "@")) // Special variable.
+            {
+              // Detect patterns analogous to parse_variable_name() (so we
+              // diagnose `for x[string]: ...`).
+              //
+              if (n.find_first_of ("[*?") != string::npos)
+                fail (t) << "expected variable name instead of " << n;
+
+              if (special_variable (n))
+                fail (t) << "attempt to set '" << n << "' variable directly";
+
+              if (lexer_->peek_char ().first == '[')
+              {
+                token vt (move (t));
+                next_with_attributes (t, tt);
+
+                attributes_push (t, tt,
+                                 true /* standalone */,
+                                 false /* next_token */);
+
+                t = move (vt);
+                tt = t.type;
+              }
+
+              if (lexer_->peek_char ().first == ':')
+                lt = line_type::cmd_for_args;
+            }
+
+            if (lt == line_type::cmd_for_stream) // for x <...
+            {
+              ln.var = nullptr;
+
+              expire_mode ();
+
+              parse_command_expr_result r (
+                parse_command_expr (t, tt,
+                                    lexer::redirect_aliases,
+                                    move (pt)));
+
+              assert (r.for_loop);
+
+              parse_command_tail ();
+              parse_here_documents (t, tt, r);
+            }
+            else                                 // for x: ...
+            {
+              ln.var = &script_->var_pool.insert (move (n));
+
+              next (t, tt);
+
+              assert (tt == type::colon);
+
+              expire_mode ();
+
+              // Parse the value similar to the var line type (see above),
+              // except for the fact that we don't expect a trailing semicolon.
+              //
+              mode (lexer_mode::variable_line);
+              parse_variable_line (t, tt);
+
+              if (tt != type::newline)
+                fail (t) << "expected newline instead of " << t << " after for";
+            }
+
+            break;
+          }
         case line_type::cmd_elif:
         case line_type::cmd_elifn:
         case line_type::cmd_else:
-        case line_type::cmd_end:
           {
-            if (!if_line)
-            {
+            if (!fct || *fct != line_type::cmd_if)
               fail (t) << lt << " without preceding 'if'";
-            }
+          }
+          // Fall through.
+        case line_type::cmd_end:
+          {
+            if (!fct)
+              fail (t) << lt << " without preceding 'if', 'for', or 'while'";
           }
           // Fall through.
         case line_type::cmd_if:
         case line_type::cmd_ifn:
+        case line_type::cmd_while:
           next (t, tt); // Skip to start of command.
           // Fall through.
         case line_type::cmd:
           {
-            pair<command_expr, here_docs> p;
+            parse_command_expr_result r;
 
             if (lt != line_type::cmd_else && lt != line_type::cmd_end)
-              p = parse_command_expr (t, tt, lexer::redirect_aliases);
+              r = parse_command_expr (t, tt, lexer::redirect_aliases);
 
-            // Colon and semicolon are only valid in test command lines and
-            // after 'end' in if-else. Note that we still recognize them
-            // lexically, they are just not valid tokens per the grammar.
-            //
-            if (tt != type::newline)
+            if (r.for_loop)
             {
-              if (lt != line_type::cmd && lt != line_type::cmd_end)
-                fail (t) << "expected newline instead of " << t;
-
-              switch (st)
-              {
-              case type::plus:  fail (t) << t << " after setup command" << endf;
-              case type::minus: fail (t) << t << " after teardown command" << endf;
-              }
+              lt     = line_type::cmd_for_stream;
+              ln.var = nullptr;
             }
 
-            switch (tt)
-            {
-            case type::colon:
-              {
-                if (d)
-                  fail (ll) << "both leading and trailing descriptions";
-
-                d = parse_trailing_description (t, tt);
-                break;
-              }
-            case type::semi:
-              {
-                semi = true;
-                next (t, tt); // Get newline.
-                break;
-              }
-            }
-
-            if (tt != type::newline)
-              fail (t) << "expected newline instead of " << t;
+            parse_command_tail ();
+            parse_here_documents (t, tt, r);
 
-            parse_here_documents (t, tt, p);
             break;
           }
         }
@@ -494,24 +621,39 @@ namespace build2
         ln.tokens = replay_data ();
         ls->push_back (move (ln));
 
-        if (lt == line_type::cmd_if || lt == line_type::cmd_ifn)
+        switch (lt)
         {
-          semi = pre_parse_if_else (t, tt, d, *ls);
+        case line_type::cmd_if:
+        case line_type::cmd_ifn:
+          {
+            semi = pre_parse_if_else (t, tt, d, *ls);
 
-          // If this turned out to be scope-if, then ls is empty, semi is
-          // false, and none of the below logic applies.
-          //
-          if (ls->empty ())
-            return semi;
+            // If this turned out to be scope-if, then ls is empty, semi is
+            // false, and none of the below logic applies.
+            //
+            if (ls->empty ())
+              return semi;
+
+            break;
+          }
+        case line_type::cmd_while:
+        case line_type::cmd_for_stream:
+        case line_type::cmd_for_args:
+          {
+            semi = pre_parse_loop (t, tt, lt, d, *ls);
+            break;
+          }
+        default: break;
         }
 
         // Unless we were told where to put it, decide where it actually goes.
         //
         if (ls == &ls_data)
         {
-          // First pre-check variable and variable-if: by themselves (i.e.,
-          // without a trailing semicolon) they are treated as either setup or
-          // teardown without plus/minus. Also handle illegal line types.
+          // First pre-check variables and variable-only flow control
+          // constructs: by themselves (i.e., without a trailing semicolon)
+          // they are treated as either setup or teardown without
+          // plus/minus. Also handle illegal line types.
           //
           switch (lt)
           {
@@ -524,8 +666,11 @@ namespace build2
             }
           case line_type::cmd_if:
           case line_type::cmd_ifn:
+          case line_type::cmd_while:
+          case line_type::cmd_for_stream:
+          case line_type::cmd_for_args:
             {
-              // See if this is a variable-only command-if.
+              // See if this is a variable-only flow control construct.
               //
               if (find_if (ls_data.begin (), ls_data.end (),
                            [] (const line& l) {
@@ -549,7 +694,7 @@ namespace build2
                     fail (ll) << "description before setup/teardown variable";
                   else
                     fail (ll) << "description before/after setup/teardown "
-                              << "variable-if";
+                              << "variable-only " << lt;
                 }
 
                 // If we don't have any nested scopes or teardown commands,
@@ -793,7 +938,7 @@ namespace build2
                                      td,
                                      &ls,
                                      true /* one */,
-                                     true /* if_line */));
+                                     line_type::cmd_if));
 
           assert (ls.size () == 1 && ls.back ().type == lt);
           assert (tt == type::newline);
@@ -831,6 +976,99 @@ namespace build2
         return false; // We never end with a semi.
       }
 
+      // Pre-parse one line of the bt flow control construct block. Return true
+      // if it is 'end;'. Fail on semicolon/description after any other line.
+      //
+      bool parser::
+      pre_parse_block_line (token& t, type& tt,
+                            line_type bt,
+                            optional<description>& d,
+                            lines& ls)
+      {
+        // enter: peeked first token of the line (type in tt)
+        // leave: newline
+
+        const location ll (get_location (peeked ()));
+
+        switch (tt)
+        {
+        case type::colon:
+          fail (ll) << "description inside " << bt << endf;
+        case type::eos:
+        case type::rcbrace:
+        case type::lcbrace:
+          fail (ll) << "expected closing 'end'" << endf;
+        case type::plus:
+          fail (ll) << "setup command inside " << bt << endf;
+        case type::minus:
+          fail (ll) << "teardown command inside " << bt << endf;
+        }
+
+        // Parse one line. Note that this one line can still be multiple lines
+        // in case of a flow control construct. In this case we want to view
+        // it as, for example, cmd_if, not cmd_end. Thus remember the start
+        // position of the next logical line.
+        //
+        size_t i (ls.size ());
+
+        line_type fct; // Flow control construct type the block type relates to.
+
+        switch (bt)
+        {
+        case line_type::cmd_if:
+        case line_type::cmd_ifn:
+        case line_type::cmd_elif:
+        case line_type::cmd_elifn:
+        case line_type::cmd_else:
+          {
+            fct = line_type::cmd_if;
+            break;
+          }
+        case line_type::cmd_while:
+        case line_type::cmd_for_stream:
+        case line_type::cmd_for_args:
+          {
+            fct = bt;
+            break;
+          }
+        default: assert(false); // Not a flow control block type.
+        }
+
+        optional<description> td; // Trailing description, if any.
+        bool semi (pre_parse_line (t, tt, td, &ls, true /* one */, fct));
+
+        assert (tt == type::newline);
+
+        line_type lt (ls[i].type); // Type of the first line parsed.
+
+        // First take care of 'end'.
+        //
+        if (lt == line_type::cmd_end)
+        {
+          if (td)
+          {
+            if (d)
+              fail (ll) << "both leading and trailing descriptions";
+
+            d = move (td);
+          }
+
+          return semi;
+        }
+
+        // For any other line trailing semi or description is illegal.
+        //
+        // @@ Not the exact location of semi/colon.
+        //
+        if (semi)
+          fail (ll) << "';' inside " << bt;
+
+        if (td)
+          fail (ll) << "description inside " << bt;
+
+        return false;
+      }
+
       bool parser::
       pre_parse_if_else_command (token& t, type& tt,
                                  optional<description>& d,
@@ -839,70 +1077,23 @@ namespace build2
         // enter: peeked first token of next line (type in tt)
         // leave: newline
 
-        // Parse lines until we see closing 'end'. Nested if-else blocks are
-        // handled recursively.
+        // Parse lines until we see closing 'end'.
         //
         for (line_type bt (line_type::cmd_if); // Current block.
              ;
              tt = peek (lexer_mode::first_token))
         {
           const location ll (get_location (peeked ()));
-
-          switch (tt)
-          {
-          case type::colon:
-            fail (ll) << "description inside " << bt << endf;
-          case type::eos:
-          case type::rcbrace:
-          case type::lcbrace:
-            fail (ll) << "expected closing 'end'" << endf;
-          case type::plus:
-            fail (ll) << "setup command inside " << bt << endf;
-          case type::minus:
-            fail (ll) << "teardown command inside " << bt << endf;
-          }
-
-          // Parse one line. Note that this one line can still be multiple
-          // lines in case of if-else. In this case we want to view it as
-          // cmd_if, not cmd_end. Thus remember the start position of the
-          // next logical line.
-          //
           size_t i (ls.size ());
 
-          optional<description> td;
-          bool semi (pre_parse_line (t, tt,
-                                     td,
-                                     &ls,
-                                     true /* one */,
-                                     true /* if_line */));
-          assert (tt == type::newline);
+          bool semi (pre_parse_block_line (t, tt, bt, d, ls));
 
           line_type lt (ls[i].type);
 
           // First take care of 'end'.
           //
           if (lt == line_type::cmd_end)
-          {
-            if (td)
-            {
-              if (d)
-                fail (ll) << "both leading and trailing descriptions";
-
-              d = move (td);
-            }
-
             return semi;
-          }
-
-          // For any other line trailing semi or description is illegal.
-          //
-          // @@ Not the exact location of semi/colon.
-          //
-          if (semi)
-            fail (ll) << "';' inside " << bt;
-
-          if (td)
-            fail (ll) << "description inside " << bt;
 
           // Check if-else block sequencing.
           //
@@ -924,6 +1115,40 @@ namespace build2
           default: break;
           }
         }
+
+        assert (false); // Can't be here.
+        return false;
+      }
+
+      bool parser::
+      pre_parse_loop (token& t, type& tt,
+                      line_type lt,
+                      optional<description>& d,
+                      lines& ls)
+      {
+        // enter: <newline> (previous line)
+        // leave: <newline>
+
+        assert (lt == line_type::cmd_while      ||
+                lt == line_type::cmd_for_stream ||
+                lt == line_type::cmd_for_args);
+
+        tt = peek (lexer_mode::first_token); // Peek at the first block line.
+
+        // Parse block lines until we see the closing 'end'.
+        //
+        for (;; tt = peek (lexer_mode::first_token))
+        {
+          size_t i (ls.size ()); // Index of the line about to be parsed.
+
+          bool semi (pre_parse_block_line (t, tt, lt, d, ls));
+
+          if (ls[i].type == line_type::cmd_end)
+            return semi; // True if 'end' is followed by ';'.
+        }
+
+        assert (false); // Can't be here.
+        return false;
       }
 
       void parser::
@@ -1057,7 +1282,7 @@ namespace build2
 
           diag_record dr (fail (dl));
           dr << "invalid testscript include path ";
-          to_stream (dr.os, n, true); // Quote.
+          to_stream (dr.os, n, quote_mode::normal);
         }
       }
 
@@ -1266,21 +1491,18 @@ namespace build2
 
         // Note: this one is only used during execution.
 
-        pair<command_expr, here_docs> p (
+        parse_command_expr_result pr (
           parse_command_expr (t, tt, lexer::redirect_aliases));
 
-        switch (tt)
-        {
-        case type::colon: parse_trailing_description (t, tt); break;
-        case type::semi: next (t, tt); break; // Get newline.
-        }
+        if (tt == type::colon)
+          parse_trailing_description (t, tt);
 
         assert (tt == type::newline);
 
-        parse_here_documents (t, tt, p);
+        parse_here_documents (t, tt, pr);
         assert (tt == type::newline);
 
-        command_expr r (move (p.first));
+        command_expr r (move (pr.expr));
 
         // If the test program runner is specified, then adjust the
         // expressions to run test programs via this runner.
@@ -1402,9 +1624,6 @@ namespace build2
           mode (lexer_mode::variable_line);
           value rhs (parse_variable_line (t, tt));
 
-          if (tt == type::semi)
-            next (t, tt);
-
           assert (tt == type::newline);
 
           // Assign.
@@ -1424,8 +1643,9 @@ namespace build2
         command_type ct;
 
         auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt,
-                                     size_t li,
+                                     const iteration_index* ii, size_t li,
                                      bool single,
+                                     const function<command_function>& cf,
                                      const location& ll)
         {
           // We use the 0 index to signal that this is the only command.
@@ -1437,19 +1657,35 @@ namespace build2
           command_expr ce (
             parse_command_line (t, static_cast<token_type&> (tt)));
 
-          runner_->run (*scope_, ce, ct, li, ll);
+          runner_->run (*scope_, ce, ct, ii, li, cf, ll);
         };
 
-        auto exec_if = [this] (token& t, build2::script::token_type& tt,
-                               size_t li,
-                               const location& ll)
+        auto exec_cond = [this] (token& t, build2::script::token_type& tt,
+                                 const iteration_index* ii, size_t li,
+                                 const location& ll)
         {
           command_expr ce (
             parse_command_line (t, static_cast<token_type&> (tt)));
 
-          // Assume if-else always involves multiple commands.
+          // Assume a flow control construct always involves multiple
+          // commands.
           //
-          return runner_->run_if (*scope_, ce, li, ll);
+          return runner_->run_cond (*scope_, ce, ii, li, ll);
+        };
+
+        auto exec_for = [this] (const variable& var,
+                                value&& val,
+                                const attributes& val_attrs,
+                                const location&)
+        {
+          value& lhs (scope_->assign (var));
+
+          attributes_.push_back (val_attrs);
+
+          apply_value_attributes (&var, lhs, move (val), type::assign);
+
+          if (script_->test_command_var (var.name))
+            scope_->reset_special ();
         };
 
         size_t li (1);
@@ -1459,16 +1695,17 @@ namespace build2
           ct = command_type::test;
 
           exec_lines (t->tests_.begin (), t->tests_.end (),
-                      exec_set, exec_cmd, exec_if,
-                      li);
+                      exec_set, exec_cmd, exec_cond, exec_for,
+                      nullptr /* iteration_index */, li);
         }
         else if (group* g = dynamic_cast<group*> (scope_))
         {
           ct = command_type::setup;
 
-          bool exec_scope (exec_lines (g->setup_.begin (), g->setup_.end (),
-                                       exec_set, exec_cmd, exec_if,
-                                       li));
+          bool exec_scope (
+            exec_lines (g->setup_.begin (), g->setup_.end (),
+                        exec_set, exec_cmd, exec_cond, exec_for,
+                        nullptr /* iteration_index */, li));
 
           if (exec_scope)
           {
@@ -1526,7 +1763,8 @@ namespace build2
 
                   try
                   {
-                    take = runner_->run_if (*scope_, ce, li++, ll);
+                    take = runner_->run_cond (
+                      *scope_, ce, nullptr /* iteration_index */, li++, ll);
                   }
                   catch (const exit_scope& e)
                   {
@@ -1593,24 +1831,24 @@ namespace build2
                 // UBSan workaround.
                 //
                 const diag_frame* df (diag_frame::stack ());
-                if (!ctx.sched.async (task_count,
-                                      [] (const diag_frame* ds,
-                                          scope& s,
-                                          script& scr,
-                                          runner& r)
-                                      {
-                                        diag_frame::stack_guard dsg (ds);
-                                        execute_impl (s, scr, r);
-                                      },
-                                      df,
-                                      ref (*chain),
-                                      ref (*script_),
-                                      ref (*runner_)))
+                if (!ctx->sched->async (task_count,
+                                        [] (const diag_frame* ds,
+                                            scope& s,
+                                            script& scr,
+                                            runner& r)
+                                        {
+                                          diag_frame::stack_guard dsg (ds);
+                                          execute_impl (s, scr, r);
+                                        },
+                                        df,
+                                        ref (*chain),
+                                        ref (*script_),
+                                        ref (*runner_)))
                 {
                   // Bail out if the scope has failed and we weren't instructed
                   // to keep going.
                   //
-                  if (chain->state == scope_state::failed && !ctx.keep_going)
+                  if (chain->state == scope_state::failed && !ctx->keep_going)
                     throw failed ();
                 }
               }
@@ -1637,8 +1875,8 @@ namespace build2
           ct = command_type::teardown;
 
           exec_lines (g->tdown_.begin (), g->tdown_.end (),
-                      exec_set, exec_cmd, exec_if,
-                      li);
+                      exec_set, exec_cmd, exec_cond, exec_for,
+                      nullptr /* iteration_index */, li);
         }
         else
           assert (false);
@@ -1652,7 +1890,8 @@ namespace build2
       // The rest.
       //
 
-      // When add a special variable don't forget to update lexer::word().
+      // When adding a special variable don't forget to update lexer::word()
+      // and for-loop parsing in pre_parse_line().
       //
       bool parser::
       special_variable (const string& n) noexcept
@@ -1661,7 +1900,7 @@ namespace build2
       }
 
       lookup parser::
-      lookup_variable (name&& qual, string&& name, const location& loc)
+      lookup_variable (names&& qual, string&& name, const location& loc)
       {
         if (pre_parse_)
           return lookup ();