1 files changed, 1391 insertions, 482 deletions
diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx
index 58ba23d..f8f98c1 100644
--- a/libbuild2/script/run.cxx
+++ b/libbuild2/script/run.cxx
@@ -9,13 +9,13 @@
 #  include <libbutl/win32-utility.hxx> // DBG_TERMINATE_PROCESS
 #endif
 
-#include <ios> // streamsize
+#include <ios>     // streamsize
+#include <cstring> // strchr()
 
-#include <libbutl/regex.mxx>
-#include <libbutl/builtin.mxx>
-#include <libbutl/fdstream.mxx>     // fdopen_mode, fddup()
-#include <libbutl/filesystem.mxx>   // path_search()
-#include <libbutl/path-pattern.mxx>
+#include <libbutl/regex.hxx>
+#include <libbutl/builtin.hxx>
+#include <libbutl/fdstream.hxx>     // fdopen_mode, fddup()
+#include <libbutl/filesystem.hxx>   // path_search()
 
 #include <libbuild2/filesystem.hxx>
 #include <libbuild2/diagnostics.hxx>
@@ -27,6 +27,8 @@
 using namespace std;
 using namespace butl;
 
+namespace cli = build2::build::cli;
+
 namespace build2
 {
   namespace script
@@ -531,7 +533,7 @@ namespace build2
         };
 
         // Save the regex to file for troubleshooting, return the file path
-        // it have been saved to.
+        // it has been saved to.
         //
         // Note that we save the regex on line regex creation failure or if
         // the program output doesn't match.
@@ -645,33 +647,45 @@ namespace build2
           }
         }
 
-        // Create line regex.
+        // Issue regex error diagnostics and fail.
         //
-        line_regex regex;
-
-        try
+        auto fail_regex = [&rl, &rd, &loc, &env, &output_info, &save_regex]
+                          (const regex_error& e, const string& what)
         {
-          regex = line_regex (move (rls), move (pool));
-        }
-        catch (const regex_error& e)
-        {
-          // Note that line regex creation can not fail for here-string
-          // redirect as it doesn't have syntax line chars. That in
-          // particular means that end_line and end_column are meaningful.
+          const auto& ls (rl.lines);
+
+          // Note that the parser treats both empty here-string (for example
+          // >:~'') and empty here-document redirects as an error and so there
+          // should be at least one line in the list.
           //
-          assert (rd.type == redirect_type::here_doc_regex);
+          assert (!ls.empty ());
 
-          diag_record d (fail (loc (rd.end_line, rd.end_column)));
+          diag_record d (fail (rd.type == redirect_type::here_doc_regex
+                               ? loc (rd.end_line, rd.end_column)
+                               : loc (ls[0].line, ls[0].column)));
 
           // Print regex_error description if meaningful.
           //
-          d << "invalid " << what << " regex redirect" << e;
+          d << what << " regex redirect" << e;
 
           // It would be a waste to save the regex into the file just to
           // remove it.
           //
           if (env.temp_dir_keep)
             output_info (d, save_regex (), "", " regex");
+        };
+
+        // Create line regex.
+        //
+        line_regex regex;
+
+        try
+        {
+          regex = line_regex (move (rls), move (pool));
+        }
+        catch (const regex_error& e)
+        {
+          fail_regex (e, string ("invalid ") + what);
         }
 
         // Parse the output into the literal line string.
@@ -711,6 +725,26 @@ namespace build2
             while (!s.empty () && s.back () == '\r')
               s.pop_back ();
 
+            // Some regex implementations (e.g., libstdc++, MSVC) are unable
+            // to match long strings which they "signal" by running out of
+            // stack or otherwise crashing instead of throwing an exception.
+            // So we impose some sensible limit that all of them are able to
+            // handle for basic expressions (e.g., [ab]+; GCC's limits are the
+            // lowest, see bug 86164). See also another check (for the number
+            // of lines) below.
+            //
+            // BTW, if we ever need to overcome this limitation (along with
+            // various hacks for the two-dimensional regex support), one way
+            // would be to factor libc++'s implementation (which doesn't seem
+            // to have any stack-related limits) and use it everywhere.
+            //
+            if (s.size () > 16384)
+            {
+              diag_record d (fail (ll));
+              d << pr << " " << what << " lines too long to match with regex";
+              output_info (d, op);
+            }
+
             ls += line_char (move (s), regex.pool);
           }
         }
@@ -719,10 +753,56 @@ namespace build2
           fail (ll) << "unable to read " << op << ": " << e;
         }
 
+        if (ls.size () > 12288)
+        {
+          diag_record d (fail (ll));
+          d << pr << " " << what << " has too many lines to match with regex";
+          output_info (d, op);
+        }
+
+        // Note that a here-document regex without ':' modifier can never
+        // match an empty output since it always contains the trailing empty
+        // line-char. This can be confusing, as for example while testing a
+        // program which can print some line or nothing with the following
+        // test:
+        //
+        // $* >>~%EOO%
+        //   %(
+        //   Hello, World!
+        //   %)?
+        //   EOO
+        //
+        // Note that the above line-regex contains 4 line-chars and will never
+        // match empty output.
+        //
+        // Thus, let's complete an empty output with an empty line-char for
+        // such a regex, so it may potentially match.
+        //
+        if (ls.empty ()                              &&
+            rd.type == redirect_type::here_doc_regex &&
+            rd.modifiers ().find (':') == string::npos)
+        {
+          ls += line_char (string (), regex.pool);
+        }
+
         // Match the output with the regex.
         //
-        if (regex_match (ls, regex)) // Doesn't throw.
-          return true;
+        // Note that we don't distinguish between the line_regex and
+        // char_regex match failures. While it would be convenient for the
+        // user if we provide additional information in the latter case (regex
+        // line number, etc), the implementation feels too hairy for now
+        // (would require pulling additional information into char_regex,
+        // etc). Though, we may want to implement it in the future.
+        //
+        try
+        {
+          if (regex_match (ls, regex))
+            return true;
+        }
+        catch (const regex_error& e)
+        {
+          fail_regex (e, string ("unable to match ") + what);
+        }
 
         // Output doesn't match the regex.
         //
@@ -730,7 +810,7 @@ namespace build2
         // regex to file for troubleshooting regardless of whether we print
         // the diagnostics or not. We, however, register it for cleanup in the
         // later case (the expression may still succeed, we can be evaluating
-        // the if condition, etc).
+        // the flow control construct condition, etc).
         //
         optional<path> rp;
         if (env.temp_dir_keep)
@@ -782,7 +862,7 @@ namespace build2
         //
         auto verify_name = [&ll] (const string& name, const char* opt)
         {
-          verify_environment_var_name (name, opt, "export: ", ll);
+          verify_environment_var_name (name, "export: ", ll, opt);
         };
 
         // Parse options (variable set/unset cleanups and unsets).
@@ -892,200 +972,712 @@ namespace build2
              : path (c.program.recall_string ());
     }
 
-    // The set pseudo-builtin: set variable from the stdin input.
+    // Read the stream content into a string, optionally splitting the input
+    // data at whitespaces or newlines in which case return one, potentially
+    // incomplete, substring at a time (see the set builtin options for the
+    // splitting semantics). Throw io_error on the underlying OS error.
     //
-    // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var>
+    // On POSIX expects the stream to be non-blocking and its exception mask
+    // to have at least badbit. On Windows can also handle a blocking stream.
     //
-    static void
-    set_builtin (environment& env,
-                 const strings& args,
-                 auto_fd in,
+    // Note that on Windows we can only turn pipe file descriptors into the
+    // non-blocking mode. Thus, we have no choice but to read from descriptors
+    // of other types synchronously there. That implies that we can
+    // potentially block indefinitely reading a file and missing a deadline on
+    // Windows. Note though, that the user can normally rewrite the command,
+    // for example, `set foo <<<file` with `cat file | set foo` to avoid this
+    // problem.
+    //
+    class stream_reader
+    {
+    public:
+      stream_reader (ifdstream&, bool whitespace, bool newline, bool exact);
+
+      // Read the next substring. Return true if it has been read or false if
+      // the function should be called again, with the same (potentially
+      // partially filled) string, once the stream has more data. Also return
+      // true on eof (in which case no substring is read). The string must be
+      // empty on the first call. Throw ios::failure on an OS error.
+      //
+      // Note that there could still be data to read in the stream's buffer
+      // (as opposed to file descriptor) after this function returns true and
+      // you should be careful not to block on fdselect() in this case. The
+      // recommended usage pattern is similar to that of
+      // butl::getline_non_blocking(). The only difference is that
+      // ifdstream::eof() needs to be used instead of butl::eof() since this
+      // function doesn't set failbit and only sets eofbit after the last
+      // substring is returned.
+      //
+      bool
+      next (string&);
+
+    private:
+      ifdstream& is_;   // Stream to read from (referenced, not owned).
+      bool whitespace_; // Split at whitespaces, collapsing consecutive ones.
+      bool newline_;    // Split at newlines, without collapsing them.
+      bool exact_;      // Keep trailing blank/newline. Reset by next() at eof.
+
+      bool empty_ = true; // Set to false after the first character is read.
+    };
+
+    stream_reader::
+    stream_reader (ifdstream& is, bool ws, bool nl, bool ex)
+        : is_ (is),
+          whitespace_ (ws),
+          newline_ (nl),
+          exact_ (ex)
+    {
+    }
+
+    bool stream_reader::
+    next (string& ss)
+    {
 #ifndef _WIN32
-                 bool,
+      assert ((is_.exceptions () & ifdstream::badbit) != 0 && !is_.blocking ());
 #else
-                 bool pipe,
+      assert ((is_.exceptions () & ifdstream::badbit) != 0);
 #endif
-                 const optional<deadline>& dl,
-                 const command& deadline_cmd,
-                 const location& ll)
-    {
-      try
+
+      fdstreambuf& sb (*static_cast<fdstreambuf*> (is_.rdbuf ()));
+
+      // Return the number of characters available in the stream buffer's get
+      // area, which can be:
+      //
+      // -1 -- EOF.
+      //  0 -- no data since blocked before encountering more data/EOF.
+      // >0 -- there is some data.
+      //
+      // Note that on Windows if the stream is blocking, then the lambda calls
+      // underflow() instead of returning 0.
+      //
+      // @@ Probably we can call underflow() only once per the next() call,
+      //    emulating the 'no data' case. This will allow the caller to
+      //    perform some housekeeping (reading other streams, checking for the
+      //    deadline, etc). But let's keep it simple for now.
+      //
+      auto avail = [&sb] () -> streamsize
       {
-        // Parse arguments.
+        // Note that here we reasonably assume that any failure in in_avail()
+        // will lead to badbit and thus an exception (see showmanyc()).
         //
-        cli::vector_scanner scan (args);
-        set_options ops (scan);
+        streamsize r (sb.in_avail ());
 
-        if (ops.whitespace () && ops.newline ())
-          fail (ll) << "set: both -n|--newline and -w|--whitespace specified";
+#ifdef _WIN32
+        if (r == 0 && sb.blocking ())
+        {
+          if (sb.underflow () == ifdstream::traits_type::eof ())
+            return -1;
 
-        if (!scan.more ())
-          fail (ll) << "set: missing variable name";
+          r = sb.in_avail ();
 
-        string a (scan.next ()); // Either attributes or variable name.
-        const string* ats (!scan.more () ? nullptr : &a);
-        string vname (!scan.more () ? move (a) : scan.next ());
+          assert (r != 0); // We wouldn't be here otherwise.
+        }
+#endif
 
-        if (scan.more ())
-          fail (ll) << "set: unexpected argument '" << scan.next () << "'";
+        return r;
+      };
 
-        if (ats != nullptr && ats->empty ())
-          fail (ll) << "set: empty variable attributes";
+      // Read until blocked (0), EOF (-1) or encounter the delimiter.
+      //
+      streamsize s;
+      while ((s = avail ()) > 0)
+      {
+        if (empty_)
+          empty_ = false;
 
-        if (vname.empty ())
-          fail (ll) << "set: empty variable name";
+        const char* p (sb.gptr ());
+        size_t n (sb.egptr () - p);
 
-        // Read out the stream content into a string while keeping an eye on
-        // the deadline. Then parse it according to the split mode.
+        // We move p and bump by the number of consumed characters.
         //
-        string s;
+        auto bump = [&sb, &p] () {sb.gbump (static_cast<int> (p - sb.gptr ()));};
+
+        if (whitespace_) // The whitespace mode.
         {
-          ifdstream cin;
+          const char* sep (" \n\r\t");
 
-          // If the execution deadline is specified, then turn the stream into
-          // the non-blocking mode reading its content in chunks and with a
-          // single operation otherwise. If the specified deadline is reached
-          // while reading the stream, then bail out for the successful
-          // deadline and fail otherwise. Note that in the former case the
-          // variable value will be incomplete, but we leave it to the caller
-          // to handle that.
+          // Skip the whitespaces, unless in the middle of a word (see below).
           //
-          // Note that on Windows we can only turn pipe file descriptors into
-          // the non-blocking mode. Thus, we have no choice but to read from
-          // descriptors of other types synchronously there. That implies that
-          // we can potentially block indefinitely reading a file and missing
-          // the deadline on Windows. Note though, that the user can always
-          // rewrite `set foo <<<file` with `cat file | set foo` to avoid this
-          // problem.
+          for (; ss.empty () && n != 0 && strchr (sep, *p) != nullptr; ++p, --n) ;
+
+          // If anything is left in the get area, then append the characters
+          // until a whitespace is encountered. Note that it can be the first
+          // character if the word was started by the previous chunk of data.
           //
-#ifndef _WIN32
-          if (dl)
-#else
-          if (dl && pipe)
-#endif
+          if (n != 0)
           {
-            fdselect_set fds {in.get ()};
-            cin.open (move (in), fdstream_mode::non_blocking);
+            // Append the non-whitespace characters.
+            //
+            for (char c; n != 0 && strchr (sep, c = *p) == nullptr; ++p, --n)
+              ss += c;
+
+            // If a separator is encountered, then consume it, bump, and
+            // return the substring.
+            //
+            if (n != 0)
+            {
+              ++p; --n; // Consume the separator character.
 
-            const timestamp& dlt (dl->value);
+              bump ();
+              return true;
+            }
+
+            // Fall through.
+          }
+
+          bump (); // Bump and continue reading.
+        }
+        else             // The newline or no-split mode.
+        {
+          // Note that we don't collapse multiple consecutive newlines.
+          //
+          // Note also that we always sanitize CRs, so in the no-split mode we
+          // need to loop rather than consume the whole get area at once.
+          //
+          while (n != 0)
+          {
+            // Append the characters until the newline character or the end of
+            // the get area is encountered.
+            //
+            char c;
+            for (; n != 0 && (c = *p) != '\n'; ++p, --n)
+              ss += c;
 
-            for (char buf[4096];; )
+            // If the newline character is encountered, then sanitize CRs and
+            // return the substring in the newline mode and continue
+            // parsing/reading otherwise.
+            //
+            if (n != 0)
             {
-              timestamp now (system_clock::now ());
+              // Strip the trailing CRs that can appear while, for example,
+              // cross-testing Windows target or as a part of msvcrt junk
+              // production (see above).
+              //
+              while (!ss.empty () && ss.back () == '\r')
+                ss.pop_back ();
 
-              if (dlt <= now || ifdselect (fds, dlt - now) == 0)
+              assert (c == '\n');
+
+              ++p; --n; // Consume the newline character.
+
+              if (newline_)
               {
-                if (!dl->success)
-                  fail (ll) << cmd_path (deadline_cmd)
-                            << " terminated: execution timeout expired";
-                else
-                  break;
+                bump ();
+                return true;
               }
 
-              streamsize n (cin.readsome (buf, sizeof (buf)));
-
-              // Bail out if eos is reached.
-              //
-              if (n == 0)
-                break;
+              ss += c; // Append newline to the resulting string.
 
-              s.append (buf, n);
+              // Fall through.
             }
+
+            bump (); // Bump and continue parsing/reading.
           }
-          else
+        }
+      }
+
+      // Here s can be:
+      //
+      // -1 -- EOF.
+      //  0 -- blocked before encountering delimiter/EOF.
+      //
+      // Note: >0 (encountered the delimiter) case is handled in-place.
+      //
+      assert (s == -1 || s == 0);
+
+      if (s == -1)
+      {
+        // Return the last substring if it is not empty or it is the trailing
+        // "blank" in the exact mode. Otherwise, set eofbit for the stream
+        // indicating that we are done.
+        //
+        if (!ss.empty () || (exact_ && !empty_))
+        {
+          // Also, strip the trailing newline character, if present, in the
+          // no-split no-exact mode.
+          //
+          if (!ss.empty () && ss.back () == '\n' && // Trailing newline.
+              !newline_ && !whitespace_ && !exact_) // No-split no-exact mode.
           {
-            cin.open (move (in));
-            s = cin.read_text ();
+            ss.pop_back ();
           }
 
-          cin.close ();
+          exact_ = false; // Make sure we will set eofbit on the next call.
         }
+        else
+          is_.setstate (ifdstream::eofbit);
+      }
 
-        // Parse the stream content into the variable value.
+      return s == -1;
+    }
+
+    // Stack-allocated linked list of information about the running pipeline
+    // processes and builtins.
+    //
+    // Note: constructed incrementally.
+    //
+    struct pipe_command
+    {
+      // Initially NULL. Set to the address of the process or builtin object
+      // when it is created. Reset back to NULL when the respective
+      // process/builtin is executed and its exit status is collected (see
+      // complete_pipe() for details).
+      //
+      // We could probably use a union here, but let's keep it simple for now
+      // (at least one is NULL).
+      //
+      process* proc = nullptr;
+      builtin* bltn = nullptr;
+
+      const command&            cmd;            // Referenced, not copied.
+      const cstrings*           args = nullptr; // Not set by the constructor.
+      const optional<deadline>& dl;             // Referenced, not copied.
+
+      diag_buffer dbuf; // dbuf.is is polled by read() as the command's stderr.
+
+      bool terminated = false; // Set by term_pipe() (also for builtins).
+
+      // True if this command has been terminated but we failed to read out
+      // its stdout and/or stderr streams in the reasonable timeframe (2
+      // seconds) after the termination.
+      //
+      // Note that this may happen if there is a still running child process
+      // of the terminated command which has inherited the parent's stdout and
+      // stderr file descriptors.
+      //
+      bool unread_stdout = false;
+      bool unread_stderr = false;
+
+      // Only for diagnostics.
+      //
+      const location& loc;
+      const path*     isp = nullptr; // stdin  cache.
+      const path*     osp = nullptr; // stdout cache.
+      const path*     esp = nullptr; // stderr cache.
+
+      pipe_command* prev; // NULL for the left-most command.
+      pipe_command* next; // For the right-most command: the left-most one.
+
+      pipe_command (context& x,                   // Passed to dbuf.
+                    const command& c,
+                    const optional<deadline>& d,
+                    const location& l,
+                    pipe_command* p,              // prev
+                    pipe_command* f)              // next
+          : cmd (c), dl (d), dbuf (x), loc (l), prev (p), next (f) {}
+    };
+
+    // Wait for a process/builtin to complete but not past the deadline (just
+    // poll if it's already reached) and return the wait function's result.
+    //
+    template<typename P>
+    static auto
+    timed_wait (P& p, const timestamp& deadline) -> decltype(p.try_wait ())
+    {
+      const timestamp t (system_clock::now ());
+      return t < deadline ? p.timed_wait (deadline - t) : p.try_wait ();
+    }
+
+    // Terminate the pipeline processes starting from the specified one and up
+    // to the leftmost one and then kill those which didn't terminate after 2
+    // seconds.
+    //
+    // After that wait for the pipeline builtins completion. Since their
+    // standard streams should no longer be written to or read from by any
+    // process, that shouldn't take long. If, however, they are unable to
+    // complete in 2 seconds, then some of them are probably stuck while
+    // communicating with a slow filesystem device or similar, and since we
+    // currently have no way to terminate asynchronous builtins, we have no
+    // choice but to abort.
+    //
+    // Issue diagnostics and fail if something goes wrong, but still try to
+    // terminate/kill all the pipe processes.
+    //
+    static void
+    term_pipe (pipe_command* pc, tracer& trace)
+    {
+      auto prog = [] (pipe_command* c) {return cmd_path (c->cmd);};
+
+      // Gracefully terminate the processes (from pc leftwards via prev) and
+      // mark every visited command, builtins included, as terminated.
+      //
+      diag_record dr; // Collects the wait/kill failures reported below.
+      for (pipe_command* c (pc); c != nullptr; c = c->prev)
+      {
+        if (process* p = c->proc)
+        try
+        {
+          l5 ([&]{trace (c->loc) << "terminating: " << c->cmd;});
+
+          p->term ();
+        }
+        catch (const process_error& e)
+        {
+          // If unable to terminate the process for any reason (the process is
+          // exiting on Windows, etc) then just ignore this, postponing the
+          // potential failure till the kill() call.
+          //
+          l5 ([&]{trace (c->loc) << "unable to terminate " << prog (c)
+                                 << ": " << e;});
+        }
+
+        c->terminated = true; // Note: set for builtins as well.
+      }
+
+      // Wait for the processes to terminate (2 seconds in total, not per
+      // process) and kill those still running after that.
+      //
+      timestamp dl (system_clock::now () + chrono::seconds (2));
+
+      for (pipe_command* c (pc); c != nullptr; c = c->prev)
+      {
+        if (process* p = c->proc)
+        try
+        {
+          l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;});
+
+          if (!timed_wait (*p, dl))
+          {
+            l5 ([&]{trace (c->loc) << "killing: " << c->cmd;});
+
+            p->kill ();
+            p->wait ();
+          }
+        }
+        catch (const process_error& e)
+        {
+          dr << fail (c->loc) << "unable to wait/kill " << prog (c) << ": "
+             << e;
+        }
+      }
+
+      // Wait for the builtins to complete (another 2 seconds in total) and
+      // abort if any of them is still running after that.
+      //
+      dl = system_clock::now () + chrono::seconds (2);
+
+      for (pipe_command* c (pc); c != nullptr; c = c->prev)
+      {
+        if (builtin* b = c->bltn)
+        try
+        {
+          l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;});
+
+          if (!timed_wait (*b, dl))
+          {
+            error (c->loc) << prog (c) << " builtin hanged, aborting";
+            terminate (false /* trace */);
+          }
+        }
+        catch (const system_error& e)
+        {
+          dr << fail (c->loc) << "unable to wait for " << prog (c) << ": "
+             << e;
+        }
+      }
+    }
+
+    void
+    read (auto_fd&& in,
+          bool whitespace, bool newline, bool exact,
+          const function<void (string&&)>& cf,
+          pipe_command* pipeline,
+          const optional<deadline>& dl,
+          const location& ll,
+          const char* what)
+    {
+      tracer trace ("script::stream_read");
+
+      // Note: stays blocking on Windows if the descriptor is not of the pipe
+      // type.
+      //
+#ifndef _WIN32
+      fdstream_mode m (fdstream_mode::non_blocking);
+#else
+      fdstream_mode m (pipeline != nullptr
+                       ? fdstream_mode::non_blocking
+                       : fdstream_mode::blocking);
+#endif
+
+      ifdstream is (move (in), m, ifdstream::badbit);
+      stream_reader sr (is, whitespace, newline, exact);
+
+      fdselect_set fds;
+      for (pipe_command* c (pipeline); c != nullptr; c = c->prev)
+      {
+        diag_buffer& b (c->dbuf);
+
+        if (b.is.is_open ())
+          fds.emplace_back (b.is.fd (), c);
+      }
+
+      fds.emplace_back (is.fd ());
+      fdselect_state& ist (fds.back ());
+      size_t unread (fds.size ());
+
+      optional<timestamp> dlt (dl ? dl->value : optional<timestamp> ());
+
+      // If there are some left-hand side processes/builtins running, then
+      // terminate them and, if there are unread stdout/stderr file
+      // descriptors, then increase the deadline by another 2 seconds and
+      // return true. In this case the term() should be called again upon
+      // reaching the timeout. Otherwise return false. If there are no
+      // left-hand side processes/builtins running, then fail straight away.
+      //
+      // Note that in the former case the further reading will be performed
+      // with the adjusted timeout. We assume that this timeout is normally
+      // sufficient to read out the buffered data written by the already
+      // terminated processes. If, however, that's not the case (see
+      // pipe_command for the possible reasons), then term() needs to be
+      // called for the second time and the reading should be interrupted
+      // afterwards.
+      //
+      auto term = [&dlt, pipeline, &fds, &ist, &is, &unread,
+                   &trace, &ll, what, terminated = false] () mutable -> bool
+      {
+        // Can only be called if the deadline is specified.
         //
-        names ns;
+        assert (dlt);
 
-        if (!s.empty ())
+        if (pipeline == nullptr)
+          fail (ll) << what << " terminated: execution timeout expired";
+
+        if (!terminated)
         {
-          if (ops.whitespace ()) // The whitespace mode.
+          // Terminate the pipeline and adjust the deadline.
+          //
+
+          // Note that if we are still reading the stream and it's a builtin
+          // stdout, then we need to close it before terminating the pipeline.
+          // Not doing so can result in blocking this builtin on the write
+          // operation and thus aborting the build2 process (see term_pipe()
+          // for details).
+          //
+          // Should we do the same for all the pipeline builtins' stderr
+          // streams? No we don't, since the builtin diagnostics is assumed to
+          // always fit the pipe buffer (see libbutl/builtin.cxx for details).
+          // Thus, we will leave them open to fully read out the diagnostics.
+          //
+          if (ist.fd != nullfd && pipeline->bltn != nullptr)
           {
-            // Note that we collapse multiple consecutive whitespaces.
-            //
-            for (size_t p (0); p != string::npos; )
+            try
             {
-              // Skip the whitespaces.
-              //
-              const char* sep (" \n\r\t");
-              size_t b (s.find_first_not_of (sep, p));
+              is.close ();
+            }
+            catch (const io_error&)
+            {
+              // Not much we can do here.
+            }
 
-              if (b != string::npos) // Word beginning.
-              {
-                size_t e (s.find_first_of (sep, b)); // Find the word end.
-                ns.emplace_back (string (s, b, e != string::npos ? e - b : e));
+            ist.fd = nullfd;
+            --unread;
+          }
 
-                p = e;
-              }
-              else // Trailings whitespaces.
+          term_pipe (pipeline, trace);
+          terminated = true;
+
+          if (unread != 0)
+            dlt = system_clock::now () + chrono::seconds (2);
+
+          return unread != 0;
+        }
+        else
+        {
+          // Set the unread_{stderr,stdout} flags to true for the commands
+          // whose streams are not fully read yet.
+          //
+
+          // Can only be called after the first call of term() which would
+          // throw failed if pipeline is NULL.
+          //
+          assert (pipeline != nullptr);
+
+          for (fdselect_state& s: fds)
+          {
+            if (s.fd != nullfd)
+            {
+              if (s.data != nullptr) // stderr.
               {
-                // Append the trailing "blank" after the trailing whitespaces
-                // in the exact mode.
-                //
-                if (ops.exact ())
-                  ns.emplace_back (empty_string);
+                pipe_command* c (static_cast<pipe_command*> (s.data));
 
-                // Bail out since the end of the string is reached.
+                c->unread_stderr = true;
+
+                // Let's also close the stderr stream not to confuse
+                // diag_buffer::close() with a not fully read stream (eof is
+                // not reached, etc).
                 //
-                break;
+                try
+                {
+                  c->dbuf.is.close ();
+                }
+                catch (const io_error&)
+                {
+                  // Not much we can do here. Anyway the diagnostics will be
+                  // issued by complete_pipe().
+                }
               }
+              else                   // stdout.
+                pipeline->unread_stdout = true;
             }
           }
-          else // The newline or no-split mode.
+
+          return false;
+        }
+      };
+
+      // Note that on Windows if the file descriptor is not a pipe, then
+      // ifdstream assumes the blocking mode for which ifdselect() would throw
+      // invalid_argument. Such a descriptor can, however, only appear for the
+      // first command in the pipeline and so fds will only contain the input
+      // stream's descriptor. That all means that this descriptor will be read
+      // out by a series of the stream_reader::next() calls which can only
+      // return true and thus no ifdselect() calls will ever be made.
+      //
+      string s;
+      while (unread != 0)
+      {
+        // Read any pending data from the input stream.
+        //
+        if (ist.fd != nullfd)
+        {
+          // Prior to reading let's check that the deadline, if specified, is
+          // not reached. This way we handle the (hypothetical) case when we
+          // are continuously fed with the data without delays and thus can
+          // never get to ifdselect() which watches for the deadline. Also
+          // this check is the only way to bail out early on Windows for a
+          // blocking file descriptor.
+          //
+          if (dlt && *dlt <= system_clock::now ())
+          {
+            if (!term ())
+              break;
+          }
+
+          if (sr.next (s))
           {
-            // Note that we don't collapse multiple consecutive newlines.
-            //
-            // Note also that we always sanitize CRs so this loop is always
-            // needed.
-            //
-            for (size_t p (0); p != string::npos; )
+            if (!is.eof ())
             {
-              size_t e (s.find ('\n', p));
-              string l (s, p, e != string::npos ? e - p : e);
-
-              // Strip the trailing CRs that can appear while, for example,
-              // cross-testing Windows target or as a part of msvcrt junk
-              // production (see above).
+              // Consume the substring.
               //
-              while (!l.empty () && l.back () == '\r')
-                l.pop_back ();
+              cf (move (s));
+              s.clear ();
+            }
+            else
+            {
+              ist.fd = nullfd;
+              --unread;
+            }
 
-              // Append the line.
-              //
-              if (!l.empty ()       || // Non-empty.
-                  e != string::npos || // Empty, non-trailing.
-                  ops.exact ())        // Empty, trailing, in the exact mode.
-              {
-                if (ops.newline () || ns.empty ())
-                  ns.emplace_back (move (l));
-                else
-                {
-                  ns[0].value += '\n';
-                  ns[0].value += l;
-                }
-              }
+            continue;
+          }
+        }
 
-              p = e != string::npos ? e + 1 : e;
+        try
+        {
+          // Wait until the data appear in any of the streams. If a deadline
+          // is specified, then pass the timeout to fdselect().
+          //
+          if (dlt)
+          {
+            timestamp now (system_clock::now ());
+
+            if (*dlt <= now || ifdselect (fds, *dlt - now) == 0)
+            {
+              if (term ())
+                continue;
+              else
+                break;
             }
           }
+          else
+            ifdselect (fds);
+
+          // Read out the pending data from the stderr streams.
+          //
+          for (fdselect_state& s: fds)
+          {
+            if (s.ready           &&
+                s.data != nullptr &&
+                !static_cast<pipe_command*> (s.data)->dbuf.read ())
+            {
+              s.fd = nullfd;
+              --unread;
+            }
+          }
+        }
+        catch (const io_error& e)
+        {
+          fail (ll) << "io error reading pipeline streams: " << e;
         }
+      }
+    }
+
+    // The set pseudo-builtin: set variable from the stdin input.
+    //
+    // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] <var> [<attr>]
+    //
+    static void
+    set_builtin (environment& env,
+                 const strings& args,
+                 auto_fd in,
+                 pipe_command* pipeline,
+                 const optional<deadline>& dl,
+                 const location& ll)
+    {
+      tracer trace ("script::set_builtin");
 
-        env.set_variable (move (vname),
-                          move (ns),
-                          ats != nullptr ? *ats : empty_string,
-                          ll);
+      try
+      {
+        // Parse arguments.
+        //
+        cli::vector_scanner scan (args);
+        set_options ops (scan);
+
+        if (ops.whitespace () && ops.newline ())
+          fail (ll) << "set: both -n|--newline and -w|--whitespace specified";
+
+        if (!scan.more ())
+          fail (ll) << "set: missing variable name";
+
+        string vname (scan.next ());
+        if (vname.empty ())
+          fail (ll) << "set: empty variable name";
+
+        // Detect patterns analogous to parser::parse_variable_name() (so we
+        // diagnose `set x[string]`).
+        //
+        if (vname.find_first_of ("[*?") != string::npos)
+          fail (ll) << "set: expected variable name instead of " << vname;
+
+        string attrs;
+        if (scan.more ())
+        {
+          attrs = scan.next ();
+
+          if (attrs.empty ())
+            fail (ll) << "set: empty variable attributes";
+
+          if (scan.more ())
+            fail (ll) << "set: unexpected argument '" << scan.next () << "'";
+        }
+
+        // Parse the stream content into the variable value.
+        //
+        names ns;
+
+        read (move (in),
+              ops.whitespace (), ops.newline (), ops.exact (),
+              [&ns] (string&& s) {ns.emplace_back (move (s));},
+              pipeline,
+              dl,
+              ll,
+              "set");
+
+        env.set_variable (move (vname), move (ns), attrs, ll);
       }
       catch (const io_error& e)
       {
-        fail (ll) << "set: " << e;
+        fail (ll) << "set: unable to read from stdin: " << e;
       }
       catch (const cli::exception& e)
       {
@@ -1108,56 +1700,50 @@ namespace build2
         name);
     }
 
-    // Stack-allocated linked list of information about the running pipeline
-    // processes and builtins.
-    //
-    struct pipe_command
-    {
-      // We could probably use a union here, but let's keep it simple for now
-      // (one is NULL).
-      //
-      process* proc;
-      builtin* bltn;
-
-      // True if this command has been terminated.
-      //
-      bool terminated = false;
-
-      // Only for diagnostics.
-      //
-      const command& cmd;
-      const location& loc;
-
-      pipe_command* prev; // NULL for the left-most command.
-
-      pipe_command (process& p,
-                    const command& c,
-                    const location& l,
-                    pipe_command* v)
-          : proc (&p), bltn (nullptr), cmd (c), loc (l), prev (v) {}
-
-      pipe_command (builtin& b,
-                    const command& c,
-                    const location& l,
-                    pipe_command* v)
-          : proc (nullptr), bltn (&b), cmd (c), loc (l), prev (v) {}
-    };
-
     static bool
     run_pipe (environment& env,
               command_pipe::const_iterator bc,
               command_pipe::const_iterator ec,
               auto_fd ifd,
-              size_t ci, size_t li, const location& ll,
+              const iteration_index* ii, size_t li, size_t ci,
+              const location& ll,
               bool diag,
+              const function<command_function>& cf, bool last_cmd,
               optional<deadline> dl = nullopt,
-              const command* dl_cmd = nullptr, // env -t <cmd>
               pipe_command* prev_cmd = nullptr)
     {
       tracer trace ("script::run_pipe");
 
-      if (bc == ec) // End of the pipeline.
+      // At the end of the pipeline read out its stdout, if requested.
+      //
+      if (bc == ec)
+      {
+        if (cf != nullptr)
+        {
+          assert (!last_cmd); // Otherwise we wouldn't be here.
+
+          // The pipeline can't be empty.
+          //
+          assert (ifd != nullfd && prev_cmd != nullptr);
+
+          const command& c (prev_cmd->cmd);
+
+          try
+          {
+            cf (env, strings () /* arguments */,
+                move (ifd), prev_cmd,
+                dl,
+                ll);
+          }
+          catch (const io_error& e)
+          {
+            fail (ll) << "unable to read from " << cmd_path (c) << " stdout: "
+                      << e;
+          }
+        }
+
         return true;
+      }
 
       // The overall plan is to run the first command in the pipe, reading its
       // input from the file descriptor passed (or, for the first command,
@@ -1175,6 +1761,8 @@ namespace build2
       //
       const command& c (*bc);
 
+      const dir_path& wdir (*env.work_dir.path);
+
       // Register the command explicit cleanups. Verify that the path being
       // cleaned up is a sub-path of the script working directory. Fail if
       // this is not the case.
@@ -1182,7 +1770,7 @@ namespace build2
       for (const auto& cl: c.cleanups)
       {
         const path& p (cl.path);
-        path np (normalize (p, *env.work_dir.path, ll));
+        path np (normalize (p, wdir, ll));
 
         const string& ls (np.leaf ().string ());
         bool wc (ls == "*" || ls == "**" || ls == "***");
@@ -1207,6 +1795,12 @@ namespace build2
       command_pipe::const_iterator nc (bc + 1);
       bool last (nc == ec);
 
+      // Make sure that stdout is not redirected if meant to be read (last_cmd
+      // is false) or cannot be produced (last_cmd is true).
+      //
+      if (last && c.out && cf != nullptr)
+        fail (ll) << "stdout cannot be redirected";
+
       // True if the process path is not pre-searched and the program path
       // still needs to be resolved.
       //
@@ -1218,7 +1812,7 @@ namespace build2
 
       const redirect& in ((c.in ? *c.in : env.in).effective ());
 
-      const redirect* out (!last
+      const redirect* out (!last || (cf != nullptr && !last_cmd)
                            ? nullptr // stdout is piped.
                            : &(c.out ? *c.out : env.out).effective ());
 
@@ -1226,13 +1820,7 @@ namespace build2
 
       auto process_args = [&c] () -> cstrings
       {
-        cstrings args {c.program.recall_string ()};
-
-        for (const auto& a: c.arguments)
-          args.push_back (a.c_str ());
-
-        args.push_back (nullptr);
-        return args;
+        return build2::process_args (c.program.recall_string (), c.arguments);
       };
 
       // Prior to opening file descriptors for command input/output redirects
@@ -1255,19 +1843,38 @@ namespace build2
         // content), to make sure that the command doesn't print any unwanted
         // diagnostics about IO operation failure.
         //
-        // Note though, that doing so would be a bad idea if the deadline is
-        // specified, since we can block on read and miss the deadline.
-        //
-        if (!dl)
+        if (ifd != nullfd)
         {
-          // Note that dtor will ignore any errors (which is what we want).
+          // Note that we can't use ifdstream dtor in the skip mode here since
+          // it turns the stream into the blocking mode and we won't be able
+          // to read out the potentially buffered stderr for the
+          // pipeline. Using read() is also not ideal since it performs
+          // parsing and allocations needlessly. This, however, is probably ok
+          // for such an uncommon case.
+          //
+          //ifdstream (move (ifd), fdstream_mode::skip);
+
+          // Let's try to minimize the allocation size by splitting the input
+          // data at whitespaces.
           //
-          ifdstream (move (ifd), fdstream_mode::skip);
+          read (move (ifd),
+                true /* whitespace */,
+                false /* newline */,
+                false /* exact */,
+                [] (string&&) {}, // Just drop the string.
+                prev_cmd,
+                dl,
+                ll,
+                program.c_str ());
         }
 
         if (!first || !last)
           fail (ll) << program << " builtin must be the only pipe command";
 
+        if (c.cwd)
+          fail (ll) << "current working directory cannot be specified for "
+                    << program << " builtin";
+
         if (!c.variables.empty ())
           fail (ll) << "environment variables cannot be (un)set for "
                     << program << " builtin";
@@ -1282,6 +1889,9 @@ namespace build2
         if (c.out)
           fail (ll) << program << " builtin stdout cannot be redirected";
 
+        if (cf != nullptr && !last_cmd)
+          fail (ll) << program << " builtin stdout cannot be read";
+
         if (c.err)
           fail (ll) << program << " builtin stderr cannot be redirected";
 
@@ -1311,17 +1921,29 @@ namespace build2
 
       // Create a unique path for a command standard stream cache file.
       //
-      auto std_path = [&env, &ci, &li, &ll] (const char* n) -> path
+      auto std_path = [&env, ii, &li, &ci, &ll] (const char* nm) -> path
       {
         using std::to_string;
 
-        path p (n);
+        string s (nm);
+        size_t n (s.size ());
+
+        if (ii != nullptr)
+        {
+          // Note: reverse order (outermost to innermost).
+          //
+          for (const iteration_index* i (ii); i != nullptr; i = i->prev)
+            s.insert (n, "-i" + to_string (i->index));
+        }
 
         // 0 if belongs to a single-line script, otherwise is the command line
         // number (start from one) in the script.
         //
-        if (li > 0)
-          p += "-" + to_string (li);
+        if (li != 0)
+        {
+          s += "-n";
+          s += to_string (li);
+        }
 
         // 0 if belongs to a single-command expression, otherwise is the
         // command number (start from one) in the expression.
@@ -1330,10 +1952,13 @@ namespace build2
         // single-line script or to N-th single-command line of multi-line
         // script. These cases are mutually exclusive and so are unambiguous.
         //
-        if (ci > 0)
-          p += "-" + to_string (ci);
+        if (ci != 0)
+        {
+          s += "-c";
+          s += to_string (ci);
+        }
 
-        return normalize (move (p), temp_dir (env), ll);
+        return normalize (path (move (s)), temp_dir (env), ll);
       };
 
       // If this is the first pipeline command, then open stdin descriptor
@@ -1403,7 +2028,7 @@ namespace build2
           }
         case redirect_type::file:
           {
-            isp = normalize (in.file.path, *env.work_dir.path, ll);
+            isp = normalize (in.file.path, wdir, ll);
 
             open_stdin ();
             break;
@@ -1438,19 +2063,15 @@ namespace build2
       // Calculate the process/builtin execution deadline. Note that we should
       // also consider the left-hand side processes deadlines, not to keep
       // them waiting for us and allow them to terminate not later than their
-      // deadlines. Thus, let's also track which command has introduced the
-      // deadline, so we can report it if the deadline is missed.
+      // deadlines.
       //
       dl = earlier (dl, env.effective_deadline ());
 
       if (c.timeout)
       {
-        deadline d (system_clock::now () + *c.timeout, false /* success */);
+        deadline d (system_clock::now () + *c.timeout, c.timeout_success);
         if (!dl || d < *dl)
-        {
           dl = d;
-          dl_cmd = &c;
-        }
       }
 
       // Prior to opening file descriptors for command outputs redirects
@@ -1471,6 +2092,9 @@ namespace build2
         if (c.out)
           fail (ll) << "set builtin stdout cannot be redirected";
 
+        if (cf != nullptr && !last_cmd)
+          fail (ll) << "set builtin stdout cannot be read";
+
         if (c.err)
           fail (ll) << "set builtin stderr cannot be redirected";
 
@@ -1480,14 +2104,54 @@ namespace build2
         if (verb >= 2)
           print_process (process_args ());
 
-        set_builtin (env, c.arguments,
-                     move (ifd), !first,
-                     dl, dl_cmd != nullptr ? *dl_cmd : c,
-                     ll);
+        set_builtin (env, c.arguments, move (ifd), prev_cmd, dl, ll);
+        return true;
+      }
+
+      // If this is the last command in the pipe and the command function is
+      // specified for it, then call it.
+      //
+      if (last && cf != nullptr && last_cmd)
+      {
+        // Must be enforced by the caller.
+        //
+        assert (!c.out && !c.err && !c.exit);
+
+        try
+        {
+          cf (env, c.arguments, move (ifd), prev_cmd, dl, ll);
+        }
+        catch (const io_error& e)
+        {
+          diag_record dr (fail (ll));
+
+          dr << cmd_path (c) << ": unable to read from ";
+
+          if (prev_cmd != nullptr)
+            dr << cmd_path (prev_cmd->cmd) << " output";
+          else
+            dr << "stdin";
+
+          dr << ": " << e;
+        }
 
         return true;
       }
 
+      // Propagate the pointer to the left-most command.
+      //
+      pipe_command pc (env.context,
+                       c,
+                       dl,
+                       ll,
+                       prev_cmd,
+                       prev_cmd != nullptr ? prev_cmd->next : nullptr);
+
+      if (prev_cmd != nullptr)
+        prev_cmd->next = &pc;
+      else
+        pc.next = &pc; // Points to itself.
+
       // Open a file for command output redirect if requested explicitly
       // (file overwrite/append redirects) or for the purpose of the output
       // validation (none, here_*, file comparison redirects), register the
@@ -1497,9 +2161,9 @@ namespace build2
       // or null-device descriptor for merge, pass or null redirects
       // respectively (not opening any file).
       //
-      auto open = [&env, &ll, &std_path] (const redirect& r,
-                                          int dfd,
-                                          path& p) -> auto_fd
+      auto open = [&env, &wdir, &ll, &std_path, &c, &pc] (const redirect& r,
+                                                          int dfd,
+                                                          path& p) -> auto_fd
       {
         assert (dfd == 1 || dfd == 2);
         const char* what (dfd == 1 ? "stdout" : "stderr");
@@ -1517,11 +2181,34 @@ namespace build2
           {
             try
             {
+              if (dfd == 2) // stderr?
+              {
+                fdpipe p;
+                if (diag_buffer::pipe (env.context) == -1) // Are we buffering?
+                  p = fdopen_pipe ();
+
+                // Deduce the args0 argument similar to cmd_path().
+                //
+                // Note that we must open the diag buffer regardless of the
+                // diag_buffer::pipe() result.
+                //
+                pc.dbuf.open ((c.program.initial == nullptr
+                               ? c.program.recall.string ().c_str ()
+                               : c.program.recall_string ()),
+                              move (p.in),
+                              fdstream_mode::non_blocking);
+
+                if (p.out != nullfd)
+                  return move (p.out);
+
+                // Fall through.
+              }
+
               return fddup (dfd);
             }
             catch (const io_error& e)
             {
-              fail (ll) << "unable to duplicate " << what << ": " << e;
+              fail (ll) << "unable to redirect " << what << ": " << e;
             }
           }
 
@@ -1540,7 +2227,7 @@ namespace build2
             //
             p = r.file.mode == redirect_fmode::compare
               ? std_path (what)
-              : normalize (r.file.path, *env.work_dir.path, ll);
+              : normalize (r.file.path, wdir, ll);
 
             m |= r.file.mode == redirect_fmode::append
               ? fdopen_mode::at_end
@@ -1603,7 +2290,7 @@ namespace build2
       //    script failures investigation and, for example, for validation
       //    "tightening".
       //
-      if (last)
+      if (last && out != nullptr)
         ofd.out = open (*out, 1, osp);
       else
       {
@@ -1632,7 +2319,7 @@ namespace build2
           fail (ll) << "stdout and stderr redirected to each other";
 
         auto_fd& self  (mo ? ofd.out : efd);
-        auto_fd& other (mo ? efd : ofd.out);
+        auto_fd& other (mo ? efd     : ofd.out);
 
         try
         {
@@ -1646,121 +2333,417 @@ namespace build2
         }
       }
 
-      // All descriptors should be open to the date.
+      // By now all descriptors should be open.
       //
-      assert (ofd.out.get () != -1 && efd.get () != -1);
+      assert (ofd.out != nullfd && efd != nullfd);
 
-      // Wait for a process/builtin to complete until the deadline is reached
-      // and return the underlying wait function result (optional<something>).
-      //
-      auto timed_wait = [] (auto& p, const timestamp& deadline)
-      {
-        timestamp now (system_clock::now ());
-        return deadline > now ? p.timed_wait (deadline - now) : p.try_wait ();
-      };
+      pc.isp = &isp;
+      pc.osp = &osp;
+      pc.esp = &esp;
 
-      // Terminate the pipeline processes starting from the specified one and
-      // up to the leftmost one and then kill those which didn't terminate
-      // after 2 seconds.
+      // Read out all the pipeline's buffered stderr streams watching for the
+      // deadline, if specified. If the deadline is reached, then terminate
+      // the whole pipeline, move the deadline by another 2 seconds, and
+      // continue reading.
       //
-      // After that wait for the pipeline builtins completion. Since their
-      // standard streams should no longer be written to or read from by any
-      // process, that shouldn't take long. If, however, they won't be able to
-      // complete in 2 seconds, then some of them have probably stuck while
-      // communicating with a slow filesystem device or similar, and since we
-      // currently have no way to terminate asynchronous builtins, we have no
-      // choice but to abort.
+      // Note that we assume that this timeout increment is normally
+      // sufficient to read out the buffered data written by the already
+      // terminated processes. If, however, that's not the case (see
+      // pipe_command for the possible reasons), then we just set
+      // unread_stderr flag to true for such commands and bail out.
       //
-      // Issue diagnostics and fail if something goes wrong, but still try to
-      // terminate/kill all the pipe processes.
+      // Also note that this is a reduced version of the above read() function.
       //
-      auto term_pipe = [&timed_wait, &trace] (pipe_command* pc)
+      auto read_pipe = [&pc, &ll, &trace] ()
       {
-        diag_record dr;
+        fdselect_set fds;
+        for (pipe_command* c (&pc); c != nullptr; c = c->prev)
+        {
+          diag_buffer& b (c->dbuf);
 
-        auto prog = [] (pipe_command* c) {return cmd_path (c->cmd);};
+          if (b.is.is_open ())
+            fds.emplace_back (b.is.fd (), c);
+        }
 
-        // Terminate processes gracefully and set the terminate flag for the
-        // pipe commands.
+        // Note that the current command deadline is the earliest (see above).
         //
-        for (pipe_command* c (pc); c != nullptr; c = c->prev)
+        optional<timestamp> dlt (pc.dl ? pc.dl->value : optional<timestamp> ());
+
+        bool terminated (false);
+
+        for (size_t unread (fds.size ()); unread != 0;)
         {
-          if (process* p = c->proc)
           try
           {
-            l5 ([&]{trace (c->loc) << "terminating: " << c->cmd;});
+            // If a deadline is specified, then pass the timeout to fdselect().
+            //
+            if (dlt)
+            {
+              timestamp now (system_clock::now ());
+
+              if (*dlt <= now || ifdselect (fds, *dlt - now) == 0)
+              {
+                if (!terminated)
+                {
+                  term_pipe (&pc, trace);
+                  terminated = true;
+
+                  dlt = system_clock::now () + chrono::seconds (2);
+                  continue;
+                }
+                else
+                {
+                  for (fdselect_state& s: fds)
+                  {
+                    if (s.fd != nullfd)
+                    {
+                      pipe_command* c (static_cast<pipe_command*> (s.data));
+
+                      c->unread_stderr = true;
+
+                      // Let's also close the stderr stream not to confuse
+                      // diag_buffer::close() (see read() for details).
+                      //
+                      try
+                      {
+                        c->dbuf.is.close ();
+                      }
+                      catch (const io_error&) {}
+                    }
+                  }
+
+                  break;
+                }
+              }
+            }
+            else
+              ifdselect (fds);
 
-            p->term ();
+            for (fdselect_state& s: fds)
+            {
+              if (s.ready &&
+                  !static_cast<pipe_command*> (s.data)->dbuf.read ())
+              {
+                s.fd = nullfd;
+                --unread;
+              }
+            }
           }
-          catch (const process_error& e)
+          catch (const io_error& e)
           {
-            // If unable to terminate the process for any reason (the process
-            // is exiting on Windows, etc) then just ignore this, postponing
-            // the potential failure till the kill() call.
-            //
-            l5 ([&]{trace (c->loc) <<"unable to terminate " << prog (c)
-                                   << ": " << e;});
+            fail (ll) << "io error reading pipeline streams: " << e;
           }
-
-          c->terminated = true;
         }
+      };
 
-        // Wait a bit for the processes to terminate and kill the remaining
-        // ones.
-        //
-        timestamp dl (system_clock::now () + chrono::seconds (2));
-
-        for (pipe_command* c (pc); c != nullptr; c = c->prev)
+      // Wait for the pipeline processes and builtins to complete, watching
+      // for their deadlines if present. If a deadline is reached for any of
+      // them, then terminate the whole pipeline.
+      //
+      // Note: must be called after read_pipe().
+      //
+      auto wait_pipe = [&pc, &dl, &trace] ()
+      {
+        for (pipe_command* c (&pc); c != nullptr; c = c->prev)
         {
-          if (process* p = c->proc)
           try
           {
-            l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;});
-
-            if (!timed_wait (*p, dl))
+            if (process* p = c->proc)
             {
-              l5 ([&]{trace (c->loc) << "killing: " << c->cmd;});
+              if (!dl)
+                p->wait ();
+              else if (!timed_wait (*p, dl->value))
+                term_pipe (c, trace);
+            }
+            else
+            {
+              builtin* b (c->bltn);
 
-              p->kill ();
-              p->wait ();
+              if (!dl)
+                b->wait ();
+              else if (!timed_wait (*b, dl->value))
+                term_pipe (c, trace);
             }
           }
           catch (const process_error& e)
           {
-            dr << fail (c->loc) << "unable to wait/kill " << prog (c) << ": "
-               << e;
+            fail (c->loc) << "unable to wait " << cmd_path (c->cmd) << ": "
+                          << e;
           }
         }
+      };
 
-        // Wait a bit for the builtins to complete and abort if any remain
-        // running.
-        //
-        dl = system_clock::now () + chrono::seconds (2);
+      // Iterate over the pipeline processes and builtins left to right,
+      // printing their stderr if buffered and issuing the diagnostics if the
+      // exit code is not available (terminated abnormally or due to a
+      // deadline), is unexpected, or stdout and/or stderr was not fully
+      // read. Throw failed at the end if the exit code for any of them is not
+      // available or stdout and/or stderr was not fully read. Return false if
+      // exit code for any of them is unexpected (the return is used, for
+      // example, in the if-conditions).
+      //
+      // Note: must be called after wait_pipe() and only once.
+      //
+      auto complete_pipe = [&pc, &env, diag] ()
+      {
+        bool r (true);
+        bool fail (false);
 
-        for (pipe_command* c (pc); c != nullptr; c = c->prev)
+        pipe_command* c (pc.next); // Left-most command.
+        assert (c != nullptr);     // Since the lambda must be called once.
+
+        for (pc.next = nullptr; c != nullptr; c = c->next)
         {
-          if (builtin* b = c->bltn)
-          try
+          // Collect the exit status, if present.
+          //
+          // Absent if the process/builtin misses the "unsuccessful" deadline.
+          //
+          optional<process_exit> exit;
+
+          const char* w (c->bltn != nullptr ? "builtin" : "process");
+
+          if (c->bltn != nullptr)
           {
-            l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;});
+            // Note that this also handles ad hoc termination (without the
+            // call to term_pipe()) by the sleep builtin.
+            //
+            if (c->terminated)
+            {
+              if (c->dl && c->dl->success)
+                exit = process_exit (0);
+            }
+            else
+              exit = process_exit (c->bltn->wait ());
+
+            c->bltn = nullptr;
+          }
+          else if (c->proc != nullptr)
+          {
+            const process& pr (*c->proc);
 
-            if (!timed_wait (*b, dl))
+#ifndef _WIN32
+            if (c->terminated       &&
+                !pr.exit->normal () &&
+                pr.exit->signal () == SIGTERM)
+#else
+            if (c->terminated       &&
+                !pr.exit->normal () &&
+                pr.exit->status == DBG_TERMINATE_PROCESS)
+#endif
             {
-              error (c->loc) << prog (c) << " builtin hanged, aborting";
-              terminate (false /* trace */);
+              if (c->dl && c->dl->success)
+                exit = process_exit (0);
             }
+            else
+              exit = pr.exit;
+
+            c->proc = nullptr;
           }
-          catch (const system_error& e)
+          else
+            assert (false); // The lambda can only be called once.
+
+          const command& cmd (c->cmd);
+          const location& ll (c->loc);
+
+          // Verify the exit status and issue the diagnostics on failure.
+          //
+          diag_record dr;
+
+          path pr (cmd_path (cmd));
+
+          // Print the diagnostics if the command stdout and/or stderr are not
+          // fully read.
+          //
+          auto unread_output_diag = [&dr, c, w, &pr] (bool main_error)
+          {
+            if (main_error)
+              dr << error (c->loc) << w << ' ' << pr << ' ';
+            else
+              dr << error;
+
+            if (c->unread_stdout)
+            {
+              dr << "stdout ";
+
+              if (c->unread_stderr)
+                dr << "and ";
+            }
+
+            if (c->unread_stderr)
+              dr << "stderr ";
+
+            dr << "not closed after exit";
+          };
+
+          // Fail if the process is terminated due to reaching the deadline.
+          //
+          if (!exit)
+          {
+            dr << error (ll) << w << ' ' << pr
+               << " terminated: execution timeout expired";
+
+            if (c->unread_stdout || c->unread_stderr)
+              unread_output_diag (false /* main_error */);
+
+            if (verb == 1)
+            {
+              dr << info << "command line: ";
+              print_process (dr, *c->args);
+            }
+
+            fail = true;
+          }
+          else
+          {
+            // If there is no valid exit code available for whatever reason
+            // then we print the proper diagnostics, dump stderr (if cached
+            // and not too large) and fail the whole script. Otherwise if the
+            // exit code is not correct then we print diagnostics if requested
+            // and fail the pipeline.
+            //
+            bool valid (exit->normal ());
+
+            // On Windows the exit code can be out of the valid codes range
+            // being defined as uint16_t.
+            //
+#ifdef _WIN32
+            if (valid)
+              valid = exit->code () < 256;
+#endif
+
+            // In the presence of a valid exit code and given stdout and
+            // stderr are fully read out we print the diagnostics and return
+            // false rather than throw.
+            //
+            // Note that there can be a race, so that the process we have
+            // terminated due to reaching the deadline has in fact exited
+            // normally. Thus, the 'unread stderr' situation can also happen
+            // to a successfully terminated process. If that's the case, we
+            // report this problem as the main error and the secondary error
+            // otherwise.
+            //
+            if (!valid || c->unread_stdout || c->unread_stderr)
+              fail = true;
+
+            exit_comparison cmp (cmd.exit
+                                 ? cmd.exit->comparison
+                                 : exit_comparison::eq);
+
+            uint16_t exc (cmd.exit ? cmd.exit->code : 0);
+
+            bool success (valid &&
+                          (cmp == exit_comparison::eq) ==
+                          (exc == exit->code ()));
+
+            if (!success)
+              r = false;
+
+            if (!valid || (!success && diag))
+            {
+              dr << error (ll) << w << ' ' << pr << ' ';
+
+              if (!exit->normal ())
+                dr << *exit;
+              else
+              {
+                uint16_t ec (exit->code ()); // Make sure printed as integer.
+
+                if (!valid)
+                {
+                  dr << "exit code " << ec << " out of 0-255 range";
+                }
+                else
+                {
+                  if (cmd.exit)
+                    dr << "exit code " << ec
+                       << (cmp == exit_comparison::eq ? " != " : " == ")
+                       << exc;
+                  else
+                    dr << "exited with code " << ec;
+                }
+              }
+
+              if (c->unread_stdout || c->unread_stderr)
+                unread_output_diag (false /* main_error */);
+
+              if (verb == 1)
+              {
+                dr << info << "command line: ";
+                print_process (dr, *c->args);
+              }
+
+              if (non_empty (*c->esp, ll) && avail_on_failure (*c->esp, env))
+                dr << info << "stderr: " << *c->esp;
+
+              if (non_empty (*c->osp, ll) && avail_on_failure (*c->osp, env))
+                dr << info << "stdout: " << *c->osp;
+
+              if (non_empty (*c->isp, ll) && avail_on_failure (*c->isp, env))
+                dr << info << "stdin: " << *c->isp;
+
+              // Print cached stderr.
+              //
+              print_file (dr, *c->esp, ll);
+            }
+            else if (c->unread_stdout || c->unread_stderr)
+              unread_output_diag (true /* main_error */);
+          }
+
+          // Now print the buffered stderr, if present, and/or flush the
+          // diagnostics, if issued.
+          //
+          if (c->dbuf.is_open ())
+            c->dbuf.close (move (dr));
+        }
+
+        // Fail if required.
+        //
+        if (fail)
+          throw failed ();
+
+        return r;
+      };
+
+      // Close all buffered pipeline stderr streams ignoring io_error
+      // exceptions.
+      //
+      auto close_pipe = [&pc] ()
+      {
+        for (pipe_command* c (&pc); c != nullptr; c = c->prev)
+        {
+          if (c->dbuf.is.is_open ())
+          try
           {
-            dr << fail (c->loc) << "unable to wait for " << prog (c) << ": "
-               << e;
+            c->dbuf.is.close();
           }
+          catch (const io_error&) {}
         }
       };
 
-      // Absent if the process/builtin misses the "unsuccessful" deadline.
+      // Derive the process/builtin CWD.
+      //
+      // If the process/builtin CWD is specified via the env pseudo-builtin,
+      // then use that, completing it relative to the script environment work
+      // directory, if it is relative. Otherwise, use the script environment
+      // work directory.
+      //
+      dir_path completed_cwd;
+      if (c.cwd && c.cwd->relative ())
+        completed_cwd = wdir / *c.cwd;
+
+      const dir_path& cwd (!completed_cwd.empty () ? completed_cwd :
+                           c.cwd                   ? *c.cwd        :
+                                                     wdir);
+
+      // Unless CWD is the script environment work directory (which always
+      // exists), verify that it exists and fail if it doesn't.
       //
-      optional<process_exit> exit;
+      if (&cwd != &wdir && !exists (cwd))
+        fail (ll) << "specified working directory " << cwd
+                  << " does not exist";
+
+      cstrings args (process_args ());
+      pc.args = &args;
 
       const builtin_info* bi (resolve ? builtins.find (program) : nullptr);
 
@@ -1770,8 +2753,11 @@ namespace build2
       {
         // Execute the builtin.
         //
-        if (verb >= 2)
-          print_process (process_args ());
+        // Don't print the true and false builtins, since they are normally
+        // used for command execution flow control.
+        //
+        if (verb >= 2 && program != "true" && program != "false")
+          print_process (args);
 
         // Some of the script builtins (cp, mkdir, etc) extend libbutl
         // builtins (via callbacks) registering/moving cleanups for the
@@ -1812,18 +2798,6 @@ namespace build2
         // We also extend the sleep builtin, deactivating the thread before
         // going to sleep and waking up before the deadline is reached.
         //
-        // Let's "wrap up" the sleep-related values into the single object to
-        // rely on "small function object" optimization.
-        //
-        struct sleep
-        {
-          optional<timestamp> deadline;
-          bool terminated = false;
-
-          sleep (const optional<timestamp>& d): deadline (d) {}
-        };
-        sleep slp (dl ? dl->value : optional<timestamp> ());
-
         builtin_callbacks bcs {
 
           // create
@@ -1985,16 +2959,19 @@ namespace build2
 
           // sleep
           //
-          [&env, &slp] (const duration& d)
+          [&env, &pc] (const duration& d)
           {
             duration t (d);
-            const optional<timestamp>& dl (slp.deadline);
+            const optional<timestamp>& dl (pc.dl
+                                           ? pc.dl->value
+                                           : optional<timestamp> ());
 
             if (dl)
             {
               timestamp now (system_clock::now ());
 
-              slp.terminated = now + t > *dl;
+              if (now + t > *dl)
+                pc.terminated = true;
 
               if (*dl <= now)
                 return;
@@ -2007,7 +2984,7 @@ namespace build2
             // If/when required we could probably support the precise sleep
             // mode (e.g., via an option).
             //
-            env.context.sched.sleep (t);
+            env.context.sched->sleep (t);
           }
         };
 
@@ -2017,21 +2994,21 @@ namespace build2
           builtin b (bi->function (r,
                                    c.arguments,
                                    move (ifd), move (ofd.out), move (efd),
-                                   *env.work_dir.path,
+                                   cwd,
                                    bcs));
+          pc.bltn = &b;
 
-          pipe_command pc (b, c, ll, prev_cmd);
-
-          // If the deadline is specified, then make sure we don't miss it
-          // waiting indefinitely in the builtin destructor on the right-hand
-          // side of the pipe failure.
+          // If the right-hand part of the pipe fails, then make sure we don't
+          // wait indefinitely in the process destructor if the deadlines are
+          // specified or just because a process is blocked on stderr.
           //
-          auto g (make_exception_guard ([&dl, &pc, &term_pipe] ()
+          auto g (make_exception_guard ([&pc, &close_pipe, &trace] ()
           {
-            if (dl)
+            if (pc.bltn != nullptr)
             try
             {
-              term_pipe (&pc);
+              close_pipe ();
+              term_pipe (&pc, trace);
             }
             catch (const failed&)
             {
@@ -2042,27 +3019,21 @@ namespace build2
           success = run_pipe (env,
                               nc, ec,
                               move (ofd.in),
-                              ci + 1, li, ll, diag,
-                              dl, dl_cmd,
+                              ii, li, ci + 1, ll, diag,
+                              cf, last_cmd,
+                              dl,
                               &pc);
 
-          if (!dl)
-            b.wait ();
-          else if (!timed_wait (b, dl->value))
-            term_pipe (&pc);
-
-          // Note that this also handles ad hoc termination (without the call
-          // to term_pipe()) by the sleep builtin (see above).
+          // Complete the pipeline execution, if not done yet.
           //
-          if (pc.terminated || slp.terminated)
+          if (pc.bltn != nullptr)
           {
-            assert (dl);
+            read_pipe ();
+            wait_pipe ();
 
-            if (dl->success)
-              exit = process_exit (0);
+            if (!complete_pipe ())
+              success = false;
           }
-          else
-            exit = process_exit (r);
         }
         catch (const system_error& e)
         {
@@ -2074,8 +3045,6 @@ namespace build2
       {
         // Execute the process.
         //
-        cstrings args (process_args ());
-
         // If the process path is not pre-searched then resolve the relative
         // non-simple program path against the script's working directory. The
         // simple one will be left for the process path search machinery. Also
@@ -2107,7 +3076,7 @@ namespace build2
                 program (path (s, 1, s.size () - 1));
             }
             else
-              program (*env.work_dir.path / p);
+              program (wdir / p);
           }
         }
         catch (const invalid_path& e)
@@ -2127,17 +3096,23 @@ namespace build2
 
           // Note that CWD and builtin-escaping character '^' are not printed.
           //
-          const small_vector<string, 4>& evs (vs);
-          process_env pe (resolve ? pp : c.program, evs);
+          const small_vector<string, 4>& evars (vs);
+          process_env pe (resolve ? pp : c.program, evars);
 
           if (verb >= 2)
             print_process (pe, args);
 
+          // Note that stderr can only be a pipe if we are buffering the
+          // diagnostics. In this case also pass the reading end so it can be
+          // "probed" on Windows (see butl::process::pipe for details).
+          //
           process pr (
             *pe.path,
             args.data (),
-            {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()},
-            env.work_dir.path->string ().c_str (),
+            {ifd.get (), -1},
+            process::pipe (ofd),
+            {pc.dbuf.is.fd (), efd.get ()},
+            cwd.string ().c_str (),
             pe.vars);
 
           // Can't throw.
@@ -2146,18 +3121,19 @@ namespace build2
           ofd.out.reset ();
           efd.reset ();
 
-          pipe_command pc (pr, c, ll, prev_cmd);
+          pc.proc = &pr;
 
-          // If the deadline is specified, then make sure we don't miss it
-          // waiting indefinitely in the process destructor on the right-hand
-          // part of the pipe failure.
+          // If the right-hand part of the pipe fails, then make sure we don't
+          // wait indefinitely in the process destructor (see above for
+          // details).
           //
-          auto g (make_exception_guard ([&dl, &pc, &term_pipe] ()
+          auto g (make_exception_guard ([&pc, &close_pipe, &trace] ()
           {
-            if (dl)
+            if (pc.proc != nullptr)
             try
             {
-              term_pipe (&pc);
+              close_pipe ();
+              term_pipe (&pc, trace);
             }
             catch (const failed&)
             {
@@ -2168,32 +3144,21 @@ namespace build2
           success = run_pipe (env,
                               nc, ec,
                               move (ofd.in),
-                              ci + 1, li, ll, diag,
-                              dl, dl_cmd,
+                              ii, li, ci + 1, ll, diag,
+                              cf, last_cmd,
+                              dl,
                               &pc);
 
-          if (!dl)
-            pr.wait ();
-          else if (!timed_wait (pr, dl->value))
-            term_pipe (&pc);
-
-#ifndef _WIN32
-          if (pc.terminated       &&
-              !pr.exit->normal () &&
-              pr.exit->signal () == SIGTERM)
-#else
-          if (pc.terminated       &&
-              !pr.exit->normal () &&
-              pr.exit->status == DBG_TERMINATE_PROCESS)
-#endif
+          // Complete the pipeline execution, if not done yet.
+          //
+          if (pc.proc != nullptr)
           {
-            assert (dl);
+            read_pipe ();
+            wait_pipe ();
 
-            if (dl->success)
-              exit = process_exit (0);
+            if (!complete_pipe ())
+              success = false;
           }
-          else
-            exit = pr.exit;
         }
         catch (const process_error& e)
         {
@@ -2206,98 +3171,23 @@ namespace build2
         }
       }
 
-      // If the righ-hand side pipeline failed than the whole pipeline fails,
-      // and no further checks are required.
-      //
-      if (!success)
-        return false;
-
-      // Fail if the process is terminated due to reaching the deadline.
-      //
-      if (!exit)
-        fail (ll) << cmd_path (dl_cmd != nullptr ? *dl_cmd : c)
-                  << " terminated: execution timeout expired";
-
-      path pr (cmd_path (c));
-
-      // If there is no valid exit code available by whatever reason then we
-      // print the proper diagnostics, dump stderr (if cached and not too
-      // large) and fail the whole script. Otherwise if the exit code is not
-      // correct then we print diagnostics if requested and fail the pipeline.
-      //
-      bool valid (exit->normal ());
-
-      // On Windows the exit code can be out of the valid codes range being
-      // defined as uint16_t.
-      //
-#ifdef _WIN32
-      if (valid)
-        valid = exit->code () < 256;
-#endif
-
-      exit_comparison cmp (c.exit ? c.exit->comparison : exit_comparison::eq);
-      uint16_t        exc (c.exit ? c.exit->code       : 0);
-
-      success = valid &&
-                (cmp == exit_comparison::eq) == (exc == exit->code ());
-
-      if (!valid || (!success && diag))
-      {
-        // In the presense of a valid exit code we print the diagnostics and
-        // return false rather than throw.
-        //
-        diag_record d (valid ? error (ll) : fail (ll));
-
-        if (!exit->normal ())
-          d << pr << " " << *exit;
-        else
-        {
-          uint16_t ec (exit->code ()); // Make sure is printed as integer.
-
-          if (!valid)
-            d << pr << " exit code " << ec << " out of 0-255 range";
-          else if (!success)
-          {
-            if (diag)
-            {
-              if (c.exit)
-                d << pr << " exit code " << ec
-                  << (cmp == exit_comparison::eq ? " != " : " == ") << exc;
-              else
-                d << pr << " exited with code " << ec;
-            }
-          }
-          else
-            assert (false);
-        }
-
-        if (non_empty (esp, ll) && avail_on_failure (esp, env))
-          d << info << "stderr: " << esp;
-
-        if (non_empty (osp, ll) && avail_on_failure (osp, env))
-          d << info << "stdout: " << osp;
-
-        if (non_empty (isp, ll) && avail_on_failure (isp, env))
-          d << info << "stdin: " << isp;
-
-        // Print cached stderr.
-        //
-        print_file (d, esp, ll);
-      }
-
-      // If exit code is correct then check if the standard outputs match the
-      // expectations. Note that stdout is only redirected to file for the
-      // last command in the pipeline.
+      // If the pipeline or the right-hand side outputs check failed, then no
+      // further checks are required. Otherwise, check if the standard outputs
+      // match the expectations. Note that stdout can only be redirected to
+      // file for the last command in the pipeline.
       //
       // The thinking behind matching stderr first is that if it mismatches,
       // then the program probably misbehaves (executes wrong functionality,
       // etc) in which case its stdout doesn't really matter.
       //
       if (success)
-        success =
-          check_output (pr, esp, isp, err, ll, env, diag, "stderr") &&
-          (!last ||
-           check_output (pr, osp, isp, *out, ll, env, diag, "stdout"));
+      {
+        path pr (cmd_path (c));
+
+        success = check_output (pr, esp, isp, err, ll, env, diag, "stderr") &&
+                  (out == nullptr ||
+                   check_output (pr, osp, isp, *out, ll, env, diag, "stdout"));
+      }
 
       return success;
     }
@@ -2305,8 +3195,10 @@ namespace build2
     static bool
     run_expr (environment& env,
               const command_expr& expr,
-              size_t li, const location& ll,
-              bool diag)
+              const iteration_index* ii, size_t li,
+              const location& ll,
+              bool diag,
+              const function<command_function>& cf, bool last_cmd)
     {
       // Commands are numbered sequentially throughout the expression
       // starting with 1. Number 0 means the command is a single one.
@@ -2344,10 +3236,15 @@ namespace build2
         // with false.
         //
         if (!((or_op && r) || (!or_op && !r)))
+        {
+          assert (!p.empty ());
+
           r = run_pipe (env,
                         p.begin (), p.end (),
                         auto_fd (),
-                        ci, li, ll, print);
+                        ii, li, ci, ll, print,
+                        cf, last_cmd);
+        }
 
         ci += p.size ();
       }
@@ -2358,24 +3255,37 @@ namespace build2
     void
     run (environment& env,
          const command_expr& expr,
-         size_t li, const location& ll)
+         const iteration_index* ii, size_t li,
+         const location& ll,
+         const function<command_function>& cf,
+         bool last_cmd)
     {
       // Note that we don't print the expression at any verbosity level
       // assuming that the caller does this, potentially providing some
       // additional information (command type, etc).
       //
-      if (!run_expr (env, expr, li, ll, true /* diag */))
+      if (!run_expr (env,
+                     expr,
+                     ii, li, ll,
+                     true /* diag */,
+                     cf, last_cmd))
         throw failed (); // Assume diagnostics is already printed.
     }
 
     bool
-    run_if (environment& env,
-            const command_expr& expr,
-            size_t li, const location& ll)
+    run_cond (environment& env,
+              const command_expr& expr,
+              const iteration_index* ii, size_t li,
+              const location& ll,
+              const function<command_function>& cf, bool last_cmd)
     {
       // Note that we don't print the expression here (see above).
       //
-      return run_expr (env, expr, li, ll, false /* diag */);
+      return run_expr (env,
+                       expr,
+                       ii, li, ll,
+                       false /* diag */,
+                       cf, last_cmd);
     }
 
     void
@@ -2624,8 +3534,7 @@ namespace build2
       try
       {
         size_t n (0);
-        for (const dir_entry& de: dir_iterator (p,
-                                                false /* ignore_dangling */))
+        for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow))
         {
           if (n++ < 10)
             dr << '\n' << (de.ltype () == entry_type::directory