From a83f3866667bca073c4d4c5d80b4deb5ac05906c Mon Sep 17 00:00:00 2001
From: Karen Arutyunov <karen@codesynthesis.com>
Date: Wed, 11 Jan 2017 01:43:09 +0300
Subject: Add support for portable path modifier and dot character escaping
 inversion

---
 build2/buildfile              |   2 +-
 build2/test/script/parser     |  26 ++-
 build2/test/script/parser.cxx | 386 ++++++++++++++++++++----------------------
 build2/test/script/regex      |  35 ++--
 build2/test/script/regex.cxx  |  81 +++++++++
 build2/test/script/regex.ixx  |  35 ++++
 build2/test/script/runner     |   2 +-
 build2/test/script/runner.cxx | 376 +++++++++++++++++++++++++++++++++++-----
 build2/test/script/script     |  74 ++++++--
 build2/test/script/script.cxx | 115 +++++++++----
 10 files changed, 825 insertions(+), 307 deletions(-)
 create mode 100644 build2/test/script/regex.ixx

(limited to 'build2')
diff --git a/build2/buildfile b/build2/buildfile
index 6d497ca..84e2f82 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -88,7 +88,7 @@ exe{b}:                                                   \
 test/script/{hxx         cxx}{ builtin                  } \
 test/script/{hxx         cxx}{ lexer                    } \
 test/script/{hxx         cxx}{ parser                   } \
-test/script/{hxx         cxx}{ regex                    } \
+test/script/{hxx ixx     cxx}{ regex                    } \
 test/script/{hxx         cxx}{ runner                   } \
 test/script/{hxx ixx     cxx}{ script                   } \
 test/script/{hxx         cxx}{ token                    } \
diff --git a/build2/test/script/parser b/build2/test/script/parser
index 9ad5fe9..edd64a3 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -12,7 +12,6 @@
 #include <build2/diagnostics>
 
 #include <build2/test/script/token>
-#include <build2/test/script/regex>
 #include <build2/test/script/script>
 
 namespace build2
@@ -111,7 +110,7 @@ namespace build2
 
           // Regex global flags. Meaningful if regex != '\0'.
           //
-          regex::char_flags regex_flags;
+          string regex_flags;
         };
         using here_docs = vector<here_doc>;
 
@@ -125,12 +124,29 @@ namespace build2
         parse_here_documents (token&, token_type&,
                               pair<command_expr, here_docs>&);
 
-        pair<string, regex::line_regex>
+        struct parsed_doc
+        {
+          union
+          {
+            string str;        // Here-document literal.
+            regex_lines regex; // Here-document regex.
+          };
+
+          bool re;             // True if regex.
+          uint64_t end_line;   // Here-document end marker location.
+          uint64_t end_column;
+
+          parsed_doc (string, uint64_t line, uint64_t column);
+          parsed_doc (regex_lines, uint64_t line, uint64_t column);
+          parsed_doc (parsed_doc&&); // Note: move constructible-only type.
+          ~parsed_doc ();
+        };
+
+        parsed_doc
         parse_here_document (token&, token_type&,
                              const string&,
                              const string& mode,
-                             char regex_introducer, // '\0' if not a regex.
-                             regex::char_flags);
+                             char re_intro);      // '\0' if not a regex.
 
         // Execute. Issue diagnostics and throw failed in case of an error.
         //
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index f381118..4b1c777 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -5,7 +5,6 @@
 #include <build2/test/script/parser>
 
 #include <sstream>
-#include <cstring> // strstr()
 
 #include <build2/scheduler>
 
@@ -14,39 +13,6 @@
 
 using namespace std;
 
-namespace std
-{
-  // Print regex error description but only if it is meaningful (this is also
-  // why we have to print leading colon here).
-  //
-  // Currently libstdc++ just returns the name of the exception (bug #67361).
-  // So we check that the description contains at least one space character.
-  //
-  // While VC's description is meaningful, it has an undesired prefix that
-  // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
-  //
-  static ostream&
-  operator<< (ostream& o, const regex_error& e)
-  {
-    const char* d (e.what ());
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
-    const char* rd (strstr (d, "): "));
-    if (rd != nullptr)
-      d = rd + 3;
-#endif
-
-    ostringstream os;
-    os << runtime_error (d); // Sanitize the description.
-
-    string s (os.str ());
-    if (s.find (' ') != string::npos)
-      o << ": " << s;
-
-    return o;
-  }
-}
-
 namespace build2
 {
   namespace test
@@ -1340,23 +1306,22 @@ namespace build2
 
       // Parse the regular expression representation (non-empty string value
       // framed with introducer characters and optionally followed by flag
-      // characters from the {i} set, for example '/foo/i') into
+      // characters from the {di} set, for example '/foo/id') into
       // components. Also return end-of-parsing position if requested,
       // otherwise treat any unparsed characters left as an error.
       //
       struct regex_parts
       {
         string value;
-        char introducer;
-        regex::char_flags flags; // {icase}
+        char   intro;
+        string flags; // Combination of characters from {di} set.
 
         // Create a special empty object.
         //
-        regex_parts ()
-            : introducer ('\0'), flags (regex::char_flags ()) {}
+        regex_parts (): intro ('\0') {}
 
-        regex_parts (string v, char i, regex::char_flags f)
-            : value (move (v)), introducer (i), flags (f) {}
+        regex_parts (string v, char i, string f)
+            : value (move (v)), intro (i), flags (move (f)) {}
       };
 
       static regex_parts
@@ -1377,10 +1342,10 @@ namespace build2
         if (rn == 0)
           fail (l) << what << " is empty";
 
-        bool icase (s[++p] == 'i'); // Note: s[++p] can be '\0' (no flags).
-
-        if (icase)
-          ++p;
+        // Find end-of-flags position.
+        //
+        size_t fp (++p); // Save flags starting position.
+        for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ;
 
         // If string end is not reached then report invalid flags, unless
         // end-of-parsing position is requested (which means regex is just a
@@ -1392,11 +1357,7 @@ namespace build2
         if (end != nullptr)
           *end = p;
 
-        return regex_parts (string (s, 1, rn),
-                            s[0],
-                            icase
-                            ? regex::char_regex::icase
-                            : regex::char_flags ());
+        return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp));
       }
 
       pair<command_expr, parser::here_docs> parser::
@@ -1419,6 +1380,27 @@ namespace build2
             fail (l) << "stdout and stderr redirected to each other";
         };
 
+        // Check that the introducer character differs from '/' if the
+        // portable path modifier is specified. Must be called before
+        // parse_regex() (see below) to make sure its diagnostics is
+        // meaningful.
+        //
+        // Note that the portable path modifier assumes '/' to be a valid
+        // regex character and so makes it indistinguishable from the
+        // terminating introducer.
+        //
+        auto check_regex_mod = [this] (const string& mod,
+                                       const string& re,
+                                       const location& l,
+                                       const char* what)
+        {
+          // Handles empty regex properly.
+          //
+          if (mod.find ('/') != string::npos && re[0] == '/')
+            fail (l) << "portable path modifier and '/' introducer in "
+                     << what;
+        };
+
         // Pending positions where the next word should go.
         //
         enum class pending
@@ -1449,7 +1431,8 @@ namespace build2
         // Add the next word to either one of the pending positions or to
         // program arguments by default.
         //
-        auto add_word = [&c, &p, &mod, this] (string&& w, const location& l)
+        auto add_word = [&c, &p, &mod, &check_regex_mod, this] (
+          string&& w, const location& l)
         {
           auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
           {
@@ -1468,18 +1451,16 @@ namespace build2
                      << "file descriptor must be " << fd;
           };
 
-          auto add_here_str = [&mod] (redirect& r, string&& w)
+          auto add_here_str = [] (redirect& r, string&& w)
           {
-            if (mod.find (':') == string::npos)
+            if (r.modifiers.find (':') == string::npos)
               w += '\n';
             r.str = move (w);
           };
 
-          auto add_here_str_regex = [&l, &mod, this] (
+          auto add_here_str_regex = [&l, &check_regex_mod, this] (
             redirect& r, int fd, string&& w)
           {
-            using namespace regex;
-
             const char* what (nullptr);
             switch (fd)
             {
@@ -1487,36 +1468,23 @@ namespace build2
             case 2: what = "stderr regex redirect"; break;
             }
 
-            line_pool pool;
-            line_string s;
+            check_regex_mod (r.modifiers, w, l, what);
 
-            try
-            {
-              regex_parts re (parse_regex (w, l, what));
-              s += line_char (char_regex (re.value,
-                                          char_regex::ECMAScript | re.flags),
-                              pool);
-            }
-            catch (const regex_error& e)
-            {
-              // Print regex_error description if meaningful.
-              //
-              fail (l) << "invalid " << what << e <<
-                info << "regex: " << w;
-            }
+            regex_parts rp (parse_regex (w, l, what));
 
-            if (mod.find (':') == string::npos)
-            {
-              w += '\n';
-              s += line_char ("", pool);
-            }
+            regex_lines& re (r.regex);
+            re.intro = rp.intro;
 
-            r.regex.str = move (w);
+            re.lines.emplace_back (
+              l.line, l.column, move (rp.value), move (rp.flags));
 
-            // No special line-chars, so no way to try to create a malformed
-            // expression, and so can't throw.
+            // Add final blank line unless suppressed.
             //
-            r.regex.regex = line_regex (move (s), move (pool));
+            // Note that the position is synthetic, but that's ok as we don't
+            // expect any diagnostics to refer to this line.
+            //
+            if (r.modifiers.find (':') == string::npos)
+              re.lines.emplace_back (l.line, l.column, string (), false);
           };
 
           auto parse_path = [&l, this] (string&& w, const char* what) -> path
@@ -1539,7 +1507,7 @@ namespace build2
             }
           };
 
-          auto add_file = [&mod, &parse_path] (redirect& r, int fd, string&& w)
+          auto add_file = [&parse_path] (redirect& r, int fd, string&& w)
           {
             const char* what (nullptr);
             switch (fd)
@@ -1550,7 +1518,7 @@ namespace build2
             }
 
             r.file.path = parse_path (move (w), what);
-            r.file.append = mod.find ('&') != string::npos;
+            r.file.append = r.modifiers.find ('&') != string::npos;
           };
 
           switch (p)
@@ -1771,6 +1739,11 @@ namespace build2
           redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err);
           r = redirect (rt);
 
+          // Don't move as still may be used for pending here-document end
+          // marker processing.
+          //
+          r.modifiers = mod;
+
           switch (rt)
           {
           case redirect_type::none:
@@ -1974,6 +1947,8 @@ namespace build2
 
                   if (re)
                   {
+                    check_regex_mod (mod, end, l, what);
+
                     r = parse_regex (end, l, what);
                     end = move (r.value); // The "cleared" end marker.
                   }
@@ -1984,7 +1959,7 @@ namespace build2
                       move (end),
                       qt == quote_type::single,
                       move (mod),
-                      r.introducer, r.flags});
+                      r.intro, move (r.flags)});
                   break;
                 }
 
@@ -2099,7 +2074,7 @@ namespace build2
                       (t.qtype == quote_type::unquoted ||
                        t.qtype == quote_type::single),
                       move (mod),
-                      r.introducer, r.flags});
+                      r.intro, move (r.flags)});
 
                   p = pending::none;
                   mod.clear ();
@@ -2396,54 +2371,43 @@ namespace build2
                 : lexer_mode::here_line_double);
           next (t, tt);
 
-          pair<string, regex::line_regex> v (
-            parse_here_document (
-              t, tt, h.end, h.modifiers, h.regex, h.regex_flags));
+          parsed_doc v (
+            parse_here_document (t, tt, h.end, h.modifiers, h.regex));
 
           if (!pre_parse_)
           {
             command& c (p.first[h.expr].pipe[h.pipe]);
             redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err);
 
-            if (h.regex)
+            if (v.re)
             {
-              r.regex.str   = move (v.first);
-              r.regex.regex = move (v.second);
-
-              // Restore the original end marker.
-              //
-              r.end = h.regex + h.end + h.regex;
-              if ((h.regex_flags & regex::char_regex::icase) != 0)
-                r.end += 'i';
+              r.regex = move (v.regex);
+              r.regex.flags = move (h.regex_flags);
             }
             else
-            {
-              r.str = move (v.first);
-              r.end = move (h.end);
-            }
+              r.str = move (v.str);
+
+            r.end        = move (h.end);
+            r.end_line   = v.end_line;
+            r.end_column = v.end_column;
           }
 
           expire_mode ();
         }
       }
 
-      pair<string, regex::line_regex> parser::
+      parser::parsed_doc parser::
       parse_here_document (token& t, type& tt,
                            const string& em,
                            const string& mod,
-                           char re,
-                           regex::char_flags refl)
+                           char re)
       {
         // enter: first token on first line
         // leave: newline (after end marker)
 
-        using namespace regex;
-
-        string rs; // String or regex literal.
+        string rs; // String literal.
 
-        line_pool pool;
-        line_string ls;
-        line_regex rre;
+        regex_lines rre;
 
         // Here-documents can be indented. The leading whitespaces of the end
         // marker line (called strip prefix) determine the indentation. Every
@@ -2465,8 +2429,7 @@ namespace build2
 
         // We will use the location of the first token on the line for the
         // regex diagnostics. At the end of the loop it will point to the
-        // beginning of the end marker which we use for diagnostics of the
-        // line_regex object creation.
+        // beginning of the end marker.
         //
         location l;
 
@@ -2543,97 +2506,93 @@ namespace build2
               }
             }
 
-            // Add newline after previous line.
-            //
-            if (!rs.empty ())
-              rs += '\n';
-
-            rs += s;
+            if (!re)
+            {
+              // Add newline after previous line.
+              //
+              if (!rs.empty ())
+                rs += '\n';
 
-            if (re)
+              rs += s;
+            }
+            else
             {
-              if (s[0] == re) // Line starts with the regex introducer.
+              // Due to expansion we can end up with multiple lines. If empty
+              // then will add a blank textual literal.
+              //
+              for (size_t p (0); p != string::npos; )
               {
-                size_t n (s.size ());
+                string ln;
+                size_t np (s.find ('\n', p));
 
-                // Handle the empty line-regex characters.
-                //
-                if (n == 1)
-                  fail (l) << "regex introducer without regex" <<
-                    info << "consider changing regex introducer '" << re
-                           << "' in here-document end marker";
-
-                // This is a char-regex, or a sequence of line-regex syntax
-                // characters or both (in this specific order). So we will add
-                // the char-regex first (if present), and then sequentially
-                // add the line-regex syntax characters (if present).
-                //
-                size_t p (s.find (re, 1));
-                if (p == string::npos)
+                if (np != string::npos)
                 {
-                  // No char-regex, just a sequence of line-regex syntax
-                  // characters. Prepare to parse them starting from the
-                  // position right after the introducer.
-                  //
-                  p = 1;
+                  ln = string (s, p, np - p);
+                  p = np + 1;
                 }
                 else
                 {
-                  // Add regex line-char, and then position to the end of the
-                  // regex (that includes terminating introducer and the
-                  // optional flags). This is the first line-regex syntax
-                  // character position (if present).
-                  //
-                  line_char c;
+                  ln = string (s, p);
+                  p = np;
+                }
 
-                  // Empty regex is a special case repesenting the blank line.
+                if (ln[0] != re) // Line doesn't start with regex introducer.
+                {
+                  // This is a line-char literal (covers blank lines as well).
                   //
-                  if (p == 1)
+                  // Append textual literal.
+                  //
+                  rre.lines.emplace_back (l.line, l.column, move (ln), false);
+                }
+                else // Line starts with the regex introducer.
+                {
+                  // This is a char-regex, or a sequence of line-regex syntax
+                  // characters or both (in this specific order). So we will
+                  // add regex (with optional special characters) or special
+                  // literal.
+                  //
+                  size_t p (ln.find (re, 1));
+                  if (p == string::npos)
                   {
-                    c = line_char ("", pool);
-                    ++p;
+                    // No regex, just a sequence of syntax characters.
+                    //
+                    string spec (ln, 1);
+                    if (spec.empty ())
+                      fail (l) << "no syntax line characters";
+
+                    // Append special literal.
+                    //
+                    rre.lines.emplace_back (
+                      l.line, l.column, move (spec), true);
                   }
                   else
                   {
-                    // Can't fail as all the pre-conditions verified (non-empty
-                    // with both introducers in place), so no description
-                    // required.
+                    // Regex (probably with syntax characters).
                     //
-                    regex_parts re (parse_regex (s, l, "", &p));
+                    regex_parts re;
 
-                    try
-                    {
-                      c = line_char (
-                        char_regex (re.value,
-                                    char_regex::ECMAScript | re.flags | refl),
-                        pool);
-                    }
-                    catch (const regex_error& e)
-                    {
-                      // Print regex_error description if meaningful.
+                    // Empty regex is a special case representing a blank line.
+                    //
+                    if (p == 1)
+                      // Position to optional special characters of an empty
+                      // regex.
                       //
-                      fail (l) << "invalid regex" << e;
-                    }
-                  }
-
-                  ls += c;
-                }
+                      ++p;
+                    else
+                      // Can't fail as all the pre-conditions verified
+                      // (non-empty with both introducers in place), so no
+                      // description required.
+                      //
+                      re = parse_regex (ln, l, "", &p);
 
-                while (p != n)
-                {
-                  char c (s[p++]);
-                  if (line_char::syntax (c))
-                    ls += line_char (c);
-                  else
-                    fail (l) << "invalid line-regex syntax character '" << c
-                             << "'";
+                    // Append regex with optional special characters.
+                    //
+                    rre.lines.emplace_back (l.line, l.column,
+                                            move (re.value), move (re.flags),
+                                            string (ln, p));
+                  }
                 }
               }
-              else
-                // Line doesn't start with regex introducer. Add line-char
-                // literal (handles blank lines as well).
-                //
-                ls += line_char (move (s), pool);
             }
           }
 
@@ -2695,35 +2654,31 @@ namespace build2
           //
           if (mod.find (':') == string::npos)
           {
-            rs += '\n';
-
             if (re)
-              ls += line_char ("", pool);
+              // Note that the position is synthetic, but that's ok as we don't
+              // expect any diagnostics to refer to this line.
+              //
+              rre.lines.emplace_back (l.line, l.column, string (), false);
+            else
+              rs += '\n';
           }
 
-          // Parse line-regex.
+          // Finalize regex lines.
           //
           if (re)
           {
             // Empty regex matches nothing, so not of much use.
             //
-            if (ls.empty ())
+            if (rre.lines.empty ())
               fail (l) << "empty here-document regex";
 
-            try
-            {
-              rre = line_regex (move (ls), move (pool));
-            }
-            catch (const regex_error& e)
-            {
-              // Print regex_error description if meaningful.
-              //
-              fail (l) << "invalid here-document regex" << e;
-            }
+            rre.intro  = re;
           }
         }
 
-        return make_pair (move (rs), move (rre));
+        return re
+          ? parsed_doc (move (rre), l.line, l.column)
+          : parsed_doc (move (rs), l.line, l.column);
       }
 
       //
@@ -3184,6 +3139,39 @@ namespace build2
         lexer_ = l;
         base_parser::lexer_ = l;
       }
+
+      // parser::parsed_doc
+      //
+      parser::parsed_doc::
+      parsed_doc (string s, uint64_t l, uint64_t c)
+          : str (move (s)), re (false), end_line (l), end_column (c)
+      {
+      }
+
+      parser::parsed_doc::
+      parsed_doc (regex_lines r, uint64_t l, uint64_t c)
+          : regex (move (r)), re (true), end_line (l), end_column (c)
+      {
+      }
+
+      parser::parsed_doc::
+      parsed_doc (parsed_doc&& d)
+          : re (d.re), end_line (d.end_line), end_column (d.end_column)
+      {
+        if (re)
+          new (&regex) regex_lines (move (d.regex));
+        else
+          new (&str) string (move (d.str));
+      }
+
+      parser::parsed_doc::
+      ~parsed_doc ()
+      {
+        if (re)
+          regex.~regex_lines ();
+        else
+          str.~string ();
+      }
     }
   }
 }
diff --git a/build2/test/script/regex b/build2/test/script/regex
index 7708410..b25c1f1 100644
--- a/build2/test/script/regex
+++ b/build2/test/script/regex
@@ -24,8 +24,25 @@ namespace build2
       namespace regex
       {
         using char_string = std::basic_string<char>;
-        using char_regex = std::basic_regex<char>;
-        using char_flags = char_regex::flag_type;
+
+        enum class char_flags: std::uint16_t
+        {
+          icase = 0x1, // Case-insensitive match.
+          idot  = 0x2, // Invert '.' escaping.
+
+          none = 0
+        };
+
+        // Restricts valid standard flags to just {icase}, extends with custom
+        // flags {idot}.
+        //
+        class char_regex: public std::basic_regex<char>
+        {
+        public:
+          using base_type = std::basic_regex<char>;
+
+          char_regex (const char_string&, char_flags = char_flags::none);
+        };
 
         // Newlines are line separators and are not part of the line:
         //
@@ -110,7 +127,7 @@ namespace build2
           //
           // 0 (nul character)
           // -1 (EOF)
-          // [()|.*+?{\}0123456789,=!] (excluding [])
+          // [()|.*+?{}\0123456789,=!] (excluding [])
           //
           // Note that the constructor is implicit to allow basic_regex to
           // implicitly construct line_chars from special char literals (in
@@ -252,9 +269,8 @@ namespace build2
         template <typename T>
         struct line_char_cmp
           : public std::enable_if<std::is_integral<T>::value ||
-                                  std::is_enum<T>::value>
-        {
-        };
+                                  (std::is_enum<T>::value &&
+                                   !std::is_same<T, char_flags>::value)> {};
 
         template <typename T, typename = typename line_char_cmp<T>::type>
         bool
@@ -655,14 +671,13 @@ namespace build2
           line_regex& operator= (const line_regex&) = delete;
 
         public:
-          // Mutable since input line_char literals must go into the same
-          // pool (and thus is MT-unsafe).
-          //
-          mutable line_pool pool;
+          line_pool pool;
         };
       }
     }
   }
 }
 
+#include <build2/test/script/regex.ixx>
+
 #endif // BUILD2_TEST_SCRIPT_REGEX
diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx
index bbf3f00..48e1eeb 100644
--- a/build2/test/script/regex.cxx
+++ b/build2/test/script/regex.cxx
@@ -171,6 +171,87 @@ namespace build2
                       new std::ctype<line_char> ()) // Hidden by ctype bitmask.
         {
         }
+
+        // char_regex
+        //
+        // Transform regex according to the extended flags {idot}. If regex is
+        // malformed then keep transforming, so the resulting string is
+        // malformed the same way. We expect the error to be reported by the
+        // char_regex ctor.
+        //
+        static string
+        transform (const string& s, char_flags f)
+        {
+          assert ((f & char_flags::idot) != char_flags::none);
+
+          string r;
+          bool escape (false);
+          bool cclass (false);
+
+          for (char c: s)
+          {
+            // Invert escaping for a dot that is outside the char class
+            // brackets.
+            //
+            bool inverse (c == '.' && !cclass);
+
+            // Handle the escape case. Note that we delay adding the backslash
+            // since we may have to invert things.
+            //
+            if (escape)
+            {
+              if (!inverse)
+                r += '\\';
+
+              r += c;
+              escape = false;
+
+              continue;
+            }
+            else if (c == '\\')
+            {
+              escape = true;
+              continue;
+            }
+
+            // Keep track of being inside the char class brackets; escape the
+            // dot if inverting. Note that we never invert square brackets.
+            //
+            if (c == '[' && !cclass)
+              cclass = true;
+            else if (c == ']' && cclass)
+              cclass = false;
+            else if (inverse)
+              r += '\\';
+
+            r += c;
+          }
+
+          if (escape) // Regex is malformed but that's not our problem.
+            r += '\\';
+
+          return r;
+        }
+
+        static char_regex::flag_type
+        to_std_flags (char_flags f)
+        {
+          // Note that ECMAScript flag is implied in the absence of a grammar
+          // flag.
+          //
+          return (f & char_flags::icase) != char_flags::none
+            ? char_regex::icase
+            : char_regex::flag_type ();
+        }
+
+        char_regex::
+        char_regex (const char_string& s, char_flags f)
+            : base_type ((f & char_flags::idot) != char_flags::none
+                         ? transform (s, f)
+                         : s,
+                         to_std_flags (f))
+        {
+        }
       }
     }
   }
diff --git a/build2/test/script/regex.ixx b/build2/test/script/regex.ixx
new file mode 100644
index 0000000..4073312
--- /dev/null
+++ b/build2/test/script/regex.ixx
@@ -0,0 +1,35 @@
+// file      : build2/test/script/regex.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+  namespace test
+  {
+    namespace script
+    {
+      namespace regex
+      {
+        inline char_flags
+        operator&= (char_flags& x, char_flags y)
+        {
+          return x = static_cast<char_flags> (
+            static_cast<uint16_t> (x) & static_cast<uint16_t> (y));
+        }
+
+        inline char_flags
+        operator|= (char_flags& x, char_flags y)
+        {
+          return x = static_cast<char_flags> (
+            static_cast<uint16_t> (x) | static_cast<uint16_t> (y));
+        }
+
+        inline char_flags
+        operator& (char_flags x, char_flags y) {return x &= y;}
+
+        inline char_flags
+        operator| (char_flags x, char_flags y) {return x |= y;}
+      }
+    }
+  }
+}
diff --git a/build2/test/script/runner b/build2/test/script/runner
index 7b932b9..56ea834 100644
--- a/build2/test/script/runner
+++ b/build2/test/script/runner
@@ -16,7 +16,7 @@ namespace build2
 {
   namespace test
   {
-    class common;
+    struct common;
 
     namespace script
     {
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index 522dedd..8e31cf8 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -5,7 +5,9 @@
 #include <build2/test/script/runner>
 
 #include <set>
-#include <ios> // streamsize
+#include <ios>     // streamsize
+#include <cstring> // strstr()
+#include <sstream>
 
 #include <butl/fdstream> // fdopen_mode, fdnull(), fddup()
 
@@ -13,11 +15,45 @@
 
 #include <build2/test/common>
 
+#include <build2/test/script/regex>
 #include <build2/test/script/builtin>
 
 using namespace std;
 using namespace butl;
 
+namespace std
+{
+  // Print regex error description but only if it is meaningful (this is also
+  // why we have to print leading colon here).
+  //
+  // Currently libstdc++ just returns the name of the exception (bug #67361).
+  // So we check that the description contains at least one space character.
+  //
+  // While VC's description is meaningful, it has an undesired prefix that
+  // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
+  //
+  static ostream&
+  operator<< (ostream& o, const regex_error& e)
+  {
+    const char* d (e.what ());
+
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+    const char* rd (strstr (d, "): "));
+    if (rd != nullptr)
+      d = rd + 3;
+#endif
+
+    ostringstream os;
+    os << runtime_error (d); // Sanitize the description.
+
+    string s (os.str ());
+    if (s.find (' ') != string::npos)
+      o << ": " << s;
+
+    return o;
+  }
+}
+
 namespace build2
 {
   namespace test
@@ -99,6 +135,66 @@ namespace build2
         }
       }
 
+      // Save a string to the file. Fail if exception is thrown by underlying
+      // operations.
+      //
+      static void
+      save (const path& p, const string& s, const location& ll)
+      {
+        try
+        {
+          ofdstream os (p);
+          os << s;
+          os.close ();
+        }
+        catch (const io_error& e)
+        {
+          fail (ll) << "unable to write " << p << ": " << e;
+        }
+      }
+
+      // Transform string according to here-* redirect modifiers from the {/}
+      // set.
+      //
+      static string
+      transform (const string& s,
+                 bool regex,
+                 const string& modifiers,
+                 const script& scr)
+      {
+        if (modifiers.find ('/') == string::npos)
+          return s;
+
+        // For targets other than Windows leave the string intact.
+        //
+        if (cast<target_triplet> (scr.test_target["test.target"]).class_ !=
+            "windows")
+          return s;
+
+        // Convert forward slashes to Windows path separators (escape for
+        // regex).
+        //
+        string r;
+        for (size_t p (0);;)
+        {
+          size_t sp (s.find ('/', p));
+
+          if (sp != string::npos)
+          {
+            r.append (s, p, sp - p);
+            r.append (regex ? "\\\\" : "\\");
+            p = sp + 1;
+          }
+          else
+          {
+            r.append (s, p, sp);
+            break;
+          }
+        }
+
+        return r;
+      }
+
       // Check if the test command output matches the expected result (redirect
       // value). Noop for redirect types other than none, here_*.
       //
@@ -140,23 +236,6 @@ namespace build2
         {
           assert (!op.empty ());
 
-          // While the regex file is not used for output validation we still
-          // create it for troubleshooting.
-          //
-          path opp (op + (re ? ".regex" : ".orig"));
-
-          try
-          {
-            ofdstream os (opp);
-            sp.clean ({cleanup_type::always, opp}, true);
-            os << (re ? rd.regex.str : rd.str);
-            os.close ();
-          }
-          catch (const io_error& e)
-          {
-            fail (ll) << "unable to write " << opp << ": " << e;
-          }
-
           auto output_info = [&what, &ll] (diag_record& d,
                                            const path& p,
                                            const char* prefix = "",
@@ -168,13 +247,223 @@ namespace build2
               d << info << prefix << what << suffix << " is empty";
           };
 
-          if (re)
+          if (re) // Match the output with the regex.
           {
-            // Match the output with the line_regex. That requires to parse the
-            // output into the line_string of literals first.
+            // The overall plan is:
+            //
+            // 1. Create regex line string. While creating its line characters
+            //    transform regex lines according to the redirect modifiers.
+            //
+            // 2. Create line regex using the line string. If creation fails
+            //    then save the (transformed) regex redirect to a file for
+            //    troubleshooting.
+            //
+            // 3. Parse the output into the literal line string.
+            //
+            // 4. Match the output line string with the line regex.
+            //
+            // 5. If match fails save the (transformed) regex redirect to a
+            //    file for troubleshooting.
             //
             using namespace regex;
 
+            // Create regex line string.
+            //
+            line_pool pool;
+            line_string rls;
+            const regex_lines rl (rd.regex);
+
+            // Parse regex flags.
+            //
+            // When adding support for new flags don't forget to update
+            // parse_regex().
+            //
+            auto parse_flags = [] (const string& f) -> char_flags
+            {
+              char_flags r (char_flags::none);
+
+              for (char c: f)
+              {
+                switch (c)
+                {
+                case 'd': r |= char_flags::idot;  break;
+                case 'i': r |= char_flags::icase; break;
+                default: assert (false); // Error so should have been checked.
+                }
+              }
+
+              return r;
+            };
+
+            // Return original regex line with the transformation applied.
+            //
+            auto line = [&rl, &rd, &sp] (const regex_line& l) -> string
+            {
+              string r;
+              if (l.regex)                  // Regex (possibly empty),
+              {
+                r += rl.intro;
+                r += transform (l.value, true, rd.modifiers, *sp.root);
+                r += rl.intro;
+                r += l.flags;
+              }
+              else if (!l.special.empty ()) // Special literal.
+                r += rl.intro;
+              else                          // Textual literal.
+                r += transform (l.value, false, rd.modifiers, *sp.root);
+
+              r += l.special;
+              return r;
+            };
+
+            // Return regex line location.
+            //
+            // Note that we rely on the fact that the command and regex lines
+            // always belong to the same testscript file.
+            //
+            auto loc = [&ll] (uint64_t line, uint64_t column) -> location
+            {
+              location r (ll);
+              r.line = line;
+              r.column = column;
+              return r;
+            };
+
+            // Save the regex to file for troubleshooting, return the file path
+            // it has been saved to.
+            //
+            // Note that we save the regex on line regex creation failure or if
+            // the program output doesn't match.
+            //
+            auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path
+            {
+              path rp (op + ".regex");
+
+              // Encode here-document regex global flags if present as a file
+              // name suffix. For example if icase and idot flags are specified
+              // the name will look like:
+              //
+              // test/1/stdout.regex~di
+              //
+              if (rd.type == redirect_type::here_doc_regex &&
+                  !rl.flags.empty ())
+                rp += "~" + rl.flags;
+
+              // Note that it would be more efficient to directly write chunks
+              // to file rather than to compose a string first. However, we
+              // don't bother (about performance) for the sake of the code as
+              // we already failed.
+              //
+              string s;
+              for (const auto& l: rl.lines)
+              {
+                if (!s.empty ()) s += '\n';
+                s += line (l);
+              }
+
+              save (rp, s, ll);
+              return rp;
+            };
+
+            // Finally create regex line string.
+            //
+            // Note that diagnostics doesn't refer to the program path as it is
+            // irrelevant to failures at this stage.
+            //
+            char_flags gf (parse_flags (rl.flags)); // Regex global flags.
+
+            for (const auto& l: rl.lines)
+            {
+              if (l.regex) // Regex (with optional special characters).
+              {
+                line_char c;
+
+                // Empty regex is a special case representing the blank line.
+                //
+                if (l.value.empty ())
+                  c = line_char ("", pool);
+                else
+                {
+                  try
+                  {
+                    string s (
+                      transform (l.value, true, rd.modifiers, *sp.root));
+
+                    c = line_char (
+                      char_regex (s, gf | parse_flags (l.flags)), pool);
+                  }
+                  catch (const regex_error& e)
+                  {
+                    // Print regex_error description if meaningful.
+                    //
+                    diag_record d (fail (loc (l.line, l.column)));
+
+                    if (rd.type == redirect_type::here_str_regex)
+                      d << "invalid " << what << " regex redirect" << e <<
+                        info << "regex: '" << line (l) << "'";
+                    else
+                      d << "invalid char-regex in " << what
+                        << " regex redirect" << e <<
+                        info << "regex line: '" << line (l) << "'";
+                  }
+                }
+
+                rls += c; // Append blank literal or regex line char.
+              }
+              else if (!l.special.empty ()) // Special literal.
+              {
+                // Literal can not be followed by special characters in the
+                // same line.
+                //
+                assert (l.value.empty ());
+              }
+              else // Textual literal.
+              {
+                // Append literal line char.
+                //
+                rls += line_char (
+                  transform (l.value, false, rd.modifiers, *sp.root), pool);
+              }
+
+              for (char c: l.special)
+              {
+                if (line_char::syntax (c))
+                  rls += line_char (c); // Append special line char.
+                else
+                  fail (loc (l.line, l.column))
+                    << "invalid syntax character '" << c << "' in " << what
+                    << " regex redirect" <<
+                    info << "regex line: '" << line (l) << "'";
+              }
+            }
+
+            // Create line regex.
+            //
+            line_regex regex;
+
+            try
+            {
+              regex = line_regex (move (rls), move (pool));
+            }
+            catch (const regex_error& e)
+            {
+              // Note that line regex creation can not fail for here-string
+              // redirect as it doesn't have syntax line chars. That in
+              // particular means that end_line and end_column are meaningful.
+              //
+              assert (rd.type == redirect_type::here_doc_regex);
+
+              diag_record d (fail (loc (rd.end_line, rd.end_column)));
+
+              // Print regex_error description if meaningful.
+              //
+              d << "invalid " << what << " regex redirect" << e;
+
+              output_info (d, save_regex (), "", " regex");
+            }
+
+            // Parse the output into the literal line string.
+            //
             line_string ls;
 
             try
@@ -212,7 +501,7 @@ namespace build2
                 while (!s.empty () && s.back () == '\r')
                   s.pop_back ();
 
-                ls += line_char (move (s), rd.regex.regex.pool);
+                ls += line_char (move (s), regex.pool);
               }
             }
             catch (const io_error& e)
@@ -220,7 +509,9 @@ namespace build2
               fail (ll) << "unable to read " << op << ": " << e;
             }
 
-            if (regex_match (ls, rd.regex.regex)) // Doesn't throw.
+            // Match the output with the regex.
+            //
+            if (regex_match (ls, regex)) // Doesn't throw.
               return;
 
             // Output doesn't match the regex.
@@ -229,16 +520,20 @@ namespace build2
             d << pr << " " << what << " doesn't match the regex";
 
             output_info (d, op);
-            output_info (d, opp, "", " regex");
+            output_info (d, save_regex (), "", " regex");
             input_info  (d);
 
             // Fall through.
             //
           }
-          else
+          else // Compare the output with the expected result.
           {
-            // Use diff utility to compare the output with the expected result.
+            // Use diff utility for the comparison.
             //
+            path eop (op + ".orig");
+            save (eop, transform (rd.str, false, rd.modifiers, *sp.root), ll);
+            sp.clean ({cleanup_type::always, eop}, true);
+
             path dp ("diff");
             process_path pp (run_search (dp, true));
 
@@ -246,7 +541,7 @@ namespace build2
               pp.recall_string (),
                 "--strip-trailing-cr", // Is essential for cross-testing.
                 "-u",
-                opp.string ().c_str (),
+                eop.string ().c_str (),
                 op.string ().c_str (),
                 nullptr};
 
@@ -288,7 +583,7 @@ namespace build2
               d << pr << " " << what << " doesn't match the expected output";
 
               output_info (d, op);
-              output_info (d, opp, "expected ");
+              output_info (d, eop, "expected ");
               output_info (d, ep, "", " diff");
               input_info  (d);
 
@@ -589,17 +884,9 @@ namespace build2
             //
             isp = std_path ("stdin");
 
-            try
-            {
-              ofdstream os (isp);
-              sp.clean ({cleanup_type::always, isp}, true);
-              os << c.in.str;
-              os.close ();
-            }
-            catch (const io_error& e)
-            {
-              fail (ll) << "unable to write " << isp << ": " << e;
-            }
+            const redirect& r (c.in);
+            save (isp, transform (r.str, false, r.modifiers, *sp.root), ll);
+            sp.clean ({cleanup_type::always, isp}, true);
 
             open_stdin ();
             break;
@@ -767,12 +1054,7 @@ namespace build2
         {
           // Execute the process.
           //
-          // Pre-search the program path so it is reflected in the failure
-          // diagnostics. The user can see the original path running the test
-          // operation with the verbosity level > 2.
-          //
-          process_path pp (run_search (c.program, true));
-          cstrings args {pp.recall_string ()};
+          cstrings args {c.program.string ().c_str ()};
 
           for (const auto& a: c.arguments)
             args.push_back (a.c_str ());
@@ -781,6 +1063,8 @@ namespace build2
 
           try
           {
+            process_path pp (process::path_search (args[0]));
+
             if (verb >= 2)
               print_process (args);
 
@@ -798,7 +1082,7 @@ namespace build2
           }
           catch (const process_error& e)
           {
-            error (ll) << "unable to execute " << pp << ": " << e;
+            error (ll) << "unable to execute " << args[0] << ": " << e;
 
             if (e.child ())
               std::exit (1);
diff --git a/build2/test/script/script b/build2/test/script/script
index 0144af7..bb9b074 100644
--- a/build2/test/script/script
+++ b/build2/test/script/script
@@ -15,7 +15,6 @@
 #include <build2/test/target>
 
 #include <build2/test/script/token> // replay_tokens
-#include <build2/test/script/regex>
 
 namespace build2
 {
@@ -78,16 +77,64 @@ namespace build2
         file
       };
 
+      // Pre-parsed (but not instantiated) regex lines. The idea here is that
+      // we should be able to re-create their (more or less) exact text
+      // representation for diagnostics but also instantiate without any
+      // re-parsing.
+      //
+      struct regex_line
+      {
+        // If regex is true, then value is the regex expression. Otherwise, it
+        // is a literal. Note that special characters can be present in both
+        // cases. For example, //+ is a regex, while /+ is a literal, both
+        // with '+' as a special character. Flags are only valid for regex.
+        // Literals are divided into textual (has no special characters) and
+        // special (has just special characters instead) ones. For example
+        // foo is a textual literal, while /.+ is a special one. Note that
+        // literal must not have value and special both non-empty.
+        //
+        bool regex;
+
+        string value;
+        string flags;
+        string special;
+
+        uint64_t line;
+        uint64_t column;
+
+        // Create regex with optional special characters.
+        //
+        regex_line (uint64_t l, uint64_t c,
+                    string v, string f, string s = string ())
+            : regex (true),
+              value (move (v)),
+              flags (move (f)),
+              special (move (s)),
+              line (l),
+              column (c) {}
+
+        // Create a literal, either text or special.
+        //
+        regex_line (uint64_t l, uint64_t c, string v, bool s)
+            : regex (false),
+              value (s ? string () : move (v)),
+              special (s ? move (v) : string ()),
+              line (l),
+              column (c) {}
+      };
+
+      struct regex_lines
+      {
+        char intro;   // Introducer character.
+        string flags; // Global flags (here-document).
+
+        small_vector<regex_line, 8> lines;
+      };
+
       struct redirect
       {
         redirect_type type;
 
-        struct regex_type
-        {
-          regex::line_regex regex;
-          string str;              // String representation for printing.
-        };
-
         struct file_type
         {
           using path_type = build2::path;
@@ -97,13 +144,16 @@ namespace build2
 
         union
         {
-          int        fd;    // Merge-to descriptor.
-          string     str;   // Note: includes trailing newline, if requested.
-          regex_type regex; // Note: includes trailing blank, if requested.
-          file_type  file;
+          int         fd;    // Merge-to descriptor.
+          string      str;   // Note: includes trailing newline, if requested.
+          regex_lines regex; // Note: includes trailing blank, if requested.
+          file_type   file;
         };
 
-        string end; // Here-document end marker for printing.
+        string modifiers;   // Redirect modifiers.
+        string end;         // Here-document end marker (no regex intro/flags).
+        uint64_t end_line;  // Here-document end marker location.
+        uint64_t end_column;
 
         explicit
         redirect (redirect_type = redirect_type::none);
diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx
index 2a34f66..6f56661 100644
--- a/build2/test/script/script.cxx
+++ b/build2/test/script/script.cxx
@@ -85,44 +85,65 @@ namespace build2
           case redirect_type::merge: o << '&' << r.fd; break;
 
           case redirect_type::here_str_literal:
-          case redirect_type::here_str_regex:
+          case redirect_type::here_doc_literal:
             {
-              bool re (r.type == redirect_type::here_str_regex);
-              const string& v (re ? r.regex.str : r.str);
-              bool nl (!v.empty () && v.back () == '\n');
+              bool doc (r.type == redirect_type::here_doc_literal);
 
-              if (!nl)
-                o << ':';
+              // For here-document add another '>' or '<'. Note that here end
+              // marker never needs to be quoted.
+              //
+              if (doc)
+                o << d;
 
-              if (re)
-                o << '~';
+              o << r.modifiers;
+
+              if (doc)
+                o << r.end;
+              else
+              {
+                const string& v (r.str);
+                to_stream_q (o,
+                             r.modifiers.find (':') == string::npos
+                             ? string (v, 0, v.size () - 1) // Strip newline.
+                             : v);
+              }
 
-              to_stream_q (o, nl ? string (v, 0, v.size () - 1) : v);
               break;
             }
-          case redirect_type::here_doc_literal:
+
+          case redirect_type::here_str_regex:
           case redirect_type::here_doc_regex:
             {
-              bool re (r.type == redirect_type::here_doc_regex);
-              const string& v (re ? r.regex.str : r.str);
-              bool nl (!v.empty () && v.back () == '\n');
+              bool doc (r.type == redirect_type::here_doc_regex);
 
-              // Add another '>' or '<'. Note that here end marker never
-              // needs to be quoted.
+              // For here-document add another '>' or '<'. Note that here end
+              // marker never needs to be quoted.
               //
-              o << d << (nl ? "" : ":");
+              if (doc)
+                o << d;
+
+              o << r.modifiers;
+
+              const regex_lines& re (r.regex);
+
+              if (doc)
+                o << re.intro + r.end + re.intro + re.flags;
+              else
+              {
+                assert (!re.lines.empty ()); // Regex can't be empty.
 
-              if (re)
-                o << '~';
+                regex_line l (re.lines[0]);
+                to_stream_q (o, re.intro + l.value + re.intro + l.flags);
+              }
 
-              to_stream_q (o, r.end);
               break;
             }
+
           case redirect_type::file:
             {
               // Add '>>' or '<<' (and so make it '<<<' or '>>>').
               //
-              o << d << d << (r.file.append ? "&" : "");
+              o << d << d << r.modifiers;
               print_path (r.file.path);
               break;
             }
@@ -131,16 +152,36 @@ namespace build2
 
         auto print_doc = [&o] (const redirect& r)
         {
-          bool re (r.type == redirect_type::here_doc_regex);
-          const string& v (re ? r.regex.str : r.str);
-          bool nl (!v.empty () && v.back () == '\n');
+          o << endl;
 
-          // For the regex here-document the end marker contains introducer and
-          // flags characters, so need to remove them.
-          //
-          const string& e (r.end);
-          o << endl << v << (nl ? "" : "\n")
-            << (re ? string (e, 1, e.find (e[0], 1) - 1) : e);
+          if (r.type == redirect_type::here_doc_literal)
+            o << r.str;
+          else
+          {
+            assert (r.type == redirect_type::here_doc_regex);
+
+            const regex_lines& rl (r.regex);
+
+            for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
+                 i != e; ++i)
+            {
+              if (i != b)
+                o << endl;
+
+              const regex_line& l (*i);
+
+              if (l.regex)                  // Regex (possibly empty),
+                o << rl.intro << l.value << rl.intro << l.flags;
+              else if (!l.special.empty ()) // Special literal.
+                o << rl.intro;
+              else                          // Textual literal.
+                o << l.value;
+
+              o << l.special;
+            }
+          }
+
+          o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end;
         };
 
         if ((m & command_to_stream::header) == command_to_stream::header)
@@ -268,7 +309,11 @@ namespace build2
         case redirect_type::here_doc_literal: new (&str) string (); break;
 
         case redirect_type::here_str_regex:
-        case redirect_type::here_doc_regex: new (&regex) regex_type (); break;
+        case redirect_type::here_doc_regex:
+          {
+            new (&regex) regex_lines ();
+            break;
+          }
 
         case redirect_type::file: new (&file) file_type (); break;
         }
@@ -276,7 +321,11 @@ namespace build2
 
       redirect::
       redirect (redirect&& r)
-          : type (r.type), end (move (r.end))
+          : type (r.type),
+            modifiers (move (r.modifiers)),
+            end (move (r.end)),
+            end_line (r.end_line),
+            end_column (r.end_column)
       {
         switch (type)
         {
@@ -295,7 +344,7 @@ namespace build2
         case redirect_type::here_str_regex:
         case redirect_type::here_doc_regex:
           {
-            new (&regex) regex_type (move (r.regex));
+            new (&regex) regex_lines (move (r.regex));
             break;
           }
         case redirect_type::file:
@@ -320,7 +369,7 @@ namespace build2
         case redirect_type::here_doc_literal: str.~string (); break;
 
         case redirect_type::here_str_regex:
-        case redirect_type::here_doc_regex: regex.~regex_type (); break;
+        case redirect_type::here_doc_regex: regex.~regex_lines (); break;
 
         case redirect_type::file: file.~file_type (); break;
         }
-- 
cgit v1.1