Add support for regex in runner

author: Karen Arutyunov <karen@codesynthesis.com> 2016-12-17 23:28:30 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2017-01-05 15:30:41 +0300
commit: 3ecbf5d51b13e11a93ae5757408a27c21d804c9f (patch)
tree: be46e3caa24574de106c2fbf1a05c43d32694e12 /build2/test
parent: a63e1809afd9a837821d6e8376cb14a36e7fc26e (diff)
6 files changed, 659 insertions, 141 deletions
diff --git a/build2/test/script/parser b/build2/test/script/parser
index ee270d8..5fefe48 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -12,6 +12,7 @@
 #include <build2/diagnostics>
 
 #include <build2/test/script/token>
+#include <build2/test/script/regex>
 #include <build2/test/script/script>
 
 namespace build2
@@ -97,12 +98,20 @@ namespace build2
         //
         struct here_doc
         {
-          size_t expr;  // Index in command_expr.
-          size_t pipe;  // Index in command_pipe.
-          int fd;       // Redirect fd (0 - in, 1 - out, 2 - err).
+          size_t expr;      // Index in command_expr.
+          size_t pipe;      // Index in command_pipe.
+          int fd;           // Redirect fd (0 - in, 1 - out, 2 - err).
           string end;
-          bool literal;    // Literal (single-quote).
+          bool literal;     // Literal (single-quote).
           string modifiers;
+
+          // Regex introducer ('\0' if not a regex, so can be used as bool).
+          //
+          char regex;
+
+          // Regex global flags. Meaningful if regex != '\0'.
+          //
+          regex::char_flags regex_flags;
         };
         using here_docs = vector<here_doc>;
 
@@ -115,10 +124,13 @@ namespace build2
         void
         parse_here_documents (token&, token_type&,
                               pair<command_expr, here_docs>&);
-        string
+
+        pair<string, regex::line_regex>
         parse_here_document (token&, token_type&,
                              const string&,
-                             const string&);
+                             const string& mode,
+                             char regex_introducer, // '\0' if not a regex.
+                             regex::char_flags);
 
         // Execute. Issue diagnostics and throw failed in case of an error.
         //
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index dd5c5c7..9af85b1 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -2,6 +2,9 @@
 // copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
 // license   : MIT; see accompanying LICENSE file
 
+#include <sstream>
+#include <cstring> // strstr(), strchr()
+
 #include <build2/test/script/parser>
 
 #include <build2/scheduler>
@@ -11,6 +14,35 @@
 
 using namespace std;
 
+namespace std
+{
+  // Print regex error description but only if it is meaningful (this is also
+  // why we have to print leading colon here).
+  //
+  // Currently libstdc++ just returns the name of the exception (bug #67361).
+  // So we check that the description contains at least one space character.
+  //
+  // While VC's description is meaningful, it has an undesired prefix that
+  // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
+  //
+  static ostream&
+  operator<< (ostream& os, const regex_error& e)
+  {
+    const char* d (e.what ());
+    if (strchr (d, ' ') != nullptr)
+    {
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+      const char* s (strstr (d, "): "));
+      if (s != nullptr)
+        d = s + 3;
+#endif
+      os << ": " << d;
+    }
+
+    return os;
+  }
+}
+
 namespace build2
 {
   namespace test
@@ -1277,8 +1309,69 @@ namespace build2
         assert (tt == type::newline);
 
         return move (p.first);
+      }
+
+      // Parse the regular expression representation (non-empty string value
+      // framed with introducer characters and optionally followed by flag
+      // characters from the {i} set, for example '/foo/i') into
+      // components. Also return end-of-parsing position if requested,
+      // otherwise treat any unparsed characters left as an error.
+      //
+      struct regex_parts
+      {
+        string value;
+        char introducer;
+        regex::char_flags flags; // {icase}
+
+        // Create a special empty object.
+        //
+        regex_parts ()
+            : introducer ('\0'), flags (regex::char_flags()) {}
+
+        regex_parts (string v, char i, regex::char_flags f)
+            : value (move (v)), introducer (i), flags (f) {}
       };
 
+      static regex_parts
+      parse_regex (const string& s,
+                   const location& l,
+                   const char* what,
+                   size_t* end = nullptr)
+      {
+        if (s.empty ())
+          fail (l) << "no introducer character in " << what;
+
+        size_t p (s.find (s[0], 1)); // Find terminating introducer.
+
+        if (p == string::npos)
+          fail (l) << "no closing introducer character in " << what;
+
+        size_t rn (p - 1); // Regex length.
+        if (rn == 0)
+          fail (l) << what << " is empty";
+
+        bool icase (s[++p] == 'i'); // Note: s[++p] can be '\0' (no flags).
+
+        if (icase)
+          ++p;
+
+        // If string end is not reached then report invalid flags, unless
+        // end-of-parsing position is requested (which means regex is just a
+        // prefix).
+        //
+        if (s[p] != '\0' && end == nullptr)
+          fail (l) << "junk at the end of " << what;
+
+        if (end != nullptr)
+          *end = p;
+
+        return regex_parts (string (s, 1, rn),
+                            s[0],
+                            icase
+                            ? regex::char_regex::icase
+                            : regex::char_flags ());
+      }
+
       pair<command_expr, parser::here_docs> parser::
       parse_command_expr (token& t, type& tt)
       {
@@ -1310,11 +1403,15 @@ namespace build2
           in_file,
           out_merge,
           out_string,
+          out_str_regex,
           out_document,
+          out_doc_regex,
           out_file,
           err_merge,
           err_string,
+          err_str_regex,
           err_document,
+          err_doc_regex,
           err_file,
           clean
         };
@@ -1351,6 +1448,50 @@ namespace build2
             r.str = move (w);
           };
 
+          auto add_here_str_regex = [&l, &mod, this] (
+            redirect& r, int fd, string&& w)
+          {
+            using namespace regex;
+
+            const char* what (nullptr);
+            switch (fd)
+            {
+            case 1: what = "stdout regex redirect"; break;
+            case 2: what = "stderr regex redirect"; break;
+            }
+
+            line_pool pool;
+            line_string s;
+
+            try
+            {
+              regex_parts re (parse_regex (w, l, what));
+              s += line_char (char_regex (re.value,
+                                          char_regex::ECMAScript | re.flags),
+                              pool);
+            }
+            catch (const regex_error& e)
+            {
+              // Print regex_error description if meaningful.
+              //
+              fail (l) << "invalid " << what << e <<
+                info << "regex: " << w;
+            }
+
+            if (mod.find (':') == string::npos)
+            {
+              w += '\n';
+              s += line_char ("", pool);
+            }
+
+            r.regex.str = move (w);
+
+            // No special line-chars, so no way to try to create a malformed
+            // expression, and so can't throw.
+            //
+            r.regex.regex = line_regex (move (s), move (pool));
+          };
+
           auto parse_path = [&l, this] (string&& w, const char* what) -> path
           {
             try
@@ -1399,11 +1540,24 @@ namespace build2
           case pending::out_string: add_here_str (c.out, move (w)); break;
           case pending::err_string: add_here_str (c.err, move (w)); break;
 
+          case pending::out_str_regex:
+            {
+              add_here_str_regex (c.out, 1, move (w));
+              break;
+            }
+          case pending::err_str_regex:
+            {
+              add_here_str_regex (c.err, 2, move (w));
+              break;
+            }
+
             // These are handled specially below.
             //
           case pending::in_document:
           case pending::out_document:
-          case pending::err_document: assert (false); break;
+          case pending::err_document:
+          case pending::out_doc_regex:
+          case pending::err_doc_regex: assert (false); break;
 
           case pending::in_file:  add_file (c.in,  0, move (w)); break;
           case pending::out_file: add_file (c.out, 1, move (w)); break;
@@ -1451,6 +1605,27 @@ namespace build2
           case pending::err_document: what = "stderr here-document end"; break;
           case pending::err_file:     what = "stderr file";              break;
           case pending::clean:        what = "cleanup path";             break;
+
+          case pending::out_str_regex:
+            {
+              what = "stdout here-string regex";
+              break;
+            }
+          case pending::err_str_regex:
+            {
+              what = "stderr here-string regex";
+              break;
+            }
+          case pending::out_doc_regex:
+            {
+              what = "stdout here-document regex end";
+              break;
+            }
+          case pending::err_doc_regex:
+            {
+              what = "stderr here-document regex end";
+              break;
+            }
           }
 
           if (what != nullptr)
@@ -1523,25 +1698,47 @@ namespace build2
             }
           }
 
+          mod = move (t.value);
+
           redirect_type rt (redirect_type::none);
           switch (tt)
           {
           case type::in_pass:
-          case type::out_pass:     rt = redirect_type::pass;  break;
+          case type::out_pass:  rt = redirect_type::pass;  break;
 
           case type::in_null:
-          case type::out_null:     rt = redirect_type::null;  break;
+          case type::out_null:  rt = redirect_type::null;  break;
 
-          case type::out_merge:    rt = redirect_type::merge; break;
+          case type::out_merge: rt = redirect_type::merge; break;
 
           case type::in_str:
-          case type::out_str:      rt = redirect_type::here_str_literal; break;
+          case type::out_str:
+            {
+              bool re (mod.find ('~') != string::npos);
+              assert (tt == type::out_str || !re);
+
+              rt = re
+                ? redirect_type::here_str_regex
+                : redirect_type::here_str_literal;
+
+              break;
+            }
 
           case type::in_doc:
-          case type::out_doc:      rt = redirect_type::here_doc_literal; break;
+          case type::out_doc:
+            {
+              bool re (mod.find ('~') != string::npos);
+              assert (tt == type::out_doc || !re);
+
+              rt = re
+                ? redirect_type::here_doc_regex
+                : redirect_type::here_doc_literal;
+
+              break;
+            }
 
           case type::in_file:
-          case type::out_file:     rt = redirect_type::file; break;
+          case type::out_file: rt = redirect_type::file; break;
           }
 
           redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err);
@@ -1569,6 +1766,14 @@ namespace build2
             case 2: p = pending::err_string; break;
             }
             break;
+          case redirect_type::here_str_regex:
+            switch (fd)
+            {
+            case 0: assert (false);             break;
+            case 1: p = pending::out_str_regex; break;
+            case 2: p = pending::err_str_regex; break;
+            }
+            break;
           case redirect_type::here_doc_literal:
             switch (fd)
             {
@@ -1577,10 +1782,14 @@ namespace build2
             case 2: p = pending::err_document; break;
             }
             break;
-
-          case redirect_type::here_str_regex: // @@ REGEX
-          case redirect_type::here_doc_regex: assert (false); break;
-
+          case redirect_type::here_doc_regex:
+            switch (fd)
+            {
+            case 0: assert (false);             break;
+            case 1: p = pending::out_doc_regex; break;
+            case 2: p = pending::err_doc_regex; break;
+            }
+            break;
           case redirect_type::file:
             switch (fd)
             {
@@ -1590,8 +1799,6 @@ namespace build2
             }
             break;
           }
-
-          mod = move (t.value);
         };
 
         // Set pending cleanup type.
@@ -1674,9 +1881,9 @@ namespace build2
             {
               if (pre_parse_)
               {
-                // The only thing we need to handle here are the here-document
-                // end markers since we need to know how many of them to pre-
-                // parse after the command.
+                // The only things we need to handle here are the here-document
+                // and here-document regex end markers since we need to know
+                // how many of them to pre-parse after the command.
                 //
                 switch (tt)
                 {
@@ -1684,6 +1891,11 @@ namespace build2
                 case type::out_doc:
                   mod = move (t.value);
 
+                  bool re (mod.find ('~') != string::npos);
+                  const char* what (re
+                                    ? "here-document regex end marker"
+                                    : "here-document end marker");
+
                   // We require the end marker to be a literal, unquoted word.
                   // In particularm, we don't allow quoted because of cases
                   // like foo"$bar" (where we will see word 'foo').
@@ -1700,8 +1912,8 @@ namespace build2
                   // would be >>FOO$bar -- on reparse it will be expanded
                   // as a single word.
                   //
-                  if (tt != type::word)
-                    fail (t) << "expected here-document end marker";
+                  if (tt != type::word || t.value.empty ())
+                    fail (t) << "expected " << what;
 
                   peek ();
                   const token& p (peeked ());
@@ -1711,7 +1923,7 @@ namespace build2
                     {
                     case type::dollar:
                     case type::lparen:
-                      fail (p) << "here-document end marker must be literal";
+                      fail (p) << what << " must be literal";
                     }
                   }
 
@@ -1727,15 +1939,25 @@ namespace build2
                       break;
                     // Fall through.
                   case quote_type::mixed:
-                    fail (t) << "partially-quoted here-document end marker";
+                    fail (t) << "partially-quoted " << what;
+                  }
+
+                  regex_parts r;
+                  string end (move (t.value));
+
+                  if (re)
+                  {
+                    r = parse_regex (end, l, what);
+                    end = move (r.value); // The "cleared" end marker.
                   }
 
                   hd.push_back (
                     here_doc {
                       0, 0, 0,
-                      move (t.value),
+                      move (end),
                       qt == quote_type::single,
-                      move (mod)});
+                      move (mod),
+                      r.introducer, r.flags});
                   break;
                 }
 
@@ -1817,23 +2039,40 @@ namespace build2
                 int fd;
                 switch (p)
                 {
-                case pending::in_document:  fd =  0; break;
-                case pending::out_document: fd =  1; break;
-                case pending::err_document: fd =  2; break;
-                default:                    fd = -1; break;
+                case pending::in_document:   fd =  0; break;
+                case pending::out_document:
+                case pending::out_doc_regex: fd =  1; break;
+                case pending::err_document:
+                case pending::err_doc_regex: fd =  2; break;
+                default:                     fd = -1; break;
                 }
 
                 if (fd != -1)
                 {
+                  string end (move (t.value));
+                  regex_parts r;
+
+                  if (p == pending::out_doc_regex ||
+                      p == pending::err_doc_regex)
+                  {
+                    // We can't fail here as we already parsed all the end
+                    // markers during pre-parsing stage, and so no need for the
+                    // description.
+                    //
+                    r = parse_regex (end, l, "");
+                    end = move (r.value); // The "cleared" end marker.
+                  }
+
                   hd.push_back (
                     here_doc {
                       expr.size () - 1,
                       expr.back ().pipe.size (),
                       fd,
-                      move (t.value),
+                      move (end),
                       (t.qtype == quote_type::unquoted ||
                        t.qtype == quote_type::single),
-                      move (mod)});
+                      move (mod),
+                      r.introducer, r.flags});
 
                   p = pending::none;
                   mod.clear ();
@@ -2130,30 +2369,54 @@ namespace build2
                 : lexer_mode::here_line_double);
           next (t, tt);
 
-          string v (parse_here_document (t, tt, h.end, h.modifiers));
+          pair<string, regex::line_regex> v (
+            parse_here_document (
+              t, tt, h.end, h.modifiers, h.regex, h.regex_flags));
 
           if (!pre_parse_)
           {
             command& c (p.first[h.expr].pipe[h.pipe]);
             redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err);
 
-            r.str = move (v);
-            r.end = move (h.end);
+            if (h.regex)
+            {
+              r.regex.str   = move (v.first);
+              r.regex.regex = move (v.second);
+
+              // Restore the original end marker.
+              //
+              r.end = h.regex + h.end + h.regex;
+              if ((h.regex_flags & regex::char_regex::icase) != 0)
+                r.end += 'i';
+            }
+            else
+            {
+              r.str = move (v.first);
+              r.end = move (h.end);
+            }
           }
 
           expire_mode ();
         }
       }
 
-      string parser::
+      pair<string, regex::line_regex> parser::
       parse_here_document (token& t, type& tt,
                            const string& em,
-                           const string& mod)
+                           const string& mod,
+                           char re,
+                           regex::char_flags refl)
       {
         // enter: first token on first line
         // leave: newline (after end marker)
 
-        string r;
+        using namespace regex;
+
+        string rs; // String or regex literal.
+
+        line_pool pool;
+        line_string ls;
+        line_regex rre;
 
         // Here-documents can be indented. The leading whitespaces of the end
         // marker line (called strip prefix) determine the indentation. Every
@@ -2173,8 +2436,17 @@ namespace build2
         //
         size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
 
+        // We will use the location of the first token on the line for the
+        // regex diagnostics. At the end of the loop it will point to the
+        // beginning of the end marker which we use for diagnostics of the
+        // line_regex object creation.
+        //
+        location l;
+
         while (tt != type::eos)
         {
+          l = get_location (t);
+
           // Check if this is the end marker. For starters, it should be a
           // single, unquoted word followed by a newline.
           //
@@ -2216,31 +2488,125 @@ namespace build2
 
           if (!pre_parse_)
           {
-            if (!r.empty ()) // Add newline after previous line.
-              r += '\n';
-
             // What shall we do if the expansion results in multiple names?
             // For, example if the line contains just the variable expansion
             // and it is of type strings. Adding all the elements space-
             // separated seems like the natural thing to do.
             //
+            string s;
             for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
             {
-              string s;
+              string n;
 
               try
               {
-                s = value_traits<string>::convert (move (*i), nullptr);
+                n = value_traits<string>::convert (move (*i), nullptr);
               }
               catch (const invalid_argument&)
               {
-                fail (t) << "invalid string value '" << *i << "'";
+                fail (l) << "invalid string value '" << *i << "'";
+              }
+
+              if (i == b)
+                s = move (n);
+              else
+              {
+                s += ' ';
+                s += n;
               }
+            }
+
+            // Add newline after previous line.
+            //
+            if (!rs.empty ())
+              rs += '\n';
+
+            rs += s;
+
+            if (re)
+            {
+              if (s[0] == re) // Line starts with the regex introducer.
+              {
+                size_t n (s.size ());
+
+                // Handle the empty line-regex characters.
+                //
+                if (n == 1)
+                  fail (l) << "regex introducer without regex" <<
+                    info << "consider changing regex introducer '" << re
+                           << "' in here-document end marker";
+
+                // This is a char-regex, or a sequence of line-regex syntax
+                // characters or both (in this specific order). So we will add
+                // the char-regex first (if present), and then sequentially
+                // add the line-regex syntax characters (if present).
+                //
+                size_t p (s.find (re, 1));
+                if (p == string::npos)
+                {
+                  // No char-regex, just a sequence of line-regex syntax
+                  // characters. Prepare to parse them starting from the
+                  // position right after the introducer.
+                  //
+                  p = 1;
+                }
+                else
+                {
+                  // Add regex line-char, and then position to the end of the
+                  // regex (that includes terminating introducer and the
+                  // optional flags). This is the first line-regex syntax
+                  // character position (if present).
+                  //
+                  line_char c;
+
+                  // Empty regex is a special case representing the blank line.
+                  //
+                  if (p == 1)
+                  {
+                    c = line_char ("", pool);
+                    ++p;
+                  }
+                  else
+                  {
+                    // Can't fail as all the pre-conditions are verified
+                    // (non-empty with both introducers in place), so no
+                    // description is required.
+                    //
+                    regex_parts re (parse_regex (s, l, "", &p));
 
-              if (i != b)
-                r += ' ';
+                    try
+                    {
+                      c = line_char (
+                        char_regex (re.value,
+                                    char_regex::ECMAScript | re.flags | refl),
+                        pool);
+                    }
+                    catch (const regex_error& e)
+                    {
+                      // Print regex_error description if meaningful.
+                      //
+                      fail (l) << "invalid regex" << e;
+                    }
+                  }
+
+                  ls += c;
+                }
 
-              r += s;
+                while (p != n)
+                {
+                  char c (s[p++]);
+                  if (line_char::syntax (c))
+                    ls += line_char (c);
+                  else
+                    fail (l) << "invalid line-regex syntax character '" << c
+                             << "'";
+                }
+              }
+              else
+                // Line doesn't start with regex introducer. Add line-char
+                // literal (handles blank lines as well).
+                //
+                ls += line_char (move (s), pool);
             }
           }
 
@@ -2301,10 +2667,36 @@ namespace build2
           // Add final newline unless suppressed.
           //
           if (mod.find (':') == string::npos)
-            r += '\n';
+          {
+            rs += '\n';
+
+            if (re)
+              ls += line_char ("", pool);
+          }
+
+          // Parse line-regex.
+          //
+          if (re)
+          {
+            // Empty regex matches nothing, so not of much use.
+            //
+            if (ls.empty ())
+              fail (l) << "empty here-document regex";
+
+            try
+            {
+              rre = line_regex (move (ls), move (pool));
+            }
+            catch (const regex_error& e)
+            {
+              // Print regex_error description if meaningful.
+              //
+              fail (l) << "invalid here-document regex" << e;
+            }
+          }
         }
 
-        return r;
+        return make_pair (move (rs), move (rre));
       }
 
       //
diff --git a/build2/test/script/regex b/build2/test/script/regex
index cfc6031..ae31f59 100644
--- a/build2/test/script/regex
+++ b/build2/test/script/regex
@@ -24,6 +24,7 @@ namespace build2
       {
         using char_string = std::basic_string<char>;
         using char_regex = std::basic_regex<char>;
+        using char_flags = char_regex::flag_type;
 
         // Newlines are line separators and are not part of the line:
         //
@@ -50,9 +51,9 @@ namespace build2
         enum class line_type
         {
           special,
-            literal,
-            regex
-            };
+          literal,
+          regex
+        };
 
         struct line_char
         {
@@ -127,6 +128,11 @@ namespace build2
             return type == line_type::special ? special : '\a'; // BELL.
           }
 
+          // Return true if the character is a syntax (special) one.
+          //
+          static bool
+          syntax (char);
+
           // Provide basic_regex (such as from msvcrt) with the ability to
           // explicitly cast line_chars to implementation-specific enums.
           //
@@ -553,10 +559,10 @@ namespace std
 
   // When used with libc++ the linker complains that it can't find
   // __match_any_but_newline<line_char>::__exec() function. The problem is
-  // that the function is only specialized for char and wchar_t. As line_char
-  // has no notion of the newline character we specialize the class template
-  // to behave as the __match_any<line_char> instantiation does (that luckily
-  // has all the functions in place).
+  // that the function is only specialized for char and wchar_t
+  // (LLVM bug #31409). As line_char has no notion of the newline character we
+  // specialize the class template to behave as the __match_any<line_char>
+  // instantiation does (that luckily has all the functions in place).
   //
 #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 4000
   template <>
@@ -587,11 +593,6 @@ namespace build2
 
           line_regex () = default;
 
-          // Move constuctible-only type.
-          //
-          line_regex (line_regex&&) = default;
-          line_regex (const line_regex&) = delete;
-
           // Move string regex together with the pool used to create it.
           //
           line_regex (line_string&& s, line_pool&& p)
@@ -599,11 +600,18 @@ namespace build2
               //
               : base_type (s), pool (move (p)) {s.clear ();}
 
-          line_regex& operator= (line_regex&&) = delete;
+          // Move constructible/assignable-only type.
+          //
+          line_regex (line_regex&&) = default;
+          line_regex (const line_regex&) = delete;
+          line_regex& operator= (line_regex&&) = default;
           line_regex& operator= (const line_regex&) = delete;
 
         public:
-          line_pool pool;
+          // Mutable since input line_char literals must go into the same
+          // pool (and thus is MT-unsafe).
+          //
+          mutable line_pool pool;
         };
       }
     }
diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx
index c6fba75..bd811e4 100644
--- a/build2/test/script/regex.cxx
+++ b/build2/test/script/regex.cxx
@@ -28,7 +28,6 @@ namespace build2
           // @@ How can we allow anything for basic_regex but only subset
           //    for our own code?
           //
-          const char sp[] = "()|.*+?{\\}0123456789,=!";
           const char ex[] = "pn\n\r";
 
           assert (c == 0  || // Null character.
@@ -45,7 +44,7 @@ namespace build2
                   (c > 0 && c <= 255 && (
                     // Supported regex special characters.
                     //
-                    string::traits_type::find (sp, 23, c) != nullptr ||
+                    syntax (c) ||
 
                     // libstdc++ look-ahead tokens, newline chars.
                     //
@@ -73,6 +72,13 @@ namespace build2
         }
 
         bool
+        line_char::syntax (char c)
+        {
+          return string::traits_type::find (
+            "()|.*+?{}\\0123456789,=!", 23, c) != nullptr;
+        }
+
+        bool
         operator== (const line_char& l, const line_char& r)
         {
           if (l.type == r.type)
@@ -157,7 +163,7 @@ namespace std
     if (n > 0 && d != s)
     {
       // If d < s then it can't be in [s, s + n) range and so using copy() is
-      // safe. Otherwise d + n is out of (first, last] range and so using
+      // safe. Otherwise d + n is out of (s, s + n] range and so using
       // copy_backward() is safe.
       //
       if (d < s)
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index 05b3b5c..08d358c 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -46,8 +46,7 @@ namespace build2
       }
 
       // Check if the test command output matches the expected result (redirect
-      // value). Noop for redirect types other than none, here_string,
-      // here_document.
+      // value). Noop for redirect types other than none, here_*.
       //
       static void
       check_output (const path& pr,
@@ -64,6 +63,7 @@ namespace build2
             d << info << "stdin: " << ip;
         };
 
+        bool re;
         if (rd.type == redirect_type::none)
         {
           assert (!op.empty ());
@@ -79,96 +79,173 @@ namespace build2
             input_info (d);
           }
         }
-        else if (rd.type == redirect_type::here_str_literal ||
+        else if ((re = (rd.type == redirect_type::here_str_regex ||
+                        rd.type == redirect_type::here_doc_regex)) ||
+                 rd.type == redirect_type::here_str_literal ||
                  rd.type == redirect_type::here_doc_literal)
         {
           assert (!op.empty ());
 
-          path orp (op + ".orig");
+          // While the regex file is not used for output validation we still
+          // create it for troubleshooting.
+          //
+          path opp (op + (re ? ".regex" : ".orig"));
 
           try
           {
-            ofdstream os (orp);
-            sp.clean ({cleanup_type::always, orp}, true);
-            os << rd.str;
+            ofdstream os (opp);
+            sp.clean ({cleanup_type::always, opp}, true);
+            os << (re ? rd.regex.str : rd.str);
             os.close ();
           }
           catch (const io_error& e)
           {
-            fail (ll) << "unable to write " << orp << ": " << e.what ();
+            fail (ll) << "unable to write " << opp << ": " << e.what ();
           }
 
-          // Use diff utility to compare the output with the expected result.
-          //
-          path dp ("diff");
-          process_path pp (run_search (dp, true));
+          auto output_info = [&what, &ll] (diag_record& d,
+                                           const path& p,
+                                           const char* prefix = "",
+                                           const char* suffix = "")
+          {
+            if (non_empty (p, ll))
+              d << info << prefix << what << suffix << ": " << p;
+            else
+              d << info << prefix << what << suffix << " is empty";
+          };
 
-          cstrings args {
-            pp.recall_string (),
-            "--strip-trailing-cr",
-            "-u",
-            orp.string ().c_str (),
-            op.string ().c_str (),
-            nullptr};
+          if (re)
+          {
+            // Match the output with the line_regex. That requires parsing the
+            // output into a line_string of literals first.
+            //
+            using namespace regex;
 
-          if (verb >= 2)
-            print_process (args);
+            line_string ls;
 
-          try
+            try
+            {
+              // Do not throw when eofbit is set (end of stream reached), and
+              // when failbit is set (getline() failed to extract any
+              // character).
+              //
+              // Note that newlines are treated as line-char separators. That
+              // in particular means that the trailing newline produces a blank
+              // line-char (empty literal). Empty output produces the
+              // zero-length line-string.
+              //
+              // Also note that we strip the trailing CR characters (otherwise
+              // the output can mismatch when cross-testing).
+              //
+              ifdstream is (op, ifdstream::in, ifdstream::badbit);
+              is.peek (); // Sets eofbit for an empty stream.
+
+              while (!is.eof ())
+              {
+                string s;
+                getline (is, s);
+
+                // It is safer to strip CRs in a loop, as msvcrt inexplicably
+                // adds too much trailing junk to the system_error
+                // descriptions, and so it can appear in the program's output.
+                // For example:
+                //
+                // ...: Invalid data.\r\r\n
+                //
+                while (!s.empty () && s.back () == '\r')
+                  s.pop_back ();
+
+                ls += line_char (move (s), rd.regex.regex.pool);
+              }
+            }
+            catch (const io_error& e)
+            {
+              fail (ll) << "unable to read " << op << ": " << e.what ();
+            }
+
+            if (regex_match (ls, rd.regex.regex)) // Doesn't throw.
+              return;
+
+            // Output doesn't match the regex.
+            //
+            diag_record d (error (ll));
+            d << pr << " " << what << " doesn't match the regex";
+
+            output_info (d, op);
+            output_info (d, opp, "", " regex");
+            input_info  (d);
+
+            // Fall through.
+            //
+          }
+          else
           {
-            // Diff utility prints the differences to stdout. But for the user
-            // it is a part of the test failure diagnostics so let's redirect
-            // stdout to stderr.
+            // Use diff utility to compare the output with the expected result.
             //
-            process p (pp, args.data (), 0, 2);
+            path dp ("diff");
+            process_path pp (run_search (dp, true));
+
+            cstrings args {
+              pp.recall_string (),
+                "--strip-trailing-cr", // Is essential for cross-testing.
+                "-u",
+                opp.string ().c_str (),
+                op.string ().c_str (),
+                nullptr};
+
+            if (verb >= 2)
+              print_process (args);
 
             try
             {
-              if (p.wait ())
-                return;
-
-              // Output doesn't match the expected result.
+              // Diff utility prints the differences to stdout. But for the
+              // user it is a part of the test failure diagnostics so let's
+              // redirect stdout to stderr.
               //
-              diag_record d (error (ll));
-              d << pr << " " << what << " doesn't match the expected output";
+              process p (pp, args.data (), 0, 2);
 
-              auto output_info =
-                [&d, &what, &ll] (const path& p, const char* prefix)
+              try
               {
-                if (non_empty (p, ll))
-                  d << info << prefix << what << ": " << p;
-                else
-                  d << info << prefix << what << " is empty";
-              };
+                if (p.wait ())
+                  return;
 
-              output_info (op, "");
-              output_info (orp, "expected ");
-              input_info  (d);
+                // Output doesn't match the expected result.
+                //
+                diag_record d (error (ll));
+                d << pr << " " << what << " doesn't match the expected output";
+
+                output_info (d, op);
+                output_info (d, opp, "expected ");
+                input_info  (d);
+
+                // Fall through.
+                //
+              }
+              catch (const io_error&)
+              {
+                // Child exit status doesn't matter. Assume the child process
+                // issued diagnostics. Just wait for the process completion.
+                //
+                p.wait (); // Check throw.
+
+                error (ll) << "failed to compare " << what
+                           << " with the expected output";
+              }
 
               // Fall through.
               //
             }
-            catch (const io_error&)
+            catch (const process_error& e)
             {
-              // Child exit status doesn't matter. Assume the child process
-              // issued diagnostics. Just wait for the process completion.
-              //
-              p.wait (); // Check throw.
+              error (ll) << "unable to execute " << pp << ": " << e.what ();
 
-              error (ll) << "failed to compare " << what
-                         << " with the expected output";
+              if (e.child ())
+                exit (1);
             }
 
             // Fall through.
             //
           }
-          catch (const process_error& e)
-          {
-            error (ll) << "unable to execute " << pp << ": " << e.what ();
-
-            if (e.child ())
-              exit (1);
-          }
 
           throw failed ();
         }
@@ -461,8 +538,8 @@ namespace build2
             break;
           }
 
-        case redirect_type::merge: assert (false); break;
-        case redirect_type::here_str_regex: // @@ REGEX
+        case redirect_type::merge:
+        case redirect_type::here_str_regex:
         case redirect_type::here_doc_regex: assert (false); break;
         }
 
@@ -482,10 +559,9 @@ namespace build2
 
         // Open a file for command output redirect if requested explicitly
         // (file redirect) or for the purpose of the output validation (none,
-        // here_string, here_document), register the file for cleanup, return
-        // the file descriptor. Return the specified, default or -2 file
-        // descriptors for merge, pass or null redirects respectively not
-        // opening a file.
+        // here_*), register the file for cleanup, return the file descriptor.
+        // Return the specified, default or -2 file descriptors for merge, pass
+        // or null redirects respectively not opening a file.
         //
         auto open = [&sp, &ll, &std_path, &normalize] (const redirect& r,
                                                        int dfd,
@@ -549,13 +625,13 @@ namespace build2
           case redirect_type::none:
           case redirect_type::here_str_literal:
           case redirect_type::here_doc_literal:
+          case redirect_type::here_str_regex:
+          case redirect_type::here_doc_regex:
             {
               p = std_path (what);
               m |= fdopen_mode::truncate;
               break;
             }
-          case redirect_type::here_str_regex: // @@ REGEX
-          case redirect_type::here_doc_regex: assert (false); break;
           }
 
           try
diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx
index 7941df6..c67e1b6 100644
--- a/build2/test/script/script.cxx
+++ b/build2/test/script/script.cxx
@@ -85,25 +85,36 @@ namespace build2
           case redirect_type::merge: o << '&' << r.fd; break;
 
           case redirect_type::here_str_literal:
+          case redirect_type::here_str_regex:
             {
-              const string& v (r.str);
+              bool re (r.type == redirect_type::here_str_regex);
+              const string& v (re ? r.regex.str : r.str);
               bool nl (!v.empty () && v.back () == '\n');
 
               if (!nl)
                 o << ':';
 
+              if (re)
+                o << '~';
+
               to_stream_q (o, nl ? string (v, 0, v.size () - 1) : v);
               break;
             }
           case redirect_type::here_doc_literal:
+          case redirect_type::here_doc_regex:
             {
-              const string& v (r.str);
+              bool re (r.type == redirect_type::here_doc_regex);
+              const string& v (re ? r.regex.str : r.str);
               bool nl (!v.empty () && v.back () == '\n');
 
               // Add another '>' or '<'. Note that here end marker never
               // needs to be quoted.
               //
               o << d << (nl ? "" : ":");
+
+              if (re)
+                o << '~';
+
               to_stream_q (o, r.end);
               break;
             }
@@ -115,16 +126,21 @@ namespace build2
               print_path (r.file.path);
               break;
             }
-          case redirect_type::here_str_regex: // @@ REGEX
-          case redirect_type::here_doc_regex: assert (false); break;
           }
         };
 
         auto print_doc = [&o] (const redirect& r)
         {
-          const string& v (r.str);
+          bool re (r.type == redirect_type::here_doc_regex);
+          const string& v (re ? r.regex.str : r.str);
           bool nl (!v.empty () && v.back () == '\n');
-          o << endl << v << (nl ? "" : "\n") << r.end;
+
+          // For the regex here-document the end marker contains the introducer
+          // and flags characters, so we need to remove them.
+          //
+          const string& e (r.end);
+          o << endl << v << (nl ? "" : "\n")
+            << (re ? string (e, 1, e.find (e[0], 1) - 1) : e);
         };
 
         if ((m & command_to_stream::header) == command_to_stream::header)
@@ -173,9 +189,17 @@ namespace build2
         {
           // Here-documents.
           //
-          if (c.in.type  == redirect_type::here_doc_literal) print_doc (c.in);
-          if (c.out.type == redirect_type::here_doc_literal) print_doc (c.out);
-          if (c.err.type == redirect_type::here_doc_literal) print_doc (c.err);
+          if (c.in.type == redirect_type::here_doc_literal ||
+              c.in.type == redirect_type::here_doc_regex)
+            print_doc (c.in);
+
+          if (c.out.type == redirect_type::here_doc_literal ||
+              c.out.type == redirect_type::here_doc_regex)
+            print_doc (c.out);
+
+          if (c.err.type == redirect_type::here_doc_literal ||
+              c.err.type == redirect_type::here_doc_regex)
+            print_doc (c.err);
         }
       }
author	Karen Arutyunov <karen@codesynthesis.com>	2016-12-17 23:28:30 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2017-01-05 15:30:41 +0300
commit	3ecbf5d51b13e11a93ae5757408a27c21d804c9f (patch)
tree	be46e3caa24574de106c2fbf1a05c43d32694e12 /build2/test
parent	a63e1809afd9a837821d6e8376cb14a36e7fc26e (diff)