From a83f3866667bca073c4d4c5d80b4deb5ac05906c Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 11 Jan 2017 01:43:09 +0300 Subject: Add support for portable path modifer and dot character escaping inversion --- build2/buildfile | 2 +- build2/test/script/parser | 26 ++- build2/test/script/parser.cxx | 386 ++++++++++++++++++++---------------------- build2/test/script/regex | 35 ++-- build2/test/script/regex.cxx | 81 +++++++++ build2/test/script/regex.ixx | 35 ++++ build2/test/script/runner | 2 +- build2/test/script/runner.cxx | 376 +++++++++++++++++++++++++++++++++++----- build2/test/script/script | 74 ++++++-- build2/test/script/script.cxx | 115 +++++++++---- 10 files changed, 825 insertions(+), 307 deletions(-) create mode 100644 build2/test/script/regex.ixx (limited to 'build2') diff --git a/build2/buildfile b/build2/buildfile index 6d497ca..84e2f82 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -88,7 +88,7 @@ exe{b}: \ test/script/{hxx cxx}{ builtin } \ test/script/{hxx cxx}{ lexer } \ test/script/{hxx cxx}{ parser } \ -test/script/{hxx cxx}{ regex } \ +test/script/{hxx ixx cxx}{ regex } \ test/script/{hxx cxx}{ runner } \ test/script/{hxx ixx cxx}{ script } \ test/script/{hxx cxx}{ token } \ diff --git a/build2/test/script/parser b/build2/test/script/parser index 9ad5fe9..edd64a3 100644 --- a/build2/test/script/parser +++ b/build2/test/script/parser @@ -12,7 +12,6 @@ #include #include -#include #include namespace build2 @@ -111,7 +110,7 @@ namespace build2 // Regex global flags. Meaningful if regex != '\0'. // - regex::char_flags regex_flags; + string regex_flags; }; using here_docs = vector; @@ -125,12 +124,29 @@ namespace build2 parse_here_documents (token&, token_type&, pair&); - pair + struct parsed_doc + { + union + { + string str; // Here-document literal. + regex_lines regex; // Here-document regex. + }; + + bool re; // True if regex. + uint64_t end_line; // Here-document end marker location. + uint64_t end_column; + + parsed_doc (string, uint64_t line, uint64_t column); + parsed_doc (regex_lines, uint64_t line, uint64_t column); + parsed_doc (parsed_doc&&); // Note: move constuctible-only type. + ~parsed_doc (); + }; + + parsed_doc parse_here_document (token&, token_type&, const string&, const string& mode, - char regex_introducer, // '\0' if not a regex. - regex::char_flags); + char re_intro); // '\0' if not a regex. // Execute. Issue diagnostics and throw failed in case of an error. // diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index f381118..4b1c777 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -5,7 +5,6 @@ #include #include -#include // strstr() #include @@ -14,39 +13,6 @@ using namespace std; -namespace std -{ - // Print regex error description but only if it is meaningful (this is also - // why we have to print leading colon here). - // - // Currently libstdc++ just returns the name of the exception (bug #67361). - // So we check that the description contains at least one space character. - // - // While VC's description is meaningful, it has an undesired prefix that - // resembles the following: 'regex_error(error_badrepeat): '. So we skip it. - // - static ostream& - operator<< (ostream& o, const regex_error& e) - { - const char* d (e.what ()); - -#if defined(_MSC_VER) && _MSC_VER <= 1910 - const char* rd (strstr (d, "): ")); - if (rd != nullptr) - d = rd + 3; -#endif - - ostringstream os; - os << runtime_error (d); // Sanitize the description. - - string s (os.str ()); - if (s.find (' ') != string::npos) - o << ": " << s; - - return o; - } -} - namespace build2 { namespace test @@ -1340,23 +1306,22 @@ namespace build2 // Parse the regular expression representation (non-empty string value // framed with introducer characters and optionally followed by flag - // characters from the {i} set, for example '/foo/i') into + // characters from the {di} set, for example '/foo/id') into // components. Also return end-of-parsing position if requested, // otherwise treat any unparsed characters left as an error. // struct regex_parts { string value; - char introducer; - regex::char_flags flags; // {icase} + char intro; + string flags; // Combination of characters from {di} set. // Create a special empty object. // - regex_parts () - : introducer ('\0'), flags (regex::char_flags ()) {} + regex_parts (): intro ('\0') {} - regex_parts (string v, char i, regex::char_flags f) - : value (move (v)), introducer (i), flags (f) {} + regex_parts (string v, char i, string f) + : value (move (v)), intro (i), flags (move (f)) {} }; static regex_parts @@ -1377,10 +1342,10 @@ namespace build2 if (rn == 0) fail (l) << what << " is empty"; - bool icase (s[++p] == 'i'); // Note: s[++p] can be '\0' (no flags). - - if (icase) - ++p; + // Find end-of-flags position. + // + size_t fp (++p); // Save flags starting position. + for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ; // If string end is not reached then report invalid flags, unless // end-of-parsing position is requested (which means regex is just a @@ -1392,11 +1357,7 @@ namespace build2 if (end != nullptr) *end = p; - return regex_parts (string (s, 1, rn), - s[0], - icase - ? regex::char_regex::icase - : regex::char_flags ()); + return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp)); } pair parser:: @@ -1419,6 +1380,27 @@ namespace build2 fail (l) << "stdout and stderr redirected to each other"; }; + // Check that the introducer character differs from '/' if the + // portable path modifier is specified. Must be called before + // parse_regex() (see below) to make sure its diagnostics is + // meaningful. + // + // Note that the portable path modifier assumes '/' to be a valid + // regex character and so makes it indistinguishable from the + // terminating introducer. + // + auto check_regex_mod = [this] (const string& mod, + const string& re, + const location& l, + const char* what) + { + // Handles empty regex properly. + // + if (mod.find ('/') != string::npos && re[0] == '/') + fail (l) << "portable path modifier and '/' introducer in " + << what; + }; + // Pending positions where the next word should go. // enum class pending @@ -1449,7 +1431,8 @@ namespace build2 // Add the next word to either one of the pending positions or to // program arguments by default. // - auto add_word = [&c, &p, &mod, this] (string&& w, const location& l) + auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( + string&& w, const location& l) { auto add_merge = [&l, this] (redirect& r, const string& w, int fd) { @@ -1468,18 +1451,16 @@ namespace build2 << "file descriptor must be " << fd; }; - auto add_here_str = [&mod] (redirect& r, string&& w) + auto add_here_str = [] (redirect& r, string&& w) { - if (mod.find (':') == string::npos) + if (r.modifiers.find (':') == string::npos) w += '\n'; r.str = move (w); }; - auto add_here_str_regex = [&l, &mod, this] ( + auto add_here_str_regex = [&l, &check_regex_mod, this] ( redirect& r, int fd, string&& w) { - using namespace regex; - const char* what (nullptr); switch (fd) { @@ -1487,36 +1468,23 @@ namespace build2 case 2: what = "stderr regex redirect"; break; } - line_pool pool; - line_string s; + check_regex_mod (r.modifiers, w, l, what); - try - { - regex_parts re (parse_regex (w, l, what)); - s += line_char (char_regex (re.value, - char_regex::ECMAScript | re.flags), - pool); - } - catch (const regex_error& e) - { - // Print regex_error description if meaningful. - // - fail (l) << "invalid " << what << e << - info << "regex: " << w; - } + regex_parts rp (parse_regex (w, l, what)); - if (mod.find (':') == string::npos) - { - w += '\n'; - s += line_char ("", pool); - } + regex_lines& re (r.regex); + re.intro = rp.intro; - r.regex.str = move (w); + re.lines.emplace_back ( + l.line, l.column, move (rp.value), move (rp.flags)); - // No special line-chars, so no way to try to create a malformed - // expression, and so can't throw. + // Add final blank line unless suppressed. // - r.regex.regex = line_regex (move (s), move (pool)); + // Note that the position is synthetic, but that's ok as we don't + // expect any diagnostics to refer this line. + // + if (r.modifiers.find (':') == string::npos) + re.lines.emplace_back (l.line, l.column, string (), false); }; auto parse_path = [&l, this] (string&& w, const char* what) -> path @@ -1539,7 +1507,7 @@ namespace build2 } }; - auto add_file = [&mod, &parse_path] (redirect& r, int fd, string&& w) + auto add_file = [&parse_path] (redirect& r, int fd, string&& w) { const char* what (nullptr); switch (fd) @@ -1550,7 +1518,7 @@ namespace build2 } r.file.path = parse_path (move (w), what); - r.file.append = mod.find ('&') != string::npos; + r.file.append = r.modifiers.find ('&') != string::npos; }; switch (p) @@ -1771,6 +1739,11 @@ namespace build2 redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err); r = redirect (rt); + // Don't move as still may be used for pending here-document end + // marker processing. + // + r.modifiers = mod; + switch (rt) { case redirect_type::none: @@ -1974,6 +1947,8 @@ namespace build2 if (re) { + check_regex_mod (mod, end, l, what); + r = parse_regex (end, l, what); end = move (r.value); // The "cleared" end marker. } @@ -1984,7 +1959,7 @@ namespace build2 move (end), qt == quote_type::single, move (mod), - r.introducer, r.flags}); + r.intro, move (r.flags)}); break; } @@ -2099,7 +2074,7 @@ namespace build2 (t.qtype == quote_type::unquoted || t.qtype == quote_type::single), move (mod), - r.introducer, r.flags}); + r.intro, move (r.flags)}); p = pending::none; mod.clear (); @@ -2396,54 +2371,43 @@ namespace build2 : lexer_mode::here_line_double); next (t, tt); - pair v ( - parse_here_document ( - t, tt, h.end, h.modifiers, h.regex, h.regex_flags)); + parsed_doc v ( + parse_here_document (t, tt, h.end, h.modifiers, h.regex)); if (!pre_parse_) { command& c (p.first[h.expr].pipe[h.pipe]); redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err); - if (h.regex) + if (v.re) { - r.regex.str = move (v.first); - r.regex.regex = move (v.second); - - // Restore the original end marker. - // - r.end = h.regex + h.end + h.regex; - if ((h.regex_flags & regex::char_regex::icase) != 0) - r.end += 'i'; + r.regex = move (v.regex); + r.regex.flags = move (h.regex_flags); } else - { - r.str = move (v.first); - r.end = move (h.end); - } + r.str = move (v.str); + + r.end = move (h.end); + r.end_line = v.end_line; + r.end_column = v.end_column; } expire_mode (); } } - pair parser:: + parser::parsed_doc parser:: parse_here_document (token& t, type& tt, const string& em, const string& mod, - char re, - regex::char_flags refl) + char re) { // enter: first token on first line // leave: newline (after end marker) - using namespace regex; - - string rs; // String or regex literal. + string rs; // String literal. - line_pool pool; - line_string ls; - line_regex rre; + regex_lines rre; // Here-documents can be indented. The leading whitespaces of the end // marker line (called strip prefix) determine the indentation. Every @@ -2465,8 +2429,7 @@ namespace build2 // We will use the location of the first token on the line for the // regex diagnostics. At the end of the loop it will point to the - // beginning of the end marker which we use for diagnostics of the - // line_regex object creation. + // beginning of the end marker. // location l; @@ -2543,97 +2506,93 @@ namespace build2 } } - // Add newline after previous line. - // - if (!rs.empty ()) - rs += '\n'; - - rs += s; + if (!re) + { + // Add newline after previous line. + // + if (!rs.empty ()) + rs += '\n'; - if (re) + rs += s; + } + else { - if (s[0] == re) // Line starts with the regex introducer. + // Due to expansion we can end up with multiple lines. If empty + // then will add a blank textual literal. + // + for (size_t p (0); p != string::npos; ) { - size_t n (s.size ()); + string ln; + size_t np (s.find ('\n', p)); - // Handle the empty line-regex characters. - // - if (n == 1) - fail (l) << "regex introducer without regex" << - info << "consider changing regex introducer '" << re - << "' in here-document end marker"; - - // This is a char-regex, or a sequence of line-regex syntax - // characters or both (in this specific order). So we will add - // the char-regex first (if present), and then sequentially - // add the line-regex syntax characters (if present). - // - size_t p (s.find (re, 1)); - if (p == string::npos) + if (np != string::npos) { - // No char-regex, just a sequence of line-regex syntax - // characters. Prepare to parse them starting from the - // position right after the introducer. - // - p = 1; + ln = string (s, p, np - p); + p = np + 1; } else { - // Add regex line-char, and then position to the end of the - // regex (that includes terminating introducer and the - // optional flags). This is the first line-regex syntax - // character position (if present). - // - line_char c; + ln = string (s, p); + p = np; + } - // Empty regex is a special case repesenting the blank line. + if (ln[0] != re) // Line doesn't start with regex introducer. + { + // This is a line-char literal (covers blank lines as well). // - if (p == 1) + // Append textual literal. + // + rre.lines.emplace_back (l.line, l.column, move (ln), false); + } + else // Line starts with the regex introducer. + { + // This is a char-regex, or a sequence of line-regex syntax + // characters or both (in this specific order). So we will + // add regex (with optional special characters) or special + // literal. + // + size_t p (ln.find (re, 1)); + if (p == string::npos) { - c = line_char ("", pool); - ++p; + // No regex, just a sequence of syntax characters. + // + string spec (ln, 1); + if (spec.empty ()) + fail (l) << "no syntax line characters"; + + // Append special literal. + // + rre.lines.emplace_back ( + l.line, l.column, move (spec), true); } else { - // Can't fail as all the pre-conditions verified (non-empty - // with both introducers in place), so no description - // required. + // Regex (probably with syntax characters). // - regex_parts re (parse_regex (s, l, "", &p)); + regex_parts re; - try - { - c = line_char ( - char_regex (re.value, - char_regex::ECMAScript | re.flags | refl), - pool); - } - catch (const regex_error& e) - { - // Print regex_error description if meaningful. + // Empty regex is a special case repesenting a blank line. + // + if (p == 1) + // Position to optional specal characters of an empty + // regex. // - fail (l) << "invalid regex" << e; - } - } - - ls += c; - } + ++p; + else + // Can't fail as all the pre-conditions verified + // (non-empty with both introducers in place), so no + // description required. + // + re = parse_regex (ln, l, "", &p); - while (p != n) - { - char c (s[p++]); - if (line_char::syntax (c)) - ls += line_char (c); - else - fail (l) << "invalid line-regex syntax character '" << c - << "'"; + // Append regex with optional special characters. + // + rre.lines.emplace_back (l.line, l.column, + move (re.value), move (re.flags), + string (ln, p)); + } } } - else - // Line doesn't start with regex introducer. Add line-char - // literal (handles blank lines as well). - // - ls += line_char (move (s), pool); } } @@ -2695,35 +2654,31 @@ namespace build2 // if (mod.find (':') == string::npos) { - rs += '\n'; - if (re) - ls += line_char ("", pool); + // Note that the position is synthetic, but that's ok as we don't + // expect any diagnostics to refer this line. + // + rre.lines.emplace_back (l.line, l.column, string (), false); + else + rs += '\n'; } - // Parse line-regex. + // Finalize regex lines. // if (re) { // Empty regex matches nothing, so not of much use. // - if (ls.empty ()) + if (rre.lines.empty ()) fail (l) << "empty here-document regex"; - try - { - rre = line_regex (move (ls), move (pool)); - } - catch (const regex_error& e) - { - // Print regex_error description if meaningful. - // - fail (l) << "invalid here-document regex" << e; - } + rre.intro = re; } } - return make_pair (move (rs), move (rre)); + return re + ? parsed_doc (move (rre), l.line, l.column) + : parsed_doc (move (rs), l.line, l.column); } // @@ -3184,6 +3139,39 @@ namespace build2 lexer_ = l; base_parser::lexer_ = l; } + + // parser::parsed_doc + // + parser::parsed_doc:: + parsed_doc (string s, uint64_t l, uint64_t c) + : str (move (s)), re (false), end_line (l), end_column (c) + { + } + + parser::parsed_doc:: + parsed_doc (regex_lines r, uint64_t l, uint64_t c) + : regex (move (r)), re (true), end_line (l), end_column (c) + { + } + + parser::parsed_doc:: + parsed_doc (parsed_doc&& d) + : re (d.re), end_line (d.end_line), end_column (d.end_column) + { + if (re) + new (®ex) regex_lines (move (d.regex)); + else + new (&str) string (move (d.str)); + } + + parser::parsed_doc:: + ~parsed_doc () + { + if (re) + regex.~regex_lines (); + else + str.~string (); + } } } } diff --git a/build2/test/script/regex b/build2/test/script/regex index 7708410..b25c1f1 100644 --- a/build2/test/script/regex +++ b/build2/test/script/regex @@ -24,8 +24,25 @@ namespace build2 namespace regex { using char_string = std::basic_string; - using char_regex = std::basic_regex; - using char_flags = char_regex::flag_type; + + enum class char_flags: std::uint16_t + { + icase = 0x1, // Case-insensitive match. + idot = 0x2, // Invert '.' escaping. + + none = 0 + }; + + // Restricts valid standard flags to just {icase}, extends with custom + // flags {idot}. + // + class char_regex: public std::basic_regex + { + public: + using base_type = std::basic_regex; + + char_regex (const char_string&, char_flags = char_flags::none); + }; // Newlines are line separators and are not part of the line: // @@ -110,7 +127,7 @@ namespace build2 // // 0 (nul character) // -1 (EOF) - // [()|.*+?{\}0123456789,=!] (excluding []) + // [()|.*+?{}\0123456789,=!] (excluding []) // // Note that the constructor is implicit to allow basic_regex to // implicitly construct line_chars from special char literals (in @@ -252,9 +269,8 @@ namespace build2 template struct line_char_cmp : public std::enable_if::value || - std::is_enum::value> - { - }; + (std::is_enum::value && + !std::is_same::value)> {}; template ::type> bool @@ -655,14 +671,13 @@ namespace build2 line_regex& operator= (const line_regex&) = delete; public: - // Mutable since input line_char literals must go into the same - // pool (and thus is MT-unsafe). - // - mutable line_pool pool; + line_pool pool; }; } } } } +#include + #endif // BUILD2_TEST_SCRIPT_REGEX diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx index bbf3f00..48e1eeb 100644 --- a/build2/test/script/regex.cxx +++ b/build2/test/script/regex.cxx @@ -171,6 +171,87 @@ namespace build2 new std::ctype ()) // Hidden by ctype bitmask. { } + + // char_regex + // + // Transform regex according to the extended flags {idot}. If regex is + // malformed then keep transforming, so the resulting string is + // malformed the same way. We expect the error to be reported by the + // char_regex ctor. + // + static string + transform (const string& s, char_flags f) + { + assert ((f & char_flags::idot) != char_flags::none); + + string r; + bool escape (false); + bool cclass (false); + + for (char c: s) + { + // Inverse escaping for a dot which is out of the char class + // brackets. + // + bool inverse (c == '.' && !cclass); + + // Handle the escape case. Note that we delay adding the backslash + // since we may have to inverse things. + // + if (escape) + { + if (!inverse) + r += '\\'; + + r += c; + escape = false; + + continue; + } + else if (c == '\\') + { + escape = true; + continue; + } + + // Keep track of being inside the char class brackets, escape if + // inversion. Note that we never inverse square brackets. + // + if (c == '[' && !cclass) + cclass = true; + else if (c == ']' && cclass) + cclass = false; + else if (inverse) + r += '\\'; + + r += c; + } + + if (escape) // Regex is malformed but that's not our problem. + r += '\\'; + + return r; + } + + static char_regex::flag_type + to_std_flags (char_flags f) + { + // Note that ECMAScript flag is implied in the absense of a grammar + // flag. + // + return (f & char_flags::icase) != char_flags::none + ? char_regex::icase + : char_regex::flag_type (); + } + + char_regex:: + char_regex (const char_string& s, char_flags f) + : base_type ((f & char_flags::idot) != char_flags::none + ? transform (s, f) + : s, + to_std_flags (f)) + { + } } } } diff --git a/build2/test/script/regex.ixx b/build2/test/script/regex.ixx new file mode 100644 index 0000000..4073312 --- /dev/null +++ b/build2/test/script/regex.ixx @@ -0,0 +1,35 @@ +// file : build2/test/script/regex.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex + { + inline char_flags + operator&= (char_flags& x, char_flags y) + { + return x = static_cast ( + static_cast (x) & static_cast (y)); + } + + inline char_flags + operator|= (char_flags& x, char_flags y) + { + return x = static_cast ( + static_cast (x) | static_cast (y)); + } + + inline char_flags + operator& (char_flags x, char_flags y) {return x &= y;} + + inline char_flags + operator| (char_flags x, char_flags y) {return x |= y;} + } + } + } +} diff --git a/build2/test/script/runner b/build2/test/script/runner index 7b932b9..56ea834 100644 --- a/build2/test/script/runner +++ b/build2/test/script/runner @@ -16,7 +16,7 @@ namespace build2 { namespace test { - class common; + struct common; namespace script { diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx index 522dedd..8e31cf8 100644 --- a/build2/test/script/runner.cxx +++ b/build2/test/script/runner.cxx @@ -5,7 +5,9 @@ #include #include -#include // streamsize +#include // streamsize +#include // strstr() +#include #include // fdopen_mode, fdnull(), fddup() @@ -13,11 +15,45 @@ #include +#include #include using namespace std; using namespace butl; +namespace std +{ + // Print regex error description but only if it is meaningful (this is also + // why we have to print leading colon here). + // + // Currently libstdc++ just returns the name of the exception (bug #67361). + // So we check that the description contains at least one space character. + // + // While VC's description is meaningful, it has an undesired prefix that + // resembles the following: 'regex_error(error_badrepeat): '. So we skip it. + // + static ostream& + operator<< (ostream& o, const regex_error& e) + { + const char* d (e.what ()); + +#if defined(_MSC_VER) && _MSC_VER <= 1910 + const char* rd (strstr (d, "): ")); + if (rd != nullptr) + d = rd + 3; +#endif + + ostringstream os; + os << runtime_error (d); // Sanitize the description. + + string s (os.str ()); + if (s.find (' ') != string::npos) + o << ": " << s; + + return o; + } +} + namespace build2 { namespace test @@ -99,6 +135,66 @@ namespace build2 } } + // Save a string to the file. Fail if exception is thrown by underlying + // operations. + // + static void + save (const path& p, const string& s, const location& ll) + { + try + { + ofdstream os (p); + os << s; + os.close (); + } + catch (const io_error& e) + { + fail (ll) << "unable to write " << p << ": " << e; + } + } + + // Transform string according to here-* redirect modifiers from the {/} + // set. + // + static string + transform (const string& s, + bool regex, + const string& modifiers, + const script& scr) + { + if (modifiers.find ('/') == string::npos) + return s; + + // For targets other than Windows leave the string intact. + // + if (cast (scr.test_target["test.target"]).class_ != + "windows") + return s; + + // Convert forward slashes to Windows path separators (escape for + // regex). + // + string r; + for (size_t p (0);;) + { + size_t sp (s.find ('/', p)); + + if (sp != string::npos) + { + r.append (s, p, sp - p); + r.append (regex ? "\\\\" : "\\"); + p = sp + 1; + } + else + { + r.append (s, p, sp); + break; + } + } + + return r; + } + // Check if the test command output matches the expected result (redirect // value). Noop for redirect types other than none, here_*. // @@ -140,23 +236,6 @@ namespace build2 { assert (!op.empty ()); - // While the regex file is not used for output validation we still - // create it for troubleshooting. - // - path opp (op + (re ? ".regex" : ".orig")); - - try - { - ofdstream os (opp); - sp.clean ({cleanup_type::always, opp}, true); - os << (re ? rd.regex.str : rd.str); - os.close (); - } - catch (const io_error& e) - { - fail (ll) << "unable to write " << opp << ": " << e; - } - auto output_info = [&what, &ll] (diag_record& d, const path& p, const char* prefix = "", @@ -168,13 +247,223 @@ namespace build2 d << info << prefix << what << suffix << " is empty"; }; - if (re) + if (re) // Match the output with the regex. { - // Match the output with the line_regex. That requires to parse the - // output into the line_string of literals first. + // The overall plan is: + // + // 1. Create regex line string. While creating it's line characters + // transform regex lines according to the redirect modifiers. + // + // 2. Create line regex using the line string. If creation fails + // then save the (transformed) regex redirect to a file for + // troubleshooting. + // + // 3. Parse the output into the literal line string. + // + // 4. Match the output line string with the line regex. + // + // 5. If match fails save the (transformed) regex redirect to a + // file for troubleshooting. // using namespace regex; + // Create regex line string. + // + line_pool pool; + line_string rls; + const regex_lines rl (rd.regex); + + // Parse regex flags. + // + // When add support for new flags don't forget to update + // parse_regex(). + // + auto parse_flags = [] (const string& f) -> char_flags + { + char_flags r (char_flags::none); + + for (char c: f) + { + switch (c) + { + case 'd': r |= char_flags::idot; break; + case 'i': r |= char_flags::icase; break; + default: assert (false); // Error so should have been checked. + } + } + + return r; + }; + + // Return original regex line with the transformation applied. + // + auto line = [&rl, &rd, &sp] (const regex_line& l) -> string + { + string r; + if (l.regex) // Regex (possibly empty), + { + r += rl.intro; + r += transform (l.value, true, rd.modifiers, *sp.root); + r += rl.intro; + r += l.flags; + } + else if (!l.special.empty ()) // Special literal. + r += rl.intro; + else // Textual literal. + r += transform (l.value, false, rd.modifiers, *sp.root); + + r += l.special; + return r; + }; + + // Return regex line location. + // + // Note that we rely on the fact that the command and regex lines + // are always belong to the same testscript file. + // + auto loc = [&ll] (uint64_t line, uint64_t column) -> location + { + location r (ll); + r.line = line; + r.column = column; + return r; + }; + + // Save the regex to file for troubleshooting, return the file path + // it have been saved to. + // + // Note that we save the regex on line regex creation failure or if + // the program output doesn't match. + // + auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path + { + path rp (op + ".regex"); + + // Encode here-document regex global flags if present as a file + // name suffix. For example if icase and idot flags are specified + // the name will look like: + // + // test/1/stdout.regex~di + // + if (rd.type == redirect_type::here_doc_regex && + !rl.flags.empty ()) + rp += "~" + rl.flags; + + // Note that if would be more efficient to directly write chunks + // to file rather than to compose a string first. Hower we don't + // bother (about performance) for the sake of the code as we + // already failed. + // + string s; + for (const auto& l: rl.lines) + { + if (!s.empty ()) s += '\n'; + s += line (l); + } + + save (rp, s, ll); + return rp; + }; + + // Finally create regex line string. + // + // Note that diagnostics doesn't refer to the program path as it is + // irrelevant to failures at this stage. + // + char_flags gf (parse_flags (rl.flags)); // Regex global flags. + + for (const auto& l: rl.lines) + { + if (l.regex) // Regex (with optional special characters). + { + line_char c; + + // Empty regex is a special case repesenting the blank line. + // + if (l.value.empty ()) + c = line_char ("", pool); + else + { + try + { + string s ( + transform (l.value, true, rd.modifiers, *sp.root)); + + c = line_char ( + char_regex (s, gf | parse_flags (l.flags)), pool); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful. + // + diag_record d (fail (loc (l.line, l.column))); + + if (rd.type == redirect_type::here_str_regex) + d << "invalid " << what << " regex redirect" << e << + info << "regex: '" << line (l) << "'"; + else + d << "invalid char-regex in " << what + << " regex redirect" << e << + info << "regex line: '" << line (l) << "'"; + } + } + + rls += c; // Append blank literal or regex line char. + } + else if (!l.special.empty ()) // Special literal. + { + // Literal can not be followed by special characters in the + // same line. + // + assert (l.value.empty ()); + } + else // Textual literal. + { + // Append literal line char. + // + rls += line_char ( + transform (l.value, false, rd.modifiers, *sp.root), pool); + } + + for (char c: l.special) + { + if (line_char::syntax (c)) + rls += line_char (c); // Append special line char. + else + fail (loc (l.line, l.column)) + << "invalid syntax character '" << c << "' in " << what + << " regex redirect" << + info << "regex line: '" << line (l) << "'"; + } + } + + // Create line regex. + // + line_regex regex; + + try + { + regex = line_regex (move (rls), move (pool)); + } + catch (const regex_error& e) + { + // Note that line regex creation can not fail for here-string + // redirect as it doesn't have syntax line chars. That in + // particular means that end_line and end_column are meaningful. + // + assert (rd.type == redirect_type::here_doc_regex); + + diag_record d (fail (loc (rd.end_line, rd.end_column))); + + // Print regex_error description if meaningful. + // + d << "invalid " << what << " regex redirect" << e; + + output_info (d, save_regex (), "", " regex"); + } + + // Parse the output into the literal line string. + // line_string ls; try @@ -212,7 +501,7 @@ namespace build2 while (!s.empty () && s.back () == '\r') s.pop_back (); - ls += line_char (move (s), rd.regex.regex.pool); + ls += line_char (move (s), regex.pool); } } catch (const io_error& e) @@ -220,7 +509,9 @@ namespace build2 fail (ll) << "unable to read " << op << ": " << e; } - if (regex_match (ls, rd.regex.regex)) // Doesn't throw. + // Match the output with the regex. + // + if (regex_match (ls, regex)) // Doesn't throw. return; // Output doesn't match the regex. @@ -229,16 +520,20 @@ namespace build2 d << pr << " " << what << " doesn't match the regex"; output_info (d, op); - output_info (d, opp, "", " regex"); + output_info (d, save_regex (), "", " regex"); input_info (d); // Fall through. // } - else + else // Compare the output with the expected result. { - // Use diff utility to compare the output with the expected result. + // Use diff utility for the comparison. // + path eop (op + ".orig"); + save (eop, transform (rd.str, false, rd.modifiers, *sp.root), ll); + sp.clean ({cleanup_type::always, eop}, true); + path dp ("diff"); process_path pp (run_search (dp, true)); @@ -246,7 +541,7 @@ namespace build2 pp.recall_string (), "--strip-trailing-cr", // Is essential for cross-testing. "-u", - opp.string ().c_str (), + eop.string ().c_str (), op.string ().c_str (), nullptr}; @@ -288,7 +583,7 @@ namespace build2 d << pr << " " << what << " doesn't match the expected output"; output_info (d, op); - output_info (d, opp, "expected "); + output_info (d, eop, "expected "); output_info (d, ep, "", " diff"); input_info (d); @@ -589,17 +884,9 @@ namespace build2 // isp = std_path ("stdin"); - try - { - ofdstream os (isp); - sp.clean ({cleanup_type::always, isp}, true); - os << c.in.str; - os.close (); - } - catch (const io_error& e) - { - fail (ll) << "unable to write " << isp << ": " << e; - } + const redirect& r (c.in); + save (isp, transform (r.str, false, r.modifiers, *sp.root), ll); + sp.clean ({cleanup_type::always, isp}, true); open_stdin (); break; @@ -767,12 +1054,7 @@ namespace build2 { // Execute the process. // - // Pre-search the program path so it is reflected in the failure - // diagnostics. The user can see the original path running the test - // operation with the verbosity level > 2. - // - process_path pp (run_search (c.program, true)); - cstrings args {pp.recall_string ()}; + cstrings args {c.program.string ().c_str ()}; for (const auto& a: c.arguments) args.push_back (a.c_str ()); @@ -781,6 +1063,8 @@ namespace build2 try { + process_path pp (process::path_search (args[0])); + if (verb >= 2) print_process (args); @@ -798,7 +1082,7 @@ namespace build2 } catch (const process_error& e) { - error (ll) << "unable to execute " << pp << ": " << e; + error (ll) << "unable to execute " << args[0] << ": " << e; if (e.child ()) std::exit (1); diff --git a/build2/test/script/script b/build2/test/script/script index 0144af7..bb9b074 100644 --- a/build2/test/script/script +++ b/build2/test/script/script @@ -15,7 +15,6 @@ #include #include // replay_tokens -#include namespace build2 { @@ -78,16 +77,64 @@ namespace build2 file }; + // Pre-parsed (but not instantiated) regex lines. The idea here is that + // we should be able to re-create their (more or less) exact text + // representation for diagnostics but also instantiate without any + // re-parsing. + // + struct regex_line + { + // If regex is true, then value is the regex expression. Otherwise, it + // is a literal. Note that special characters can be present in both + // cases. For example, //+ is a regex, while /+ is a literal, both + // with '+' as a special character. Flags are only valid for regex. + // Literals falls apart into textual (has no special characters) and + // special (has just special characters instead) ones. For example + // foo is a textual literal, while /.+ is a special one. Note that + // literal must not have value and special both non-empty. + // + bool regex; + + string value; + string flags; + string special; + + uint64_t line; + uint64_t column; + + // Create regex with optional special characters. + // + regex_line (uint64_t l, uint64_t c, + string v, string f, string s = string ()) + : regex (true), + value (move (v)), + flags (move (f)), + special (move (s)), + line (l), + column (c) {} + + // Create a literal, either text or special. + // + regex_line (uint64_t l, uint64_t c, string v, bool s) + : regex (false), + value (s ? string () : move (v)), + special (s ? move (v) : string ()), + line (l), + column (c) {} + }; + + struct regex_lines + { + char intro; // Introducer character. + string flags; // Global flags (here-document). + + small_vector lines; + }; + struct redirect { redirect_type type; - struct regex_type - { - regex::line_regex regex; - string str; // String representation for printing. - }; - struct file_type { using path_type = build2::path; @@ -97,13 +144,16 @@ namespace build2 union { - int fd; // Merge-to descriptor. - string str; // Note: includes trailing newline, if requested. - regex_type regex; // Note: includes trailing blank, if requested. - file_type file; + int fd; // Merge-to descriptor. + string str; // Note: includes trailing newline, if requested. + regex_lines regex; // Note: includes trailing blank, if requested. + file_type file; }; - string end; // Here-document end marker for printing. + string modifiers; // Redirect modifiers. + string end; // Here-document end marker (no regex intro/flags). + uint64_t end_line; // Here-document end marker location. + uint64_t end_column; explicit redirect (redirect_type = redirect_type::none); diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx index 2a34f66..6f56661 100644 --- a/build2/test/script/script.cxx +++ b/build2/test/script/script.cxx @@ -85,44 +85,65 @@ namespace build2 case redirect_type::merge: o << '&' << r.fd; break; case redirect_type::here_str_literal: - case redirect_type::here_str_regex: + case redirect_type::here_doc_literal: { - bool re (r.type == redirect_type::here_str_regex); - const string& v (re ? r.regex.str : r.str); - bool nl (!v.empty () && v.back () == '\n'); + bool doc (r.type == redirect_type::here_doc_literal); - if (!nl) - o << ':'; + // For here-document add another '>' or '<'. Note that here end + // marker never needs to be quoted. + // + if (doc) + o << d; - if (re) - o << '~'; + o << r.modifiers; + + if (doc) + o << r.end; + else + { + const string& v (r.str); + to_stream_q (o, + r.modifiers.find (':') == string::npos + ? string (v, 0, v.size () - 1) // Strip newline. + : v); + } - to_stream_q (o, nl ? string (v, 0, v.size () - 1) : v); break; } - case redirect_type::here_doc_literal: + + case redirect_type::here_str_regex: case redirect_type::here_doc_regex: { - bool re (r.type == redirect_type::here_doc_regex); - const string& v (re ? r.regex.str : r.str); - bool nl (!v.empty () && v.back () == '\n'); + bool doc (r.type == redirect_type::here_doc_regex); - // Add another '>' or '<'. Note that here end marker never - // needs to be quoted. + // For here-document add another '>' or '<'. Note that here end + // marker never needs to be quoted. // - o << d << (nl ? "" : ":"); + if (doc) + o << d; + + o << r.modifiers; + + const regex_lines& re (r.regex); + + if (doc) + o << re.intro + r.end + re.intro + re.flags; + else + { + assert (!re.lines.empty ()); // Regex can't be empty. - if (re) - o << '~'; + regex_line l (re.lines[0]); + to_stream_q (o, re.intro + l.value + re.intro + l.flags); + } - to_stream_q (o, r.end); break; } + case redirect_type::file: { // Add '>>' or '<<' (and so make it '<<<' or '>>>'). // - o << d << d << (r.file.append ? "&" : ""); + o << d << d << r.modifiers; print_path (r.file.path); break; } @@ -131,16 +152,36 @@ namespace build2 auto print_doc = [&o] (const redirect& r) { - bool re (r.type == redirect_type::here_doc_regex); - const string& v (re ? r.regex.str : r.str); - bool nl (!v.empty () && v.back () == '\n'); + o << endl; - // For the regex here-document the end marker contains introducer and - // flags characters, so need to remove them. - // - const string& e (r.end); - o << endl << v << (nl ? "" : "\n") - << (re ? string (e, 1, e.find (e[0], 1) - 1) : e); + if (r.type == redirect_type::here_doc_literal) + o << r.str; + else + { + assert (r.type == redirect_type::here_doc_regex); + + const regex_lines& rl (r.regex); + + for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); + i != e; ++i) + { + if (i != b) + o << endl; + + const regex_line& l (*i); + + if (l.regex) // Regex (possibly empty), + o << rl.intro << l.value << rl.intro << l.flags; + else if (!l.special.empty ()) // Special literal. + o << rl.intro; + else // Textual literal. + o << l.value; + + o << l.special; + } + } + + o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end; }; if ((m & command_to_stream::header) == command_to_stream::header) @@ -268,7 +309,11 @@ namespace build2 case redirect_type::here_doc_literal: new (&str) string (); break; case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: new (®ex) regex_type (); break; + case redirect_type::here_doc_regex: + { + new (®ex) regex_lines (); + break; + } case redirect_type::file: new (&file) file_type (); break; } @@ -276,7 +321,11 @@ namespace build2 redirect:: redirect (redirect&& r) - : type (r.type), end (move (r.end)) + : type (r.type), + modifiers (move (r.modifiers)), + end (move (r.end)), + end_line (r.end_line), + end_column (r.end_column) { switch (type) { @@ -295,7 +344,7 @@ namespace build2 case redirect_type::here_str_regex: case redirect_type::here_doc_regex: { - new (®ex) regex_type (move (r.regex)); + new (®ex) regex_lines (move (r.regex)); break; } case redirect_type::file: @@ -320,7 +369,7 @@ namespace build2 case redirect_type::here_doc_literal: str.~string (); break; case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: regex.~regex_type (); break; + case redirect_type::here_doc_regex: regex.~regex_lines (); break; case redirect_type::file: file.~file_type (); break; } -- cgit v1.1