diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2016-12-08 18:51:22 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2016-12-16 18:49:26 +0300 |
commit | 67b63f91f2668ec65e1791ecfeb24a53c389f1ec (patch) | |
tree | f55bd231491a0417592fe98f415c1b5c06f4dadc | |
parent | 2af491cc4ad0ae4064f2e858c83ed485613adafa (diff) |
Add line_regex
-rw-r--r-- | build2/buildfile | 1 | ||||
-rw-r--r-- | build2/test/script/parser.cxx | 26 | ||||
-rw-r--r-- | build2/test/script/regex | 625 | ||||
-rw-r--r-- | build2/test/script/regex.cxx | 290 | ||||
-rw-r--r-- | build2/test/script/runner.cxx | 28 | ||||
-rw-r--r-- | build2/test/script/script | 27 | ||||
-rw-r--r-- | build2/test/script/script.cxx | 80 | ||||
-rw-r--r-- | unit-tests/test/script/buildfile | 2 | ||||
-rw-r--r-- | unit-tests/test/script/parser/buildfile | 2 | ||||
-rw-r--r-- | unit-tests/test/script/regex/buildfile | 12 | ||||
-rw-r--r-- | unit-tests/test/script/regex/driver.cxx | 252 |
11 files changed, 1213 insertions, 132 deletions
diff --git a/build2/buildfile b/build2/buildfile index f166ecd..37f0dc0 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -85,6 +85,7 @@ exe{b}: \ test/script/{hxx cxx}{ builtin } \ test/script/{hxx cxx}{ lexer } \ test/script/{hxx cxx}{ parser } \ +test/script/{hxx cxx}{ regex } \ test/script/{hxx cxx}{ runner } \ test/script/{hxx ixx cxx}{ script } \ test/script/{hxx cxx}{ token } \ diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index 50d4f67..da61c64 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -1277,7 +1277,9 @@ namespace build2 // enter: first token of the command line // leave: <newline> - command_expr expr {{expr_operator::log_and, {}}}; + command_expr expr; + expr.emplace_back (expr_term ()); + command c; // Command being assembled. // Make sure the command makes sense. @@ -1517,18 +1519,18 @@ namespace build2 switch (tt) { case type::in_pass: - case type::out_pass: rt = redirect_type::pass; break; + case type::out_pass: rt = redirect_type::pass; break; case type::in_null: - case type::out_null: rt = redirect_type::null; break; + case type::out_null: rt = redirect_type::null; break; - case type::out_merge: rt = redirect_type::merge; break; + case type::out_merge: rt = redirect_type::merge; break; case type::in_str: - case type::out_str: rt = redirect_type::here_string; break; + case type::out_str: rt = redirect_type::here_str_literal; break; case type::in_doc: - case type::out_doc: rt = redirect_type::here_document; break; + case type::out_doc: rt = redirect_type::here_doc_literal; break; case type::in_file: case type::out_file: rt = redirect_type::file; break; @@ -1551,7 +1553,7 @@ namespace build2 case 2: p = pending::err_merge; break; } break; - case redirect_type::here_string: + case redirect_type::here_str_literal: switch (fd) { case 0: p = pending::in_string; break; @@ -1559,7 +1561,7 @@ namespace build2 case 2: p = pending::err_string; break; } break; - case 
redirect_type::here_document: + case redirect_type::here_doc_literal: switch (fd) { case 0: p = pending::in_document; break; @@ -1567,6 +1569,10 @@ namespace build2 case 2: p = pending::err_document; break; } break; + + case redirect_type::here_str_regex: // @@ REGEX + case redirect_type::here_doc_regex: assert (false); break; + case redirect_type::file: switch (fd) { @@ -2121,8 +2127,8 @@ namespace build2 command& c (p.first[h.expr].pipe[h.pipe]); redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err); - r.doc.doc = move (v); - r.doc.end = move (h.end); + r.str = move (v); + r.end = move (h.end); } expire_mode (); diff --git a/build2/test/script/regex b/build2/test/script/regex index c6e711c..cfc6031 100644 --- a/build2/test/script/regex +++ b/build2/test/script/regex @@ -5,7 +5,11 @@ #ifndef BUILD2_TEST_SCRIPT_REGEX #define BUILD2_TEST_SCRIPT_REGEX +#include <list> #include <regex> +#include <locale> +#include <type_traits> // make_unsigned, is_unsigned +#include <unordered_set> #include <build2/types> #include <build2/utility> @@ -16,59 +20,592 @@ namespace build2 { namespace script { - // @@ How can we get the position the first line_char in the output that - // did not match, for diagnostics? Failed that it will be very hard - // to debug match failures. - // - - using char_string = std::basic_string<char>; - using char_regex = std::basic_regex<char>; - - // Newlines are line separators and are not part of the line: - // - // line<newline>line<newline> - // - // Specifically, this means that a customary trailing newline creates a - // trailing blank line. - // - // Special characters should only be compared to special. All others - // can inter-compare (though there cannot be regex characters in the - // output, only in line_regex). 
- // - enum class line_type + namespace regex { - blank, - special, - literal, - regex - }; + using char_string = std::basic_string<char>; + using char_regex = std::basic_regex<char>; - struct line_char - { - line_type type; + // Newlines are line separators and are not part of the line: + // + // line<newline>line<newline> + // + // Specifically, this means that a customary trailing newline creates a + // trailing blank line. + // + // All characters can inter-compare (though there cannot be regex + // characters in the output, only in line_regex). + // + // Note that we assume that line_regex and the input to regex_match() + // use the same pool. + // + struct line_pool + { + // Note that we assume the pool can be moved without invalidating + // pointers to any already pooled entities. + // + std::unordered_set<char_string> strings; + std::list<char_regex> regexes; + }; + + enum class line_type + { + special, + literal, + regex + }; + + struct line_char + { + line_type type; + + union + { + int special; // 0 (nul character) + // -1 (EOF) + // [()|.*+?{\}0123456789,=!] (excluding []) + + const char_string* literal; + const char_regex* regex; + }; + + static const line_char nul; + static const line_char eof; + + // Note: creates an uninitialized value. + // + line_char () = default; + + // Create a special character. + // + // Note that the constructor is implicit to allow basic_regex to + // implicitly construct line_chars from special char literals (in + // particular libstdc++ appends them to an internal line_string). + // + // Also note that we extend the valid characters set (see above) with + // 'p', 'n' (used by libstdc++ for positive/negative look-ahead + // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used + // by libstdc++ for newline/newparagraph matching). + // + line_char (int); + + // Create a literal character. + // + // Don't copy string if already pooled. 
+ // + explicit + line_char (const char_string&, line_pool&); + + explicit + line_char (char_string&&, line_pool&); + + explicit + line_char (const char_string* s) // Assume already pooled. + : type (line_type::literal), literal (s) {} + + // Create a regex character. + // + explicit + line_char (char_regex, line_pool&); + + explicit + line_char (const char_regex* r) // Assume already pooled. + : type (line_type::regex), regex (r) {} + + // Provide basic_regex with the ability to use line_char in a context + // where a char value is expected (e.g., as a function argument). + // + // libstdc++ seems to cast special line_chars only (and such a + // conversion is meanigfull). + // + // msvcrt casts line_chars of arbitrary types instead. The only + // reasonable strategy is to return a value that differs from any + // other that can be encountered in a regex expression and so will + // unlikelly be misinterpreted. + // + operator char () const + { + return type == line_type::special ? special : '\a'; // BELL. + } - union - { // Uninitialized if type is blank. - char special; // [()|*+?{\}0123456789,=!] (excluding []). - char_string literal; - char_regex regex; + // Provide basic_regex (such as from msvcrt) with the ability to + // explicitly cast line_chars to implementation-specific enums. + // + template <typename T> + explicit + operator T () const + { + assert (type == line_type::special); + return static_cast<T> (special); + } }; - }; - // Note: line_string is not NUL-terminated. - // - using line_string = vector<line_char>; + // Perform "deep" characters comparison (for example match literal + // character with a regex character), rather than just compare them + // literally. At least one argument must be of a type other than regex + // as there is no operator==() defined to compare regexes. Characters + // of the literal type must share the same pool (strings are compared + // by pointers not by values). 
+ // + bool + operator== (const line_char&, const line_char&); + + // Return false if arguments are equal (operator==() returns true). + // Otherwise if types are different return the value implying that + // special < literal < regex. If types are special or literal return + // the result of the respective characters or strings comparison. At + // least one argument must be of a type other than regex as there is no + // operator<() defined to compare regexes. + // + // While not very natural operation for the class we have, we have to + // provide some meaningfull semantics for such a comparison as it is + // required by the char_traits<line_char> specialization. While we + // could provide it right in that specialization, let's keep it here + // for basic_regex implementations that potentially can compare + // line_chars as they compare them with expressions of other types (see + // below). + // + bool + operator< (const line_char&, const line_char&); + + inline bool + operator!= (const line_char& l, const line_char& r) + { + return !(l == r); + } + + inline bool + operator<= (const line_char& l, const line_char& r) + { + return l < r || l == r; + } + + // Provide basic_regex (such as from msvcrt) with the ability to + // compare line_char to a value of an integral or + // implementation-specific enum type. In the absense of the following + // template operators, such a comparisons would be ambigious for + // integral types (given that there are implicit conversions + // int->line_char and line_char->char) and impossible for enums. + // + // Note that these == and < operators can succeed only for a line_char + // of the special type. For other types they always return false. That + // in particular leads to the following case: + // + // (lc != c) != (lc < c || c < lc). 
+ // + // Note that we can not assert line_char is of the special type as + // basic_regex (such as from libc++) may need the ability to check if + // arbitrary line_char belongs to some special characters range (like + // ['0', '9']). + // + template <typename T> + struct line_char_cmp + : public std::enable_if<std::is_integral<T>::value || + std::is_enum<T>::value> + { + }; + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator== (const line_char& l, const T& r) + { + return + l.type == line_type::special && static_cast<T> (l.special) == r; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator== (const T& l, const line_char& r) + { + return + r.type == line_type::special && static_cast<T> (r.special) == l; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator!= (const line_char& l, const T& r) + { + return !(l == r); + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator!= (const T& l, const line_char& r) + { + return !(l == r); + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator< (const line_char& l, const T& r) + { + return + l.type == line_type::special && static_cast<T> (l.special) < r; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator< (const T& l, const line_char& r) + { + return + r.type == line_type::special && l < static_cast<T> (r.special); + } + + template <typename T, typename = typename line_char_cmp<T>::type> + inline bool + operator<= (const line_char& l, const T& r) + { + return l < r || l == r; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + inline bool + operator<= (const T& l, const line_char& r) + { + return l < r || l == r; + } + + using line_string = std::basic_string<line_char>; + + // Locale that has ctype<line_char> facet installed. 
Used in the + // regex_traits<line_char> specialization (see below). + // + class line_char_locale: public std::locale + { + public: + // Create a copy of the global C++ locale. + // + line_char_locale (); + }; + } + } + } +} + +// Standard template specializations for line_char that are required for the +// basic_regex<line_char> instantiation. +// +namespace std +{ + template <> + class char_traits<build2::test::script::regex::line_char> + { + public: + using char_type = build2::test::script::regex::line_char; + using int_type = char_type; + using off_type = char_traits<char>::off_type; + using pos_type = char_traits<char>::pos_type; + using state_type = char_traits<char>::state_type; + + static void + assign (char_type& c1, const char_type& c2) {c1 = c2;} + + static char_type* + assign (char_type*, size_t, char_type); + + // Note that eq() and lt() are not constexpr (as required by C++11) + // because == and < operators for char_type are not constexpr. + // + static bool + eq (const char_type& l, const char_type& r) {return l == r;} + + static bool + lt (const char_type& l, const char_type& r) {return l < r;} + + static char_type* + move (char_type*, const char_type*, size_t); + + static char_type* + copy (char_type*, const char_type*, size_t); + + static int + compare (const char_type*, const char_type*, size_t); + + static size_t + length (const char_type*); + + static const char_type* + find (const char_type*, size_t, const char_type&); + + static constexpr char_type + to_char_type (const int_type& c) {return c;} + + static constexpr int_type + to_int_type (const char_type& c) {return int_type (c);} + + // Note that the following functions are not constexpr (as required by + // C++11) because their return expressions are not constexpr. + // + static bool + eq_int_type (const int_type& l, const int_type& r) {return l == r;} + + static int_type eof () {return char_type::eof;} + + static int_type + not_eof (const int_type& c) + { + return c != char_type::eof ? 
c : char_type::nul; + } + }; + + // ctype<> must be derived from both ctype_base and locale::facet (the later + // supports ref-counting used by the std::locale implementation internally). + // + // msvcrt for some reason also derives ctype_base from locale::facet which + // produces "already a base-class" warning and effectivelly breaks the + // reference counting. So we derive from ctype_base only in this case. + // + template <> + class ctype<build2::test::script::regex::line_char>: public ctype_base +#if !defined(_MSC_VER) || _MSC_VER > 1910 + , public locale::facet +#endif + { + // Used by the implementation only. + // + using line_type = build2::test::script::regex::line_type; + + public: + using char_type = build2::test::script::regex::line_char; + + static locale::id id; - class line_regex: public std::basic_regex<line_char> +#if !defined(_MSC_VER) || _MSC_VER > 1910 + explicit + ctype (size_t refs = 0): locale::facet (refs) {} +#else + explicit + ctype (size_t refs = 0): ctype_base (refs) {} +#endif + + // While unnecessary, let's keep for completeness. + // + virtual + ~ctype () override = default; + + // The C++ standard requires the following functions to call their virtual + // (protected) do_*() counterparts that provide the real implementations. + // The only purpose for this indirection is to provide a user with the + // ability to customize existing (standard) ctype facets. As we do not + // provide such an ability, for simplicity we will omit the do_*() + // functions and provide the implementations directly. This should be safe + // as nobody except us could call those protected functions. + // + bool + is (mask m, char_type c) const + { + return m == (c.type == line_type::special && build2::digit (c.special) + ? 
digit + : 0); + } + + const char_type* + is (const char_type*, const char_type*, mask*) const; + + const char_type* + scan_is (mask, const char_type*, const char_type*) const; + + const char_type* + scan_not (mask, const char_type*, const char_type*) const; + + char_type + toupper (char_type c) const {return c;} + + const char_type* + toupper (char_type*, const char_type* e) const {return e;} + + char_type + tolower (char_type c) const {return c;} + + const char_type* + tolower (char_type*, const char_type* e) const {return e;} + + char_type + widen (char c) const {return char_type (c);} + + const char* + widen (const char*, const char*, char_type*) const; + + char + narrow (char_type c, char def) const + { + return c.type == line_type::special ? c.special : def; + } + + const char_type* + narrow (const char_type*, const char_type*, char, char*) const; + }; + + // Note: the current application locale must be the POSIX one. Otherwise the + // behavior is undefined. + // + template <> + class regex_traits<build2::test::script::regex::line_char> + { + public: + using char_type = build2::test::script::regex::line_char; + using string_type = build2::test::script::regex::line_string; + using locale_type = build2::test::script::regex::line_char_locale; + using char_class_type = regex_traits<char>::char_class_type; + + // Workaround for msvcrt bugs. For some reason it assumes such a members + // to be present in a regex_traits specialization. + // +#if defined(_MSC_VER) && _MSC_VER <= 1910 + static const ctype_base::mask _Ch_upper = ctype_base::upper; + static const ctype_base::mask _Ch_alpha = ctype_base::alpha; + + // Unsigned char_type. msvcrt statically asserts the _Uelem type is + // unsigned, so we specialize is_unsigned<line_char> as well (see below). + // + using _Uelem = char_type; +#endif + + regex_traits () = default; // Unnecessary but let's keep for completeness. 
+ + static size_t + length (const char_type* p) {return string_type::traits_type::length (p);} + + char_type + translate (char_type c) const {return c;} + + // Case-insensitive matching is not supported by line_regex. So there is no + // reason for the function to be called. + // + char_type + translate_nocase (char_type c) const {assert (false); return c;} + + // Return a sort-key - the exact copy of [b, e). + // + template <typename I> + string_type + transform (I b, I e) const {return string_type (b, e);} + + // Return a case-insensitive sort-key. Case-insensitive matching is not + // supported by line_regex. So there is no reason for the function to be + // called. + // + template <typename I> + string_type + transform_primary (I b, I e) const + { + assert (false); + return string_type (b, e); + } + + // POSIX regex grammar and collating elements (e.g., [.tilde.]) in + // particular are not supported. So there is no reason for the function to + // be called. + // + template <typename I> + string_type + lookup_collatename (I, I) const {assert (false); return string_type ();} + + // Character classes (e.g., [:lower:]) are not supported. So there is no + // reason for the function to be called. + // + template <typename I> + char_class_type + lookup_classname (I, I, bool = false) const + { + assert (false); + return char_class_type (); + } + + // Return false as we don't support character classes (e.g., [:lower:]). + // + bool + isctype (char_type, char_class_type) const {return false;} + + int + value (char_type, int) const; + + // Return the locale passed as an argument as we do not expect anything + // other than POSIX locale, that we also assume to be imbued by default. + // + locale_type + imbue (locale_type l) {return l;} + + locale_type + getloc () const {return locale_type ();} + }; + + // We assume line_char to be an unsigned type and express that with the + // following specializations used by basic_regex implementations. 
+ // + // libstdc++ defines unsigned CharT type (regex_traits template parameter) + // to use as an index in some internal cache regardless if the cache is used + // for this specialization (and the cache is used only if CharT is char). + // + template <> + struct make_unsigned<build2::test::script::regex::line_char> + { + using type = build2::test::script::regex::line_char; + }; + + // msvcrt assumes regex_traits<line_char>::_Uelem to be present (see above) + // and statically asserts it is unsigned. + // + template <> + struct is_unsigned<build2::test::script::regex::line_char> + { + static const bool value = true; + }; + + // When used with libc++ the linker complains that it can't find + // __match_any_but_newline<line_char>::__exec() function. The problem is + // that the function is only specialized for char and wchar_t. As line_char + // has no notion of the newline character we specialize the class template + // to behave as the __match_any<line_char> instantiation does (that luckily + // has all the functions in place). + // +#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 4000 + template <> + class __match_any_but_newline<build2::test::script::regex::line_char> + : public __match_any<build2::test::script::regex::line_char> + { + public: + using base = __match_any<build2::test::script::regex::line_char>; + using base::base; + }; +#endif +} + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex { - public: - using base_type = std::basic_regex<line_char>; + class line_regex: public std::basic_regex<line_char> + { + public: + using base_type = std::basic_regex<line_char>; + + using base_type::base_type; + + line_regex () = default; - using base_type::base_type; + // Move constuctible-only type. + // + line_regex (line_regex&&) = default; + line_regex (const line_regex&) = delete; - explicit - line_regex (const line_string&); - }; + // Move string regex together with the pool used to create it. 
+ // + line_regex (line_string&& s, line_pool&& p) + // No move-string ctor for base_type, so emulate it. + // + : base_type (s), pool (move (p)) {s.clear ();} + + line_regex& operator= (line_regex&&) = delete; + line_regex& operator= (const line_regex&) = delete; + + public: + line_pool pool; + }; + } } } } diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx new file mode 100644 index 0000000..c6fba75 --- /dev/null +++ b/build2/test/script/regex.cxx @@ -0,0 +1,290 @@ +// file : build2/test/script/regex.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <build2/test/script/regex> + +#include <algorithm> // copy(), copy_backward() + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex + { + const line_char line_char::nul (0); + const line_char line_char::eof (-1); + + // line_char + // + line_char:: + line_char (int c) + : type (line_type::special), special (c) + { + // @@ How can we allow anything for basic_regex but only subset + // for our own code? + // + const char sp[] = "()|.*+?{\\}0123456789,=!"; + const char ex[] = "pn\n\r"; + + assert (c == 0 || // Null character. + + // EOF. Note that is also passed by msvcrt as _Meta_eos + // enum value. + // + c == -1 || + + // libstdc++ line/paragraph separators. + // + c == u'\u2028' || c == u'\u2029' || + + (c > 0 && c <= 255 && ( + // Supported regex special characters. + // + string::traits_type::find (sp, 23, c) != nullptr || + + // libstdc++ look-ahead tokens, newline chars. 
+ // + string::traits_type::find (ex, 4, c) != nullptr))); + } + + line_char:: + line_char (const char_string& s, line_pool& p) + : line_char (&(*p.strings.emplace (s).first)) + { + } + + line_char:: + line_char (char_string&& s, line_pool& p) + : line_char (&(*p.strings.emplace (move (s)).first)) + { + } + + line_char:: + line_char (char_regex r, line_pool& p) + // Note: in C++17 can write as p.regexes.emplace_front(move (r)) + // + : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r)))) + { + } + + bool + operator== (const line_char& l, const line_char& r) + { + if (l.type == r.type) + { + bool res (true); + + switch (l.type) + { + case line_type::special: res = l.special == r.special; break; + case line_type::regex: assert (false); break; + + // Note that we use pointers (rather than vales) comparison + // assuming that the strings must belong to the same pool. + // + case line_type::literal: res = l.literal == r.literal; break; + } + + return res; + } + + // Match literal with regex. + // + if (l.type == line_type::literal && r.type == line_type::regex) + return regex_match (*l.literal, *r.regex); + else if (r.type == line_type::literal && l.type == line_type::regex) + return regex_match (*r.literal, *l.regex); + + return false; + } + + bool + operator< (const line_char& l, const line_char& r) + { + if (l == r) + return false; + + if (l.type != r.type) + return l.type < r.type; + + bool res (false); + + switch (l.type) + { + case line_type::special: res = l.special < r.special; break; + case line_type::literal: res = *l.literal < *r.literal; break; + case line_type::regex: assert (false); break; + } + + return res; + } + + // line_char_locale + // + line_char_locale:: + line_char_locale () + : locale (locale (), + new std::ctype<line_char> ()) // Hidden by ctype bitmask. 
+ { + } + } + } + } +} + +namespace std +{ + using namespace build2::test::script::regex; + + // char_traits<line_char> + // + line_char* char_traits<line_char>:: + assign (char_type* s, size_t n, char_type c) + { + for (size_t i (0); i != n; ++i) + s[i] = c; + return s; + } + + line_char* char_traits<line_char>:: + move (char_type* d, const char_type* s, size_t n) + { + if (n > 0 && d != s) + { + // If d < s then it can't be in [s, s + n) range and so using copy() is + // safe. Otherwise d + n is out of (first, last] range and so using + // copy_backward() is safe. + // + if (d < s) + std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy(). + else + copy_backward (s, s + n, d + n); + } + + return d; + } + + line_char* char_traits<line_char>:: + copy (char_type* d, const char_type* s, size_t n) + { + std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy(). + return d; + } + + int char_traits<line_char>:: + compare (const char_type* s1, const char_type* s2, size_t n) + { + for (size_t i (0); i != n; ++i) + { + if (s1[i] < s2[i]) + return -1; + else if (s2[i] < s1[i]) + return 1; + } + + return 0; + } + + size_t char_traits<line_char>:: + length (const char_type* s) + { + size_t i (0); + while (s[i] != char_type::nul) + ++i; + + return i; + } + + const line_char* char_traits<line_char>:: + find (const char_type* s, size_t n, const char_type& c) + { + for (size_t i (0); i != n; ++i) + { + if (s[i] == c) + return s + i; + } + + return nullptr; + } + + // ctype<line_char> + // + locale::id ctype<line_char>::id; + + const line_char* ctype<line_char>:: + is (const char_type* b, const char_type* e, mask* m) const + { + while (b != e) + { + const char_type& c (*b++); + + *m++ = c.type == line_type::special && build2::digit (c.special) + ? 
digit + : 0; + } + + return e; + } + + const line_char* ctype<line_char>:: + scan_is (mask m, const char_type* b, const char_type* e) const + { + for (; b != e; ++b) + { + if (is (m, *b)) + return b; + } + + return e; + } + + const line_char* ctype<line_char>:: + scan_not (mask m, const char_type* b, const char_type* e) const + { + for (; b != e; ++b) + { + if (!is (m, *b)) + return b; + } + + return e; + } + + const char* ctype<line_char>:: + widen (const char* b, const char* e, char_type* c) const + { + while (b != e) + *c++ = widen (*b++); + + return e; + } + + const line_char* ctype<line_char>:: + narrow (const char_type* b, const char_type* e, char def, char* c) const + { + while (b != e) + *c++ = narrow (*b++, def); + + return e; + } + + // regex_traits<line_char> + // + int regex_traits<line_char>:: + value (char_type c, int radix) const + { + assert (radix == 8 || radix == 10 || radix == 16); + + if (c.type != line_type::special) + return -1; + + const char digits[] = "0123456789ABCDEF"; + const char* d (string::traits_type::find (digits, radix, c.special)); + return d != nullptr ? d - digits : -1; + } +} diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx index 67d82c0..05b3b5c 100644 --- a/build2/test/script/runner.cxx +++ b/build2/test/script/runner.cxx @@ -79,8 +79,8 @@ namespace build2 input_info (d); } } - else if (rd.type == redirect_type::here_string || - rd.type == redirect_type::here_document) + else if (rd.type == redirect_type::here_str_literal || + rd.type == redirect_type::here_doc_literal) { assert (!op.empty ()); @@ -90,11 +90,7 @@ namespace build2 { ofdstream os (orp); sp.clean ({cleanup_type::always, orp}, true); - - os << (rd.type == redirect_type::here_string - ? 
rd.str - : rd.doc.doc); - + os << rd.str; os.close (); } catch (const io_error& e) @@ -441,8 +437,8 @@ namespace build2 break; } - case redirect_type::here_string: - case redirect_type::here_document: + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: { // We could write to the command stdin directly but instead will // cache the data for potential troubleshooting. @@ -453,11 +449,7 @@ namespace build2 { ofdstream os (isp); sp.clean ({cleanup_type::always, isp}, true); - - os << (c.in.type == redirect_type::here_string - ? c.in.str - : c.in.doc.doc); - + os << c.in.str; os.close (); } catch (const io_error& e) @@ -470,6 +462,8 @@ namespace build2 } case redirect_type::merge: assert (false); break; + case redirect_type::here_str_regex: // @@ REGEX + case redirect_type::here_doc_regex: assert (false); break; } // Dealing with stdout and stderr redirect types other than 'null' @@ -553,13 +547,15 @@ namespace build2 } case redirect_type::none: - case redirect_type::here_string: - case redirect_type::here_document: + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: { p = std_path (what); m |= fdopen_mode::truncate; break; } + case redirect_type::here_str_regex: // @@ REGEX + case redirect_type::here_doc_regex: assert (false); break; } try diff --git a/build2/test/script/script b/build2/test/script/script index 714e2c5..bb202fa 100644 --- a/build2/test/script/script +++ b/build2/test/script/script @@ -15,6 +15,7 @@ #include <build2/test/target> #include <build2/test/script/token> // replay_tokens +#include <build2/test/script/regex> namespace build2 { @@ -70,8 +71,10 @@ namespace build2 pass, null, merge, - here_string, - here_document, + here_str_literal, + here_doc_literal, + here_str_regex, + here_doc_regex, file }; @@ -79,10 +82,10 @@ namespace build2 { redirect_type type; - struct doc_type + struct regex_type { - string doc; // Note: includes trailing newline, if required. 
- string end; + regex::line_regex regex; + string str; // String representation for printing. }; struct file_type @@ -94,19 +97,21 @@ namespace build2 union { - int fd; // Merge-to descriptor. - string str; // Note: includes trailing newline, if required. - doc_type doc; - file_type file; + int fd; // Merge-to descriptor. + string str; // Note: includes trailing newline, if requested. + regex_type regex; // Note: includes trailing blank, if requested. + file_type file; }; + string end; // Here-document end marker for printing. + explicit redirect (redirect_type = redirect_type::none); + // Move constuctible/assignable-only type. + // redirect (redirect&&); - redirect (const redirect&); redirect& operator= (redirect&&); - redirect& operator= (const redirect&); ~redirect (); }; diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx index 17eacaa..7941df6 100644 --- a/build2/test/script/script.cxx +++ b/build2/test/script/script.cxx @@ -84,7 +84,7 @@ namespace build2 case redirect_type::null: o << '-'; break; case redirect_type::merge: o << '&' << r.fd; break; - case redirect_type::here_string: + case redirect_type::here_str_literal: { const string& v (r.str); bool nl (!v.empty () && v.back () == '\n'); @@ -95,16 +95,16 @@ namespace build2 to_stream_q (o, nl ? string (v, 0, v.size () - 1) : v); break; } - case redirect_type::here_document: + case redirect_type::here_doc_literal: { - const string& v (r.doc.doc); + const string& v (r.str); bool nl (!v.empty () && v.back () == '\n'); // Add another '>' or '<'. Note that here end marker never // needs to be quoted. // o << d << (nl ? 
"" : ":"); - to_stream_q (o, r.doc.end); + to_stream_q (o, r.end); break; } case redirect_type::file: @@ -115,14 +115,16 @@ namespace build2 print_path (r.file.path); break; } + case redirect_type::here_str_regex: // @@ REGEX + case redirect_type::here_doc_regex: assert (false); break; } }; auto print_doc = [&o] (const redirect& r) { - const string& v (r.doc.doc); + const string& v (r.str); bool nl (!v.empty () && v.back () == '\n'); - o << endl << v << (nl ? "" : "\n") << r.doc.end; + o << endl << v << (nl ? "" : "\n") << r.end; }; if ((m & command_to_stream::header) == command_to_stream::header) @@ -171,9 +173,9 @@ namespace build2 { // Here-documents. // - if (c.in.type == redirect_type::here_document) print_doc (c.in); - if (c.out.type == redirect_type::here_document) print_doc (c.out); - if (c.err.type == redirect_type::here_document) print_doc (c.err); + if (c.in.type == redirect_type::here_doc_literal) print_doc (c.in); + if (c.out.type == redirect_type::here_doc_literal) print_doc (c.out); + if (c.err.type == redirect_type::here_doc_literal) print_doc (c.err); } } @@ -238,15 +240,19 @@ namespace build2 case redirect_type::null: case redirect_type::merge: break; - case redirect_type::here_string: new (&str) string (); break; - case redirect_type::here_document: new (&doc) doc_type (); break; - case redirect_type::file: new (&file) file_type (); break; + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: new (&str) string (); break; + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: new (&regex) regex_type (); break; + + case redirect_type::file: new (&file) file_type (); break; } } redirect:: redirect (redirect&& r) - : type (r.type) + : type (r.type), end (move (r.end)) { switch (type) { @@ -256,14 +262,16 @@ namespace build2 case redirect_type::merge: fd = r.fd; break; - case redirect_type::here_string: + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: { new (&str) string
(move (r.str)); break; } - case redirect_type::here_document: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: { - new (&doc) doc_type (move (r.doc)); + new (&regex) regex_type (move (r.regex)); break; } case redirect_type::file: @@ -275,28 +283,6 @@ namespace build2 } redirect:: - redirect (const redirect& r) - : type (r.type) - { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: break; - - case redirect_type::merge: fd = r.fd; break; - - case redirect_type::here_string: new (&str) string (r.str); break; - case redirect_type::here_document: new (&doc) doc_type (r.doc); break; - case redirect_type::file: - { - new (&file) file_type (r.file); - break; - } - } - } - - redirect:: ~redirect () { switch (type) @@ -306,9 +292,13 @@ namespace build2 case redirect_type::null: case redirect_type::merge: break; - case redirect_type::here_string: str.~string (); break; - case redirect_type::here_document: doc.~doc_type (); break; - case redirect_type::file: file.~file_type (); break; + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: str.~string (); break; + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: regex.~regex_type (); break; + + case redirect_type::file: file.~file_type (); break; } } @@ -323,14 +313,6 @@ namespace build2 return *this; } - redirect& redirect:: - operator= (const redirect& r) - { - if (this != &r) - *this = redirect (r); // Reduce to move-assignment.
- return *this; - } - // scope // scope:: diff --git a/unit-tests/test/script/buildfile b/unit-tests/test/script/buildfile index 6b303e1..c5306b1 100644 --- a/unit-tests/test/script/buildfile +++ b/unit-tests/test/script/buildfile @@ -2,6 +2,6 @@ # copyright : Copyright (c) 2014-2016 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -d = lexer/ parser/ +d = lexer/ parser/ regex/ ./: $d include $d diff --git a/unit-tests/test/script/parser/buildfile b/unit-tests/test/script/parser/buildfile index ad32494..db75a4f 100644 --- a/unit-tests/test/script/parser/buildfile +++ b/unit-tests/test/script/parser/buildfile @@ -11,7 +11,7 @@ src = token lexer parser diagnostics utility variable name context target \ scope prerequisite file module operation rule b-options algorithm search \ filesystem function functions-builtin functions-path functions-process-path \ functions-string config/{utility init operation} dump types-parsers \ -test/{target script/{token lexer parser script}} \ +test/{target script/{token lexer parser regex script}} \ scheduler exe{driver}: cxx{driver} ../../../../build2/cxx{$src} $libs \ diff --git a/unit-tests/test/script/regex/buildfile b/unit-tests/test/script/regex/buildfile new file mode 100644 index 0000000..26c759a --- /dev/null +++ b/unit-tests/test/script/regex/buildfile @@ -0,0 +1,12 @@ +# file : unit-tests/test/script/regex/buildfile +# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +#@@ Temporary until we get utility library support. 
+# +import libs = libbutl%lib{butl} +src = test/script/regex + +exe{driver}: cxx{driver} ../../../../build2/cxx{$src} $libs + +include ../../../../build2/ diff --git a/unit-tests/test/script/regex/driver.cxx b/unit-tests/test/script/regex/driver.cxx new file mode 100644 index 0000000..ca09048 --- /dev/null +++ b/unit-tests/test/script/regex/driver.cxx @@ -0,0 +1,252 @@ +// file : unit-tests/test/script/regex/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <regex> +#include <type_traits> // is_pod, is_array + +#include <build2/test/script/regex> + +using namespace std; +using namespace build2::test::script::regex; + +int +main () +{ + using lc = line_char; + using ls = line_string; + using lr = line_regex; + + // Test line_char. + // + { + static_assert (is_pod<lc>::value && !is_array<lc>::value, + "line_char must be char-like"); + + // Zero-initialed line_char should be the null-char as required by + // char_traits<>::length() specification. + // + assert (lc () == lc::nul); + + line_pool p; + + assert (lc::eof == -1); + assert (lc::nul == 0); + + enum meta {mn = 'n', mp = 'p'}; + + // Special comparison. + // + assert (lc ('0') == lc ('0')); + assert (lc ('0') == '0'); + assert (lc ('n') == mn); + assert (mn == static_cast<meta> (lc ('n'))); + + assert (lc ('0') != lc ('1')); + assert (lc ('0') != '1'); + assert (lc ('n') != mp); + assert (lc ('0') != lc ("0", p)); + assert (lc ('0') != lc (regex ("0"), p)); + + assert (lc ('0') < lc ('1')); + assert (lc ('0') < '1'); + assert (lc ('1') < lc ("0", p)); + assert (lc ('n') < mp); + + assert (lc ('0') <= '1'); + assert (lc ('0') <= lc ('1')); + assert (lc ('n') <= mn); + assert (lc ('1') <= lc ("0", p)); + + // Literal comparison. 
+ // + assert (lc ("a", p) == lc ("a", p)); + assert (lc ("a", p).literal == lc ("a", p).literal); + assert (char (lc ("a", p)) == '\a'); + + assert (lc ("a", p) != lc ("b", p)); + assert (!(lc ("a", p) != lc (regex ("a"), p))); // Matches. + assert (lc ("a", p) != lc (regex ("b"), p)); + + assert (lc ("a", p) < lc ("b", p)); + assert (!(lc ("a", p) < lc (regex ("a"), p))); // Matches. + + assert (lc ("a", p) <= lc ("b", p)); + assert (lc ("a", p) <= lc (regex ("a"), p)); + assert (lc ("a", p) < lc (regex ("c"), p)); + + // Regex comparison. + // + assert (lc ("a", p) == lc (regex ("a|b"), p)); + assert (lc (regex ("a|b"), p) == lc ("a", p)); + } + + // Test char_traits<line_char>. + // + { + using ct = char_traits<lc>; + using vc = vector<lc>; + + lc c; + ct::assign (c, '0'); + assert (c == ct::char_type ('0')); + + assert (ct::to_char_type (c) == c); + assert (ct::to_int_type (c) == c); + + assert (ct::eq_int_type (c, c)); + assert (!ct::eq_int_type (c, lc::eof)); + + assert (ct::eof () == lc::eof); + + assert (ct::not_eof (c) == c); + assert (ct::not_eof (lc::eof) != lc::eof); + + ct::assign (&c, 1, '1'); + assert (c == ct::int_type ('1')); + + assert (ct::eq (lc ('0'), lc ('0'))); + assert (ct::lt (lc ('0'), lc ('1'))); + + vc v1 ({'0', '1', '2'}); + vc v2 (3, lc::nul); + + assert (ct::find (v1.data (), 3, '1') == v1.data () + 1); + + ct::copy (v2.data (), v1.data (), 3); + assert (v2 == v1); + + v2.push_back (lc::nul); + assert (ct::length (v2.data ()) == 3); + + // Overlaping ranges. + // + ct::move (v1.data () + 1, v1.data (), 2); + assert (v1 == vc ({'0', '0', '1'})); + + v1 = vc ({'0', '1', '2'}); + ct::move (v1.data (), v1.data () + 1, 2); + assert (v1 == vc ({'1', '2', '2'})); + } + + // Test line_char_locale and ctype<line_char> (only non-trivial functions). + // + { + using ct = ctype<lc>; + + line_char_locale l; + assert (has_facet<ct> (l)); + + // It is better not to create q facet on stack as it is + // reference-countable. 
+ // + const ct& t (use_facet<ct> (l)); + line_pool p; + + assert (t.is (ct::digit, '0')); + assert (!t.is (ct::digit, '?')); + assert (!t.is (ct::digit, lc ("0", p))); + + const lc chars[] = { '0', '?' }; + ct::mask m[2]; + + const lc* b (chars); + const lc* e (chars + 2); + + // Cast flag value to mask type and compare to mask. + // + auto fl = [] (ct::mask m, ct::mask f) {return m == f;}; + + t.is (b, e, m); + assert (fl (m[0], ct::digit) && fl (m[1], 0)); + + assert (t.scan_is (ct::digit, b, e) == b); + assert (t.scan_is (0, b, e) == b + 1); + + assert (t.scan_not (ct::digit, b, e) == b + 1); + assert (t.scan_not (0, b, e) == b); + + { + char nr[] = "0?"; + lc wd[2]; + t.widen (nr, nr + 2, wd); + assert (wd[0] == b[0] && wd[1] == b[1]); + } + + { + lc wd[] = {'0', lc ("a", p)}; + char nr[2]; + t.narrow (wd, wd + 2, '-', nr); + assert (nr[0] == '0' && nr[1] == '-'); + } + } + + // Test regex_traits<line_char>. Functions other that value() are trivial. + // + { + regex_traits<lc> t; + + const int radix[] = {8, 10}; // Radix 16 is not supported by line_char. + const char digits[] = "0123456789ABCDEF"; + + for (size_t r (0); r < 2; ++r) + { + for (int i (0); i < radix[r]; ++i) + assert (t.value (digits[i], radix[r]) == i); + } + } + + // Test line_regex construction. + // + { + line_pool p; + lr r1 ({lc ("foo", p), lc (regex ("ba(r|z)"), p)}, move (p)); + + lr r2 (move (r1)); + assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2)); + assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2)); + } + + // Test line_regex match. 
+ // + { + line_pool p; + + const lc foo ("foo", p); + const lc bar ("bar", p); + const lc baz ("baz", p); + const lc blank ("", p); + + assert (regex_match (ls ({foo, bar}), lr ({foo, bar}))); + assert (!regex_match (ls ({foo, baz}), lr ({foo, bar}))); + + assert (regex_match (ls ({bar, foo}), + lr ({'(', foo, '|', bar, ')', '+'}))); + + assert (regex_match (ls ({foo, foo}), lr ({'(', foo, ')', '\\', '1'}))); + + assert (regex_match (ls ({foo}), lr ({lc (regex ("fo+"), p)}))); + assert (regex_match (ls ({foo}), lr ({lc (regex (".*"), p)}))); + assert (regex_match (ls ({blank}), lr ({lc (regex (".*"), p)}))); + + assert (regex_match (ls ({blank, blank, foo}), + lr ({blank, '*', foo, blank, '*'}))); + + assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'}))); + + assert (regex_match (ls ({blank, blank}), + lr ({blank, '*', foo, '?', blank, '*'}))); + + assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'}))); + assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'}))); + + assert (regex_match (ls ({foo, foo}), + lr ({foo, '{', '1', ',', '2', '}'}))); + + assert (!regex_match (ls ({foo, foo}), + lr ({foo, '{', '3', ',', '4', '}'}))); + + assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo}))); + assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo}))); + } +} |