aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build2/buildfile1
-rw-r--r--build2/test/script/parser.cxx26
-rw-r--r--build2/test/script/regex625
-rw-r--r--build2/test/script/regex.cxx290
-rw-r--r--build2/test/script/runner.cxx28
-rw-r--r--build2/test/script/script27
-rw-r--r--build2/test/script/script.cxx80
-rw-r--r--unit-tests/test/script/buildfile2
-rw-r--r--unit-tests/test/script/parser/buildfile2
-rw-r--r--unit-tests/test/script/regex/buildfile12
-rw-r--r--unit-tests/test/script/regex/driver.cxx252
11 files changed, 1213 insertions, 132 deletions
diff --git a/build2/buildfile b/build2/buildfile
index f166ecd..37f0dc0 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -85,6 +85,7 @@ exe{b}: \
test/script/{hxx cxx}{ builtin } \
test/script/{hxx cxx}{ lexer } \
test/script/{hxx cxx}{ parser } \
+test/script/{hxx cxx}{ regex } \
test/script/{hxx cxx}{ runner } \
test/script/{hxx ixx cxx}{ script } \
test/script/{hxx cxx}{ token } \
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 50d4f67..da61c64 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -1277,7 +1277,9 @@ namespace build2
// enter: first token of the command line
// leave: <newline>
- command_expr expr {{expr_operator::log_and, {}}};
+ command_expr expr;
+ expr.emplace_back (expr_term ());
+
command c; // Command being assembled.
// Make sure the command makes sense.
@@ -1517,18 +1519,18 @@ namespace build2
switch (tt)
{
case type::in_pass:
- case type::out_pass: rt = redirect_type::pass; break;
+ case type::out_pass: rt = redirect_type::pass; break;
case type::in_null:
- case type::out_null: rt = redirect_type::null; break;
+ case type::out_null: rt = redirect_type::null; break;
- case type::out_merge: rt = redirect_type::merge; break;
+ case type::out_merge: rt = redirect_type::merge; break;
case type::in_str:
- case type::out_str: rt = redirect_type::here_string; break;
+ case type::out_str: rt = redirect_type::here_str_literal; break;
case type::in_doc:
- case type::out_doc: rt = redirect_type::here_document; break;
+ case type::out_doc: rt = redirect_type::here_doc_literal; break;
case type::in_file:
case type::out_file: rt = redirect_type::file; break;
@@ -1551,7 +1553,7 @@ namespace build2
case 2: p = pending::err_merge; break;
}
break;
- case redirect_type::here_string:
+ case redirect_type::here_str_literal:
switch (fd)
{
case 0: p = pending::in_string; break;
@@ -1559,7 +1561,7 @@ namespace build2
case 2: p = pending::err_string; break;
}
break;
- case redirect_type::here_document:
+ case redirect_type::here_doc_literal:
switch (fd)
{
case 0: p = pending::in_document; break;
@@ -1567,6 +1569,10 @@ namespace build2
case 2: p = pending::err_document; break;
}
break;
+
+ case redirect_type::here_str_regex: // @@ REGEX
+ case redirect_type::here_doc_regex: assert (false); break;
+
case redirect_type::file:
switch (fd)
{
@@ -2121,8 +2127,8 @@ namespace build2
command& c (p.first[h.expr].pipe[h.pipe]);
redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err);
- r.doc.doc = move (v);
- r.doc.end = move (h.end);
+ r.str = move (v);
+ r.end = move (h.end);
}
expire_mode ();
diff --git a/build2/test/script/regex b/build2/test/script/regex
index c6e711c..cfc6031 100644
--- a/build2/test/script/regex
+++ b/build2/test/script/regex
@@ -5,7 +5,11 @@
#ifndef BUILD2_TEST_SCRIPT_REGEX
#define BUILD2_TEST_SCRIPT_REGEX
+#include <list>
#include <regex>
+#include <locale>
+#include <type_traits> // make_unsigned, is_unsigned
+#include <unordered_set>
#include <build2/types>
#include <build2/utility>
@@ -16,59 +20,592 @@ namespace build2
{
namespace script
{
- // @@ How can we get the position the first line_char in the output that
- // did not match, for diagnostics? Failed that it will be very hard
- // to debug match failures.
- //
-
- using char_string = std::basic_string<char>;
- using char_regex = std::basic_regex<char>;
-
- // Newlines are line separators and are not part of the line:
- //
- // line<newline>line<newline>
- //
- // Specifically, this means that a customary trailing newline creates a
- // trailing blank line.
- //
- // Special characters should only be compared to special. All others
- // can inter-compare (though there cannot be regex characters in the
- // output, only in line_regex).
- //
- enum class line_type
+ namespace regex
{
- blank,
- special,
- literal,
- regex
- };
+ using char_string = std::basic_string<char>;
+ using char_regex = std::basic_regex<char>;
- struct line_char
- {
- line_type type;
+ // Newlines are line separators and are not part of the line:
+ //
+ // line<newline>line<newline>
+ //
+ // Specifically, this means that a customary trailing newline creates a
+ // trailing blank line.
+ //
+ // All characters can inter-compare (though there cannot be regex
+ // characters in the output, only in line_regex).
+ //
+ // Note that we assume that line_regex and the input to regex_match()
+ // use the same pool.
+ //
+ struct line_pool
+ {
+ // Note that we assume the pool can be moved without invalidating
+ // pointers to any already pooled entities.
+ //
+ std::unordered_set<char_string> strings;
+ std::list<char_regex> regexes;
+ };
+
+ enum class line_type
+ {
+ special,
+ literal,
+ regex
+ };
+
+ struct line_char
+ {
+ line_type type;
+
+ union
+ {
+ int special; // 0 (nul character)
+ // -1 (EOF)
+ // [()|.*+?{\}0123456789,=!] (excluding [])
+
+ const char_string* literal;
+ const char_regex* regex;
+ };
+
+ static const line_char nul;
+ static const line_char eof;
+
+ // Note: creates an uninitialized value.
+ //
+ line_char () = default;
+
+ // Create a special character.
+ //
+ // Note that the constructor is implicit to allow basic_regex to
+ // implicitly construct line_chars from special char literals (in
+ // particular libstdc++ appends them to an internal line_string).
+ //
+ // Also note that we extend the valid characters set (see above) with
+ // 'p', 'n' (used by libstdc++ for positive/negative look-ahead
+ // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used
+ // by libstdc++ for newline/newparagraph matching).
+ //
+ line_char (int);
+
+ // Create a literal character.
+ //
+ // Don't copy string if already pooled.
+ //
+ explicit
+ line_char (const char_string&, line_pool&);
+
+ explicit
+ line_char (char_string&&, line_pool&);
+
+ explicit
+ line_char (const char_string* s) // Assume already pooled.
+ : type (line_type::literal), literal (s) {}
+
+ // Create a regex character.
+ //
+ explicit
+ line_char (char_regex, line_pool&);
+
+ explicit
+ line_char (const char_regex* r) // Assume already pooled.
+ : type (line_type::regex), regex (r) {}
+
+ // Provide basic_regex with the ability to use line_char in a context
+ // where a char value is expected (e.g., as a function argument).
+ //
+ // libstdc++ seems to cast special line_chars only (and such a
+ // conversion is meaningful).
+ //
+ // msvcrt casts line_chars of arbitrary types instead. The only
+ // reasonable strategy is to return a value that differs from any
+ // other that can be encountered in a regex expression and so is
+ // unlikely to be misinterpreted.
+ //
+ operator char () const
+ {
+ return type == line_type::special ? special : '\a'; // BELL.
+ }
- union
- { // Uninitialized if type is blank.
- char special; // [()|*+?{\}0123456789,=!] (excluding []).
- char_string literal;
- char_regex regex;
+ // Provide basic_regex (such as from msvcrt) with the ability to
+ // explicitly cast line_chars to implementation-specific enums.
+ //
+ template <typename T>
+ explicit
+ operator T () const
+ {
+ assert (type == line_type::special);
+ return static_cast<T> (special);
+ }
};
- };
- // Note: line_string is not NUL-terminated.
- //
- using line_string = vector<line_char>;
+ // Perform "deep" characters comparison (for example match literal
+ // character with a regex character), rather than just compare them
+ // literally. At least one argument must be of a type other than regex
+ // as there is no operator==() defined to compare regexes. Characters
+ // of the literal type must share the same pool (strings are compared
+ // by pointers not by values).
+ //
+ bool
+ operator== (const line_char&, const line_char&);
+
+ // Return false if arguments are equal (operator==() returns true).
+ // Otherwise if types are different return the value implying that
+ // special < literal < regex. If types are special or literal return
+ // the result of the respective characters or strings comparison. At
+ // least one argument must be of a type other than regex as there is no
+ // operator<() defined to compare regexes.
+ //
+ // While not very natural operation for the class we have, we have to
+ // provide some meaningful semantics for such a comparison as it is
+ // required by the char_traits<line_char> specialization. While we
+ // could provide it right in that specialization, let's keep it here
+ // for basic_regex implementations that potentially can compare
+ // line_chars as they compare them with expressions of other types (see
+ // below).
+ //
+ bool
+ operator< (const line_char&, const line_char&);
+
+ inline bool
+ operator!= (const line_char& l, const line_char& r)
+ {
+ return !(l == r);
+ }
+
+ inline bool
+ operator<= (const line_char& l, const line_char& r)
+ {
+ return l < r || l == r;
+ }
+
+ // Provide basic_regex (such as from msvcrt) with the ability to
+ // compare line_char to a value of an integral or
+ // implementation-specific enum type. In the absence of the following
+ // template operators, such comparisons would be ambiguous for
+ // integral types (given that there are implicit conversions
+ // int->line_char and line_char->char) and impossible for enums.
+ //
+ // Note that these == and < operators can succeed only for a line_char
+ // of the special type. For other types they always return false. That
+ // in particular leads to the following case:
+ //
+ // (lc != c) != (lc < c || c < lc).
+ //
+ // Note that we can not assert line_char is of the special type as
+ // basic_regex (such as from libc++) may need the ability to check if
+ // arbitrary line_char belongs to some special characters range (like
+ // ['0', '9']).
+ //
+ template <typename T>
+ struct line_char_cmp
+ : public std::enable_if<std::is_integral<T>::value ||
+ std::is_enum<T>::value>
+ {
+ };
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator== (const line_char& l, const T& r)
+ {
+ return
+ l.type == line_type::special && static_cast<T> (l.special) == r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator== (const T& l, const line_char& r)
+ {
+ return
+ r.type == line_type::special && static_cast<T> (r.special) == l;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator!= (const line_char& l, const T& r)
+ {
+ return !(l == r);
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator!= (const T& l, const line_char& r)
+ {
+ return !(l == r);
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator< (const line_char& l, const T& r)
+ {
+ return
+ l.type == line_type::special && static_cast<T> (l.special) < r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator< (const T& l, const line_char& r)
+ {
+ return
+ r.type == line_type::special && l < static_cast<T> (r.special);
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ inline bool
+ operator<= (const line_char& l, const T& r)
+ {
+ return l < r || l == r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ inline bool
+ operator<= (const T& l, const line_char& r)
+ {
+ return l < r || l == r;
+ }
+
+ using line_string = std::basic_string<line_char>;
+
+ // Locale that has ctype<line_char> facet installed. Used in the
+ // regex_traits<line_char> specialization (see below).
+ //
+ class line_char_locale: public std::locale
+ {
+ public:
+ // Create a copy of the global C++ locale.
+ //
+ line_char_locale ();
+ };
+ }
+ }
+ }
+}
+
+// Standard template specializations for line_char that are required for the
+// basic_regex<line_char> instantiation.
+//
+namespace std
+{
+ template <>
+ class char_traits<build2::test::script::regex::line_char>
+ {
+ public:
+ using char_type = build2::test::script::regex::line_char;
+ using int_type = char_type;
+ using off_type = char_traits<char>::off_type;
+ using pos_type = char_traits<char>::pos_type;
+ using state_type = char_traits<char>::state_type;
+
+ static void
+ assign (char_type& c1, const char_type& c2) {c1 = c2;}
+
+ static char_type*
+ assign (char_type*, size_t, char_type);
+
+ // Note that eq() and lt() are not constexpr (as required by C++11)
+ // because == and < operators for char_type are not constexpr.
+ //
+ static bool
+ eq (const char_type& l, const char_type& r) {return l == r;}
+
+ static bool
+ lt (const char_type& l, const char_type& r) {return l < r;}
+
+ static char_type*
+ move (char_type*, const char_type*, size_t);
+
+ static char_type*
+ copy (char_type*, const char_type*, size_t);
+
+ static int
+ compare (const char_type*, const char_type*, size_t);
+
+ static size_t
+ length (const char_type*);
+
+ static const char_type*
+ find (const char_type*, size_t, const char_type&);
+
+ static constexpr char_type
+ to_char_type (const int_type& c) {return c;}
+
+ static constexpr int_type
+ to_int_type (const char_type& c) {return int_type (c);}
+
+ // Note that the following functions are not constexpr (as required by
+ // C++11) because their return expressions are not constexpr.
+ //
+ static bool
+ eq_int_type (const int_type& l, const int_type& r) {return l == r;}
+
+ static int_type eof () {return char_type::eof;}
+
+ static int_type
+ not_eof (const int_type& c)
+ {
+ return c != char_type::eof ? c : char_type::nul;
+ }
+ };
+
+ // ctype<> must be derived from both ctype_base and locale::facet (the latter
+ // supports ref-counting used by the std::locale implementation internally).
+ //
+ // msvcrt for some reason also derives ctype_base from locale::facet which
+ // produces "already a base-class" warning and effectively breaks the
+ // reference counting. So we derive from ctype_base only in this case.
+ //
+ template <>
+ class ctype<build2::test::script::regex::line_char>: public ctype_base
+#if !defined(_MSC_VER) || _MSC_VER > 1910
+ , public locale::facet
+#endif
+ {
+ // Used by the implementation only.
+ //
+ using line_type = build2::test::script::regex::line_type;
+
+ public:
+ using char_type = build2::test::script::regex::line_char;
+
+ static locale::id id;
- class line_regex: public std::basic_regex<line_char>
+#if !defined(_MSC_VER) || _MSC_VER > 1910
+ explicit
+ ctype (size_t refs = 0): locale::facet (refs) {}
+#else
+ explicit
+ ctype (size_t refs = 0): ctype_base (refs) {}
+#endif
+
+ // While unnecessary, let's keep for completeness.
+ //
+ virtual
+ ~ctype () override = default;
+
+ // The C++ standard requires the following functions to call their virtual
+ // (protected) do_*() counterparts that provide the real implementations.
+ // The only purpose for this indirection is to provide a user with the
+ // ability to customize existing (standard) ctype facets. As we do not
+ // provide such an ability, for simplicity we will omit the do_*()
+ // functions and provide the implementations directly. This should be safe
+ // as nobody except us could call those protected functions.
+ //
+ bool
+ is (mask m, char_type c) const
+ {
+ return m == (c.type == line_type::special && build2::digit (c.special)
+ ? digit
+ : 0);
+ }
+
+ const char_type*
+ is (const char_type*, const char_type*, mask*) const;
+
+ const char_type*
+ scan_is (mask, const char_type*, const char_type*) const;
+
+ const char_type*
+ scan_not (mask, const char_type*, const char_type*) const;
+
+ char_type
+ toupper (char_type c) const {return c;}
+
+ const char_type*
+ toupper (char_type*, const char_type* e) const {return e;}
+
+ char_type
+ tolower (char_type c) const {return c;}
+
+ const char_type*
+ tolower (char_type*, const char_type* e) const {return e;}
+
+ char_type
+ widen (char c) const {return char_type (c);}
+
+ const char*
+ widen (const char*, const char*, char_type*) const;
+
+ char
+ narrow (char_type c, char def) const
+ {
+ return c.type == line_type::special ? c.special : def;
+ }
+
+ const char_type*
+ narrow (const char_type*, const char_type*, char, char*) const;
+ };
+
+ // Note: the current application locale must be the POSIX one. Otherwise the
+ // behavior is undefined.
+ //
+ template <>
+ class regex_traits<build2::test::script::regex::line_char>
+ {
+ public:
+ using char_type = build2::test::script::regex::line_char;
+ using string_type = build2::test::script::regex::line_string;
+ using locale_type = build2::test::script::regex::line_char_locale;
+ using char_class_type = regex_traits<char>::char_class_type;
+
+ // Workaround for msvcrt bugs. For some reason it assumes such members
+ // to be present in a regex_traits specialization.
+ //
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+ static const ctype_base::mask _Ch_upper = ctype_base::upper;
+ static const ctype_base::mask _Ch_alpha = ctype_base::alpha;
+
+ // Unsigned char_type. msvcrt statically asserts the _Uelem type is
+ // unsigned, so we specialize is_unsigned<line_char> as well (see below).
+ //
+ using _Uelem = char_type;
+#endif
+
+ regex_traits () = default; // Unnecessary but let's keep for completeness.
+
+ static size_t
+ length (const char_type* p) {return string_type::traits_type::length (p);}
+
+ char_type
+ translate (char_type c) const {return c;}
+
+ // Case-insensitive matching is not supported by line_regex. So there is no
+ // reason for the function to be called.
+ //
+ char_type
+ translate_nocase (char_type c) const {assert (false); return c;}
+
+ // Return a sort-key - the exact copy of [b, e).
+ //
+ template <typename I>
+ string_type
+ transform (I b, I e) const {return string_type (b, e);}
+
+ // Return a case-insensitive sort-key. Case-insensitive matching is not
+ // supported by line_regex. So there is no reason for the function to be
+ // called.
+ //
+ template <typename I>
+ string_type
+ transform_primary (I b, I e) const
+ {
+ assert (false);
+ return string_type (b, e);
+ }
+
+ // POSIX regex grammar and collating elements (e.g., [.tilde.]) in
+ // particular are not supported. So there is no reason for the function to
+ // be called.
+ //
+ template <typename I>
+ string_type
+ lookup_collatename (I, I) const {assert (false); return string_type ();}
+
+ // Character classes (e.g., [:lower:]) are not supported. So there is no
+ // reason for the function to be called.
+ //
+ template <typename I>
+ char_class_type
+ lookup_classname (I, I, bool = false) const
+ {
+ assert (false);
+ return char_class_type ();
+ }
+
+ // Return false as we don't support character classes (e.g., [:lower:]).
+ //
+ bool
+ isctype (char_type, char_class_type) const {return false;}
+
+ int
+ value (char_type, int) const;
+
+ // Return the locale passed as an argument as we do not expect anything
+ // other than POSIX locale, that we also assume to be imbued by default.
+ //
+ locale_type
+ imbue (locale_type l) {return l;}
+
+ locale_type
+ getloc () const {return locale_type ();}
+ };
+
+ // We assume line_char to be an unsigned type and express that with the
+ // following specializations used by basic_regex implementations.
+ //
+ // libstdc++ defines unsigned CharT type (regex_traits template parameter)
+ // to use as an index in some internal cache regardless if the cache is used
+ // for this specialization (and the cache is used only if CharT is char).
+ //
+ template <>
+ struct make_unsigned<build2::test::script::regex::line_char>
+ {
+ using type = build2::test::script::regex::line_char;
+ };
+
+ // msvcrt assumes regex_traits<line_char>::_Uelem to be present (see above)
+ // and statically asserts it is unsigned.
+ //
+ template <>
+ struct is_unsigned<build2::test::script::regex::line_char>
+ {
+ static const bool value = true;
+ };
+
+ // When used with libc++ the linker complains that it can't find
+ // __match_any_but_newline<line_char>::__exec() function. The problem is
+ // that the function is only specialized for char and wchar_t. As line_char
+ // has no notion of the newline character we specialize the class template
+ // to behave as the __match_any<line_char> instantiation does (that luckily
+ // has all the functions in place).
+ //
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 4000
+ template <>
+ class __match_any_but_newline<build2::test::script::regex::line_char>
+ : public __match_any<build2::test::script::regex::line_char>
+ {
+ public:
+ using base = __match_any<build2::test::script::regex::line_char>;
+ using base::base;
+ };
+#endif
+}
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ namespace regex
{
- public:
- using base_type = std::basic_regex<line_char>;
+ class line_regex: public std::basic_regex<line_char>
+ {
+ public:
+ using base_type = std::basic_regex<line_char>;
+
+ using base_type::base_type;
+
+ line_regex () = default;
- using base_type::base_type;
+ // Move constructible-only type.
+ //
+ line_regex (line_regex&&) = default;
+ line_regex (const line_regex&) = delete;
- explicit
- line_regex (const line_string&);
- };
+ // Move string regex together with the pool used to create it.
+ //
+ line_regex (line_string&& s, line_pool&& p)
+ // No move-string ctor for base_type, so emulate it.
+ //
+ : base_type (s), pool (move (p)) {s.clear ();}
+
+ line_regex& operator= (line_regex&&) = delete;
+ line_regex& operator= (const line_regex&) = delete;
+
+ public:
+ line_pool pool;
+ };
+ }
}
}
}
diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx
new file mode 100644
index 0000000..c6fba75
--- /dev/null
+++ b/build2/test/script/regex.cxx
@@ -0,0 +1,290 @@
+// file : build2/test/script/regex.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/test/script/regex>
+
+#include <algorithm> // copy(), copy_backward()
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ namespace regex
+ {
+ const line_char line_char::nul (0);
+ const line_char line_char::eof (-1);
+
+ // line_char
+ //
+ line_char::
+ line_char (int c)
+ : type (line_type::special), special (c)
+ {
+ // @@ How can we allow anything for basic_regex but only subset
+ // for our own code?
+ //
+ const char sp[] = "()|.*+?{\\}0123456789,=!";
+ const char ex[] = "pn\n\r";
+
+ assert (c == 0 || // Null character.
+
+ // EOF. Note that it is also passed by msvcrt as the _Meta_eos
+ // enum value.
+ //
+ c == -1 ||
+
+ // libstdc++ line/paragraph separators.
+ //
+ c == u'\u2028' || c == u'\u2029' ||
+
+ (c > 0 && c <= 255 && (
+ // Supported regex special characters.
+ //
+ string::traits_type::find (sp, 23, c) != nullptr ||
+
+ // libstdc++ look-ahead tokens, newline chars.
+ //
+ string::traits_type::find (ex, 4, c) != nullptr)));
+ }
+
+ line_char::
+ line_char (const char_string& s, line_pool& p)
+ : line_char (&(*p.strings.emplace (s).first))
+ {
+ }
+
+ line_char::
+ line_char (char_string&& s, line_pool& p)
+ : line_char (&(*p.strings.emplace (move (s)).first))
+ {
+ }
+
+ line_char::
+ line_char (char_regex r, line_pool& p)
+ // Note: in C++17 can write as p.regexes.emplace_front(move (r))
+ //
+ : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r))))
+ {
+ }
+
+ bool
+ operator== (const line_char& l, const line_char& r)
+ {
+ if (l.type == r.type)
+ {
+ bool res (true);
+
+ switch (l.type)
+ {
+ case line_type::special: res = l.special == r.special; break;
+ case line_type::regex: assert (false); break;
+
+ // Note that we use pointer (rather than value) comparison
+ // assuming that the strings must belong to the same pool.
+ //
+ case line_type::literal: res = l.literal == r.literal; break;
+ }
+
+ return res;
+ }
+
+ // Match literal with regex.
+ //
+ if (l.type == line_type::literal && r.type == line_type::regex)
+ return regex_match (*l.literal, *r.regex);
+ else if (r.type == line_type::literal && l.type == line_type::regex)
+ return regex_match (*r.literal, *l.regex);
+
+ return false;
+ }
+
+ bool
+ operator< (const line_char& l, const line_char& r)
+ {
+ if (l == r)
+ return false;
+
+ if (l.type != r.type)
+ return l.type < r.type;
+
+ bool res (false);
+
+ switch (l.type)
+ {
+ case line_type::special: res = l.special < r.special; break;
+ case line_type::literal: res = *l.literal < *r.literal; break;
+ case line_type::regex: assert (false); break;
+ }
+
+ return res;
+ }
+
+ // line_char_locale
+ //
+ line_char_locale::
+ line_char_locale ()
+ : locale (locale (),
+ new std::ctype<line_char> ()) // Hidden by ctype bitmask.
+ {
+ }
+ }
+ }
+ }
+}
+
+namespace std
+{
+ using namespace build2::test::script::regex;
+
+ // char_traits<line_char>
+ //
+ line_char* char_traits<line_char>::
+ assign (char_type* s, size_t n, char_type c)
+ {
+ for (size_t i (0); i != n; ++i)
+ s[i] = c;
+ return s;
+ }
+
+ line_char* char_traits<line_char>::
+ move (char_type* d, const char_type* s, size_t n)
+ {
+ if (n > 0 && d != s)
+ {
+ // If d < s then it can't be in [s, s + n) range and so using copy() is
+ // safe. Otherwise d + n is out of (s, s + n] range and so using
+ // copy_backward() is safe.
+ //
+ if (d < s)
+ std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
+ else
+ copy_backward (s, s + n, d + n);
+ }
+
+ return d;
+ }
+
+ line_char* char_traits<line_char>::
+ copy (char_type* d, const char_type* s, size_t n)
+ {
+ std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
+ return d;
+ }
+
+ int char_traits<line_char>::
+ compare (const char_type* s1, const char_type* s2, size_t n)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ if (s1[i] < s2[i])
+ return -1;
+ else if (s2[i] < s1[i])
+ return 1;
+ }
+
+ return 0;
+ }
+
+ size_t char_traits<line_char>::
+ length (const char_type* s)
+ {
+ size_t i (0);
+ while (s[i] != char_type::nul)
+ ++i;
+
+ return i;
+ }
+
+ const line_char* char_traits<line_char>::
+ find (const char_type* s, size_t n, const char_type& c)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ if (s[i] == c)
+ return s + i;
+ }
+
+ return nullptr;
+ }
+
+ // ctype<line_char>
+ //
+ locale::id ctype<line_char>::id;
+
+ const line_char* ctype<line_char>::
+ is (const char_type* b, const char_type* e, mask* m) const
+ {
+ while (b != e)
+ {
+ const char_type& c (*b++);
+
+ *m++ = c.type == line_type::special && build2::digit (c.special)
+ ? digit
+ : 0;
+ }
+
+ return e;
+ }
+
+ const line_char* ctype<line_char>::
+ scan_is (mask m, const char_type* b, const char_type* e) const
+ {
+ for (; b != e; ++b)
+ {
+ if (is (m, *b))
+ return b;
+ }
+
+ return e;
+ }
+
+ const line_char* ctype<line_char>::
+ scan_not (mask m, const char_type* b, const char_type* e) const
+ {
+ for (; b != e; ++b)
+ {
+ if (!is (m, *b))
+ return b;
+ }
+
+ return e;
+ }
+
+ const char* ctype<line_char>::
+ widen (const char* b, const char* e, char_type* c) const
+ {
+ while (b != e)
+ *c++ = widen (*b++);
+
+ return e;
+ }
+
+ const line_char* ctype<line_char>::
+ narrow (const char_type* b, const char_type* e, char def, char* c) const
+ {
+ while (b != e)
+ *c++ = narrow (*b++, def);
+
+ return e;
+ }
+
+ // regex_traits<line_char>
+ //
+ int regex_traits<line_char>::
+ value (char_type c, int radix) const
+ {
+ assert (radix == 8 || radix == 10 || radix == 16);
+
+ if (c.type != line_type::special)
+ return -1;
+
+ const char digits[] = "0123456789ABCDEF";
+ const char* d (string::traits_type::find (digits, radix, c.special));
+ return d != nullptr ? d - digits : -1;
+ }
+}
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index 67d82c0..05b3b5c 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -79,8 +79,8 @@ namespace build2
input_info (d);
}
}
- else if (rd.type == redirect_type::here_string ||
- rd.type == redirect_type::here_document)
+ else if (rd.type == redirect_type::here_str_literal ||
+ rd.type == redirect_type::here_doc_literal)
{
assert (!op.empty ());
@@ -90,11 +90,7 @@ namespace build2
{
ofdstream os (orp);
sp.clean ({cleanup_type::always, orp}, true);
-
- os << (rd.type == redirect_type::here_string
- ? rd.str
- : rd.doc.doc);
-
+ os << rd.str;
os.close ();
}
catch (const io_error& e)
@@ -441,8 +437,8 @@ namespace build2
break;
}
- case redirect_type::here_string:
- case redirect_type::here_document:
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
{
// We could write to the command stdin directly but instead will
// cache the data for potential troubleshooting.
@@ -453,11 +449,7 @@ namespace build2
{
ofdstream os (isp);
sp.clean ({cleanup_type::always, isp}, true);
-
- os << (c.in.type == redirect_type::here_string
- ? c.in.str
- : c.in.doc.doc);
-
+ os << c.in.str;
os.close ();
}
catch (const io_error& e)
@@ -470,6 +462,8 @@ namespace build2
}
case redirect_type::merge: assert (false); break;
+ case redirect_type::here_str_regex: // @@ REGEX
+ case redirect_type::here_doc_regex: assert (false); break;
}
// Dealing with stdout and stderr redirect types other than 'null'
@@ -553,13 +547,15 @@ namespace build2
}
case redirect_type::none:
- case redirect_type::here_string:
- case redirect_type::here_document:
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
{
p = std_path (what);
m |= fdopen_mode::truncate;
break;
}
+ case redirect_type::here_str_regex: // @@ REGEX
+ case redirect_type::here_doc_regex: assert (false); break;
}
try
diff --git a/build2/test/script/script b/build2/test/script/script
index 714e2c5..bb202fa 100644
--- a/build2/test/script/script
+++ b/build2/test/script/script
@@ -15,6 +15,7 @@
#include <build2/test/target>
#include <build2/test/script/token> // replay_tokens
+#include <build2/test/script/regex>
namespace build2
{
@@ -70,8 +71,10 @@ namespace build2
pass,
null,
merge,
- here_string,
- here_document,
+ here_str_literal,
+ here_doc_literal,
+ here_str_regex,
+ here_doc_regex,
file
};
@@ -79,10 +82,10 @@ namespace build2
{
redirect_type type;
- struct doc_type
+ struct regex_type
{
- string doc; // Note: includes trailing newline, if required.
- string end;
+ regex::line_regex regex;
+ string str; // String representation for printing.
};
struct file_type
@@ -94,19 +97,21 @@ namespace build2
union
{
- int fd; // Merge-to descriptor.
- string str; // Note: includes trailing newline, if required.
- doc_type doc;
- file_type file;
+ int fd; // Merge-to descriptor.
+ string str; // Note: includes trailing newline, if requested.
+ regex_type regex; // Note: includes trailing blank, if requested.
+ file_type file;
};
+ string end; // Here-document end marker for printing.
+
explicit
redirect (redirect_type = redirect_type::none);
+ // Move constructible/assignable-only type.
+ //
redirect (redirect&&);
- redirect (const redirect&);
redirect& operator= (redirect&&);
- redirect& operator= (const redirect&);
~redirect ();
};
diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx
index 17eacaa..7941df6 100644
--- a/build2/test/script/script.cxx
+++ b/build2/test/script/script.cxx
@@ -84,7 +84,7 @@ namespace build2
case redirect_type::null: o << '-'; break;
case redirect_type::merge: o << '&' << r.fd; break;
- case redirect_type::here_string:
+ case redirect_type::here_str_literal:
{
const string& v (r.str);
bool nl (!v.empty () && v.back () == '\n');
@@ -95,16 +95,16 @@ namespace build2
to_stream_q (o, nl ? string (v, 0, v.size () - 1) : v);
break;
}
- case redirect_type::here_document:
+ case redirect_type::here_doc_literal:
{
- const string& v (r.doc.doc);
+ const string& v (r.str);
bool nl (!v.empty () && v.back () == '\n');
// Add another '>' or '<'. Note that here end marker never
// needs to be quoted.
//
o << d << (nl ? "" : ":");
- to_stream_q (o, r.doc.end);
+ to_stream_q (o, r.end);
break;
}
case redirect_type::file:
@@ -115,14 +115,16 @@ namespace build2
print_path (r.file.path);
break;
}
+ case redirect_type::here_str_regex: // @@ REGEX
+ case redirect_type::here_doc_regex: assert (false); break;
}
};
auto print_doc = [&o] (const redirect& r)
{
- const string& v (r.doc.doc);
+ const string& v (r.str);
bool nl (!v.empty () && v.back () == '\n');
- o << endl << v << (nl ? "" : "\n") << r.doc.end;
+ o << endl << v << (nl ? "" : "\n") << r.end;
};
if ((m & command_to_stream::header) == command_to_stream::header)
@@ -171,9 +173,9 @@ namespace build2
{
// Here-documents.
//
- if (c.in.type == redirect_type::here_document) print_doc (c.in);
- if (c.out.type == redirect_type::here_document) print_doc (c.out);
- if (c.err.type == redirect_type::here_document) print_doc (c.err);
+ if (c.in.type == redirect_type::here_doc_literal) print_doc (c.in);
+ if (c.out.type == redirect_type::here_doc_literal) print_doc (c.out);
+ if (c.err.type == redirect_type::here_doc_literal) print_doc (c.err);
}
}
@@ -238,15 +240,19 @@ namespace build2
case redirect_type::null:
case redirect_type::merge: break;
- case redirect_type::here_string: new (&str) string (); break;
- case redirect_type::here_document: new (&doc) doc_type (); break;
- case redirect_type::file: new (&file) file_type (); break;
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal: new (&str) string (); break;
+
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex: new (&regex) regex_type (); break;
+
+ case redirect_type::file: new (&file) file_type (); break;
}
}
redirect::
redirect (redirect&& r)
- : type (r.type)
+ : type (r.type), end (move (r.end))
{
switch (type)
{
@@ -256,14 +262,16 @@ namespace build2
case redirect_type::merge: fd = r.fd; break;
- case redirect_type::here_string:
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
{
new (&str) string (move (r.str));
break;
}
- case redirect_type::here_document:
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex:
{
- new (&doc) doc_type (move (r.doc));
+ new (&regex) regex_type (move (r.regex));
break;
}
case redirect_type::file:
@@ -275,28 +283,6 @@ namespace build2
}
redirect::
- redirect (const redirect& r)
- : type (r.type)
- {
- switch (type)
- {
- case redirect_type::none:
- case redirect_type::pass:
- case redirect_type::null: break;
-
- case redirect_type::merge: fd = r.fd; break;
-
- case redirect_type::here_string: new (&str) string (r.str); break;
- case redirect_type::here_document: new (&doc) doc_type (r.doc); break;
- case redirect_type::file:
- {
- new (&file) file_type (r.file);
- break;
- }
- }
- }
-
- redirect::
~redirect ()
{
switch (type)
@@ -306,9 +292,13 @@ namespace build2
case redirect_type::null:
case redirect_type::merge: break;
- case redirect_type::here_string: str.~string (); break;
- case redirect_type::here_document: doc.~doc_type (); break;
- case redirect_type::file: file.~file_type (); break;
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal: str.~string (); break;
+
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex: regex.~regex_type (); break;
+
+ case redirect_type::file: file.~file_type (); break;
}
}
@@ -323,14 +313,6 @@ namespace build2
return *this;
}
- redirect& redirect::
- operator= (const redirect& r)
- {
- if (this != &r)
- *this = redirect (r); // Reduce to move-assignment.
- return *this;
- }
-
// scope
//
scope::
diff --git a/unit-tests/test/script/buildfile b/unit-tests/test/script/buildfile
index 6b303e1..c5306b1 100644
--- a/unit-tests/test/script/buildfile
+++ b/unit-tests/test/script/buildfile
@@ -2,6 +2,6 @@
# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-d = lexer/ parser/
+d = lexer/ parser/ regex/
./: $d
include $d
diff --git a/unit-tests/test/script/parser/buildfile b/unit-tests/test/script/parser/buildfile
index ad32494..db75a4f 100644
--- a/unit-tests/test/script/parser/buildfile
+++ b/unit-tests/test/script/parser/buildfile
@@ -11,7 +11,7 @@ src = token lexer parser diagnostics utility variable name context target \
scope prerequisite file module operation rule b-options algorithm search \
filesystem function functions-builtin functions-path functions-process-path \
functions-string config/{utility init operation} dump types-parsers \
-test/{target script/{token lexer parser script}} \
+test/{target script/{token lexer parser regex script}} \
scheduler
exe{driver}: cxx{driver} ../../../../build2/cxx{$src} $libs \
diff --git a/unit-tests/test/script/regex/buildfile b/unit-tests/test/script/regex/buildfile
new file mode 100644
index 0000000..26c759a
--- /dev/null
+++ b/unit-tests/test/script/regex/buildfile
@@ -0,0 +1,12 @@
+# file : unit-tests/test/script/regex/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+#@@ Temporary until we get utility library support.
+#
+import libs = libbutl%lib{butl}
+src = test/script/regex
+
+exe{driver}: cxx{driver} ../../../../build2/cxx{$src} $libs
+
+include ../../../../build2/
diff --git a/unit-tests/test/script/regex/driver.cxx b/unit-tests/test/script/regex/driver.cxx
new file mode 100644
index 0000000..ca09048
--- /dev/null
+++ b/unit-tests/test/script/regex/driver.cxx
@@ -0,0 +1,252 @@
+// file : unit-tests/test/script/regex/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <regex>
+#include <type_traits> // is_pod, is_array
+
+#include <build2/test/script/regex>
+
+using namespace std;
+using namespace build2::test::script::regex;
+
+int
+main ()
+{
+ using lc = line_char;
+ using ls = line_string;
+ using lr = line_regex;
+
+ // Test line_char.
+ //
+ {
+ static_assert (is_pod<lc>::value && !is_array<lc>::value,
+ "line_char must be char-like");
+
+ // Zero-initialized line_char should be the null-char as required by
+ // char_traits<>::length() specification.
+ //
+ assert (lc () == lc::nul);
+
+ line_pool p;
+
+ assert (lc::eof == -1);
+ assert (lc::nul == 0);
+
+ enum meta {mn = 'n', mp = 'p'};
+
+ // Special comparison.
+ //
+ assert (lc ('0') == lc ('0'));
+ assert (lc ('0') == '0');
+ assert (lc ('n') == mn);
+ assert (mn == static_cast<meta> (lc ('n')));
+
+ assert (lc ('0') != lc ('1'));
+ assert (lc ('0') != '1');
+ assert (lc ('n') != mp);
+ assert (lc ('0') != lc ("0", p));
+ assert (lc ('0') != lc (regex ("0"), p));
+
+ assert (lc ('0') < lc ('1'));
+ assert (lc ('0') < '1');
+ assert (lc ('1') < lc ("0", p));
+ assert (lc ('n') < mp);
+
+ assert (lc ('0') <= '1');
+ assert (lc ('0') <= lc ('1'));
+ assert (lc ('n') <= mn);
+ assert (lc ('1') <= lc ("0", p));
+
+ // Literal comparison.
+ //
+ assert (lc ("a", p) == lc ("a", p));
+ assert (lc ("a", p).literal == lc ("a", p).literal);
+ assert (char (lc ("a", p)) == '\a');
+
+ assert (lc ("a", p) != lc ("b", p));
+ assert (!(lc ("a", p) != lc (regex ("a"), p))); // Matches.
+ assert (lc ("a", p) != lc (regex ("b"), p));
+
+ assert (lc ("a", p) < lc ("b", p));
+ assert (!(lc ("a", p) < lc (regex ("a"), p))); // Matches.
+
+ assert (lc ("a", p) <= lc ("b", p));
+ assert (lc ("a", p) <= lc (regex ("a"), p));
+ assert (lc ("a", p) < lc (regex ("c"), p));
+
+ // Regex comparison.
+ //
+ assert (lc ("a", p) == lc (regex ("a|b"), p));
+ assert (lc (regex ("a|b"), p) == lc ("a", p));
+ }
+
+ // Test char_traits<line_char>.
+ //
+ {
+ using ct = char_traits<lc>;
+ using vc = vector<lc>;
+
+ lc c;
+ ct::assign (c, '0');
+ assert (c == ct::char_type ('0'));
+
+ assert (ct::to_char_type (c) == c);
+ assert (ct::to_int_type (c) == c);
+
+ assert (ct::eq_int_type (c, c));
+ assert (!ct::eq_int_type (c, lc::eof));
+
+ assert (ct::eof () == lc::eof);
+
+ assert (ct::not_eof (c) == c);
+ assert (ct::not_eof (lc::eof) != lc::eof);
+
+ ct::assign (&c, 1, '1');
+ assert (c == ct::int_type ('1'));
+
+ assert (ct::eq (lc ('0'), lc ('0')));
+ assert (ct::lt (lc ('0'), lc ('1')));
+
+ vc v1 ({'0', '1', '2'});
+ vc v2 (3, lc::nul);
+
+ assert (ct::find (v1.data (), 3, '1') == v1.data () + 1);
+
+ ct::copy (v2.data (), v1.data (), 3);
+ assert (v2 == v1);
+
+ v2.push_back (lc::nul);
+ assert (ct::length (v2.data ()) == 3);
+
+ // Overlapping ranges.
+ //
+ ct::move (v1.data () + 1, v1.data (), 2);
+ assert (v1 == vc ({'0', '0', '1'}));
+
+ v1 = vc ({'0', '1', '2'});
+ ct::move (v1.data (), v1.data () + 1, 2);
+ assert (v1 == vc ({'1', '2', '2'}));
+ }
+
+ // Test line_char_locale and ctype<line_char> (only non-trivial functions).
+ //
+ {
+ using ct = ctype<lc>;
+
+ line_char_locale l;
+ assert (has_facet<ct> (l));
+
+ // It is better not to create a facet on the stack as it is
+ // reference-countable.
+ //
+ const ct& t (use_facet<ct> (l));
+ line_pool p;
+
+ assert (t.is (ct::digit, '0'));
+ assert (!t.is (ct::digit, '?'));
+ assert (!t.is (ct::digit, lc ("0", p)));
+
+ const lc chars[] = { '0', '?' };
+ ct::mask m[2];
+
+ const lc* b (chars);
+ const lc* e (chars + 2);
+
+ // Cast flag value to mask type and compare to mask.
+ //
+ auto fl = [] (ct::mask m, ct::mask f) {return m == f;};
+
+ t.is (b, e, m);
+ assert (fl (m[0], ct::digit) && fl (m[1], 0));
+
+ assert (t.scan_is (ct::digit, b, e) == b);
+ assert (t.scan_is (0, b, e) == b + 1);
+
+ assert (t.scan_not (ct::digit, b, e) == b + 1);
+ assert (t.scan_not (0, b, e) == b);
+
+ {
+ char nr[] = "0?";
+ lc wd[2];
+ t.widen (nr, nr + 2, wd);
+ assert (wd[0] == b[0] && wd[1] == b[1]);
+ }
+
+ {
+ lc wd[] = {'0', lc ("a", p)};
+ char nr[2];
+ t.narrow (wd, wd + 2, '-', nr);
+ assert (nr[0] == '0' && nr[1] == '-');
+ }
+ }
+
+ // Test regex_traits<line_char>. Functions other than value() are trivial.
+ //
+ {
+ regex_traits<lc> t;
+
+ const int radix[] = {8, 10}; // Radix 16 is not supported by line_char.
+ const char digits[] = "0123456789ABCDEF";
+
+ for (size_t r (0); r < 2; ++r)
+ {
+ for (int i (0); i < radix[r]; ++i)
+ assert (t.value (digits[i], radix[r]) == i);
+ }
+ }
+
+ // Test line_regex construction.
+ //
+ {
+ line_pool p;
+ lr r1 ({lc ("foo", p), lc (regex ("ba(r|z)"), p)}, move (p));
+
+ lr r2 (move (r1));
+ assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2));
+ assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2));
+ }
+
+ // Test line_regex match.
+ //
+ {
+ line_pool p;
+
+ const lc foo ("foo", p);
+ const lc bar ("bar", p);
+ const lc baz ("baz", p);
+ const lc blank ("", p);
+
+ assert (regex_match (ls ({foo, bar}), lr ({foo, bar})));
+ assert (!regex_match (ls ({foo, baz}), lr ({foo, bar})));
+
+ assert (regex_match (ls ({bar, foo}),
+ lr ({'(', foo, '|', bar, ')', '+'})));
+
+ assert (regex_match (ls ({foo, foo}), lr ({'(', foo, ')', '\\', '1'})));
+
+ assert (regex_match (ls ({foo}), lr ({lc (regex ("fo+"), p)})));
+ assert (regex_match (ls ({foo}), lr ({lc (regex (".*"), p)})));
+ assert (regex_match (ls ({blank}), lr ({lc (regex (".*"), p)})));
+
+ assert (regex_match (ls ({blank, blank, foo}),
+ lr ({blank, '*', foo, blank, '*'})));
+
+ assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'})));
+
+ assert (regex_match (ls ({blank, blank}),
+ lr ({blank, '*', foo, '?', blank, '*'})));
+
+ assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'})));
+ assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'})));
+
+ assert (regex_match (ls ({foo, foo}),
+ lr ({foo, '{', '1', ',', '2', '}'})));
+
+ assert (!regex_match (ls ({foo, foo}),
+ lr ({foo, '{', '3', ',', '4', '}'})));
+
+ assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo})));
+ assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo})));
+ }
+}