diff options
Diffstat (limited to 'libbuild2/test/script/regex.hxx')
-rw-r--r-- | libbuild2/test/script/regex.hxx | 684 |
1 files changed, 0 insertions, 684 deletions
diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx deleted file mode 100644 index 4114ea4..0000000 --- a/libbuild2/test/script/regex.hxx +++ /dev/null @@ -1,684 +0,0 @@ -// file : libbuild2/test/script/regex.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX -#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX - -#include <list> -#include <regex> -#include <locale> -#include <string> // basic_string -#include <type_traits> // make_unsigned, enable_if, is_* -#include <unordered_set> - -#include <libbuild2/types.hxx> -#include <libbuild2/utility.hxx> - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - using char_string = std::basic_string<char>; - - enum class char_flags: uint16_t - { - icase = 0x1, // Case-insensitive match. - idot = 0x2, // Invert '.' escaping. - - none = 0 - }; - - // Restricts valid standard flags to just {icase}, extends with custom - // flags {idot}. - // - class char_regex: public std::basic_regex<char> - { - public: - using base_type = std::basic_regex<char>; - - char_regex (const char_string&, char_flags = char_flags::none); - }; - - // Newlines are line separators and are not part of the line: - // - // line<newline>line<newline> - // - // Specifically, this means that a customary trailing newline creates a - // trailing blank line. - // - // All characters can inter-compare (though there cannot be regex - // characters in the output, only in line_regex). - // - // Note that we assume that line_regex and the input to regex_match() - // use the same pool. - // - struct line_pool - { - // Note that we assume the pool can be moved without invalidating - // pointers to any already pooled entities. - // - std::unordered_set<char_string> strings; - std::list<char_regex> regexes; - }; - - enum class line_type - { - special, - literal, - regex - }; - - struct line_char - { - // Steal last two bits from the pointer to store the type. - // - private: - std::uintptr_t data_; - - public: - line_type - type () const {return static_cast<line_type> (data_ & 0x3);} - - int - special () const - { - // Stored as (shifted) int16_t. Perform steps reversed to those - // that are described in the comment for the corresponding ctor. - // Note that the intermediate cast to uint16_t is required to - // portably preserve the -1 special character. - // - return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2)); - } - - const char_string* - literal () const - { - // Note that 2 rightmost bits are used for packaging line_char - // type. Read the comment for the corresponding ctor for details. - // - return reinterpret_cast<const char_string*> ( - data_ & ~std::uintptr_t (0x3)); - } - - const char_regex* - regex () const - { - // Note that 2 rightmost bits are used for packaging line_char - // type. Read the comment for the corresponding ctor for details. - // - return reinterpret_cast<const char_regex*> ( - data_ & ~std::uintptr_t (0x3)); - } - - static const line_char nul; - static const line_char eof; - - // Note: creates an uninitialized value. - // - line_char () = default; - - // Create a special character. The argument value must be one of the - // following ones: - // - // 0 (nul character) - // -1 (EOF) - // [()|.*+?{}\0123456789,=!] (excluding []) - // - // Note that the constructor is implicit to allow basic_regex to - // implicitly construct line_chars from special char literals (in - // particular libstdc++ appends them to an internal line_string). - // - // Also note that we extend the valid characters set (see above) with - // 'p', 'n' (used by libstdc++ for positive/negative look-ahead - // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used - // by libstdc++ for newline/newparagraph matching). - // - line_char (int); - - // Create a literal character. - // - // Don't copy string if already pooled. - // - explicit - line_char (const char_string&, line_pool&); - - explicit - line_char (char_string&&, line_pool&); - - explicit - line_char (const char_string* s) // Assume already pooled. - // - // Steal two bits from the pointer to package line_char type. - // Assume (and statically assert) that char_string address is a - // multiple of four. - // - : data_ (reinterpret_cast <std::uintptr_t> (s) | - static_cast <std::uintptr_t> (line_type::literal)) {} - - // Create a regex character. - // - explicit - line_char (char_regex, line_pool&); - - explicit - line_char (const char_regex* r) // Assume already pooled. - // - // Steal two bits from the pointer to package line_char type. - // Assume (and statically assert) that char_regex address is a - // multiple of four. - // - : data_ (reinterpret_cast <std::uintptr_t> (r) | - static_cast <std::uintptr_t> (line_type::regex)) {} - - // Provide basic_regex with the ability to use line_char in a context - // where a char value is expected (e.g., as a function argument). - // - // libstdc++ seems to cast special line_chars only (and such a - // conversion is meanigfull). - // - // msvcrt casts line_chars of arbitrary types instead. The only - // reasonable strategy is to return a value that differs from any - // other that can be encountered in a regex expression and so will - // unlikelly be misinterpreted. - // - operator char () const - { - return type () == line_type::special ? special () : '\a'; // BELL. - } - - // Return true if the character is a syntax (special) one. - // - static bool - syntax (char); - - // Provide basic_regex (such as from msvcrt) with the ability to - // explicitly cast line_chars to implementation-specific numeric - // types (enums, msvcrt's _Uelem, etc). - // - template <typename T> - explicit - operator T () const - { - assert (type () == line_type::special); - return static_cast<T> (special ()); - } - }; - - // Perform "deep" characters comparison (for example match literal - // character with a regex character), rather than just compare them - // literally. At least one argument must be of a type other than regex - // as there is no operator==() defined to compare regexes. Characters - // of the literal type must share the same pool (strings are compared - // by pointers not by values). - // - bool - operator== (const line_char&, const line_char&); - - // Return false if arguments are equal (operator==() returns true). - // Otherwise if types are different return the value implying that - // special < literal < regex. If types are special or literal return - // the result of the respective characters or strings comparison. At - // least one argument must be of a type other than regex as there is no - // operator<() defined to compare regexes. - // - // While not very natural operation for the class we have, we have to - // provide some meaningfull semantics for such a comparison as it is - // required by the char_traits<line_char> specialization. While we - // could provide it right in that specialization, let's keep it here - // for basic_regex implementations that potentially can compare - // line_chars as they compare them with expressions of other types (see - // below). - // - bool - operator< (const line_char&, const line_char&); - - inline bool - operator!= (const line_char& l, const line_char& r) - { - return !(l == r); - } - - inline bool - operator<= (const line_char& l, const line_char& r) - { - return l < r || l == r; - } - - // Provide basic_regex (such as from msvcrt) with the ability to - // compare line_char to a value of an integral or - // implementation-specific enum type. In the absense of the following - // template operators, such a comparisons would be ambigious for - // integral types (given that there are implicit conversions - // int->line_char and line_char->char) and impossible for enums. - // - // Note that these == and < operators can succeed only for a line_char - // of the special type. For other types they always return false. That - // in particular leads to the following case: - // - // (lc != c) != (lc < c || c < lc). - // - // Note that we can not assert line_char is of the special type as - // basic_regex (such as from libc++) may need the ability to check if - // arbitrary line_char belongs to some special characters range (like - // ['0', '9']). - // - template <typename T> - struct line_char_cmp - : public std::enable_if<std::is_integral<T>::value || - (std::is_enum<T>::value && - !std::is_same<T, char_flags>::value)> {}; - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator== (const line_char& l, const T& r) - { - return l.type () == line_type::special && - static_cast<T> (l.special ()) == r; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator== (const T& l, const line_char& r) - { - return r.type () == line_type::special && - static_cast<T> (r.special ()) == l; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator!= (const line_char& l, const T& r) - { - return !(l == r); - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator!= (const T& l, const line_char& r) - { - return !(l == r); - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator< (const line_char& l, const T& r) - { - return l.type () == line_type::special && - static_cast<T> (l.special ()) < r; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator< (const T& l, const line_char& r) - { - return r.type () == line_type::special && - l < static_cast<T> (r.special ()); - } - - template <typename T, typename = typename line_char_cmp<T>::type> - inline bool - operator<= (const line_char& l, const T& r) - { - return l < r || l == r; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - inline bool - operator<= (const T& l, const line_char& r) - { - return l < r || l == r; - } - - using line_string = std::basic_string<line_char>; - - // Locale that has ctype<line_char> facet installed. Used in the - // regex_traits<line_char> specialization (see below). - // - class line_char_locale: public std::locale - { - public: - // Create a copy of the global C++ locale. - // - line_char_locale (); - }; - - // Initialize the testscript regex global state. Should be called once - // prior to creating objects of types from this namespace. Note: not - // thread-safe. - // - void - init (); - } - } - } -} - -// Standard template specializations for line_char that are required for the -// basic_regex<line_char> instantiation. -// -namespace std -{ - template <> - class char_traits<build2::test::script::regex::line_char> - { - public: - using char_type = build2::test::script::regex::line_char; - using int_type = char_type; - using off_type = char_traits<char>::off_type; - using pos_type = char_traits<char>::pos_type; - using state_type = char_traits<char>::state_type; - - static void - assign (char_type& c1, const char_type& c2) {c1 = c2;} - - static char_type* - assign (char_type*, size_t, char_type); - - // Note that eq() and lt() are not constexpr (as required by C++11) - // because == and < operators for char_type are not constexpr. - // - static bool - eq (const char_type& l, const char_type& r) {return l == r;} - - static bool - lt (const char_type& l, const char_type& r) {return l < r;} - - static char_type* - move (char_type*, const char_type*, size_t); - - static char_type* - copy (char_type*, const char_type*, size_t); - - static int - compare (const char_type*, const char_type*, size_t); - - static size_t - length (const char_type*); - - static const char_type* - find (const char_type*, size_t, const char_type&); - - static constexpr char_type - to_char_type (const int_type& c) {return c;} - - static constexpr int_type - to_int_type (const char_type& c) {return int_type (c);} - - // Note that the following functions are not constexpr (as required by - // C++11) because their return expressions are not constexpr. - // - static bool - eq_int_type (const int_type& l, const int_type& r) {return l == r;} - - static int_type eof () {return char_type::eof;} - - static int_type - not_eof (const int_type& c) - { - return c != char_type::eof ? c : char_type::nul; - } - }; - - // ctype<> must be derived from both ctype_base and locale::facet (the later - // supports ref-counting used by the std::locale implementation internally). - // - // msvcrt for some reason also derives ctype_base from locale::facet which - // produces "already a base-class" warning and effectivelly breaks the - // reference counting. So we derive from ctype_base only in this case. - // - template <> - class ctype<build2::test::script::regex::line_char>: public ctype_base -#if !defined(_MSC_VER) || _MSC_VER >= 2000 - , public locale::facet -#endif - { - // Used by the implementation only. - // - using line_type = build2::test::script::regex::line_type; - - public: - using char_type = build2::test::script::regex::line_char; - - static locale::id id; - -#if !defined(_MSC_VER) || _MSC_VER >= 2000 - explicit - ctype (size_t refs = 0): locale::facet (refs) {} -#else - explicit - ctype (size_t refs = 0): ctype_base (refs) {} -#endif - - // While unnecessary, let's keep for completeness. - // - virtual - ~ctype () override = default; - - // The C++ standard requires the following functions to call their virtual - // (protected) do_*() counterparts that provide the real implementations. - // The only purpose for this indirection is to provide a user with the - // ability to customize existing (standard) ctype facets. As we do not - // provide such an ability, for simplicity we will omit the do_*() - // functions and provide the implementations directly. This should be safe - // as nobody except us could call those protected functions. - // - bool - is (mask m, char_type c) const - { - return m == - (c.type () == line_type::special && c.special () >= 0 && - build2::digit (static_cast<char> (c.special ())) - ? digit - : 0); - } - - const char_type* - is (const char_type*, const char_type*, mask*) const; - - const char_type* - scan_is (mask, const char_type*, const char_type*) const; - - const char_type* - scan_not (mask, const char_type*, const char_type*) const; - - char_type - toupper (char_type c) const {return c;} - - const char_type* - toupper (char_type*, const char_type* e) const {return e;} - - char_type - tolower (char_type c) const {return c;} - - const char_type* - tolower (char_type*, const char_type* e) const {return e;} - - char_type - widen (char c) const {return char_type (c);} - - const char* - widen (const char*, const char*, char_type*) const; - - char - narrow (char_type c, char def) const - { - return c.type () == line_type::special ? c.special () : def; - } - - const char_type* - narrow (const char_type*, const char_type*, char, char*) const; - }; - - // Note: the current application locale must be POSIX. Otherwise the - // behavior is undefined. - // - template <> - class regex_traits<build2::test::script::regex::line_char> - { - public: - using char_type = build2::test::script::regex::line_char; - using string_type = build2::test::script::regex::line_string; - using locale_type = build2::test::script::regex::line_char_locale; - using char_class_type = regex_traits<char>::char_class_type; - - // Workaround for msvcrt bugs. For some reason it assumes such a members - // to be present in a regex_traits specialization. - // -#if defined(_MSC_VER) && _MSC_VER < 2000 - static const ctype_base::mask _Ch_upper = ctype_base::upper; - static const ctype_base::mask _Ch_alpha = ctype_base::alpha; - - // Unsigned numeric type. msvcrt normally casts characters to this type - // for comparing with some numeric values or for calculating an index in - // some bit array. Luckily that all relates to the character class - // handling that we don't support. - // - using _Uelem = unsigned int; -#endif - - regex_traits () = default; // Unnecessary but let's keep for completeness. - - static size_t - length (const char_type* p) {return string_type::traits_type::length (p);} - - char_type - translate (char_type c) const {return c;} - - // Case-insensitive matching is not supported by line_regex. So there is no - // reason for the function to be called. - // - char_type - translate_nocase (char_type c) const {assert (false); return c;} - - // Return a sort-key - the exact copy of [b, e). - // - template <typename I> - string_type - transform (I b, I e) const {return string_type (b, e);} - - // Return a case-insensitive sort-key. Case-insensitive matching is not - // supported by line_regex. So there is no reason for the function to be - // called. - // - template <typename I> - string_type - transform_primary (I b, I e) const - { - assert (false); - return string_type (b, e); - } - - // POSIX regex grammar and collating elements (e.g., [.tilde.]) in - // particular are not supported. So there is no reason for the function to - // be called. - // - template <typename I> - string_type - lookup_collatename (I, I) const {assert (false); return string_type ();} - - // Character classes (e.g., [:lower:]) are not supported. So there is no - // reason for the function to be called. - // - template <typename I> - char_class_type - lookup_classname (I, I, bool = false) const - { - assert (false); - return char_class_type (); - } - - // Return false as we don't support character classes (e.g., [:lower:]). - // - bool - isctype (char_type, char_class_type) const {return false;} - - int - value (char_type, int) const; - - // Return the locale passed as an argument as we do not expect anything - // other than POSIX locale, that we also assume to be imbued by default. - // - locale_type - imbue (locale_type l) {return l;} - - locale_type - getloc () const {return locale_type ();} - }; - - // We assume line_char to be an unsigned type and express that with the - // following specialization used by basic_regex implementations. - // - // libstdc++ defines unsigned CharT type (regex_traits template parameter) - // to use as an index in some internal cache regardless if the cache is used - // for this specialization (and the cache is used only if CharT is char). - // - template <> - struct make_unsigned<build2::test::script::regex::line_char> - { - using type = build2::test::script::regex::line_char; - }; - - // When used with libc++ the linker complains that it can't find - // __match_any_but_newline<line_char>::__exec() function. The problem is - // that the function is only specialized for char and wchar_t - // (LLVM bug #31409). As line_char has no notion of the newline character we - // specialize the class template to behave as the __match_any<line_char> - // instantiation does (that luckily has all the functions in place). - // -#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000 - template <> - class __match_any_but_newline<build2::test::script::regex::line_char> - : public __match_any<build2::test::script::regex::line_char> - { - public: - using base = __match_any<build2::test::script::regex::line_char>; - using base::base; - }; -#endif -} - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - class line_regex: public std::basic_regex<line_char> - { - public: - using base_type = std::basic_regex<line_char>; - - using base_type::base_type; - - line_regex () = default; - - // Move string regex together with the pool used to create it. - // - line_regex (line_string&& s, line_pool&& p) - // No move-string ctor for base_type, so emulate it. - // - : base_type (s), pool (move (p)) {s.clear ();} - - // Move constuctible/assignable-only type. - // - line_regex (line_regex&&) = default; - line_regex (const line_regex&) = delete; - line_regex& operator= (line_regex&&) = default; - line_regex& operator= (const line_regex&) = delete; - - public: - line_pool pool; - }; - } - } - } -} - -#include <libbuild2/test/script/regex.ixx> - -#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX |