aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/test/script/regex.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/test/script/regex.hxx')
-rw-r--r--libbuild2/test/script/regex.hxx684
1 files changed, 0 insertions, 684 deletions
diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx
deleted file mode 100644
index 4114ea4..0000000
--- a/libbuild2/test/script/regex.hxx
+++ /dev/null
@@ -1,684 +0,0 @@
-// file : libbuild2/test/script/regex.hxx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX
-#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX
-
-#include <list>
-#include <regex>
-#include <locale>
-#include <string> // basic_string
-#include <type_traits> // make_unsigned, enable_if, is_*
-#include <unordered_set>
-
-#include <libbuild2/types.hxx>
-#include <libbuild2/utility.hxx>
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace regex
- {
- using char_string = std::basic_string<char>;
-
- enum class char_flags: uint16_t
- {
- icase = 0x1, // Case-insensitive match.
- idot = 0x2, // Invert '.' escaping.
-
- none = 0
- };
-
- // Restricts valid standard flags to just {icase}, extends with custom
- // flags {idot}.
- //
- class char_regex: public std::basic_regex<char>
- {
- public:
- using base_type = std::basic_regex<char>;
-
- char_regex (const char_string&, char_flags = char_flags::none);
- };
-
- // Newlines are line separators and are not part of the line:
- //
- // line<newline>line<newline>
- //
- // Specifically, this means that a customary trailing newline creates a
- // trailing blank line.
- //
- // All characters can inter-compare (though there cannot be regex
- // characters in the output, only in line_regex).
- //
- // Note that we assume that line_regex and the input to regex_match()
- // use the same pool.
- //
- struct line_pool
- {
- // Note that we assume the pool can be moved without invalidating
- // pointers to any already pooled entities.
- //
- std::unordered_set<char_string> strings;
- std::list<char_regex> regexes;
- };
-
- enum class line_type
- {
- special,
- literal,
- regex
- };
-
- struct line_char
- {
- // Steal last two bits from the pointer to store the type.
- //
- private:
- std::uintptr_t data_;
-
- public:
- line_type
- type () const {return static_cast<line_type> (data_ & 0x3);}
-
- int
- special () const
- {
- // Stored as (shifted) int16_t. Perform steps reversed to those
- // that are described in the comment for the corresponding ctor.
- // Note that the intermediate cast to uint16_t is required to
- // portably preserve the -1 special character.
- //
- return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2));
- }
-
- const char_string*
- literal () const
- {
- // Note that 2 rightmost bits are used for packaging line_char
- // type. Read the comment for the corresponding ctor for details.
- //
- return reinterpret_cast<const char_string*> (
- data_ & ~std::uintptr_t (0x3));
- }
-
- const char_regex*
- regex () const
- {
- // Note that 2 rightmost bits are used for packaging line_char
- // type. Read the comment for the corresponding ctor for details.
- //
- return reinterpret_cast<const char_regex*> (
- data_ & ~std::uintptr_t (0x3));
- }
-
- static const line_char nul;
- static const line_char eof;
-
- // Note: creates an uninitialized value.
- //
- line_char () = default;
-
- // Create a special character. The argument value must be one of the
- // following ones:
- //
- // 0 (nul character)
- // -1 (EOF)
- // [()|.*+?{}\0123456789,=!] (excluding [])
- //
- // Note that the constructor is implicit to allow basic_regex to
- // implicitly construct line_chars from special char literals (in
- // particular libstdc++ appends them to an internal line_string).
- //
- // Also note that we extend the valid characters set (see above) with
- // 'p', 'n' (used by libstdc++ for positive/negative look-ahead
- // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used
- // by libstdc++ for newline/newparagraph matching).
- //
- line_char (int);
-
- // Create a literal character.
- //
- // Don't copy string if already pooled.
- //
- explicit
- line_char (const char_string&, line_pool&);
-
- explicit
- line_char (char_string&&, line_pool&);
-
- explicit
- line_char (const char_string* s) // Assume already pooled.
- //
- // Steal two bits from the pointer to package line_char type.
- // Assume (and statically assert) that char_string address is a
- // multiple of four.
- //
- : data_ (reinterpret_cast <std::uintptr_t> (s) |
- static_cast <std::uintptr_t> (line_type::literal)) {}
-
- // Create a regex character.
- //
- explicit
- line_char (char_regex, line_pool&);
-
- explicit
- line_char (const char_regex* r) // Assume already pooled.
- //
- // Steal two bits from the pointer to package line_char type.
- // Assume (and statically assert) that char_regex address is a
- // multiple of four.
- //
- : data_ (reinterpret_cast <std::uintptr_t> (r) |
- static_cast <std::uintptr_t> (line_type::regex)) {}
-
- // Provide basic_regex with the ability to use line_char in a context
- // where a char value is expected (e.g., as a function argument).
- //
- // libstdc++ seems to cast special line_chars only (and such a
- // conversion is meanigfull).
- //
- // msvcrt casts line_chars of arbitrary types instead. The only
- // reasonable strategy is to return a value that differs from any
- // other that can be encountered in a regex expression and so will
- // unlikelly be misinterpreted.
- //
- operator char () const
- {
- return type () == line_type::special ? special () : '\a'; // BELL.
- }
-
- // Return true if the character is a syntax (special) one.
- //
- static bool
- syntax (char);
-
- // Provide basic_regex (such as from msvcrt) with the ability to
- // explicitly cast line_chars to implementation-specific numeric
- // types (enums, msvcrt's _Uelem, etc).
- //
- template <typename T>
- explicit
- operator T () const
- {
- assert (type () == line_type::special);
- return static_cast<T> (special ());
- }
- };
-
- // Perform "deep" characters comparison (for example match literal
- // character with a regex character), rather than just compare them
- // literally. At least one argument must be of a type other than regex
- // as there is no operator==() defined to compare regexes. Characters
- // of the literal type must share the same pool (strings are compared
- // by pointers not by values).
- //
- bool
- operator== (const line_char&, const line_char&);
-
- // Return false if arguments are equal (operator==() returns true).
- // Otherwise if types are different return the value implying that
- // special < literal < regex. If types are special or literal return
- // the result of the respective characters or strings comparison. At
- // least one argument must be of a type other than regex as there is no
- // operator<() defined to compare regexes.
- //
- // While not very natural operation for the class we have, we have to
- // provide some meaningfull semantics for such a comparison as it is
- // required by the char_traits<line_char> specialization. While we
- // could provide it right in that specialization, let's keep it here
- // for basic_regex implementations that potentially can compare
- // line_chars as they compare them with expressions of other types (see
- // below).
- //
- bool
- operator< (const line_char&, const line_char&);
-
- inline bool
- operator!= (const line_char& l, const line_char& r)
- {
- return !(l == r);
- }
-
- inline bool
- operator<= (const line_char& l, const line_char& r)
- {
- return l < r || l == r;
- }
-
- // Provide basic_regex (such as from msvcrt) with the ability to
- // compare line_char to a value of an integral or
- // implementation-specific enum type. In the absense of the following
- // template operators, such a comparisons would be ambigious for
- // integral types (given that there are implicit conversions
- // int->line_char and line_char->char) and impossible for enums.
- //
- // Note that these == and < operators can succeed only for a line_char
- // of the special type. For other types they always return false. That
- // in particular leads to the following case:
- //
- // (lc != c) != (lc < c || c < lc).
- //
- // Note that we can not assert line_char is of the special type as
- // basic_regex (such as from libc++) may need the ability to check if
- // arbitrary line_char belongs to some special characters range (like
- // ['0', '9']).
- //
- template <typename T>
- struct line_char_cmp
- : public std::enable_if<std::is_integral<T>::value ||
- (std::is_enum<T>::value &&
- !std::is_same<T, char_flags>::value)> {};
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator== (const line_char& l, const T& r)
- {
- return l.type () == line_type::special &&
- static_cast<T> (l.special ()) == r;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator== (const T& l, const line_char& r)
- {
- return r.type () == line_type::special &&
- static_cast<T> (r.special ()) == l;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator!= (const line_char& l, const T& r)
- {
- return !(l == r);
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator!= (const T& l, const line_char& r)
- {
- return !(l == r);
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator< (const line_char& l, const T& r)
- {
- return l.type () == line_type::special &&
- static_cast<T> (l.special ()) < r;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator< (const T& l, const line_char& r)
- {
- return r.type () == line_type::special &&
- l < static_cast<T> (r.special ());
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- inline bool
- operator<= (const line_char& l, const T& r)
- {
- return l < r || l == r;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- inline bool
- operator<= (const T& l, const line_char& r)
- {
- return l < r || l == r;
- }
-
- using line_string = std::basic_string<line_char>;
-
- // Locale that has ctype<line_char> facet installed. Used in the
- // regex_traits<line_char> specialization (see below).
- //
- class line_char_locale: public std::locale
- {
- public:
- // Create a copy of the global C++ locale.
- //
- line_char_locale ();
- };
-
- // Initialize the testscript regex global state. Should be called once
- // prior to creating objects of types from this namespace. Note: not
- // thread-safe.
- //
- void
- init ();
- }
- }
- }
-}
-
-// Standard template specializations for line_char that are required for the
-// basic_regex<line_char> instantiation.
-//
-namespace std
-{
- template <>
- class char_traits<build2::test::script::regex::line_char>
- {
- public:
- using char_type = build2::test::script::regex::line_char;
- using int_type = char_type;
- using off_type = char_traits<char>::off_type;
- using pos_type = char_traits<char>::pos_type;
- using state_type = char_traits<char>::state_type;
-
- static void
- assign (char_type& c1, const char_type& c2) {c1 = c2;}
-
- static char_type*
- assign (char_type*, size_t, char_type);
-
- // Note that eq() and lt() are not constexpr (as required by C++11)
- // because == and < operators for char_type are not constexpr.
- //
- static bool
- eq (const char_type& l, const char_type& r) {return l == r;}
-
- static bool
- lt (const char_type& l, const char_type& r) {return l < r;}
-
- static char_type*
- move (char_type*, const char_type*, size_t);
-
- static char_type*
- copy (char_type*, const char_type*, size_t);
-
- static int
- compare (const char_type*, const char_type*, size_t);
-
- static size_t
- length (const char_type*);
-
- static const char_type*
- find (const char_type*, size_t, const char_type&);
-
- static constexpr char_type
- to_char_type (const int_type& c) {return c;}
-
- static constexpr int_type
- to_int_type (const char_type& c) {return int_type (c);}
-
- // Note that the following functions are not constexpr (as required by
- // C++11) because their return expressions are not constexpr.
- //
- static bool
- eq_int_type (const int_type& l, const int_type& r) {return l == r;}
-
- static int_type eof () {return char_type::eof;}
-
- static int_type
- not_eof (const int_type& c)
- {
- return c != char_type::eof ? c : char_type::nul;
- }
- };
-
- // ctype<> must be derived from both ctype_base and locale::facet (the later
- // supports ref-counting used by the std::locale implementation internally).
- //
- // msvcrt for some reason also derives ctype_base from locale::facet which
- // produces "already a base-class" warning and effectivelly breaks the
- // reference counting. So we derive from ctype_base only in this case.
- //
- template <>
- class ctype<build2::test::script::regex::line_char>: public ctype_base
-#if !defined(_MSC_VER) || _MSC_VER >= 2000
- , public locale::facet
-#endif
- {
- // Used by the implementation only.
- //
- using line_type = build2::test::script::regex::line_type;
-
- public:
- using char_type = build2::test::script::regex::line_char;
-
- static locale::id id;
-
-#if !defined(_MSC_VER) || _MSC_VER >= 2000
- explicit
- ctype (size_t refs = 0): locale::facet (refs) {}
-#else
- explicit
- ctype (size_t refs = 0): ctype_base (refs) {}
-#endif
-
- // While unnecessary, let's keep for completeness.
- //
- virtual
- ~ctype () override = default;
-
- // The C++ standard requires the following functions to call their virtual
- // (protected) do_*() counterparts that provide the real implementations.
- // The only purpose for this indirection is to provide a user with the
- // ability to customize existing (standard) ctype facets. As we do not
- // provide such an ability, for simplicity we will omit the do_*()
- // functions and provide the implementations directly. This should be safe
- // as nobody except us could call those protected functions.
- //
- bool
- is (mask m, char_type c) const
- {
- return m ==
- (c.type () == line_type::special && c.special () >= 0 &&
- build2::digit (static_cast<char> (c.special ()))
- ? digit
- : 0);
- }
-
- const char_type*
- is (const char_type*, const char_type*, mask*) const;
-
- const char_type*
- scan_is (mask, const char_type*, const char_type*) const;
-
- const char_type*
- scan_not (mask, const char_type*, const char_type*) const;
-
- char_type
- toupper (char_type c) const {return c;}
-
- const char_type*
- toupper (char_type*, const char_type* e) const {return e;}
-
- char_type
- tolower (char_type c) const {return c;}
-
- const char_type*
- tolower (char_type*, const char_type* e) const {return e;}
-
- char_type
- widen (char c) const {return char_type (c);}
-
- const char*
- widen (const char*, const char*, char_type*) const;
-
- char
- narrow (char_type c, char def) const
- {
- return c.type () == line_type::special ? c.special () : def;
- }
-
- const char_type*
- narrow (const char_type*, const char_type*, char, char*) const;
- };
-
- // Note: the current application locale must be POSIX. Otherwise the
- // behavior is undefined.
- //
- template <>
- class regex_traits<build2::test::script::regex::line_char>
- {
- public:
- using char_type = build2::test::script::regex::line_char;
- using string_type = build2::test::script::regex::line_string;
- using locale_type = build2::test::script::regex::line_char_locale;
- using char_class_type = regex_traits<char>::char_class_type;
-
- // Workaround for msvcrt bugs. For some reason it assumes such a members
- // to be present in a regex_traits specialization.
- //
-#if defined(_MSC_VER) && _MSC_VER < 2000
- static const ctype_base::mask _Ch_upper = ctype_base::upper;
- static const ctype_base::mask _Ch_alpha = ctype_base::alpha;
-
- // Unsigned numeric type. msvcrt normally casts characters to this type
- // for comparing with some numeric values or for calculating an index in
- // some bit array. Luckily that all relates to the character class
- // handling that we don't support.
- //
- using _Uelem = unsigned int;
-#endif
-
- regex_traits () = default; // Unnecessary but let's keep for completeness.
-
- static size_t
- length (const char_type* p) {return string_type::traits_type::length (p);}
-
- char_type
- translate (char_type c) const {return c;}
-
- // Case-insensitive matching is not supported by line_regex. So there is no
- // reason for the function to be called.
- //
- char_type
- translate_nocase (char_type c) const {assert (false); return c;}
-
- // Return a sort-key - the exact copy of [b, e).
- //
- template <typename I>
- string_type
- transform (I b, I e) const {return string_type (b, e);}
-
- // Return a case-insensitive sort-key. Case-insensitive matching is not
- // supported by line_regex. So there is no reason for the function to be
- // called.
- //
- template <typename I>
- string_type
- transform_primary (I b, I e) const
- {
- assert (false);
- return string_type (b, e);
- }
-
- // POSIX regex grammar and collating elements (e.g., [.tilde.]) in
- // particular are not supported. So there is no reason for the function to
- // be called.
- //
- template <typename I>
- string_type
- lookup_collatename (I, I) const {assert (false); return string_type ();}
-
- // Character classes (e.g., [:lower:]) are not supported. So there is no
- // reason for the function to be called.
- //
- template <typename I>
- char_class_type
- lookup_classname (I, I, bool = false) const
- {
- assert (false);
- return char_class_type ();
- }
-
- // Return false as we don't support character classes (e.g., [:lower:]).
- //
- bool
- isctype (char_type, char_class_type) const {return false;}
-
- int
- value (char_type, int) const;
-
- // Return the locale passed as an argument as we do not expect anything
- // other than POSIX locale, that we also assume to be imbued by default.
- //
- locale_type
- imbue (locale_type l) {return l;}
-
- locale_type
- getloc () const {return locale_type ();}
- };
-
- // We assume line_char to be an unsigned type and express that with the
- // following specialization used by basic_regex implementations.
- //
- // libstdc++ defines unsigned CharT type (regex_traits template parameter)
- // to use as an index in some internal cache regardless if the cache is used
- // for this specialization (and the cache is used only if CharT is char).
- //
- template <>
- struct make_unsigned<build2::test::script::regex::line_char>
- {
- using type = build2::test::script::regex::line_char;
- };
-
- // When used with libc++ the linker complains that it can't find
- // __match_any_but_newline<line_char>::__exec() function. The problem is
- // that the function is only specialized for char and wchar_t
- // (LLVM bug #31409). As line_char has no notion of the newline character we
- // specialize the class template to behave as the __match_any<line_char>
- // instantiation does (that luckily has all the functions in place).
- //
-#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000
- template <>
- class __match_any_but_newline<build2::test::script::regex::line_char>
- : public __match_any<build2::test::script::regex::line_char>
- {
- public:
- using base = __match_any<build2::test::script::regex::line_char>;
- using base::base;
- };
-#endif
-}
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace regex
- {
- class line_regex: public std::basic_regex<line_char>
- {
- public:
- using base_type = std::basic_regex<line_char>;
-
- using base_type::base_type;
-
- line_regex () = default;
-
- // Move string regex together with the pool used to create it.
- //
- line_regex (line_string&& s, line_pool&& p)
- // No move-string ctor for base_type, so emulate it.
- //
- : base_type (s), pool (move (p)) {s.clear ();}
-
- // Move constuctible/assignable-only type.
- //
- line_regex (line_regex&&) = default;
- line_regex (const line_regex&) = delete;
- line_regex& operator= (line_regex&&) = default;
- line_regex& operator= (const line_regex&) = delete;
-
- public:
- line_pool pool;
- };
- }
- }
- }
-}
-
-#include <libbuild2/test/script/regex.ixx>
-
-#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX