From ca97c1e1527e721cae3a1114864da2110b15406a Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 9 Jan 2017 17:03:04 +0300 Subject: Workaround libc++'s basic_string crash --- build2/test/script/parser.cxx | 4 +- build2/test/script/regex | 103 ++++++++++++++++++++++++++---------------- build2/test/script/regex.cxx | 67 ++++++++++++++++++++------- 3 files changed, 117 insertions(+), 57 deletions(-) (limited to 'build2/test') diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index e5f634c..da072dc 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -37,7 +37,7 @@ namespace std #endif ostringstream os; - os << runtime_error (d); // Purify the description. + os << runtime_error (d); // Sanitize the description. string s (os.str ()); if (s.find (' ') != string::npos) @@ -1328,7 +1328,7 @@ namespace build2 // Create a special empty object. // regex_parts () - : introducer ('\0'), flags (regex::char_flags()) {} + : introducer ('\0'), flags (regex::char_flags ()) {} regex_parts (string v, char i, regex::char_flags f) : value (move (v)), introducer (i), flags (f) {} diff --git a/build2/test/script/regex b/build2/test/script/regex index 7dacfee..7708410 100644 --- a/build2/test/script/regex +++ b/build2/test/script/regex @@ -8,6 +8,7 @@ #include #include #include +#include // uintptr_t #include // make_unsigned, is_unsigned #include @@ -57,37 +58,45 @@ namespace build2 struct line_char { - /* + // Steal last two bits from the pointer to store the type. + // private: - size_t data_; + std::uintptr_t data_; - // Steal two bits from the pointer (need to static assert). - // public: line_type - type () const; + type () const {return static_cast (data_ & 0x3);} int - special () const; // Stored as (shifted) int16. + special () const + { + // Stored as (shifted) int16_t. Perform steps reversed to those + // that are described in the comment for the corresponding ctor. + // Note that the intermediate cast to uint16_t is required to + // portably preserve the -1 special character. + // + return static_cast (static_cast (data_ >> 2)); + } const char_string* - literal () const; + literal () const + { + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast ( + data_ & ~std::uintptr_t (0x3)); + } const char_regex* - regex () const; - */ - - line_type type; - - union + regex () const { - int special; // 0 (nul character) - // -1 (EOF) - // [()|.*+?{\}0123456789,=!] (excluding []) - - const char_string* literal; - const char_regex* regex; - }; + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast ( + data_ & ~std::uintptr_t (0x3)); + } static const line_char nul; static const line_char eof; @@ -96,7 +105,12 @@ namespace build2 // line_char () = default; - // Create a special character. + // Create a special character. The argument value must be one of the + // following ones: + // + // 0 (nul character) + // -1 (EOF) + // [()|.*+?{\}0123456789,=!] (excluding []) // // Note that the constructor is implicit to allow basic_regex to // implicitly construct line_chars from special char literals (in @@ -121,7 +135,13 @@ namespace build2 explicit line_char (const char_string* s) // Assume already pooled. - : type (line_type::literal), literal (s) {} + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_string address is a + // multiple of four. + // + : data_ (reinterpret_cast (s) | + static_cast (line_type::literal)) {} // Create a regex character. // @@ -130,7 +150,13 @@ namespace build2 explicit line_char (const char_regex* r) // Assume already pooled. - : type (line_type::regex), regex (r) {} + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_regex address is a + // multiple of four. + // + : data_ (reinterpret_cast (r) | + static_cast (line_type::regex)) {} // Provide basic_regex with the ability to use line_char in a context // where a char value is expected (e.g., as a function argument). @@ -145,7 +171,7 @@ namespace build2 // operator char () const { - return type == line_type::special ? special : '\a'; // BELL. + return type () == line_type::special ? special () : '\a'; // BELL. } // Return true if the character is a syntax (special) one. @@ -160,8 +186,8 @@ namespace build2 explicit operator T () const { - assert (type == line_type::special); - return static_cast (special); + assert (type () == line_type::special); + return static_cast (special ()); } }; @@ -234,16 +260,16 @@ namespace build2 bool operator== (const line_char& l, const T& r) { - return - l.type == line_type::special && static_cast (l.special) == r; + return l.type () == line_type::special && + static_cast (l.special ()) == r; } template ::type> bool operator== (const T& l, const line_char& r) { - return - r.type == line_type::special && static_cast (r.special) == l; + return r.type () == line_type::special && + static_cast (r.special ()) == l; } template ::type> @@ -264,16 +290,16 @@ namespace build2 bool operator< (const line_char& l, const T& r) { - return - l.type == line_type::special && static_cast (l.special) < r; + return l.type () == line_type::special && + static_cast (l.special ()) < r; } template ::type> bool operator< (const T& l, const line_char& r) { - return - r.type == line_type::special && l < static_cast (r.special); + return r.type () == line_type::special && + l < static_cast (r.special ()); } template ::type> @@ -419,9 +445,10 @@ namespace std bool is (mask m, char_type c) const { - return m == (c.type == line_type::special && build2::digit (c.special) - ? digit - : 0); + return m == + (c.type () == line_type::special && build2::digit (c.special ()) + ? digit + : 0); } const char_type* @@ -454,7 +481,7 @@ namespace std char narrow (char_type c, char def) const { - return c.type == line_type::special ? c.special : def; + return c.type () == line_type::special ? c.special () : def; } const char_type* diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx index ebf28f0..bbf3f00 100644 --- a/build2/test/script/regex.cxx +++ b/build2/test/script/regex.cxx @@ -16,14 +16,41 @@ namespace build2 { namespace regex { + static_assert (alignof (char_string) % 4 == 0, + "inappropriate allignment for char_string"); + + static_assert (alignof (char_regex) % 4 == 0, + "inappropriate allignment for char_regex"); + + static_assert (sizeof (uintptr_t) > sizeof (int16_t), + "inappropriate uintptr_t size"); + const line_char line_char::nul (0); const line_char line_char::eof (-1); // line_char // + // We package the special character into uintptr_t with the following + // steps: + // + // - narrow down int value to int16_t (preserves all the valid values) + // + // - convert to uint16_t (bitwise representation stays the same, but no + // need to bother with signed value widening, leftmost bits loss on + // left shift, etc) + // + // - convert to uintptr_t (storage type) + // + // - shift left by two bits (the operation is fully reversible as + // uintptr_t is wider then uint16_t) + // line_char:: line_char (int c) - : type (line_type::special), special (c) + : data_ ( + (static_cast ( + static_cast ( + static_cast (c))) << 2) | + static_cast (line_type::special)) { // @@ How can we allow anything for basic_regex but only subset // for our own code? @@ -81,19 +108,22 @@ namespace build2 bool operator== (const line_char& l, const line_char& r) { - if (l.type == r.type) + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt == rt) { bool res (true); - switch (l.type) + switch (lt) { - case line_type::special: res = l.special == r.special; break; + case line_type::special: res = l.special () == r.special (); break; case line_type::regex: assert (false); break; // Note that we use pointers (rather than vales) comparison // assuming that the strings must belong to the same pool. // - case line_type::literal: res = l.literal == r.literal; break; + case line_type::literal: res = l.literal () == r.literal (); break; } return res; @@ -101,10 +131,10 @@ namespace build2 // Match literal with regex. // - if (l.type == line_type::literal && r.type == line_type::regex) - return regex_match (*l.literal, *r.regex); - else if (r.type == line_type::literal && l.type == line_type::regex) - return regex_match (*r.literal, *l.regex); + if (lt == line_type::literal && rt == line_type::regex) + return regex_match (*l.literal (), *r.regex ()); + else if (rt == line_type::literal && lt == line_type::regex) + return regex_match (*r.literal (), *l.regex ()); return false; } @@ -115,15 +145,18 @@ namespace build2 if (l == r) return false; - if (l.type != r.type) - return l.type < r.type; + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt != rt) + return lt < rt; bool res (false); - switch (l.type) + switch (lt) { - case line_type::special: res = l.special < r.special; break; - case line_type::literal: res = *l.literal < *r.literal; break; + case line_type::special: res = l.special () < r.special (); break; + case line_type::literal: res = *l.literal () < *r.literal (); break; case line_type::regex: assert (false); break; } @@ -229,7 +262,7 @@ namespace std { const char_type& c (*b++); - *m++ = c.type == line_type::special && build2::digit (c.special) + *m++ = c.type () == line_type::special && build2::digit (c.special ()) ? digit : 0; } @@ -286,11 +319,11 @@ namespace std { assert (radix == 8 || radix == 10 || radix == 16); - if (c.type != line_type::special) + if (c.type () != line_type::special) return -1; const char digits[] = "0123456789ABCDEF"; - const char* d (string::traits_type::find (digits, radix, c.special)); + const char* d (string::traits_type::find (digits, radix, c.special ())); return d != nullptr ? d - digits : -1; } } -- cgit v1.1