From ca97c1e1527e721cae3a1114864da2110b15406a Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 9 Jan 2017 17:03:04 +0300 Subject: Workaround libc++'s basic_string crash --- build2/test/script/parser.cxx | 4 +- build2/test/script/regex | 103 +++++++++++++++++++------------ build2/test/script/regex.cxx | 67 +++++++++++++++----- build2/variable | 11 ++-- unit-tests/test/script/parser/regex.test | 14 +++-- unit-tests/test/script/regex/driver.cxx | 45 ++++++-------- 6 files changed, 153 insertions(+), 91 deletions(-) diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index e5f634c..da072dc 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -37,7 +37,7 @@ namespace std #endif ostringstream os; - os << runtime_error (d); // Purify the description. + os << runtime_error (d); // Sanitize the description. string s (os.str ()); if (s.find (' ') != string::npos) @@ -1328,7 +1328,7 @@ namespace build2 // Create a special empty object. // regex_parts () - : introducer ('\0'), flags (regex::char_flags()) {} + : introducer ('\0'), flags (regex::char_flags ()) {} regex_parts (string v, char i, regex::char_flags f) : value (move (v)), introducer (i), flags (f) {} diff --git a/build2/test/script/regex b/build2/test/script/regex index 7dacfee..7708410 100644 --- a/build2/test/script/regex +++ b/build2/test/script/regex @@ -8,6 +8,7 @@ #include #include #include +#include // uintptr_t #include // make_unsigned, is_unsigned #include @@ -57,37 +58,45 @@ namespace build2 struct line_char { - /* + // Steal last two bits from the pointer to store the type. + // private: - size_t data_; + std::uintptr_t data_; - // Steal two bits from the pointer (need to static assert). - // public: line_type - type () const; + type () const {return static_cast (data_ & 0x3);} int - special () const; // Stored as (shifted) int16. + special () const + { + // Stored as (shifted) int16_t. Perform steps reversed to those + // that are described in the comment for the corresponding ctor. + // Note that the intermediate cast to uint16_t is required to + // portably preserve the -1 special character. + // + return static_cast (static_cast (data_ >> 2)); + } const char_string* - literal () const; + literal () const + { + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast ( + data_ & ~std::uintptr_t (0x3)); + } const char_regex* - regex () const; - */ - - line_type type; - - union + regex () const { - int special; // 0 (nul character) - // -1 (EOF) - // [()|.*+?{\}0123456789,=!] (excluding []) - - const char_string* literal; - const char_regex* regex; - }; + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast ( + data_ & ~std::uintptr_t (0x3)); + } static const line_char nul; static const line_char eof; @@ -96,7 +105,12 @@ namespace build2 // line_char () = default; - // Create a special character. + // Create a special character. The argument value must be one of the + // following ones: + // + // 0 (nul character) + // -1 (EOF) + // [()|.*+?{\}0123456789,=!] (excluding []) // // Note that the constructor is implicit to allow basic_regex to // implicitly construct line_chars from special char literals (in @@ -121,7 +135,13 @@ namespace build2 explicit line_char (const char_string* s) // Assume already pooled. - : type (line_type::literal), literal (s) {} + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_string address is a + // multiple of four. + // + : data_ (reinterpret_cast (s) | + static_cast (line_type::literal)) {} // Create a regex character. // @@ -130,7 +150,13 @@ namespace build2 explicit line_char (const char_regex* r) // Assume already pooled. - : type (line_type::regex), regex (r) {} + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_regex address is a + // multiple of four. + // + : data_ (reinterpret_cast (r) | + static_cast (line_type::regex)) {} // Provide basic_regex with the ability to use line_char in a context // where a char value is expected (e.g., as a function argument). @@ -145,7 +171,7 @@ namespace build2 // operator char () const { - return type == line_type::special ? special : '\a'; // BELL. + return type () == line_type::special ? special () : '\a'; // BELL. } // Return true if the character is a syntax (special) one. @@ -160,8 +186,8 @@ namespace build2 explicit operator T () const { - assert (type == line_type::special); - return static_cast (special); + assert (type () == line_type::special); + return static_cast (special ()); } }; @@ -234,16 +260,16 @@ namespace build2 bool operator== (const line_char& l, const T& r) { - return - l.type == line_type::special && static_cast (l.special) == r; + return l.type () == line_type::special && + static_cast (l.special ()) == r; } template ::type> bool operator== (const T& l, const line_char& r) { - return - r.type == line_type::special && static_cast (r.special) == l; + return r.type () == line_type::special && + static_cast (r.special ()) == l; } template ::type> @@ -264,16 +290,16 @@ namespace build2 bool operator< (const line_char& l, const T& r) { - return - l.type == line_type::special && static_cast (l.special) < r; + return l.type () == line_type::special && + static_cast (l.special ()) < r; } template ::type> bool operator< (const T& l, const line_char& r) { - return - r.type == line_type::special && l < static_cast (r.special); + return r.type () == line_type::special && + l < static_cast (r.special ()); } template ::type> @@ -419,9 +445,10 @@ namespace std bool is (mask m, char_type c) const { - return m == (c.type == line_type::special && build2::digit (c.special) - ? digit - : 0); + return m == + (c.type () == line_type::special && build2::digit (c.special ()) + ? digit + : 0); } const char_type* @@ -454,7 +481,7 @@ namespace std char narrow (char_type c, char def) const { - return c.type == line_type::special ? c.special : def; + return c.type () == line_type::special ? c.special () : def; } const char_type* diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx index ebf28f0..bbf3f00 100644 --- a/build2/test/script/regex.cxx +++ b/build2/test/script/regex.cxx @@ -16,14 +16,41 @@ namespace build2 { namespace regex { + static_assert (alignof (char_string) % 4 == 0, + "inappropriate allignment for char_string"); + + static_assert (alignof (char_regex) % 4 == 0, + "inappropriate allignment for char_regex"); + + static_assert (sizeof (uintptr_t) > sizeof (int16_t), + "inappropriate uintptr_t size"); + const line_char line_char::nul (0); const line_char line_char::eof (-1); // line_char // + // We package the special character into uintptr_t with the following + // steps: + // + // - narrow down int value to int16_t (preserves all the valid values) + // + // - convert to uint16_t (bitwise representation stays the same, but no + // need to bother with signed value widening, leftmost bits loss on + // left shift, etc) + // + // - convert to uintptr_t (storage type) + // + // - shift left by two bits (the operation is fully reversible as + // uintptr_t is wider then uint16_t) + // line_char:: line_char (int c) - : type (line_type::special), special (c) + : data_ ( + (static_cast ( + static_cast ( + static_cast (c))) << 2) | + static_cast (line_type::special)) { // @@ How can we allow anything for basic_regex but only subset // for our own code? @@ -81,19 +108,22 @@ namespace build2 bool operator== (const line_char& l, const line_char& r) { - if (l.type == r.type) + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt == rt) { bool res (true); - switch (l.type) + switch (lt) { - case line_type::special: res = l.special == r.special; break; + case line_type::special: res = l.special () == r.special (); break; case line_type::regex: assert (false); break; // Note that we use pointers (rather than vales) comparison // assuming that the strings must belong to the same pool. // - case line_type::literal: res = l.literal == r.literal; break; + case line_type::literal: res = l.literal () == r.literal (); break; } return res; @@ -101,10 +131,10 @@ namespace build2 // Match literal with regex. // - if (l.type == line_type::literal && r.type == line_type::regex) - return regex_match (*l.literal, *r.regex); - else if (r.type == line_type::literal && l.type == line_type::regex) - return regex_match (*r.literal, *l.regex); + if (lt == line_type::literal && rt == line_type::regex) + return regex_match (*l.literal (), *r.regex ()); + else if (rt == line_type::literal && lt == line_type::regex) + return regex_match (*r.literal (), *l.regex ()); return false; } @@ -115,15 +145,18 @@ namespace build2 if (l == r) return false; - if (l.type != r.type) - return l.type < r.type; + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt != rt) + return lt < rt; bool res (false); - switch (l.type) + switch (lt) { - case line_type::special: res = l.special < r.special; break; - case line_type::literal: res = *l.literal < *r.literal; break; + case line_type::special: res = l.special () < r.special (); break; + case line_type::literal: res = *l.literal () < *r.literal (); break; case line_type::regex: assert (false); break; } @@ -229,7 +262,7 @@ namespace std { const char_type& c (*b++); - *m++ = c.type == line_type::special && build2::digit (c.special) + *m++ = c.type () == line_type::special && build2::digit (c.special ()) ? digit : 0; } @@ -286,11 +319,11 @@ namespace std { assert (radix == 8 || radix == 10 || radix == 16); - if (c.type != line_type::special) + if (c.type () != line_type::special) return -1; const char digits[] = "0123456789ABCDEF"; - const char* d (string::traits_type::find (digits, radix, c.special)); + const char* d (string::traits_type::find (digits, radix, c.special ())); return d != nullptr ? d - digits : -1; } } diff --git a/build2/variable b/build2/variable index 6aec85b..2206fe0 100644 --- a/build2/variable +++ b/build2/variable @@ -232,11 +232,14 @@ namespace build2 // specialization below). Types that don't fit will have to be handled // with an extra dynamic allocation. // - std::aligned_storage::type data_; - - // VC14 needs decltype. + // std::max() is not constexpr on GCC 4.9. // - static const size_t size_ = sizeof (decltype (data_)); + static constexpr size_t size_ = + sizeof (names) > sizeof (target_triplet) + ? sizeof (names) + : sizeof (target_triplet); + + std::aligned_storage::type data_; // Make sure we have sufficient storage for untyped values. // diff --git a/unit-tests/test/script/parser/regex.test b/unit-tests/test/script/parser/regex.test index f3fda75..f9101c9 100644 --- a/unit-tests/test/script/parser/regex.test +++ b/unit-tests/test/script/parser/regex.test @@ -46,9 +46,12 @@ : malformed : - $* <'cmd >~/*foo/' 2>>~/EOE/ != 0 + : Note that old versions of libc++ (for example 1.1) do not detect some + : regex errors. For example '*' is parsed successfully. + : + $* <'cmd >~/foo[/' 2>>~/EOE/ != 0 /testscript:1:7: error: invalid stdout regex redirect.*/ - info: regex: /*foo/ + info: regex: /foo[/ EOE : without-flags @@ -128,7 +131,7 @@ : $* <>~/EOE/ != 0 cmd >>~/EOO/ - /?foo/ + /foo[/ EOO EOI /testscript:2:1: error: invalid regex.*/ @@ -138,10 +141,11 @@ : $* <>~/EOE/ != 0 cmd >>~/EOO/ - /* + a + /{ EOO EOI - /testscript:3:1: error: invalid here-document regex.*/ + /testscript:4:1: error: invalid here-document regex.*/ EOE : empty diff --git a/unit-tests/test/script/regex/driver.cxx b/unit-tests/test/script/regex/driver.cxx index 643756e..6ff4d75 100644 --- a/unit-tests/test/script/regex/driver.cxx +++ b/unit-tests/test/script/regex/driver.cxx @@ -35,12 +35,20 @@ main () enum meta {mn = 'n', mp = 'p'}; + // Special roundtrip. + // + assert (lc ('0').special () == '0'); + assert (lc (0).special () == 0); + assert (lc (-1).special () == -1); + assert (lc ('p').special () == 'p'); + assert (lc (u'\u2028').special () == u'\u2028'); + // Special comparison. // assert (lc ('0') == lc ('0')); assert (lc ('0') == '0'); assert (lc ('n') == mn); - assert (mn == static_cast (lc ('n'))); + assert (mn == static_cast (lc ('n'))); assert (lc ('0') != lc ('1')); assert (lc ('0') != '1'); @@ -58,11 +66,15 @@ main () assert (lc ('n') <= mn); assert (lc ('1') <= lc ("0", p)); + // Literal roundtrip. + // + assert (*lc ("abc", p).literal () == "abc"); + // Literal comparison. // - assert (lc ("a", p) == lc ("a", p)); - assert (lc ("a", p).literal == lc ("a", p).literal); - assert (char (lc ("a", p)) == '\a'); + assert (lc ("a", p) == lc ("a", p)); + assert (lc ("a", p).literal () == lc ("a", p).literal ()); + assert (char (lc ("a", p)) == '\a'); assert (lc ("a", p) != lc ("b", p)); assert (!(lc ("a", p) != lc (regex ("a"), p))); // Matches. @@ -75,6 +87,10 @@ main () assert (lc ("a", p) <= lc (regex ("a"), p)); assert (lc ("a", p) < lc (regex ("c"), p)); + // Regex roundtrip. + // + assert (regex_match ("abc", *lc (regex ("abc"), p).regex ())); + // Regex comparison. // assert (lc ("a", p) == lc (regex ("a|b"), p)); @@ -129,27 +145,6 @@ main () assert (v1 == vc ({'1', '2', '2'})); } - // Test line_string. - // - // @@ Add more tests. - // - // Note that the following code crashes if compiled with libc++ (LLVM bug - // #31454). - // - // @@ Probably we can overcome it by providing our own allocator for - // basic_string instantiation. The function allocate() could allocate some - // more elements that would be enough not to corrupt the memory (which - // push_back() does). - // @@ But maybe doesn't worth to bother as the bug seems to get assigned. - // @@ Heavily affects MacOS where clang++/libc++ is the default setup. - // - { - line_string s; - s.push_back (line_char ('0')); - s.push_back (line_char ('1')); - s.push_back (line_char ('2')); - } - // Test line_char_locale and ctype (only non-trivial functions). // { -- cgit v1.1