diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2024-10-28 12:52:11 +0200 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2024-10-28 17:08:37 +0200 |
commit | 6dc01f74431a40dac698417d2947f0d8ddbd8c8c (patch) | |
tree | 069adb6512873012e60ebb601157a8d270bc4ed6 | |
parent | ebc6dcfe9e7eb7aeddeff808c1c0498508183263 (diff) |
Make $regex.*search() functions not match empty substrings in non-empty strings
-rw-r--r-- | doc/manual.cli | 5 | ||||
-rw-r--r-- | doc/testscript.cli | 8 | ||||
-rw-r--r-- | libbuild2/functions-regex.cxx | 42 | ||||
-rw-r--r-- | tests/function/regex/testscript | 81 |
4 files changed, 127 insertions, 9 deletions
diff --git a/doc/manual.cli b/doc/manual.cli index e16b0fa..8beb4ea 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -6128,8 +6128,9 @@ source <functions-target.cli>; The \c{$regex.*()} function family contains function that provide comprehensive regular expression matching and substitution facilities. The -supported regular expression flavor is ECMAScript (more specifically, -ECMA-262-based C++11 regular expressions). +supported regular expression flavor is ECMAScript, more precisely, +ECMA-262-based C++11 regular expressions. Note that the \c{match_not_null} +flag is in effect unless the string being matched is empty. In the \c{$regex.*()} functions the substitution escape sequences in the format string (the \ci{fmt} argument) are extended with a subset of the Perl diff --git a/doc/testscript.cli b/doc/testscript.cli index 18dac41..7f7522d 100644 --- a/doc/testscript.cli +++ b/doc/testscript.cli @@ -2372,8 +2372,8 @@ continuations. \h#syntax-regex|Output Regex| Instead of literal text the expected result in output here-strings and -here-documents can be specified as ECMAScript regular expressions (more -specifically, ECMA-262-based C++11 regular expressions). To signal the use of +here-documents can be specified as ECMAScript regular expressions, more +precisely, ECMA-262-based C++11 regular expressions. To signal the use of regular expressions the redirect must end with the \c{~} modifier, for example: @@ -3082,7 +3082,9 @@ illegal. Note that this builtin implementation deviates substantially from POSIX \c{sed} (as described next). Most significantly, the regular expression flavor -is ECMAScript (more specifically, ECMA-262-based C++11 regular expressions). +is ECMAScript, more precisely, ECMA-262-based C++11 regular expressions. Note +that the \c{match_not_null} flag is in effect unless the line being matched +is empty. 
\dl| diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx index c46f6f5..de34d63 100644 --- a/libbuild2/functions-regex.cxx +++ b/libbuild2/functions-regex.cxx @@ -138,12 +138,24 @@ namespace build2 // string s (to_string (move (v))); + // Match flags. + // + // Note that by default std::regex_search() matches the empty substrings + // in non-empty strings for all the major implementations. We suppress + // such a counter-intuitive behavior with the match_not_null flag (see the + // butl::regex_replace_search() function implementation for details). + // + regex_constants::match_flag_type mf (regex_constants::match_default); + + if (!s.empty ()) + mf |= regex_constants::match_not_null; + if (!match && !subs) - return value (regex_search (s, rge)); // Return boolean value. + return value (regex_search (s, rge, mf)); // Return boolean value. match_results<string::const_iterator> m; - if (regex_search (s, m, rge)) + if (regex_search (s, m, rge, mf)) { assert (!m.empty ()); @@ -483,7 +495,19 @@ namespace build2 for (auto& n: ns) { - if (regex_search (convert<string> (move (n)), rge)) + string s (convert<string> (move (n))); + + // Match flags. + // + // Suppress matching of empty substrings in non-empty strings (see above + // for details). + // + regex_constants::match_flag_type mf (regex_constants::match_default); + + if (!s.empty ()) + mf |= regex_constants::match_not_null; + + if (regex_search (s, rge, mf)) return true; } @@ -516,7 +540,17 @@ namespace build2 bool s (n.simple ()); string v (convert<string> (s ? move (n) : name (n))); - if (regex_search (v, rge) == matching) + // Match flags. + // + // Suppress matching of empty substrings in non-empty strings (see above + // for details). + // + regex_constants::match_flag_type mf (regex_constants::match_default); + + if (!v.empty ()) + mf |= regex_constants::match_not_null; + + if (regex_search (v, rge, mf) == matching) r.emplace_back (s ? 
name (move (v)) : move (n)); } diff --git a/tests/function/regex/testscript b/tests/function/regex/testscript index 538bdab..7fbcc8e 100644 --- a/tests/function/regex/testscript +++ b/tests/function/regex/testscript @@ -366,6 +366,33 @@ EOI } } + + : empty-substring + : + : Note that regex_search() ignores the match_not_null flag for older + : versions of libstdc++ and libc++. + : + if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \ + ($cxx.id != 'clang' || $cxx.version.major >= 6)) + { + : empty + : + $* <<EOI >'true' + print $regex.search('', '.*') + EOI + + : match + : + $* <<EOI >'true' + print $regex.search('a', 'a*') + EOI + + : no-match + : + $* <<EOI >'false' + print $regex.search('aa', 'b*') + EOI + } } : split @@ -576,6 +603,33 @@ print $regex.find_search(Foo.cxx, 'f', icase) EOI } + + : empty-substring + : + : Note that regex_search() ignores the match_not_null flag for older + : versions of libstdc++ and libc++. + : + if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \ + ($cxx.id != 'clang' || $cxx.version.major >= 6)) + { + : empty + : + $* <<EOI >'true' + print $regex.find_search('', '.*') + EOI + + : match + : + $* <<EOI >'true' + print $regex.find_search('a', 'a*') + EOI + + : no-match + : + $* <<EOI >'false' + print $regex.find_search('aa', 'b*') + EOI + } } : filter-search @@ -607,6 +661,33 @@ $* <<EOI >'' print $regex.filter_search(-g, '-O') EOI + + : empty-substring + : + : Note that regex_search() ignores the match_not_null flag for older + : versions of libstdc++ and libc++. + : + if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \ + ($cxx.id != 'clang' || $cxx.version.major >= 6)) + { + : empty + : + $* <<EOI >'{}' + print $regex.filter_search('', '.*') + EOI + + : match + : + $* <<EOI >'a' + print $regex.filter_search('a', 'a*') + EOI + + : no-match + : + $* <<EOI >'' + print $regex.filter_search('aa', 'b*') + EOI + } } : filter-out |