aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2024-10-28 12:52:11 +0200
committerKaren Arutyunov <karen@codesynthesis.com>2024-10-28 17:08:37 +0200
commit6dc01f74431a40dac698417d2947f0d8ddbd8c8c (patch)
tree069adb6512873012e60ebb601157a8d270bc4ed6
parentebc6dcfe9e7eb7aeddeff808c1c0498508183263 (diff)
Make $regex.*search() functions not match empty substrings in non-empty strings
-rw-r--r--doc/manual.cli5
-rw-r--r--doc/testscript.cli8
-rw-r--r--libbuild2/functions-regex.cxx42
-rw-r--r--tests/function/regex/testscript81
4 files changed, 127 insertions, 9 deletions
diff --git a/doc/manual.cli b/doc/manual.cli
index e16b0fa..8beb4ea 100644
--- a/doc/manual.cli
+++ b/doc/manual.cli
@@ -6128,8 +6128,9 @@ source <functions-target.cli>;
The \c{$regex.*()} function family contains functions that provide
comprehensive regular expression matching and substitution facilities. The
-supported regular expression flavor is ECMAScript (more specifically,
-ECMA-262-based C++11 regular expressions).
+supported regular expression flavor is ECMAScript, more precisely,
+ECMA-262-based C++11 regular expressions. Note that the \c{match_not_null}
+flag is in effect unless the string being matched is empty.
In the \c{$regex.*()} functions the substitution escape sequences in the
format string (the \ci{fmt} argument) are extended with a subset of the Perl
diff --git a/doc/testscript.cli b/doc/testscript.cli
index 18dac41..7f7522d 100644
--- a/doc/testscript.cli
+++ b/doc/testscript.cli
@@ -2372,8 +2372,8 @@ continuations.
\h#syntax-regex|Output Regex|
Instead of literal text the expected result in output here-strings and
-here-documents can be specified as ECMAScript regular expressions (more
-specifically, ECMA-262-based C++11 regular expressions). To signal the use of
+here-documents can be specified as ECMAScript regular expressions, more
+precisely, ECMA-262-based C++11 regular expressions. To signal the use of
regular expressions the redirect must end with the \c{~} modifier, for
example:
@@ -3082,7 +3082,9 @@ illegal.
Note that this builtin implementation deviates substantially from POSIX
\c{sed} (as described next). Most significantly, the regular expression flavor
-is ECMAScript (more specifically, ECMA-262-based C++11 regular expressions).
+is ECMAScript, more precisely, ECMA-262-based C++11 regular expressions. Note
+that the \c{match_not_null} flag is in effect unless the line being matched
+is empty.
\dl|
diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx
index c46f6f5..de34d63 100644
--- a/libbuild2/functions-regex.cxx
+++ b/libbuild2/functions-regex.cxx
@@ -138,12 +138,24 @@ namespace build2
//
string s (to_string (move (v)));
+ // Match flags.
+ //
+ // Note that by default std::regex_search() matches the empty substrings
+ // in non-empty strings for all the major implementations. We suppress
+ // such a counter-intuitive behavior with the match_not_null flag (see the
+ // butl::regex_replace_search() function implementation for details).
+ //
+ regex_constants::match_flag_type mf (regex_constants::match_default);
+
+ if (!s.empty ())
+ mf |= regex_constants::match_not_null;
+
if (!match && !subs)
- return value (regex_search (s, rge)); // Return boolean value.
+ return value (regex_search (s, rge, mf)); // Return boolean value.
match_results<string::const_iterator> m;
- if (regex_search (s, m, rge))
+ if (regex_search (s, m, rge, mf))
{
assert (!m.empty ());
@@ -483,7 +495,19 @@ namespace build2
for (auto& n: ns)
{
- if (regex_search (convert<string> (move (n)), rge))
+ string s (convert<string> (move (n)));
+
+ // Match flags.
+ //
+ // Suppress matching of empty substrings in non-empty strings (see above
+ // for details).
+ //
+ regex_constants::match_flag_type mf (regex_constants::match_default);
+
+ if (!s.empty ())
+ mf |= regex_constants::match_not_null;
+
+ if (regex_search (s, rge, mf))
return true;
}
@@ -516,7 +540,17 @@ namespace build2
bool s (n.simple ());
string v (convert<string> (s ? move (n) : name (n)));
- if (regex_search (v, rge) == matching)
+ // Match flags.
+ //
+ // Suppress matching of empty substrings in non-empty strings (see above
+ // for details).
+ //
+ regex_constants::match_flag_type mf (regex_constants::match_default);
+
+ if (!v.empty ())
+ mf |= regex_constants::match_not_null;
+
+ if (regex_search (v, rge, mf) == matching)
r.emplace_back (s ? name (move (v)) : move (n));
}
diff --git a/tests/function/regex/testscript b/tests/function/regex/testscript
index 538bdab..7fbcc8e 100644
--- a/tests/function/regex/testscript
+++ b/tests/function/regex/testscript
@@ -366,6 +366,33 @@
EOI
}
}
+
+ : empty-substring
+ :
+ : Note that regex_search() ignores the match_not_null flag for older
+ : versions of libstdc++ and libc++.
+ :
+ if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \
+ ($cxx.id != 'clang' || $cxx.version.major >= 6))
+ {
+ : empty
+ :
+ $* <<EOI >'true'
+ print $regex.search('', '.*')
+ EOI
+
+ : match
+ :
+ $* <<EOI >'true'
+ print $regex.search('a', 'a*')
+ EOI
+
+ : no-match
+ :
+ $* <<EOI >'false'
+ print $regex.search('aa', 'b*')
+ EOI
+ }
}
: split
@@ -576,6 +603,33 @@
print $regex.find_search(Foo.cxx, 'f', icase)
EOI
}
+
+ : empty-substring
+ :
+ : Note that regex_search() ignores the match_not_null flag for older
+ : versions of libstdc++ and libc++.
+ :
+ if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \
+ ($cxx.id != 'clang' || $cxx.version.major >= 6))
+ {
+ : empty
+ :
+ $* <<EOI >'true'
+ print $regex.find_search('', '.*')
+ EOI
+
+ : match
+ :
+ $* <<EOI >'true'
+ print $regex.find_search('a', 'a*')
+ EOI
+
+ : no-match
+ :
+ $* <<EOI >'false'
+ print $regex.find_search('aa', 'b*')
+ EOI
+ }
}
: filter-search
@@ -607,6 +661,33 @@
$* <<EOI >''
print $regex.filter_search(-g, '-O')
EOI
+
+ : empty-substring
+ :
+ : Note that regex_search() ignores the match_not_null flag for older
+ : versions of libstdc++ and libc++.
+ :
+ if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \
+ ($cxx.id != 'clang' || $cxx.version.major >= 6))
+ {
+ : empty
+ :
+ $* <<EOI >'{}'
+ print $regex.filter_search('', '.*')
+ EOI
+
+ : match
+ :
+ $* <<EOI >'a'
+ print $regex.filter_search('a', 'a*')
+ EOI
+
+ : no-match
+ :
+ $* <<EOI >''
+ print $regex.filter_search('aa', 'b*')
+ EOI
+ }
}
: filter-out