aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/functions-regex.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/functions-regex.cxx')
-rw-r--r--libbuild2/functions-regex.cxx475
1 files changed, 318 insertions, 157 deletions
diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx
index 2f0d122..cf3ffd0 100644
--- a/libbuild2/functions-regex.cxx
+++ b/libbuild2/functions-regex.cxx
@@ -21,7 +21,7 @@ namespace build2
// Optimize for the string value type.
//
if (v.type != &value_traits<string>::value_type)
- untypify (v);
+ untypify (v, true /* reduce */);
return convert<string> (move (v));
}
@@ -69,7 +69,7 @@ namespace build2
else if (s == "return_subs")
subs = true;
else
- throw invalid_argument ("invalid flag '" + s + "'");
+ throw invalid_argument ("invalid flag '" + s + '\'');
}
}
@@ -92,10 +92,7 @@ namespace build2
names r;
for (size_t i (1); i != m.size (); ++i)
- {
- if (m[i].matched)
- r.emplace_back (m.str (i));
- }
+ r.emplace_back (m[i].matched ? m.str (i) : string ());
return value (move (r));
}
@@ -129,7 +126,7 @@ namespace build2
else if (s == "return_subs")
subs = true;
else
- throw invalid_argument ("invalid flag '" + s + "'");
+ throw invalid_argument ("invalid flag '" + s + '\'');
}
}
@@ -161,10 +158,7 @@ namespace build2
if (subs)
{
for (size_t i (1); i != m.size (); ++i)
- {
- if (m[i].matched)
- r.emplace_back (m.str (i));
- }
+ r.emplace_back (m[i].matched ? m.str (i) : string ());
}
return value (move (r));
@@ -174,7 +168,9 @@ namespace build2
}
static pair<regex::flag_type, regex_constants::match_flag_type>
- parse_replacement_flags (optional<names>&& flags, bool first_only = true)
+ parse_replacement_flags (optional<names>&& flags,
+ bool first_only = true,
+ bool* copy_empty = nullptr)
{
regex::flag_type rf (regex::ECMAScript);
regex_constants::match_flag_type mf (regex_constants::match_default);
@@ -191,8 +187,10 @@ namespace build2
mf |= regex_constants::format_first_only;
else if (s == "format_no_copy")
mf |= regex_constants::format_no_copy;
+ else if (copy_empty != nullptr && s == "format_copy_empty")
+ *copy_empty = true;
else
- throw invalid_argument ("invalid flag '" + s + "'");
+ throw invalid_argument ("invalid flag '" + s + '\'');
}
}
@@ -334,7 +332,10 @@ namespace build2
const string& fmt,
optional<names>&& flags)
{
- auto fl (parse_replacement_flags (move (flags), false));
+ bool copy_empty (false);
+ auto fl (parse_replacement_flags (move (flags),
+ false /* first_only */,
+ &copy_empty));
regex rge (parse_regex (re, fl.first));
names r;
@@ -342,10 +343,10 @@ namespace build2
try
{
regex_replace_search (to_string (move (v)), rge, fmt,
- [&r] (string::const_iterator b,
- string::const_iterator e)
+ [copy_empty, &r] (string::const_iterator b,
+ string::const_iterator e)
{
- if (b != e)
+ if (copy_empty || b != e)
r.emplace_back (string (b, e));
},
fl.second);
@@ -364,26 +365,29 @@ namespace build2
// apply() overloads (below) for details.
//
static names
- apply (names&& s,
+ apply (names&& ns,
const string& re,
const string& fmt,
optional<names>&& flags)
{
- auto fl (parse_replacement_flags (move (flags)));
+ bool copy_empty (false);
+ auto fl (parse_replacement_flags (move (flags),
+ true /* first_only */,
+ &copy_empty));
regex rge (parse_regex (re, fl.first));
names r;
try
{
- for (auto& v: s)
+ for (auto& n: ns)
{
- string s (regex_replace_search (convert<string> (move (v)),
+ string s (regex_replace_search (convert<string> (move (n)),
rge,
fmt,
fl.second).first);
- if (!s.empty ())
+ if (copy_empty || !s.empty ())
r.emplace_back (move (s));
}
}
@@ -411,7 +415,7 @@ namespace build2
if (s == "icase")
r |= regex::icase;
else
- throw invalid_argument ("invalid flag '" + s + "'");
+ throw invalid_argument ("invalid flag '" + s + '\'');
}
}
@@ -422,67 +426,141 @@ namespace build2
// See find_match() overloads (below) for details.
//
static bool
- find_match (names&& s, const string& re, optional<names>&& flags)
+ find_match (names&& ns, const string& re, optional<names>&& flags)
{
regex::flag_type fl (parse_find_flags (move (flags)));
regex rge (parse_regex (re, fl));
- for (auto& v: s)
+ for (auto& n: ns)
{
- if (regex_match (convert<string> (move (v)), rge))
+ if (regex_match (convert<string> (move (n)), rge))
return true;
}
return false;
}
+ // Return a list of elements that match (matching is true) or don't match
+ // (matching is false) the regular expression. See filter_match() and
+ // filter_out_match() overloads (below) for details.
+ //
+ // The flags are parsed with parse_find_flags() and the pattern is compiled
+ // with parse_regex(). The selected elements are returned in the order in
+ // which they appear in the list. Note that the list is consumed: its
+ // elements are moved from.
+ //
+ static names
+ filter_match (names&& ns,
+ const string& re,
+ optional<names>&& flags,
+ bool matching)
+ {
+ regex::flag_type fl (parse_find_flags (move (flags)));
+ regex rge (parse_regex (re, fl));
+
+ names r;
+
+ for (name& n: ns)
+ {
+ // Note that we need to preserve the element while converting it to
+ // string since we may add it to the resulting list. But let's optimize
+ // this for the simple value case by round-tripping it through the
+ // string.
+ //
+ // That is, a simple name is moved into the conversion and, if selected,
+ // recreated from the resulting string, while any other name is copied
+ // for the conversion so that the original can be moved into the result.
+ //
+ bool s (n.simple ());
+ string v (convert<string> (s ? move (n) : name (n)));
+
+ // Comparing the outcome with matching selects the matching elements
+ // for the filter and the non-matching ones for the filter-out case.
+ //
+ if (regex_match (v, rge) == matching)
+ r.emplace_back (s ? name (move (v)) : move (n));
+ }
+
+ return r;
+ }
+
// Return true if a part of any of the list elements matches the regular
// expression. See find_search() overloads (below) for details.
//
static bool
- find_search (names&& s, const string& re, optional<names>&& flags)
+ find_search (names&& ns, const string& re, optional<names>&& flags)
{
regex::flag_type fl (parse_find_flags (move (flags)));
regex rge (parse_regex (re, fl));
- for (auto& v: s)
+ for (auto& n: ns)
{
- if (regex_search (convert<string> (move (v)), rge))
+ if (regex_search (convert<string> (move (n)), rge))
return true;
}
return false;
}
+ // Return those elements of a list for which there is a match (matching is
+ // true) or no match (matching is false) between the regular expression and
+ // some part of the element. See filter_search() and filter_out_search()
+ // overloads (below) for details.
+ //
+ // The flags are parsed with parse_find_flags() and the pattern is compiled
+ // with parse_regex(). The selected elements are returned in the order in
+ // which they appear in the list. Note that the list is consumed: its
+ // elements are moved from.
+ //
+ static names
+ filter_search (names&& ns,
+ const string& re,
+ optional<names>&& flags,
+ bool matching)
+ {
+ regex::flag_type fl (parse_find_flags (move (flags)));
+ regex rge (parse_regex (re, fl));
+
+ names r;
+
+ for (auto& n: ns)
+ {
+ // Note that we need to preserve the element while converting it to
+ // string since we may add it to the resulting list. But let's optimize
+ // this for the simple value case by round-tripping it through the
+ // string.
+ //
+ // That is, a simple name is moved into the conversion and, if selected,
+ // recreated from the resulting string, while any other name is copied
+ // for the conversion so that the original can be moved into the result.
+ //
+ bool s (n.simple ());
+ string v (convert<string> (s ? move (n) : name (n)));
+
+ // Comparing the outcome with matching selects the elements with a match
+ // for the filter and those without one for the filter-out case.
+ //
+ if (regex_search (v, rge) == matching)
+ r.emplace_back (s ? name (move (v)) : move (n));
+ }
+
+ return r;
+ }
+
// Replace matched parts of list elements using the format string and
// concatenate the transformed elements. See merge() overloads (below) for
// details.
//
static names
- merge (names&& s,
+ merge (names&& ns,
const string& re,
const string& fmt,
optional<string>&& delim,
optional<names>&& flags)
{
- auto fl (parse_replacement_flags (move (flags)));
+ bool copy_empty (false);
+ auto fl (parse_replacement_flags (move (flags),
+ true /* first_only */,
+ &copy_empty));
regex rge (parse_regex (re, fl.first));
string rs;
try
{
- for (auto& v: s)
+ bool first (true);
+ for (auto& n: ns)
{
- string s (regex_replace_search (convert<string> (move (v)),
+ string s (regex_replace_search (convert<string> (move (n)),
rge,
fmt,
fl.second).first);
- if (!s.empty ())
+ if (copy_empty || !s.empty ())
{
- if (!rs.empty () && delim)
- rs.append (*delim);
+ if (delim)
+ {
+ if (first)
+ first = false;
+ else
+ rs.append (*delim);
+ }
rs.append (s);
}
@@ -510,129 +588,203 @@ namespace build2
//
// Match a value of an arbitrary type against the regular expression.
// Convert the value to string prior to matching. Return the boolean value
- // unless return_subs flag is specified (see below), in which case return
- // names (NULL if no match).
+ // unless the `return_subs` flag is specified (see below), in which case
+ // return names (or `null` if no match).
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
//
- // return_subs - return names (rather than boolean), that contain
- // sub-strings that match the marked sub-expressions and
- // NULL if no match
+ // return_subs - return names (rather than boolean), that contain
+ // sub-strings that match the marked sub-expressions
+ // and null if no match
//
- f[".match"] += [](value s, string re, optional<names> flags)
+ f[".match"] += [](value v, string re, optional<names> flags)
{
- return match (move (s), re, move (flags));
+ return match (move (v), re, move (flags));
};
- f[".match"] += [](value s, names re, optional<names> flags)
+ f[".match"] += [](value v, names re, optional<names> flags)
{
- return match (move (s), convert<string> (move (re)), move (flags));
+ return match (move (v), convert<string> (move (re)), move (flags));
};
// $regex.find_match(<vals>, <pat> [, <flags>])
//
// Match list elements against the regular expression and return true if
- // the match is found. Convert the elements to string prior to matching.
+ // the match is found. Convert the elements to strings prior to matching.
+ //
+ // The following flags are supported:
+ //
+ // icase - match ignoring case
+ //
+ f[".find_match"] += [](names ns, string re, optional<names> flags)
+ {
+ // The pattern is already a string, so pass it through as is.
+ //
+ return find_match (move (ns), re, move (flags));
+ };
+
+ f[".find_match"] += [](names ns, names re, optional<names> flags)
+ {
+ // The pattern is passed as names, so convert it to string first.
+ //
+ return find_match (move (ns), convert<string> (move (re)), move (flags));
+ };
+
+ // $regex.filter_match(<vals>, <pat> [, <flags>])
+ // $regex.filter_out_match(<vals>, <pat> [, <flags>])
+ //
+ // Return elements of a list that match (`filter`) or do not match
+ // (`filter_out`) the regular expression. Convert the elements to strings
+ // prior to matching.
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
//
- f[".find_match"] += [](names s, string re, optional<names> flags)
+ f[".filter_match"] += [](names ns, string re, optional<names> flags)
+ {
+ return filter_match (move (ns), re, move (flags), true /* matching */);
+ };
+
+ f[".filter_match"] += [](names ns, names re, optional<names> flags)
{
- return find_match (move (s), re, move (flags));
+ return filter_match (move (ns),
+ convert<string> (move (re)),
+ move (flags),
+ true /* matching */);
};
- f[".find_match"] += [](names s, names re, optional<names> flags)
+ f[".filter_out_match"] += [](names ns, string re, optional<names> flags)
{
- return find_match (move (s), convert<string> (move (re)), move (flags));
+ // The pattern is already a string, so pass it through as is. Passing
+ // false for matching keeps only the elements that do not match.
+ //
+ return filter_match (move (ns), re, move (flags), false /* matching */);
+ };
+
+ f[".filter_out_match"] += [](names ns, names re, optional<names> flags)
+ {
+ return filter_match (move (ns),
+ convert<string> (move (re)),
+ move (flags),
+ false /* matching */);
};
// $regex.search(<val>, <pat> [, <flags>])
//
// Determine if there is a match between the regular expression and some
// part of a value of an arbitrary type. Convert the value to string prior
- // to searching. Return the boolean value unless return_match or
- // return_subs flag is specified (see below) in which case return names
- // (NULL if no match).
+ // to searching. Return the boolean value unless the `return_match` or
+ // `return_subs` flag is specified (see below), in which case return names
+ // (or `null` if no match).
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
//
- // return_match - return names (rather than boolean), that contain a
- // sub-string that matches the whole regular expression and
- // NULL if no match
+ // return_match - return names (rather than boolean), that contain a
+ // sub-string that matches the whole regular expression
+ // and null if no match
//
- // return_subs - return names (rather than boolean), that contain
- // sub-strings that match the marked sub-expressions and
- // NULL if no match
+ // return_subs - return names (rather than boolean), that contain
+ // sub-strings that match the marked sub-expressions
+ // and null if no match
//
- // If both return_match and return_subs flags are specified then the
+ // If both `return_match` and `return_subs` flags are specified then the
// sub-string that matches the whole regular expression comes first.
//
- f[".search"] += [](value s, string re, optional<names> flags)
+ f[".search"] += [](value v, string re, optional<names> flags)
{
- return search (move (s), re, move (flags));
+ return search (move (v), re, move (flags));
};
- f[".search"] += [](value s, names re, optional<names> flags)
+ f[".search"] += [](value v, names re, optional<names> flags)
{
- return search (move (s), convert<string> (move (re)), move (flags));
+ return search (move (v), convert<string> (move (re)), move (flags));
};
// $regex.find_search(<vals>, <pat> [, <flags>])
//
// Determine if there is a match between the regular expression and some
- // part of any of the list elements. Convert the elements to string prior
+ // part of any of the list elements. Convert the elements to strings prior
// to matching.
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
//
- f[".find_search"] += [](names s, string re, optional<names> flags)
+ f[".find_search"] += [](names ns, string re, optional<names> flags)
{
- return find_search (move (s), re, move (flags));
+ return find_search (move (ns), re, move (flags));
};
- f[".find_search"] += [](names s, names re, optional<names> flags)
+ f[".find_search"] += [](names ns, names re, optional<names> flags)
{
- return find_search (move (s),
+ return find_search (move (ns),
convert<string> (move (re)),
move (flags));
};
+ // $regex.filter_search(<vals>, <pat> [, <flags>])
+ // $regex.filter_out_search(<vals>, <pat> [, <flags>])
+ //
+ // Return elements of a list for which there is a match (`filter`) or no
+ // match (`filter_out`) between the regular expression and some part of
+ // the element. Convert the elements to strings prior to matching.
+ //
+ // The following flags are supported:
+ //
+ // icase - match ignoring case
+ //
+ f[".filter_search"] += [](names ns, string re, optional<names> flags)
+ {
+ // The pattern is already a string, so pass it through as is. Passing
+ // true for matching keeps the elements in which a match is found.
+ //
+ return filter_search (move (ns), re, move (flags), true /* matching */);
+ };
+
+ f[".filter_search"] += [](names ns, names re, optional<names> flags)
+ {
+ // The pattern is passed as names, so convert it to string first.
+ //
+ return filter_search (move (ns),
+ convert<string> (move (re)),
+ move (flags),
+ true /* matching */);
+ };
+
+ f[".filter_out_search"] += [](names ns, string re, optional<names> flags)
+ {
+ // The pattern is already a string, so pass it through as is. Passing
+ // false for matching keeps the elements in which no match is found.
+ //
+ return filter_search (move (ns), re, move (flags), false /* matching */);
+ };
+
+ f[".filter_out_search"] += [](names ns, names re, optional<names> flags)
+ {
+ // The pattern is passed as names, so convert it to string first.
+ //
+ return filter_search (move (ns),
+ convert<string> (move (re)),
+ move (flags),
+ false /* matching */);
+ };
+
// $regex.replace(<val>, <pat>, <fmt> [, <flags>])
//
// Replace matched parts in a value of an arbitrary type, using the format
// string. Convert the value to string prior to matching. The result value
// is always untyped, regardless of the argument type.
//
- // Substitution escape sequences are extended with a subset of Perl
- // sequences (see libbutl/regex.hxx for details).
- //
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
//
- // format_first_only - only replace the first match
+ // format_first_only - only replace the first match
//
- // format_no_copy - do not copy unmatched value parts into the result
+ // format_no_copy - do not copy unmatched value parts into the
+ // result
//
- // If both format_first_only and format_no_copy flags are specified then
- // the result will only contain the replacement of the first match.
+ // If both `format_first_only` and `format_no_copy` flags are specified
+ // then the result will only contain the replacement of the first match.
//
- f[".replace"] += [](value s, string re, string fmt, optional<names> flags)
+ // See also `$string.replace()`.
+ //
+ f[".replace"] += [](value v, string re, string fmt, optional<names> flags)
{
- return replace (move (s), re, fmt, move (flags));
+ return replace (move (v), re, fmt, move (flags));
};
- f[".replace"] += [](value s, names re, names fmt, optional<names> flags)
+ f[".replace"] += [](value v, names re, names fmt, optional<names> flags)
{
- return replace (move (s),
+ return replace (move (v),
convert<string> (move (re)),
convert<string> (move (fmt)),
move (flags));
@@ -641,38 +793,38 @@ namespace build2
// $regex.replace_lines(<val>, <pat>, <fmt> [, <flags>])
//
// Convert the value to string, parse it into lines and for each line
- // apply the $regex.replace() function with the specified pattern, format,
- // and flags. If the format argument is NULL, omit the "all-NULL"
- // replacements for the matched lines from the result. Return unmatched
- // lines and line replacements as a name list unless return_lines flag is
- // specified (see below), in which case return a single multi-line simple
- // name value.
+ // apply the `$regex.replace()` function with the specified pattern,
+ // format, and flags. If the format argument is `null`, omit the
+ // "all-`null`" replacements for the matched lines from the result. Return
+ // unmatched lines and line replacements as a `name` list unless
+ // `return_lines` flag is specified (see below), in which case return a
+ // single multi-line simple `name` value.
//
- // The following flags are supported in addition to the $regex.replace()
- // function flags:
+ // The following flags are supported in addition to the `$regex.replace()`
+ // function's flags:
//
- // return_lines - return the simple name (rather than a name list)
- // containing the unmatched lines and line replacements
- // separated with newlines.
+ // return_lines - return the simple name (rather than a name list)
+ // containing the unmatched lines and line replacements
+ // separated with newlines.
//
- // Note that if format_no_copy is specified, unmatched lines are not
+ // Note that if `format_no_copy` is specified, unmatched lines are not
// copied either.
//
- f[".replace_lines"] += [](value s,
- string re,
- string fmt,
- optional<names> flags)
+ f[".replace_lines"] += [](value v,
+ string re,
+ string fmt,
+ optional<names> flags)
{
- return replace_lines (move (s), re, move (fmt), move (flags));
+ return replace_lines (move (v), re, move (fmt), move (flags));
};
- f[".replace_lines"] += [](value s,
- names re,
- names* fmt,
- optional<names> flags)
+ f[".replace_lines"] += [](value v,
+ names re,
+ names* fmt,
+ optional<names> flags)
{
return replace_lines (
- move (s),
+ move (v),
convert<string> (move (re)),
(fmt != nullptr
? optional<string> (convert<string> (move (*fmt)))
@@ -683,26 +835,27 @@ namespace build2
// $regex.split(<val>, <pat>, <fmt> [, <flags>])
//
// Split a value of an arbitrary type into a list of unmatched value parts
- // and replacements of the matched parts, omitting empty ones. Convert the
- // value to string prior to matching.
- //
- // Substitution escape sequences are extended with a subset of Perl
- // sequences (see libbutl/regex.hxx for details).
+ // and replacements of the matched parts, omitting empty ones (unless the
+ // `format_copy_empty` flag is specified). Convert the value to string
+ // prior to matching.
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
+ //
+ // format_no_copy - do not copy unmatched value parts into the
+ // result
//
- // format_no_copy - do not copy unmatched value parts into the result
+ // format_copy_empty - copy empty elements into the result
//
- f[".split"] += [](value s, string re, string fmt, optional<names> flags)
+ f[".split"] += [](value v, string re, string fmt, optional<names> flags)
{
- return split (move (s), re, fmt, move (flags));
+ return split (move (v), re, fmt, move (flags));
};
- f[".split"] += [](value s, names re, names fmt, optional<names> flags)
+ f[".split"] += [](value v, names re, names fmt, optional<names> flags)
{
- return split (move (s),
+ return split (move (v),
convert<string> (move (re)),
convert<string> (move (fmt)),
move (flags));
@@ -711,45 +864,52 @@ namespace build2
// $regex.merge(<vals>, <pat>, <fmt> [, <delim> [, <flags>]])
//
// Replace matched parts in a list of elements using the regex format
- // string. Convert the elements to string prior to matching. The result
+ // string. Convert the elements to strings prior to matching. The result
// value is untyped and contains concatenation of transformed non-empty
- // elements optionally separated with a delimiter.
- //
- // Substitution escape sequences are extended with a subset of Perl
- // sequences (see libbutl/regex.hxx for details).
+ // elements (unless the `format_copy_empty` flag is specified) optionally
+ // separated with a delimiter.
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
+ //
+ // format_first_only - only replace the first match
//
- // format_first_only - only replace the first match
+ // format_no_copy - do not copy unmatched value parts into the
+ // result
//
- // format_no_copy - do not copy unmatched value parts into the result
+ // format_copy_empty - copy empty elements into the result
//
- // If both format_first_only and format_no_copy flags are specified then
- // the result will be a concatenation of only the first match
+ // If both `format_first_only` and `format_no_copy` flags are specified
+ // then the result will be a concatenation of only the first match
// replacements.
//
- f[".merge"] += [](names s,
- string re,
- string fmt,
- optional<string> delim,
- optional<names> flags)
- {
- return merge (move (s), re, fmt, move (delim), move (flags));
+ f[".merge"] += [](names ns,
+ string re,
+ string fmt,
+ optional<string*> delim,
+ optional<names> flags)
+ {
+ return merge (move (ns),
+ re,
+ fmt,
+ delim && *delim != nullptr
+ ? move (**delim)
+ : optional<string> (),
+ move (flags));
};
- f[".merge"] += [](names s,
- names re,
- names fmt,
- optional<names> delim,
- optional<names> flags)
+ f[".merge"] += [](names ns,
+ names re,
+ names fmt,
+ optional<names*> delim,
+ optional<names> flags)
{
- return merge (move (s),
+ return merge (move (ns),
convert<string> (move (re)),
convert<string> (move (fmt)),
- delim
- ? convert<string> (move (*delim))
+ delim && *delim != nullptr
+ ? convert<string> (move (**delim))
: optional<string> (),
move (flags));
};
@@ -757,32 +917,33 @@ namespace build2
// $regex.apply(<vals>, <pat>, <fmt> [, <flags>])
//
// Replace matched parts of each element in a list using the regex format
- // string. Convert the elements to string prior to matching. Return a list
- // of transformed elements, omitting the empty ones.
- //
- // Substitution escape sequences are extended with a subset of Perl
- // sequences (see libbutl/regex.hxx for details).
+ // string. Convert the elements to strings prior to matching. Return a
+ // list of transformed elements, omitting the empty ones (unless the
+ // `format_copy_empty` flag is specified).
//
// The following flags are supported:
//
- // icase - match ignoring case
+ // icase - match ignoring case
+ //
+ // format_first_only - only replace the first match
//
- // format_first_only - only replace the first match
+ // format_no_copy - do not copy unmatched value parts into the
+ // result
//
- // format_no_copy - do not copy unmatched value parts into the result
+ // format_copy_empty - copy empty elements into the result
//
- // If both format_first_only and format_no_copy flags are specified then
- // the result elements will only contain the replacement of the first
+ // If both `format_first_only` and `format_no_copy` flags are specified
+ // then the result elements will only contain the replacement of the first
// match.
//
- f[".apply"] += [](names s, string re, string fmt, optional<names> flags)
+ f[".apply"] += [](names ns, string re, string fmt, optional<names> flags)
{
- return apply (move (s), re, fmt, move (flags));
+ return apply (move (ns), re, fmt, move (flags));
};
- f[".apply"] += [](names s, names re, names fmt, optional<names> flags)
+ f[".apply"] += [](names ns, names re, names fmt, optional<names> flags)
{
- return apply (move (s),
+ return apply (move (ns),
convert<string> (move (re)),
convert<string> (move (fmt)),
move (flags));