aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-08-30 13:18:50 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-08-30 23:00:29 +0300
commit32df5529f791760161a027a1a7408bc92976a3cd (patch)
tree170064adddd321c080d89a0bf85255db6d644f7f
parent909bbc79fd2dc1d3a36e3109ba32a22e162b2b7f (diff)
Add $regex.split(), $regex.merge() and $regex.apply() functions
-rw-r--r--build2/functions-regex.cxx379
-rw-r--r--tests/function/regex/testscript96
2 files changed, 452 insertions, 23 deletions
diff --git a/build2/functions-regex.cxx b/build2/functions-regex.cxx
index bdecc14..3185b85 100644
--- a/build2/functions-regex.cxx
+++ b/build2/functions-regex.cxx
@@ -97,7 +97,7 @@ namespace build2
}
}
- return value (r);
+ return value (move (r));
}
// Determine if there is a match between the regular expression and some
@@ -164,20 +164,12 @@ namespace build2
}
}
- return value (r);
+ return value (move (r));
}
- // Replace matched parts in a value of an arbitrary type, using the format
- // string. See replace() overloads (below) for details.
- //
- static names
- replace (value&& v,
- const string& re,
- const string& fmt,
- optional<names>&& flags)
+ static pair<regex::flag_type, regex_constants::match_flag_type>
+ parse_replacement_flags (optional<names>&& flags, bool first_only = true)
{
- // Parse flags.
- //
regex::flag_type rf (regex::ECMAScript);
regex_constants::match_flag_type mf (regex_constants::match_default);
@@ -189,7 +181,7 @@ namespace build2
if (s == "icase")
rf |= regex::icase;
- else if (s == "format_first_only")
+ else if (first_only && s == "format_first_only")
mf |= regex_constants::format_first_only;
else if (s == "format_no_copy")
mf |= regex_constants::format_no_copy;
@@ -198,18 +190,29 @@ namespace build2
}
}
- // Parse regex.
- //
- regex rge (parse_regex (re, rf));
+ return make_pair (rf, mf);
+ }
+
+ // Replace matched parts in a value of an arbitrary type, using the format
+ // string. See replace() overloads (below) for details.
+ //
+ static names
+ replace (value&& v,
+ const string& re,
+ const string& fmt,
+ optional<names>&& flags)
+ {
+ auto fl (parse_replacement_flags (move (flags)));
+ regex rge (parse_regex (re, fl.first));
- // Replace.
- //
names r;
try
{
- string s (to_string (move (v)));
- r.emplace_back (regex_replace_ex (s, rge, fmt, mf).first);
+ r.emplace_back (regex_replace_ex (to_string (move (v)),
+ rge,
+ fmt,
+ fl.second).first);
}
catch (const regex_error& e)
{
@@ -219,6 +222,120 @@ namespace build2
return r;
}
+ // Split a value of an arbitrary type into a list of unmatched value parts
+ // and replacements of the matched parts. See split() overloads (below) for
+ // details.
+ //
+ static names
+ split (value&& v,
+ const string& re,
+ const string& fmt,
+ optional<names>&& flags)
+ {
+ auto fl (parse_replacement_flags (move (flags), false));
+ regex rge (parse_regex (re, fl.first));
+
+ names r;
+
+ try
+ {
+ regex_replace_ex (to_string (move (v)), rge, fmt,
+ [&r] (string::const_iterator b,
+ string::const_iterator e)
+ {
+ if (b != e)
+ r.emplace_back (string (b, e));
+ },
+ fl.second);
+ }
+ catch (const regex_error& e)
+ {
+ fail << "unable to split" << e;
+ }
+
+ return r;
+ }
+
+ // Replace matched parts of list elements using the format string. See
+ // apply() overloads (below) for details.
+ //
+ static names
+ apply (names&& s,
+ const string& re,
+ const string& fmt,
+ optional<names>&& flags)
+ {
+ auto fl (parse_replacement_flags (move (flags)));
+ regex rge (parse_regex (re, fl.first));
+
+ names r;
+
+ try
+ {
+ for (auto& v: s)
+ {
+ string s (regex_replace_ex (convert<string> (move (v)),
+ rge,
+ fmt,
+ fl.second).first);
+
+ if (!s.empty ())
+ r.emplace_back (move (s));
+ }
+ }
+ catch (const regex_error& e)
+ {
+ fail << "unable to apply" << e;
+ }
+
+ return r;
+ }
+
+ // Replace matched parts of list elements using the format string and
+ // concatenate the transformed elements. See merge() overloads (below) for
+ // details.
+ //
+ static names
+ merge (names&& s,
+ const string& re,
+ const string& fmt,
+ optional<string>&& delim,
+ optional<names>&& flags)
+ {
+ auto fl (parse_replacement_flags (move (flags)));
+ regex rge (parse_regex (re, fl.first));
+
+ string rs;
+
+ try
+ {
+ for (auto& v: s)
+ {
+ string s (regex_replace_ex (convert<string> (move (v)),
+ rge,
+ fmt,
+ fl.second).first);
+
+ if (!s.empty ())
+ {
+ if (!rs.empty () && delim)
+ rs.append (*delim);
+
+ rs.append (s);
+ }
+
+ }
+ }
+ catch (const regex_error& e)
+ {
+ fail << "unable to merge" << e;
+ }
+
+ names r;
+ r.emplace_back (move (rs));
+ return r;
+ }
+
void
regex_functions ()
{
@@ -286,7 +403,7 @@ namespace build2
// is always untyped, regardless of the argument type.
//
// Substitution escape sequences are extended with a subset of Perl
- // sequences (see regex_replace_ex() for details).
+ // sequences (see butl::regex_replace_ex() for details).
//
// The following flags are supported:
//
@@ -294,10 +411,10 @@ namespace build2
//
// format_first_only - only replace the first match
//
- // format_no_copy - do not copy unmatched value parts to the result
+ // format_no_copy - do not copy unmatched value parts into the result
//
// If both format_first_only and format_no_copy flags are specified then
- // all the result will contain is the replacement of the first match.
+ // the result will only contain the replacement of the first match.
//
f[".replace"] = [](value s, string re, string fmt, optional<names> flags)
{
@@ -327,5 +444,221 @@ namespace build2
convert<string> (move (fmt)),
move (flags));
};
+
+ // split
+ //
+ // Split a value of an arbitrary type into a list of unmatched value parts
+ // and replacements of the matched parts, omitting empty ones. Convert the
+ // value to string prior to matching.
+ //
+ // Substitution escape sequences are extended with a subset of Perl
+ // sequences (see butl::regex_replace_ex() for details).
+ //
+ // The following flags are supported:
+ //
+ // icase - match ignoring case
+ //
+ // format_no_copy - do not copy unmatched value parts into the result
+ //
+ f[".split"] = [](value s, string re, string fmt, optional<names> flags)
+ {
+ return split (move (s), re, fmt, move (flags));
+ };
+
+ f[".split"] = [](value s, string re, names fmt, optional<names> flags)
+ {
+ return split (move (s),
+ re,
+ convert<string> (move (fmt)),
+ move (flags));
+ };
+
+ f[".split"] = [](value s, names re, string fmt, optional<names> flags)
+ {
+ return split (move (s),
+ convert<string> (move (re)),
+ fmt,
+ move (flags));
+ };
+
+ f[".split"] = [](value s, names re, names fmt, optional<names> flags)
+ {
+ return split (move (s),
+ convert<string> (move (re)),
+ convert<string> (move (fmt)),
+ move (flags));
+ };
+
+ // merge
+ //
+ // Replace matched parts in a list of elements using the regex format
+ // string. Convert the elements to string prior to matching. The result
+ // value is untyped and contains the concatenation of the transformed non-empty
+ // elements optionally separated with a delimiter.
+ //
+ // Substitution escape sequences are extended with a subset of Perl
+ // sequences (see butl::regex_replace_ex() for details).
+ //
+ // The following flags are supported:
+ //
+ // icase - match ignoring case
+ //
+ // format_first_only - only replace the first match
+ //
+ // format_no_copy - do not copy unmatched value parts into the result
+ //
+ // If both format_first_only and format_no_copy flags are specified then
+ // the result will be a concatenation of only the first match
+ // replacements.
+ //
+ f[".merge"] = [](names s,
+ string re,
+ string fmt,
+ optional<string> delim,
+ optional<names> flags)
+ {
+ return merge (move (s), re, fmt, move (delim), move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ string re,
+ names fmt,
+ optional<string> delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ re,
+ convert<string> (move (fmt)),
+ move (delim),
+ move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ names re,
+ string fmt,
+ optional<string> delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ convert<string> (move (re)),
+ fmt,
+ move (delim),
+ move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ names re,
+ names fmt,
+ optional<string> delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ convert<string> (move (re)),
+ convert<string> (move (fmt)),
+ move (delim),
+ move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ string re,
+ string fmt,
+ names delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ re,
+ fmt,
+ convert<string> (move (delim)),
+ move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ string re,
+ names fmt,
+ names delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ re,
+ convert<string> (move (fmt)),
+ convert<string> (move (delim)),
+ move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ names re,
+ string fmt,
+ names delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ convert<string> (move (re)),
+ fmt,
+ convert<string> (move (delim)),
+ move (flags));
+ };
+
+ f[".merge"] = [](names s,
+ names re,
+ names fmt,
+ names delim,
+ optional<names> flags)
+ {
+ return merge (move (s),
+ convert<string> (move (re)),
+ convert<string> (move (fmt)),
+ convert<string> (move (delim)),
+ move (flags));
+ };
+
+ // apply
+ //
+ // Replace matched parts of each element in a list using the regex format
+ // string. Convert the elements to string prior to matching. Return a list
+ // of transformed elements, omitting the empty ones.
+ //
+ // Substitution escape sequences are extended with a subset of Perl
+ // sequences (see butl::regex_replace_ex() for details).
+ //
+ // The following flags are supported:
+ //
+ // icase - match ignoring case
+ //
+ // format_first_only - only replace the first match
+ //
+ // format_no_copy - do not copy unmatched value parts into the result
+ //
+ // If both format_first_only and format_no_copy flags are specified then
+ // the result elements will only contain the replacement of the first
+ // match.
+ //
+ f[".apply"] = [](names s, string re, string fmt, optional<names> flags)
+ {
+ return apply (move (s), re, fmt, move (flags));
+ };
+
+ f[".apply"] = [](names s, string re, names fmt, optional<names> flags)
+ {
+ return apply (move (s),
+ re,
+ convert<string> (move (fmt)),
+ move (flags));
+ };
+
+ f[".apply"] = [](names s, names re, string fmt, optional<names> flags)
+ {
+ return apply (move (s),
+ convert<string> (move (re)),
+ fmt,
+ move (flags));
+ };
+
+ f[".apply"] = [](names s, names re, names fmt, optional<names> flags)
+ {
+ return apply (move (s),
+ convert<string> (move (re)),
+ convert<string> (move (fmt)),
+ move (flags));
+ };
}
}
diff --git a/tests/function/regex/testscript b/tests/function/regex/testscript
index deeefa5..6cce873 100644
--- a/tests/function/regex/testscript
+++ b/tests/function/regex/testscript
@@ -253,3 +253,99 @@
}
}
}
+
+: split
+:
+{
+ : all-parts
+ :
+ : Note that 3 parts are printed here ('|abc|', ' ' and '|def|'), separated by
+ : the space character.
+ :
+ $* <<EOI >'|abc| |def|'
+ print $regex.split('abc def', '(\S+)', '|\1|')
+ EOI
+
+ : no-copy
+ :
+ : Note that 2 parts are printed here ('|abc|' and '|def|'), separated by the
+ : space character.
+ :
+ $* <<EOI >'|abc| |def|'
+ print $regex.split('abc def', '(\S+)', '|\1|', format_no_copy)
+ EOI
+
+ : unmatched
+ :
+ : Note that only the unmatched part is printed here (' '). Empty replacements are
+ : omitted.
+ :
+ $* <<EOI >' '
+ print $regex.split('abc def', '(\S+)', '')
+ EOI
+
+ : include-options
+ :
+ {
+ : quoted
+ :
+ $* <<EOI >'|-Ic:/dir 1| |-IC:/dir2| |-IC:/dir3| |-IC:/dir4| ||'
+ opts = '"-Ic:/dir 1" "-IC:/dir2" "-IC:/dir3" "-IC:/dir4" ""'
+ print $regex.split($opts, ' *"([^"]*)" *', '|\1|')
+ EOI
+
+ : quoted-unquoted
+ :
+ : Note that one of the two captures (\1\2) is always empty as they are
+ : alternative ones.
+ :
+ $* <<EOI >'|-Ic:/dir 1| |-IC:/dir2| |-IC:/dir3| |-IC:/dir4| ||'
+ opts = '"-Ic:/dir 1" -IC:/dir2 "-IC:/dir3" "-IC:/dir4" ""'
+ print $regex.split($opts, '"([^"]*)"|([^" ]+)', '|\1\2|', format_no_copy)
+ EOI
+ }
+}
+
+: apply
+:
+{
+ : all-parts
+ :
+ $* <<EOI >'xbc cbx'
+ print $regex.apply(abc cba, 'a', 'x')
+ EOI
+
+ : omit-empty
+ :
+ $* <<EOI >'bc cb'
+ print $regex.apply(abc a cba, 'a', '')
+ EOI
+}
+
+: merge
+:
+{
+ : all-parts
+ :
+ $* <<EOI >'xbccbx'
+ print $regex.merge(abc cba, 'a', 'x')
+ EOI
+
+ : omit-empty
+ :
+ $* <<EOI >'bccb'
+ print $regex.merge(abc a cba, 'a', '')
+ EOI
+
+ : delim
+ :
+ $* <<EOI >'xbc-cbx'
+ print $regex.merge(abc cba, 'a', 'x', '-')
+ EOI
+
+ : string-delim
+ :
+ $* <<EOI >'xbc-cbx'
+ print $regex.merge(abc cba, 'a', 'x', [string] '-')
+ EOI
+}