diff options
Diffstat (limited to 'libbuild2/functions-regex.cxx')
-rw-r--r-- | libbuild2/functions-regex.cxx | 475 |
1 files changed, 318 insertions, 157 deletions
diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx index 2f0d122..cf3ffd0 100644 --- a/libbuild2/functions-regex.cxx +++ b/libbuild2/functions-regex.cxx @@ -21,7 +21,7 @@ namespace build2 // Optimize for the string value type. // if (v.type != &value_traits<string>::value_type) - untypify (v); + untypify (v, true /* reduce */); return convert<string> (move (v)); } @@ -69,7 +69,7 @@ namespace build2 else if (s == "return_subs") subs = true; else - throw invalid_argument ("invalid flag '" + s + "'"); + throw invalid_argument ("invalid flag '" + s + '\''); } } @@ -92,10 +92,7 @@ namespace build2 names r; for (size_t i (1); i != m.size (); ++i) - { - if (m[i].matched) - r.emplace_back (m.str (i)); - } + r.emplace_back (m[i].matched ? m.str (i) : string ()); return value (move (r)); } @@ -129,7 +126,7 @@ namespace build2 else if (s == "return_subs") subs = true; else - throw invalid_argument ("invalid flag '" + s + "'"); + throw invalid_argument ("invalid flag '" + s + '\''); } } @@ -161,10 +158,7 @@ namespace build2 if (subs) { for (size_t i (1); i != m.size (); ++i) - { - if (m[i].matched) - r.emplace_back (m.str (i)); - } + r.emplace_back (m[i].matched ? 
m.str (i) : string ()); } return value (move (r)); @@ -174,7 +168,9 @@ namespace build2 } static pair<regex::flag_type, regex_constants::match_flag_type> - parse_replacement_flags (optional<names>&& flags, bool first_only = true) + parse_replacement_flags (optional<names>&& flags, + bool first_only = true, + bool* copy_empty = nullptr) { regex::flag_type rf (regex::ECMAScript); regex_constants::match_flag_type mf (regex_constants::match_default); @@ -191,8 +187,10 @@ namespace build2 mf |= regex_constants::format_first_only; else if (s == "format_no_copy") mf |= regex_constants::format_no_copy; + else if (copy_empty != nullptr && s == "format_copy_empty") + *copy_empty = true; else - throw invalid_argument ("invalid flag '" + s + "'"); + throw invalid_argument ("invalid flag '" + s + '\''); } } @@ -334,7 +332,10 @@ namespace build2 const string& fmt, optional<names>&& flags) { - auto fl (parse_replacement_flags (move (flags), false)); + bool copy_empty (false); + auto fl (parse_replacement_flags (move (flags), + false /* first_only */, + &copy_empty)); regex rge (parse_regex (re, fl.first)); names r; @@ -342,10 +343,10 @@ namespace build2 try { regex_replace_search (to_string (move (v)), rge, fmt, - [&r] (string::const_iterator b, - string::const_iterator e) + [copy_empty, &r] (string::const_iterator b, + string::const_iterator e) { - if (b != e) + if (copy_empty || b != e) r.emplace_back (string (b, e)); }, fl.second); @@ -364,26 +365,29 @@ namespace build2 // apply() overloads (below) for details. 
// static names - apply (names&& s, + apply (names&& ns, const string& re, const string& fmt, optional<names>&& flags) { - auto fl (parse_replacement_flags (move (flags))); + bool copy_empty (false); + auto fl (parse_replacement_flags (move (flags), + true /* first_only */, + &copy_empty)); regex rge (parse_regex (re, fl.first)); names r; try { - for (auto& v: s) + for (auto& n: ns) { - string s (regex_replace_search (convert<string> (move (v)), + string s (regex_replace_search (convert<string> (move (n)), rge, fmt, fl.second).first); - if (!s.empty ()) + if (copy_empty || !s.empty ()) r.emplace_back (move (s)); } } @@ -411,7 +415,7 @@ namespace build2 if (s == "icase") r |= regex::icase; else - throw invalid_argument ("invalid flag '" + s + "'"); + throw invalid_argument ("invalid flag '" + s + '\''); } } @@ -422,67 +426,141 @@ namespace build2 // See find_match() overloads (below) for details. // static bool - find_match (names&& s, const string& re, optional<names>&& flags) + find_match (names&& ns, const string& re, optional<names>&& flags) { regex::flag_type fl (parse_find_flags (move (flags))); regex rge (parse_regex (re, fl)); - for (auto& v: s) + for (auto& n: ns) { - if (regex_match (convert<string> (move (v)), rge)) + if (regex_match (convert<string> (move (n)), rge)) return true; } return false; } + // Return a list of elements that match (matching is true) or don't match + // (matching is false) the regular expression. See filter_match() and + // filter_out_match() overloads (below) for details. + // + static names + filter_match (names&& ns, + const string& re, + optional<names>&& flags, + bool matching) + { + regex::flag_type fl (parse_find_flags (move (flags))); + regex rge (parse_regex (re, fl)); + + names r; + + for (name& n: ns) + { + // Note that we need to preserve the element while converting it to + // string since we may add it to the resulting list. 
But let's optimize + // this for the simple value case by round-tripping it through the + // string. + // + bool s (n.simple ()); + string v (convert<string> (s ? move (n) : name (n))); + + if (regex_match (v, rge) == matching) + r.emplace_back (s ? name (move (v)) : move (n)); + } + + return r; + } + // Return true if a part of any of the list elements matches the regular // expression. See find_search() overloads (below) for details. // static bool - find_search (names&& s, const string& re, optional<names>&& flags) + find_search (names&& ns, const string& re, optional<names>&& flags) { regex::flag_type fl (parse_find_flags (move (flags))); regex rge (parse_regex (re, fl)); - for (auto& v: s) + for (auto& n: ns) { - if (regex_search (convert<string> (move (v)), rge)) + if (regex_search (convert<string> (move (n)), rge)) return true; } return false; } + // Return those elements of a list which have a match (matching is true) or + // have no match (matching is false) between the regular expression and + // some/any part of the element. See filter_search() and filter_out_search() + // overloads (below) for details. + // + static names + filter_search (names&& ns, + const string& re, + optional<names>&& flags, + bool matching) + { + regex::flag_type fl (parse_find_flags (move (flags))); + regex rge (parse_regex (re, fl)); + + names r; + + for (auto& n: ns) + { + // Note that we need to preserve the element while converting it to + // string since we may add it to the resulting list. But let's optimize + // this for the simple value case by round-tripping it through the + // string. + // + bool s (n.simple ()); + string v (convert<string> (s ? move (n) : name (n))); + + if (regex_search (v, rge) == matching) + r.emplace_back (s ? name (move (v)) : move (n)); + } + + return r; + } + // Replace matched parts of list elements using the format string and // concatenate the transformed elements. See merge() overloads (below) for // details. 
// static names - merge (names&& s, + merge (names&& ns, const string& re, const string& fmt, optional<string>&& delim, optional<names>&& flags) { - auto fl (parse_replacement_flags (move (flags))); + bool copy_empty (false); + auto fl (parse_replacement_flags (move (flags), + true /* first_only */, + &copy_empty)); regex rge (parse_regex (re, fl.first)); string rs; try { - for (auto& v: s) + bool first (true); + for (auto& n: ns) { - string s (regex_replace_search (convert<string> (move (v)), + string s (regex_replace_search (convert<string> (move (n)), rge, fmt, fl.second).first); - if (!s.empty ()) + if (copy_empty || !s.empty ()) { - if (!rs.empty () && delim) - rs.append (*delim); + if (delim) + { + if (first) + first = false; + else + rs.append (*delim); + } rs.append (s); } @@ -510,129 +588,203 @@ namespace build2 // // Match a value of an arbitrary type against the regular expression. // Convert the value to string prior to matching. Return the boolean value - // unless return_subs flag is specified (see below), in which case return - // names (NULL if no match). + // unless `return_subs` flag is specified (see below), in which case + // return names (or `null` if no match). 
// // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case // - // return_subs - return names (rather than boolean), that contain - // sub-strings that match the marked sub-expressions and - // NULL if no match + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // and null if no match // - f[".match"] += [](value s, string re, optional<names> flags) + f[".match"] += [](value v, string re, optional<names> flags) { - return match (move (s), re, move (flags)); + return match (move (v), re, move (flags)); }; - f[".match"] += [](value s, names re, optional<names> flags) + f[".match"] += [](value v, names re, optional<names> flags) { - return match (move (s), convert<string> (move (re)), move (flags)); + return match (move (v), convert<string> (move (re)), move (flags)); }; // $regex.find_match(<vals>, <pat> [, <flags>]) // // Match list elements against the regular expression and return true if - // the match is found. Convert the elements to string prior to matching. + // the match is found. Convert the elements to strings prior to matching. + // + // The following flags are supported: + // + // icase - match ignoring case + // + f[".find_match"] += [](names ns, string re, optional<names> flags) + { + return find_match (move (ns), re, move (flags)); + }; + + f[".find_match"] += [](names ns, names re, optional<names> flags) + { + return find_match (move (ns), convert<string> (move (re)), move (flags)); + }; + + // $regex.filter_match(<vals>, <pat> [, <flags>]) + // $regex.filter_out_match(<vals>, <pat> [, <flags>]) + // + // Return elements of a list that match (`filter`) or do not match + // (`filter_out`) the regular expression. Convert the elements to strings + // prior to matching. 
// // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case // - f[".find_match"] += [](names s, string re, optional<names> flags) + f[".filter_match"] += [](names ns, string re, optional<names> flags) + { + return filter_match (move (ns), re, move (flags), true /* matching */); + }; + + f[".filter_match"] += [](names ns, names re, optional<names> flags) { - return find_match (move (s), re, move (flags)); + return filter_match (move (ns), + convert<string> (move (re)), + move (flags), + true /* matching */); }; - f[".find_match"] += [](names s, names re, optional<names> flags) + f[".filter_out_match"] += [](names s, string re, optional<names> flags) { - return find_match (move (s), convert<string> (move (re)), move (flags)); + return filter_match (move (s), re, move (flags), false /* matching */); + }; + + f[".filter_out_match"] += [](names ns, names re, optional<names> flags) + { + return filter_match (move (ns), + convert<string> (move (re)), + move (flags), + false /* matching */); }; // $regex.search(<val>, <pat> [, <flags>]) // // Determine if there is a match between the regular expression and some // part of a value of an arbitrary type. Convert the value to string prior - // to searching. Return the boolean value unless return_match or - // return_subs flag is specified (see below) in which case return names - // (NULL if no match). + // to searching. Return the boolean value unless `return_match` or + // `return_subs` flag is specified (see below) in which case return names + // (`null` if no match). 
// // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case // - // return_match - return names (rather than boolean), that contain a - // sub-string that matches the whole regular expression and - // NULL if no match + // return_match - return names (rather than boolean), that contain a + // sub-string that matches the whole regular expression + // and null if no match // - // return_subs - return names (rather than boolean), that contain - // sub-strings that match the marked sub-expressions and - // NULL if no match + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // and null if no match // - // If both return_match and return_subs flags are specified then the + // If both `return_match` and `return_subs` flags are specified then the // sub-string that matches the whole regular expression comes first. // - f[".search"] += [](value s, string re, optional<names> flags) + f[".search"] += [](value v, string re, optional<names> flags) { - return search (move (s), re, move (flags)); + return search (move (v), re, move (flags)); }; - f[".search"] += [](value s, names re, optional<names> flags) + f[".search"] += [](value v, names re, optional<names> flags) { - return search (move (s), convert<string> (move (re)), move (flags)); + return search (move (v), convert<string> (move (re)), move (flags)); }; // $regex.find_search(<vals>, <pat> [, <flags>]) // // Determine if there is a match between the regular expression and some - // part of any of the list elements. Convert the elements to string prior + // part of any of the list elements. Convert the elements to strings prior // to matching. 
// // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case // - f[".find_search"] += [](names s, string re, optional<names> flags) + f[".find_search"] += [](names ns, string re, optional<names> flags) { - return find_search (move (s), re, move (flags)); + return find_search (move (ns), re, move (flags)); }; - f[".find_search"] += [](names s, names re, optional<names> flags) + f[".find_search"] += [](names ns, names re, optional<names> flags) { - return find_search (move (s), + return find_search (move (ns), convert<string> (move (re)), move (flags)); }; + // $regex.filter_search(<vals>, <pat> [, <flags>]) + // $regex.filter_out_search(<vals>, <pat> [, <flags>]) + // + // Return elements of a list for which there is a match (`filter`) or no + // match (`filter_out`) between the regular expression and some part of + // the element. Convert the elements to strings prior to matching. + // + // The following flags are supported: + // + // icase - match ignoring case + // + f[".filter_search"] += [](names ns, string re, optional<names> flags) + { + return filter_search (move (ns), re, move (flags), true /* matching */); + }; + + f[".filter_search"] += [](names ns, names re, optional<names> flags) + { + return filter_search (move (ns), + convert<string> (move (re)), + move (flags), + true /* matching */); + }; + + f[".filter_out_search"] += [](names ns, string re, optional<names> flags) + { + return filter_search (move (ns), re, move (flags), false /* matching */); + }; + + f[".filter_out_search"] += [](names ns, names re, optional<names> flags) + { + return filter_search (move (ns), + convert<string> (move (re)), + move (flags), + false /* matching */); + }; + // $regex.replace(<val>, <pat>, <fmt> [, <flags>]) // // Replace matched parts in a value of an arbitrary type, using the format // string. Convert the value to string prior to matching. The result value // is always untyped, regardless of the argument type. 
// - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.hxx for details). - // // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case // - // format_first_only - only replace the first match + // format_first_only - only replace the first match // - // format_no_copy - do not copy unmatched value parts into the result + // format_no_copy - do not copy unmatched value parts into the + // result // - // If both format_first_only and format_no_copy flags are specified then - // the result will only contain the replacement of the first match. + // If both `format_first_only` and `format_no_copy` flags are specified + // then the result will only contain the replacement of the first match. // - f[".replace"] += [](value s, string re, string fmt, optional<names> flags) + // See also `$string.replace()`. + // + f[".replace"] += [](value v, string re, string fmt, optional<names> flags) { - return replace (move (s), re, fmt, move (flags)); + return replace (move (v), re, fmt, move (flags)); }; - f[".replace"] += [](value s, names re, names fmt, optional<names> flags) + f[".replace"] += [](value v, names re, names fmt, optional<names> flags) { - return replace (move (s), + return replace (move (v), convert<string> (move (re)), convert<string> (move (fmt)), move (flags)); @@ -641,38 +793,38 @@ namespace build2 // $regex.replace_lines(<val>, <pat>, <fmt> [, <flags>]) // // Convert the value to string, parse it into lines and for each line - // apply the $regex.replace() function with the specified pattern, format, - // and flags. If the format argument is NULL, omit the "all-NULL" - // replacements for the matched lines from the result. Return unmatched - // lines and line replacements as a name list unless return_lines flag is - // specified (see below), in which case return a single multi-line simple - // name value. 
+ // apply the `$regex.replace()` function with the specified pattern, + // format, and flags. If the format argument is `null`, omit the + // "all-`null`" replacements for the matched lines from the result. Return + // unmatched lines and line replacements as a `name` list unless + // `return_lines` flag is specified (see below), in which case return a + // single multi-line simple `name` value. // - // The following flags are supported in addition to the $regex.replace() - // function flags: + // The following flags are supported in addition to the `$regex.replace()` + // function's flags: // - // return_lines - return the simple name (rather than a name list) - // containing the unmatched lines and line replacements - // separated with newlines. + // return_lines - return the simple name (rather than a name list) + // containing the unmatched lines and line replacements + // separated with newlines. // - // Note that if format_no_copy is specified, unmatched lines are not + // Note that if `format_no_copy` is specified, unmatched lines are not // copied either. // - f[".replace_lines"] += [](value s, - string re, - string fmt, - optional<names> flags) + f[".replace_lines"] += [](value v, + string re, + string fmt, + optional<names> flags) { - return replace_lines (move (s), re, move (fmt), move (flags)); + return replace_lines (move (v), re, move (fmt), move (flags)); }; - f[".replace_lines"] += [](value s, - names re, - names* fmt, - optional<names> flags) + f[".replace_lines"] += [](value v, + names re, + names* fmt, + optional<names> flags) { return replace_lines ( - move (s), + move (v), convert<string> (move (re)), (fmt != nullptr ? optional<string> (convert<string> (move (*fmt))) @@ -683,26 +835,27 @@ namespace build2 // $regex.split(<val>, <pat>, <fmt> [, <flags>]) // // Split a value of an arbitrary type into a list of unmatched value parts - // and replacements of the matched parts, omitting empty ones. 
Convert the - // value to string prior to matching. - // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.hxx for details). + // and replacements of the matched parts, omitting empty ones (unless the + // `format_copy_empty` flag is specified). Convert the value to string + // prior to matching. // // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case + // + // format_no_copy - do not copy unmatched value parts into the + // result // - // format_no_copy - do not copy unmatched value parts into the result + // format_copy_empty - copy empty elements into the result // - f[".split"] += [](value s, string re, string fmt, optional<names> flags) + f[".split"] += [](value v, string re, string fmt, optional<names> flags) { - return split (move (s), re, fmt, move (flags)); + return split (move (v), re, fmt, move (flags)); }; - f[".split"] += [](value s, names re, names fmt, optional<names> flags) + f[".split"] += [](value v, names re, names fmt, optional<names> flags) { - return split (move (s), + return split (move (v), convert<string> (move (re)), convert<string> (move (fmt)), move (flags)); @@ -711,45 +864,52 @@ namespace build2 // $regex.merge(<vals>, <pat>, <fmt> [, <delim> [, <flags>]]) // // Replace matched parts in a list of elements using the regex format - // string. Convert the elements to string prior to matching. The result + // string. Convert the elements to strings prior to matching. The result // value is untyped and contains concatenation of transformed non-empty - // elements optionally separated with a delimiter. - // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.hxx for details). + // elements (unless the `format_copy_empty` flag is specified) optionally + // separated with a delimiter. 
// // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case + // + // format_first_only - only replace the first match // - // format_first_only - only replace the first match + // format_no_copy - do not copy unmatched value parts into the + // result // - // format_no_copy - do not copy unmatched value parts into the result + // format_copy_empty - copy empty elements into the result // - // If both format_first_only and format_no_copy flags are specified then - // the result will be a concatenation of only the first match + // If both `format_first_only` and `format_no_copy` flags are specified + // then the result will be a concatenation of only the first match // replacements. // - f[".merge"] += [](names s, - string re, - string fmt, - optional<string> delim, - optional<names> flags) - { - return merge (move (s), re, fmt, move (delim), move (flags)); + f[".merge"] += [](names ns, + string re, + string fmt, + optional<string*> delim, + optional<names> flags) + { + return merge (move (ns), + re, + fmt, + delim && *delim != nullptr + ? move (**delim) + : optional<string> (), + move (flags)); }; - f[".merge"] += [](names s, - names re, - names fmt, - optional<names> delim, - optional<names> flags) + f[".merge"] += [](names ns, + names re, + names fmt, + optional<names*> delim, + optional<names> flags) { - return merge (move (s), + return merge (move (ns), convert<string> (move (re)), convert<string> (move (fmt)), - delim - ? convert<string> (move (*delim)) + delim && *delim != nullptr + ? convert<string> (move (**delim)) : optional<string> (), move (flags)); }; @@ -757,32 +917,33 @@ namespace build2 // $regex.apply(<vals>, <pat>, <fmt> [, <flags>]) // // Replace matched parts of each element in a list using the regex format - // string. Convert the elements to string prior to matching. Return a list - // of transformed elements, omitting the empty ones. 
- // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.hxx for details). + // string. Convert the elements to strings prior to matching. Return a + // list of transformed elements, omitting the empty ones (unless the + // `format_copy_empty` flag is specified). // // The following flags are supported: // - // icase - match ignoring case + // icase - match ignoring case + // + // format_first_only - only replace the first match // - // format_first_only - only replace the first match + // format_no_copy - do not copy unmatched value parts into the + // result // - // format_no_copy - do not copy unmatched value parts into the result + // format_copy_empty - copy empty elements into the result // - // If both format_first_only and format_no_copy flags are specified then - // the result elements will only contain the replacement of the first + // If both `format_first_only` and `format_no_copy` flags are specified + // then the result elements will only contain the replacement of the first // match. // - f[".apply"] += [](names s, string re, string fmt, optional<names> flags) + f[".apply"] += [](names ns, string re, string fmt, optional<names> flags) { - return apply (move (s), re, fmt, move (flags)); + return apply (move (ns), re, fmt, move (flags)); }; - f[".apply"] += [](names s, names re, names fmt, optional<names> flags) + f[".apply"] += [](names ns, names re, names fmt, optional<names> flags) { - return apply (move (s), + return apply (move (ns), convert<string> (move (re)), convert<string> (move (fmt)), move (flags)); |