diff options
Diffstat (limited to 'build2/functions-regex.cxx')
-rw-r--r-- | build2/functions-regex.cxx | 542 |
1 files changed, 0 insertions, 542 deletions
diff --git a/build2/functions-regex.cxx b/build2/functions-regex.cxx deleted file mode 100644 index 3f44e8a..0000000 --- a/build2/functions-regex.cxx +++ /dev/null @@ -1,542 +0,0 @@ -// file : build2/functions-regex.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#include <sstream> - -#include <libbutl/regex.mxx> - -#include <build2/function.hxx> -#include <build2/variable.hxx> - -using namespace std; -using namespace butl; - -namespace build2 -{ - // Convert value of an arbitrary type to string. - // - static inline string - to_string (value&& v) - { - // Optimize for the string value type. - // - if (v.type != &value_traits<string>::value_type) - untypify (v); - - return convert<string> (move (v)); - } - - // Parse a regular expression. Throw invalid_argument if it is not valid. - // - // Note: also used in functions-process.cxx (thus not static). - // - regex - parse_regex (const string& s, regex::flag_type f) - { - try - { - return regex (s, f); - } - catch (const regex_error& e) - { - // Print regex_error description if meaningful (no space). - // - ostringstream os; - os << "invalid regex '" << s << "'" << e; - throw invalid_argument (os.str ()); - } - } - - // Match value of an arbitrary type against the regular expression. See - // match() overloads (below) for details. - // - static value - match (value&& v, const string& re, optional<names>&& flags) - { - // Parse flags. - // - regex::flag_type rf (regex::ECMAScript); - bool subs (false); - - if (flags) - { - for (auto& f: *flags) - { - string s (convert<string> (move (f))); - - if (s == "icase") - rf |= regex::icase; - else if (s == "return_subs") - subs = true; - else - throw invalid_argument ("invalid flag '" + s + "'"); - } - } - - // Parse regex. - // - regex rge (parse_regex (re, rf)); - - // Match. - // - string s (to_string (move (v))); - - if (!subs) - return value (regex_match (s, rge)); // Return boolean value. - - names r; - match_results<string::const_iterator> m; - - if (regex_match (s, m, rge)) - { - assert (!m.empty ()); - - for (size_t i (1); i != m.size (); ++i) - { - if (m[i].matched) - r.emplace_back (m.str (i)); - } - } - - return value (move (r)); - } - - // Determine if there is a match between the regular expression and some - // part of a value of an arbitrary type. See search() overloads (below) - // for details. - // - static value - search (value&& v, const string& re, optional<names>&& flags) - { - // Parse flags. - // - regex::flag_type rf (regex::ECMAScript); - bool match (false); - bool subs (false); - - if (flags) - { - for (auto& f: *flags) - { - string s (convert<string> (move (f))); - - if (s == "icase") - rf |= regex::icase; - else if (s == "return_match") - match = true; - else if (s == "return_subs") - subs = true; - else - throw invalid_argument ("invalid flag '" + s + "'"); - } - } - - // Parse regex. - // - regex rge (parse_regex (re, rf)); - - // Search. - // - string s (to_string (move (v))); - - if (!match && !subs) - return value (regex_search (s, rge)); // Return boolean value. - - names r; - match_results<string::const_iterator> m; - - if (regex_search (s, m, rge)) - { - assert (!m.empty ()); - - if (match) - { - assert (m[0].matched); - r.emplace_back (m.str (0)); - } - - if (subs) - { - for (size_t i (1); i != m.size (); ++i) - { - if (m[i].matched) - r.emplace_back (m.str (i)); - } - } - } - - return value (move (r)); - } - - static pair<regex::flag_type, regex_constants::match_flag_type> - parse_replacement_flags (optional<names>&& flags, bool first_only = true) - { - regex::flag_type rf (regex::ECMAScript); - regex_constants::match_flag_type mf (regex_constants::match_default); - - if (flags) - { - for (auto& f: *flags) - { - string s (convert<string> (move (f))); - - if (s == "icase") - rf |= regex::icase; - else if (first_only && s == "format_first_only") - mf |= regex_constants::format_first_only; - else if (s == "format_no_copy") - mf |= regex_constants::format_no_copy; - else - throw invalid_argument ("invalid flag '" + s + "'"); - } - } - - return make_pair (rf, mf); - } - - // Replace matched parts in a value of an arbitrary type, using the format - // string. See replace() overloads (below) for details. - // - static names - replace (value&& v, - const string& re, - const string& fmt, - optional<names>&& flags) - { - auto fl (parse_replacement_flags (move (flags))); - regex rge (parse_regex (re, fl.first)); - - names r; - - try - { - r.emplace_back (regex_replace_search (to_string (move (v)), - rge, - fmt, - fl.second).first); - } - catch (const regex_error& e) - { - fail << "unable to replace" << e; - } - - return r; - } - - // Split a value of an arbitrary type into a list of unmatched value parts - // and replacements of the matched parts. See split() overloads (below) for - // details. - // - static names - split (value&& v, - const string& re, - const string& fmt, - optional<names>&& flags) - { - auto fl (parse_replacement_flags (move (flags), false)); - regex rge (parse_regex (re, fl.first)); - - names r; - - try - { - regex_replace_search (to_string (move (v)), rge, fmt, - [&r] (string::const_iterator b, - string::const_iterator e) - { - if (b != e) - r.emplace_back (string (b, e)); - }, - fl.second); - } - catch (const regex_error& e) - { - fail << "unable to split" << e; - } - - return r; - } - - // Replace matched parts of list elements using the format string. See - // apply() overloads (below) for details. - // - static names - apply (names&& s, - const string& re, - const string& fmt, - optional<names>&& flags) - { - auto fl (parse_replacement_flags (move (flags))); - regex rge (parse_regex (re, fl.first)); - - names r; - - try - { - for (auto& v: s) - { - string s (regex_replace_search (convert<string> (move (v)), - rge, - fmt, - fl.second).first); - - if (!s.empty ()) - r.emplace_back (move (s)); - } - } - catch (const regex_error& e) - { - fail << "unable to apply" << e; - } - - return r; - } - - // Replace matched parts of list elements using the format string and - // concatenate the transformed elements. See merge() overloads (below) for - // details. - // - static names - merge (names&& s, - const string& re, - const string& fmt, - optional<string>&& delim, - optional<names>&& flags) - { - auto fl (parse_replacement_flags (move (flags))); - regex rge (parse_regex (re, fl.first)); - - string rs; - - try - { - for (auto& v: s) - { - string s (regex_replace_search (convert<string> (move (v)), - rge, - fmt, - fl.second).first); - - if (!s.empty ()) - { - if (!rs.empty () && delim) - rs.append (*delim); - - rs.append (s); - } - - } - } - catch (const regex_error& e) - { - fail << "unable to merge" << e; - } - - names r; - r.emplace_back (move (rs)); - return r; - } - - void - regex_functions () - { - function_family f ("regex"); - - // $regex.match(<val>, <pat> [, <flags>]) - // - // Match a value of an arbitrary type against the regular expression. - // Convert the value to string prior to matching. Return the boolean value - // unless return_subs flag is specified (see below), in which case return - // names (empty if no match). - // - // The following flags are supported: - // - // icase - match ignoring case - // - // return_subs - return names (rather than boolean), that contain - // sub-strings that match the marked sub-expressions - // - f[".match"] = [](value s, string re, optional<names> flags) - { - return match (move (s), re, move (flags)); - }; - - f[".match"] = [](value s, names re, optional<names> flags) - { - return match (move (s), convert<string> (move (re)), move (flags)); - }; - - // $regex.search(<val>, <pat> [, <flags>]) - // - // Determine if there is a match between the regular expression and some - // part of a value of an arbitrary type. Convert the value to string prior - // to searching. Return the boolean value unless return_match or - // return_subs flag is specified (see below) in which case return names - // (empty if no match). - // - // The following flags are supported: - // - // icase - match ignoring case - // - // return_match - return names (rather than boolean), that contain a - // sub-string that matches the whole regular expression - // - // return_subs - return names (rather than boolean), that contain - // sub-strings that match the marked sub-expressions - // - // If both return_match and return_subs flags are specified then the - // sub-string that matches the whole regular expression comes first. - // - f[".search"] = [](value s, string re, optional<names> flags) - { - return search (move (s), re, move (flags)); - }; - - f[".search"] = [](value s, names re, optional<names> flags) - { - return search (move (s), convert<string> (move (re)), move (flags)); - }; - - // $regex.replace(<val>, <pat>, <fmt> [, <flags>]) - // - // Replace matched parts in a value of an arbitrary type, using the format - // string. Convert the value to string prior to matching. The result value - // is always untyped, regardless of the argument type. - // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.mxx for details). - // - // The following flags are supported: - // - // icase - match ignoring case - // - // format_first_only - only replace the first match - // - // format_no_copy - do not copy unmatched value parts into the result - // - // If both format_first_only and format_no_copy flags are specified then - // the result will only contain the replacement of the first match. - // - f[".replace"] = [](value s, string re, string fmt, optional<names> flags) - { - return replace (move (s), re, fmt, move (flags)); - }; - - f[".replace"] = [](value s, names re, names fmt, optional<names> flags) - { - return replace (move (s), - convert<string> (move (re)), - convert<string> (move (fmt)), - move (flags)); - }; - - // $regex.split(<val>, <pat>, <fmt> [, <flags>]) - // - // Split a value of an arbitrary type into a list of unmatched value parts - // and replacements of the matched parts, omitting empty ones. Convert the - // value to string prior to matching. - // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.mxx for details). - // - // The following flags are supported: - // - // icase - match ignoring case - // - // format_no_copy - do not copy unmatched value parts into the result - // - f[".split"] = [](value s, string re, string fmt, optional<names> flags) - { - return split (move (s), re, fmt, move (flags)); - }; - - f[".split"] = [](value s, names re, names fmt, optional<names> flags) - { - return split (move (s), - convert<string> (move (re)), - convert<string> (move (fmt)), - move (flags)); - }; - - // $regex.merge(<vals>, <pat>, <fmt> [, <delim> [, <flags>]]) - // - // Replace matched parts in a list of elements using the regex format - // string. Convert the elements to string prior to matching. The result - // value is untyped and contains concatenation of transformed non-empty - // elements optionally separated with a delimiter. - // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.mxx for details). - // - // The following flags are supported: - // - // icase - match ignoring case - // - // format_first_only - only replace the first match - // - // format_no_copy - do not copy unmatched value parts into the result - // - // If both format_first_only and format_no_copy flags are specified then - // the result will be a concatenation of only the first match - // replacements. - // - f[".merge"] = [](names s, - string re, - string fmt, - optional<string> delim, - optional<names> flags) - { - return merge (move (s), re, fmt, move (delim), move (flags)); - }; - - f[".merge"] = [](names s, - names re, - names fmt, - optional<names> delim, - optional<names> flags) - { - return merge (move (s), - convert<string> (move (re)), - convert<string> (move (fmt)), - delim - ? convert<string> (move (*delim)) - : optional<string> (), - move (flags)); - }; - - // $regex.apply(<vals>, <pat>, <fmt> [, <flags>]) - // - // Replace matched parts of each element in a list using the regex format - // string. Convert the elements to string prior to matching. Return a list - // of transformed elements, omitting the empty ones. - // - // Substitution escape sequences are extended with a subset of Perl - // sequences (see libbutl/regex.mxx for details). - // - // The following flags are supported: - // - // icase - match ignoring case - // - // format_first_only - only replace the first match - // - // format_no_copy - do not copy unmatched value parts into the result - // - // If both format_first_only and format_no_copy flags are specified then - // the result elements will only contain the replacement of the first - // match. - // - f[".apply"] = [](names s, string re, string fmt, optional<names> flags) - { - return apply (move (s), re, fmt, move (flags)); - }; - - f[".apply"] = [](names s, names re, names fmt, optional<names> flags) - { - return apply (move (s), - convert<string> (move (re)), - convert<string> (move (fmt)), - move (flags)); - }; - } -} |