From 9a3a8d1915c8a3666984d6603606af856dfd8c41 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 26 Jun 2017 22:23:43 +0300 Subject: Add support for regex function family --- build2/functions-regex.cxx | 331 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 build2/functions-regex.cxx (limited to 'build2/functions-regex.cxx') diff --git a/build2/functions-regex.cxx b/build2/functions-regex.cxx new file mode 100644 index 0000000..bdecc14 --- /dev/null +++ b/build2/functions-regex.cxx @@ -0,0 +1,331 @@ +// file : build2/functions-regex.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include + +#include +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + // Convert value of an arbitrary type to string. + // + static inline string + to_string (value&& v) + { + // Optimize for the string value type. + // + if (v.type != &value_traits::value_type) + untypify (v); + + return convert (move (v)); + } + + // Parse a regular expression. Throw invalid_argument if it is not valid. + // + static regex + parse_regex (const string& s, regex::flag_type f) + { + try + { + return regex (s, f); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + ostringstream os; + os << "invalid regex '" << s << "'" << e; + throw invalid_argument (os.str ()); + } + } + + // Match value of an arbitrary type against the regular expression. See + // match() overloads (below) for details. + // + static value + match (value&& v, const string& re, optional&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + bool subs (false); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "return_subs") + subs = true; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Match. + // + string s (to_string (move (v))); + + if (!subs) + return value (regex_match (s, rge)); // Return boolean value. + + names r; + match_results m; + + if (regex_match (s, m, rge)) + { + assert (!m.empty ()); + + for (size_t i (1); i != m.size (); ++i) + { + if (m[i].matched) + r.emplace_back (m.str (i)); + } + } + + return value (r); + } + + // Determine if there is a match between the regular expression and some + // part of a value of an arbitrary type. See search() overloads (below) + // for details. + // + static value + search (value&& v, const string& re, optional&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + bool match (false); + bool subs (false); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "return_match") + match = true; + else if (s == "return_subs") + subs = true; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Search. + // + string s (to_string (move (v))); + + if (!match && !subs) + return value (regex_search (s, rge)); // Return boolean value. + + names r; + match_results m; + + if (regex_search (s, m, rge)) + { + assert (!m.empty ()); + + if (match) + { + assert (m[0].matched); + r.emplace_back (m.str (0)); + } + + if (subs) + { + for (size_t i (1); i != m.size (); ++i) + { + if (m[i].matched) + r.emplace_back (m.str (i)); + } + } + } + + return value (r); + } + + // Replace matched parts in a value of an arbitrary type, using the format + // string. See replace() overloads (below) for details. + // + static names + replace (value&& v, + const string& re, + const string& fmt, + optional&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + regex_constants::match_flag_type mf (regex_constants::match_default); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "format_first_only") + mf |= regex_constants::format_first_only; + else if (s == "format_no_copy") + mf |= regex_constants::format_no_copy; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Replace. + // + names r; + + try + { + string s (to_string (move (v))); + r.emplace_back (regex_replace_ex (s, rge, fmt, mf).first); + } + catch (const regex_error& e) + { + fail << "unable to replace" << e; + } + + return r; + } + + void + regex_functions () + { + function_family f ("regex"); + + // match + // + // Match a value of an arbitrary type against the regular expression. + // Convert the value to string prior to matching. Return the boolean value + // unless return_subs flag is specified (see below), in which case return + // names (empty if no match). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // + f[".match"] = [](value s, string re, optional flags) + { + return match (move (s), re, move (flags)); + }; + + f[".match"] = [](value s, names re, optional flags) + { + return match (move (s), convert (move (re)), move (flags)); + }; + + // search + // + // Determine if there is a match between the regular expression and some + // part of a value of an arbitrary type. Convert the value to string prior + // to searching. Return the boolean value unless return_match or + // return_subs flag is specified (see below) in which case return names + // (empty if no match). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // return_match - return names (rather than boolean), that contain a + // sub-string that matches the whole regular expression + // + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // + // If both return_match and return_subs flags are specified then the + // sub-string that matches the whole regular expression comes first. + // + f[".search"] = [](value s, string re, optional flags) + { + return search (move (s), re, move (flags)); + }; + + f[".search"] = [](value s, names re, optional flags) + { + return search (move (s), convert (move (re)), move (flags)); + }; + + // replace + // + // Replace matched parts in a value of an arbitrary type, using the format + // string. Convert the value to string prior to matching. The result value + // is always untyped, regardless of the argument type. + // + // Substitution escape sequences are extended with a subset of Perl + // sequences (see regex_replace_ex() for details). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // format_first_only - only replace the first match + // + // format_no_copy - do not copy unmatched value parts to the result + // + // If both format_first_only and format_no_copy flags are specified then + // all the result will contain is the replacement of the first match. + // + f[".replace"] = [](value s, string re, string fmt, optional flags) + { + return replace (move (s), re, fmt, move (flags)); + }; + + f[".replace"] = [](value s, string re, names fmt, optional flags) + { + return replace (move (s), + re, + convert (move (fmt)), + move (flags)); + }; + + f[".replace"] = [](value s, names re, string fmt, optional flags) + { + return replace (move (s), + convert (move (re)), + fmt, + move (flags)); + }; + + f[".replace"] = [](value s, names re, names fmt, optional flags) + { + return replace (move (s), + convert (move (re)), + convert (move (fmt)), + move (flags)); + }; + } +} -- cgit v1.1