From 9a3a8d1915c8a3666984d6603606af856dfd8c41 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 26 Jun 2017 22:23:43 +0300 Subject: Add support for regex function family --- build2/buildfile | 1 + build2/function.cxx | 2 + build2/functions-regex.cxx | 331 +++++++++++++++++++++++++++++++++++++++++++++ build2/variable.hxx | 2 +- 4 files changed, 335 insertions(+), 1 deletion(-) create mode 100644 build2/functions-regex.cxx (limited to 'build2') diff --git a/build2/buildfile b/build2/buildfile index 8525eb8..d2aa7c1 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -19,6 +19,7 @@ exe{b}: \ { cxx}{ functions-filesystem } \ { cxx}{ functions-path } \ { cxx}{ functions-process-path } \ + { cxx}{ functions-regex } \ { cxx}{ functions-string } \ { cxx}{ functions-target-triplet } \ {hxx cxx}{ lexer } \ diff --git a/build2/function.cxx b/build2/function.cxx index 399d679..f7de749 100644 --- a/build2/function.cxx +++ b/build2/function.cxx @@ -304,6 +304,7 @@ namespace build2 void filesystem_functions (); // functions-filesystem.cxx void path_functions (); // functions-path.cxx void process_path_functions (); // functions-process-path.cxx + void regex_functions (); // functions-regex.cxx void string_functions (); // functions-string.cxx void target_triplet_functions (); // functions-target-triplet.cxx @@ -315,6 +316,7 @@ namespace build2 filesystem_functions (); path_functions (); process_path_functions (); + regex_functions (); string_functions (); target_triplet_functions (); } diff --git a/build2/functions-regex.cxx b/build2/functions-regex.cxx new file mode 100644 index 0000000..bdecc14 --- /dev/null +++ b/build2/functions-regex.cxx @@ -0,0 +1,331 @@ +// file : build2/functions-regex.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include + +#include +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + // Convert value of an arbitrary type to string. + // + static inline string + to_string (value&& v) + { + // Optimize for the string value type. + // + if (v.type != &value_traits::value_type) + untypify (v); + + return convert (move (v)); + } + + // Parse a regular expression. Throw invalid_argument if it is not valid. + // + static regex + parse_regex (const string& s, regex::flag_type f) + { + try + { + return regex (s, f); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + ostringstream os; + os << "invalid regex '" << s << "'" << e; + throw invalid_argument (os.str ()); + } + } + + // Match value of an arbitrary type against the regular expression. See + // match() overloads (below) for details. + // + static value + match (value&& v, const string& re, optional&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + bool subs (false); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "return_subs") + subs = true; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Match. + // + string s (to_string (move (v))); + + if (!subs) + return value (regex_match (s, rge)); // Return boolean value. + + names r; + match_results m; + + if (regex_match (s, m, rge)) + { + assert (!m.empty ()); + + for (size_t i (1); i != m.size (); ++i) + { + if (m[i].matched) + r.emplace_back (m.str (i)); + } + } + + return value (r); + } + + // Determine if there is a match between the regular expression and some + // part of a value of an arbitrary type. See search() overloads (below) + // for details. + // + static value + search (value&& v, const string& re, optional&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + bool match (false); + bool subs (false); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "return_match") + match = true; + else if (s == "return_subs") + subs = true; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Search. + // + string s (to_string (move (v))); + + if (!match && !subs) + return value (regex_search (s, rge)); // Return boolean value. + + names r; + match_results m; + + if (regex_search (s, m, rge)) + { + assert (!m.empty ()); + + if (match) + { + assert (m[0].matched); + r.emplace_back (m.str (0)); + } + + if (subs) + { + for (size_t i (1); i != m.size (); ++i) + { + if (m[i].matched) + r.emplace_back (m.str (i)); + } + } + } + + return value (r); + } + + // Replace matched parts in a value of an arbitrary type, using the format + // string. See replace() overloads (below) for details. + // + static names + replace (value&& v, + const string& re, + const string& fmt, + optional&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + regex_constants::match_flag_type mf (regex_constants::match_default); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "format_first_only") + mf |= regex_constants::format_first_only; + else if (s == "format_no_copy") + mf |= regex_constants::format_no_copy; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Replace. + // + names r; + + try + { + string s (to_string (move (v))); + r.emplace_back (regex_replace_ex (s, rge, fmt, mf).first); + } + catch (const regex_error& e) + { + fail << "unable to replace" << e; + } + + return r; + } + + void + regex_functions () + { + function_family f ("regex"); + + // match + // + // Match a value of an arbitrary type against the regular expression. + // Convert the value to string prior to matching. Return the boolean value + // unless return_subs flag is specified (see below), in which case return + // names (empty if no match). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // + f[".match"] = [](value s, string re, optional flags) + { + return match (move (s), re, move (flags)); + }; + + f[".match"] = [](value s, names re, optional flags) + { + return match (move (s), convert (move (re)), move (flags)); + }; + + // search + // + // Determine if there is a match between the regular expression and some + // part of a value of an arbitrary type. Convert the value to string prior + // to searching. Return the boolean value unless return_match or + // return_subs flag is specified (see below) in which case return names + // (empty if no match). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // return_match - return names (rather than boolean), that contain a + // sub-string that matches the whole regular expression + // + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // + // If both return_match and return_subs flags are specified then the + // sub-string that matches the whole regular expression comes first. + // + f[".search"] = [](value s, string re, optional flags) + { + return search (move (s), re, move (flags)); + }; + + f[".search"] = [](value s, names re, optional flags) + { + return search (move (s), convert (move (re)), move (flags)); + }; + + // replace + // + // Replace matched parts in a value of an arbitrary type, using the format + // string. Convert the value to string prior to matching. The result value + // is always untyped, regardless of the argument type. + // + // Substitution escape sequences are extended with a subset of Perl + // sequences (see regex_replace_ex() for details). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // format_first_only - only replace the first match + // + // format_no_copy - do not copy unmatched value parts to the result + // + // If both format_first_only and format_no_copy flags are specified then + // all the result will contain is the replacement of the first match. + // + f[".replace"] = [](value s, string re, string fmt, optional flags) + { + return replace (move (s), re, fmt, move (flags)); + }; + + f[".replace"] = [](value s, string re, names fmt, optional flags) + { + return replace (move (s), + re, + convert (move (fmt)), + move (flags)); + }; + + f[".replace"] = [](value s, names re, string fmt, optional flags) + { + return replace (move (s), + convert (move (re)), + fmt, + move (flags)); + }; + + f[".replace"] = [](value s, names re, names fmt, optional flags) + { + return replace (move (s), + convert (move (re)), + convert (move (fmt)), + move (flags)); + }; + } +} diff --git a/build2/variable.hxx b/build2/variable.hxx index 19a6c69..f0218fe 100644 --- a/build2/variable.hxx +++ b/build2/variable.hxx @@ -263,7 +263,7 @@ namespace build2 bool operator>= (const value&, const value&); // Value cast. The first three expect the value to be not NULL. The cast - // from lookup expects the value to aslo be defined. + // from lookup expects the value to also be defined. // // Note that a cast to names expects the value to be untyped while a cast // to vector -- typed. -- cgit v1.1