aboutsummaryrefslogtreecommitdiff
path: root/build2
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2017-06-26 22:23:43 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2017-06-27 19:23:16 +0300
commit 9a3a8d1915c8a3666984d6603606af856dfd8c41 (patch)
tree 387af88de60f908ce248ae215b722f5a907564b9 /build2
parent f65377448e74fc7e575e4df258fb0a48a09e39cc (diff)
Add support for regex function family
Diffstat (limited to 'build2')
-rw-r--r-- build2/buildfile 1
-rw-r--r-- build2/function.cxx 2
-rw-r--r-- build2/functions-regex.cxx 331
-rw-r--r-- build2/variable.hxx 2
4 files changed, 335 insertions, 1 deletions
diff --git a/build2/buildfile b/build2/buildfile
index 8525eb8..d2aa7c1 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -19,6 +19,7 @@ exe{b}: \
{ cxx}{ functions-filesystem } \
{ cxx}{ functions-path } \
{ cxx}{ functions-process-path } \
+ { cxx}{ functions-regex } \
{ cxx}{ functions-string } \
{ cxx}{ functions-target-triplet } \
{hxx cxx}{ lexer } \
diff --git a/build2/function.cxx b/build2/function.cxx
index 399d679..f7de749 100644
--- a/build2/function.cxx
+++ b/build2/function.cxx
@@ -304,6 +304,7 @@ namespace build2
void filesystem_functions (); // functions-filesystem.cxx
void path_functions (); // functions-path.cxx
void process_path_functions (); // functions-process-path.cxx
+ void regex_functions (); // functions-regex.cxx
void string_functions (); // functions-string.cxx
void target_triplet_functions (); // functions-target-triplet.cxx
@@ -315,6 +316,7 @@ namespace build2
filesystem_functions ();
path_functions ();
process_path_functions ();
+ regex_functions ();
string_functions ();
target_triplet_functions ();
}
diff --git a/build2/functions-regex.cxx b/build2/functions-regex.cxx
new file mode 100644
index 0000000..bdecc14
--- /dev/null
+++ b/build2/functions-regex.cxx
@@ -0,0 +1,331 @@
+// file : build2/functions-regex.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <sstream>
+
+#include <libbutl/regex.hxx>
+
+#include <build2/function.hxx>
+#include <build2/variable.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ // Produce the string representation of a value of an arbitrary type,
+ // consuming the value.
+ //
+ static inline string
+ to_string (value&& v)
+ {
+   // A value that is already of the string type is converted as is (the
+   // fast path). Any other value is passed through untypify() first.
+   //
+   const bool is_string (v.type == &value_traits<string>::value_type);
+
+   if (!is_string)
+     untypify (v);
+
+   return convert<string> (move (v));
+ }
+
+ // Compile the string as a regular expression using the specified syntax
+ // flags. If the compilation fails with regex_error, throw invalid_argument
+ // with a description that quotes the offending expression.
+ //
+ static regex
+ parse_regex (const string& s, regex::flag_type f)
+ {
+   try
+   {
+     regex r (s, f);
+     return r;
+   }
+   catch (const regex_error& e)
+   {
+     // The regex_error is inserted right after the closing quote without
+     // any separator of our own: its description is only printed if it is
+     // meaningful.
+     //
+     ostringstream msg;
+     msg << "invalid regex '" << s << "'";
+     msg << e;
+     throw invalid_argument (msg.str ());
+   }
+ }
+
+ // Match value of an arbitrary type against the regular expression. See
+ // match() overloads (below) for details.
+ //
+ // The value is converted to string and the expression has to match this
+ // string as a whole. Return the boolean value unless the return_subs flag
+ // is specified, in which case return names containing the sub-strings that
+ // matched the marked sub-expressions (empty if there is no match). Throw
+ // invalid_argument if a flag is not recognized or the regular expression
+ // is not valid.
+ //
+ static value
+ match (value&& v, const string& re, optional<names>&& flags)
+ {
+   // Parse flags.
+   //
+   regex::flag_type rf (regex::ECMAScript);
+   bool subs (false);
+
+   if (flags)
+   {
+     for (auto& f: *flags)
+     {
+       string s (convert<string> (move (f)));
+
+       if (s == "icase")
+         rf |= regex::icase;
+       else if (s == "return_subs")
+         subs = true;
+       else
+         throw invalid_argument ("invalid flag '" + s + "'");
+     }
+   }
+
+   // Parse regex.
+   //
+   regex rge (parse_regex (re, rf));
+
+   // Match.
+   //
+   string s (to_string (move (v)));
+
+   if (!subs)
+     return value (regex_match (s, rge)); // Return boolean value.
+
+   names r;
+   match_results<string::const_iterator> m;
+
+   if (regex_match (s, m, rge))
+   {
+     assert (!m.empty ());
+
+     // Element 0 is the whole match so start from 1. Sub-expressions that
+     // did not participate in the match are skipped.
+     //
+     for (size_t i (1); i != m.size (); ++i)
+     {
+       if (m[i].matched)
+         r.emplace_back (m.str (i));
+     }
+   }
+
+   // Move rather than copy the collected names into the result.
+   //
+   return value (move (r));
+ }
+
+ // Determine if there is a match between the regular expression and some
+ // part of a value of an arbitrary type. See search() overloads (below)
+ // for details.
+ //
+ // The value is converted to string prior to searching. Return the boolean
+ // value unless the return_match and/or return_subs flags are specified, in
+ // which case return names (empty if there is no match) that contain the
+ // sub-string matching the whole expression (return_match) followed by the
+ // sub-strings matching the marked sub-expressions (return_subs). Throw
+ // invalid_argument if a flag is not recognized or the regular expression
+ // is not valid.
+ //
+ static value
+ search (value&& v, const string& re, optional<names>&& flags)
+ {
+   // Parse flags.
+   //
+   regex::flag_type rf (regex::ECMAScript);
+   bool match (false);
+   bool subs (false);
+
+   if (flags)
+   {
+     for (auto& f: *flags)
+     {
+       string s (convert<string> (move (f)));
+
+       if (s == "icase")
+         rf |= regex::icase;
+       else if (s == "return_match")
+         match = true;
+       else if (s == "return_subs")
+         subs = true;
+       else
+         throw invalid_argument ("invalid flag '" + s + "'");
+     }
+   }
+
+   // Parse regex.
+   //
+   regex rge (parse_regex (re, rf));
+
+   // Search.
+   //
+   string s (to_string (move (v)));
+
+   if (!match && !subs)
+     return value (regex_search (s, rge)); // Return boolean value.
+
+   names r;
+   match_results<string::const_iterator> m;
+
+   if (regex_search (s, m, rge))
+   {
+     assert (!m.empty ());
+
+     // The whole match (element 0) comes first, if requested.
+     //
+     if (match)
+     {
+       assert (m[0].matched);
+       r.emplace_back (m.str (0));
+     }
+
+     // Then the marked sub-expressions, skipping those that did not
+     // participate in the match.
+     //
+     if (subs)
+     {
+       for (size_t i (1); i != m.size (); ++i)
+       {
+         if (m[i].matched)
+           r.emplace_back (m.str (i));
+       }
+     }
+   }
+
+   // Move rather than copy the collected names into the result.
+   //
+   return value (move (r));
+ }
+
+ // Substitute the parts of a value of an arbitrary type that match the
+ // regular expression according to the format string. See replace()
+ // overloads (below) for details.
+ //
+ static names
+ replace (value&& v,
+          const string& re,
+          const string& fmt,
+          optional<names>&& flags)
+ {
+   // Translate the flag names into the regex syntax and match flags,
+   // rejecting anything that is not recognized.
+   //
+   regex::flag_type syntax (regex::ECMAScript);
+   regex_constants::match_flag_type how (regex_constants::match_default);
+
+   if (flags)
+   {
+     for (auto& n: *flags)
+     {
+       const string flag (convert<string> (move (n)));
+
+       if (flag == "icase")
+       {
+         syntax |= regex::icase;
+         continue;
+       }
+
+       if (flag == "format_first_only")
+       {
+         how |= regex_constants::format_first_only;
+         continue;
+       }
+
+       if (flag == "format_no_copy")
+       {
+         how |= regex_constants::format_no_copy;
+         continue;
+       }
+
+       throw invalid_argument ("invalid flag '" + flag + "'");
+     }
+   }
+
+   // Compile the regular expression.
+   //
+   const regex pattern (parse_regex (re, syntax));
+
+   // Perform the substitution. The result is a single name holding the
+   // string produced by regex_replace_ex().
+   //
+   names result;
+
+   try
+   {
+     const string subject (to_string (move (v)));
+     auto replaced (regex_replace_ex (subject, pattern, fmt, how));
+     result.emplace_back (move (replaced.first));
+   }
+   catch (const regex_error& e)
+   {
+     fail << "unable to replace" << e;
+   }
+
+   return result;
+ }
+
+ // Register the functions of the regex family (match, search, and replace).
+ // The regular expression and format arguments are accepted either as
+ // strings or as names, with the latter converted to string.
+ //
+ void
+ regex_functions ()
+ {
+   function_family f ("regex");
+
+   // match
+   //
+   // Test whether the regular expression matches a value of an arbitrary
+   // type, which is converted to string first. The result is the boolean
+   // value unless the return_subs flag (see below) is specified, in which
+   // case it is names (empty if there is no match).
+   //
+   // Supported flags:
+   //
+   // icase       - ignore case when matching
+   //
+   // return_subs - return names (instead of boolean) containing the
+   //               sub-strings matched by the marked sub-expressions
+   //
+   f[".match"] = [](value s, string re, optional<names> flags) -> value
+   {
+     return match (move (s), re, move (flags));
+   };
+
+   f[".match"] = [](value s, names re, optional<names> flags) -> value
+   {
+     const string pattern (convert<string> (move (re)));
+     return match (move (s), pattern, move (flags));
+   };
+
+   // search
+   //
+   // Test whether the regular expression matches some part of a value of an
+   // arbitrary type, which is converted to string first. The result is the
+   // boolean value unless the return_match or return_subs flag (see below)
+   // is specified, in which case it is names (empty if there is no match).
+   //
+   // Supported flags:
+   //
+   // icase        - ignore case when matching
+   //
+   // return_match - return names (instead of boolean) containing the
+   //                sub-string matched by the whole regular expression
+   //
+   // return_subs  - return names (instead of boolean) containing the
+   //                sub-strings matched by the marked sub-expressions
+   //
+   // With both return_match and return_subs specified, the sub-string
+   // matched by the whole regular expression is returned first.
+   //
+   f[".search"] = [](value s, string re, optional<names> flags) -> value
+   {
+     return search (move (s), re, move (flags));
+   };
+
+   f[".search"] = [](value s, names re, optional<names> flags) -> value
+   {
+     const string pattern (convert<string> (move (re)));
+     return search (move (s), pattern, move (flags));
+   };
+
+   // replace
+   //
+   // Substitute the matched parts of a value of an arbitrary type according
+   // to the format string. The value is converted to string first and the
+   // result is always untyped, whatever the type of the argument.
+   //
+   // The substitution escape sequences are extended with a subset of the
+   // Perl sequences (see regex_replace_ex() for details).
+   //
+   // Supported flags:
+   //
+   // icase             - ignore case when matching
+   //
+   // format_first_only - replace the first match only
+   //
+   // format_no_copy    - do not copy the unmatched parts of the value to
+   //                     the result
+   //
+   // With both format_first_only and format_no_copy specified, the result
+   // consists solely of the replacement of the first match.
+   //
+   f[".replace"] = [](value s,
+                      string re,
+                      string fmt,
+                      optional<names> flags) -> names
+   {
+     return replace (move (s), re, fmt, move (flags));
+   };
+
+   f[".replace"] = [](value s,
+                      string re,
+                      names fmt,
+                      optional<names> flags) -> names
+   {
+     const string format (convert<string> (move (fmt)));
+     return replace (move (s), re, format, move (flags));
+   };
+
+   f[".replace"] = [](value s,
+                      names re,
+                      string fmt,
+                      optional<names> flags) -> names
+   {
+     const string pattern (convert<string> (move (re)));
+     return replace (move (s), pattern, fmt, move (flags));
+   };
+
+   f[".replace"] = [](value s,
+                      names re,
+                      names fmt,
+                      optional<names> flags) -> names
+   {
+     const string pattern (convert<string> (move (re)));
+     const string format (convert<string> (move (fmt)));
+     return replace (move (s), pattern, format, move (flags));
+   };
+ }
+}
diff --git a/build2/variable.hxx b/build2/variable.hxx
index 19a6c69..f0218fe 100644
--- a/build2/variable.hxx
+++ b/build2/variable.hxx
@@ -263,7 +263,7 @@ namespace build2
bool operator>= (const value&, const value&);
// Value cast. The first three expect the value to be not NULL. The cast
- // from lookup expects the value to aslo be defined.
+ // from lookup expects the value to also be defined.
//
// Note that a cast to names expects the value to be untyped while a cast
// to vector<names> -- typed.