2 files changed, 452 insertions, 23 deletions
diff --git a/build2/functions-regex.cxx b/build2/functions-regex.cxx
index bdecc14..3185b85 100644
--- a/build2/functions-regex.cxx
+++ b/build2/functions-regex.cxx
@@ -97,7 +97,7 @@ namespace build2
       }
     }
 
-    return value (r);
+    return value (move (r));
   }
 
   // Determine if there is a match between the regular expression and some
@@ -164,20 +164,12 @@ namespace build2
       }
     }
 
-    return value (r);
+    return value (move (r));
   }
 
-  // Replace matched parts in a value of an arbitrary type, using the format
-  // string. See replace() overloads (below) for details.
-  //
-  static names
-  replace (value&& v,
-           const string& re,
-           const string& fmt,
-           optional<names>&& flags)
+  static pair<regex::flag_type, regex_constants::match_flag_type>
+  parse_replacement_flags (optional<names>&& flags, bool first_only = true)
   {
-    // Parse flags.
-    //
     regex::flag_type rf (regex::ECMAScript);
     regex_constants::match_flag_type mf (regex_constants::match_default);
 
@@ -189,7 +181,7 @@ namespace build2
 
         if (s == "icase")
           rf |= regex::icase;
-        else if (s == "format_first_only")
+        else if (first_only && s == "format_first_only")
           mf |= regex_constants::format_first_only;
         else if (s == "format_no_copy")
           mf |= regex_constants::format_no_copy;
@@ -198,18 +190,29 @@ namespace build2
       }
     }
 
-    // Parse regex.
-    //
-    regex rge (parse_regex (re, rf));
+    return make_pair (rf, mf);
+  }
+
+  // Replace matched parts in a value of an arbitrary type, using the format
+  // string. See replace() overloads (below) for details.
+  //
+  static names
+  replace (value&& v,
+           const string& re,
+           const string& fmt,
+           optional<names>&& flags)
+  {
+    auto fl (parse_replacement_flags (move (flags)));
+    regex rge (parse_regex (re, fl.first));
 
-    // Replace.
-    //
     names r;
 
     try
     {
-      string s (to_string (move (v)));
-      r.emplace_back (regex_replace_ex (s, rge, fmt, mf).first);
+      r.emplace_back (regex_replace_ex (to_string (move (v)),
+                                        rge,
+                                        fmt,
+                                        fl.second).first);
     }
     catch (const regex_error& e)
     {
@@ -219,6 +222,120 @@ namespace build2
     return r;
   }
 
+  // Split a value of an arbitrary type into a list of unmatched value parts
+  // and replacements of the matched parts. See split() overloads (below) for
+  // details.
+  //
+  static names
+  split (value&& v,
+         const string& re,
+         const string& fmt,
+         optional<names>&& flags)
+  {
+    auto fl (parse_replacement_flags (move (flags), false));
+    regex rge (parse_regex (re, fl.first));
+
+    names r;
+
+    try
+    {
+      regex_replace_ex (to_string (move (v)), rge, fmt,
+                        [&r] (string::const_iterator b,
+                              string::const_iterator e)
+                        {
+                          if (b != e)
+                            r.emplace_back (string (b, e));
+                        },
+                        fl.second);
+    }
+    catch (const regex_error& e)
+    {
+      fail << "unable to split" << e;
+    }
+
+    return r;
+  }
+
+  // Replace matched parts of list elements using the format string. See
+  // apply() overloads (below) for details.
+  //
+  static names
+  apply (names&& s,
+         const string& re,
+         const string& fmt,
+         optional<names>&& flags)
+  {
+    auto fl (parse_replacement_flags (move (flags)));
+    regex rge (parse_regex (re, fl.first));
+
+    names r;
+
+    try
+    {
+      for (auto& v: s)
+      {
+        string s (regex_replace_ex (convert<string> (move (v)),
+                                    rge,
+                                    fmt,
+                                    fl.second).first);
+
+        if (!s.empty ())
+          r.emplace_back (move (s));
+      }
+    }
+    catch (const regex_error& e)
+    {
+      fail << "unable to apply" << e;
+    }
+
+    return r;
+  }
+
+  // Replace matched parts of list elements using the format string and
+  // concatenate the transformed elements. See merge() overloads (below) for
+  // details.
+  //
+  static names
+  merge (names&& s,
+         const string& re,
+         const string& fmt,
+         optional<string>&& delim,
+         optional<names>&& flags)
+  {
+    auto fl (parse_replacement_flags (move (flags)));
+    regex rge (parse_regex (re, fl.first));
+
+    string rs;
+
+    try
+    {
+      for (auto& v: s)
+      {
+        string s (regex_replace_ex (convert<string> (move (v)),
+                                    rge,
+                                    fmt,
+                                    fl.second).first);
+
+        if (!s.empty ())
+        {
+          if (!rs.empty () && delim)
+            rs.append (*delim);
+
+          rs.append (s);
+        }
+
+      }
+    }
+    catch (const regex_error& e)
+    {
+      fail << "unable to merge" << e;
+    }
+
+    names r;
+    r.emplace_back (move (rs));
+    return r;
+  }
+
   void
   regex_functions ()
   {
@@ -286,7 +403,7 @@ namespace build2
     // is always untyped, regardless of the argument type.
     //
     // Substitution escape sequences are extended with a subset of Perl
-    // sequences (see regex_replace_ex() for details).
+    // sequences (see butl::regex_replace_ex() for details).
     //
     // The following flags are supported:
     //
@@ -294,10 +411,10 @@ namespace build2
     //
     // format_first_only - only replace the first match
     //
-    // format_no_copy    - do not copy unmatched value parts to the result
+    // format_no_copy    - do not copy unmatched value parts into the result
     //
     // If both format_first_only and format_no_copy flags are specified then
-    // all the result will contain is the replacement of the first match.
+    // the result will only contain the replacement of the first match.
     //
     f[".replace"] = [](value s, string re, string fmt, optional<names> flags)
     {
@@ -327,5 +444,221 @@ namespace build2
                       convert<string> (move (fmt)),
                       move (flags));
     };
+
+    // split
+    //
+    // Split a value of an arbitrary type into a list of unmatched value parts
+    // and replacements of the matched parts, omitting empty ones. Convert the
+    // value to string prior to matching.
+    //
+    // Substitution escape sequences are extended with a subset of Perl
+    // sequences (see butl::regex_replace_ex() for details).
+    //
+    // The following flags are supported:
+    //
+    // icase             - match ignoring case
+    //
+    // format_no_copy    - do not copy unmatched value parts into the result
+    //
+    f[".split"] = [](value s, string re, string fmt, optional<names> flags)
+    {
+      return split (move (s), re, fmt, move (flags));
+    };
+
+    f[".split"] = [](value s, string re, names fmt, optional<names> flags)
+    {
+      return split (move (s),
+                    re,
+                    convert<string> (move (fmt)),
+                    move (flags));
+    };
+
+    f[".split"] = [](value s, names re, string fmt, optional<names> flags)
+    {
+      return split (move (s),
+                    convert<string> (move (re)),
+                    fmt,
+                    move (flags));
+    };
+
+    f[".split"] = [](value s, names re, names fmt, optional<names> flags)
+    {
+      return split (move (s),
+                    convert<string> (move (re)),
+                    convert<string> (move (fmt)),
+                    move (flags));
+    };
+
+    // merge
+    //
+    // Replace matched parts in a list of elements using the regex format
+    // string. Convert the elements to string prior to matching. The result
+    // value is untyped and contains concatenation of transformed non-empty
+    // elements optionally separated with a delimiter.
+    //
+    // Substitution escape sequences are extended with a subset of Perl
+    // sequences (see butl::regex_replace_ex() for details).
+    //
+    // The following flags are supported:
+    //
+    // icase             - match ignoring case
+    //
+    // format_first_only - only replace the first match
+    //
+    // format_no_copy    - do not copy unmatched value parts into the result
+    //
+    // If both format_first_only and format_no_copy flags are specified then
+    // the result will be a concatenation of only the first match
+    // replacements.
+    //
+    f[".merge"] = [](names s,
+                     string re,
+                     string fmt,
+                     optional<string> delim,
+                     optional<names> flags)
+    {
+      return merge (move (s), re, fmt, move (delim), move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     string re,
+                     names fmt,
+                     optional<string> delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    re,
+                    convert<string> (move (fmt)),
+                    move (delim),
+                    move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     names re,
+                     string fmt,
+                     optional<string> delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    convert<string> (move (re)),
+                    fmt,
+                    move (delim),
+                    move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     names re,
+                     names fmt,
+                     optional<string> delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    convert<string> (move (re)),
+                    convert<string> (move (fmt)),
+                    move (delim),
+                    move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     string re,
+                     string fmt,
+                     names delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    re,
+                    fmt,
+                    convert<string> (move (delim)),
+                    move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     string re,
+                     names fmt,
+                     names delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    re,
+                    convert<string> (move (fmt)),
+                    convert<string> (move (delim)),
+                    move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     names re,
+                     string fmt,
+                     names delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    convert<string> (move (re)),
+                    fmt,
+                    convert<string> (move (delim)),
+                    move (flags));
+    };
+
+    f[".merge"] = [](names s,
+                     names re,
+                     names fmt,
+                     names delim,
+                     optional<names> flags)
+    {
+      return merge (move (s),
+                    convert<string> (move (re)),
+                    convert<string> (move (fmt)),
+                    convert<string> (move (delim)),
+                    move (flags));
+    };
+
+    // apply
+    //
+    // Replace matched parts of each element in a list using the regex format
+    // string. Convert the elements to string prior to matching. Return a list
+    // of transformed elements, omitting the empty ones.
+    //
+    // Substitution escape sequences are extended with a subset of Perl
+    // sequences (see butl::regex_replace_ex() for details).
+    //
+    // The following flags are supported:
+    //
+    // icase             - match ignoring case
+    //
+    // format_first_only - only replace the first match
+    //
+    // format_no_copy    - do not copy unmatched value parts into the result
+    //
+    // If both format_first_only and format_no_copy flags are specified then
+    // the result elements will only contain the replacement of the first
+    // match.
+    //
+    f[".apply"] = [](names s, string re, string fmt, optional<names> flags)
+    {
+      return apply (move (s), re, fmt, move (flags));
+    };
+
+    f[".apply"] = [](names s, string re, names fmt, optional<names> flags)
+    {
+      return apply (move (s),
+                    re,
+                    convert<string> (move (fmt)),
+                    move (flags));
+    };
+
+    f[".apply"] = [](names s, names re, string fmt, optional<names> flags)
+    {
+      return apply (move (s),
+                    convert<string> (move (re)),
+                    fmt,
+                    move (flags));
+    };
+
+    f[".apply"] = [](names s, names re, names fmt, optional<names> flags)
+    {
+      return apply (move (s),
+                    convert<string> (move (re)),
+                    convert<string> (move (fmt)),
+                    move (flags));
+    };
   }
 }
diff --git a/tests/function/regex/testscript b/tests/function/regex/testscript
index deeefa5..6cce873 100644
--- a/tests/function/regex/testscript
+++ b/tests/function/regex/testscript
@@ -253,3 +253,99 @@
     }
   }
 }
+
+: split
+:
+{
+  : all-parts
+  :
+  : Note that 3 parts a printed here ('|abc|', ' ' and '|def|'), separated by
+  : the space character.
+  :
+  $* <<EOI >'|abc|   |def|'
+  print $regex.split('abc def', '(\S+)', '|\1|')
+  EOI
+
+  : no-copy
+  :
+  : Note that 2 parts a printed here ('|abc|' and '|def|'), separated by the
+  : space character.
+  :
+  $* <<EOI >'|abc| |def|'
+  print $regex.split('abc   def', '(\S+)', '|\1|', format_no_copy)
+  EOI
+
+  : unmatched
+  :
+  : Note that only unmatched part is printed here (' '). Empty replacements are
+  : omitted.
+  :
+  $* <<EOI >' '
+  print $regex.split('abc def', '(\S+)', '')
+  EOI
+
+  : include-options
+  :
+  {
+    : quoted
+    :
+    $* <<EOI >'|-Ic:/dir 1| |-IC:/dir2| |-IC:/dir3| |-IC:/dir4| ||'
+    opts = '"-Ic:/dir 1"   "-IC:/dir2"   "-IC:/dir3"   "-IC:/dir4" ""'
+    print $regex.split($opts, ' *"([^"]*)" *', '|\1|')
+    EOI
+
+    : quoted-unquoted
+    :
+    : Note that one of the two captures (\1\2) is always empty as they are
+    : alternative ones.
+    :
+    $* <<EOI >'|-Ic:/dir 1| |-IC:/dir2| |-IC:/dir3| |-IC:/dir4| ||'
+    opts = '"-Ic:/dir 1"   -IC:/dir2  "-IC:/dir3"   "-IC:/dir4" ""'
+    print $regex.split($opts, '"([^"]*)"|([^" ]+)', '|\1\2|', format_no_copy)
+    EOI
+  }
+}
+
+: apply
+:
+{
+  : all-parts
+  :
+  $* <<EOI >'xbc cbx'
+  print $regex.apply(abc cba, 'a', 'x')
+  EOI
+
+  : omit-empty
+  :
+  $* <<EOI >'bc cb'
+  print $regex.apply(abc a cba, 'a', '')
+  EOI
+}
+
+: merge
+:
+{
+  : all-parts
+  :
+  $* <<EOI >'xbccbx'
+  print $regex.merge(abc cba, 'a', 'x')
+  EOI
+
+  : omit-empty
+  :
+  $* <<EOI >'bccb'
+  print $regex.merge(abc a cba, 'a', '')
+  EOI
+
+  : delim
+  :
+  $* <<EOI >'xbc-cbx'
+  print $regex.merge(abc cba, 'a', 'x', '-')
+  EOI
+
+  : string-delim
+  :
+  $* <<EOI >'xbc-cbx'
+  print $regex.merge(abc cba, 'a', 'x', [string] '-')
+  EOI
+}