1 files changed, 298 insertions, 15 deletions
diff --git a/libbuild2/functions-string.cxx b/libbuild2/functions-string.cxx
index b430ebf..b7e0a17 100644
--- a/libbuild2/functions-string.cxx
+++ b/libbuild2/functions-string.cxx
@@ -8,18 +8,148 @@ using namespace std;
 
 namespace build2
 {
+  // Replace <from> (fv) with <to> (tv) in s according to the flags in fs.
+  // Throw invalid_argument if a flag is unknown or <from> is empty.
+  //
+  static string
+  replace (string&& s, value&& fv, value&& tv, optional<names>&& fs)
+  {
+    bool ic (false), fo (false), lo (false);
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f)));
+
+        if (s == "icase")
+          ic = true;
+        else if (s == "first_only")
+          fo = true;
+        else if (s == "last_only")
+          lo = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    string f (convert<string> (move (fv)));
+    string t (convert<string> (move (tv)));
+
+    if (f.empty ())
+      throw invalid_argument ("empty <from> substring");
+
+    if (!s.empty ())
+    {
+      // Note that we don't cache s.size () since the string size changes as
+      // we replace (we may even end up with an empty string). Also note that
+      // the search resumes past each replacement so <to> is never re-matched.
+
+      size_t fn (f.size ()), tn (t.size ());
+
+      // Look for the substring forward in the [p, n) range.
+      //
+      auto find = [&s, &f, fn, ic] (size_t p) -> size_t
+      {
+        for (size_t n (s.size ()); p != n; ++p)
+        {
+          if (n - p >= fn &&
+              (ic
+               ? icasecmp (f, s.c_str () + p, fn)
+               : s.compare (p, fn, f)) == 0)
+            return p;
+        }
+
+        return string::npos;
+      };
+
+      // Look for the substring backward in the [0, n) range.
+      //
+      auto rfind = [&s, &f, fn, ic] (size_t n) -> size_t
+      {
+        if (n >= fn)
+        {
+          n -= fn; // Don't consider characters out of range.
+
+          for (size_t p (n);; --p)
+          {
+            if ((ic
+                 ? icasecmp (f, s.c_str () + p, fn)
+                 : s.compare (p, fn, f)) == 0)
+              return p;
+
+            if (p == 0) // Position 0 was just tested; don't wrap around.
+              break;
+          }
+        }
+
+        return string::npos;
+      };
+
+      if (fo || lo)
+      {
+        size_t p (lo ? rfind (s.size ()) : find (0));
+
+        if (fo && lo && p != string::npos && p != find (0))
+          p = string::npos;
+
+        if (p != string::npos)
+          s.replace (p, fn, t);
+      }
+      else
+      {
+        for (size_t p (0); (p = find (p)) != string::npos; p += tn)
+          s.replace (p, fn, t);
+      }
+    }
+
+    return move (s);
+  }
+
+  // Return the index of the first element in vs that is equal to v or
+  // vs.size () if there is none. The only recognized flag is icase.
+  //
+  static size_t
+  find_index (const strings& vs, value&& v, optional<names>&& fs)
+  {
+    bool icase (false);
+    if (fs)
+      for (name& n: *fs)
+      {
+        string flag (convert<string> (move (n)));
+        if (flag != "icase")
+          throw invalid_argument ("invalid flag '" + flag + '\'');
+        icase = true;
+      }
+
+    string y (convert<string> (move (v)));
+
+    size_t i (0), n (vs.size ());
+    for (; i != n; ++i)
+      if ((icase ? icasecmp (vs[i], y) : vs[i].compare (y)) == 0)
+        break;
+
+    return i;
+  }
+
   void
   string_functions (function_map& m)
   {
     function_family f (m, "string");
 
-    f["string"] += [](string s)  {return s;};
-
-    // @@ Shouldn't it concatenate elements into the single string?
-    // @@ Doesn't seem to be used so far. Can consider removing.
+    // Note: leave undocumented since there is no good reason for the user to
+    // call this function (which would be converting string to string).
+    //
+    // Note that we must handle NULL values (relied upon by the parser
+    // to provide conversion semantics consistent with untyped values).
     //
-    // f["string"] += [](strings v) {return v;};
+    f["string"] += [](string* s)
+    {
+      return s == nullptr ? string () : move (*s);
+    };
 
+    // $string.icasecmp(<untyped>, <untyped>)
+    // $icasecmp(<string>, <string>)
+    //
     // Compare ASCII strings ignoring case and returning the boolean value.
     //
     f["icasecmp"] += [](string x, string y)
@@ -43,7 +173,43 @@ namespace build2
                        convert<string> (move (y))) == 0;
     };
 
-    // Trim.
+    // $string.replace(<untyped>, <from>, <to> [, <flags>])
+    // $replace(<string>, <from>, <to> [, <flags>])
+    //
+    // Replace occurrences of substring <from> with <to> in a string. The
+    // <from> substring must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase       - compare ignoring case
+    //
+    //     first_only  - only replace the first match
+    //
+    //     last_only   - only replace the last match
+    //
+    //
+    // If both `first_only` and `last_only` flags are specified, then <from>
+    // is replaced only if it occurs in the string once.
+    //
+    // See also `$regex.replace()`.
+    //
+    f["replace"] += [](string s, value f, value t, optional<names> fs)
+    {
+      return replace (move (s), move (f), move (t), move (fs));
+    };
+
+    f[".replace"] += [](names s, value f, value t, optional<names> fs)
+    {
+      string r (
+        replace (convert<string> (move (s)), move (f), move (t), move (fs)));
+
+      return names {name (move (r))};
+    };
+
+    // $string.trim(<untyped>)
+    // $trim(<string>)
+    //
+    // Trim leading and trailing whitespaces in a string.
     //
     f["trim"] += [](string s)
     {
@@ -55,7 +221,12 @@ namespace build2
       return names {name (trim (convert<string> (move (s))))};
     };
 
-    // Convert ASCII strings into lower/upper case.
+    // $string.lcase(<untyped>)
+    // $string.ucase(<untyped>)
+    // $lcase(<string>)
+    // $ucase(<string>)
+    //
+    // Convert ASCII string into lower/upper case.
     //
     f["lcase"] += [](string s)
     {
@@ -77,23 +248,135 @@ namespace build2
       return names {name (ucase (convert<string> (move (s))))};
     };
 
+    // $size(<strings>)
+    // $size(<string-set>)
+    // $size(<string-map>)
+    // $size(<string>)
+    //
+    // First three forms: return the number of elements in the sequence.
+    //
+    // Fourth form: return the number of characters (bytes) in the string.
+    //
+    f["size"] += [] (strings v)             {return v.size ();};
+    f["size"] += [] (set<string> v)         {return v.size ();};
+    f["size"] += [] (map<string, string> v) {return v.size ();};
+    f["size"] += [] (string v)              {return v.size ();};
+
+    // $sort(<strings> [, <flags>])
+    //
+    // Sort strings in ascending order.
+    //
+    // The following flags are supported:
+    //
+    //     icase - sort ignoring case
+    //
+    //     dedup - in addition to sorting also remove duplicates
+    //
+    f["sort"] += [](strings v, optional<names> fs)
+    {
+      bool icase (false), dedup (false);
+      if (fs)
+        for (name& n: *fs)
+        {
+          string flag (convert<string> (move (n)));
+
+          if (flag == "icase")
+            icase = true;
+          else if (flag == "dedup")
+            dedup = true;
+          else
+            throw invalid_argument ("invalid flag '" + flag + '\'');
+        }
+
+      // Three-way comparison shared by the ordering and equality predicates.
+      //
+      auto cmp = [icase] (const string& x, const string& y)
+      {
+        return icase ? icasecmp (x, y) : x.compare (y);
+      };
+
+      sort (v.begin (), v.end (),
+            [&cmp] (const string& x, const string& y)
+            {return cmp (x, y) < 0;});
+
+      if (dedup)
+        v.erase (unique (v.begin (), v.end (),
+                         [&cmp] (const string& x, const string& y)
+                         {return cmp (x, y) == 0;}),
+                 v.end ());
+
+      return v;
+    };
+
+    // $find(<strings>, <string>[, <flags>])
+    //
+    // Return true if the string sequence contains the specified string.
+    //
+    // The following flags are supported:
+    //
+    //     icase - compare ignoring case
+    //
+    // See also `$regex.find_match()` and `$regex.find_search()`.
+    //
+    f["find"] += [](strings vs, value v, optional<names> fs)
+    {
+      return find_index (vs, move (v), move (fs)) != vs.size ();
+    };
+
+    // $find_index(<strings>, <string>[, <flags>])
+    //
+    // Return the index of the first element in the string sequence that
+    // is equal to the specified string or `$size(strings)` if none is
+    // found.
+    //
+    // The following flags are supported:
+    //
+    //     icase - compare ignoring case
+    //
+    f["find_index"] += [](strings vs, value v, optional<names> fs)
+    {
+      return find_index (vs, move (v), move (fs));
+    };
+
+    // $keys(<string-map>)
+    //
+    // Return the list of keys in a string map.
+    //
+    // Note that the result is sorted in ascending order.
+    //
+    f["keys"] += [](map<string, string> v)
+    {
+      strings ks;
+      ks.reserve (v.size ());
+      for (auto i (v.begin ()), e (v.end ()); i != e; ++i)
+        ks.push_back (i->first); // @@ PERF: use C++17 map::extract() to steal.
+      return ks;
+    };
+
     // String-specific overloads from builtins.
     //
     function_family b (m, "builtin");
 
-    b[".concat"] += [](string l, string r) {l += r; return l;};
+    // Note that we must handle NULL values (relied upon by the parser to
+    // provide concatenation semantics consistent with untyped values).
+    //
+    b[".concat"] += [](string* l, string* r)
+    {
+      if (l == nullptr) return r != nullptr ? move (*r) : string ();
+      if (r != nullptr) *l += *r;
+      return move (*l);
+    };
 
-    b[".concat"] += [](string l, names ur)
+    b[".concat"] += [](string* l, names* ur)
     {
-      l += convert<string> (move (ur));
-      return l;
+      string r (ur != nullptr ? convert<string> (move (*ur)) : string ());
+      return l != nullptr ? move (*l += r) : move (r);
     };
 
-    b[".concat"] += [](names ul, string r)
+    b[".concat"] += [](names* ul, string* r)
     {
-      string l (convert<string> (move (ul)));
-      l += r;
-      return l;
+      string l (ul != nullptr ? convert<string> (move (*ul)) : string ());
+      return r != nullptr ? move (l += *r) : move (l);
     };
   }
 }