1 files changed, 460 insertions, 15 deletions
diff --git a/libbuild2/functions-string.cxx b/libbuild2/functions-string.cxx
index b430ebf..eccc6c7 100644
--- a/libbuild2/functions-string.cxx
+++ b/libbuild2/functions-string.cxx
@@ -8,18 +8,241 @@ using namespace std;
 
 namespace build2
 {
+  // Look for the substring forwards in the [p, n) range (n is s.size ()).
+  //
+  static inline size_t
+  find (const string& s, size_t p, const string& ss, bool ic)
+  {
+    size_t sn (ss.size ());
+
+    for (size_t n (s.size ()); p != n; ++p)
+    {
+      if (n - p >= sn && // Enough characters left for the substring.
+          (ic
+           ? icasecmp (ss, s.c_str () + p, sn) // Ignoring case.
+           : s.compare (p, sn, ss)) == 0)
+        return p; // Position of the first match.
+    }
+
+    return string::npos; // No match.
+  }
+
+  // Look for the substring backwards in the [0, n) range.
+  //
+  static inline size_t
+  rfind (const string& s, size_t n, const string& ss, bool ic)
+  {
+    size_t sn (ss.size ());
+
+    if (n >= sn) // Otherwise the substring cannot fit into the range.
+    {
+      n -= sn; // Don't consider characters out of range.
+
+      for (size_t p (n);; ) // Try positions n, n - 1, ..., 0.
+      {
+        if ((ic
+             ? icasecmp (ss, s.c_str () + p, sn) // Ignoring case.
+             : s.compare (p, sn, ss)) == 0)
+          return p; // Position of the last match.
+
+        if (p-- == 0) // Test before decrementing: try 0 and don't wrap.
+          break;
+      }
+    }
+
+    return string::npos; // No match.
+  }
+
+  static bool
+  contains (const string& s, value&& ssv, optional<names>&& fs)
+  {
+    bool ic (false), once (false); // Flags: icase, once.
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f))); // Note: shadows argument s.
+
+        if (s == "icase")
+          ic = true;
+        else if (s == "once")
+          once = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    const string ss (convert<string> (move (ssv)));
+
+    if (ss.empty ())
+      throw invalid_argument ("empty substring");
+
+    size_t p (find (s, 0, ss, ic)); // First occurrence, if any.
+
+    if (once && p != string::npos && p != rfind (s, s.size (), ss, ic))
+      p = string::npos; // First and last differ: more than one occurrence.
+
+    return p != string::npos;
+  }
+
+  static bool
+  starts_with (const string& s, value&& pfv, optional<names>&& fs)
+  {
+    bool ic (false); // Flag: icase.
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f))); // Note: shadows argument s.
+
+        if (s == "icase")
+          ic = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    const string pf (convert<string> (move (pfv)));
+
+    if (pf.empty ())
+      throw invalid_argument ("empty prefix");
+
+    return find (s, 0, pf, ic) == 0; // Prefix iff the first match is at 0.
+  }
+
+  static bool
+  ends_with (const string& s, value&& sfv, optional<names>&& fs)
+  {
+    bool ic (false); // Flag: icase.
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f))); // Note: shadows argument s.
+
+        if (s == "icase")
+          ic = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    const string sf (convert<string> (move (sfv)));
+
+    if (sf.empty ())
+      throw invalid_argument ("empty suffix");
+
+    size_t n (s.size ());
+    size_t p (rfind (s, n, sf, ic)); // Last occurrence, if any.
+
+    return p != string::npos && p + sf.size () == n; // Must end at n.
+  }
+
+  static string
+  replace (string&& s, value&& fv, value&& tv, optional<names>&& fs)
+  {
+    bool ic (false), fo (false), lo (false); // icase, first_only, last_only.
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f))); // Note: shadows argument s.
+
+        if (s == "icase")
+          ic = true;
+        else if (s == "first_only")
+          fo = true;
+        else if (s == "last_only")
+          lo = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    string f (convert<string> (move (fv))); // The <from> substring.
+    string t (convert<string> (move (tv))); // The <to> replacement.
+
+    if (f.empty ())
+      throw invalid_argument ("empty <from> substring");
+
+    if (!s.empty ())
+    {
+      // Note that we don't cache s.size () since the string size will be
+      // changing as we are replacing. In fact, we may end up with an empty
+      // string after a replacement.
+
+      size_t fn (f.size ());
+
+      if (fo || lo) // At most one replacement.
+      {
+        size_t p (lo ? rfind (s, s.size (), f, ic) : find (s, 0, f, ic));
+
+        if (fo && lo && p != string::npos) // Here p is the last occurrence.
+        {
+          if (p != find (s, 0, f, ic)) // Last differs from first.
+            p = string::npos; // More than one occurrence: don't replace.
+        }
+
+        if (p != string::npos)
+          s.replace (p, fn, t);
+      }
+      else
+      {
+        size_t tn (t.size ());
+
+        for (size_t p (0); (p = find (s, p, f, ic)) != string::npos; p += tn)
+          s.replace (p, fn, t); // The search resumes past the replacement.
+      }
+    }
+
+    return move (s);
+  }
+
+  static size_t
+  find_index (const strings& vs, value&& v, optional<names>&& fs)
+  {
+    bool ic (false); // Flag: icase.
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f)));
+
+        if (s == "icase")
+          ic = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    auto i (find_if (vs.begin (), vs.end (), // First element equal to v.
+                     [ic, y = convert<string> (move (v))] (const string& x)
+                     {
+                       return (ic ? icasecmp (x, y) : x.compare (y)) == 0;
+                     }));
+
+    return i != vs.end () ? i - vs.begin () : vs.size (); // size () if none.
+  }
+
   void
   string_functions (function_map& m)
   {
     function_family f (m, "string");
 
-    f["string"] += [](string s)  {return s;};
-
-    // @@ Shouldn't it concatenate elements into the single string?
-    // @@ Doesn't seem to be used so far. Can consider removing.
+    // Note: leave undocumented since there is no good reason for the user to
+    // call this function (which would be converting string to string).
+    //
+    // Note that we must handle NULL values (relied upon by the parser
+    // to provide conversion semantics consistent with untyped values).
     //
-    // f["string"] += [](strings v) {return v;};
+    f["string"] += [](string* s)
+    {
+      return s != nullptr ? move (*s) : string ();
+    };
 
+    // $string.icasecmp(<untyped>, <untyped>)
+    // $icasecmp(<string>, <string>)
+    //
     // Compare ASCII strings ignoring case and returning the boolean value.
     //
     f["icasecmp"] += [](string x, string y)
@@ -43,7 +266,112 @@ namespace build2
                        convert<string> (move (y))) == 0;
     };
 
-    // Trim.
+    // $string.contains(<untyped>, <untyped>[, <flags>])
+    // $contains(<string>, <string>[, <flags>])
+    //
+    // Check if the string (first argument) contains the given substring
+    // (second argument). The substring must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase  - compare ignoring case
+    //
+    //     once   - check if the substring occurs exactly once
+    //
+    // See also `$string.starts_with()`, `$string.ends_with()`,
+    // `$regex.search()`.
+    //
+    f["contains"] += [](string s, value ss, optional<names> fs)
+    {
+      return contains (move (s), move (ss), move (fs));
+    };
+
+    f[".contains"] += [](names s, value ss, optional<names> fs)
+    {
+      return contains (convert<string> (move (s)), move (ss), move (fs));
+    };
+
+    // $string.starts_with(<untyped>, <untyped>[, <flags>])
+    // $starts_with(<string>, <string>[, <flags>])
+    //
+    // Check if the string (first argument) begins with the given prefix
+    // (second argument). The prefix must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase  - compare ignoring case
+    //
+    // See also `$string.contains()`.
+    //
+    f["starts_with"] += [](string s, value pf, optional<names> fs)
+    {
+      return starts_with (move (s), move (pf), move (fs));
+    };
+
+    f[".starts_with"] += [](names s, value pf, optional<names> fs)
+    {
+      return starts_with (convert<string> (move (s)), move (pf), move (fs));
+    };
+
+    // $string.ends_with(<untyped>, <untyped>[, <flags>])
+    // $ends_with(<string>, <string>[, <flags>])
+    //
+    // Check if the string (first argument) ends with the given suffix (second
+    // argument). The suffix must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase  - compare ignoring case
+    //
+    // See also `$string.contains()`.
+    //
+    f["ends_with"] += [](string s, value sf, optional<names> fs)
+    {
+      return ends_with (move (s), move (sf), move (fs));
+    };
+
+    f[".ends_with"] += [](names s, value sf, optional<names> fs)
+    {
+      return ends_with (convert<string> (move (s)), move (sf), move (fs));
+    };
+
+    // $string.replace(<untyped>, <from>, <to> [, <flags>])
+    // $replace(<string>, <from>, <to> [, <flags>])
+    //
+    // Replace occurrences of substring <from> with <to> in a string. The
+    // <from> substring must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase       - compare ignoring case
+    //
+    //     first_only  - only replace the first match
+    //
+    //     last_only   - only replace the last match
+    //
+    //
+    // If both `first_only` and `last_only` flags are specified, then <from>
+    // is replaced only if it occurs in the string once.
+    //
+    // See also `$regex.replace()`.
+    //
+    f["replace"] += [](string s, value f, value t, optional<names> fs)
+    {
+      return replace (move (s), move (f), move (t), move (fs));
+    };
+
+    f[".replace"] += [](names s, value f, value t, optional<names> fs)
+    {
+      return names {
+        name (
+          replace (
+            convert<string> (move (s)), move (f), move (t), move (fs)))};
+    };
+
+    // $string.trim(<untyped>)
+    // $trim(<string>)
+    //
+    // Trim leading and trailing whitespace in a string.
     //
     f["trim"] += [](string s)
     {
@@ -55,7 +383,12 @@ namespace build2
       return names {name (trim (convert<string> (move (s))))};
     };
 
-    // Convert ASCII strings into lower/upper case.
+    // $string.lcase(<untyped>)
+    // $string.ucase(<untyped>)
+    // $lcase(<string>)
+    // $ucase(<string>)
+    //
+    // Convert ASCII string into lower/upper case.
     //
     f["lcase"] += [](string s)
     {
@@ -77,23 +410,135 @@ namespace build2
       return names {name (ucase (convert<string> (move (s))))};
     };
 
+    // $size(<strings>)
+    // $size(<string-set>)
+    // $size(<string-map>)
+    // $size(<string>)
+    //
+    // First three forms: return the number of elements in the sequence.
+    //
+    // Fourth form: return the number of characters (bytes) in the string.
+    //
+    f["size"] += [] (strings v)             {return v.size ();};
+    f["size"] += [] (set<string> v)         {return v.size ();};
+    f["size"] += [] (map<string, string> v) {return v.size ();};
+    f["size"] += [] (string v)              {return v.size ();};
+
+    // $sort(<strings> [, <flags>])
+    //
+    // Sort strings in ascending order.
+    //
+    // The following flags are supported:
+    //
+    //     icase - sort ignoring case
+    //
+    //     dedup - in addition to sorting also remove duplicates
+    //
+    f["sort"] += [](strings v, optional<names> fs)
+    {
+      bool ic (false);
+      bool dd (false);
+      if (fs)
+      {
+        for (name& f: *fs)
+        {
+          string s (convert<string> (move (f)));
+
+          if (s == "icase")
+            ic = true;
+          else if (s == "dedup")
+            dd = true;
+          else
+            throw invalid_argument ("invalid flag '" + s + '\'');
+        }
+      }
+
+      sort (v.begin (), v.end (),
+            [ic] (const string& x, const string& y)
+            {
+              return (ic ? icasecmp (x, y) : x.compare (y)) < 0;
+            });
+
+      if (dd)
+        v.erase (unique (v.begin(), v.end(),
+                         [ic] (const string& x, const string& y)
+                         {
+                           return (ic ? icasecmp (x, y) : x.compare (y)) == 0;
+                         }),
+                 v.end ());
+
+      return v;
+    };
+
+    // $find(<strings>, <string>[, <flags>])
+    //
+    // Return true if the string sequence contains the specified string.
+    //
+    // The following flags are supported:
+    //
+    //     icase - compare ignoring case
+    //
+    // See also `$regex.find_match()` and `$regex.find_search()`.
+    //
+    f["find"] += [](strings vs, value v, optional<names> fs)
+    {
+      return find_index (vs, move (v), move (fs)) != vs.size ();
+    };
+
+    // $find_index(<strings>, <string>[, <flags>])
+    //
+    // Return the index of the first element in the string sequence that
+    // is equal to the specified string or `$size(strings)` if none is
+    // found.
+    //
+    // The following flags are supported:
+    //
+    //     icase - compare ignoring case
+    //
+    f["find_index"] += [](strings vs, value v, optional<names> fs)
+    {
+      return find_index (vs, move (v), move (fs));
+    };
+
+    // $keys(<string-map>)
+    //
+    // Return the list of keys in a string map.
+    //
+    // Note that the result is sorted in ascending order.
+    //
+    f["keys"] += [](map<string, string> v)
+    {
+      strings r;
+      r.reserve (v.size ());
+      for (pair<const string, string>& p: v)
+        r.push_back (p.first); // @@ PERF: use C++17 map::extract() to steal.
+      return r;
+    };
+
     // String-specific overloads from builtins.
     //
     function_family b (m, "builtin");
 
-    b[".concat"] += [](string l, string r) {l += r; return l;};
+    // Note that we must handle NULL values (relied upon by the parser to
+    // provide concatenation semantics consistent with untyped values).
+    //
+    b[".concat"] += [](string* l, string* r)
+    {
+      return l != nullptr
+        ? r != nullptr ? move (*l += *r) : move (*l)
+        : r != nullptr ? move (*r) : string ();
+    };
 
-    b[".concat"] += [](string l, names ur)
+    b[".concat"] += [](string* l, names* ur)
     {
-      l += convert<string> (move (ur));
-      return l;
+      string r (ur != nullptr ? convert<string> (move (*ur)) : string ());
+      return l != nullptr ? move (*l += r) : move (r);
     };
 
-    b[".concat"] += [](names ul, string r)
+    b[".concat"] += [](names* ul, string* r)
     {
-      string l (convert<string> (move (ul)));
-      l += r;
-      return l;
+      string l (ul != nullptr ? convert<string> (move (*ul)) : string ());
+      return r != nullptr ? move (l += *r) : move (l);
     };
   }
 }