1 files changed, 442 insertions, 0 deletions
diff --git a/libbuild2/adhoc-rule-regex-pattern.cxx b/libbuild2/adhoc-rule-regex-pattern.cxx
new file mode 100644
index 0000000..4c8c1e5
--- /dev/null
+++ b/libbuild2/adhoc-rule-regex-pattern.cxx
@@ -0,0 +1,442 @@
+// file      : libbuild2/adhoc-rule-regex-pattern.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/adhoc-rule-regex-pattern.hxx>
+
+#include <libbutl/regex.mxx>
+
+#include <libbuild2/algorithm.hxx>
+
+namespace build2
+{
+  using pattern_type = name::pattern_type;
+
+  adhoc_rule_regex_pattern::
+  adhoc_rule_regex_pattern (
+    const scope& s, string rn, const target_type& tt,
+    name&& n, const location& nloc,
+    names&& ans, const location& aloc,
+    names&& pns, const location& ploc)
+      : adhoc_rule_pattern (s, move (rn), tt)
+  {
+    // Semantically, our rule pattern is one logical regular expression that
+    // spans multiple targets and prerequisites with a single back reference
+    // (\N) space.
+    //
+    // To implement this we are going to concatenate all the target and
+    // prerequisite sub-patterns separated with a character which cannot
+    // appear in the name (nor is a special regex character) but which is
+    // printable (for diagnostics). The directory separator (`/`) feels like a
+    // natural choice. We will call such a concatenated string of names a
+    // "name signature" (we also have a "type signature"; see below) and its
+    // pattern a "name signature pattern".
+    //
+    regex::flag_type flags (regex::ECMAScript);
+
+    // Append the sub-pattern to text_ returning the status of the `e` flag.
+    //
+    auto append_pattern = [this, &flags, first = true] (
+      const string& t,
+      const location& loc) mutable -> bool
+    {
+      size_t n (t.size ()), p (t.rfind (t[0]));
+
+      // Process flags.
+      //
+      bool fi (false), fe (false);
+      for (size_t i (p + 1); i != n; ++i)
+      {
+        switch (t[i])
+        {
+        case 'i': fi = true; break;
+        case 'e': fe = true; break;
+        }
+      }
+
+      // For icase we require all or none of the patterns to have it.
+      //
+      if (first)
+      {
+        if (fi)
+          flags |= regex::icase;
+      }
+      else if (((flags & regex::icase) != 0) != fi)
+        fail (loc) << "inconsistent regex 'i' flag in '" << t << "'";
+
+      if (!first)
+        text_ += '/';
+      else
+        first = false;
+
+      text_.append (t.c_str () + 1, p - 1);
+
+      return fe;
+    };
+
+    // Append an element either to targets_ or prereqs_.
+    //
+    auto append_element = [&s, &append_pattern] (
+      vector<element>& v,
+      name&& n,
+      const location& loc,
+      const target_type* tt = nullptr)
+    {
+      if (tt == nullptr)
+      {
+        tt = n.untyped () || n.type == "*"
+        ? &target::static_type
+        : s.find_target_type (n.type);
+
+        if (tt == nullptr)
+          fail (loc) << "unknown target type " << n.type;
+      }
+
+      bool e (n.pattern                                 &&
+              *n.pattern == pattern_type::regex_pattern &&
+              append_pattern (n.value, loc));
+
+      v.push_back (element {move (n), *tt, e});
+    };
+
+    // This one is always a pattern.
+    //
+    append_element (targets_, move (n), nloc, &tt);
+
+    // These are all patterns or substitutions.
+    //
+    for (name& an: ans)
+      append_element (targets_, move (an), aloc);
+
+    // These can be patterns, substitutions, or non-patterns.
+    //
+    for (name& pn: pns)
+      append_element (prereqs_, move (pn), ploc);
+
+    try
+    {
+      regex_ = regex (text_, flags);
+    }
+    catch (const regex_error& e)
+    {
+      // Print regex_error description if meaningful (no space).
+      //
+      // This may not necessarily be pointing at the actual location of the
+      // error but it should be close enough.
+      //
+      fail (nloc) << "invalid regex pattern '" << text_ << "'" << e;
+    }
+  }
+
+  bool adhoc_rule_regex_pattern::
+  match (action a, target& t, const string&, match_extra& me) const
+  {
+    tracer trace ("adhoc_rule_regex_pattern::match");
+
+    // The plan is as follows: First check the "type signature" of the target
+    // and its prerequisites (the primary target type has already been matched
+    // by the rule matching machinery). If there is a match, then concatenate
+    // their names into a "name signature" in the same way as for sub-patterns
+    // above and match that against the name signature regex pattern. If there
+    // is a match then this rule matches and the apply_*() functions should be
+    // called to process any member/prerequisite substitutions and inject them
+    // along with non-pattern prerequisites.
+    //
+    // It would be natural to perform the type match and concatenation of the
+    // names simultaneously. However, while the former should be quite cheap,
+    // the latter will most likely require dynamic allocation. To mitigate
+    // this we are going to pre-type-match the first prerequisite before
+    // concatenating any names. This should weed out most of the non-matches
+    // for sane patterns.
+    //
+    // Note also that we don't backtrack and try different combinations of the
+    // type-matching targets/prerequisites. We also ignore prerequisites
+    // marked ad hoc for type-matching.
+    //
+    auto pattern = [] (const element& e) -> bool
+    {
+      return e.name.pattern && *e.name.pattern == pattern_type::regex_pattern;
+    };
+
+    auto find_prereq = [a, &t] (const target_type& tt) -> optional<target_key>
+    {
+      // We use the standard logic that one would use in the rule::match()
+      // implementation.
+      //
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        if (include (a, t, p) == include_type::normal && p.is_a (tt))
+          return p.key ().tk;
+      }
+      return nullopt;
+    };
+
+    // Pre-type-match the first prerequisite, if any.
+    //
+    auto pe (prereqs_.end ()), pi (find_if (prereqs_.begin (), pe, pattern));
+
+    optional<target_key> pk1;
+    if (pi != pe)
+    {
+      if (!(pk1 = find_prereq (pi->type)))
+      {
+        l4 ([&]{trace << rule_name << ": no " << pi->type.name
+                      << "{} prerequisite for target " << t;});
+        return false;
+      }
+    }
+
+    // Ok, this is a potential match, start concatenating the names.
+    //
+    // Note that the regex_match_results object (which we will be passing
+    // through to apply() in the target's auxiliary data storage) contains
+    // iterators pointing to the string being matched. Which means this string
+    // must be kept around until we are done with replacing the subsitutions.
+    // In fact, we cannot even move it because this may invalidate the
+    // iterators (e.g., in case of a small string optimization). So the plan
+    // is to store the string in match_extra::buffer and regex_match_results
+    // (which we can move) in the auxiliary data storage.
+    //
+    string& ns (me.buffer);
+
+    auto append_name = [&ns, first = true] (const target_key& tk,
+                                            const element& e) mutable
+    {
+      if (!first)
+        ns += '/';
+      else
+        first = false;
+
+      ns += *tk.name;
+
+      // The same semantics as in variable_type_map::find().
+      //
+      if (tk.ext && !tk.ext->empty () &&
+          (e.match_ext ||
+           tk.type->fixed_extension == &target_extension_none ||
+           tk.type->fixed_extension == &target_extension_must))
+      {
+        ns += '.';
+        ns += *tk.ext;
+      }
+    };
+
+    // Primary target (always a pattern).
+    //
+    auto te (targets_.end ()), ti (targets_.begin ());
+    append_name (t.key (), *ti);
+
+    // Match ad hoc group members.
+    //
+    while ((ti = find_if (ti + 1, te, pattern)) != te)
+    {
+      const target* at (find_adhoc_member (t, ti->type));
+
+      if (at == nullptr)
+      {
+        l4 ([&]{trace << rule_name << ": no " << ti->type.name
+                      << "{} ad hoc target group member for target " << t;});
+        return false;
+      }
+
+      append_name (at->key (), *ti);
+    }
+
+    // Finish prerequisites.
+    //
+    if (pi != pe)
+    {
+      append_name (*pk1, *pi);
+
+      while ((pi = find_if (pi + 1, pe, pattern)) != pe)
+      {
+        optional<target_key> pk (find_prereq (pi->type));
+
+        if (!pk)
+        {
+          l4 ([&]{trace << rule_name << ": no " << pi->type.name
+                        << "{} prerequisite for target " << t;});
+          return false;
+        }
+
+        append_name (*pk, *pi);
+      }
+    }
+
+    // While it can be tempting to optimize this for patterns that don't have
+    // any substitutions (which would be most of them), keep in mind that we
+    // will also need match_results for $N variables in the recipe (or a C++
+    // rule implementation may want to access the match_results object).
+    //
+    regex_match_results mr;
+    if (!regex_match (ns, mr, regex_))
+    {
+      l4 ([&]{trace << rule_name << ": name signature '" << ns
+                    << "' does not match regex '" << text_
+                    << "' for target " << t;});
+      return false;
+    }
+
+    static_assert (sizeof (regex_match_results) <= target::data_size,
+                   "insufficient space");
+    t.data (move (mr));
+
+    return true;
+  }
+
+  static inline string
+  substitute (const target& t,
+              const regex_match_results& mr,
+              const string& s,
+              const char* what)
+  {
+    string r (butl::regex_replace_match_results (
+                mr, s.c_str () + 1, s.rfind (s[0]) - 1));
+
+    // @@ Note that while it would have been nice to print the location here,
+    //    (and also pass to search()->find_target_type()), we would need to
+    //    save location_value in each element to cover multiple declarations.
+    //
+    if (r.empty ())
+      fail << what << " substitution '" << s << "' for target " << t
+           << " results in empty name";
+
+    return r;
+  }
+
+  void adhoc_rule_regex_pattern::
+  apply_adhoc_members (action, target& t, match_extra&) const
+  {
+    const auto& mr (t.data<regex_match_results> ());
+
+    for (auto i (targets_.begin () + 1); i != targets_.end (); ++i)
+    {
+      // These are all patterns or substitutions.
+      //
+      const element& e (*i);
+
+      if (*e.name.pattern == pattern_type::regex_pattern)
+        continue;
+
+      // Similar to prerequisites below, we treat member substitutions
+      // relative to the target.
+      //
+      dir_path d;
+      if (e.name.dir.empty ())
+        d = t.dir; // Absolute and normalized.
+      else
+      {
+        if (e.name.dir.absolute ())
+          d = e.name.dir;
+        else
+          d = t.dir / e.name.dir;
+
+        d.normalize ();
+      }
+
+      // @@ TODO: currently this uses type as the ad hoc member identity.
+      //
+      add_adhoc_member (
+        t,
+        e.type,
+        move (d),
+        dir_path () /* out */,
+        substitute (t, mr, e.name.value, "ad hoc target group member"));
+    }
+  }
+
+  void adhoc_rule_regex_pattern::
+  apply_prerequisites (action a, target& t, match_extra&) const
+  {
+    const auto& mr (t.data<regex_match_results> ());
+
+    // Resolve and cache target scope lazily.
+    //
+    auto base_scope = [&t, bs = (const scope*) nullptr] () mutable
+      -> const scope&
+    {
+      if (bs == nullptr)
+        bs = &t.base_scope ();
+
+      return *bs;
+    };
+
+    // Re-create the same clean semantics as in match_prerequisite_members().
+    //
+    bool clean (a.operation () == clean_id && !t.is_a<alias> ());
+
+    auto& pts (t.prerequisite_targets[a]);
+    size_t start (pts.size ());
+
+    for (const element& e: prereqs_)
+    {
+      // While it would be nice to avoid copying here, the semantics of
+      // search() (and find_target_type() that it calls) is just too hairy to
+      // duplicate and try to optimize. It feels like most of the cases will
+      // either fall under the small string optimization or be absolute target
+      // names (e.g., imported tools).
+      //
+      // @@ Perhaps we should try to optimize the absolute target name case?
+      //
+      // Which scope should we use to resolve this prerequisite? After some
+      // meditation it feels natural to use the target's scope for patterns
+      // and the rule's scope for non-patterns.
+      //
+      name n;
+      const scope* s;
+      if (e.name.pattern)
+      {
+        if (*e.name.pattern == pattern_type::regex_pattern)
+          continue;
+
+        // Note: cannot be project-qualified.
+        //
+        n = name (e.name.dir,
+                  e.name.type,
+                  substitute (t, mr, e.name.value, "prerequisite"));
+        s = &base_scope ();
+      }
+      else
+      {
+        n = e.name;
+        s = &rule_scope;
+      }
+
+      const target& pt (search (t, move (n), *s, &e.type));
+
+      if (clean && !pt.in (*base_scope ().root_scope ()))
+        continue;
+
+      // @@ TODO: it could be handy to mark a prerequisite (e.g., a tool)
+      //    ad hoc so that it doesn't interfere with the $< list.
+      //
+      pts.push_back (prerequisite_target (&pt, false /* adhoc */));
+    }
+
+    if (start != pts.size ())
+      match_members (a, t, pts, start);
+  }
+
+  void adhoc_rule_regex_pattern::
+  dump (ostream& os) const
+  {
+    // Targets.
+    //
+    size_t tn (targets_.size ());
+
+    if (tn != 1)
+      os << '<';
+
+    for (size_t i (0); i != tn; ++i)
+      os << (i != 0 ? " " : "") << targets_[i].name;
+
+    if (tn != 1)
+      os << '>';
+
+    // Prerequisites.
+    //
+    os << ':';
+
+    for (size_t i (0); i != prereqs_.size (); ++i)
+      os << ' ' << prereqs_[i].name;
+  }
+}