aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/adhoc-rule-regex-pattern.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/adhoc-rule-regex-pattern.cxx')
-rw-r--r-- libbuild2/adhoc-rule-regex-pattern.cxx 442
1 files changed, 442 insertions, 0 deletions
diff --git a/libbuild2/adhoc-rule-regex-pattern.cxx b/libbuild2/adhoc-rule-regex-pattern.cxx
new file mode 100644
index 0000000..4c8c1e5
--- /dev/null
+++ b/libbuild2/adhoc-rule-regex-pattern.cxx
@@ -0,0 +1,442 @@
+// file : libbuild2/adhoc-rule-regex-pattern.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/adhoc-rule-regex-pattern.hxx>
+
+#include <libbutl/regex.mxx>
+
+#include <libbuild2/algorithm.hxx>
+
+namespace build2
+{
+ using pattern_type = name::pattern_type; // Shorthand; compared against regex_pattern below.
+
+ // Construct the pattern from the primary target name (n, of type tt), the
+ // ad hoc group member names (ans), and the prerequisite names (pns). The
+ // nloc/aloc/ploc locations are only used when issuing diagnostics.
+ //
+ // Each name is stored as an element in targets_ (primary target first,
+ // followed by the members) or in prereqs_. The regex text of every name
+ // that is a regex pattern is additionally appended to text_, which is
+ // compiled into regex_ at the end.
+ //
+ adhoc_rule_regex_pattern::
+ adhoc_rule_regex_pattern (
+   const scope& s, string rn, const target_type& tt,
+   name&& n, const location& nloc,
+   names&& ans, const location& aloc,
+   names&& pns, const location& ploc)
+     : adhoc_rule_pattern (s, move (rn), tt)
+ {
+   // Conceptually the rule pattern is a single regular expression covering
+   // all the targets and prerequisites, with one shared back reference (\N)
+   // space.
+   //
+   // We get there by joining the individual target and prerequisite
+   // sub-patterns with a separator character that cannot be part of a name,
+   // has no special meaning in a regex, and is printable (so that it looks
+   // sensible in diagnostics). The directory separator (`/`) is the natural
+   // candidate. The names joined this way are called a "name signature"
+   // (there is also a "type signature"; see match()) and the joined
+   // sub-patterns a "name signature pattern".
+   //
+   regex::flag_type flags (regex::ECMAScript);
+
+   // Add the sub-pattern stored in the name value to text_ and return true
+   // if its `e` flag is present.
+   //
+   // The first character of the value is the delimiter: the regex text is
+   // what is between it and its last occurrence and whatever follows that
+   // last occurrence is the flags.
+   //
+   auto append_pattern = [this, &flags, first = true] (
+     const string& t,
+     const location& loc) mutable -> bool
+   {
+     size_t size (t.size ());
+     size_t close (t.rfind (t[0]));
+
+     // Collect the flags (anything other than `i` and `e` is skipped).
+     //
+     bool icase (false);
+     bool ext (false);
+
+     for (size_t i (close + 1); i != size; ++i)
+     {
+       const char f (t[i]);
+
+       if (f == 'i')
+         icase = true;
+       else if (f == 'e')
+         ext = true;
+     }
+
+     if (first)
+     {
+       // The first sub-pattern decides whether we are case-insensitive.
+       //
+       if (icase)
+         flags |= regex::icase;
+
+       first = false;
+     }
+     else
+     {
+       // Every subsequent sub-pattern must agree with the first one on
+       // icase (it is either all of them or none).
+       //
+       if (((flags & regex::icase) != 0) != icase)
+         fail (loc) << "inconsistent regex 'i' flag in '" << t << "'";
+
+       text_ += '/';
+     }
+
+     text_.append (t.c_str () + 1, close - 1);
+
+     return ext;
+   };
+
+   // Add an element for the name to the specified list (targets_ or
+   // prereqs_), resolving the target type from the name unless it is
+   // supplied by the caller.
+   //
+   auto append_element = [&s, &append_pattern] (
+     vector<element>& v,
+     name&& n,
+     const location& loc,
+     const target_type* tt = nullptr)
+   {
+     const target_type* type (tt);
+
+     if (type == nullptr)
+     {
+       // An untyped name or the `*` type means target{}.
+       //
+       if (n.untyped () || n.type == "*")
+         type = &target::static_type;
+       else
+         type = s.find_target_type (n.type);
+
+       if (type == nullptr)
+         fail (loc) << "unknown target type " << n.type;
+     }
+
+     // Only regex patterns contribute to the name signature pattern.
+     //
+     bool match_ext (false);
+
+     if (n.pattern && *n.pattern == pattern_type::regex_pattern)
+       match_ext = append_pattern (n.value, loc);
+
+     v.push_back (element {move (n), *type, match_ext});
+   };
+
+   // The primary target: always a pattern.
+   //
+   append_element (targets_, move (n), nloc, &tt);
+
+   // Ad hoc group members: each is either a pattern or a substitution.
+   //
+   for (name& m: ans)
+     append_element (targets_, move (m), aloc);
+
+   // Prerequisites: patterns, substitutions, or plain (non-pattern) names.
+   //
+   for (name& p: pns)
+     append_element (prereqs_, move (p), ploc);
+
+   try
+   {
+     regex_ = regex (text_, flags);
+   }
+   catch (const regex_error& ex)
+   {
+     // Print regex_error description if meaningful (no space).
+     //
+     // The primary target location is not necessarily where the offending
+     // sub-pattern is but it should be close enough.
+     //
+     fail (nloc) << "invalid regex pattern '" << text_ << "'" << ex;
+   }
+ }
+
+ // Return true if the target and its prerequisites match this pattern. On
+ // success the name signature is left in me.buffer and the regex match
+ // results are stored in the target's auxiliary data storage.
+ //
+ bool adhoc_rule_regex_pattern::
+ match (action a, target& t, const string&, match_extra& me) const
+ {
+   tracer trace ("adhoc_rule_regex_pattern::match");
+
+   // The overall approach: first verify the "type signature" of the target
+   // and its prerequisites (the type of the primary target has already been
+   // matched by the rule matching machinery). If that matches, join their
+   // names into a "name signature" the same way the sub-patterns were joined
+   // in the constructor and match the result against the name signature
+   // regex. If that matches as well, then the rule matches and the apply_*()
+   // functions are expected to be called to process the member/prerequisite
+   // substitutions and to inject them together with the non-pattern
+   // prerequisites.
+   //
+   // Checking the types and joining the names in a single pass would be the
+   // obvious thing to do. But the type check should be cheap while joining
+   // the names most likely means a dynamic allocation. So, as a compromise,
+   // we type-match the first prerequisite before touching any names, which
+   // should filter out most of the non-matches for sane patterns.
+   //
+   // Note that there is no backtracking: we don't try different combinations
+   // of the type-matching targets/prerequisites. Prerequisites marked ad hoc
+   // are also ignored for type-matching.
+   //
+   auto regex_element = [] (const element& e) -> bool
+   {
+     return e.name.pattern && *e.name.pattern == pattern_type::regex_pattern;
+   };
+
+   // Return the key of the first normally-included prerequisite of the
+   // specified type, if any.
+   //
+   auto find_prereq = [a, &t] (const target_type& tt) -> optional<target_key>
+   {
+     // This is the standard logic one would use in a rule::match()
+     // implementation.
+     //
+     for (prerequisite_member p: group_prerequisite_members (a, t))
+     {
+       if (include (a, t, p) != include_type::normal)
+         continue;
+
+       if (p.is_a (tt))
+         return p.key ().tk;
+     }
+
+     return nullopt;
+   };
+
+   // Type-match the first prerequisite pattern, if there is one.
+   //
+   auto pe (prereqs_.end ());
+   auto pi (find_if (prereqs_.begin (), pe, regex_element));
+
+   optional<target_key> pk1;
+   if (pi != pe)
+   {
+     pk1 = find_prereq (pi->type);
+
+     if (!pk1)
+     {
+       l4 ([&]{trace << rule_name << ": no " << pi->type.name
+                     << "{} prerequisite for target " << t;});
+       return false;
+     }
+   }
+
+   // We have a potential match so start joining the names.
+   //
+   // The regex_match_results object (which is passed on to apply() via the
+   // target's auxiliary data storage) holds iterators into the string that
+   // was matched. This means the string has to stay alive until all the
+   // substitutions have been replaced. It cannot even be moved since that
+   // may invalidate the iterators (think small string optimization). So we
+   // keep the string in match_extra::buffer and regex_match_results (which
+   // can be moved) in the auxiliary data storage.
+   //
+   string& ns (me.buffer);
+
+   // Append the name from the target key to the name signature, separating
+   // it from the previous name (if any) with `/`.
+   //
+   auto append_name = [&ns, first = true] (const target_key& tk,
+                                           const element& e) mutable
+   {
+     if (first)
+       first = false;
+     else
+       ns += '/';
+
+     ns += *tk.name;
+
+     // The same semantics as in variable_type_map::find(): a non-empty
+     // extension is included if requested by the element or if the target
+     // type has one of the two fixed extension functions below.
+     //
+     if (tk.ext && !tk.ext->empty ())
+     {
+       if (e.match_ext ||
+           tk.type->fixed_extension == &target_extension_none ||
+           tk.type->fixed_extension == &target_extension_must)
+       {
+         ns += '.';
+         ns += *tk.ext;
+       }
+     }
+   };
+
+   // The primary target comes first (and is always a pattern).
+   //
+   auto te (targets_.end ());
+   auto ti (targets_.begin ());
+
+   append_name (t.key (), *ti);
+
+   // Then the ad hoc group members that are patterns.
+   //
+   for (ti = find_if (ti + 1, te, regex_element);
+        ti != te;
+        ti = find_if (ti + 1, te, regex_element))
+   {
+     const target* at (find_adhoc_member (t, ti->type));
+
+     if (at == nullptr)
+     {
+       l4 ([&]{trace << rule_name << ": no " << ti->type.name
+                     << "{} ad hoc target group member for target " << t;});
+       return false;
+     }
+
+     append_name (at->key (), *ti);
+   }
+
+   // And finally the prerequisites, starting with the one that has already
+   // been type-matched above.
+   //
+   if (pi != pe)
+   {
+     append_name (*pk1, *pi);
+
+     for (pi = find_if (pi + 1, pe, regex_element);
+          pi != pe;
+          pi = find_if (pi + 1, pe, regex_element))
+     {
+       optional<target_key> pk (find_prereq (pi->type));
+
+       if (!pk)
+       {
+         l4 ([&]{trace << rule_name << ": no " << pi->type.name
+                       << "{} prerequisite for target " << t;});
+         return false;
+       }
+
+       append_name (*pk, *pi);
+     }
+   }
+
+   // Optimizing this for patterns without any substitutions (which would be
+   // most of them) may look tempting. Keep in mind, however, that
+   // match_results are also needed for the $N variables in the recipe (and
+   // a C++ rule implementation may want to access the match_results object).
+   //
+   static_assert (sizeof (regex_match_results) <= target::data_size,
+                  "insufficient space");
+
+   regex_match_results mr;
+   if (!regex_match (ns, mr, regex_))
+   {
+     l4 ([&]{trace << rule_name << ": name signature '" << ns
+                   << "' does not match regex '" << text_
+                   << "' for target " << t;});
+     return false;
+   }
+
+   t.data (move (mr));
+
+   return true;
+ }
+
+ // Expand the substitution s (a name value whose first character is the
+ // delimiter) using the regex match results mr and return the resulting
+ // name. Fail if the result is empty, describing the substitution with
+ // `what` and the target t.
+ //
+ static inline string
+ substitute (const target& t,
+             const regex_match_results& mr,
+             const string& s,
+             const char* what)
+ {
+   // The format text is what is between the leading delimiter and its last
+   // occurrence.
+   //
+   const char* fmt (s.c_str () + 1);
+   size_t fmt_size (s.rfind (s[0]) - 1);
+
+   string r (butl::regex_replace_match_results (mr, fmt, fmt_size));
+
+   // @@ It would have been nice to print the location here (and to also
+   //    pass it to search()->find_target_type()). But for that we would
+   //    need to save location_value in each element in order to cover
+   //    multiple declarations.
+   //
+   if (r.empty ())
+     fail << what << " substitution '" << s << "' for target " << t
+          << " results in empty name";
+
+   return r;
+ }
+
+ // Add an ad hoc group member to the target for every member substitution
+ // in targets_, using the regex match results stored in the target's
+ // auxiliary data.
+ //
+ void adhoc_rule_regex_pattern::
+ apply_adhoc_members (action, target& t, match_extra&) const
+ {
+   const auto& mr (t.data<regex_match_results> ());
+
+   // Skip the first element, which is the primary target.
+   //
+   const auto e_end (targets_.end ());
+
+   for (auto i (targets_.begin () + 1); i != e_end; ++i)
+   {
+     const element& e (*i);
+
+     // Members are either patterns or substitutions and only the latter are
+     // of interest here.
+     //
+     if (*e.name.pattern == pattern_type::regex_pattern)
+       continue;
+
+     // As with prerequisites (see apply_prerequisites()), a member
+     // substitution is treated as relative to the target.
+     //
+     const dir_path& ed (e.name.dir);
+
+     dir_path d;
+     if (!ed.empty ())
+     {
+       if (ed.absolute ())
+         d = ed;
+       else
+         d = t.dir / ed;
+
+       d.normalize ();
+     }
+     else
+       d = t.dir; // Absolute and normalized.
+
+     string mn (
+       substitute (t, mr, e.name.value, "ad hoc target group member"));
+
+     // @@ TODO: currently this uses type as the ad hoc member identity.
+     //
+     add_adhoc_member (t,
+                       e.type,
+                       move (d),
+                       dir_path () /* out */,
+                       move (mn));
+   }
+ }
+
+ // Search for and inject into the target's prerequisite_targets the
+ // prerequisite substitutions and non-pattern prerequisites from prereqs_,
+ // then match whatever has been injected.
+ //
+ void adhoc_rule_regex_pattern::
+ apply_prerequisites (action a, target& t, match_extra&) const
+ {
+   const auto& mr (t.data<regex_match_results> ());
+
+   // The target's base scope, looked up on first use and then cached.
+   //
+   auto base_scope = [&t, cached = static_cast<const scope*> (nullptr)] ()
+     mutable -> const scope&
+   {
+     if (cached == nullptr)
+       cached = &t.base_scope ();
+
+     return *cached;
+   };
+
+   // Reproduce the clean semantics of match_prerequisite_members().
+   //
+   const bool clean (a.operation () == clean_id && !t.is_a<alias> ());
+
+   auto& pts (t.prerequisite_targets[a]);
+   const size_t start (pts.size ());
+
+   for (const element& e: prereqs_)
+   {
+     // It would be nice to avoid making a copy of the name. However, the
+     // semantics of search() (and of find_target_type() that it calls) is
+     // too hairy to duplicate and optimize. Chances are most cases will
+     // either be covered by the small string optimization or be absolute
+     // target names (e.g., imported tools).
+     //
+     // @@ Perhaps we should try to optimize the absolute target name case?
+     //
+     // As for the scope to resolve the prerequisite in, the target's scope
+     // feels like the natural choice for patterns and the rule's scope for
+     // non-patterns.
+     //
+     name n;
+     const scope* s;
+
+     if (!e.name.pattern)
+     {
+       n = e.name;
+       s = &rule_scope;
+     }
+     else if (*e.name.pattern == pattern_type::regex_pattern)
+     {
+       // Regex patterns only take part in matching; nothing to inject.
+       //
+       continue;
+     }
+     else
+     {
+       // Note: cannot be project-qualified.
+       //
+       n = name (e.name.dir,
+                 e.name.type,
+                 substitute (t, mr, e.name.value, "prerequisite"));
+       s = &base_scope ();
+     }
+
+     const target& pt (search (t, move (n), *s, &e.type));
+
+     // When cleaning, only inject prerequisites that are in the target's
+     // root scope.
+     //
+     // @@ TODO: it could be handy to mark a prerequisite (e.g., a tool)
+     //    ad hoc so that it doesn't interfere with the $< list.
+     //
+     if (!clean || pt.in (*base_scope ().root_scope ()))
+       pts.push_back (prerequisite_target (&pt, false /* adhoc */));
+   }
+
+   // Match what we have added, if anything.
+   //
+   if (pts.size () != start)
+     match_members (a, t, pts, start);
+ }
+
+ // Print the pattern as `<targets>: <prerequisites>`. The target list is
+ // enclosed in `<>` unless there is exactly one target.
+ //
+ void adhoc_rule_regex_pattern::
+ dump (ostream& os) const
+ {
+   // Targets, separated with spaces.
+   //
+   const bool group (targets_.size () != 1);
+
+   if (group)
+     os << '<';
+
+   const char* sep ("");
+   for (const element& e: targets_)
+   {
+     os << sep << e.name;
+     sep = " ";
+   }
+
+   if (group)
+     os << '>';
+
+   // Prerequisites, each preceded by a space.
+   //
+   os << ':';
+
+   for (const element& e: prereqs_)
+     os << ' ' << e.name;
+ }
+}