// file      : libbuild2/adhoc-rule-regex-pattern.cxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#include <libbuild2/adhoc-rule-regex-pattern.hxx>

#include <libbutl/regex.hxx>

#include <libbuild2/algorithm.hxx>

namespace build2
{
  using pattern_type = name::pattern_type;

  adhoc_rule_regex_pattern::
  adhoc_rule_regex_pattern (
    const scope& s, string rn, const target_type& tt,
    name&& n, const location& nloc,
    names&& ans, const location& aloc,
    names&& pns, const location& ploc)
      : adhoc_rule_pattern (s, move (rn), tt)
  {
    // Semantically, our rule pattern is one logical regular expression that
    // spans multiple targets and prerequisites with a single back reference
    // (\N) space.
    //
    // To implement this we are going to concatenate all the target and
    // prerequisite sub-patterns separated with a character which cannot
    // appear in the name (nor is a special regex character) but which is
    // printable (for diagnostics). The directory separator (`/`) feels like a
    // natural choice. We will call such a concatenated string of names a
    // "name signature" (we also have a "type signature"; see below) and its
    // pattern a "name signature pattern".
    //
    regex::flag_type flags (regex::ECMAScript);

    // Append the sub-pattern to text_ returning the status of the `e` flag.
    //
    auto append_pattern = [this, &flags, first = true] (
      const string& t,
      const location& loc) mutable -> bool
    {
      size_t n (t.size ()), p (t.rfind (t[0]));

      // Process flags.
      //
      bool fi (false), fe (false);
      for (size_t i (p + 1); i != n; ++i)
      {
        switch (t[i])
        {
        case 'i': fi = true; break;
        case 'e': fe = true; break;
        }
      }

      // For icase we require all or none of the patterns to have it.
      //
      if (first)
      {
        if (fi)
          flags |= regex::icase;
      }
      else if (((flags & regex::icase) != 0) != fi)
        fail (loc) << "inconsistent regex 'i' flag in '" << t << "'";

      if (!first)
        text_ += '/';
      else
        first = false;

      text_.append (t.c_str () + 1, p - 1);

      return fe;
    };

    // Append an element either to targets_ or prereqs_.
    //
    auto append_element = [&s, &append_pattern] (
      vector<element>& v,
      name&& n,
      const location& loc,
      const target_type* tt = nullptr)
    {
      if (tt == nullptr)
      {
        tt = n.untyped () ? &file::static_type : s.find_target_type (n.type);

        if (tt == nullptr)
          fail (loc) << "unknown target type " << n.type;
      }

      bool e (n.pattern                                 &&
              *n.pattern == pattern_type::regex_pattern &&
              append_pattern (n.value, loc));

      v.push_back (element {move (n), *tt, e});
    };

    // This one is always a pattern.
    //
    append_element (targets_, move (n), nloc, &tt);

    // These are all patterns or substitutions.
    //
    for (name& an: ans)
      append_element (targets_, move (an), aloc);

    // These can be patterns, substitutions, or non-patterns.
    //
    for (name& pn: pns)
      append_element (prereqs_, move (pn), ploc);

    try
    {
      regex_ = regex (text_, flags);
    }
    catch (const regex_error& e)
    {
      // Print regex_error description if meaningful (no space).
      //
      // This may not necessarily be pointing at the actual location of the
      // error but it should be close enough.
      //
      fail (nloc) << "invalid regex pattern '" << text_ << "'" << e;
    }
  }

  bool adhoc_rule_regex_pattern::
  match (action a, target& t, const string&, match_extra& me) const
  {
    tracer trace ("adhoc_rule_regex_pattern::match");

    // The plan is as follows: First check the "type signature" of the target
    // and its prerequisites (the primary target type has already been matched
    // by the rule matching machinery). If there is a match, then concatenate
    // their names into a "name signature" in the same way as for sub-patterns
    // above and match that against the name signature regex pattern. If there
    // is a match then this rule matches and the apply_*() functions should be
    // called to process any member/prerequisite substitutions and inject them
    // along with non-pattern prerequisites.
    //
    // It would be natural to perform the type match and concatenation of the
    // names simultaneously. However, while the former should be quite cheap,
    // the latter will most likely require dynamic allocation. To mitigate
    // this we are going to pre-type-match the first prerequisite before
    // concatenating any names. This should weed out most of the non-matches
    // for sane patterns.
    //
    // Note also that we don't backtrack and try different combinations of the
    // type-matching targets/prerequisites. We also ignore prerequisites
    // marked ad hoc for type-matching.
    //
    auto pattern = [] (const element& e) -> bool
    {
      return e.name.pattern && *e.name.pattern == pattern_type::regex_pattern;
    };

    auto find_prereq = [a, &t] (const target_type& tt) -> optional<target_key>
    {
      // We use the standard logic that one would use in the rule::match()
      // implementation.
      //
      for (prerequisite_member p: group_prerequisite_members (a, t))
      {
        if (include (a, t, p) == include_type::normal && p.is_a (tt))
          return p.key ().tk;
      }
      return nullopt;
    };

    // Pre-type-match the first prerequisite, if any.
    //
    auto pe (prereqs_.end ()), pi (find_if (prereqs_.begin (), pe, pattern));

    optional<target_key> pk1;
    if (pi != pe)
    {
      if (!(pk1 = find_prereq (pi->type)))
      {
        l4 ([&]{trace << rule_name << ": no " << pi->type.name
                      << "{} prerequisite for target " << t;});
        return false;
      }
    }

    // Ok, this is a potential match, start concatenating the names.
    //
    // Note that the regex_match_results object (which we will be passing
    // through to apply() in the target's auxiliary data storage) contains
    // iterators pointing to the string being matched. Which means this string
    // must be kept around until we are done with replacing the subsitutions.
    // In fact, we cannot even move it because this may invalidate the
    // iterators (e.g., in case of a small string optimization). So the plan
    // is to store the string in match_extra::buffer and regex_match_results
    // (which we can move) in the auxiliary data storage.
    //
    string& ns (me.buffer);

    auto append_name = [&ns, first = true] (const target_key& tk,
                                            const element& e) mutable
    {
      if (!first)
        ns += '/';
      else
        first = false;

      ns += *tk.name;

      // The same semantics as in variable_type_map::find().
      //
      if (tk.ext && !tk.ext->empty () &&
          (e.match_ext ||
           tk.type->fixed_extension == &target_extension_none ||
           tk.type->fixed_extension == &target_extension_must))
      {
        ns += '.';
        ns += *tk.ext;
      }
    };

    // Primary target (always a pattern).
    //
    auto te (targets_.end ()), ti (targets_.begin ());
    append_name (t.key (), *ti);

    // Match ad hoc group members.
    //
    while ((ti = find_if (ti + 1, te, pattern)) != te)
    {
      const target* at (find_adhoc_member (t, ti->type));

      if (at == nullptr)
      {
        l4 ([&]{trace << rule_name << ": no " << ti->type.name
                      << "{} ad hoc target group member for target " << t;});
        return false;
      }

      append_name (at->key (), *ti);
    }

    // Finish prerequisites.
    //
    if (pi != pe)
    {
      append_name (*pk1, *pi);

      while ((pi = find_if (pi + 1, pe, pattern)) != pe)
      {
        optional<target_key> pk (find_prereq (pi->type));

        if (!pk)
        {
          l4 ([&]{trace << rule_name << ": no " << pi->type.name
                        << "{} prerequisite for target " << t;});
          return false;
        }

        append_name (*pk, *pi);
      }
    }

    // While it can be tempting to optimize this for patterns that don't have
    // any substitutions (which would be most of them), keep in mind that we
    // will also need match_results for $N variables in the recipe (or a C++
    // rule implementation may want to access the match_results object).
    //
    regex_match_results mr;
    if (!regex_match (ns, mr, regex_))
    {
      l4 ([&]{trace << rule_name << ": name signature '" << ns
                    << "' does not match regex '" << text_
                    << "' for target " << t;});
      return false;
    }

    static_assert (sizeof (regex_match_results) <= target::data_size,
                   "insufficient space");
    t.data (move (mr));

    return true;
  }

  static inline string
  substitute (const target& t,
              const regex_match_results& mr,
              const string& s,
              const char* what)
  {
    string r (butl::regex_replace_match_results (
                mr, s.c_str () + 1, s.rfind (s[0]) - 1));

    // @@ Note that while it would have been nice to print the location here,
    //    (and also pass to search()->find_target_type()), we would need to
    //    save location_value in each element to cover multiple declarations.
    //
    if (r.empty ())
      fail << what << " substitution '" << s << "' for target " << t
           << " results in empty name";

    return r;
  }

  void adhoc_rule_regex_pattern::
  apply_adhoc_members (action, target& t, match_extra&) const
  {
    const auto& mr (t.data<regex_match_results> ());

    for (auto i (targets_.begin () + 1); i != targets_.end (); ++i)
    {
      // These are all patterns or substitutions.
      //
      const element& e (*i);

      if (*e.name.pattern == pattern_type::regex_pattern)
        continue;

      // Similar to prerequisites below, we treat member substitutions
      // relative to the target.
      //
      dir_path d;
      if (e.name.dir.empty ())
        d = t.dir; // Absolute and normalized.
      else
      {
        if (e.name.dir.absolute ())
          d = e.name.dir;
        else
          d = t.dir / e.name.dir;

        d.normalize ();
      }

      // @@ TODO: currently this uses type as the ad hoc member identity.
      //
      add_adhoc_member (
        t,
        e.type,
        move (d),
        dir_path () /* out */,
        substitute (t, mr, e.name.value, "ad hoc target group member"));
    }
  }

  void adhoc_rule_regex_pattern::
  apply_prerequisites (action a, target& t, match_extra&) const
  {
    const auto& mr (t.data<regex_match_results> ());

    // Resolve and cache target scope lazily.
    //
    auto base_scope = [&t, bs = (const scope*) nullptr] () mutable
      -> const scope&
    {
      if (bs == nullptr)
        bs = &t.base_scope ();

      return *bs;
    };

    // Re-create the same clean semantics as in match_prerequisite_members().
    //
    bool clean (a.operation () == clean_id && !t.is_a<alias> ());

    auto& pts (t.prerequisite_targets[a]);
    size_t start (pts.size ());

    for (const element& e: prereqs_)
    {
      // While it would be nice to avoid copying here, the semantics of
      // search() (and find_target_type() that it calls) is just too hairy to
      // duplicate and try to optimize. It feels like most of the cases will
      // either fall under the small string optimization or be absolute target
      // names (e.g., imported tools).
      //
      // @@ Perhaps we should try to optimize the absolute target name case?
      //
      // Which scope should we use to resolve this prerequisite? After some
      // meditation it feels natural to use the target's scope for patterns
      // and the rule's scope for non-patterns.
      //
      name n;
      const scope* s;
      if (e.name.pattern)
      {
        if (*e.name.pattern == pattern_type::regex_pattern)
          continue;

        // Note: cannot be project-qualified.
        //
        n = name (e.name.dir,
                  e.name.type,
                  substitute (t, mr, e.name.value, "prerequisite"));
        s = &base_scope ();
      }
      else
      {
        n = e.name;
        s = &rule_scope;
      }

      const target& pt (search (t, move (n), *s, &e.type));

      if (clean && !pt.in (*base_scope ().root_scope ()))
        continue;

      // @@ TODO: it could be handy to mark a prerequisite (e.g., a tool)
      //    ad hoc so that it doesn't interfere with the $< list. Also
      //    clean=false.
      //
      pts.push_back (prerequisite_target (&pt, false /* adhoc */));
    }

    if (start != pts.size ())
      match_members (a, t, pts, start);
  }

  void adhoc_rule_regex_pattern::
  dump (ostream& os) const
  {
    // Targets.
    //
    size_t tn (targets_.size ());

    if (tn != 1)
      os << '<';

    for (size_t i (0); i != tn; ++i)
      os << (i != 0 ? " " : "") << targets_[i].name;

    if (tn != 1)
      os << '>';

    // Prerequisites.
    //
    os << ':';

    for (size_t i (0); i != prereqs_.size (); ++i)
      os << ' ' << prereqs_[i].name;
  }
}