From 939beb11a5ccf58d7fe79a809a1b592c5c9143c0 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 11 Nov 2021 13:20:30 +0200 Subject: Add support for dynamic dependencies in ad hoc Buildscript recipes Specifically, add the new `depdb dyndep` builtin that can be used to extract dynamic dependencies from a program run or a file. For example: obje{hello.o}: cxx{hello} {{ s = $path($<[0]) depdb dyndep $cxx.poptions $cc.poptions --what=header --default-prereq-type=h -- $cxx.path $cxx.poptions $cc.poptions $cxx.mode -M -MG $s diag c++ ($<[0]) o = $path($>) $cxx.path $cxx.poptions $cc.poptions $cc.coptions $cxx.coptions $cxx.mode -o $o -c $s }} Currently only the `make` dependency format is supported. --- libbuild2/dyndep.cxx | 667 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 667 insertions(+) create mode 100644 libbuild2/dyndep.cxx (limited to 'libbuild2/dyndep.cxx') diff --git a/libbuild2/dyndep.cxx b/libbuild2/dyndep.cxx new file mode 100644 index 0000000..51fa7bc --- /dev/null +++ b/libbuild2/dyndep.cxx @@ -0,0 +1,667 @@ +// file : libbuild2/dyndep.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + bool dyndep_rule:: + update (tracer& trace, action a, const target& t, timestamp ts) + { + // In particular, this function is used to make sure header dependencies + // are up to date. + // + // There would normally be a lot of headers for every source file (think + // all the system headers) and just calling execute_direct() on all of + // them can get expensive. At the same time, most of these headers are + // existing files that we will never be updating (again, system headers, + // for example) and the rule that will match them is the fallback + // file_rule. That rule has an optimization: it returns noop_recipe (which + // causes the target state to be automatically set to unchanged) if the + // file is known to be up to date. So we do the update "smartly". + // + const path_target* pt (t.is_a ()); + + if (pt == nullptr) + ts = timestamp_unknown; + + target_state os (t.matched_state (a)); + + if (os == target_state::unchanged) + { + if (ts == timestamp_unknown) + return false; + else + { + // We expect the timestamp to be known (i.e., existing file). + // + timestamp mt (pt->mtime ()); + assert (mt != timestamp_unknown); + return mt > ts; + } + } + else + { + // We only want to return true if our call to execute() actually caused + // an update. In particular, the target could already have been in + // target_state::changed because of the dynamic dependency extraction + // run for some other target. + // + // @@ MT perf: so we are going to switch the phase and execute for + // any generated header. + // + phase_switch ps (t.ctx, run_phase::execute); + target_state ns (execute_direct (a, t)); + + if (ns != os && ns != target_state::unchanged) + { + l6 ([&]{trace << "updated " << t + << "; old state " << os + << "; new state " << ns;}); + return true; + } + else + return ts != timestamp_unknown ? pt->newer (ts, ns) : false; + } + } + + optional dyndep_rule:: + inject_file (tracer& trace, const char* what, + action a, target& t, + const file& pt, + timestamp mt, + bool f) + { + // Even if failing we still use try_match() in order to issue consistent + // (with other places) diagnostics (rather than the generic "not rule to + // update ..."). + // + if (!try_match (a, pt).first) + { + if (!f) + return nullopt; + + diag_record dr; + dr << fail << what << ' ' << pt << " not found and no rule to " + << "generate it"; + + if (verb < 4) + dr << info << "re-run with --verbose=4 for more information"; + } + + bool r (update (trace, a, pt, mt)); + + // Add to our prerequisite target list. + // + t.prerequisite_targets[a].push_back (&pt); + + return r; + } + + // Reverse-lookup target type(s) from file name/extension. + // + // If the list of base target types is specified, then only these types and + // those derived from them are considered. Otherwise, any file-based type is + // considered but not the file type itself. + // + small_vector dyndep_rule:: + map_extension (const scope& bs, + const string& n, const string& e, + const target_type* const* tts) + { + // We will just have to try all of the possible ones, in the "most + // likely to match" order. + // + auto test = [&bs, &n, &e] (const target_type& tt) -> bool + { + if (tt.default_extension != nullptr) + { + // Call the extension derivation function. Here we know that it will + // only use the target type and name from the target key so we can + // pass bogus values for the rest. + // + target_key tk {&tt, nullptr, nullptr, &n, nullopt}; + + // This is like prerequisite search. + // + optional de (tt.default_extension (tk, bs, nullptr, true)); + + return de && *de == e; + } + + return false; + }; + + small_vector r; + + if (tts != nullptr) + { + // @@ What if these types are not known by this project? Maybe this + // should just be unified with the below loop? Need to make sure + // we don't rely on the order in which they are returned. + // + for (const target_type* const* p (tts); *p != nullptr; ++p) + if (test (**p)) + r.push_back (*p); + } + + // Next try target types derived from any of the base types (or file if + // there are no base types). + // + const target_type_map& ttm (bs.root_scope ()->root_extra->target_types); + + for (auto i (ttm.type_begin ()), e (ttm.type_end ()); i != e; ++i) + { + const target_type& dt (i->second); + + if (tts != nullptr) + { + for (const target_type* const* p (tts); *p != nullptr; ++p) + { + const target_type& bt (**p); + + if (dt.is_a (bt)) + { + if (dt != bt && test (dt)) + r.push_back (&dt); + + break; + } + } + } + else + { + // Anything file-derived but not the file itself. + // + if (dt.is_a () && dt != file::static_type && test (dt)) + r.push_back (&dt); + } + } + + return r; + } + + void dyndep_rule:: + append_prefix (tracer& trace, prefix_map& m, const target& t, dir_path d) + { + // If the target directory is a sub-directory of the include directory, + // then the prefix is the difference between the two. Otherwise, leave it + // empty. + // + // The idea here is to make this "canonical" setup work auto-magically + // (using C/C++ #include's as an example): + // + // 1. We include all headers with a prefix, e.g., . + // + // 2. The library target is in the foo/ sub-directory, e.g., /tmp/foo/. + // + // 3. The poptions variable contains -I/tmp. + // + dir_path p (t.dir.sub (d) ? t.dir.leaf (d) : dir_path ()); + + // We use the target's directory as out_base but that doesn't work well + // for targets that are stashed in subdirectories. So as a heuristics we + // are going to also enter the outer directories of the original prefix. + // It is, however, possible, that another directory after this one will + // produce one of these outer prefixes as its original prefix in which + // case we should override it. + // + // So we are going to assign the original prefix priority value 0 + // (highest) and then increment it for each outer prefix. + // + auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio) + { + auto j (m.lower_bound (p)), e (m.end ()); + + if (j != e && j->first != p) + j = e; + + if (j == m.end ()) + { + if (verb >= 4) + trace << "new mapping for prefix '" << p << "'\n" + << " new mapping to " << d << " priority " << prio; + + m.emplace (move (p), prefix_value {move (d), prio}); + } + else if (p.empty ()) + { + // For prefixless we keep all the entries since for them we have an + // extra check (target must be explicitly spelled out in a buildfile). + // + if (verb >= 4) + trace << "additional mapping for prefix '" << p << "'\n" + << " new mapping to " << d << " priority " << prio; + + // Find the position where to insert according to the priority. + // For equal priorities we use the insertion order. + // + do + { + if (j->second.priority > prio) + break; + } + while (++j != e && j->first == p); + + m.emplace_hint (j, move (p), prefix_value {move (d), prio}); + } + else + { + prefix_value& v (j->second); + + // We used to reject duplicates but it seems this can be reasonably + // expected to work according to the order of, say, -I options. + // + // Seeing that we normally have more "specific" -I paths first, (so + // that we don't pick up installed headers, etc), we ignore it. + // + if (v.directory == d) + { + if (v.priority > prio) + v.priority = prio; + } + else if (v.priority <= prio) + { + if (verb >= 4) + trace << "ignoring mapping for prefix '" << p << "'\n" + << " existing mapping to " << v.directory + << " priority " << v.priority << '\n' + << " another mapping to " << d << " priority " << prio; + } + else + { + if (verb >= 4) + trace << "overriding mapping for prefix '" << p << "'\n" + << " existing mapping to " << v.directory + << " priority " << v.priority << '\n' + << " new mapping to " << d << " priority " << prio; + + v.directory = move (d); + v.priority = prio; + } + } + }; + + // Enter all outer prefixes, including prefixless. + // + // The prefixless part is fuzzy but seems to be doing the right thing + // ignoring/overriding-wise, at least in cases where one of the competing + // include search paths is a subdirectory of another. + // + for (size_t prio (0);; ++prio) + { + bool e (p.empty ()); + enter ((e ? move (p) : p), (e ? move (d) : d), prio); + if (e) + break; + p = p.directory (); + } + } + + bool dyndep_rule::srcout_builder:: + next (dir_path&& d) + { + // Ignore any paths containing '.', '..' components. Allow any directory + // separators though (think -I$src_root/foo on Windows). + // + if (d.absolute () && d.normalized (false)) + { + // If we have a candidate out_base, see if this is its src_base. + // + if (prev_ != nullptr) + { + const dir_path& bp (prev_->src_path ()); + + if (d.sub (bp)) + { + if (diff_.empty () || d.leaf (bp) == diff_) + { + // We've got a pair. + // + map_.emplace (move (d), prev_->out_path () / diff_); + prev_ = nullptr; // Taken. + return true; + } + } + + // Not a pair. Fall through to consider as out_base. + // + prev_ = nullptr; + } + + // See if this path is inside a project with an out-of-tree build and is + // in the out directory tree. + // + const scope& bs (ctx_.scopes.find_out (d)); + if (bs.root_scope () != nullptr) + { + if (!bs.out_eq_src ()) + { + const dir_path& bp (bs.out_path ()); + + bool e; + if ((e = (d == bp)) || d.sub (bp)) + { + prev_ = &bs; + if (e) + diff_.clear (); + else + diff_ = d.leaf (bp); + } + } + } + } + else + prev_ = nullptr; + + return false; + } + + pair dyndep_rule:: + enter_file (tracer& trace, const char* what, + action a, const scope& bs, target& t, + path&& f, bool cache, bool norm, + const function& map_extension, + const target_type& fallback, + const function& get_pfx_map, + const srcout_map& so_map) + { + // Find or maybe insert the target. The directory is only moved from if + // insert is true. Note that it must be normalized. + // + auto find = [&trace, what, &t, + &map_extension, &fallback] (dir_path&& d, + path&& f, + bool insert) -> const file* + { + // Split the file into its name part and extension. Here we can assume + // the name part is a valid filesystem name. + // + // Note that if the file has no extension, we record an empty extension + // rather than NULL (which would signify that the default extension + // should be added). + // + string e (f.extension ()); + string n (move (f).string ()); + + if (!e.empty ()) + n.resize (n.size () - e.size () - 1); // One for the dot. + + // See if this directory is part of any project and if so determine + // the target type. + // + // While at it also determine if this target is from the src or out + // tree of said project. + // + dir_path out; + + // It's possible the extension-to-target type mapping is ambiguous (for + // example, because both C and X-language headers use the same .h + // extension). In this case we will first try to find one that matches + // an explicit target (similar logic to when insert is false). + // + small_vector tts; + + // Note that the path can be in out or src directory and the latter + // can be associated with multiple scopes. So strictly speaking we + // need to pick one that is "associated" with us. But that is still a + // TODO (see scope_map::find() for details) and so for now we just + // pick the first one (it's highly unlikely the source file extension + // mapping will differ based on the configuration). + // + { + const scope& bs (**t.ctx.scopes.find (d).first); + if (const scope* rs = bs.root_scope ()) + { + if (map_extension != nullptr) + tts = map_extension (bs, n, e); + + if (!bs.out_eq_src () && d.sub (bs.src_path ())) + out = out_src (d, *rs); + } + } + + // If it is outside any project, or the project doesn't have such an + // extension, use the fallback target type. + // + if (tts.empty ()) + { + // If the project doesn't "know" this extension then we can't possibly + // find an explicit target of this type. + // + if (!insert) + { + l6 ([&]{trace << "unknown " << what << ' ' << n << " extension '" + << e << "'";}); + return nullptr; + } + + tts.push_back (&fallback); + } + + // Find or insert target. + // + // Note that in case of the target type ambiguity we first try to find + // an explicit target that resolves this ambiguity. + // + const target* r (nullptr); + + if (!insert || tts.size () > 1) + { + // Note that we skip any target type-specific searches (like for an + // existing file) and go straight for the target object since we + // need to find the target explicitly spelled out. + // + // Also, it doesn't feel like we should be able to resolve an + // absolute path with a spelled-out extension to multiple targets. + // + for (const target_type* tt: tts) + { + if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr) + break; + else + l6 ([&]{trace << "no targe with target type " << tt->name;}); + } + + // Note: we can't do this because of the in-source builds where there + // won't be explicit targets for non-generated files. + // + // This should be harmless, however, since in our world generated file + // are spelled-out as explicit targets. And if not, we will still get + // an error, just a bit less specific. + // +#if 0 + if (r == nullptr && insert) + { + f = d / n; + if (!e.empty ()) + { + f += '.'; + f += e; + } + + diag_record dr (fail); + dr << "ambiguous mapping of " << what ' ' << f << " to target type"; + for (const target_type* tt: tts) + dr << info << "could be " << tt->name << "{}"; + dr << info << "spell-out its target to resolve this ambiguity"; + } +#endif + } + + // @@ OPT: move d, out, n + // + if (r == nullptr && insert) + r = &search (t, *tts[0], d, out, n, &e, nullptr); + + return static_cast (r); + }; + + // If it's not absolute then it either does not (yet) exist or is a + // relative ""-include (see init_args() for details). Reduce the second + // case to absolute. + // + // Note: we now always use absolute path to the translation unit so this + // no longer applies. But let's keep it for posterity. + // +#if 0 + if (f.relative () && rels.relative ()) + { + // If the relative source path has a directory component, make sure it + // matches since ""-include will always start with that (none of the + // compilers we support try to normalize this path). Failed that we may + // end up searching for a generated header in a random (working) + // directory. + // + const string& fs (f.string ()); + const string& ss (rels.string ()); + + size_t p (path::traits::rfind_separator (ss)); + + if (p == string::npos || // No directory. + (fs.size () > p + 1 && + path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0)) + { + path t (work / f); // The rels path is relative to work. + + if (exists (t)) + f = move (t); + } + } +#endif + + const file* pt (nullptr); + bool remapped (false); + + // If still relative then it does not exist. + // + if (f.relative ()) + { + // This is probably as often an error as an auto-generated file, so + // trace at level 4. + // + l4 ([&]{trace << "non-existent " << what << " '" << f << "'";}); + + f.normalize (); + + // The relative path might still contain '..' (e.g., ../foo.hxx; + // presumably ""-include'ed). We don't attempt to support auto- + // generated files with such inclusion styles. + // + if (get_pfx_map != nullptr && f.normalized ()) + { + const prefix_map& pfx_map (get_pfx_map (a, bs, t)); + + // First try the whole file. Then just the directory. + // + // @@ Has to be a separate map since the prefix can be the same as + // the file name. + // + // auto i (pfx_map->find (f)); + + // Find the most qualified prefix of which we are a sub-path. + // + if (!pfx_map.empty ()) + { + dir_path d (f.directory ()); + auto p (pfx_map.sup_range (d)); + + if (p.first != p.second) + { + // Note that we can only have multiple entries for the + // prefixless mapping. + // + dir_path pd; // Reuse. + for (auto i (p.first); i != p.second; ++i) + { + // Note: value in pfx_map is not necessarily canonical. + // + pd = i->second.directory; + pd.canonicalize (); + + l4 ([&]{trace << "try prefix '" << d << "' mapped to " << pd;}); + + // If this is a prefixless mapping, then only use it if we can + // resolve it to an existing target (i.e., it is explicitly + // spelled out in a buildfile). @@ Hm, I wonder why, it's not + // like we can generate any file without an explicit target. + // Maybe for diagnostics (i.e., we will actually try to build + // something there instead of just saying no mapping). + // + pt = find (pd / d, f.leaf (), !i->first.empty ()); + if (pt != nullptr) + { + f = pd / f; + l4 ([&]{trace << "mapped as auto-generated " << f;}); + break; + } + else + l4 ([&]{trace << "no explicit target in " << pd;}); + } + } + else + l4 ([&]{trace << "no prefix map entry for '" << d << "'";}); + } + else + l4 ([&]{trace << "prefix map is empty";}); + } + } + else + { + // Normalize the path unless it comes from the depdb, in which case + // we've already done that (normally). This is also where we handle + // src-out remap (again, not needed if cached). + // + if (!cache || norm) + normalize_external (f, what); + + if (!cache) + { + if (!so_map.empty ()) + { + // Find the most qualified prefix of which we are a sub-path. + // + auto i (so_map.find_sup (f)); + if (i != so_map.end ()) + { + // Ok, there is an out tree for this file. Remap to a path from + // the out tree and see if there is a target for it. Note that the + // value in so_map is not necessarily canonical. + // + dir_path d (i->second); + d /= f.leaf (i->first).directory (); + d.canonicalize (); + + pt = find (move (d), f.leaf (), false); // d is not moved from. + + if (pt != nullptr) + { + path p (d / f.leaf ()); + l4 ([&]{trace << "remapping " << f << " to " << p;}); + f = move (p); + remapped = true; + } + } + } + } + + if (pt == nullptr) + { + l6 ([&]{trace << "entering " << f;}); + pt = find (f.directory (), f.leaf (), true); + } + } + + return make_pair (pt, remapped); + } +} -- cgit v1.1