From be30decf3f777786a44b12920ac2d273b1c8d1f8 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 9 Aug 2021 10:48:19 +0200 Subject: Merge library hashing and collection into single traversal pass It turns out this is a lot faster on deeply-dependent libraries like Boost while not having any noticeable differences for "sane" projects. --- libbuild2/cc/compile-rule.cxx | 4 +- libbuild2/cc/functions.cxx | 12 +- libbuild2/cc/link-rule.cxx | 307 +++++++++++++----------------------------- libbuild2/cc/link-rule.hxx | 13 +- 4 files changed, 106 insertions(+), 230 deletions(-) diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index b2fdba9..26ee566 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -6222,8 +6222,8 @@ namespace build2 // auto imp = [] (const target&, bool) { return false; }; - //@@ TODO: implement duplicate suppression and prunning? Reuse - // above machinery. + //@@ TODO: implement duplicate suppression and prunning? Reuse above + // machinery (do before is_a() call). // The same logic as in append_libraries(). // diff --git a/libbuild2/cc/functions.cxx b/libbuild2/cc/functions.cxx index 0394524..7f9a6c2 100644 --- a/libbuild2/cc/functions.cxx +++ b/libbuild2/cc/functions.cxx @@ -224,7 +224,8 @@ namespace build2 action a, const file& l, bool la, linfo li) { m.append_library_options ( - *static_cast (ls), r, bs, a, l, la, li); + *static_cast (ls), r, + bs, a, l, la, li); }}); // $.find_system_header() @@ -322,9 +323,10 @@ namespace build2 bool self (vs.size () > 3 ? convert (vs[3]) : true); - m.append_libraries (*static_cast (ls), r, - bs, - a, l, la, lf, li, self, rel); + m.append_libraries ( + *static_cast (ls), r, + nullptr /* sha256 */, nullptr /* update */, timestamp_unknown, + bs, a, l, la, lf, li, self, rel); }}); // $.lib_rpaths(, [, [, ]]) @@ -391,7 +393,7 @@ namespace build2 if (const file* f = t.is_a ()) { if (m.modules) - m.append_binless_modules (r, bs, a, *f); + m.append_binless_modules (r, nullptr /* sha256 */, bs, a, *f); } else fail << t << " is not an object file target"; diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx index 6e378e9..4497382 100644 --- a/libbuild2/cc/link-rule.cxx +++ b/libbuild2/cc/link-rule.cxx @@ -1661,8 +1661,12 @@ namespace build2 } } + // Append (and optionally hash and detect if rendered out of data) + // libraries to link, recursively. + // void link_rule:: append_libraries (appended_libraries& ls, strings& args, + sha256* cs, bool* update, timestamp mt, const scope& bs, action a, const file& l, bool la, lflags lf, linfo li, bool self, bool rel, @@ -1672,12 +1676,22 @@ namespace build2 { appended_libraries& ls; strings& args; + + sha256* cs; + const dir_path* out_root; + + bool* update; + timestamp mt; + const file& l; action a; linfo li; bool rel; compile_target_types tts; - } d {ls, args, l, a, li, rel, compile_types (li.type)}; + } d {ls, args, + cs, cs != nullptr ? &bs.root_scope ()->out_path () : nullptr, + update, mt, + l, a, li, rel, compile_types (li.type)}; auto imp = [] (const target&, bool la) { @@ -1718,10 +1732,11 @@ namespace build2 if (al != nullptr && al->end != appended_library::npos) // Closed. { // Hoist the elements corresponding to this library to the end. + // Note that we cannot prune the traversal since we need to see the + // last occurrence of each library. // d.ls.hoist (d.args, *al); - return true; // @@ Can we prune here???. But also sha256 version? - // Also in pkgconfig.cxx! + return true; } if (l == nullptr) @@ -1732,7 +1747,12 @@ namespace build2 if (d.li.type != otype::a) { for (const string& n: ns) + { d.args.push_back (n); + + if (d.cs != nullptr) + d.cs->append (n); + } } } else @@ -1765,6 +1785,12 @@ namespace build2 // are automatically handled by process_libraries(). So all we // have to do is implement the "thin archive" logic. // + // We also don't need to do anything special for the out-of-date + // logic: If any of its object files (or the set of its object + // files) changes, then the library will have to be updated as + // well. In other words, we use the library timestamp as a proxy + // for all of its member's timestamps. + // // We may also end up trying to link a non-utility library to a // static library via a utility library (direct linking is taken // care of by perform_update()). So we cut it off here. @@ -1775,6 +1801,11 @@ namespace build2 if (l->mtime () == timestamp_unreal) // Binless. goto done; + // Check if this library renders us out of date. + // + if (d.update != nullptr) + *d.update = *d.update || l->newer (d.mt); + for (const target* pt: l->prerequisite_targets[d.a]) { if (pt == nullptr) @@ -1809,6 +1840,11 @@ namespace build2 if (l->mtime () == timestamp_unreal) // Binless. goto done; + // Check if this library renders us out of date. + // + if (d.update != nullptr) + *d.update = *d.update || l->newer (d.mt); + // On Windows a shared library is a DLL with the import library as // an ad hoc group member. MinGW though can link directly to DLLs // (see search_library() for details). @@ -1844,6 +1880,12 @@ namespace build2 d.args.push_back (move (p)); } + + if (d.cs != nullptr) + { + d.cs->append (f); + hash_path (*d.cs, l->path (), *d.out_root); + } } done: @@ -1890,123 +1932,9 @@ namespace build2 : l.ctx.var_pool[t + (exp ? ".export.loptions" : ".loptions")])); append_options (d.args, *g, var); - } - - return true; - }; - process_libraries (a, bs, li, sys_lib_dirs, - l, la, - lf, imp, lib, opt, self, - lib_cache); - } - - void link_rule:: - append_libraries (sha256& cs, bool& update, timestamp mt, - const scope& bs, action a, - const file& l, bool la, lflags lf, linfo li, - library_cache* lib_cache) const - { - // Note that we don't do any duplicate suppression here: there is no way - // to "hoist" things once they are hashed and hashing only the first - // occurrence could miss changes to the command line (e.g., due to - // "hoisting"). - - struct data - { - sha256& cs; - const dir_path& out_root; - bool& update; - timestamp mt; - linfo li; - } d {cs, bs.root_scope ()->out_path (), update, mt, li}; - - auto imp = [] (const target&, bool la) - { - return la; - }; - - auto lib = [&d, this] ( - const target* const* lc, - const small_vector, 2>& ns, - lflags f, - bool) - { - const file* l (lc != nullptr ? &(*lc)->as () : nullptr); - - if (l == nullptr) - { - if (d.li.type != otype::a) - { - for (const string& n: ns) - d.cs.append (n); - } - } - else - { - bool lu (l->is_a ()); - - if (lu) - { - for (ptrdiff_t i (-1); lc[i] != nullptr; --i) - if (!lc[i]->is_a ()) - return true; - } - - // We also don't need to do anything special for linking a utility - // library to a static library. If any of its object files (or the - // set of its object files) changes, then the library will have to - // be updated as well. In other words, we use the library timestamp - // as a proxy for all of its member's timestamps. - // - // We do need to cut of the static to static linking, just as in - // append_libraries(). - // - if (d.li.type == otype::a && !lu) - return true; - - if (l->mtime () == timestamp_unreal) // Binless. - return true; - - // Check if this library renders us out of date. - // - d.update = d.update || l->newer (d.mt); - - // On Windows a shared library is a DLL with the import library as - // an ad hoc group member. MinGW though can link directly to DLLs - // (see search_library() for details). - // - if (tclass == "windows" && l->is_a ()) - { - if (const libi* li = find_adhoc_member (*l)) - l = li; - } - - d.cs.append (f); - hash_path (d.cs, l->path (), d.out_root); - } - - return true; - }; - - auto opt = [&d, this] (const target& l, - const string& t, - bool com, - bool exp) - { - if (d.li.type == otype::a || !exp) - return true; - - if (const target* g = exp && l.is_a () ? l.group : &l) - { - const variable& var ( - com - ? (exp ? c_export_loptions : c_loptions) - : (t == x - ? (exp ? x_export_loptions : x_loptions) - : l.ctx.var_pool[t + (exp ? ".export.loptions" : ".loptions")])); - - append_options (d.cs, *g, var); + if (d.cs != nullptr) + append_options (*d.cs, *g, var); } return true; @@ -2014,7 +1942,7 @@ namespace build2 process_libraries (a, bs, li, sys_lib_dirs, l, la, - lf, imp, lib, opt, true /* self */, + lf, imp, lib, opt, self, lib_cache); } @@ -2093,12 +2021,6 @@ namespace build2 if (l != nullptr) { - if (!l->is_a ()) - return true; - - if (l->mtime () == timestamp_unreal) // Binless. - return true; - // Suppress duplicates. // // We handle rpath similar to the compilation case by adding the @@ -2108,6 +2030,15 @@ namespace build2 if (find (d.ls.begin (), d.ls.end (), l) != d.ls.end ()) return false; + // Note that these checks are fairly expensive so we do them after + // duplicate suppression. + // + if (!l->is_a ()) + return true; + + if (l->mtime () == timestamp_unreal) // Binless. + return true; + append (ns[0]); d.ls.push_back (l); } @@ -2190,11 +2121,11 @@ namespace build2 } } - // Append object files of bmi{} prerequisites that belong to binless - // libraries. + // Append (and optionally hash while at it) object files of bmi{} + // prerequisites that belong to binless libraries. // void link_rule:: - append_binless_modules (strings& args, + append_binless_modules (strings& args, sha256* cs, const scope& bs, action a, const file& t) const { // Note that here we don't need to hoist anything on duplicate detection @@ -2211,25 +2142,12 @@ namespace build2 if (find (args.begin (), args.end (), p) == args.end ()) { args.push_back (move (p)); - append_binless_modules (args, bs, a, o); - } - } - } - } - void link_rule:: - append_binless_modules (sha256& cs, - const scope& bs, action a, const file& t) const - { - for (const target* pt: t.prerequisite_targets[a]) - { - if (pt != nullptr && - pt->is_a () && - cast_false ((*pt)[b_binless])) - { - const objx& o (*find_adhoc_member (*pt)); - hash_path (cs, o.path (), bs.root_scope ()->out_path ()); - append_binless_modules (cs, bs, a, o); + if (cs != nullptr) + hash_path (*cs, o.path (), bs.root_scope ()->out_path ()); + + append_binless_modules (args, cs, bs, a, o); + } } } } @@ -2581,8 +2499,8 @@ namespace build2 // are to either replicate the exact process twice, first for hashing // then for building or to go ahead and start building and hash the // result. The first approach is probably more efficient while the - // second is simpler. Let's got with the simpler for now (actually it's - // kind of a hybrid). + // second is simpler. Let's got with the simpler for now (also see a + // note on the cost of library dependency graph traversal below). // cstrings args {nullptr}; // Reserve one for config.bin.ar/config.x. strings sargs; // Argument tail with storage. @@ -2854,10 +2772,21 @@ namespace build2 // pinpoint exactly what is causing the update. On the other hand, the // checksum is faster and simpler. And we like simple. // + // Note that originally we only hashed inputs here and then re-collected + // them below. But the double traversal of the library graph proved to + // be way more expensive on libraries with lots of dependencies (like + // Boost) than both collecting and hashing in a single pass. So that's + // what we do now. @@ TODO: it would be beneficial to also merge the + // rpath pass above into this. + // + // See also a similar loop inside append_libraries(). + // + bool seen_obj (false); const file* def (nullptr); // Cached if present. { - sha256 cs; + appended_libraries als; library_cache lc; + sha256 cs; for (const prerequisite_target& p: t.prerequisite_targets[a]) { @@ -2894,8 +2823,8 @@ namespace build2 ((la = (f = pt->is_a ())) || (ls = (f = pt->is_a ()))))) { - // Link all the dependent interface libraries (shared) or interface - // and implementation (static), recursively. + // Link all the dependent interface libraries (shared) or + // interface and implementation (static), recursively. // // Also check if any of them render us out of date. The tricky // case is, say, a utility library (static) that depends on a @@ -2905,12 +2834,21 @@ namespace build2 // if (la || ls) { - append_libraries (cs, update, mt, bs, a, *f, la, p.data, li, &lc); + append_libraries (als, sargs, + &cs, &update, mt, + bs, a, *f, la, p.data, li, true, true, &lc); f = nullptr; // Timestamp checked by hash_libraries(). } else { - hash_path (cs, f->path (), rs.out_path ()); + // Do not hoist libraries over object files since such object + // files might satisfy symbols in the preceding libraries. + // + als.clear (); + + const path& p (f->path ()); + sargs.push_back (relative (p).string ()); + hash_path (cs, p, rs.out_path ()); // @@ Do we actually need to hash this? I don't believe this set // can change without rendering the object file itself out of @@ -2918,7 +2856,9 @@ namespace build2 // marked with bin.binless manually? // if (modules) - append_binless_modules (cs, rs, a, *f); + append_binless_modules (sargs, &cs, bs, a, *f); + + seen_obj = true; } } else if ((f = pt->is_a ())) @@ -3198,63 +3138,6 @@ namespace build2 size_t args_input (args.size ()); #endif - // The same logic as during hashing above. See also a similar loop - // inside append_libraries(). - // - bool seen_obj (false); - { - appended_libraries als; - library_cache lc; - - for (const prerequisite_target& p: t.prerequisite_targets[a]) - { - const target* pt (p.target); - - if (pt == nullptr) - continue; - - if (modules) - { - if (pt->is_a ()) - { - pt = find_adhoc_member (*pt, tts.obj); - - if (pt == nullptr) // Header BMIs have no object file. - continue; - } - } - - const file* f; - bool la (false), ls (false); - - if ((f = pt->is_a ()) || - (!lt.utility && - (la = (f = pt->is_a ()))) || - (!lt.static_library () && - ((la = (f = pt->is_a ())) || - (ls = (f = pt->is_a ()))))) - { - if (la || ls) - append_libraries ( - als, sargs, bs, a, *f, la, p.data, li, true, true, &lc); - else - { - // Do not hoist libraries over object files since such object - // files might satisfy symbols in the preceding libraries. - // - als.clear (); - - sargs.push_back (relative (f->path ()).string ()); - - if (modules) - append_binless_modules (sargs, bs, a, *f); - - seen_obj = true; - } - } - } - } - // For MinGW manifest is an object file. // if (!manifest.empty () && tsys == "mingw32") diff --git a/libbuild2/cc/link-rule.hxx b/libbuild2/cc/link-rule.hxx index fd12e89..c20844d 100644 --- a/libbuild2/cc/link-rule.hxx +++ b/libbuild2/cc/link-rule.hxx @@ -172,17 +172,12 @@ namespace build2 void append_libraries (appended_libraries&, strings&, + sha256*, bool*, timestamp, const scope&, action, const file&, bool, lflags, linfo, bool = true, bool = true, library_cache* = nullptr) const; - void - append_libraries (sha256&, bool&, timestamp, - const scope&, action, - const file&, bool, lflags, linfo, - library_cache* = nullptr) const; - using rpathed_libraries = small_vector; void @@ -197,11 +192,7 @@ namespace build2 const target&, linfo, bool) const; void - append_binless_modules (strings&, - const scope&, action, const file&) const; - - void - append_binless_modules (sha256&, + append_binless_modules (strings&, sha256*, const scope&, action, const file&) const; optional -- cgit v1.1