From f7a245b2b6091ef3a5e1193423c7fbbd6fe6a538 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 15 Dec 2020 09:25:19 +0200 Subject: Cache more results of executing programs (compilers, etc) --- libbuild2/bin/guess.cxx | 109 ++++++++++++++++++++++++++++++--------- libbuild2/bin/guess.hxx | 6 +-- libbuild2/bin/init.cxx | 51 +++++++++--------- libbuild2/cc/guess.cxx | 20 ++----- libbuild2/cc/module.cxx | 85 +++++++++++++++++++++++------- libbuild2/file.cxx | 31 ++++++++--- libbuild2/test/script/parser.hxx | 2 + libbuild2/utility.hxx | 36 ++++++++++++- 8 files changed, 239 insertions(+), 101 deletions(-) (limited to 'libbuild2') diff --git a/libbuild2/bin/guess.cxx b/libbuild2/bin/guess.cxx index 21936d9..9f15030 100644 --- a/libbuild2/bin/guess.cxx +++ b/libbuild2/bin/guess.cxx @@ -54,7 +54,7 @@ namespace build2 { process_path r ( run_try_search (prog, - true /* init */, + false /* init (cached) */, dir_path () /* fallback */, true /* path_only */, paths)); @@ -80,14 +80,33 @@ namespace build2 dr << info << "use " << var << " to override"; }); - return run_search (prog, true, dir_path (), true); + return run_search (prog, false, dir_path (), true); } - ar_info + // Extracting ar/ranlib information requires running them which can become + // expensive if done repeatedly. So we cache the result. + // + static global_cache ar_cache; + + const ar_info& guess_ar (const path& ar, const path* rl, const char* paths) { tracer trace ("bin::guess_ar"); + // First check the cache. + // + string key; + { + sha256 cs; + cs.append (ar.string ()); + if (rl != nullptr) cs.append (rl->string ()); + if (paths != nullptr) cs.append (paths); + key = cs.string (); + + if (const ar_info* r = ar_cache.find (key)) + return *r; + } + guess_result arr, rlr; process_path arp (search (ar, paths, "config.bin.ar")); @@ -307,24 +326,43 @@ namespace build2 fail << "unable to guess " << *rl << " signature"; } - return ar_info { - move (arp), - move (arr.id), - move (arr.signature), - move (arr.checksum), - move (*arr.version), - - move (rlp), - move (rlr.id), - move (rlr.signature), - move (rlr.checksum)}; + return ar_cache.insert (move (key), + ar_info { + move (arp), + move (arr.id), + move (arr.signature), + move (arr.checksum), + move (*arr.version), + + move (rlp), + move (rlr.id), + move (rlr.signature), + move (rlr.checksum)}); } - ld_info + // Extracting ld information requires running it which can become + // expensive if done repeatedly. So we cache the result. + // + static global_cache ld_cache; + + const ld_info& guess_ld (const path& ld, const char* paths) { tracer trace ("bin::guess_ld"); + // First check the cache. + // + string key; + { + sha256 cs; + cs.append (ld.string ()); + if (paths != nullptr) cs.append (paths); + key = cs.string (); + + if (const ld_info* r = ld_cache.find (key)) + return *r; + } + guess_result r; process_path pp (search (ld, paths, "config.bin.ld")); @@ -484,19 +522,38 @@ namespace build2 if (r.empty ()) fail << "unable to guess " << ld << " signature"; - return ld_info { - move (pp), - move (r.id), - move (r.signature), - move (r.checksum), - move (r.version)}; + return ld_cache.insert (move (key), + ld_info { + move (pp), + move (r.id), + move (r.signature), + move (r.checksum), + move (r.version)}); } - rc_info + // Extracting rc information requires running it which can become + // expensive if done repeatedly. So we cache the result. + // + static global_cache rc_cache; + + const rc_info& guess_rc (const path& rc, const char* paths) { tracer trace ("bin::guess_rc"); + // First check the cache. + // + string key; + { + sha256 cs; + cs.append (rc.string ()); + if (paths != nullptr) cs.append (paths); + key = cs.string (); + + if (const rc_info* r = rc_cache.find (key)) + return *r; + } + guess_result r; process_path pp (search (rc, paths, "config.bin.rc")); @@ -575,8 +632,12 @@ namespace build2 if (r.empty ()) fail << "unable to guess " << rc << " signature"; - return rc_info { - move (pp), move (r.id), move (r.signature), move (r.checksum)}; + return rc_cache.insert (move (key), + rc_info { + move (pp), + move (r.id), + move (r.signature), + move (r.checksum)}); } } } diff --git a/libbuild2/bin/guess.hxx b/libbuild2/bin/guess.hxx index 27bb5ed..9a63fa1 100644 --- a/libbuild2/bin/guess.hxx +++ b/libbuild2/bin/guess.hxx @@ -45,7 +45,7 @@ namespace build2 // The ranlib path can be NULL, in which case no ranlib guessing will be // attemplated and the returned ranlib_* members will be left empty. // - ar_info + const ar_info& guess_ar (const path& ar, const path* ranlib, const char* paths); // ld information. @@ -85,7 +85,7 @@ namespace build2 optional version; }; - ld_info + const ld_info& guess_ld (const path& ld, const char* paths); // rc information. @@ -110,7 +110,7 @@ namespace build2 string checksum; }; - rc_info + const rc_info& guess_rc (const path& rc, const char* paths); } } diff --git a/libbuild2/bin/init.cxx b/libbuild2/bin/init.cxx index 49ba518..6d2d2c2 100644 --- a/libbuild2/bin/init.cxx +++ b/libbuild2/bin/init.cxx @@ -683,7 +683,7 @@ namespace build2 nullptr, config::save_default_commented))); - ar_info ari (guess_ar (ar, ranlib, pat.paths)); + const ar_info& ari (guess_ar (ar, ranlib, pat.paths)); // If this is a configuration with new values, then print the report // at verbosity level 2 and up (-v). @@ -723,33 +723,28 @@ namespace build2 } rs.assign ("bin.ar.path") = - process_path_ex (move (ari.ar_path), "ar", ari.ar_checksum); - rs.assign ("bin.ar.id") = move (ari.ar_id); - rs.assign ("bin.ar.signature") = move (ari.ar_signature); - rs.assign ("bin.ar.checksum") = move (ari.ar_checksum); + process_path_ex (ari.ar_path, "ar", ari.ar_checksum); + rs.assign ("bin.ar.id") = ari.ar_id; + rs.assign ("bin.ar.signature") = ari.ar_signature; + rs.assign ("bin.ar.checksum") = ari.ar_checksum; { - semantic_version& v (ari.ar_version); + const semantic_version& v (ari.ar_version); rs.assign ("bin.ar.version") = v.string (); rs.assign ("bin.ar.version.major") = v.major; rs.assign ("bin.ar.version.minor") = v.minor; rs.assign ("bin.ar.version.patch") = v.patch; - rs.assign ("bin.ar.version.build") = move (v.build); + rs.assign ("bin.ar.version.build") = v.build; } if (ranlib != nullptr) { rs.assign ("bin.ranlib.path") = - process_path_ex (move (ari.ranlib_path), - "ranlib", - ari.ranlib_checksum); - rs.assign ("bin.ranlib.id") = - move (ari.ranlib_id); - rs.assign ("bin.ranlib.signature") = - move (ari.ranlib_signature); - rs.assign ("bin.ranlib.checksum") = - move (ari.ranlib_checksum); + process_path_ex (ari.ranlib_path, "ranlib", ari.ranlib_checksum); + rs.assign ("bin.ranlib.id") = ari.ranlib_id; + rs.assign ("bin.ranlib.signature") = ari.ranlib_signature; + rs.assign ("bin.ranlib.checksum") = ari.ranlib_checksum; } } @@ -826,7 +821,7 @@ namespace build2 path (apply_pattern (ld_d, pat.pattern)), config::save_default_commented))); - ld_info ldi (guess_ld (ld, pat.paths)); + const ld_info& ldi (guess_ld (ld, pat.paths)); // If this is a configuration with new values, then print the report // at verbosity level 2 and up (-v). @@ -859,20 +854,20 @@ namespace build2 } rs.assign ("bin.ld.path") = - process_path_ex (move (ldi.path), "ld", ldi.checksum); - rs.assign ("bin.ld.id") = move (ldi.id); - rs.assign ("bin.ld.signature") = move (ldi.signature); - rs.assign ("bin.ld.checksum") = move (ldi.checksum); + process_path_ex (ldi.path, "ld", ldi.checksum); + rs.assign ("bin.ld.id") = ldi.id; + rs.assign ("bin.ld.signature") = ldi.signature; + rs.assign ("bin.ld.checksum") = ldi.checksum; if (ldi.version) { - semantic_version& v (*ldi.version); + const semantic_version& v (*ldi.version); rs.assign ("bin.ld.version") = v.string (); rs.assign ("bin.ld.version.major") = v.major; rs.assign ("bin.ld.version.minor") = v.minor; rs.assign ("bin.ld.version.patch") = v.patch; - rs.assign ("bin.ld.version.build") = move (v.build); + rs.assign ("bin.ld.version.build") = v.build; } } @@ -979,7 +974,7 @@ namespace build2 path (apply_pattern (rc_d, pat.pattern)), config::save_default_commented))); - rc_info rci (guess_rc (rc, pat.paths)); + const rc_info& rci (guess_rc (rc, pat.paths)); // If this is a configuration with new values, then print the report // at verbosity level 2 and up (-v). @@ -994,10 +989,10 @@ namespace build2 } rs.assign ("bin.rc.path") = - process_path_ex (move (rci.path), "rc", rci.checksum); - rs.assign ("bin.rc.id") = move (rci.id); - rs.assign ("bin.rc.signature") = move (rci.signature); - rs.assign ("bin.rc.checksum") = move (rci.checksum); + process_path_ex (rci.path, "rc", rci.checksum); + rs.assign ("bin.rc.id") = rci.id; + rs.assign ("bin.rc.signature") = rci.signature; + rs.assign ("bin.rc.checksum") = rci.checksum; } return true; diff --git a/libbuild2/cc/guess.cxx b/libbuild2/cc/guess.cxx index 5a2e3b3..daa9d9b 100644 --- a/libbuild2/cc/guess.cxx +++ b/libbuild2/cc/guess.cxx @@ -2956,8 +2956,7 @@ namespace build2 // Compiler checks can be expensive (we often need to run the compiler // several times) so we cache the result. // - static map cache; - static mutex cache_mutex; + static global_cache cache; const compiler_info& guess (const char* xm, @@ -2988,11 +2987,8 @@ namespace build2 if (x_lo != nullptr) append_options (cs, *x_lo); key = cs.string (); - mlock l (cache_mutex); - - auto i (cache.find (key)); - if (i != cache.end ()) - return i->second; + if (const compiler_info* r = cache.find (key)) + return *r; } // Parse the user-specified compiler id (config.x.id). @@ -3144,15 +3140,7 @@ namespace build2 r.bin_pattern = p.directory ().representation (); // Trailing slash. } - // It's possible the cache entry already exists, in which case we - // ignore our value. - // - // But what if the compiler information it contains is different? Well, - // we don't generally deal with toolchain changes during the build so we - // ignore this special case as well. - // - mlock l (cache_mutex); - return cache.insert (make_pair (move (key), move (r))).first->second; + return cache.insert (move (key), move (r)); } strings diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx index 14182a2..8241a01 100644 --- a/libbuild2/cc/module.cxx +++ b/libbuild2/cc/module.cxx @@ -338,6 +338,17 @@ namespace build2 # endif #endif + // Extracting search dirs can be expensive (we may need to run the + // compiler several times) so we cache the result. + // + struct search_dirs + { + pair lib; + pair inc; + }; + + static global_cache dirs_cache; + void config_module:: init (scope& rs, const location& loc, const variable_map&) { @@ -440,33 +451,67 @@ namespace build2 pair inc_dirs; const optional>& mod_dirs (xi.sys_mod_dirs); - if (xi.sys_lib_dirs) + if (xi.sys_lib_dirs && xi.sys_inc_dirs) + { lib_dirs = *xi.sys_lib_dirs; + inc_dirs = *xi.sys_inc_dirs; + } else { - switch (xi.class_) + string key; { - case compiler_class::gcc: - lib_dirs = gcc_library_search_dirs (xi.path, rs); - break; - case compiler_class::msvc: - lib_dirs = msvc_library_search_dirs (xi.path, rs); - break; + sha256 cs; + cs.append (static_cast (x_lang)); + cs.append (xi.path.effect_string ()); + append_options (cs, mode); + key = cs.string (); } - } - if (xi.sys_inc_dirs) - inc_dirs = *xi.sys_inc_dirs; - else - { - switch (xi.class_) + // Because the compiler info (xi) is also cached, we can assume that + // if dirs come from there, then they do so consistently. + // + const search_dirs* sd (dirs_cache.find (key)); + + if (xi.sys_lib_dirs) + lib_dirs = *xi.sys_lib_dirs; + else if (sd != nullptr) + lib_dirs = sd->lib; + else + { + switch (xi.class_) + { + case compiler_class::gcc: + lib_dirs = gcc_library_search_dirs (xi.path, rs); + break; + case compiler_class::msvc: + lib_dirs = msvc_library_search_dirs (xi.path, rs); + break; + } + } + + if (xi.sys_inc_dirs) + inc_dirs = *xi.sys_inc_dirs; + else if (sd != nullptr) + inc_dirs = sd->inc; + else + { + switch (xi.class_) + { + case compiler_class::gcc: + inc_dirs = gcc_header_search_dirs (xi.path, rs); + break; + case compiler_class::msvc: + inc_dirs = msvc_header_search_dirs (xi.path, rs); + break; + } + } + + if (sd == nullptr) { - case compiler_class::gcc: - inc_dirs = gcc_header_search_dirs (xi.path, rs); - break; - case compiler_class::msvc: - inc_dirs = msvc_header_search_dirs (xi.path, rs); - break; + search_dirs sd; + if (!xi.sys_lib_dirs) sd.lib = lib_dirs; + if (!xi.sys_inc_dirs) sd.inc = inc_dirs; + dirs_cache.insert (move (key), move (sd)); } } diff --git a/libbuild2/file.cxx b/libbuild2/file.cxx index 219a5f1..396b0fe 100644 --- a/libbuild2/file.cxx +++ b/libbuild2/file.cxx @@ -1698,20 +1698,35 @@ namespace build2 // Note that loading of the metadata is split into two steps, extraction and // parsing, because extraction also serves as validation that the executable // is runnable, what we expected, etc. In other words, we sometimes do the - // extraction without parsing. In this light it would have been more - // efficient for extract to return the running process with a pipe rather - // than the extracted data. But this would complicate the code quite a bit - // plus we don't expect the data to be large, typically. + // extraction without parsing. Actually, this seems to be no longer true but + // we do separate the two acts with some interleaving code (e.g., inserting + // the target). // // Also note that we do not check the export.metadata here leaving it to // the caller to do for both this case and export stub. // + // Finally, at first it may seem that caching the metadata is unnecessary + // since the target state itself serves as a cache (i.e., we try hard to + // avoid re-extracting the metadata). However, if there is no metadata, then + // we will re-run the extraction for every optional import. So we cache that + // case only. Note also that while this is only done during serial load, we + // still have to use MT-safe cache since it could be shared by multiple + // build contexts. + // + static global_cache metadata_cache; + static optional extract_metadata (const process_path& pp, const string& key, bool opt, const location& loc) { + if (opt) + { + if (metadata_cache.find (pp.effect_string ())) + return nullopt; + } + // Note: to ease handling (think patching third-party code) we will always // specify the --build2-metadata option in this single-argument form. // @@ -1754,7 +1769,7 @@ namespace build2 ifdstream is (move (pr.in_ofd), ifdstream::badbit); // Note: no skip! // What are the odds that we will run some unrelated program which - // will keep writing to stdout until we run out of memory reading? + // will keep writing to stdout until we run out of memory reading it? // Apparently non-negligible (see GitHub issue #102). // string r; @@ -1839,7 +1854,10 @@ namespace build2 fail: if (opt) + { + metadata_cache.insert (pp.effect_string (), true); return nullopt; + } else throw failed (); } @@ -2683,10 +2701,7 @@ namespace build2 // the first import will determine the path). // if (r.second) - { r.first.as ().process_path (move (pp)); - r.second.unlock (); - } } // Save the metadata. Note that this happens during the load phase and diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx index 0c467a5..f8c3f21 100644 --- a/libbuild2/test/script/parser.hxx +++ b/libbuild2/test/script/parser.hxx @@ -4,6 +4,8 @@ #ifndef LIBBUILD2_TEST_SCRIPT_PARSER_HXX #define LIBBUILD2_TEST_SCRIPT_PARSER_HXX +#include + #include #include #include diff --git a/libbuild2/utility.hxx b/libbuild2/utility.hxx index 44819b5..a07d928 100644 --- a/libbuild2/utility.hxx +++ b/libbuild2/utility.hxx @@ -4,6 +4,7 @@ #ifndef LIBBUILD2_UTILITY_HXX #define LIBBUILD2_UTILITY_HXX +#include #include // make_tuple() #include // make_shared() #include // to_string() @@ -18,8 +19,6 @@ #include // combine_hash(), reverse_iterate(), etc #include -#include - #include #include @@ -533,6 +532,39 @@ namespace build2 verbosity, pe, args, forward (f), error, ignore_exit, checksum); } + // Global, MT-safe information cache. Normally used for caching information + // (versions, targets, search paths, etc) extracted from other programs + // (compilers, etc). + // + // The key is normally a hash of all the inputs that can affect the output. + // + // Note that insertion is racy and it's possible the cache entry already + // exists, in which case we ignore our value assuming it is the same. + // + template + class global_cache + { + public: + const T* + find (const string& k) const + { + mlock l (mutex_); + auto i (cache_.find (k)); + return i != cache_.end () ? &i->second : nullptr; + } + + const T& + insert (string k, T v) + { + mlock l (mutex_); + return cache_.insert (make_pair (move (k), move (v))).first->second; + } + + private: + std::map cache_; + mutable mutex mutex_; + }; + // File descriptor streams. // fdpipe -- cgit v1.1