aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2020-12-15 09:25:19 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2020-12-15 09:31:59 +0200
commitf7a245b2b6091ef3a5e1193423c7fbbd6fe6a538 (patch)
tree28b3006840b718084f229820e0408b0712674232
parent8c9b0fb944a60d8193d8ac3dbac4e8e15f81bf57 (diff)
Cache more results of executing programs (compilers, etc)
-rw-r--r--libbuild2/bin/guess.cxx109
-rw-r--r--libbuild2/bin/guess.hxx6
-rw-r--r--libbuild2/bin/init.cxx51
-rw-r--r--libbuild2/cc/guess.cxx20
-rw-r--r--libbuild2/cc/module.cxx85
-rw-r--r--libbuild2/file.cxx31
-rw-r--r--libbuild2/test/script/parser.hxx2
-rw-r--r--libbuild2/utility.hxx36
8 files changed, 239 insertions, 101 deletions
diff --git a/libbuild2/bin/guess.cxx b/libbuild2/bin/guess.cxx
index 21936d9..9f15030 100644
--- a/libbuild2/bin/guess.cxx
+++ b/libbuild2/bin/guess.cxx
@@ -54,7 +54,7 @@ namespace build2
{
process_path r (
run_try_search (prog,
- true /* init */,
+ false /* init (cached) */,
dir_path () /* fallback */,
true /* path_only */,
paths));
@@ -80,14 +80,33 @@ namespace build2
dr << info << "use " << var << " to override";
});
- return run_search (prog, true, dir_path (), true);
+ return run_search (prog, false, dir_path (), true);
}
- ar_info
+ // Extracting ar/ranlib information requires running them which can become
+ // expensive if done repeatedly. So we cache the result.
+ //
+ static global_cache<ar_info> ar_cache;
+
+ const ar_info&
guess_ar (const path& ar, const path* rl, const char* paths)
{
tracer trace ("bin::guess_ar");
+ // First check the cache.
+ //
+ string key;
+ {
+ sha256 cs;
+ cs.append (ar.string ());
+ if (rl != nullptr) cs.append (rl->string ());
+ if (paths != nullptr) cs.append (paths);
+ key = cs.string ();
+
+ if (const ar_info* r = ar_cache.find (key))
+ return *r;
+ }
+
guess_result arr, rlr;
process_path arp (search (ar, paths, "config.bin.ar"));
@@ -307,24 +326,43 @@ namespace build2
fail << "unable to guess " << *rl << " signature";
}
- return ar_info {
- move (arp),
- move (arr.id),
- move (arr.signature),
- move (arr.checksum),
- move (*arr.version),
-
- move (rlp),
- move (rlr.id),
- move (rlr.signature),
- move (rlr.checksum)};
+ return ar_cache.insert (move (key),
+ ar_info {
+ move (arp),
+ move (arr.id),
+ move (arr.signature),
+ move (arr.checksum),
+ move (*arr.version),
+
+ move (rlp),
+ move (rlr.id),
+ move (rlr.signature),
+ move (rlr.checksum)});
}
- ld_info
+ // Extracting ld information requires running it which can become
+ // expensive if done repeatedly. So we cache the result.
+ //
+ static global_cache<ld_info> ld_cache;
+
+ const ld_info&
guess_ld (const path& ld, const char* paths)
{
tracer trace ("bin::guess_ld");
+ // First check the cache.
+ //
+ string key;
+ {
+ sha256 cs;
+ cs.append (ld.string ());
+ if (paths != nullptr) cs.append (paths);
+ key = cs.string ();
+
+ if (const ld_info* r = ld_cache.find (key))
+ return *r;
+ }
+
guess_result r;
process_path pp (search (ld, paths, "config.bin.ld"));
@@ -484,19 +522,38 @@ namespace build2
if (r.empty ())
fail << "unable to guess " << ld << " signature";
- return ld_info {
- move (pp),
- move (r.id),
- move (r.signature),
- move (r.checksum),
- move (r.version)};
+ return ld_cache.insert (move (key),
+ ld_info {
+ move (pp),
+ move (r.id),
+ move (r.signature),
+ move (r.checksum),
+ move (r.version)});
}
- rc_info
+ // Extracting rc information requires running it which can become
+ // expensive if done repeatedly. So we cache the result.
+ //
+ static global_cache<rc_info> rc_cache;
+
+ const rc_info&
guess_rc (const path& rc, const char* paths)
{
tracer trace ("bin::guess_rc");
+ // First check the cache.
+ //
+ string key;
+ {
+ sha256 cs;
+ cs.append (rc.string ());
+ if (paths != nullptr) cs.append (paths);
+ key = cs.string ();
+
+ if (const rc_info* r = rc_cache.find (key))
+ return *r;
+ }
+
guess_result r;
process_path pp (search (rc, paths, "config.bin.rc"));
@@ -575,8 +632,12 @@ namespace build2
if (r.empty ())
fail << "unable to guess " << rc << " signature";
- return rc_info {
- move (pp), move (r.id), move (r.signature), move (r.checksum)};
+ return rc_cache.insert (move (key),
+ rc_info {
+ move (pp),
+ move (r.id),
+ move (r.signature),
+ move (r.checksum)});
}
}
}
diff --git a/libbuild2/bin/guess.hxx b/libbuild2/bin/guess.hxx
index 27bb5ed..9a63fa1 100644
--- a/libbuild2/bin/guess.hxx
+++ b/libbuild2/bin/guess.hxx
@@ -45,7 +45,7 @@ namespace build2
// The ranlib path can be NULL, in which case no ranlib guessing will be
// attemplated and the returned ranlib_* members will be left empty.
//
- ar_info
+ const ar_info&
guess_ar (const path& ar, const path* ranlib, const char* paths);
// ld information.
@@ -85,7 +85,7 @@ namespace build2
optional<semantic_version> version;
};
- ld_info
+ const ld_info&
guess_ld (const path& ld, const char* paths);
// rc information.
@@ -110,7 +110,7 @@ namespace build2
string checksum;
};
- rc_info
+ const rc_info&
guess_rc (const path& rc, const char* paths);
}
}
diff --git a/libbuild2/bin/init.cxx b/libbuild2/bin/init.cxx
index 49ba518..6d2d2c2 100644
--- a/libbuild2/bin/init.cxx
+++ b/libbuild2/bin/init.cxx
@@ -683,7 +683,7 @@ namespace build2
nullptr,
config::save_default_commented)));
- ar_info ari (guess_ar (ar, ranlib, pat.paths));
+ const ar_info& ari (guess_ar (ar, ranlib, pat.paths));
// If this is a configuration with new values, then print the report
// at verbosity level 2 and up (-v).
@@ -723,33 +723,28 @@ namespace build2
}
rs.assign<process_path_ex> ("bin.ar.path") =
- process_path_ex (move (ari.ar_path), "ar", ari.ar_checksum);
- rs.assign<string> ("bin.ar.id") = move (ari.ar_id);
- rs.assign<string> ("bin.ar.signature") = move (ari.ar_signature);
- rs.assign<string> ("bin.ar.checksum") = move (ari.ar_checksum);
+ process_path_ex (ari.ar_path, "ar", ari.ar_checksum);
+ rs.assign<string> ("bin.ar.id") = ari.ar_id;
+ rs.assign<string> ("bin.ar.signature") = ari.ar_signature;
+ rs.assign<string> ("bin.ar.checksum") = ari.ar_checksum;
{
- semantic_version& v (ari.ar_version);
+ const semantic_version& v (ari.ar_version);
rs.assign<string> ("bin.ar.version") = v.string ();
rs.assign<uint64_t> ("bin.ar.version.major") = v.major;
rs.assign<uint64_t> ("bin.ar.version.minor") = v.minor;
rs.assign<uint64_t> ("bin.ar.version.patch") = v.patch;
- rs.assign<string> ("bin.ar.version.build") = move (v.build);
+ rs.assign<string> ("bin.ar.version.build") = v.build;
}
if (ranlib != nullptr)
{
rs.assign<process_path_ex> ("bin.ranlib.path") =
- process_path_ex (move (ari.ranlib_path),
- "ranlib",
- ari.ranlib_checksum);
- rs.assign<string> ("bin.ranlib.id") =
- move (ari.ranlib_id);
- rs.assign<string> ("bin.ranlib.signature") =
- move (ari.ranlib_signature);
- rs.assign<string> ("bin.ranlib.checksum") =
- move (ari.ranlib_checksum);
+ process_path_ex (ari.ranlib_path, "ranlib", ari.ranlib_checksum);
+ rs.assign<string> ("bin.ranlib.id") = ari.ranlib_id;
+ rs.assign<string> ("bin.ranlib.signature") = ari.ranlib_signature;
+ rs.assign<string> ("bin.ranlib.checksum") = ari.ranlib_checksum;
}
}
@@ -826,7 +821,7 @@ namespace build2
path (apply_pattern (ld_d, pat.pattern)),
config::save_default_commented)));
- ld_info ldi (guess_ld (ld, pat.paths));
+ const ld_info& ldi (guess_ld (ld, pat.paths));
// If this is a configuration with new values, then print the report
// at verbosity level 2 and up (-v).
@@ -859,20 +854,20 @@ namespace build2
}
rs.assign<process_path_ex> ("bin.ld.path") =
- process_path_ex (move (ldi.path), "ld", ldi.checksum);
- rs.assign<string> ("bin.ld.id") = move (ldi.id);
- rs.assign<string> ("bin.ld.signature") = move (ldi.signature);
- rs.assign<string> ("bin.ld.checksum") = move (ldi.checksum);
+ process_path_ex (ldi.path, "ld", ldi.checksum);
+ rs.assign<string> ("bin.ld.id") = ldi.id;
+ rs.assign<string> ("bin.ld.signature") = ldi.signature;
+ rs.assign<string> ("bin.ld.checksum") = ldi.checksum;
if (ldi.version)
{
- semantic_version& v (*ldi.version);
+ const semantic_version& v (*ldi.version);
rs.assign<string> ("bin.ld.version") = v.string ();
rs.assign<uint64_t> ("bin.ld.version.major") = v.major;
rs.assign<uint64_t> ("bin.ld.version.minor") = v.minor;
rs.assign<uint64_t> ("bin.ld.version.patch") = v.patch;
- rs.assign<string> ("bin.ld.version.build") = move (v.build);
+ rs.assign<string> ("bin.ld.version.build") = v.build;
}
}
@@ -979,7 +974,7 @@ namespace build2
path (apply_pattern (rc_d, pat.pattern)),
config::save_default_commented)));
- rc_info rci (guess_rc (rc, pat.paths));
+ const rc_info& rci (guess_rc (rc, pat.paths));
// If this is a configuration with new values, then print the report
// at verbosity level 2 and up (-v).
@@ -994,10 +989,10 @@ namespace build2
}
rs.assign<process_path_ex> ("bin.rc.path") =
- process_path_ex (move (rci.path), "rc", rci.checksum);
- rs.assign<string> ("bin.rc.id") = move (rci.id);
- rs.assign<string> ("bin.rc.signature") = move (rci.signature);
- rs.assign<string> ("bin.rc.checksum") = move (rci.checksum);
+ process_path_ex (rci.path, "rc", rci.checksum);
+ rs.assign<string> ("bin.rc.id") = rci.id;
+ rs.assign<string> ("bin.rc.signature") = rci.signature;
+ rs.assign<string> ("bin.rc.checksum") = rci.checksum;
}
return true;
diff --git a/libbuild2/cc/guess.cxx b/libbuild2/cc/guess.cxx
index 5a2e3b3..daa9d9b 100644
--- a/libbuild2/cc/guess.cxx
+++ b/libbuild2/cc/guess.cxx
@@ -2956,8 +2956,7 @@ namespace build2
// Compiler checks can be expensive (we often need to run the compiler
// several times) so we cache the result.
//
- static map<string, compiler_info> cache;
- static mutex cache_mutex;
+ static global_cache<compiler_info> cache;
const compiler_info&
guess (const char* xm,
@@ -2988,11 +2987,8 @@ namespace build2
if (x_lo != nullptr) append_options (cs, *x_lo);
key = cs.string ();
- mlock l (cache_mutex);
-
- auto i (cache.find (key));
- if (i != cache.end ())
- return i->second;
+ if (const compiler_info* r = cache.find (key))
+ return *r;
}
// Parse the user-specified compiler id (config.x.id).
@@ -3144,15 +3140,7 @@ namespace build2
r.bin_pattern = p.directory ().representation (); // Trailing slash.
}
- // It's possible the cache entry already exists, in which case we
- // ignore our value.
- //
- // But what if the compiler information it contains is different? Well,
- // we don't generally deal with toolchain changes during the build so we
- // ignore this special case as well.
- //
- mlock l (cache_mutex);
- return cache.insert (make_pair (move (key), move (r))).first->second;
+ return cache.insert (move (key), move (r));
}
strings
diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx
index 14182a2..8241a01 100644
--- a/libbuild2/cc/module.cxx
+++ b/libbuild2/cc/module.cxx
@@ -338,6 +338,17 @@ namespace build2
# endif
#endif
+ // Extracting search dirs can be expensive (we may need to run the
+ // compiler several times) so we cache the result.
+ //
+ struct search_dirs
+ {
+ pair<dir_paths, size_t> lib;
+ pair<dir_paths, size_t> inc;
+ };
+
+ static global_cache<search_dirs> dirs_cache;
+
void config_module::
init (scope& rs, const location& loc, const variable_map&)
{
@@ -440,33 +451,67 @@ namespace build2
pair<dir_paths, size_t> inc_dirs;
const optional<pair<dir_paths, size_t>>& mod_dirs (xi.sys_mod_dirs);
- if (xi.sys_lib_dirs)
+ if (xi.sys_lib_dirs && xi.sys_inc_dirs)
+ {
lib_dirs = *xi.sys_lib_dirs;
+ inc_dirs = *xi.sys_inc_dirs;
+ }
else
{
- switch (xi.class_)
+ string key;
{
- case compiler_class::gcc:
- lib_dirs = gcc_library_search_dirs (xi.path, rs);
- break;
- case compiler_class::msvc:
- lib_dirs = msvc_library_search_dirs (xi.path, rs);
- break;
+ sha256 cs;
+ cs.append (static_cast<size_t> (x_lang));
+ cs.append (xi.path.effect_string ());
+ append_options (cs, mode);
+ key = cs.string ();
}
- }
- if (xi.sys_inc_dirs)
- inc_dirs = *xi.sys_inc_dirs;
- else
- {
- switch (xi.class_)
+ // Because the compiler info (xi) is also cached, we can assume that
+ // if dirs come from there, then they do so consistently.
+ //
+ const search_dirs* sd (dirs_cache.find (key));
+
+ if (xi.sys_lib_dirs)
+ lib_dirs = *xi.sys_lib_dirs;
+ else if (sd != nullptr)
+ lib_dirs = sd->lib;
+ else
+ {
+ switch (xi.class_)
+ {
+ case compiler_class::gcc:
+ lib_dirs = gcc_library_search_dirs (xi.path, rs);
+ break;
+ case compiler_class::msvc:
+ lib_dirs = msvc_library_search_dirs (xi.path, rs);
+ break;
+ }
+ }
+
+ if (xi.sys_inc_dirs)
+ inc_dirs = *xi.sys_inc_dirs;
+ else if (sd != nullptr)
+ inc_dirs = sd->inc;
+ else
+ {
+ switch (xi.class_)
+ {
+ case compiler_class::gcc:
+ inc_dirs = gcc_header_search_dirs (xi.path, rs);
+ break;
+ case compiler_class::msvc:
+ inc_dirs = msvc_header_search_dirs (xi.path, rs);
+ break;
+ }
+ }
+
+ if (sd == nullptr)
{
- case compiler_class::gcc:
- inc_dirs = gcc_header_search_dirs (xi.path, rs);
- break;
- case compiler_class::msvc:
- inc_dirs = msvc_header_search_dirs (xi.path, rs);
- break;
+ search_dirs sd;
+ if (!xi.sys_lib_dirs) sd.lib = lib_dirs;
+ if (!xi.sys_inc_dirs) sd.inc = inc_dirs;
+ dirs_cache.insert (move (key), move (sd));
}
}
diff --git a/libbuild2/file.cxx b/libbuild2/file.cxx
index 219a5f1..396b0fe 100644
--- a/libbuild2/file.cxx
+++ b/libbuild2/file.cxx
@@ -1698,20 +1698,35 @@ namespace build2
// Note that loading of the metadata is split into two steps, extraction and
// parsing, because extraction also serves as validation that the executable
// is runnable, what we expected, etc. In other words, we sometimes do the
- // extraction without parsing. In this light it would have been more
- // efficient for extract to return the running process with a pipe rather
- // than the extracted data. But this would complicate the code quite a bit
- // plus we don't expect the data to be large, typically.
+ // extraction without parsing. Actually, this seems to be no longer true but
+ // we do separate the two acts with some interleaving code (e.g., inserting
+ // the target).
//
// Also note that we do not check the export.metadata here leaving it to
// the caller to do for both this case and export stub.
//
+ // Finally, at first it may seem that caching the metadata is unnecessary
+ // since the target state itself serves as a cache (i.e., we try hard to
+ // avoid re-extracting the metadata). However, if there is no metadata, then
+ // we will re-run the extraction for every optional import. So we cache that
+ // case only. Note also that while this is only done during serial load, we
+ // still have to use MT-safe cache since it could be shared by multiple
+ // build contexts.
+ //
+ static global_cache<bool> metadata_cache;
+
static optional<string>
extract_metadata (const process_path& pp,
const string& key,
bool opt,
const location& loc)
{
+ if (opt)
+ {
+ if (metadata_cache.find (pp.effect_string ()))
+ return nullopt;
+ }
+
// Note: to ease handling (think patching third-party code) we will always
// specify the --build2-metadata option in this single-argument form.
//
@@ -1754,7 +1769,7 @@ namespace build2
ifdstream is (move (pr.in_ofd), ifdstream::badbit); // Note: no skip!
// What are the odds that we will run some unrelated program which
- // will keep writing to stdout until we run out of memory reading?
+ // will keep writing to stdout until we run out of memory reading it?
// Apparently non-negligible (see GitHub issue #102).
//
string r;
@@ -1839,7 +1854,10 @@ namespace build2
fail:
if (opt)
+ {
+ metadata_cache.insert (pp.effect_string (), true);
return nullopt;
+ }
else
throw failed ();
}
@@ -2683,10 +2701,7 @@ namespace build2
// the first import will determine the path).
//
if (r.second)
- {
r.first.as<exe> ().process_path (move (pp));
- r.second.unlock ();
- }
}
// Save the metadata. Note that this happens during the load phase and
diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx
index 0c467a5..f8c3f21 100644
--- a/libbuild2/test/script/parser.hxx
+++ b/libbuild2/test/script/parser.hxx
@@ -4,6 +4,8 @@
#ifndef LIBBUILD2_TEST_SCRIPT_PARSER_HXX
#define LIBBUILD2_TEST_SCRIPT_PARSER_HXX
+#include <unordered_map>
+
#include <libbuild2/types.hxx>
#include <libbuild2/forward.hxx>
#include <libbuild2/utility.hxx>
diff --git a/libbuild2/utility.hxx b/libbuild2/utility.hxx
index 44819b5..a07d928 100644
--- a/libbuild2/utility.hxx
+++ b/libbuild2/utility.hxx
@@ -4,6 +4,7 @@
#ifndef LIBBUILD2_UTILITY_HXX
#define LIBBUILD2_UTILITY_HXX
+#include <map>
#include <tuple> // make_tuple()
#include <memory> // make_shared()
#include <string> // to_string()
@@ -18,8 +19,6 @@
#include <libbutl/utility.mxx> // combine_hash(), reverse_iterate(), etc
#include <libbutl/fdstream.mxx>
-#include <unordered_set>
-
#include <libbuild2/types.hxx>
#include <libbuild2/forward.hxx>
@@ -533,6 +532,39 @@ namespace build2
verbosity, pe, args, forward<F> (f), error, ignore_exit, checksum);
}
+ // Global, MT-safe information cache. Normally used for caching information
+ // (versions, targets, search paths, etc) extracted from other programs
+ // (compilers, etc).
+ //
+ // The key is normally a hash of all the inputs that can affect the output.
+ //
+ // Note that insertion is racy and it's possible the cache entry already
+ // exists, in which case we ignore our value assuming it is the same.
+ //
+ template <typename T>
+ class global_cache
+ {
+ public:
+ const T*
+ find (const string& k) const
+ {
+ mlock l (mutex_);
+ auto i (cache_.find (k));
+ return i != cache_.end () ? &i->second : nullptr;
+ }
+
+ const T&
+ insert (string k, T v)
+ {
+ mlock l (mutex_);
+ return cache_.insert (make_pair (move (k), move (v))).first->second;
+ }
+
+ private:
+ std::map<string, T> cache_;
+ mutable mutex mutex_;
+ };
+
// File descriptor streams.
//
fdpipe