From 7a4f1ae238b918fe21671d0d1a5549aeb1fe6425 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 16 Mar 2021 11:36:10 +0200 Subject: Define intermediate build results file cache interface --- libbuild2/algorithm.hxx | 2 +- libbuild2/build/script/parser.test.cxx | 4 +- libbuild2/cc/compile-rule.cxx | 129 +++++++++++++------- libbuild2/cc/compile-rule.hxx | 6 +- libbuild2/context.cxx | 2 + libbuild2/context.hxx | 3 + libbuild2/file-cache.hxx | 209 +++++++++++++++++++++++++++++++++ libbuild2/file-cache.ixx | 38 ++++++ libbuild2/function.test.cxx | 4 +- libbuild2/module.cxx | 1 + libbuild2/test/script/parser.test.cxx | 4 +- 11 files changed, 354 insertions(+), 48 deletions(-) create mode 100644 libbuild2/file-cache.hxx create mode 100644 libbuild2/file-cache.ixx (limited to 'libbuild2') diff --git a/libbuild2/algorithm.hxx b/libbuild2/algorithm.hxx index aa1336c..90159d3 100644 --- a/libbuild2/algorithm.hxx +++ b/libbuild2/algorithm.hxx @@ -757,7 +757,7 @@ namespace build2 // the prerequisites in the reverse order. // // You can also clean extra files derived from ad hoc group members that are - // "indexed" using using their target types (see add/find_adhoc_member() for + // "indexed" using their target types (see add/find_adhoc_member() for // details). 
// // Note that if the target path is empty then it is assumed "unreal" and is diff --git a/libbuild2/build/script/parser.test.cxx b/libbuild2/build/script/parser.test.cxx index 4a3e8cc..a277102 100644 --- a/libbuild2/build/script/parser.test.cxx +++ b/libbuild2/build/script/parser.test.cxx @@ -10,6 +10,7 @@ #include #include #include +#include #include // line #include @@ -179,7 +180,8 @@ namespace build2 // scheduler sched (1); global_mutexes mutexes (1); - context ctx (sched, mutexes); + file_cache fcache (sched); + context ctx (sched, mutexes, fcache); try { diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index 116f67b..e394175 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -188,7 +188,7 @@ namespace build2 bool touch = false; // Target needs to be touched. timestamp mt = timestamp_unknown; // Target timestamp. prerequisite_member src; - auto_rmfile psrc; // Preprocessed source, if any. + file_cache::entry psrc; // Preprocessed source, if any. path dd; // Dependency database path. size_t header_units = 0; // Number of imported header units. module_positions modules = {0, 0, 0}; // Positions of imported modules. @@ -1100,7 +1100,7 @@ namespace build2 // If we have no #include directives (or header unit imports), then // skip header dependency extraction. // - pair psrc (auto_rmfile (), false); + pair psrc (file_cache::entry (), false); if (md.pp < preprocessed::includes) { // Note: trace is used in a test. @@ -1292,22 +1292,33 @@ namespace build2 { md.psrc = move (psrc.first); + // Now is also the right time to unpin the cache entry (we don't do + // it earlier because parse_unit() may need to read it). + // + md.psrc.unpin (); + // Without modules keeping the (partially) preprocessed output // around doesn't buy us much: if the source/headers haven't changed // then neither will the object file. 
Modules make things more // interesting: now we may have to recompile an otherwise unchanged - // translation unit because a BMI it depends on has changed. In this - // case re-processing the translation unit would be a waste and - // compiling the original source would break distributed + // translation unit because a named module BMI it depends on has + // changed. In this case re-processing the translation unit would be + // a waste and compiling the original source would break distributed // compilation. // // Note also that the long term trend will (hopefully) be for // modularized projects to get rid of #include's which means the // need for producing this partially preprocessed output will - // (hopefully) gradually disappear. + // (hopefully) gradually disappear. Or not, most C headers will stay + // headers, and probably not importable. + // + // @@ TODO: no use keeping it if there are no named module imports + // (but see also file_cache::create() hint, and + // extract_headers() the cache case: there we just assume + // it exists if modules is true). // if (modules) - md.psrc.active = false; // Keep. + md.psrc.temporary = false; // Keep. } // Above we may have ignored changes to the translation unit. The @@ -3109,7 +3120,7 @@ namespace build2 // header unit BMI is out-of-date, then we have to re-preprocess this // translation unit. // - pair compile_rule:: + pair compile_rule:: extract_headers (action a, const scope& bs, file& t, @@ -3123,11 +3134,13 @@ namespace build2 { tracer trace (x, "compile_rule::extract_headers"); + context& ctx (t.ctx); + otype ot (li.type); bool reprocess (cast_false (t[c_reprocess])); - auto_rmfile psrc; + file_cache::entry psrc; bool puse (true); // If things go wrong (and they often do in this area), give the user a @@ -3346,6 +3359,8 @@ namespace build2 &so_map, this] (bool& gen) -> const path* { + context& ctx (t.ctx); + const path* r (nullptr); if (args.empty ()) // First call. 
@@ -3489,7 +3504,7 @@ namespace build2 // See if this path is inside a project with an out-of- // tree build and is in the out directory tree. // - const scope& bs (t.ctx.scopes.find (d)); + const scope& bs (ctx.scopes.find (d)); if (bs.root_scope () != nullptr) { const dir_path& bp (bs.out_path ()); @@ -3580,16 +3595,16 @@ namespace build2 msvc_sanitize_cl (args); - psrc = auto_rmfile (t.path () + x_pext); + psrc = ctx.fcache.create (t.path () + x_pext, !modules); if (fc) { args.push_back ("/Fi:"); - args.push_back (psrc.path.string ().c_str ()); + args.push_back (psrc.path ().string ().c_str ()); } else { - out = "/Fi" + psrc.path.string (); + out = "/Fi" + psrc.path ().string (); args.push_back (out.c_str ()); } @@ -3724,9 +3739,9 @@ namespace build2 // Preprocessor output. // - psrc = auto_rmfile (t.path () + x_pext); + psrc = ctx.fcache.create (t.path () + x_pext, !modules); args.push_back ("-o"); - args.push_back (psrc.path.string ().c_str ()); + args.push_back (psrc.path ().string ().c_str ()); } else { @@ -4005,8 +4020,9 @@ namespace build2 // around (see apply() for details). // return modules - ? make_pair (auto_rmfile (t.path () + x_pext, false), true) - : make_pair (auto_rmfile (), false); + ? make_pair (ctx.fcache.create_existing (t.path () + x_pext), + true) + : make_pair (file_cache::entry (), false); } // This can be a header or a header unit (mapping). @@ -4059,7 +4075,7 @@ namespace build2 // Bail out early if we have deferred a failure. // - return make_pair (auto_rmfile (), false); + return make_pair (file_cache::entry (), false); } } } @@ -4073,6 +4089,13 @@ namespace build2 if (args.empty () || gen != args_gen) drmp = init_args (gen); + // If we are producing the preprocessed output, get its write + // handle. + // + file_cache::write psrcw (psrc + ? psrc.init_new () + : file_cache::write ()); + if (verb >= 3) print_process (args.data ()); // Disable pipe mode. 
@@ -4559,7 +4582,7 @@ namespace build2 if (md.deferred_failure) { is.close (); - return make_pair (auto_rmfile (), false); + return make_pair (file_cache::entry (), false); } // In case of VC, we are parsing stderr and if things go @@ -4703,6 +4726,13 @@ namespace build2 } else run_finish (args, pr); // Throws. + + // Success. + // + assert (!restart); + + if (psrc) + psrcw.close (); } catch (const process_error& e) { @@ -4727,7 +4757,7 @@ namespace build2 // dd.expect (""); - puse = puse && !reprocess && !psrc.path.empty (); + puse = puse && !reprocess && psrc; return make_pair (move (psrc), puse); } @@ -4740,7 +4770,7 @@ namespace build2 file& t, linfo li, const file& src, - auto_rmfile& psrc, + file_cache::entry& psrc, const match_data& md, const path& dd, unit& tu) const @@ -4791,8 +4821,8 @@ namespace build2 // may extend cc.reprocess to allow specifying where reprocessing is // needed). // - ps = !psrc.path.empty () && !reprocess; - sp = &(ps ? psrc.path : src.path ()); + ps = psrc && !reprocess; + sp = &(ps ? psrc.path () : src.path ()); // VC's preprocessed output, if present, is fully preprocessed. // @@ -4931,11 +4961,16 @@ namespace build2 for (;;) // Breakout loop. try { - // Disarm the removal of the preprocessed file in case of an error. - // We re-arm it below. + // If we are compiling the preprocessed output, get its read handle. // - if (ps) - psrc.active = false; + file_cache::read psrcr (ps ? psrc.open () : file_cache::read ()); + + // Temporarily disable the removal of the preprocessed file in case of + // an error. We re-enable it below. + // + bool ptmp (ps && psrc.temporary); + if (ptmp) + psrc.temporary = false; process pr; @@ -4973,8 +5008,8 @@ namespace build2 if (pr.wait ()) { - if (ps) - psrc.active = true; // Re-arm. + if (ptmp) + psrc.temporary = true; // Re-enable. unit_type& ut (tu.type); module_info& mi (tu.module_info); @@ -7045,14 +7080,14 @@ namespace build2 // If we have the (partially) preprocessed output, switch to that. 
// - bool psrc (!md.psrc.path.empty ()); - bool pact (md.psrc.active); + bool psrc (md.psrc); + bool ptmp (psrc && md.psrc.temporary); if (psrc) { args.pop_back (); // nullptr args.pop_back (); // sp - sp = &md.psrc.path; + sp = &md.psrc.path (); // This should match with how we setup preprocessing. // @@ -7099,10 +7134,11 @@ namespace build2 // Let's keep the preprocessed file in case of an error but only at // verbosity level 3 and up (when one actually sees it mentioned on - // the command line). We also have to re-arm on success (see below). + // the command line). We also have to re-enable on success (see + // below). // - if (pact && verb >= 3) - md.psrc.active = false; + if (ptmp && verb >= 3) + md.psrc.temporary = false; } if (verb >= 3) @@ -7121,6 +7157,10 @@ namespace build2 { try { + // If we are compiling the preprocessed output, get its read handle. + // + file_cache::read psrcr (psrc ? md.psrc.open () : file_cache::read ()); + // VC cl.exe sends diagnostics to stdout. It also prints the file // name being compiled as the first line. So for cl.exe we redirect // stdout to a pipe, filter that noise out, and send the rest to @@ -7178,8 +7218,8 @@ namespace build2 // Remove preprocessed file (see above). // - if (pact && verb >= 3) - md.psrc.active = true; + if (ptmp && verb >= 3) + md.psrc.temporary = true; // Clang's module compilation requires two separate compiler // invocations. @@ -7251,14 +7291,21 @@ namespace build2 { const file& t (xt.as ()); + // Compressed preprocessed file extension. 
+ // + auto cpext = [this, &t, s = string ()] () mutable -> const char* + { + return (s = t.ctx.fcache.compressed_extension (x_pext)).c_str (); + }; + clean_extras extras; switch (ctype) { - case compiler_type::gcc: extras = {".d", x_pext, ".t"}; break; - case compiler_type::clang: extras = {".d", x_pext}; break; - case compiler_type::msvc: extras = {".d", x_pext, ".idb", ".pdb"};break; - case compiler_type::icc: extras = {".d"}; break; + case compiler_type::gcc: extras = {".d", x_pext, cpext (), ".t"}; break; + case compiler_type::clang: extras = {".d", x_pext, cpext ()}; break; + case compiler_type::msvc: extras = {".d", x_pext, cpext (), ".idb", ".pdb"};break; + case compiler_type::icc: extras = {".d"}; break; } return perform_clean_extra (a, t, extras); diff --git a/libbuild2/cc/compile-rule.hxx b/libbuild2/cc/compile-rule.hxx index edff1d8..917acd1 100644 --- a/libbuild2/cc/compile-rule.hxx +++ b/libbuild2/cc/compile-rule.hxx @@ -8,7 +8,7 @@ #include #include -#include // auto_rmfile +#include #include #include @@ -140,14 +140,14 @@ namespace build2 optional inject_header (action, file&, const file&, timestamp, bool) const; - pair + pair extract_headers (action, const scope&, file&, linfo, const file&, match_data&, depdb&, bool&, timestamp, module_imports&) const; string parse_unit (action, file&, linfo, - const file&, auto_rmfile&, + const file&, file_cache::entry&, const match_data&, const path&, unit&) const; diff --git a/libbuild2/context.cxx b/libbuild2/context.cxx index df06aa8..e8f671d 100644 --- a/libbuild2/context.cxx +++ b/libbuild2/context.cxx @@ -58,6 +58,7 @@ namespace build2 context:: context (scheduler& s, global_mutexes& ms, + file_cache& fc, bool mo, bool nem, bool dr, @@ -68,6 +69,7 @@ namespace build2 : data_ (new data (*this)), sched (s), mutexes (ms), + fcache (fc), match_only (mo), no_external_modules (nem), dry_run_option (dr), diff --git a/libbuild2/context.hxx b/libbuild2/context.hxx index edbf780..f25db85 100644 --- 
a/libbuild2/context.hxx +++ b/libbuild2/context.hxx @@ -20,6 +20,7 @@ namespace build2 { + class file_cache; class loaded_modules_lock; class LIBBUILD2_SYMEXPORT run_phase_mutex @@ -141,6 +142,7 @@ namespace build2 public: scheduler& sched; global_mutexes& mutexes; + file_cache& fcache; // Match only flag (see --match-only but also dist). // @@ -490,6 +492,7 @@ namespace build2 explicit context (scheduler&, global_mutexes&, + file_cache&, bool match_only = false, bool no_external_modules = false, bool dry_run = false, diff --git a/libbuild2/file-cache.hxx b/libbuild2/file-cache.hxx new file mode 100644 index 0000000..4486307 --- /dev/null +++ b/libbuild2/file-cache.hxx @@ -0,0 +1,209 @@ +// file : libbuild2/file-cache.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_FILE_CACHE_HXX +#define LIBBUILD2_FILE_CACHE_HXX + +#include +#include +#include + +#include + +namespace build2 +{ + // We sometimes have intermediate build results that must be stored and + // accessed as files (for example, partially-preprocessed C/C++ translation + // units; those .i/.ii files). These files can be quite large which can lead + // to excessive disk usage (for example, the .ii files can be several MB + // each and can end up dominating object file sizes in a build with debug + // information). These files are also often temporary which means writing + // them to disk is really a waste. + // + // The file cache attempts to address this by still presenting a file-like + // entry (which can be a real file or a named pipe) but potentially storing + // the file contents in memory and/or compressed. + // + // Each cache entry is identified by the filesystem entry path that will be + // written to or read from. The file cache reserves a filesystem entry path + // that is derived by adding a compression extension to the main entry path + // (for example, .ii.lz4). 
When cleaning intermediate build results that are + // managed by the cache, the rule must clean such a reserved path in + // addition to the main entry path (see compressed_extension() below). + // + // While the cache is MT-safe (that is, we can insert multiple entries + // concurrently), each entry is expected to be accessed serially by a single + // thread. Furthermore, each entry can either be written to or read from at + // any given time and it can only be read from by a single reader at a time. + // In other words, there is meant to be a single cache entry for any given + // path and it is not meant to be shared. + // + // The underlying filesystem entry can be either temporary or permanent. A + // temporary entry only exists during the build, normally between the match + // and execute phases. A permanent entry exists across builds. Note, + // however, that a permanent entry is often removed in cases of an error and + // sometimes a temporary entry is left behind for diagnostics. It is also + // possible that the distinction only becomes known some time after the + // entry has been created. As a result, all entries by default start as + // temporary and can later be made permanent if desired. + // + // A cache entry can be pinned or unpinned. A cache entry is created pinned. + // A cache entry being written to or read from remains pinned. + // + // An unpinned entry can be preempted. Preempting a cache entry can mean any + // of the following: + // + // - An in-memory content is compressed (but stays in memory). + // + // - An in-memory content (compressed or not) is flushed to disk (with or + // without compression). + // + // - An uncompressed on-disk content is compressed. + // + // Naturally, any of the above degrees of preemption make accessing the + // contents of a cache entry slower. Note also that pinned/unpinned and + // temporary/permanent are independent and a temporary entry does not need + // to be unpinned to be removed.
+ // + // After creation, a cache entry must be initialized by either writing new + // contents to the filesystem entry or by using an existing (permanent) + // filesystem entry. Once initialized, an entry can be opened for reading, + // potentially multiple times. + // + // Note also that a noop implementation of this caching semantics (that is, + // one that simply saves the file on disk) is file_cache::entry that is just + // auto_rmfile. + // + class /*LIBBUILD2_SYMEXPORT*/ file_cache + { + public: + + // A cache entry write handle. During the lifetime of this object the + // filesystem entry can be opened for writing and written to. + // + // A successful write must be terminated with an explicit call to close() + // (similar semantics to ofdstream). A write handle that is destroyed + // without a close() call is treated as an unsuccessful write and the + // initialization can be attempted again. + // + struct write + { + void close () {} + }; + + // A cache entry read handle. During the lifetime of this object the + // filesystem entry can be opened for reading and read from. + // + struct read + { + }; + + // A cache entry handle. When it is destroyed, a temporary entry is + // automatically removed from the filesystem. + // + struct entry + { + using path_type = build2::path; + + bool temporary; + + // The returned reference is valid and stable for the lifetime of the + // entry handle. + // + const path_type& + path () const {return path_;} + + // Initialization. + // + write + init_new () + { + return write (); + } + + void + init_existing () {} + + // Reading. + // + read + open () + { + return read (); + } + + // Pinning. + // + void + pin () {} + + void + unpin () {} + + // NULL entry handle. + // + entry () = default; + + explicit operator bool () const + { + return !path_.empty (); + } + + // Move-to-NULL-entry-only type. 
+ // + entry (entry&&); + entry (const entry&) = delete; + entry& operator= (entry&&); + entry& operator= (const entry&) = delete; + + // Implementation details. + // + entry (path_type, bool); + ~entry (); + + path_type path_; + }; + + // Create a cache entry corresponding to the specified filesystem path. + // The path must be absolute and normalized. The temporary argument may be + // used to hint whether the entry is likely to be temporary or permanent. + // + entry + create (path f, optional /*temporary*/) + { + return entry (move (f), true /* temporary */); + }; + + // A shortcut for creating and initializing an existing permanent entry. + // + entry + create_existing (path f) + { + entry e (move (f), false /* temporary */); + e.init_existing (); + return e; + }; + + // Return the compressed filesystem entry extension (with the leading dot) + // or empty string if no compression is used by this cache implementation. + // + // If the passed extension is not NULL, then it is included as a first- + // level extension into the returned value (useful to form extensions for + // clean_extra()). 
+ // + string + compressed_extension (const char* = nullptr) + { + return string (); + } + + explicit + file_cache (scheduler&) + { + } + }; +} + +#include + +#endif // LIBBUILD2_FILE_CACHE_HXX diff --git a/libbuild2/file-cache.ixx b/libbuild2/file-cache.ixx new file mode 100644 index 0000000..259d348 --- /dev/null +++ b/libbuild2/file-cache.ixx @@ -0,0 +1,38 @@ +// file : libbuild2/file-cache.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include // try_rmfile() + +namespace build2 +{ + inline file_cache::entry:: + entry (path_type p, bool t) + : temporary (t), path_ (move (p)) + { + } + + inline file_cache::entry:: + ~entry () + { + if (!path_.empty () && temporary) + try_rmfile (path_, true /* ignore_errors */); + } + + inline file_cache::entry:: + entry (entry&& e) + : temporary (e.temporary), path_ (move (e.path_)) + { + } + + inline file_cache::entry& file_cache::entry:: + operator= (entry&& e) + { + if (this != &e) + { + assert (path_.empty ()); + temporary = e.temporary; + path_ = move (e.path_); + } + return *this; + } +} diff --git a/libbuild2/function.test.cxx b/libbuild2/function.test.cxx index f8a2c16..7aa1a50 100644 --- a/libbuild2/function.test.cxx +++ b/libbuild2/function.test.cxx @@ -12,6 +12,7 @@ #include #include #include +#include #include using namespace std; @@ -46,7 +47,8 @@ namespace build2 // scheduler sched (1); global_mutexes mutexes (1); - context ctx (sched, mutexes); + file_cache fcache (sched); + context ctx (sched, mutexes, fcache); auto& functions (ctx.functions); diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx index 9a7975d..eb4395d 100644 --- a/libbuild2/module.cxx +++ b/libbuild2/module.cxx @@ -80,6 +80,7 @@ namespace build2 ctx.module_context_storage->reset ( new context (ctx.sched, ctx.mutexes, + ctx.fcache, false, /* match_only */ false, /* no_external_modules */ false, /* dry_run */ diff --git a/libbuild2/test/script/parser.test.cxx b/libbuild2/test/script/parser.test.cxx index dbdeb57..df91586 
100644 --- a/libbuild2/test/script/parser.test.cxx +++ b/libbuild2/test/script/parser.test.cxx @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -165,7 +166,8 @@ namespace build2 // scheduler sched (1); global_mutexes mutexes (1); - context ctx (sched, mutexes); + file_cache fcache (sched); + context ctx (sched, mutexes, fcache); bool scope (false); bool id (false); -- cgit v1.1