From 562af3a7f3742bf57b007e904e0bb661a5da1dab Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 18 Mar 2021 09:46:03 +0200 Subject: Implement synchronous compressed file cache --- libbuild2/file-cache.cxx | 187 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 libbuild2/file-cache.cxx (limited to 'libbuild2/file-cache.cxx') diff --git a/libbuild2/file-cache.cxx b/libbuild2/file-cache.cxx new file mode 100644 index 0000000..107bf3f --- /dev/null +++ b/libbuild2/file-cache.cxx @@ -0,0 +1,187 @@ +// file : libbuild2/file-cache.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include // entry_stat, path_entry() + +#include // exists(), try_rmfile() +#include + +using namespace butl; + +namespace build2 +{ + // file_cache::entry + // + file_cache::write file_cache::entry:: + init_new () + { + assert (state_ == uninit); + + // Remove stale compressed file if exists. While not strictly necessary + // (since the presence of the new uncompressed file will render the + // compressed one invalid), this makes things cleaner in case we don't get + // to compressing the new file (for example, if we fail and leave the + // uncompressed file behind for troubleshooting). + // + try_rmfile_ignore_error (comp_path_); + + pin (); + return write (*this); + } + + void file_cache::entry:: + init_existing () + { + assert (state_ == uninit); + + // Determine the cache state from the filesystem state. + // + // First check for the uncompressed file. Its presence means that the + // compressed file, if exists, is invalid and we clean it up, similar to + // init_new(). + // + if (exists (path_)) + { + try_rmfile_ignore_error (comp_path_); + state_ = uncomp; + } + else if (exists (comp_path_)) + { + state_ = comp; + } + else + fail << path_ << " (or its compressed variant) does not exist" << + info << "consider cleaning the build state"; + } + + void file_cache::entry:: + preempt () + { + // Note that this function is called from destructors so it's best if it + // doesn't throw. + // + switch (state_) + { + case uncomp: + { + if (!compress ()) + break; + + state_ = decomp; // We now have both. + } + // Fall through. + case decomp: + { + if (try_rmfile_ignore_error (path_)) + state_ = comp; + + break; + } + default: + assert (false); + } + } + + bool file_cache::entry:: + compress () + { + tracer trace ("file_cache::entry::compress"); + + pair st ( + path_entry (path_, + true /* follow_symlinks */, + true /* ignore_error */)); + + if (!st.first) + return false; + + try + { + ifdstream ifs (path_, fdopen_mode::binary, ifdstream::badbit); + ofdstream ofs (comp_path_, fdopen_mode::binary); + + // Experience shows that for the type of content we typically cache + // using 1MB blocks results in almost the same comression as for 4MB. + // + uint64_t comp_size ( + lz4::compress (ofs, ifs, + 1 /* compression_level (fastest) */, + 6 /* block_size_id (1MB) */, + st.second.size)); + + ofs.close (); + + l6 ([&]{trace << "compressed " << path_ << " to " + << (comp_size * 100 / st.second.size) << '%';}); + } + catch (const std::exception& e) + { + l5 ([&]{trace << "unable to compress " << path_ << ": " << e;}); + try_rmfile_ignore_error (comp_path_); + return false; + } + + return true; + } + + void file_cache::entry:: + decompress () + { + try + { + ifdstream ifs (comp_path_, fdopen_mode::binary, ifdstream::badbit); + ofdstream ofs (path_, fdopen_mode::binary); + + lz4::decompress (ofs, ifs); + + ofs.close (); + } + catch (const std::exception& e) + { + fail << "unable to decompress " << comp_path_ << ": " << e << + info << "consider cleaning the build state"; + } + } + + void file_cache::entry:: + remove () + { + switch (state_) + { + case uninit: + { + // In this case we are cleaning the filesystem without having any idea + // about its state. As a result, if we couldn't remove the compressed + // file, then we don't attempt to remove the uncompressed file either + // since it could be an indicator that the compressed file is invalid. + // + if (try_rmfile_ignore_error (comp_path_)) + try_rmfile_ignore_error (path_); + break; + } + case uncomp: + { + try_rmfile_ignore_error (path_); + break; + } + case comp: + { + try_rmfile_ignore_error (comp_path_); + break; + } + case decomp: + { + // Both are valid so we are ok with failing to remove either. + // + try_rmfile_ignore_error (comp_path_); + try_rmfile_ignore_error (path_); + break; + } + case null: + assert (false); + } + } +} -- cgit v1.1