From 562af3a7f3742bf57b007e904e0bb661a5da1dab Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Thu, 18 Mar 2021 09:46:03 +0200
Subject: Implement synchronous compressed file cache

---
 libbuild2/file-cache.cxx | 204 +++++++++++++++++++++++++++++++++++++++++++++++
 libbuild2/file-cache.hxx |  80 +++++++++++++++++++-
 libbuild2/file-cache.ixx | 101 +++++++++++++++++++------
 3 files changed, 360 insertions(+), 25 deletions(-)
 create mode 100644 libbuild2/file-cache.cxx

(limited to 'libbuild2')

diff --git a/libbuild2/file-cache.cxx b/libbuild2/file-cache.cxx
new file mode 100644
index 0000000..107bf3f
--- /dev/null
+++ b/libbuild2/file-cache.cxx
@@ -0,0 +1,204 @@
+// file      : libbuild2/file-cache.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/file-cache.hxx>
+
+#include <libbutl/lz4.hxx>
+#include <libbutl/filesystem.mxx> // entry_stat, path_entry()
+
+#include <libbuild2/filesystem.hxx> // exists(), try_rmfile()
+#include <libbuild2/diagnostics.hxx>
+
+using namespace butl;
+
+namespace build2
+{
+  // file_cache::entry
+  //
+  file_cache::write file_cache::entry::
+  init_new ()
+  {
+    assert (state_ == uninit);
+
+    // Remove stale compressed file if exists. While not strictly necessary
+    // (since the presence of the new uncompressed file will render the
+    // compressed one invalid), this makes things cleaner in case we don't get
+    // to compressing the new file (for example, if we fail and leave the
+    // uncompressed file behind for troubleshooting).
+    //
+    try_rmfile_ignore_error (comp_path_);
+
+    pin ();
+    return write (*this);
+  }
+
+  void file_cache::entry::
+  init_existing ()
+  {
+    assert (state_ == uninit);
+
+    // Determine the cache state from the filesystem state.
+    //
+    // First check for the uncompressed file. Its presence means that the
+    // compressed file, if exists, is invalid and we clean it up, similar to
+    // init_new().
+    //
+    if (exists (path_))
+    {
+      try_rmfile_ignore_error (comp_path_);
+      state_ = uncomp;
+    }
+    else if (exists (comp_path_))
+    {
+      state_ = comp;
+    }
+    else
+      fail << path_ << " (or its compressed variant) does not exist" <<
+        info << "consider cleaning the build state";
+  }
+
+  // Compress the content (unless already compressed) and remove the
+  // uncompressed file. Called by unpin() once the pin count drops to zero.
+  //
+  void file_cache::entry::
+  preempt ()
+  {
+    // Note that this function is called from destructors so it's best if it
+    // doesn't throw.
+    //
+    switch (state_)
+    {
+    case uncomp:
+      {
+        if (!compress ())
+          break;
+
+        state_ = decomp; // We now have both.
+      }
+      // Fall through.
+    case decomp:
+      {
+        if (try_rmfile_ignore_error (path_))
+          state_ = comp;
+
+        break;
+      }
+    default:
+      assert (false);
+    }
+  }
+
+  // Compress path_ into comp_path_. Return false if path_entry() finds no
+  // uncompressed file or if the compression fails (in which case the
+  // potentially incomplete compressed file is removed).
+  //
+  bool file_cache::entry::
+  compress ()
+  {
+    tracer trace ("file_cache::entry::compress");
+
+    pair<bool, entry_stat> st (
+      path_entry (path_,
+                  true /* follow_symlinks */,
+                  true /* ignore_error */));
+
+    if (!st.first)
+      return false;
+
+    try
+    {
+      ifdstream ifs (path_, fdopen_mode::binary, ifdstream::badbit);
+      ofdstream ofs (comp_path_, fdopen_mode::binary);
+
+      // Experience shows that for the type of content we typically cache
+      // using 1MB blocks results in almost the same compression as for 4MB.
+      //
+      uint64_t comp_size (
+        lz4::compress (ofs, ifs,
+                       1 /* compression_level (fastest) */,
+                       6 /* block_size_id (1MB) */,
+                       st.second.size));
+
+      ofs.close ();
+
+      // Note that the file can be empty so avoid dividing by zero.
+      //
+      l6 ([&]{trace << "compressed " << path_ << " to "
+                    << (st.second.size != 0
+                        ? comp_size * 100 / st.second.size
+                        : 100) << '%';});
+    }
+    catch (const std::exception& e)
+    {
+      l5 ([&]{trace << "unable to compress " << path_ << ": " << e;});
+      try_rmfile_ignore_error (comp_path_);
+      return false;
+    }
+
+    return true;
+  }
+
+  // Decompress comp_path_ into path_. Issue diagnostics and fail if this
+  // cannot be done.
+  //
+  void file_cache::entry::
+  decompress ()
+  {
+    try
+    {
+      ifdstream ifs (comp_path_, fdopen_mode::binary, ifdstream::badbit);
+      ofdstream ofs (path_, fdopen_mode::binary);
+
+      lz4::decompress (ofs, ifs);
+
+      ofs.close ();
+    }
+    catch (const std::exception& e)
+    {
+      fail << "unable to decompress " << comp_path_ << ": " << e <<
+        info << "consider cleaning the build state";
+    }
+  }
+
+  // Remove the filesystem entries according to the current state, ignoring
+  // any errors. Called by the destructor for temporary entries.
+  //
+  void file_cache::entry::
+  remove ()
+  {
+    switch (state_)
+    {
+    case uninit:
+      {
+        // In this case we are cleaning the filesystem without having any idea
+        // about its state. As a result, if we couldn't remove the compressed
+        // file, then we don't attempt to remove the uncompressed file either
+        // since it could be an indicator that the compressed file is invalid.
+        //
+        if (try_rmfile_ignore_error (comp_path_))
+          try_rmfile_ignore_error (path_);
+        break;
+      }
+    case uncomp:
+      {
+        try_rmfile_ignore_error (path_);
+        break;
+      }
+    case comp:
+      {
+        try_rmfile_ignore_error (comp_path_);
+        break;
+      }
+    case decomp:
+      {
+        // Both are valid so we are ok with failing to remove either.
+        //
+        try_rmfile_ignore_error (comp_path_);
+        try_rmfile_ignore_error (path_);
+        break;
+      }
+    case null:
+      assert (false);
+    }
+  }
+}
diff --git a/libbuild2/file-cache.hxx b/libbuild2/file-cache.hxx
index 8d9d8dd..1502fb8 100644
--- a/libbuild2/file-cache.hxx
+++ b/libbuild2/file-cache.hxx
@@ -73,11 +73,26 @@ namespace build2
   // Note also that a noop implementation of this caching semantics (that is,
   // one that simply saves the file on disk) is file_cache::entry that is just
   // auto_rmfile.
+
+  // The synchronous compressed file cache implementation.
+  //
+  // If the cache entry is no longer pinned, this implementation compresses
+  // the content and removes the uncompressed file all as part of the call that
+  // caused the entry to become unpinned.
   //
-  class /*LIBBUILD2_SYMEXPORT*/ file_cache
+  // In order to deal with interruptions during compression, when recreating
+  // the cache entry state from the filesystem state, this implementation
+  // treats the presence of the uncompressed file as an indication that the
+  // compressed file, if any, is invalid.
+  //
+  class scheduler;
+
+  class file_cache
   {
   public:
+    class entry;
+
     // A cache entry write handle. During the lifetime of this object the
     // filesystem entry can be opened for writing and written to.
     //
@@ -91,6 +106,24 @@ namespace build2
     public:
       void
       close ();
+
+      write (): entry_ (nullptr) {}
+      ~write ();
+
+      // Move-to-NULL-only type.
+      //
+      write (write&&);
+      write (const write&) = delete;
+      write& operator= (write&&);
+      write& operator= (const write&) = delete;
+
+    private:
+      friend class entry;
+
+      explicit
+      write (entry& e): entry_ (&e) {}
+
+      entry* entry_;
     };
 
     // A cache entry read handle. During the lifetime of this object the
@@ -99,13 +132,29 @@ namespace build2
     class read
     {
     public:
+      read (): entry_ (nullptr) {}
       ~read ();
+
+      // Move-to-NULL-only type.
+      //
+      read (read&&);
+      read (const read&) = delete;
+      read& operator= (read&&);
+      read& operator= (const read&) = delete;
+
+    private:
+      friend class entry;
+
+      explicit
+      read (entry& e): entry_ (&e) {}
+
+      entry* entry_;
     };
 
     // A cache entry handle. When it is destroyed, a temporary entry is
     // automatically removed from the filesystem.
     //
-    class entry
+    class LIBBUILD2_SYMEXPORT entry
     {
     public:
       using path_type = build2::path;
@@ -133,6 +182,8 @@ namespace build2
 
       // Pinning.
       //
+      // Note that every call to pin() should have a matching unpin().
+      //
       void
       pin ();
 
@@ -145,7 +196,7 @@ namespace build2
       explicit
       operator bool () const;
 
-      // Move-to-NULL-entry-only type.
+      // Move-to-NULL-only type.
       //
       entry (entry&&);
       entry (const entry&) = delete;
@@ -157,7 +208,24 @@ namespace build2
       entry (path_type, bool);
       ~entry ();
 
-      path_type path_;
+      void
+      preempt ();
+
+      bool
+      compress ();
+
+      void
+      decompress ();
+
+      void
+      remove ();
+
+      enum state {null, uninit, uncomp, comp, decomp};
+
+      state     state_ = null;
+      path_type path_;      // Uncompressed path.
+      path_type comp_path_; // Compressed path.
+      size_t    pin_ = 0;   // Pin count.
     };
 
     // Create a cache entry corresponding to the specified filesystem path.
@@ -169,6 +237,10 @@ namespace build2
 
     // A shortcut for creating and initializing an existing permanent entry.
     //
+    // Note that this function creates a permanent entry right away and if
+    // init_existing() fails, no filesystem cleanup of any kind will be
+    // performed.
+    //
     entry
     create_existing (path);
 
diff --git a/libbuild2/file-cache.ixx b/libbuild2/file-cache.ixx
index 6d73387..2b76fb6 100644
--- a/libbuild2/file-cache.ixx
+++ b/libbuild2/file-cache.ixx
@@ -1,8 +1,6 @@
 // file      : libbuild2/file-cache.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#include <libbuild2/filesystem.hxx> // try_rmfile()
-
 namespace build2
 {
   // file_cache::write
@@ -10,6 +8,32 @@ namespace build2
   inline void file_cache::write::
   close ()
   {
+    entry_->state_ = entry::uncomp;
+  }
+
+  inline file_cache::write::
+  ~write ()
+  {
+    if (entry_ != nullptr)
+      entry_->unpin ();
+  }
+
+  inline file_cache::write::
+  write (write&& e)
+      : entry_ (e.entry_)
+  {
+    e.entry_ = nullptr;
+  }
+
+  inline file_cache::write& file_cache::write::
+  operator= (write&& e)
+  {
+    if (this != &e)
+    {
+      assert (entry_ == nullptr);
+      swap (entry_, e.entry_);
+    }
+    return *this;
   }
 
   // file_cache::read
@@ -17,65 +41,95 @@ namespace build2
   inline file_cache::read::
   ~read ()
   {
+    if (entry_ != nullptr)
+      entry_->unpin ();
   }
 
-  // file_cache::entry
-  //
-  inline const path& file_cache::entry::
-  path () const
+  inline file_cache::read::
+  read (read&& e)
+      : entry_ (e.entry_)
   {
-    return path_;
+    e.entry_ = nullptr;
   }
 
-  inline file_cache::write file_cache::entry::
-  init_new ()
+  inline file_cache::read& file_cache::read::
+  operator= (read&& e)
   {
-    return write ();
+    if (this != &e)
+    {
+      assert (entry_ == nullptr);
+      swap (entry_, e.entry_);
+    }
+    return *this;
   }
 
-  inline void file_cache::entry::
-  init_existing ()
+  // file_cache::entry
+  //
+  inline const path& file_cache::entry::
+  path () const
   {
+    return path_;
   }
 
   inline file_cache::read file_cache::entry::
   open ()
   {
-    return read ();
+    assert (state_ != null && state_ != uninit);
+
+    if (state_ == comp)
+    {
+      decompress ();
+      state_ = decomp;
+    }
+
+    pin ();
+    return read (*this);
   }
 
   inline void file_cache::entry::
   pin ()
   {
+    ++pin_;
   }
 
   inline void file_cache::entry::
   unpin ()
   {
+    if (--pin_ == 0 && (state_ == uncomp || state_ == decomp))
+      preempt ();
   }
 
   inline file_cache::entry::
   operator bool () const
   {
-    return !path_.empty ();
+    return state_ != null;
   }
 
   inline file_cache::entry::
   entry (path_type p, bool t)
-    : temporary (t), path_ (move (p))
+      : temporary (t),
+        state_ (uninit),
+        path_ (move (p)),
+        comp_path_ (path_ + ".lz4"),
+        pin_ (1)
   {
   }
 
   inline file_cache::entry::
   ~entry ()
   {
-    if (!path_.empty () && temporary)
-      try_rmfile (path_, true /* ignore_errors */);
+    if (state_ != null && temporary)
+      remove ();
   }
 
   inline file_cache::entry::
   entry (entry&& e)
-    : temporary (e.temporary), path_ (move (e.path_))
+      : temporary (e.temporary),
+        state_ (e.state_),
+        path_ (move (e.path_)),
+        comp_path_ (move (e.comp_path_)),
+        pin_ (e.pin_)
   {
+    e.state_ = null; // Leave the source NULL so its destructor is a noop.
   }
 
@@ -84,9 +138,14 @@ namespace build2
   {
     if (this != &e)
     {
-      assert (path_.empty ());
+      assert (state_ == null);
       temporary = e.temporary;
+      state_ = e.state_;
       path_ = move (e.path_);
+      comp_path_ = move (e.comp_path_);
+      pin_ = e.pin_;
+
+      e.state_ = null; // Leave the source NULL so its destructor is a noop.
     }
     return *this;
   }
@@ -108,9 +167,9 @@ namespace build2
   }
 
   inline string file_cache::
-  compressed_extension (const char*)
+  compressed_extension (const char* e)
   {
-    return string ();
+    return (e != nullptr ? string (e) : string ()) + ".lz4";
   }
 
   inline file_cache::
--
cgit v1.1