aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2021-03-18 09:46:03 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2021-03-18 09:46:03 +0200
commit562af3a7f3742bf57b007e904e0bb661a5da1dab (patch)
tree35c378f696c2e08d7c88342c5a94b928282b83bd
parentcbcae12587364f4cb7c44b50cc1ae4c8e1fb3bb3 (diff)
Implement synchronous compressed file cache
-rw-r--r--libbuild2/file-cache.cxx187
-rw-r--r--libbuild2/file-cache.hxx80
-rw-r--r--libbuild2/file-cache.ixx98
3 files changed, 340 insertions, 25 deletions
diff --git a/libbuild2/file-cache.cxx b/libbuild2/file-cache.cxx
new file mode 100644
index 0000000..107bf3f
--- /dev/null
+++ b/libbuild2/file-cache.cxx
@@ -0,0 +1,187 @@
+// file : libbuild2/file-cache.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/file-cache.hxx>
+
+#include <libbutl/lz4.hxx>
+#include <libbutl/filesystem.mxx> // entry_stat, path_entry()
+
+#include <libbuild2/filesystem.hxx> // exists(), try_rmfile()
+#include <libbuild2/diagnostics.hxx>
+
+using namespace butl;
+
+namespace build2
+{
+ // file_cache::entry
+ //
+ // Start writing a new cache entry: remove any stale compressed file, pin
+ // the entry, and return a write handle that refers to it.
+ //
+ // Note that the state is not changed here: it remains uninit until the
+ // returned handle's close() sets it to uncomp.
+ //
+ file_cache::write file_cache::entry::
+ init_new ()
+ {
+ assert (state_ == uninit);
+
+ // Remove stale compressed file if exists. While not strictly necessary
+ // (since the presence of the new uncompressed file will render the
+ // compressed one invalid), this makes things cleaner in case we don't get
+ // to compressing the new file (for example, if we fail and leave the
+ // uncompressed file behind for troubleshooting).
+ //
+ try_rmfile_ignore_error (comp_path_);
+
+ // This pin is released by the returned handle's destructor (~write()
+ // calls unpin()).
+ //
+ pin ();
+ return write (*this);
+ }
+
+ // Initialize the entry from what is currently on the filesystem: the state
+ // becomes uncomp if the uncompressed file exists and comp if only the
+ // compressed file exists. If neither exists, issue the fail diagnostics.
+ //
+ void file_cache::entry::
+ init_existing ()
+ {
+ assert (state_ == uninit);
+
+ // Determine the cache state from the filesystem state.
+ //
+ // First check for the uncompressed file. Its presence means that the
+ // compressed file, if exists, is invalid and we clean it up, similar to
+ // init_new().
+ //
+ if (exists (path_))
+ {
+ try_rmfile_ignore_error (comp_path_);
+ state_ = uncomp;
+ }
+ else if (exists (comp_path_))
+ {
+ state_ = comp;
+ }
+ else
+ fail << path_ << " (or its compressed variant) does not exist" <<
+ info << "consider cleaning the build state";
+ }
+
+ // Try to get the entry into the compressed-only (comp) state: compress the
+ // uncompressed file if that hasn't been done yet (uncomp) and then remove
+ // the uncompressed file.
+ //
+ // If compress() returns false, the state stays uncomp. If the uncompressed
+ // file removal returns false, the state stays decomp (both files present).
+ // Calling this function in any other state is a bug (asserted).
+ //
+ void file_cache::entry::
+ preempt ()
+ {
+ // Note that this function is called from destructors so it's best if it
+ // doesn't throw.
+ //
+ switch (state_)
+ {
+ case uncomp:
+ {
+ if (!compress ())
+ break;
+
+ state_ = decomp; // We now have both.
+ }
+ // Fall through.
+ case decomp:
+ {
+ // Only switch to comp if the uncompressed file is actually gone.
+ //
+ if (try_rmfile_ignore_error (path_))
+ state_ = comp;
+
+ break;
+ }
+ default:
+ assert (false);
+ }
+ }
+
+ // Compress the uncompressed file (path_) into the compressed file
+ // (comp_path_) using LZ4.
+ //
+ // Return true on success. Return false if path_entry() does not report the
+ // uncompressed file as present or if compression fails. In the latter case
+ // the failure is only traced and the (potentially partial) compressed file
+ // is removed.
+ //
+ bool file_cache::entry::
+ compress ()
+ {
+ tracer trace ("file_cache::entry::compress");
+
+ // The uncompressed size is passed to the compressor and is also used to
+ // report the compression ratio below.
+ //
+ pair<bool, entry_stat> st (
+ path_entry (path_,
+ true /* follow_symlinks */,
+ true /* ignore_error */));
+
+ if (!st.first)
+ return false;
+
+ try
+ {
+ ifdstream ifs (path_, fdopen_mode::binary, ifdstream::badbit);
+ ofdstream ofs (comp_path_, fdopen_mode::binary);
+
+ // Experience shows that for the type of content we typically cache
+ // using 1MB blocks results in almost the same compression as for 4MB.
+ //
+ uint64_t comp_size (
+ lz4::compress (ofs, ifs,
+ 1 /* compression_level (fastest) */,
+ 6 /* block_size_id (1MB) */,
+ st.second.size));
+
+ ofs.close ();
+
+ // Note that the uncompressed file can be empty in which case there is no
+ // meaningful ratio (and calculating one would divide by zero). Report
+ // 100% in this case.
+ //
+ l6 ([&]{trace << "compressed " << path_ << " to "
+ << (st.second.size != 0
+ ? comp_size * 100 / st.second.size
+ : 100) << '%';});
+ }
+ catch (const std::exception& e)
+ {
+ // Don't leave a partially-written compressed file behind.
+ //
+ l5 ([&]{trace << "unable to compress " << path_ << ": " << e;});
+ try_rmfile_ignore_error (comp_path_);
+ return false;
+ }
+
+ return true;
+ }
+
+ // Decompress the compressed file (comp_path_) into the uncompressed file
+ // (path_) using LZ4. On failure, remove the (potentially partial)
+ // uncompressed file and issue diagnostics suggesting to clean the build
+ // state.
+ //
+ // Note that this function does not change the entry state.
+ //
+ void file_cache::entry::
+ decompress ()
+ {
+ try
+ {
+ ifdstream ifs (comp_path_, fdopen_mode::binary, ifdstream::badbit);
+ ofdstream ofs (path_, fdopen_mode::binary);
+
+ lz4::decompress (ofs, ifs);
+
+ ofs.close ();
+ }
+ catch (const std::exception& e)
+ {
+ // Don't leave a partially-written uncompressed file behind: its mere
+ // presence makes init_existing() treat the compressed file as invalid
+ // and remove it. Note that by now both streams have been destroyed so
+ // the file is no longer open.
+ //
+ try_rmfile_ignore_error (path_);
+
+ fail << "unable to decompress " << comp_path_ << ": " << e <<
+ info << "consider cleaning the build state";
+ }
+ }
+
+ // Remove the filesystem entries that correspond to the current state,
+ // ignoring removal errors. Calling this function on a null entry is a bug
+ // (asserted).
+ //
+ void file_cache::entry::
+ remove ()
+ {
+ switch (state_)
+ {
+ case uninit:
+ {
+ // In this case we are cleaning the filesystem without having any idea
+ // about its state. As a result, if we couldn't remove the compressed
+ // file, then we don't attempt to remove the uncompressed file either
+ // since it could be an indicator that the compressed file is invalid.
+ //
+ if (try_rmfile_ignore_error (comp_path_))
+ try_rmfile_ignore_error (path_);
+ break;
+ }
+ case uncomp:
+ {
+ // Only the uncompressed file is expected to be present.
+ //
+ try_rmfile_ignore_error (path_);
+ break;
+ }
+ case comp:
+ {
+ // Only the compressed file is expected to be present.
+ //
+ try_rmfile_ignore_error (comp_path_);
+ break;
+ }
+ case decomp:
+ {
+ // Both are valid so we are ok with failing to remove either.
+ //
+ try_rmfile_ignore_error (comp_path_);
+ try_rmfile_ignore_error (path_);
+ break;
+ }
+ case null:
+ assert (false);
+ }
+ }
+}
diff --git a/libbuild2/file-cache.hxx b/libbuild2/file-cache.hxx
index 8d9d8dd..1502fb8 100644
--- a/libbuild2/file-cache.hxx
+++ b/libbuild2/file-cache.hxx
@@ -73,11 +73,26 @@ namespace build2
// Note also that a noop implementation of this caching semantics (that is,
// one that simply saves the file on disk) is file_cache::entry that is just
// auto_rmfile.
+
+ // The synchronous compressed file cache implementation.
+ //
+ // If the cache entry is no longer pinned, this implementation compresses
+ // the content and removes the uncompressed file all as part of the call that
+ // caused the entry to become unpinned.
//
- class /*LIBBUILD2_SYMEXPORT*/ file_cache
+ // In order to deal with interruptions during compression, when recreating
+ // the cache entry state from the filesystem state, this implementation
+ // treats the presence of the uncompressed file as an indication that the
+ // compressed file, if any, is invalid.
+ //
+ class scheduler;
+
+ class file_cache
{
public:
+ class entry;
+
// A cache entry write handle. During the lifetime of this object the
// filesystem entry can be opened for writing and written to.
//
@@ -91,6 +106,24 @@ namespace build2
public:
void
close ();
+
+ write (): entry_ (nullptr) {}
+ ~write ();
+
+ // Move-to-NULL-only type.
+ //
+ write (write&&);
+ write (const write&) = delete;
+ write& operator= (write&&);
+ write& operator= (const write&) = delete;
+
+ private:
+ friend class entry;
+
+ explicit
+ write (entry& e): entry_ (&e) {}
+
+ entry* entry_;
};
// A cache entry read handle. During the lifetime of this object the
@@ -99,13 +132,29 @@ namespace build2
class read
{
public:
+ read (): entry_ (nullptr) {}
~read ();
+
+ // Move-to-NULL-only type.
+ //
+ read (read&&);
+ read (const read&) = delete;
+ read& operator= (read&&);
+ read& operator= (const read&) = delete;
+
+ private:
+ friend class entry;
+
+ explicit
+ read (entry& e): entry_ (&e) {}
+
+ entry* entry_;
};
// A cache entry handle. When it is destroyed, a temporary entry is
// automatically removed from the filesystem.
//
- class entry
+ class LIBBUILD2_SYMEXPORT entry
{
public:
using path_type = build2::path;
@@ -133,6 +182,8 @@ namespace build2
// Pinning.
//
+ // Note that every call to pin() should have a matching unpin().
+ //
void
pin ();
@@ -145,7 +196,7 @@ namespace build2
explicit operator bool () const;
- // Move-to-NULL-entry-only type.
+ // Move-to-NULL-only type.
//
entry (entry&&);
entry (const entry&) = delete;
@@ -157,7 +208,24 @@ namespace build2
entry (path_type, bool);
~entry ();
- path_type path_;
+ void
+ preempt ();
+
+ bool
+ compress ();
+
+ void
+ decompress ();
+
+ void
+ remove ();
+
+ enum state {null, uninit, uncomp, comp, decomp};
+
+ state state_ = null;
+ path_type path_; // Uncompressed path.
+ path_type comp_path_; // Compressed path.
+ size_t pin_ = 0; // Pin count.
};
// Create a cache entry corresponding to the specified filesystem path.
@@ -169,6 +237,10 @@ namespace build2
// A shortcut for creating and initializing an existing permanent entry.
//
+ // Note that this function creates a permanent entry right away and if
+ // init_existing() fails, no filesystem cleanup of any kind will be
+ // performed.
+ //
entry
create_existing (path);
diff --git a/libbuild2/file-cache.ixx b/libbuild2/file-cache.ixx
index 6d73387..2b76fb6 100644
--- a/libbuild2/file-cache.ixx
+++ b/libbuild2/file-cache.ixx
@@ -1,8 +1,6 @@
// file : libbuild2/file-cache.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <libbuild2/filesystem.hxx> // try_rmfile()
-
namespace build2
{
// file_cache::write
@@ -10,6 +8,32 @@ namespace build2
inline void file_cache::write::
close ()
{
+ entry_->state_ = entry::uncomp;
+ }
+
+ inline file_cache::write::
+ ~write ()
+ {
+ if (entry_ != nullptr)
+ entry_->unpin ();
+ }
+
+ inline file_cache::write::
+ write (write&& e)
+ : entry_ (e.entry_)
+ {
+ e.entry_ = nullptr;
+ }
+
+ inline file_cache::write& file_cache::write::
+ operator= (write&& e)
+ {
+ if (this != &e)
+ {
+ assert (entry_ == nullptr);
+ swap (entry_, e.entry_);
+ }
+ return *this;
}
// file_cache::read
@@ -17,65 +41,94 @@ namespace build2
inline file_cache::read::
~read ()
{
+ if (entry_ != nullptr)
+ entry_->unpin ();
}
- // file_cache::entry
- //
- inline const path& file_cache::entry::
- path () const
+ inline file_cache::read::
+ read (read&& e)
+ : entry_ (e.entry_)
{
- return path_;
+ e.entry_ = nullptr;
}
- inline file_cache::write file_cache::entry::
- init_new ()
+ inline file_cache::read& file_cache::read::
+ operator= (read&& e)
{
- return write ();
+ if (this != &e)
+ {
+ assert (entry_ == nullptr);
+ swap (entry_, e.entry_);
+ }
+ return *this;
}
- inline void file_cache::entry::
- init_existing ()
+ // file_cache::entry
+ //
+ inline const path& file_cache::entry::
+ path () const
{
+ return path_;
}
inline file_cache::read file_cache::entry::
open ()
{
- return read ();
+ // The entry must be past the uninit state (see write::close() and
+ // init_existing()).
+ //
+ assert (state_ != null && state_ != uninit);
+
+ // If we only have the compressed file, recreate the uncompressed one.
+ // After that we have both, which is the decomp state.
+ //
+ if (state_ == comp)
+ {
+ decompress ();
+ state_ = decomp;
+ }
+
+ // This pin is released by the returned handle's destructor (~read() calls
+ // unpin()).
+ //
+ pin ();
+ return read (*this);
}
inline void file_cache::entry::
pin ()
{
+ ++pin_;
}
inline void file_cache::entry::
unpin ()
{
+ // Once the last pin is released and there is an uncompressed file, try to
+ // compress it and/or remove it (see preempt()).
+ //
+ // Note that pin_ is unsigned so an unpin() without a matching pin() would
+ // wrap around rather than reach zero.
+ //
+ if (--pin_ == 0 && (state_ == uncomp || state_ == decomp))
+ preempt ();
}
inline file_cache::entry::
operator bool () const
{
- return !path_.empty ();
+ return state_ != null;
}
inline file_cache::entry::
entry (path_type p, bool t)
- : temporary (t), path_ (move (p))
+ : temporary (t),
+ state_ (uninit),
+ path_ (move (p)),
+ comp_path_ (path_ + ".lz4"),
+ pin_ (1)
{
}
inline file_cache::entry::
~entry ()
{
- if (!path_.empty () && temporary)
- try_rmfile (path_, true /* ignore_errors */);
+ if (state_ != null && temporary)
+ remove ();
}
inline file_cache::entry::
entry (entry&& e)
- : temporary (e.temporary), path_ (move (e.path_))
+ : temporary (e.temporary),
+ state_ (e.state_),
+ path_ (move (e.path_)),
+ comp_path_ (move (e.comp_path_)),
+ pin_ (e.pin_)
{
+ // Leave the source in the null state (this is a move-to-NULL-only type).
+ // Without this the moved-from entry would still test true in operator
+ // bool and, if temporary, its destructor would call remove() even though
+ // the files now belong to this entry.
+ //
+ e.state_ = null;
}
@@ -84,9 +137,12 @@ namespace build2
{
if (this != &e)
{
- assert (path_.empty ());
+ assert (state_ == null);
temporary = e.temporary;
+ state_ = e.state_;
path_ = move (e.path_);
+ comp_path_ = move (e.comp_path_);
+ pin_ = e.pin_;
}
return *this;
}
@@ -108,9 +164,9 @@ namespace build2
}
inline string file_cache::
- compressed_extension (const char*)
+ compressed_extension (const char* e)
{
- return string ();
+ return (e != nullptr ? string (e) : string ()) + ".lz4";
}
inline file_cache::