aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/file-cache.hxx
blob: d6904ed38c1c1db2941591ab4cbac49a7be975c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
// file      : libbuild2/file-cache.hxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#ifndef LIBBUILD2_FILE_CACHE_HXX
#define LIBBUILD2_FILE_CACHE_HXX

#include <libbuild2/types.hxx>
#include <libbuild2/forward.hxx>
#include <libbuild2/utility.hxx>

#include <libbuild2/export.hxx>

namespace build2
{
  // We sometimes have intermediate build results that must be stored and
  // accessed as files (for example, partially-preprocessed C/C++ translation
  // units; those .i/.ii files). These files can be quite large which can lead
  // to excessive disk usage (for example, the .ii files can be several MB
  // each and can end up dominating object file sizes in a build with debug
  // information). These files are also often temporary which means writing
  // them to disk is really a waste.
  //
  // The file cache attempts to address this by still presenting a file-like
  // entry (which can be a real file or a named pipe) but potentially storing
  // the file contents in memory and/or compressed.
  //
  // Each cache entry is identified by the filesystem entry path that will be
  // written to or read from. The file cache reserves a filesystem entry path
  // that is derived by adding a compression extension to the main entry path
  // (for example, .ii.lz4). When cleaning intermediate build results that are
  // managed by the cache, the rule must clean such a reserved path in
  // addition to the main entry path (see compressed_extension() below).
  //
  // While the cache is MT-safe (that is, we can insert multiple entries
  // concurrently), each entry is expected to be accessed serially by a single
  // thread. Furthermore, each entry can either be written to or read from at
  // any give time and it can only be read from by a single reader at a time.
  // In other words, there meant to be a single cache entry for any given path
  // and it is not meant to be shared.
  //
  // The underlying filesystem entry can be either temporary or permanent. A
  // temporary entry only exists during the build, normally between the match
  // and execute phases. A permanent entry exists across builds. Note,
  // however, that a permanent entry is often removed in cases of an error and
  // sometimes a temporary entry is left behind for diagnostics. It is also
  // possible that the distinction only becomes known some time after the
  // entry has been created. As a result, all entries by default start as
  // temporary and can later be made permanent if desired.
  //
  // A cache entry can be pinned or unpinned. A cache entry is created pinned.
  // A cache entry being written to or read from remains pinned.
  //
  // An unpinned entry can be preempted. Preempting a cache entry can mean any
  // of the following:
  //
  //   - An in-memory content is compressed (but stays in memory).
  //
  //   - An in-memory content (compressed or not) is flushed to disk (with or
  //     without compression).
  //
  //   - An uncompressed on-disk content is compressed.
  //
  // Naturally, any of the above degrees of preemption make accessing the
  // contents of a cache entry slower. Note also that pinned/unpinned and
  // temporary/permanent are independent and a temporary entry does not need
  // to be unpinned to be removed.
  //
  // After creation, a cache entry must be initialized by either writing new
  // contents to the filesystem entry or by using an existing (permanent)
  // filesystem entry. Once initialized, an entry can be opened for reading,
  // potentially multiple times.
  //
  // Note also that a noop implementation of this caching semantics (that is,
  // one that simply saves the file on disk) is file_cache::entry that is just
  // auto_rmfile.

  // The synchronous LZ4 on-disk compression file cache implementation.
  //
  // If the cache entry is no longer pinned, this implementation compresses
  // the content and removes the uncompressed file all as part of the call
  // that caused the entry to become unpinned.
  //
  // In order to deal with interruptions during compression, when recreating
  // the cache entry state from the filesystem state, this implementation
  // treats the presence of the uncompressed file as an indication that the
  // compressed file, if any, is invalid.
  //
  class file_cache
  {
  public:
    // If compression is disabled, then this implementation becomes equivalent
    // to the noop implementation.
    //
    explicit
    file_cache (bool compress = true);

    class entry;

    // A cache entry write handle. During the lifetime of this object the
    // filesystem entry can be opened for writing and written to.
    //
    // A successful write must be terminated with an explicit call to close()
    // (similar semantics to ofdstream). A write handle that is destroyed
    // without a close() call is treated as an unsuccessful write and the
    // initialization can be attempted again.
    //
    class write
    {
    public:
      void
      close ();

      write (): entry_ (nullptr) {}

      // Move-to-NULL-only type.
      //
      write (write&&);
      write (const write&) = delete;
      write& operator= (write&&);
      write& operator= (const write&) = delete;

      ~write ();

    private:
      friend class entry;

      explicit
      write (entry& e): entry_ (&e) {}

      entry* entry_;
    };

    // A cache entry read handle. During the lifetime of this object the
    // filesystem entry can be opened for reading and read from.
    //
    class read
    {
    public:
      read (): entry_ (nullptr) {}

      // Move-to-NULL-only type.
      //
      read (read&&);
      read (const read&) = delete;
      read& operator= (read&&);
      read& operator= (const read&) = delete;

      ~read ();

    private:
      friend class entry;

      explicit
      read (entry& e): entry_ (&e) {}

      entry* entry_;
    };

    // A cache entry handle. When it is destroyed, a temporary entry is
    // automatically removed from the filesystem.
    //
    class LIBBUILD2_SYMEXPORT entry
    {
    public:
      using path_type = build2::path;

      bool temporary = true;

      // The returned reference is valid and stable for the lifetime of the
      // entry handle.
      //
      const path_type&
      path () const;

      // Initialization.
      //
      write
      init_new ();

      void
      init_existing ();

      // Reading.
      //
      read
      open ();

      // Pinning.
      //
      // Note that every call to pin() should have a matching unpin().
      //
      void
      pin ();

      void
      unpin ();

      // NULL handle.
      //
      entry () = default;

      explicit operator bool () const;

      // Move-to-NULL-only type.
      //
      entry (entry&&);
      entry (const entry&) = delete;
      entry& operator= (entry&&);
      entry& operator= (const entry&) = delete;

      ~entry ();

    private:
      friend class file_cache;

      entry (path_type, bool, bool);

      void
      preempt ();

      bool
      compress ();

      void
      decompress ();

      void
      remove ();

      enum state {null, uninit, uncomp, comp, decomp};

      state     state_ = null;
      path_type path_;           // Uncompressed path.
      path_type comp_path_;      // Compressed path (empty if disabled).
      size_t    pin_ = 0;        // Pin count.
    };

    // Create a cache entry corresponding to the specified filesystem path.
    // The path must be absolute and normalized. The temporary argument may be
    // used to hint whether the entry is likely to be temporary or permanent.
    //
    entry
    create (path, optional<bool> temporary);

    // A shortcut for creating and initializing an existing permanent entry.
    //
    // Note that this function creates a permanent entry right away and if
    // init_existing() fails, no filesystem cleanup of any kind will be
    // performed.
    //
    entry
    create_existing (path);

    // Return the compressed filesystem entry extension (with the leading dot)
    // or empty string if no compression is used by this cache implementation.
    //
    // If the passed extension is not NULL, then it is included as a first-
    // level extension into the returned value (useful to form extensions for
    // clean_extra()).
    //
    string
    compressed_extension (const char* ext = nullptr);

  private:
    bool compress_;
  };
}

#include <libbuild2/file-cache.ixx>

#endif // LIBBUILD2_FILE_CACHE_HXX