aboutsummaryrefslogtreecommitdiff
path: root/bbot/worker/worker.cxx
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2024-12-02 13:48:06 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2024-12-02 13:48:06 +0200
commit1c99f419fe91a9a39eb5faa056b9867de2d988a1 (patch)
tree3ac30c929cd4e0f3240476db214317d8d69c2e77 /bbot/worker/worker.cxx
parenta27a01a76740a5ba9b236eb20ccfa5c404020018 (diff)
Handle inability to serialize result manifest due to no disk space
Diffstat (limited to 'bbot/worker/worker.cxx')
-rw-r--r--bbot/worker/worker.cxx182
1 files changed, 118 insertions, 64 deletions
diff --git a/bbot/worker/worker.cxx b/bbot/worker/worker.cxx
index d413912..5a4d240 100644
--- a/bbot/worker/worker.cxx
+++ b/bbot/worker/worker.cxx
@@ -12,6 +12,7 @@
#include <cstring> // strchr(), strncmp()
#include <sstream>
#include <iostream>
+#include <system_error> // generic_category()
#include <libbutl/b.hxx>
#include <libbutl/pager.hxx>
@@ -1222,16 +1223,21 @@ run_tar (step_id step,
}
#endif
-// Upload compressed manifest to the specified TFTP URL with curl. Issue
-// diagnostics and throw failed on invalid manifest or process management
-// errors and throw io_error for input/output errors or non-zero curl exit.
+// Upload compressed manifest to the specified TFTP URL with curl. If disk
+// is false, then upload without creating a temporary file.
+//
+// Issue diagnostics and throw failed on invalid manifest or process
+// management errors and throw io_error for curl input/output errors or
+// non-zero curl exit. Also throw system_error with ENOSPC if unable to create
+// a temporary file due to insufficient disk space.
//
template <typename T>
static void
upload_manifest (tracer& trace,
const string& url,
const T& m,
- const string& what)
+ const string& what,
+ bool disk = true)
{
try
{
@@ -1242,74 +1248,88 @@ upload_manifest (tracer& trace,
// other hand, uploading from a file appears to work reliably (we still
// get an odd error on Windows from time to time with larger uploads).
//
- // Let's not break lines in the manifest values not to further increase
- // the size of the manifest encoded representation. Also here we don't
- // care much about readability of the manifest since it will only be read
- // by the bbot agent anyway.
- //
-#if 0
- // Note: need to add compression support if re-enable this.
- tftp_curl c (trace,
- path ("-"),
- nullfd,
- curl::put,
- url,
- "--tftp-blksize", tftp_blksize,
- "--max-time", tftp_put_timeout);
-
- manifest_serializer s (c.out, url, true /* long_lines */);
- m.serialize (s);
- c.out.close ();
-#else
auto_rmfile tmp;
+ if (disk)
+ {
+ try
+ {
+ try
+ {
+ tmp = auto_rmfile (path::temp_path (what + "-manifest.lz4"));
+ ofdstream ofs (tmp.path, fdopen_mode::binary);
+ olz4stream ozs (ofs, 9, 5 /* 256KB */, nullopt /* content_size */);
+
+ // Let's not break lines in the manifest values not to further
+ // increase the size of the manifest encoded representation. Also
+ // here we don't care much about readability of the manifest since
+ // it will only be read by the bbot agent anyway.
+ //
+ manifest_serializer s (ozs, tmp.path.string (), true /*long_lines*/);
+ m.serialize (s);
+
+ ozs.close ();
+ ofs.close ();
+ }
+ catch (const system_error& e)
+ {
+ if (e.code ().category () == std::generic_category () &&
+ e.code ().value () == ENOSPC)
+ throw_generic_error (ENOSPC);
+
+ fail << "unable to save " << what << " manifest: " << e;
+ }
+ }
+ catch (const io_error& e) // In case not derived from system_error.
+ {
+ fail << "unable to save " << what << " manifest: " << e;
+ }
+ }
+
try
{
- tmp = auto_rmfile (path::temp_path (what + "-manifest.lz4"));
- ofdstream ofs (tmp.path, fdopen_mode::binary);
- olz4stream ozs (ofs, 9, 5 /* 256KB */, nullopt /* content_size */);
- manifest_serializer s (ozs, tmp.path.string (), true /* long_lines */);
- m.serialize (s);
- ozs.close ();
- ofs.close ();
+ tftp_curl c (trace,
+ disk ? tmp.path : path ("-"),
+ nullfd,
+ curl::put,
+ url,
+ "--tftp-blksize", tftp_blksize,
+ "--max-time", tftp_put_timeout);
+
+ if (!disk)
+ {
+ // Note: it's simpler to compress than to deal with the variability
+ // everywhere else.
+ //
+ olz4stream ozs (c.out, 9, 5 /* 256KB */, nullopt /* content_size */);
+
+ manifest_serializer s (ozs, url, true /* long_lines */);
+ m.serialize (s);
+
+ ozs.close ();
+ c.out.close ();
+ }
+
+ if (!c.wait ())
+ throw_generic_ios_failure (EIO, "non-zero curl exit code");
}
- catch (const io_error& e) // In case not derived from system_error.
+ catch (const process_error& e)
{
- fail << "unable to save " << what << " manifest: " << e;
+ fail << "unable to execute curl: " << e;
}
catch (const system_error& e)
{
- fail << "unable to save " << what << " manifest: " << e;
- }
+ const auto& c (e.code ());
- tftp_curl c (trace,
- tmp.path,
- nullfd,
- curl::put,
- url,
- "--tftp-blksize", tftp_blksize,
- "--max-time", tftp_put_timeout);
-#endif
-
- if (!c.wait ())
- throw_generic_ios_failure (EIO, "non-zero curl exit code");
+ if (c.category () == generic_category ())
+ throw_generic_ios_failure (c.value (), e.what ());
+ else
+ throw_system_ios_failure (c.value (), e.what ());
+ }
}
catch (const manifest_serialization& e)
{
fail << "invalid " << what << " manifest: " << e.description;
}
- catch (const process_error& e)
- {
- fail << "unable to execute curl: " << e;
- }
- catch (const system_error& e)
- {
- const auto& c (e.code ());
-
- if (c.category () == generic_category ())
- throw_generic_ios_failure (c.value (), e.what ());
- else
- throw_system_ios_failure (c.value (), e.what ());
- }
}
static strings
@@ -6448,15 +6468,24 @@ build (size_t argc, const char* argv[])
//
return rm.status != result_status::abnormal ? 0 : 2;
}
+ //
+ // We use exit code 3 to signal an unsuccessful attempt to upload the result
+ // manifest and exit code 4 to signal that there was no disk space to
+ // serialize the manifest. See startup() for details.
+ //
catch (const io_error& e)
{
error << "unable to upload result manifest to " << url << ": " << e;
+ return 3;
}
+ catch (const system_error& e)
+ {
+ assert (e.code ().category () == std::generic_category () &&
+ e.code ().value () == ENOSPC);
- // We use exit code 3 to signal an unsuccessful attempt to upload the result
- // manifest. See startup() for details.
- //
- return 3;
+ error << "unable to serialize result manifest: " << e;
+ return 4;
+ }
}
// Parse the task_manifest::auxiliary_environment value into the list of
@@ -6593,7 +6622,8 @@ startup ()
task_manifest tm;
auto upload_result = [&trace, &tm] (result_status rs,
- operation_results&& ors)
+ operation_results&& ors,
+ bool disk = true)
{
const string url ("tftp://" + ops.tftp_host () + "/result.manifest.lz4");
@@ -6611,12 +6641,16 @@ startup ()
try
{
- upload_manifest (trace, url, rm, "result");
+ upload_manifest (trace, url, rm, "result", disk);
}
catch (const io_error& e)
{
fail << "unable to upload result manifest to " << url << ": " << e;
}
+ catch (const system_error& e)
+ {
+ fail << "unable to serialize result manifest: " << e;
+ }
};
try
@@ -6755,6 +6789,8 @@ startup ()
// execution when the interactive build breakpoint is reached. Thus, we
// don't redirect stdin to /dev/null.
//
+ // Exit code 1 signals abnormal worker termination.
+ //
// Exit code 2 signals abnormal termination but where the worker uploaded
// the result itself.
//
@@ -6762,6 +6798,9 @@ startup ()
// result manifest. There is no reason to retry (most likely there is
// nobody listening on the other end anymore).
//
+ // Exit code 4 signals the inability to serialize the result manifest due
+ // to insufficient disk space.
+ //
switch (run_io_exit (trace,
0 /* stdin */, 2 /* stdout */, 2 /* stderr */,
process_env (pp, aux_env),
@@ -6769,6 +6808,21 @@ startup ()
argv0.effect_string (),
os))
{
+ case 4:
+ {
+ // Note that while it may seem like abnormal is the more appropriate
+ // result, experience shows running out of disk space is not that
+ // uncommon and suspending the machine every time we hit this makes it
+ // unusable for the rest of the users.
+ //
+ operation_result r {
+ "configure",
+ result_status::abort,
+ "error: no disk space left to serialize result manifest\n"};
+
+ upload_result (result_status::abort, {move (r)}, false /* disk */);
+ return 1;
+ }
case 3:
case 2: return 1;
case 0: return 0;