diff options
-rw-r--r-- | bbot/agent.cli | 16 | ||||
-rw-r--r-- | bbot/agent.cxx | 374 | ||||
-rw-r--r-- | bbot/bbot-agent@.service | 12 | ||||
-rw-r--r-- | bbot/bootstrap-manifest | 57 | ||||
-rw-r--r-- | doc/manual.cli | 5 |
5 files changed, 426 insertions, 38 deletions
diff --git a/bbot/agent.cli b/bbot/agent.cli index 761580c..c4ee356 100644 --- a/bbot/agent.cli +++ b/bbot/agent.cli @@ -11,18 +11,25 @@ include <bbot/common.cli>; namespace bbot { { - "<options> <file>", + "<options> <name> <num> <id> ", " \h|SYNOPSIS| \cb{bbot-agent --help}\n \cb{bbot-agent --version}\n - \c{\b{bbot-agent} [<options>] <toolchain>} + \c{\b{bbot-agent} [<options>] <name> <num> <id>} \h|DESCRIPTION| \cb{bbot-agent} @@ TODO. + + The <name> argument is the toolchain name, <id> \- the toolchain id, + and <num> \- the toolchain number in this deployment. + + Note that on termination \cb{bbot-agent} may leave a working machine + snapshot behind. It is expected that the caller (normally Build OS + monitor) cleans them up before restarting the agent. " } @@ -30,6 +37,11 @@ namespace bbot { "\h|OPTIONS|" + bool --systemd-daemon + { + "Start as a simple systemd daemon." + } + size_t --cpu = 1 { "<num>", diff --git a/bbot/agent.cxx b/bbot/agent.cxx index 8c281fe..3e4f8dc 100644 --- a/bbot/agent.cxx +++ b/bbot/agent.cxx @@ -2,12 +2,20 @@ // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file +#include <limits.h> // PATH_MAX #include <signal.h> // signal() -#include <unistd.h> // sleep() +#include <unistd.h> // sleep(), readlink() #include <iostream> #include <butl/pager> +#include <butl/fdstream> +#include <butl/filesystem> // dir_iterator + +#include <butl/manifest-parser> +#include <butl/manifest-serializer> + +#include <bbot/manifest> #include <bbot/types> #include <bbot/utility> @@ -15,10 +23,302 @@ #include <bbot/diagnostics> #include <bbot/agent-options> +#include <bbot/bootstrap-manifest> + using namespace std; using namespace butl; using namespace bbot; +const string bs_prot ("1"); // Bootstrap protocol version. + +string tc_name; // Toolchain name. +string tc_num; // Toolchain number. +string tc_id; // Toolchain id. 
+ +template <typename T> +static T +parse_manifest (const path& f, const char* what, bool ignore_unknown = true) +{ + try + { + if (!file_exists (f)) + fail << what << " manifest file " << f << " does not exist"; + + ifdstream ifs (f); + manifest_parser mp (ifs, f.string ()); + return T (mp, ignore_unknown); + } + catch (const manifest_parsing& e) + { + fail << "invalid " << what << " manifest: " + << f << ':' << e.line << ':' << e.column << ": " << e.description + << endf; + } + catch (const io_error& e) + { + fail << "unable to read " << what << " manifest " << f << ": " << e + << endf; + } + catch (const system_error& e) // EACCES, etc. + { + fail << "unable to access " << what << " manifest " << f << ": " << e + << endf; + } +} + +/* + +static bootstrapped_machine_manifest +bootstrap_machine (const dir_path&); + +static machine_manifests +enumerate_machines (const dir_path& rd) +{ + tracer trace ("enumerate_machines"); + + machine_manifests r; + + // The first level are machine volumes. + // + for (const dir_entry& ve: dir_iterator (rd)) + { + const string vn (ve.path ().string ()); + + // Ignore hidden directories. + // + if (ve.type () != entry_type::directory || vn[0] == '.') + continue; + + const dir_path vd (dir_path (rd) /= vn); + + // Inside we have machines. + // + for (const dir_entry& me: dir_iterator (vd)) + { + const string mn (me.path ().string ()); + + if (me.type () != entry_type::directory || mn[0] == '.') + continue; + + const dir_path md (dir_path (vd) /= mn); + + // Our endgoal here is to obtain a bootstrapped snapshot of this machine + // while watching out for potential race conditions (machines being + // added/upgraded/removed; see the manual for details). + // + // So here is our overall plan: + // + // 1. Resolve current subvolume link for our bootstrap protocol. + // + // 2. If there is no link, cleanup and ignore this machine. + // + // 3. Try to create a snapshot of current subvolume (this operation is + // atomic). 
If failed (e.g., someone changed the link and removed the + // subvolume in the meantime), retry from #1. + // + // 4. Compare the snapshot to the already bootstrapped version (if any) + // and see if we need to re-bootstrap. If so, use the snapshot as a + // starting point. Rename to bootstrapped at the end (atomic). + // + const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P> + const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolc...> + bool te (dir_exists (tp)); + + auto delete_t = [&tp] () + { + // btrfs property set -ts $tp ro false + // btrfs subvolume delete $tp + }; + + for (size_t retry (0);; ++retry) + { + if (retry != 0) + sleep (1); + + // Resolve the link to subvolume path. + // + dir_path sp; // <name>-<P>.<R> + try + { + char b [PATH_MAX + 1]; + ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b))); + + if (r == -1) + { + if (errno != ENOENT) + throw_generic_error (errno); + } + else if (static_cast<size_t> (r) >= sizeof (b)) + throw_generic_error (EINVAL); + else + { + b[r] = '\0'; + sp = dir_path (b); + if (sp.relative ()) + sp = md / sp; + } + } + catch (const system_error& e) + { + fail << "unable to read subvolume link " << lp << ": " << e; + } + + // If the resolution fails, then this means there is no current + // machine subvolume (for this bootstrap protocol). In this case we + // clean up our toolchain subvolume (<name>-<toolchain>) and ignore + // this machine. + // + if (sp.empty ()) + { + if (te) + delete_t (); + + break; + } + + // <name>-<toolchain>-<xxx> + // + const dir_path xp (dir_path (md) /= + path::traits::temp_name (mn + '-' + tc_name)); + + // btrfs subvolume snapshot $sp $xp + if (false) + { + if (retry >= 10) + fail << "unable to snapshot subvolume " << sp; + + continue; + } + + // Load the (original) machine manifest. + // + auto mm ( + parse_manifest<machine_manifest> (sp / "manifest", "machine")); + + // If we already have <name>-<toolchain>, see if it needs to be re- + // bootstrapped. 
Things that render it obsolete: + // + // 1. New machine revision (compare machine ids). + // 2. New toolchain (compare toolchain ids). + // 3. New bbot/libbbot (compare versions). + // + // The last case has a complication: what should we do if we have + // bootstrapped a newer version of bbot? This would mean that we are + // about to be stopped and upgraded (and the upgraded version will + // probably be able to use the result). So we simply ignore this + // machine for this run. + + // Return -1 if older, 0 if the same, and +1 if newer. + // + auto compare_bbot = [] (const bootstrap_manifest& m) -> int + { + auto cmp = [&m] (const string& n, uint64_t v) -> int + { + auto i = m.versions.find (n); + return + i == m.versions.end () || i->second < v + ? -1 + : i->second > v ? 1 : 0; + }; + + // Start from the top assuming a new dependency cannot be added + // without changing the dependent's version. + // + int r; + return + (r = cmp ("bbot", BBOT_VERSION)) != 0 ? r : + (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r : + (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r : + (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0; + }; + + if (te) + { + auto bmm ( + parse_manifest<bootstrapped_machine_manifest> ( + tp / "manifest", + "bootstrapped machine")); + + if (bmm.machine.id != mm.id) + { + trace << "re-bootstrapping " << tp << ": new machine"; + te = false; + } + + if (bmm.toolchain.id != tc_id) + { + trace << "re-bootstrapping " << tp << ": new toolchain"; + te = false; + } + + if (int i = compare_bbot (bmm.bootstrap)) + { + if (i < 0) + { + trace << "re-bootstrapping " << tp << ": new bbot"; + te = false; + } + else + { + trace << "ignoring " << tp << ": newer bbot"; + // btrfs subvolume snapshot $xp + break; + } + } + + if (!te) + delete_t (); + } + + if (!te) + { + // Use the <name>-<toolchain>-<xxx> snapshot that we have made to + // bootstrap the new machine. Then atomically rename it to + // <name>-<toolchain>. 
+ // + bootstrapped_machine_manifest bmm (bootstrap_machine (xp)); + + try + { + mvdir (xp, tp); + } + catch (const system_error& e) + { + fail << "unable to rename " << xp << " to " << tp; + } + + te = true; + + // Check the bootstrapped bbot version as above and ignore this + // machine if it's newer than us. + // + if (int i = compare_bbot (bmm.bootstrap)) + { + assert (i > 0); + trace << "ignoring " << tp << ": newer bbot"; + break; + } + } + else + ;// btrfs subvolume snapshot $xp + + // Add the machine to the list. + // + // In order not to forget to clear new fields, we are instead going to + // create a new instance with just the required fields. + // + r.push_back (machine_manifest (mm.id, mm.name, mm.summary)); + + break; + } + } + } + + return r; +} + +*/ + extern "C" void handle_signal (int sig) { @@ -30,28 +330,36 @@ handle_signal (int sig) } } +// Right arrow followed by newline. +// +const char systemd_indent[] = "\xE2\x86\xB2\n"; + int main (int argc, char* argv[]) try { - // Map to systemd severity prefixes (see sd-daemon(3) for details). Note - // that here we assume we will never have location (like file name which - // would end up being before the prefix). - // - const char indent[] = "\xE2\x86\xB2\n"; // Right arrow followed by newline. + cli::argv_scanner scan (argc, argv, true); + agent_options ops (scan); - trace_indent = - fail.indent_ = - error.indent_ = - warn.indent_ = - info.indent_ = - text.indent_ = indent; + if (ops.systemd_daemon ()) + { + // Map to systemd severity prefixes (see sd-daemon(3) for details). Note + // that here we assume we will never have location (like file name which + // would end up being before the prefix). 
+ // + trace_indent = + fail.indent_ = + error.indent_ = + warn.indent_ = + info.indent_ = + text.indent_ = systemd_indent; - fail.type_ = "<3>"; - error.type_ = "<3>"; - warn.type_ = "<4>"; - info.type_ = "<6>"; - trace_type = "<7>"; + fail.type_ = "<3>"; + error.type_ = "<3>"; + warn.type_ = "<4>"; + info.type_ = "<6>"; + trace_type = "<7>"; + } tracer trace ("main"); @@ -64,16 +372,6 @@ try fail << "unable to ignore broken pipe (SIGPIPE) signal: " << system_error (errno, generic_category ()); // Sanitize. - // Handle SIGHUP and SIGTERM. - // - if (signal (SIGHUP, &handle_signal) == SIG_ERR || - signal (SIGTERM, &handle_signal) == SIG_ERR) - fail << "unable to set signal handler: " - << system_error (errno, generic_category ()); // Sanitize. - - cli::argv_scanner scan (argc, argv, true); - agent_options ops (scan); - // Version. // if (ops.version ()) @@ -99,15 +397,25 @@ try return p.wait () ? 0 : 1; } - if (argc != 2) - fail << "toolchain name excected" << + if (argc != 4) + fail << "toolchain name/id/num excected" << info << "run " << argv[0] << " --help for details"; - string tc (argv[1]); + tc_name = argv[1]; + tc_num = argv[2]; + tc_id = argv[3]; + + // Handle SIGHUP and SIGTERM. + // + if (signal (SIGHUP, &handle_signal) == SIG_ERR || + signal (SIGTERM, &handle_signal) == SIG_ERR) + fail << "unable to set signal handler: " + << system_error (errno, generic_category ()); // Sanitize. 
- info << "starting bbot agent for toolchain " << tc << - info << ops.cpu () << " CPU(s)" << - info << ops.ram () << " RAM(kB)"; + info << "bbot agent for " << tc_name << '/' << tc_num << + info << "toolchain id " << tc_id << + info << "CPU(s) " << ops.cpu () << + info << "RAM(kB) " << ops.ram (); for (;;) { diff --git a/bbot/bbot-agent@.service b/bbot/bbot-agent@.service index 64e07d2..496692f 100644 --- a/bbot/bbot-agent@.service +++ b/bbot/bbot-agent@.service @@ -4,9 +4,19 @@ After=default.target [Service] Type=simple + Environment=CPU=1 Environment=RAM=1048576 -ExecStart=/build/bbot/%i/bin/bbot-agent --cpu ${CPU} --ram ${RAM} %i +Environment=TOOLCHAIN_ID=123abc +Environment=TOOLCHAIN_NUM=1 + +ExecStart=/build/bbot/%i/bin/bbot-agent --systemd-daemon \ + --cpu ${CPU} \ + --ram ${RAM} \ + %i \ + ${TOOLCHAIN_NUM} \ + ${TOOLCHAIN_ID} + User=build Group=build WorkingDirectory=~ diff --git a/bbot/bootstrap-manifest b/bbot/bootstrap-manifest new file mode 100644 index 0000000..502127b --- /dev/null +++ b/bbot/bootstrap-manifest @@ -0,0 +1,57 @@ +// file : bbot/bootstrap-manifest -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BBOT_BOOTSTRAP_MANIFEST +#define BBOT_BOOTSTRAP_MANIFEST + +#include <map> + +#include <bbot/types> +#include <bbot/utility> + +#include <bbot/manifest> // machine_manifest + +namespace bbot +{ + // Toolchain manifest. + // + class toolchain_manifest + { + public: + + // Toolchain id (SHAXXX). + // + string id; + }; + + // Bootstrap result manifest. Uploaded by the worker to the agent's TFTP + // server. + // + class bootstrap_manifest + { + public: + + // Map of packages to their (numeric) versions that were used inside the + // bootstrapped machine. Used to make sure bbot agent/worker use the same + // versions. 
For example: + // + // libbbot-version: 1010100 # 1.1.1 + // bbot-version: 1010200 # 1.1.2 + // + std::map<string, uint64_t> versions; + }; + + // The manifest stored in <name>-<toolchain>/ consists of the machine + // manifest (original), toolchain manifest, and bootstrap result manifest. + // + class bootstrapped_machine_manifest + { + public: + machine_manifest machine; + toolchain_manifest toolchain; + bootstrap_manifest bootstrap; + }; +} + +#endif // BBOT_BOOTSTRAP_MANIFEST diff --git a/doc/manual.cli b/doc/manual.cli index 336e2dd..d5ab992 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -185,8 +185,9 @@ manifests is also sent by \c{bbot} agents to controllers. \li|\n\c{type: <machine-type>}\n - The machine type. Valid values are \c{vm} and \c{container}. Note that this - value is not sent by agents to controllers.| + The machine type. Valid values are \c{kvm} (QEMU/KVM virtual machine) and + \c{nspawn} (\c{systemd-nspawn} container). Note that this value is not sent + by agents to controllers.| \li|\n\c{summary: <string>}\n |