From c8ace1ee0a6cab5fd4ea2f084ea436cfa513637d Mon Sep 17 00:00:00 2001
From: Karen Arutyunov <karen@codesynthesis.com>
Date: Thu, 13 Jul 2017 22:50:15 +0300
Subject: Make use of wildcards in buildfiles

---
 bbot/agent/agent.cli            |  232 ++++++++
 bbot/agent/agent.cxx            | 1248 +++++++++++++++++++++++++++++++++++++++
 bbot/agent/agent.hxx            |   45 ++
 bbot/agent/machine-manifest.cxx |  355 +++++++++++
 bbot/agent/machine-manifest.hxx |  118 ++++
 bbot/agent/machine.cxx          |  474 +++++++++++++++
 bbot/agent/machine.hxx          |   84 +++
 bbot/agent/tftp.cxx             |  137 +++++
 bbot/agent/tftp.hxx             |   47 ++
 9 files changed, 2740 insertions(+)
 create mode 100644 bbot/agent/agent.cli
 create mode 100644 bbot/agent/agent.cxx
 create mode 100644 bbot/agent/agent.hxx
 create mode 100644 bbot/agent/machine-manifest.cxx
 create mode 100644 bbot/agent/machine-manifest.hxx
 create mode 100644 bbot/agent/machine.cxx
 create mode 100644 bbot/agent/machine.hxx
 create mode 100644 bbot/agent/tftp.cxx
 create mode 100644 bbot/agent/tftp.hxx

(limited to 'bbot/agent')
diff --git a/bbot/agent/agent.cli b/bbot/agent/agent.cli
new file mode 100644
index 0000000..a5dbe01
--- /dev/null
+++ b/bbot/agent/agent.cli
@@ -0,0 +1,232 @@
+// file      : bbot/agent.cli
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+include <bbot/common.cli>;
+
+"\section=1"
+"\name=bbot-agent"
+"\summary=build bot agent"
+
+namespace bbot
+{
+  {
+    "<options> <url>",
+
+    "
+    \h|SYNOPSIS|
+
+    \cb{bbot-agent --help}\n
+    \cb{bbot-agent --version}\n
+    \c{\b{bbot-agent} [<options>] <url>...}
+
+    \h|DESCRIPTION|
+
+    \cb{bbot-agent} @@ TODO.
+
+    Note that on termination \cb{bbot-agent} may leave a working machine
+    snapshot behind. It is expected that the caller (normally Build OS
+    monitor) cleans them up before restarting the agent.
+    "
+  }
+
+  class agent_options
+  {
+    "\h|OPTIONS|"
+
+    bool --help {"Print usage information and exit."}
+    bool --version {"Print version and exit."}
+
+    bool --systemd-daemon
+    {
+      "Run as a simple systemd daemon."
+    }
+
+    path --auth-key
+    {
+      "<file>",
+      "Private key for the public key-based agent authentication. If not
+       specified, then the agent will not be able to request tasks from
+       controllers that require authentication.
+
+       The file is expected to contain a single PEM-encoded private key
+       without a password. A suitable key can be generated using the
+       following command:
+
+       \
+       $ openssl genrsa 4096 >key.pem
+       \
+       "
+    }
+
+    path --openssl = "openssl"
+    {
+      "<path>",
+      "The openssl program to be used for crypto operations. You can also
+       specify additional options that should be passed to the openssl program
+       with \cb{--openssl-option}. If the openssl program is not explicitly
+       specified, then \cb{bbot-agent} will use \cb{openssl} by default."
+    }
+
+    strings --openssl-option
+    {
+      "<opt>",
+      "Additional option to be passed to the openssl program (see
+       \cb{--openssl} for details). Repeat this option to specify multiple
+       openssl options."
+    }
+
+    size_t --cpu = 1
+    {
+      "<num>",
+      "Number of CPUs (threads) to use, 1 by default."
+    }
+
+    size_t --ram (1024 * 1024) // 1G
+    {
+      "<num>",
+      "Amount of RAM (in kB) to use, 1G by default."
+    }
+
+    string --toolchain-name = "default"
+    {
+      "<str>",
+      "Toolchain name, \cb{default} by default."
+    }
+
+    uint16_t --toolchain-num = 1
+    {
+      "<num>",
+      "Toolchain number, 1 by default."
+    }
+
+    standard_version --toolchain-ver
+    {
+      "<stdver>",
+      "Toolchain version. If unspecified, then the agent's version will be
+       used (which will be imprecise for snapshot versions)."
+    }
+
+    string --toolchain-id
+    {
+      "<str>",
+      "Toolchain id. If unspecified or empty, then no re-bootstrapping on
+       toolchain changes will be performed (which is primarily useful for
+       testing)."
+    }
+
+    strings --trust
+    {
+      "<fingerprint>",
+      "Trust repository certificate with a SHA256 <fingerprint>."
+    }
+
+    dir_path --machines = "/build/machines/"
+    {
+      "<dir>",
+      "The location of the build machines, \cb{/build/machines/} by default."
+    }
+
+    dir_path --tftp = "/build/tftp/"
+    {
+      "<dir>",
+      "The location of the TFTP server root, \cb{/build/tftp/} by default."
+    }
+
+    uint16_t --tftp-port = 23400
+    {
+      "<num>",
+      "TFTP server port base, 23400 by default. The actual port is calculated
+       by adding the toolchain number \c{--toolchain-num} to this value."
+    }
+
+    size_t --bootstrap-timeout = 900
+    {
+      "<sec>",
+      "Maximum number of seconds to wait for machine bootstrap completion,
+       900 (15 minutes) by default."
+    }
+
+    size_t --bootstrap-retries = 2
+    {
+      "<num>",
+      "Number of time to retry a mis-booted bootstrap, 2 by default."
+    }
+
+    size_t --build-timeout = 1800
+    {
+      "<sec>",
+      "Maximum number of seconds to wait for build completion, 1800 (30
+       minutes) by default."
+    }
+
+    size_t --build-retries = 2
+    {
+      "<num>",
+      "Number of time to retry a mis-booted build, 2 by default."
+    }
+
+    size_t --request-timeout = 300
+    {
+      "<sec>",
+      "Maximum number of seconds to wait for controller request completion,
+       300 (5 minutes) by default."
+    }
+
+    uint16_t --verbose = 1
+    {
+      "<level>",
+      "Set the diagnostics verbosity to <level> between 0 and 6 with level 1
+       being the default."
+    }
+
+    // Testing options.
+    //
+    bool --dump-machines
+    {
+      "Dump the available machines to \cb{stdout}, (re)-bootstrapping any if
+       necessary, and exit."
+    }
+
+    bool --dump-task
+    {
+      "Dump the received build task to \cb{stdout} and exit."
+    }
+
+    bool --dump-result
+    {
+      "Dump the obtained build result to \cb{stdout} and exit."
+    }
+
+    bool --fake-bootstrap
+    {
+      "Fake the machine bootstrap process by creating the expected bootstrapped
+       machine manifest."
+    }
+
+    bool --fake-build
+    {
+      "Fake the package building process by creating the aborted build result."
+    }
+
+    path --fake-machine
+    {
+      "<file>",
+      "Fake the machine enumeration process by reading the machine header
+       manifest from <file> (or \cb{stdin} if <file> is '\cb{-}')."
+    }
+
+    path --fake-request
+    {
+      "<file>",
+      "Fake the task request process by reading the task manifest from <file>
+       (or \cb{stdin} if <file> is '\cb{-}')."
+    }
+  };
+
+  "
+  \h|EXIT STATUS|
+
+  Non-zero exit status is returned in case of an error.
+  "
+}
diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx
new file mode 100644
index 0000000..ff697db
--- /dev/null
+++ b/bbot/agent/agent.cxx
@@ -0,0 +1,1248 @@
+// file      : bbot/agent/agent.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/agent.hxx>
+
+#include <pwd.h>    // getpwuid()
+#include <limits.h> // PATH_MAX
+#include <signal.h> // signal()
+#include <stdlib.h> // rand_r()
+#include <unistd.h> // sleep(), realink(), getuid(), fsync()
+
+#include <net/if.h>     // ifreq
+#include <netinet/in.h> // sockaddr_in
+#include <arpa/inet.h>  // inet_ntop()
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include <chrono>
+#include <iostream>
+
+#include <libbutl/pager.hxx>
+#include <libbutl/sha256.hxx>
+#include <libbutl/openssl.hxx>
+#include <libbutl/filesystem.hxx> // dir_iterator
+
+#include <libbbot/manifest.hxx>
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+#include <bbot/diagnostics.hxx>
+
+#include <bbot/bootstrap-manifest.hxx>
+
+#include <bbot/agent/tftp.hxx>
+#include <bbot/agent/machine.hxx>
+#include <bbot/agent/machine-manifest.hxx>
+
+using namespace std;
+using namespace butl;
+using namespace bbot;
+
+namespace bbot
+{
+  agent_options ops;
+
+  const string bs_prot ("1");
+
+  string           tc_name;
+  uint16_t         tc_num;
+  standard_version tc_ver;
+  string           tc_id;
+
+  string hname;
+  uid_t  uid;
+  string uname;
+}
+
+static void
+file_sync (const path& f)
+{
+  auto_fd fd (fdopen (f, fdopen_mode::in));
+  if (fsync (fd.get ()) != 0)
+    throw_system_error (errno);
+}
+
+// The btrfs tool likes to print informational messages, like "Created
+// snapshot such and such". Luckily, it writes them to stdout while proper
+// diagnostics to stderr.
+//
+template <typename... A>
+inline void
+run_btrfs (tracer& t, A&&... a)
+{
+  if (verb >= 4)
+    run_io (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...);
+  else
+    run_io (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+template <typename... A>
+inline butl::process_exit::code_type
+btrfs_exit (tracer& t, A&&... a)
+{
+  return verb >= 4
+    ? run_io_exit (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...)
+    : run_io_exit (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+// Bootstrap the machine. Return the bootstrapped machine manifest if
+// successful and nullopt otherwise (in which case the machine directory
+// should be cleaned and the machine ignored for now).
+//
+static optional<bootstrapped_machine_manifest>
+bootstrap_machine (const dir_path& md,
+                   const machine_manifest& mm,
+                   optional<bootstrapped_machine_manifest> obmm)
+{
+  tracer trace ("bootstrap_machine", md.string ().c_str ());
+
+  bootstrapped_machine_manifest r {
+    mm,
+    toolchain_manifest {tc_id.empty () ? "bogus" : tc_id},
+    bootstrap_manifest {
+      bootstrap_manifest::versions_type {
+        {"bbot",    standard_version (BBOT_VERSION_STR)},
+        {"libbbot", standard_version (LIBBBOT_VERSION_STR)},
+        {"libbpkg", standard_version (LIBBPKG_VERSION_STR)},
+        {"libbutl", standard_version (LIBBUTL_VERSION_STR)}
+      }
+    }
+  };
+
+  if (ops.fake_bootstrap ())
+  {
+    r.machine.mac = "de:ad:be:ef:de:ad";
+  }
+  else
+  try
+  {
+    string br ("br1"); // Using private bridge for now.
+
+    // Start the TFTP server (server chroot is --tftp). Map:
+    //
+    // GET requests to .../toolchains/<name>/*
+    // PUT requests to .../bootstrap/<name>/*
+    //
+    auto_rmdir arm ((dir_path (ops.tftp ()) /= "bootstrap") /= tc_name);
+    try_mkdir_p (arm.path ());
+
+    // Bootstrap result manifest.
+    //
+    path mf (arm.path () / "manifest");
+    try_rmfile (mf);
+
+    // Note that unlike build, here we use the same VM snapshot for retries,
+    // which is not ideal.
+    //
+    for (size_t retry (0);; ++retry)
+    {
+      tftp_server tftpd ("Gr  ^/?(.+)$  /toolchains/" + tc_name + "/\\1\n" +
+                         "Pr  ^/?(.+)$  /bootstrap/" + tc_name + "/\\1\n",
+                         ops.tftp_port () + tc_num);
+
+      l3 ([&]{trace << "tftp server on port " << tftpd.port ();});
+
+      // Start the machine.
+      //
+      unique_ptr<machine> m (
+        start_machine (md,
+                       mm,
+                       obmm ? obmm->machine.mac : nullopt,
+                       br,
+                       tftpd.port ()));
+
+      {
+        // If we are terminating with an exception then force the machine down.
+        // Failed that, the machine's destructor will block waiting for its
+        // completion.
+        //
+        auto mg (
+          make_exception_guard (
+            [&m, &md] ()
+            {
+              info << "trying to force machine " << md << " down";
+              try {m->forcedown ();} catch (const failed&) {}
+            }));
+
+        // What happens if the bootstrap process hangs? The simple thing would
+        // be to force the machine down after some timeout and then fail. But
+        // that won't be very helpful for investigating the cause. So instead
+        // the plan is to suspend it after some timeout, issue diagnostics
+        // (without failing and which Build OS monitor will relay to the
+        // admin), and wait for the external intervention.
+        //
+        auto soft_fail = [&md, &m] (const char* msg)
+        {
+          {
+            diag_record dr (error);
+            dr << msg << " for machine " << md << ", suspending";
+            m->print_info (dr);
+          }
+          m->suspend ();
+          m->wait ();
+          info << "resuming after machine suspension";
+          return nullopt;
+        };
+
+        // The first request should be the toolchain download. Wait for up to
+        // 5 minutes for that to arrive. In a sense we use it as an indication
+        // that the machine has booted and the bootstrap process has started.
+        // Why wait so long you may wonder? Well, we may be using a new MAC
+        // address and operating systems like Windows may need to digest that.
+        //
+        size_t to;
+        const size_t startup_to   (5 * 60);
+        const size_t bootstrap_to (ops.bootstrap_timeout ());
+        const size_t shutdown_to  (5 * 60);
+
+        // This can mean two things: machine mis-configuration or what we
+        // euphemistically call a "mis-boot": the VM failed to boot for some
+        // unknown/random reason. Mac OS is particularly know for suffering
+        // from this. So the strategy is to retry it a couple of times and
+        // then suspend for investigation.
+        //
+        if (!tftpd.serve ((to = startup_to)))
+        {
+          if (retry > ops.bootstrap_retries ())
+            return soft_fail ("bootstrap startup timeout");
+
+          warn << "machine " << mm.name << " appears to have "
+               << "mis-booted, retrying";
+
+          try {m->forcedown (false);} catch (const failed&) {}
+          continue;
+        }
+
+        l3 ([&]{trace << "completed startup in " << startup_to - to << "s";});
+
+        // Next the bootstrap process may download additional toolchain
+        // archives, build things, and then upload the result manifest. So on
+        // our side we serve TFTP requests while periodically checking for the
+        // manifest file. To workaround some obscure filesystem races (the
+        // file's mtime/size is updated several seconds later; maybe tmpfs
+        // issue?), we periodically re-check.
+        //
+        for (to = bootstrap_to; to != 0; tftpd.serve (to, 2))
+        {
+          if (file_exists (mf))
+          {
+            file_sync (mf);
+            if (!file_empty (mf))
+              break;
+          }
+        }
+
+        if (to == 0)
+          return soft_fail ("bootstrap timeout");
+
+        l3 ([&]{trace << "completed bootstrap in " << bootstrap_to - to << "s";});
+
+        // Shut the machine down cleanly.
+        //
+        if (!m->shutdown ((to = shutdown_to)))
+          return soft_fail ("bootstrap shutdown timeout");
+
+        l3 ([&]{trace << "completed shutdown in " << shutdown_to - to << "s";});
+      }
+
+      // Parse the result manifest.
+      //
+      r.bootstrap = parse_manifest<bootstrap_manifest> (mf, "bootstrap");
+
+      r.machine.mac = m->mac; // Save the MAC address.
+
+      break;
+    }
+  }
+  catch (const system_error& e)
+  {
+    fail << "bootstrap error: " << e;
+  }
+
+  serialize_manifest (r, md / "manifest", "bootstrapped machine");
+  return r;
+}
+
+// Return available machines and their directories as a parallel array.
+//
+static pair<bootstrapped_machine_manifests, dir_paths>
+enumerate_machines (const dir_path& machines)
+try
+{
+  tracer trace ("enumerate_machines", machines.string ().c_str ());
+
+  bootstrapped_machine_manifests rm;
+  dir_paths rd;
+
+  if (ops.fake_machine_specified ())
+  {
+    auto mh (
+      parse_manifest<machine_header_manifest> (
+        ops.fake_machine (), "machine header"));
+
+    rm.push_back (
+      bootstrapped_machine_manifest {
+        machine_manifest {
+          mh.id,
+          mh.name,
+          mh.summary,
+          machine_type::kvm,
+          string ("de:ad:be:ef:de:ad"),
+          nullopt},
+        toolchain_manifest {tc_id},
+        bootstrap_manifest {}
+      });
+
+    rd.push_back (dir_path (ops.machines ()) /= mh.name); // For diagnostics.
+
+    return make_pair (move (rm), move (rd));
+  }
+
+  // The first level are machine volumes.
+  //
+  for (const dir_entry& ve: dir_iterator (machines))
+  {
+    const string vn (ve.path ().string ());
+
+    // Ignore hidden directories.
+    //
+    if (ve.type () != entry_type::directory || vn[0] == '.')
+      continue;
+
+    const dir_path vd (dir_path (machines) /= vn);
+
+    // Inside we have machines.
+    //
+    try
+    {
+      for (const dir_entry& me: dir_iterator (vd))
+      {
+        const string mn (me.path ().string ());
+
+        if (me.type () != entry_type::directory || mn[0] == '.')
+          continue;
+
+        const dir_path md (dir_path (vd) /= mn);
+
+        // Our endgoal here is to obtain a bootstrapped snapshot of this
+        // machine while watching out for potential race conditions (machines
+        // being added/upgraded/removed; see the manual for details).
+        //
+        // So here is our overall plan:
+        //
+        // 1. Resolve current subvolume link for our bootstrap protocol.
+        //
+        // 2. If there is no link, cleanup and ignore this machine.
+        //
+        // 3. Try to create a snapshot of current subvolume (this operation is
+        //    atomic). If failed (e.g., someone changed the link and removed
+        //    the subvolume in the meantime), retry from #1.
+        //
+        // 4. Compare the snapshot to the already bootstrapped version (if
+        //    any) and see if we need to re-bootstrap. If so, use the snapshot
+        //    as a starting point. Rename to bootstrapped at the end (atomic).
+        //
+        dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
+        dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolchain>
+        bool te (dir_exists (tp));
+
+        auto delete_t = [&tp, &trace] ()
+        {
+          run_btrfs (trace, "property", "set", "-ts", tp, "ro", "false");
+          run_btrfs (trace, "subvolume", "delete", tp);
+        };
+
+        for (size_t retry (0);; ++retry)
+        {
+          if (retry != 0)
+            sleep (1);
+
+          // Resolve the link to subvolume path.
+          //
+          dir_path sp; // <name>-<P>.<R>
+          try
+          {
+            char b [PATH_MAX + 1];
+            ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+
+            if (r == -1)
+            {
+              if (errno != ENOENT)
+                throw_generic_error (errno);
+            }
+            else if (static_cast<size_t> (r) >= sizeof (b))
+              throw_generic_error (EINVAL);
+            else
+            {
+              b[r] = '\0';
+              sp = dir_path (b);
+              if (sp.relative ())
+                sp = md / sp;
+            }
+          }
+          catch (const system_error& e)
+          {
+            fail << "unable to read subvolume link " << lp << ": " << e;
+          }
+
+          // If the resolution fails, then this means there is no current
+          // machine subvolume (for this bootstrap protocol). In this case we
+          // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
+          // this machine.
+          //
+          if (sp.empty ())
+          {
+            if (te)
+              delete_t ();
+
+            l3 ([&]{trace << "skipping " << md << ": no subvolume link";});
+            break;
+          }
+
+          // <name>-<toolchain>-<xxx>
+          //
+          const dir_path xp (
+            dir_path (md) /= path::traits::temp_name (mn + '-' + tc_name));
+
+          if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0)
+          {
+            if (retry >= 10)
+              fail << "unable to snapshot subvolume " << sp;
+
+            continue;
+          }
+
+          // Load the (original) machine manifest.
+          //
+          auto mm (
+            parse_manifest<machine_manifest> (sp / "manifest", "machine"));
+
+          // If we already have <name>-<toolchain>, see if it needs to be re-
+          // bootstrapped. Things that render it obsolete:
+          //
+          // 1. New machine revision  (compare machine ids).
+          // 2. New toolchain         (compare toolchain ids).
+          // 3. New bbot/libbbot      (compare versions).
+          //
+          // The last case has a complication: what should we do if we have
+          // bootstrapped a newer version of bbot? This would mean that we are
+          // about to be stopped and upgraded (and the upgraded version will
+          // probably be able to use the result). So we simply ignore this
+          // machine for this run.
+
+          // Return -1 if older, 0 if the same, and +1 if newer.
+          //
+          auto compare_bbot = [] (const bootstrap_manifest& m) -> int
+          {
+            auto cmp = [&m] (const string& n, const char* v) -> int
+            {
+              standard_version sv (v);
+              auto i = m.versions.find (n);
+
+              return (i == m.versions.end () || i->second < sv
+                      ? -1
+                      : i->second > sv ? 1 : 0);
+            };
+
+            // Start from the top assuming a new dependency cannot be added
+            // without changing the dependent's version.
+            //
+            int r;
+            return
+              (r = cmp ("bbot",       BBOT_VERSION_STR)) != 0 ? r :
+              (r = cmp ("libbbot", LIBBBOT_VERSION_STR)) != 0 ? r :
+              (r = cmp ("libbpkg", LIBBPKG_VERSION_STR)) != 0 ? r :
+              (r = cmp ("libbutl", LIBBUTL_VERSION_STR)) != 0 ? r : 0;
+          };
+
+          optional<bootstrapped_machine_manifest> bmm;
+          if (te)
+          {
+            bmm = parse_manifest<bootstrapped_machine_manifest> (
+              tp / "manifest", "bootstrapped machine");
+
+            if (bmm->machine.id != mm.id)
+            {
+              l3 ([&]{trace << "re-bootstrapping " << tp << ": new machine";});
+              te = false;
+            }
+
+            if (!tc_id.empty () && bmm->toolchain.id != tc_id)
+            {
+              l3 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";});
+              te = false;
+            }
+
+            if (int i = compare_bbot (bmm->bootstrap))
+            {
+              if (i < 0)
+              {
+                l3 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";});
+                te = false;
+              }
+              else
+              {
+                l3 ([&]{trace << "ignoring " << tp << ": old bbot";});
+                run_btrfs (trace, "subvolume", "delete", xp);
+                break;
+              }
+            }
+
+            if (!te)
+              delete_t ();
+          }
+          else
+            l3 ([&]{trace << "bootstrapping " << tp;});
+
+          if (!te)
+          {
+            // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
+            // bootstrap the new machine. Then atomically rename it to
+            // <name>-<toolchain>.
+            //
+            bmm = bootstrap_machine (xp, mm, move (bmm));
+
+            if (!bmm)
+            {
+              l3 ([&]{trace << "ignoring " << tp << ": failed to bootstrap";});
+              run_btrfs (trace, "subvolume", "delete", xp);
+              break;
+            }
+
+            try
+            {
+              mvdir (xp, tp);
+            }
+            catch (const system_error& e)
+            {
+              fail << "unable to rename " << xp << " to " << tp;
+            }
+
+            l2 ([&]{trace << "bootstrapped " << bmm->machine.name;});
+
+            // Check the bootstrapped bbot version as above and ignore this
+            // machine if it's newer than us.
+            //
+            if (int i = compare_bbot (bmm->bootstrap))
+            {
+              if (i > 0)
+              {
+                l3 ([&]{trace << "ignoring " << tp << ": old bbot";});
+                break;
+              }
+              else
+                warn << "bootstrapped " << tp << " bbot worker is older "
+                     << "than agent; assuming test setup";
+            }
+          }
+          else
+            run_btrfs (trace, "subvolume", "delete", xp);
+
+          // Add the machine to the lists.
+          //
+          rm.push_back (move (*bmm));
+          rd.push_back (move (tp));
+
+          break;
+        }
+      }
+    }
+    catch (const system_error& e)
+    {
+      fail << "unable to iterate over " << vd << ": " << e << endf;
+    }
+  }
+
+  return make_pair (move (rm), move (rd));
+}
+catch (const system_error& e)
+{
+  fail << "unable to iterate over " << machines << ": " << e << endf;
+}
+
+static result_manifest
+perform_task (const dir_path& md,
+              const bootstrapped_machine_manifest& mm,
+              const task_manifest& tm)
+try
+{
+  tracer trace ("perform_task", md.string ().c_str ());
+
+  result_manifest r {
+    tm.name,
+    tm.version,
+    result_status::abort,
+    operation_results {}};
+
+  if (ops.fake_build ())
+    return r;
+
+  // The overall plan is as follows:
+  //
+  // 1. Snapshot the (bootstrapped) machine.
+  //
+  // 2. Save the task manifest to the TFTP directory (to be accessed by the
+  //    worker).
+  //
+  // 3. Start the TFTP server and the machine.
+  //
+  // 4. Serve TFTP requests while watching out for the result manifest.
+  //
+  // 5. Clean up (force the machine down and delete the snapshot).
+  //
+
+  // TFTP server mapping (server chroot is --tftp):
+  //
+  // GET requests to .../build/<name>/get/*
+  // PUT requests to .../build/<name>/put/*
+  //
+  auto_rmdir arm ((dir_path (ops.tftp ()) /= "build") /= tc_name);
+
+  dir_path gd (dir_path (arm.path ()) /= "get");
+  dir_path pd (dir_path (arm.path ()) /= "put");
+
+  try_mkdir_p (gd);
+  try_mkdir_p (pd);
+
+  path tf (gd / "manifest"); // Task manifest file.
+  path rf (pd / "manifest"); // Result manifest file.
+
+  serialize_manifest (tm, tf, "task");
+
+  if (ops.fake_machine_specified ())
+  {
+    // Simply wait for the file to appear.
+    //
+    for (size_t i (0); !file_exists (rf); sleep (1))
+      if (i++ % 10 == 0)
+        l3 ([&]{trace << "waiting for result manifest";});
+
+    r = parse_manifest<result_manifest> (rf, "result");
+  }
+  else
+  {
+    try_rmfile (rf);
+
+    // <name>-<toolchain>-<xxx>
+    //
+    const dir_path xp (
+      md.directory () /= path::traits::temp_name (md.leaf ().string ()));
+
+    string br ("br1"); // Using private bridge for now.
+
+    for (size_t retry (0);; ++retry)
+    {
+      if (retry != 0)
+        run_btrfs (trace, "subvolume", "delete", xp);
+
+      run_btrfs (trace, "subvolume", "snapshot", md, xp);
+
+      // Start the TFTP server.
+      //
+      tftp_server tftpd ("Gr  ^/?(.+)$  /build/" + tc_name + "/get/\\1\n" +
+                         "Pr  ^/?(.+)$  /build/" + tc_name + "/put/\\1\n",
+                         ops.tftp_port () + tc_num);
+
+      l3 ([&]{trace << "tftp server on port " << tftpd.port ();});
+
+      // Start the machine.
+      //
+      unique_ptr<machine> m (
+        start_machine (xp,
+                       mm.machine,
+                       mm.machine.mac,
+                       br,
+                       tftpd.port ()));
+
+      // Note: the machine handling logic is similar to bootstrap.
+      //
+      {
+        auto mg (
+          make_exception_guard (
+            [&m, &xp] ()
+            {
+              info << "trying to force machine " << xp << " down";
+              try {m->forcedown ();} catch (const failed&) {}
+            }));
+
+        auto soft_fail = [&xp, &m, &r] (const char* msg)
+        {
+          {
+            diag_record dr (error);
+            dr << msg << " for machine " << xp << ", suspending";
+            m->print_info (dr);
+          }
+          m->suspend ();
+          m->wait ();
+          info << "resuming after machine suspension";
+          return r;
+        };
+
+        // The first request should be the task manifest download. Wait for up
+        // to 60 seconds for that to arrive. In a sense we use it as an
+        // indication that the machine has booted and the worker process has
+        // started.
+        //
+        size_t to;
+        const size_t startup_to (60);
+        const size_t build_to   (ops.build_timeout ());
+
+        if (!tftpd.serve ((to = startup_to)))
+        {
+          if (retry > ops.build_retries ())
+            return soft_fail ("build startup timeout");
+
+          warn << "machine " << mm.machine.name << " appears to have "
+               << "mis-booted, retrying";
+
+          try {m->forcedown (false);} catch (const failed&) {}
+          continue;
+        }
+
+        l3 ([&]{trace << "completed startup in " << startup_to - to << "s";});
+
+        // Next the worker builds things and then uploads the result manifest.
+        // So on our side we serve TFTP requests while checking for the
+        // manifest file. To workaround some obscure filesystem races (the
+        // file's mtime/size is updated several seconds later; maybe tmpfs
+        // issue?), we periodically re-check.
+        //
+        for (to = build_to; to != 0; tftpd.serve (to, 2))
+        {
+          if (file_exists (rf))
+          {
+            file_sync (rf);
+            if (!file_empty (rf))
+              break;
+          }
+        }
+
+        if (to == 0)
+          return soft_fail ("build timeout");
+
+        l3 ([&]{trace << "completed build in " << build_to - to << "s";});
+
+        // Parse the result manifest.
+        //
+        try
+        {
+          r = parse_manifest<result_manifest> (rf, "result", false);
+        }
+        catch (const failed&)
+        {
+          r.status = result_status::abnormal; // Soft-fail below.
+        }
+
+        if (r.status == result_status::abnormal)
+        {
+          // If the build terminated abnormally, suspend the machine for
+          // investigation.
+          //
+          return soft_fail ("build terminated abnormally");
+        }
+        else
+        {
+          // Force the machine down (there is no need wasting time on clean
+          // shutdown since the next step is to drop the snapshot). Also fail
+          // softly if things go badly.
+          //
+          try {m->forcedown (false);} catch (const failed&) {}
+        }
+      }
+
+      run_btrfs (trace, "subvolume", "delete", xp);
+      break;
+    }
+  }
+
+  // Update package name/version if the returned value as "unknown".
+  //
+  if (r.version == bpkg::version ("0"))
+  {
+    assert (r.status == result_status::abnormal);
+
+    r.name = tm.name;
+    r.version = tm.version;
+  }
+
+  return r;
+}
+catch (const system_error& e)
+{
+  fail << "build error: " << e << endf;
+}
+
+extern "C" void
+handle_signal (int sig)
+{
+  switch (sig)
+  {
+  case SIGHUP:  exit (3); // Unimplemented feature.
+  case SIGTERM: exit (0);
+  default:      assert (false);
+  }
+}
+
+int
+main (int argc, char* argv[])
+try
+{
+  cli::argv_scanner scan (argc, argv, true);
+  ops.parse (scan);
+
+  verb = ops.verbose ();
+
+  if (ops.systemd_daemon ())
+    systemd_diagnostics (true); // With critical errors.
+
+  tracer trace ("main");
+
+  uid = getuid ();
+  uname = getpwuid (uid)->pw_name;
+
+  {
+    char buf[HOST_NAME_MAX + 1];
+
+    if (gethostname (buf, sizeof (buf)) == -1)
+      fail << "unable to obtain hostname: "
+           << system_error (errno, generic_category ()); // Sanitize.
+
+    hname = buf;
+  }
+
+  // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if
+  // the pipe reading end is closed. Note that by default this signal
+  // terminates a process. Also note that there is no way to disable this
+  // behavior on a file descriptor basis or for the write() function call.
+  //
+  if (signal (SIGPIPE, SIG_IGN) == SIG_ERR)
+    fail << "unable to ignore broken pipe (SIGPIPE) signal: "
+         << system_error (errno, generic_category ()); // Sanitize.
+
+  // Version.
+  //
+  if (ops.version ())
+  {
+    cout << "bbot-agent " << BBOT_VERSION_ID << endl
+         << "libbbot " << LIBBBOT_VERSION_ID << endl
+         << "libbpkg " << LIBBBOT_VERSION_ID << endl
+         << "libbutl " << LIBBUTL_VERSION_ID << endl
+         << "Copyright (c) 2014-2017 Code Synthesis Ltd" << endl
+         << "TBC; All rights reserved" << endl;
+
+    return 0;
+  }
+
+  // Help.
+  //
+  if (ops.help ())
+  {
+    pager p ("bbot-agent help", false);
+    print_bbot_agent_usage (p.stream ());
+
+    // If the pager failed, assume it has issued some diagnostics.
+    //
+    return p.wait () ? 0 : 1;
+  }
+
+  tc_name = ops.toolchain_name ();
+  tc_num  = ops.toolchain_num ();
+  tc_ver  = (ops.toolchain_ver_specified ()
+             ? ops.toolchain_ver ()
+             : standard_version (BBOT_VERSION_STR));
+  tc_id   = ops.toolchain_id ();
+
+
+  // Controller URLs.
+  //
+  if (argc < 2 &&
+      !ops.dump_machines () &&
+      !ops.fake_request_specified ())
+  {
+    fail << "controller url expected" <<
+      info << "run " << argv[0] << " --help for details";
+  }
+
+  strings controllers;
+
+  for (int i (1); i != argc; ++i)
+    controllers.push_back (argv[i]);
+
+  // Handle SIGHUP and SIGTERM.
+  //
+  if (signal (SIGHUP,  &handle_signal) == SIG_ERR ||
+      signal (SIGTERM, &handle_signal) == SIG_ERR)
+    fail << "unable to set signal handler: "
+         << system_error (errno, generic_category ()); // Sanitize.
+
+  optional<string> fingerprint;
+
+  if (ops.auth_key_specified ())
+  try
+  {
+    // Note that the process always prints to STDERR, so we redirect it to the
+    // null device. We also check for the key file existence to print more
+    // meaningful error message if that's not the case.
+    //
+    if (!file_exists (ops.auth_key ()))
+      throw_generic_error (ENOENT);
+
+    openssl os (trace,
+                ops.auth_key (), path ("-"), fdnull (),
+                ops.openssl (), "rsa",
+                ops.openssl_option (), "-pubout", "-outform", "DER");
+
+    vector<char> k (os.in.read_binary ());
+    os.in.close ();
+
+    if (!os.wait ())
+      throw_generic_error (EIO);
+
+    fingerprint = sha256 (k.data (), k.size ()).string ();
+  }
+  catch (const system_error& e)
+  {
+    fail << "unable to obtain authentication public key: " << e;
+  }
+
+  if (ops.systemd_daemon ())
+  {
+    diag_record dr;
+
+    dr << info << "bbot agent " << BBOT_VERSION_ID;
+
+    if (fingerprint)
+      dr << info << "auth key fp    " << *fingerprint;
+
+    dr <<
+      info << "toolchain name " << tc_name <<
+      info << "toolchain num  " << tc_num <<
+      info << "toolchain ver  " << tc_ver.string () <<
+      info << "toolchain id   " << tc_id <<
+      info << "CPU(s)         " << ops.cpu () <<
+      info << "RAM(kB)        " << ops.ram ();
+
+    for (const string& u: controllers)
+      dr << info << "controller url " << u;
+  }
+
+  // The work loop. The steps we go through are:
+  //
+  // 1. Enumerate the available machines, (re-)bootstrapping any if necessary.
+  //
+  // 2. Poll controller(s) for build tasks.
+  //
+  // 3. If no build tasks are available, go to #1 (after sleeping a bit).
+  //
+  // 4. If a build task is returned, do it, upload the result, and go to #1
+  //    (immediately).
+  //
+  for (bool sleep (false);; ::sleep (sleep ? 60 : 0), sleep = false)
+  {
+    // Enumerate the machines.
+    //
+    auto mp (enumerate_machines (ops.machines ()));
+    bootstrapped_machine_manifests& ms (mp.first);
+    dir_paths& ds (mp.second);
+
+    // Prepare task request.
+    //
+    task_request_manifest tq {
+      hname,
+      tc_name,
+      tc_ver,
+      fingerprint,
+      machine_header_manifests {}
+    };
+
+    for (const bootstrapped_machine_manifest& m: ms)
+      tq.machines.emplace_back (m.machine.id,
+                                m.machine.name,
+                                m.machine.summary);
+
+    if (ops.dump_machines ())
+    {
+      for (const machine_header_manifest& m: tq.machines)
+        serialize_manifest (m, cout, "stdout", "machine");
+
+      return 0;
+    }
+
+    if (tq.machines.empty ())
+    {
+      warn << "no build machines for toolchain " << tc_name;
+      sleep = true;
+      continue;
+    }
+
+    // Send task requests.
+    //
+    //
+    string url;
+    task_response_manifest tr;
+
+    if (ops.fake_request_specified ())
+    {
+      auto t (parse_manifest<task_manifest> (ops.fake_request (), "task"));
+
+      tr = task_response_manifest {
+        "fake-session", // Dummy session.
+        nullopt,        // No challenge.
+        url,            // Empty result URL.
+        move (t)};
+
+      url = "http://example.org";
+    }
+    else
+    {
+      for (const string& u: controllers)
+      {
+        try
+        {
+          http_curl c (trace,
+                       path ("-"),
+                       path ("-"),
+                       curl::post,
+                       u,
+                       "--header", "Content-Type: text/manifest",
+                       "--max-time", ops.request_timeout ());
+
+          // This is tricky/hairy: we may fail hard parsing the output before
+          // seeing that curl exited with an error and failing softly.
+          //
+          bool f (false);
+
+          try
+          {
+            serialize_manifest (tq, c.out, u, "task request", false);
+          }
+          catch (const failed&) {f = true;}
+
+          c.out.close ();
+
+          if (!f)
+          try
+          {
+            tr = parse_manifest<task_response_manifest> (
+              c.in, u, "task response", false);
+          }
+          catch (const failed&) {f = true;}
+
+          c.in.close ();
+
+          if (!c.wait () || f)
+            throw_generic_error (EIO);
+        }
+        catch (const system_error& e)
+        {
+          error << "unable to request task from " << u << ": " << e;
+          continue;
+        }
+
+        if (tr.challenge && !fingerprint) // Controller misbehaves.
+        {
+          error << "unexpected challenge from " << u << ": " << *tr.challenge;
+          continue;
+        }
+
+        if (!tr.session.empty ()) // Got a task.
+        {
+          url = u;
+
+          task_manifest& t (*tr.task);
+          l2 ([&]{trace << "task for " << t.name << '/' << t.version << " "
+                        << "on " << t.machine << " "
+                        << "from " << url;});
+          break;
+        }
+      }
+    }
+
+    if (tr.session.empty ()) // No task from any of the controllers.
+    {
+      l2 ([&]{trace << "no tasks from any controllers, sleeping";});
+      sleep = true;
+      continue;
+    }
+
+    // We have a build task.
+    //
+    // First find the index of the machine we were asked to use (and also
+    // verify it is one of those we sent).
+    //
+    size_t i (0);
+    for (const machine_header_manifest& m: tq.machines)
+    {
+      if (m.name == tr.task->machine)
+        break;
+
+      ++i;
+    }
+
+    if (i == ms.size ())
+    {
+      error << "task from " << url << " for unknown machine "
+            << tr.task->machine;
+
+      if (ops.dump_task ())
+        return 0;
+
+      continue;
+    }
+
+    task_manifest& t (*tr.task);
+
+    if (ops.dump_task ())
+    {
+      serialize_manifest (t, cout, "stdout", "task");
+      return 0;
+    }
+
+    // If we have our own repository certificate fingerprints, then use them
+    // to replace what we have received from the controller.
+    //
+    if (!ops.trust ().empty ())
+      t.trust = ops.trust ();
+
+    const dir_path& d (ds[i]); // The -<toolchain> directory.
+    const bootstrapped_machine_manifest& m (ms[i]);
+
+    result_manifest r (perform_task (d, m, t));
+
+    if (ops.dump_result ())
+    {
+      serialize_manifest (r, cout, "stdout", "result");
+      return 0;
+    }
+
+    // Prepare answer to the private key challenge.
+    //
+    optional<vector<char>> challenge;
+
+    if (tr.challenge)
+    try
+    {
+      assert (ops.auth_key_specified ());
+
+      openssl os (trace,
+                  fdstream_mode::text, path ("-"), 2,
+                  ops.openssl (), "rsautl",
+                  ops.openssl_option (), "-sign", "-inkey", ops.auth_key ());
+
+      os.out << *tr.challenge;
+      os.out.close ();
+
+      challenge = os.in.read_binary ();
+      os.in.close ();
+
+      if (!os.wait ())
+        throw_generic_error (EIO);
+    }
+    catch (const system_error& e)
+    {
+      // The task response challenge is valid (verified by manifest parser),
+      // so there is something wrong with setup, and so the failure is fatal.
+      //
+      fail << "unable to sign task response challenge: " << e;
+    }
+
+    // Upload the result.
+    //
+    result_request_manifest rq {tr.session, move (challenge), move (r)};
+    {
+      const string& u (*tr.result_url);
+
+      try
+      {
+        http_curl c (trace,
+                     path ("-"),
+                     nullfd,     // Not expecting any data in response.
+                     curl::post,
+                     u,
+                     "--header", "Content-Type: text/manifest",
+                     "--max-time", ops.request_timeout ());
+
+        // This is tricky/hairy: we may fail hard writing the input before
+        // seeing that curl exited with an error and failing softly.
+        //
+        bool f (false);
+
+        try
+        {
+          serialize_manifest (rq, c.out, u, "task request");
+        }
+        catch (const failed&) {f = true;}
+
+        c.out.close ();
+
+        if (!c.wait () || f)
+          throw_generic_error (EIO);
+      }
+      catch (const system_error& e)
+      {
+        error << "unable to upload result to " << u << ": " << e;
+        continue;
+      }
+    }
+
+    l2 ([&]{trace << "built " << t.name << '/' << t.version << " "
+                  << "on " << t.machine << " "
+                  << "for " << url;});
+  }
+}
+catch (const failed&)
+{
+  return 1; // Diagnostics has already been issued.
+}
+catch (const cli::exception& e)
+{
+  error << e;
+  return 1;
+}
+
+namespace bbot
+{
+  static unsigned int rand_seed; // Seed for rand_r();
+
+  size_t
+  genrand ()
+  {
+    if (rand_seed == 0)
+      rand_seed = static_cast<unsigned int> (
+        chrono::system_clock::now ().time_since_epoch ().count ());
+
+    return static_cast<size_t> (rand_r (&rand_seed));
+  }
+
+  // Note: Linux-specific implementation.
+  //
+  string
+  iface_addr (const string& i)
+  {
+    if (i.size () >= IFNAMSIZ)
+      throw invalid_argument ("interface nama too long");
+
+    auto_fd fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0));
+
+    if (fd.get () == -1)
+      throw_system_error (errno);
+
+    ifreq ifr;
+    ifr.ifr_addr.sa_family = AF_INET;
+    strcpy (ifr.ifr_name, i.c_str ());
+
+    if (ioctl (fd.get (), SIOCGIFADDR, &ifr) == -1)
+      throw_system_error (errno);
+
+    char buf[3 * 4 + 3 + 1]; // IPv4 address.
+    if (inet_ntop (AF_INET,
+                   &reinterpret_cast<sockaddr_in*> (&ifr.ifr_addr)->sin_addr,
+                   buf,
+                   sizeof (buf)) == nullptr)
+      throw_system_error (errno);
+
+    return buf;
+  }
+}
diff --git a/bbot/agent/agent.hxx b/bbot/agent/agent.hxx
new file mode 100644
index 0000000..96876bc
--- /dev/null
+++ b/bbot/agent/agent.hxx
@@ -0,0 +1,45 @@
+// file      : bbot/agent/agent.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_AGENT_HXX
+#define BBOT_AGENT_AGENT_HXX
+
+#include <sys/types.h> // uid_t
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+#include <bbot/agent/agent-options.hxx>
+
+namespace bbot
+{
+  extern agent_options ops;
+
+  extern const string bs_prot; // Bootstrap protocol version.
+
+  extern string           tc_name; // Toolchain name.
+  extern uint16_t         tc_num;  // Toolchain number.
+  extern standard_version tc_ver;  // Toolchain version.
+  extern string           tc_id;   // Toolchain id.
+
+  extern string hname; // Our host name.
+  extern uid_t  uid;   // Our effective user id.
+  extern string uname; // Our effective user name.
+
+  // Random number generator (currently not MT-safe and limited to RAND_MAX).
+  //
+  size_t
+  genrand ();
+
+  template <typename T>
+  inline T
+  genrand () {return static_cast<T> (genrand ());}
+
+  // Return the IPv4 address of an interface.
+  //
+  string
+  iface_addr (const string&);
+}
+
+#endif // BBOT_AGENT_AGENT_HXX
diff --git a/bbot/agent/machine-manifest.cxx b/bbot/agent/machine-manifest.cxx
new file mode 100644
index 0000000..3312d1b
--- /dev/null
+++ b/bbot/agent/machine-manifest.cxx
@@ -0,0 +1,355 @@
+// file      : bbot/agent/machine-manifest.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/machine-manifest.hxx>
+
+#include <sstream>
+
+#include <libbutl/tab-parser.hxx>
+#include <libbutl/string-parser.hxx>
+#include <libbutl/manifest-parser.hxx>
+#include <libbutl/manifest-serializer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+  using parser = manifest_parser;
+  using parsing = manifest_parsing;
+  using serializer = manifest_serializer;
+  using serialization = manifest_serialization;
+  using name_value = manifest_name_value;
+
+  // machine_type
+  //
+  string
+  to_string (machine_type t)
+  {
+    switch (t)
+    {
+    case machine_type::kvm:    return "kvm";
+    case machine_type::nspawn: return "nspawn";
+    }
+
+    assert (false);
+    return string ();
+  }
+
+  machine_type
+  to_machine_type (const string& t)
+  {
+         if (t == "kvm")    return machine_type::kvm;
+    else if (t == "nspawn") return machine_type::nspawn;
+    else throw invalid_argument ("invalid machine type '" + t + "'");
+  }
+
+  // machine_manifest
+  //
+  machine_manifest::
+  machine_manifest (parser& p, bool iu)
+      : machine_manifest (p, p.next (), iu)
+  {
+    // Make sure this is the end.
+    //
+    name_value nv (p.next ());
+    if (!nv.empty ())
+      throw parsing (p.name (), nv.name_line, nv.name_column,
+                     "single machine manifest expected");
+  }
+
+  machine_manifest::
+  machine_manifest (parser& p, name_value nv, bool iu)
+      : machine_header_manifest (p, move (nv), unknown_name_mode::stop, &nv)
+  {
+    auto bad_name = [&p, &nv] (const string& d)
+    {
+      throw parsing (p.name (), nv.name_line, nv.name_column, d);
+    };
+
+    // Offsets are used to tie an error to the specific position inside a
+    // manifest value (possibly a multiline one).
+    //
+    auto bad_value = [&p, &nv] (
+      const string& d, uint64_t column_offset = 0, uint64_t line_offset = 0)
+    {
+      throw parsing (p.name (),
+                     nv.value_line + line_offset,
+                     (line_offset == 0 ? nv.value_column : 1) + column_offset,
+                     d);
+    };
+
+    optional<machine_type> type;
+
+    for (; !nv.empty (); nv = p.next ())
+    {
+      string& n (nv.name);
+      string& v (nv.value);
+
+      if (n == "type")
+      {
+        if (type)
+          bad_name ("machine type redefinition");
+
+        try
+        {
+          type = to_machine_type (v);
+        }
+        catch (const invalid_argument&)
+        {
+          bad_value ("invalid machine type");
+        }
+      }
+      else if (n == "mac")
+      {
+        if (mac)
+          bad_name ("machine mac redefinition");
+
+        // @@ Should we check that the value is a valid mac?
+        //
+        mac = move (v);
+      }
+      else if (n == "options")
+      {
+        if (options)
+          bad_name ("machine options redefinition");
+
+        strings op;
+
+        // Note that when reporting errors we combine the manifest value
+        // position with the respective error position.
+        //
+        try
+        {
+          istringstream is (v);
+          tab_parser parser (is, "");
+
+          tab_fields tl;
+          while (!(tl = parser.next ()).empty ())
+          {
+            for (auto& tf: tl)
+              op.emplace_back (move (tf.value));
+          }
+        }
+        catch (const tab_parsing& e)
+        {
+          bad_value ("invalid machine options: " + e.description,
+                     e.column - 1,
+                     e.line - 1);
+        }
+
+        if (op.empty ())
+          bad_value ("empty machine options");
+
+        options = move (op);
+      }
+      else if (!iu)
+        bad_name ("unknown name '" + n + "' in machine manifest");
+    }
+
+    // Verify all non-optional values were specified.
+    //
+    if (!type)
+      bad_value ("no machine type specified");
+
+    this->type = *type;
+  }
+
+  void machine_manifest::
+  serialize (serializer& s) const
+  {
+    // @@ Should we check that all non-optional values are specified and all
+    //    values are valid?
+    //
+
+    machine_header_manifest::serialize (s, false);
+
+    s.next ("type", to_string (type));
+
+    if (mac)
+      s.next ("mac", *mac);
+
+    // Recompose options string as a space-separated option list,
+    //
+    if (options)
+    {
+      string v;
+      for (auto b (options->cbegin ()), i (b), e (options->cend ()); i != e;
+           ++i)
+      {
+        if (i != b)
+          v += ' ';
+
+        v += *i;
+      }
+
+      s.next ("options", v);
+    }
+
+    s.next ("", ""); // End of manifest.
+  }
+
+  strings machine_manifest::
+  unquoted_options () const
+  {
+    return options
+      ? string_parser::unquote (*options)
+      : strings ();
+  }
+
+  // toolchain_manifest
+  //
+  toolchain_manifest::
+  toolchain_manifest (parser& p, bool iu)
+      : toolchain_manifest (p, p.next (), iu)
+  {
+    // Make sure this is the end.
+    //
+    name_value nv (p.next ());
+    if (!nv.empty ())
+      throw parsing (p.name (), nv.name_line, nv.name_column,
+                     "single toolchain manifest expected");
+  }
+
+  toolchain_manifest::
+  toolchain_manifest (parser& p, name_value nv, bool iu)
+  {
+    auto bad_name = [&p, &nv] (const string& d)
+    {
+      throw parsing (p.name (), nv.name_line, nv.name_column, d);
+    };
+
+    auto bad_value = [&p, &nv] (const string& d)
+    {
+      throw parsing (p.name (), nv.value_line, nv.value_column, d);
+    };
+
+    // Make sure this is the start and we support the version.
+    //
+    if (!nv.name.empty ())
+      bad_name ("start of toolchain manifest expected");
+
+    if (nv.value != "1")
+      bad_value ("unsupported format version");
+
+    // Parse the toolchain manifest.
+    //
+    for (nv = p.next (); !nv.empty (); nv = p.next ())
+    {
+      string& n (nv.name);
+      string& v (nv.value);
+
+      if (n == "id")
+      {
+        if (!id.empty ())
+          bad_name ("toolchain id redefinition");
+
+        if (v.empty ())
+          bad_value ("empty toolchain id");
+
+        id = move (v);
+      }
+      else if (!iu)
+        bad_name ("unknown name '" + n + "' in toolchain manifest");
+    }
+
+    // Verify all non-optional values were specified.
+    //
+    if (id.empty ())
+      bad_value ("no toolchain id specified");
+  }
+
+  void toolchain_manifest::
+  serialize (serializer& s) const
+  {
+    // @@ Should we check that all non-optional values are specified?
+    //
+    s.next ("", "1"); // Start of manifest.
+    s.next ("id", id);
+    s.next ("", ""); // End of manifest.
+  }
+
+  // bootstrapped_machine_manifest
+  //
+  bootstrapped_machine_manifest::
+  bootstrapped_machine_manifest (parser& p, bool iu)
+  {
+    name_value nv (p.next ());
+
+    auto bad_name = [&p, &nv] (const string& d)
+    {
+      throw parsing (p.name (), nv.name_line, nv.name_column, d);
+    };
+
+    auto bad_value = [&p, &nv] (const string& d)
+    {
+      throw parsing (p.name (), nv.value_line, nv.value_column, d);
+    };
+
+    // Make sure this is the start and we support the version.
+    //
+    if (!nv.name.empty ())
+      bad_name ("start of bootstrapped machine manifest expected");
+
+    if (nv.value != "1")
+      bad_value ("unsupported format version");
+
+    // Parse the bootstrapped machine manifest. Currently there is no values
+    // expected.
+    //
+    for (nv = p.next (); !nv.empty (); nv = p.next ())
+    {
+      if (!iu)
+        bad_name ("unknown name '" + nv.name +
+                  "' in bootstrapped machine manifest");
+    }
+
+    nv = p.next ();
+    if (nv.empty ())
+      bad_value ("machine manifest expected");
+
+    machine = machine_manifest (p, nv, iu);
+
+    if (!machine.mac)
+      bad_name ("mac address must be present in machine manifest");
+
+    nv = p.next ();
+    if (nv.empty ())
+      bad_value ("toolchain manifest expected");
+
+    toolchain = toolchain_manifest (p, nv, iu);
+
+    nv = p.next ();
+    if (nv.empty ())
+      bad_value ("bootstrap manifest expected");
+
+    bootstrap = bootstrap_manifest (p, nv, iu);
+
+    // Make sure this is the end.
+    //
+    nv = p.next ();
+    if (!nv.empty ())
+      throw parsing (p.name (), nv.name_line, nv.name_column,
+                     "single bootstrapped machine manifest expected");
+  }
+
+  void bootstrapped_machine_manifest::
+  serialize (serializer& s) const
+  {
+    // @@ Should we check that all non-optional values are specified?
+    //
+    s.next ("", "1"); // Start of manifest.
+    s.next ("", "");  // End of manifest.
+
+    if (!machine.mac)
+      throw serialization (s.name (),
+                           "mac address must be present in machine manifest");
+
+    machine.serialize (s);
+    toolchain.serialize (s);
+    bootstrap.serialize (s);
+
+    s.next ("", ""); // End of stream.
+  }
+}
diff --git a/bbot/agent/machine-manifest.hxx b/bbot/agent/machine-manifest.hxx
new file mode 100644
index 0000000..37919ba
--- /dev/null
+++ b/bbot/agent/machine-manifest.hxx
@@ -0,0 +1,118 @@
+// file      : bbot/agent/machine-manifest.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_MACHINE_MANIFEST_HXX
+#define BBOT_AGENT_MACHINE_MANIFEST_HXX
+
+#include <map>
+
+#include <libbutl/manifest-forward.hxx>
+
+#include <libbbot/manifest.hxx> // machine_header
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+#include <bbot/bootstrap-manifest.hxx>
+
+namespace bbot
+{
+  // Machine type.
+  //
+  enum class machine_type {kvm, nspawn};
+
+  string
+  to_string (machine_type);
+
+  machine_type
+  to_machine_type (const string&); // Throws invalid_argument.
+
+  // Machine.
+  //
+  class machine_manifest: public machine_header_manifest
+  {
+  public:
+    machine_type type;
+    optional<string> mac;      // Required in bootstrapped machine manifest.
+    optional<strings> options; // Note: could be quoted.
+
+    strings
+    unquoted_options () const; // Return empty if absent.
+
+    machine_manifest (std::string i,
+                      std::string n,
+                      std::string s,
+                      machine_type t,
+                      optional<string> m,
+                      optional<strings> o)
+        : machine_header_manifest (std::move (i),
+                                   std::move (n),
+                                   std::move (s)),
+          type (t),
+          mac (std::move (m)),
+          options (std::move (o)) {}
+
+  public:
+    machine_manifest () = default; // VC export.
+    machine_manifest (butl::manifest_parser&, bool ignore_unknown = false);
+    machine_manifest (butl::manifest_parser&,
+                      butl::manifest_name_value start,
+                      bool ignore_unknown = false);
+
+    void
+    serialize (butl::manifest_serializer&) const;
+  };
+
+  // Toolchain.
+  //
+  class toolchain_manifest
+  {
+  public:
+
+    // Toolchain id (SHAXXX).
+    //
+    string id;
+
+    explicit
+    toolchain_manifest (string i): id (i) {}
+
+  public:
+    toolchain_manifest () = default; // VC export.
+    toolchain_manifest (butl::manifest_parser&, bool ignore_unknown = false);
+    toolchain_manifest (butl::manifest_parser&,
+                        butl::manifest_name_value start,
+                        bool ignore_unknown = false);
+
+    void
+    serialize (butl::manifest_serializer&) const;
+  };
+
+  // The manifest stored in <name>-<toolchain>/ consists of the machine
+  // manifest (original), toolchain manifest, and bootstrap manifest.
+  //
+  class bootstrapped_machine_manifest
+  {
+  public:
+    machine_manifest   machine;
+    toolchain_manifest toolchain;
+    bootstrap_manifest bootstrap;
+
+    bootstrapped_machine_manifest (machine_manifest m,
+                                   toolchain_manifest t,
+                                   bootstrap_manifest b)
+        : machine (move (m)), toolchain (move (t)), bootstrap (move (b)) {}
+
+  public:
+    bootstrapped_machine_manifest () = default; // VC export.
+    bootstrapped_machine_manifest (butl::manifest_parser&,
+                                   bool ignore_unknown = false);
+
+    void
+    serialize (butl::manifest_serializer&) const;
+  };
+
+  using bootstrapped_machine_manifests = vector<bootstrapped_machine_manifest>;
+}
+
+#endif // BBOT_AGENT_MACHINE_MANIFEST_HXX
diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx
new file mode 100644
index 0000000..422c623
--- /dev/null
+++ b/bbot/agent/machine.cxx
@@ -0,0 +1,474 @@
+// file      : bbot/agent/machine.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/machine.hxx>
+
+#include <unistd.h> // sleep()
+
+#include <sys/un.h>   // sockaddr_un
+#include <sys/socket.h>
+
+#include <cstdio>  // snprintf()
+#include <cstring> // strcpy()
+
+#include <bbot/agent/agent.hxx>
+#include <bbot/agent/machine-manifest.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+  // Forward TFTP requests (UDP/69) coming from the machine to the specified
+  // port.
+  //
+  // This allows the machine to connect to any "unknown" IP (e.g., link-local
+  // 196.254.111.222) port 69 and end up being redirected to out TFTP server.
+  //
+  static void
+  iptables (tracer& t,
+            const char* a,
+            const string& tap,
+            const string& br,
+            uint16_t port,
+            bool ignore_errors = false)
+  {
+    string addr (iface_addr (br));
+
+    auto_fd fdn (ignore_errors ? fdnull () : nullfd);
+    int ofd (ignore_errors ? fdn.get () : 2);
+
+    process_exit::code_type e;
+
+    e = run_io_exit (t, 0, ofd, ofd,
+                     "sudo",             "iptables",
+                     "-t",               "nat",
+                     a,                  "PREROUTING",
+                     "-m",               "udp",
+                     "-p",               "udp",
+                     "-m",               "physdev",
+                     "-i",               br,
+                     "--physdev-in",     tap,
+                     "--dport",          69,
+                     "-j",               "DNAT",
+                     "--to-destination", addr + ':' + to_string (port));
+
+    if (e != 0 && !ignore_errors)
+      fail << "process iptables terminated with non-zero exit code";
+
+    // Nobody really knows whether this is really needed (really)...
+    //
+    e = run_io_exit (t, 0, ofd, ofd,
+                     "sudo",             "iptables",
+                     a,                  "FORWARD",
+                     "-m",               "udp",
+                     "-p",               "udp",
+                     "-m",               "physdev",
+                     "-o",               br,
+                     "--physdev-out",    tap,
+                     "-d",               addr,
+                     "--dport",          port,
+                     "-m",               "state",
+                     "--state",          "NEW,ESTABLISHED,RELATED",
+                     "-j",               "ACCEPT");
+
+    if (e != 0 && !ignore_errors)
+      fail << "process iptables terminated with non-zero exit code";
+  }
+
+  static string
+  create_tap (const string& br, uint16_t port)
+  {
+    string t ("tap" + to_string (tc_num));
+
+    tracer trace ("create_tap", t.c_str ());
+
+    // First try to delete it in case there is one from a previous run.
+    //
+    iptables (trace, "-D", t, br, port, true); // Ignore errors.
+    run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap");
+
+    run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid);
+    run (trace, "sudo", "ip", "link", "set", t, "up");
+    run (trace, "sudo", "ip", "link", "set", t, "master", br);
+
+    iptables (trace, "-A", t, br, port); // Add.
+
+    return t;
+  }
+
+  static void
+  destroy_tap (const string& t, const string& br, uint16_t port)
+  {
+    tracer trace ("destroy_tap", t.c_str ());
+    iptables (trace, "-D", t, br, port); // Delete.
+    run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap");
+  }
+
+  class tap
+  {
+  public:
+    string iface;
+
+    string bridge; // Bridge interface to which this tap belongs
+    uint16_t port; // UDP port to forward TFTP traffic to.
+
+    tap (string b, uint16_t p)
+        : iface (create_tap (b, p)), bridge (move (b)), port (p) {}
+
+    ~tap ()
+    {
+      if (!iface.empty ())
+      {
+        try {destroy ();} catch (...) {}
+      }
+    }
+
+    void
+    destroy ()
+    {
+      destroy_tap (iface, bridge, port);
+      iface.clear ();
+    }
+  };
+
+  static string
+  generate_mac ()
+  {
+    // The last two bits of the first byte are special: bit 1 indicates a
+    // multicast address (which we don't want) while bit 1 -- local assignment
+    // (which we do want).
+    //
+    char r[6 * 2 + 5 + 1];
+    snprintf (r, sizeof (r),
+              "%02x:%02x:%02x:%02x:%02x:%02x",
+              (genrand<uint8_t> () & 0xFE) | 0x02,
+              genrand<uint8_t> (),
+              genrand<uint8_t> (),
+              genrand<uint8_t> (),
+              genrand<uint8_t> (),
+              genrand<uint8_t> ());
+    return r;
+  }
+
+  class kvm_machine: public machine
+  {
+  public:
+    kvm_machine (const dir_path&,
+                 const machine_manifest&,
+                 const optional<string>& mac,
+                 const string& br_iface,
+                 uint16_t tftp_port);
+
+    virtual bool
+    shutdown (size_t& seconds) override;
+
+    virtual void
+    forcedown (bool fail_hard) override;
+
+    virtual void
+    suspend () override;
+
+    bool
+    wait (size_t& seconds, bool fail_hard) override;
+
+    using machine::wait;
+
+    virtual void
+    print_info (diag_record&) override;
+
+  private:
+    void
+    monitor_command (const string&, bool fail_hard = true);
+
+  private:
+    path kvm;     // Hypervisor binary.
+    tap net;      // Tap network interface.
+    string vnc;   // QEMU VNC TCP addr:port.
+    path monitor; // QEMU monitor UNIX socket.
+    process proc;
+  };
+
+  kvm_machine::
+  kvm_machine (const dir_path& md,
+               const machine_manifest& mm,
+               const optional<string>& omac,
+               const string& br,
+               uint16_t port)
+      : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest.
+                 omac   ? *omac   : // Generated mac from previous bootstrap.
+                 generate_mac ()),
+        kvm ("kvm"),
+        net (br, port),
+        vnc ("127.0.0.1:" + to_string (5900 + tc_num)),
+        monitor ("/tmp/" + tc_name + "-monitor")
+  {
+    tracer trace ("kvm_machine", md.string ().c_str ());
+
+    if (sizeof (sockaddr_un::sun_path) <= monitor.size ())
+      throw invalid_argument ("monitor unix socket path too long");
+
+    // Map logical CPUs to sockets/cores/threads. Failed that, QEMU just makes
+    // it a machine with that number of sockets and some operating systems
+    // (like Windows) only can do two.
+    //
+    size_t cpu (ops.cpu ());
+
+    size_t sockets (cpu <= 8  ? 1 : cpu <= 64 ? 2 : 4);
+    size_t cores (cpu / sockets);
+    size_t threads (cores <= 4 ? 1 : 2);
+    cores /= threads;
+
+
+    // We probably don't want to commit all the available RAM to the VM since
+    // some of it could be used on the host side for caching, etc. So the
+    // heuristics that we will use is 4G or 1G per CPU, whichever is greater
+    // and the rest divide equally between the host and the VM.
+    //
+    size_t ram ((cpu < 4 ? 4 : cpu) * 1024 * 1024); // Kb.
+
+    if (ram > ops.ram ())
+      ram = ops.ram ();
+    else
+      ram += (ops.ram () - ram) / 2;
+
+    // If we have options, use that instead of the default network and
+    // disk configuration.
+    //
+    strings os;
+
+    if (mm.options)
+    {
+      os = mm.unquoted_options ();
+
+      // Pre-process ifname=? and mac=?.
+      //
+      auto sub = [] (string& o, const char* s, const string& r)
+      {
+        size_t p (o.find (s));
+
+        if (p != string::npos)
+        {
+          p = o.find ('?', p + 1);
+          assert (p != string::npos);
+          o.replace (p, 1, r);
+        }
+      };
+
+      for (string& o: os)
+      {
+        sub (o, "ifname=?", net.iface);
+        sub (o, "mac=?", mac);
+      }
+    }
+    else
+    {
+      auto add = [&os] (string o, string v)
+      {
+        os.push_back (move (o));
+        os.push_back (move (v));
+      };
+
+      // Network.
+      //
+      add ("-netdev", "tap,id=net0,script=no,ifname=" + net.iface);
+      add ("-device", "virtio-net-pci,netdev=net0,mac=" + mac);
+
+      // Disk.
+      //
+      add ("-drive",  "if=none,id=disk0,file=disk.img,format=raw");
+      add ("-device", "virtio-blk-pci,scsi=off,drive=disk0");
+
+      //"-drive",  "if=none,id=disk0,format=raw,file=disk.img"
+      //"-device", "virtio-scsi-pci,id=scsi"
+      //"-device", "scsi-hd,drive=disk0"
+    }
+
+    // Start the VM.
+    //
+    // Notes:
+    //
+    // 1. echo system_powerdown | socat - UNIX-CONNECT:.../monitor
+    //
+    proc = run_io_start (
+      trace,
+      fdnull (),
+      2,
+      2,
+      md, // Run from the machine's directory.
+      kvm,
+      "-boot", "c", // Boot from disk.
+      "-no-reboot", // Exit on VM reboot.
+      "-m",   to_string (ram / 1024) + "M",
+      "-cpu", "host",
+      "-smp", (to_string (cpu) +
+               ",sockets=" + to_string (sockets) +
+               ",cores="   + to_string (cores) +
+               ",threads=" + to_string (threads)),
+      os,
+      "-vnc", "127.0.0.1:" + to_string (tc_num), // 5900 + tc_num
+      "-monitor", "unix:" + monitor.string () + ",server,nowait");
+  }
+
+  // Connect to the QEMU monitor via the UNIX socket and send system_reset.
+  // You may be wondering why not system_powerdown? The reason is that while
+  // not all OS know how to power-down the machine, pretty much all of them
+  // can reboot. So combined with the -no-reboot option above, we get the
+  // same result in a more robust way.
+  //
+  // Note that this setup has one side effect: if the VM decided to reboot,
+  // say, during bootstrap, then we will interpret it as a shutdown. Current
+  // thinking saying this is good since we don't want our VMs to reboot
+  // uncontrollably for security and predictability reasons (e.g., we don't
+  // want Windows to decide to install updates -- this stuff should all be
+  // disabled during the VM preparation).
+  //
+  // Actually, this turned out not to be entirely accurate: reset appears to
+  // be a "hard reset" while powerdown causes a clean shutdown. So we use
+  // powerdown to implement shutdown() and reset/-no-reboot for implement
+  // forcedown().
+  //
+  bool kvm_machine::
+  shutdown (size_t& seconds)
+  {
+    monitor_command ("system_powerdown");
+
+    // Wait for up to the specified number if seconds for the machine to
+    // shutdown.
+    //
+    return wait (seconds);
+  }
+
+  void kvm_machine::
+  forcedown (bool fh)
+  {
+    monitor_command ("system_reset", fh);
+    wait (fh);
+  }
+
+  void kvm_machine::
+  suspend ()
+  {
+    monitor_command ("stop");
+  }
+
+  void kvm_machine::
+  print_info (diag_record& dr)
+  {
+    dr << info << "qemu pid: " << proc.id ()
+       << info << "qemu vnc: " << vnc
+       << info << "qemu monitor: unix:" << monitor;
+  }
+
+  bool kvm_machine::
+  wait (size_t& sec, bool fh)
+  {
+    try
+    {
+      tracer trace ("kvm_machine::wait");
+
+      bool t;
+      for (; !(t = proc.try_wait ()) && sec != 0; --sec)
+        sleep (1);
+
+      if (t)
+      {
+        run_io_finish (trace, proc, kvm, fh);
+        net.destroy (); //@@ Always fails hard.
+        try_rmfile (monitor, true); // QEMU doesn't seem to remove it.
+      }
+
+      return t;
+    }
+    catch (const process_error& e)
+    {
+      fail (fh) << "unable to execute " << kvm << ": " << e << endf;
+    }
+  }
+
+  void kvm_machine::
+  monitor_command (const string& c, bool fh)
+  {
+    try
+    {
+      sockaddr_un addr;
+      addr.sun_family = AF_LOCAL;
+      strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor
+
+      auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0));
+
+      if (sock.get () == -1)
+        throw_system_error (errno);
+
+      if (connect (sock.get (),
+                   reinterpret_cast<sockaddr*> (&addr),
+                   sizeof (addr)) == -1)
+        throw_system_error (errno);
+
+      // Read until we get something.
+      //
+      auto readsome = [&sock] ()
+      {
+        ifdstream ifs (move (sock),
+                       fdstream_mode::non_blocking,
+                       ostream::badbit);
+
+        char buf[256];
+        for (streamsize n (0), m (0);
+             n == 0 || m != 0;
+             m = ifs.readsome (buf, sizeof (buf) - 1))
+        {
+          if (m != 0)
+          {
+            n += m;
+
+            //buf[m] = '\0';
+            //text << buf;
+          }
+        }
+
+        sock = ifs.release ();
+      };
+
+      // Read QEMU welcome.
+      //
+      readsome ();
+
+      // Write our command.
+      //
+      {
+        ofdstream ofs (move (sock), fdstream_mode::blocking);
+        ofs << c << endl;
+        sock = ofs.release ();
+      }
+
+      // Read QEMU reply (may hit eof).
+      //
+      readsome ();
+      return;
+    }
+    catch (const system_error& e)
+    {
+      fail (fh) << "unable to communicate with qemu monitor: " << e;
+    }
+  }
+
+  unique_ptr<machine>
+  start_machine (const dir_path& md,
+                 const machine_manifest& mm,
+                 const optional<string>& mac,
+                 const string& br_iface,
+                 uint16_t tftp_port)
+  {
+    switch (mm.type)
+    {
+    case machine_type::kvm:
+      return make_unique<kvm_machine> (md, mm, mac, br_iface, tftp_port);
+    case machine_type::nspawn:
+      assert (false); //@@ TODO
+    }
+
+    return nullptr;
+  }
+}
diff --git a/bbot/agent/machine.hxx b/bbot/agent/machine.hxx
new file mode 100644
index 0000000..e352e42
--- /dev/null
+++ b/bbot/agent/machine.hxx
@@ -0,0 +1,84 @@
+// file      : bbot/agent/machine.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_MACHINE_HXX
+#define BBOT_AGENT_MACHINE_HXX
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+namespace bbot
+{
+  // A running build machine (container, vm, etc).
+  //
+  // Note that if the machine is destroyed while it is still running, the
+  // destructor will block until the machine process terminates.
+  //
+  // Some functions can fail softly if the fail_hard argument is false.
+  //
+  class machine
+  {
+  public:
+    // Shut the machine down cleanly waiting up to the specified number of
+    // seconds for completion. Update the timeout and return false if the
+    // machine is still running, true if the machine exited successfully, and
+    // throw failed otherwise.
+    //
+    virtual bool
+    shutdown (size_t& seconds) = 0;
+
+    // Force the machine down.
+    //
+    virtual void
+    forcedown (bool fail_hard = true) = 0;
+
+    // Suspend the machine.
+    //
+    virtual void
+    suspend () = 0;
+
+    // Wait for the machine to terminate up to the specified number of
+    // seconds. Update the timeout and return false if the machine is still
+    // running, true if the machine exited successfully, and throw failed
+    // otherwise.
+    //
+    virtual bool
+    wait (size_t& seconds, bool fail_hard = true) = 0;
+
+    bool
+    wait (bool fail_hard = true)
+    {
+      size_t sec (~0); // Wait indefinitely.
+      return wait (sec, fail_hard);
+    }
+
+    // Print information about the machine (as info diagnostics) that can be
+    // useful for debugging (e.g., how to connect/login, etc).
+    //
+    virtual void
+    print_info (diag_record&) = 0;
+
+  public:
+    const string mac; // MAC address (inside the machine).
+
+  public:
+    virtual
+    ~machine () = default;
+
+  protected:
+    machine (string m)
+        : mac (move (m)) {}
+  };
+
+  class machine_manifest;
+
+  unique_ptr<machine>
+  start_machine (const dir_path&,
+                 const machine_manifest&,
+                 const optional<string>& mac,
+                 const string& br_iface,
+                 uint16_t tftp_port);
+}
+
+#endif // BBOT_AGENT_MACHINE_HXX
diff --git a/bbot/agent/tftp.cxx b/bbot/agent/tftp.cxx
new file mode 100644
index 0000000..27c1577
--- /dev/null
+++ b/bbot/agent/tftp.cxx
@@ -0,0 +1,137 @@
+// file      : bbot/agent/tftp.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/tftp.hxx>
+
+#include <arpa/inet.h>  // htonl()
+#include <netinet/in.h> // sockaddr_in
+#include <sys/socket.h>
+#include <sys/select.h>
+
+#include <cstring> // memset()
+
+#include <bbot/agent/agent.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+  tftp_server::
+  tftp_server (const string& map, uint16_t port)
+  {
+    int fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0));
+
+    if (fd == -1)
+      throw_system_error (errno);
+
+    fd_.reset (fd);
+
+    // Bind to ephemeral port.
+    //
+    sockaddr_in addr;
+    memset (&addr, 0, sizeof (addr));
+    addr.sin_family = AF_INET;
+    addr.sin_addr.s_addr = htonl (INADDR_ANY);
+    addr.sin_port = htons (port);
+
+    // Not to confuse with std::bind().
+    //
+    if (::bind (fd,
+                reinterpret_cast<sockaddr*> (&addr),
+                sizeof (sockaddr_in)) == -1)
+      throw_system_error (errno);
+
+    // Create the map file.
+    //
+    map_ = auto_rmfile (path::temp_path ("bbot-agent-tftp-map"));
+    ofdstream ofs (map_.path ());
+    ofs << map << endl;
+    ofs.close ();
+  }
+
+  uint16_t tftp_server::
+  port () const
+  {
+    sockaddr_in addr;
+    socklen_t size (sizeof (addr));
+
+    if (getsockname (fd_.get (),
+                     reinterpret_cast<sockaddr*> (&addr),
+                     &size) == -1)
+      throw_system_error (errno);
+
+    assert (size == sizeof (addr));
+    return ntohs (addr.sin_port);
+  }
+
+  bool tftp_server::
+  serve (size_t& sec, size_t inc)
+  {
+    tracer trace ("tftp_server::serve");
+
+    if (inc == 0 || inc > sec)
+      inc = sec;
+
+    int fd (fd_.get ());
+
+    // Note: Linux updates the timeout value which we rely upon.
+    //
+    timeval timeout {static_cast<long> (inc), 0};
+
+    fd_set rd;
+    FD_ZERO (&rd);
+
+    for (;;)
+    {
+      FD_SET (fd, &rd);
+
+      int r (select (fd + 1, &rd, nullptr, nullptr, &timeout));
+
+      if (r == -1)
+      {
+        if (errno == EINTR)
+          continue;
+
+        throw_system_error (errno);
+      }
+      else if (r == 0) // Timeout.
+      {
+        sec -= inc;
+        return false;
+      }
+
+      if (FD_ISSET (fd, &rd))
+      {
+        // The inetd "protocol" is to pass the socket as stdin/stdout file
+        // descriptors.
+        //
+        // Notes/issues:
+        //
+        // 1. Writes diagnostics to syslog.
+        //
+        run_io (trace,
+                fddup (fd),
+                fddup (fd),
+                2,
+                "sudo",                     // Required for --secure (chroot).
+                "/usr/sbin/in.tftpd",       // Standard installation location.
+                "--timeout", 1,             // Wait for more requests.
+                "--permissive",             // Use inherited umask.
+                "--create",                 // Allow creating new files (PUT).
+                "--map-file", map_.path (), // Path remapping rules.
+                "--user", uname,            // Run as our effective user.
+                "--secure",                 // Chroot to data directory.
+                ops.tftp ());
+
+        // This is not really accurate since tftpd will, for example, serve
+        // an upload request until it is complete. But it's close anough for
+        // our needs.
+        //
+        sec -= (inc - static_cast<size_t> (timeout.tv_sec));
+        return true;
+      }
+    }
+  }
+}
diff --git a/bbot/agent/tftp.hxx b/bbot/agent/tftp.hxx
new file mode 100644
index 0000000..76b4d1c
--- /dev/null
+++ b/bbot/agent/tftp.hxx
@@ -0,0 +1,47 @@
+// file      : bbot/agent/tftp.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_TFTP_HXX
+#define BBOT_AGENT_TFTP_HXX
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+namespace bbot
+{
+  // A TFTP server "wrapper" over tftpd-hpa.
+  //
+  // In a nutshell, we are pretending to be inetd and when a request arrives,
+  // spawn tftpd-hpa to handle it.
+  //
+  class tftp_server
+  {
+  public:
+    // The map argument specifies the path mapping rules, one per line (see
+    // the tftpd-hpa --map-file|-m option for details). If port is 0, then
+    // it is automatically assigned.
+    //
+    tftp_server (const string& map, uint16_t port);
+
+    // Return the assigned port.
+    //
+    uint16_t
+    port () const;
+
+    // Wait for a TFTP request for up to the specified number of seconds.  If
+    // increment is not 0, then wait in the specified incremenets (i.e., wait
+    // for up to that number of seconds; useful when one needs to also
+    // periodically check for something else). Update the timeout value as
+    // well as return true if a request was served and false otherwise.
+    //
+    bool
+    serve (size_t& seconds, size_t increment = 0);
+
+  private:
+    auto_fd fd_;
+    auto_rmfile map_;
+  };
+}
+
+#endif // BBOT_AGENT_TFTP_HXX
-- 
cgit v1.1