about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bbot/agent/agent.cxx 29
-rw-r--r-- bbot/agent/machine.cxx 110
-rw-r--r-- bbot/agent/machine.hxx 6
3 files changed, 130 insertions, 15 deletions
diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx
index 2378fc2..be3b706 100644
--- a/bbot/agent/agent.cxx
+++ b/bbot/agent/agent.cxx
@@ -211,6 +211,7 @@ bootstrap_machine (const dir_path& md,
{
m->suspend (false);
m->wait (false);
+ m->cleanup ();
info << "resuming after machine suspension";
}
catch (const failed&) {}
@@ -236,7 +237,10 @@ bootstrap_machine (const dir_path& md,
// Failed, exit code diagnostics has already been issued.
}
- error << "machine " << md << " exited unexpectedly";
+ diag_record dr (error);
+ dr << "machine " << md << " exited unexpectedly";
+ m->print_info (dr);
+
return false;
};
@@ -273,8 +277,11 @@ bootstrap_machine (const dir_path& md,
if (retry > ops.bootstrap_retries ())
return soft_fail ("bootstrap startup timeout");
- warn << "machine " << mm.name << " appears to have "
- << "mis-booted, retrying";
+ // Note: keeping the logs behind (no cleanup).
+
+ diag_record dr (warn);
+ dr << "machine " << mm.name << " mis-booted, retrying";
+ m->print_info (dr);
try {m->forcedown (false);} catch (const failed&) {}
continue;
@@ -322,6 +329,8 @@ bootstrap_machine (const dir_path& md,
return soft_fail ("bootstrap shutdown timeout");
l3 ([&]{trace << "completed shutdown in " << shutdown_to - to << "s";});
+
+ m->cleanup ();
}
// Parse the result manifest.
@@ -897,6 +906,7 @@ try
{
m->suspend (false);
m->wait (false);
+ m->cleanup ();
info << "resuming after machine suspension";
}
catch (const failed&) {}
@@ -916,7 +926,10 @@ try
{
}
- error << "machine " << xp << " exited unexpectedly";
+ diag_record dr (error);
+ dr << "machine " << xp << " exited unexpectedly";
+ m->print_info (dr);
+
return false;
};
@@ -945,8 +958,11 @@ try
if (retry > ops.build_retries ())
return soft_fail ("build startup timeout");
- warn << "machine " << mm.machine.name << " appears to have "
- << "mis-booted, retrying";
+ // Note: keeping the logs behind (no cleanup).
+
+ diag_record dr (warn);
+ dr << "machine " << mm.machine.name << " mis-booted, retrying";
+ m->print_info (dr);
try {m->forcedown (false);} catch (const failed&) {}
continue;
@@ -1010,6 +1026,7 @@ try
// lease instead of a new one.
//
try {m->forcedown (false);} catch (const failed&) {}
+ m->cleanup ();
}
}
diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx
index fdc11c0..2566ca4 100644
--- a/bbot/agent/machine.cxx
+++ b/bbot/agent/machine.cxx
@@ -189,6 +189,9 @@ namespace bbot
using machine::wait;
virtual void
+ cleanup () override;
+
+ virtual void
print_info (diag_record&) override;
private:
@@ -202,6 +205,8 @@ namespace bbot
tap net; // Tap network interface.
string vnc; // QEMU VNC TCP addr:port.
path monitor; // QEMU monitor UNIX socket.
+ path log; // QEMU log (QMP read end).
+ auto_fd qmp; // QMP write end.
process proc;
};
@@ -229,6 +234,18 @@ namespace bbot
if (sizeof (sockaddr_un::sun_path) <= monitor.size ())
throw invalid_argument ("monitor unix socket path too long");
+ // Machine name.
+ //
+ // While we currently can only have one running machine per toolchain, add
+ // the instance number for debuggability.
+ //
+ string name (mm.name + '-' + tc_name + '-' + to_string (inst));
+
+ // Machine log. Note that it is only removed with an explicit cleanup()
+ // call.
+ //
+ log = path ("/tmp/" + path::traits::temp_name (name) + ".log");
+
// Map logical CPUs to sockets/cores/threads keeping the number of and
// cores even. Failing that, QEMU just makes it a machine with that number
// of sockets and some operating systems (like Windows) can only do two.
@@ -315,20 +332,45 @@ namespace bbot
//"-device", "scsi-hd,drive=disk0"
}
- // Start the VM.
+ // Setup QMP (QEMU Machine Protocol) monitor to act as a log.
//
- // Notes:
+ // Note that we still have to tell it our "capabilities" so while it will
+ // write to a log file, we need a pipe it will read from.
//
- // 1. echo system_powerdown | socat - UNIX-CONNECT:.../monitor
+ fdpipe qmp_in;
+ try
+ {
+ qmp_in = fdopen_pipe ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to create QMP input pipe: " << e;
+ }
+
+ auto_fd qmp_out;
+ try
+ {
+ qmp_out = fdopen (log, (fdopen_mode::out |
+ fdopen_mode::create |
+ fdopen_mode::exclusive));
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to create QMP output file: " << e;
+ }
+
+ // Start the VM.
//
const char* env[] = {"QEMU_AUDIO_DRV=none", // Disable audio output.
nullptr};
proc = run_io_start (
trace,
- fdnull (),
- 2,
- 2,
+ qmp_in,
+ 2, // 1>&2 (QMP goes to stdout)
+ qmp_out,
process_env (kvm, md, env), // Run from the machine's directory.
+ "-name", name + ",debug-threads=on",
+ "-S", // Start suspended.
"-boot", "c", // Boot from disk.
"-no-reboot", // Exit on VM reboot.
"-m", to_string (ram / 1024) + "M",
@@ -337,15 +379,64 @@ namespace bbot
",sockets=" + to_string (sockets) +
",cores=" + to_string (cores) +
",threads=" + to_string (threads)),
- //
+
// RTC settings.
//
"-rtc", "clock=vm,driftfix=slew",
"-no-hpet",
"-global", "kvm-pit.lost_tick_policy=discard",
+
os,
+
+ // VNC.
+ //
"-vnc", "127.0.0.1:" + to_string (offset), // 5900 + offset
- "-monitor", "unix:" + monitor.string () + ",server,nowait");
+
+ // QMP.
+ //
+ "-chardev", "stdio,id=qmp",
+ "-mon", "chardev=qmp,mode=control,pretty=on",
+
+ // Monitor.
+ //
+ "-chardev", "socket,id=mon,path=" + monitor.string () + ",server,nowait",
+ "-mon", "chardev=mon,mode=readline");
+
+ qmp_out.close ();
+ qmp_in.in.close ();
+ qmp = move (qmp_in.out);
+
+ // Wait for the QMP greeting. One day we will find a better way.
+ //
+ sleep (1);
+
+ try
+ {
+ ofdstream os (move (qmp));
+ os << "{ \"execute\": \"qmp_capabilities\" }" << endl;
+ qmp = os.release ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to initialize QMP: " << e;
+ }
+
+ // Start execution.
+ //
+ try
+ {
+ monitor_command ("cont");
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to communicate with qemu monitor: " << e;
+ }
+ }
+
+ void kvm_machine::
+ cleanup () // Remove the machine log file (path in log); removal errors are ignored.
+ {
+ try_rmfile (log, true /* ignore_errors */);
}
// Connect to the QEMU monitor via the UNIX socket and send system_reset.
@@ -434,6 +525,7 @@ namespace bbot
print_info (diag_record& dr)
{
dr << info << "qemu pid: " << proc.id ()
+ << info << "qemu log: " << log
<< info << "qemu vnc: " << vnc
<< info << "qemu monitor: unix:" << monitor;
}
@@ -453,7 +545,7 @@ namespace bbot
{
run_io_finish (trace, proc, kvm, fh);
net.destroy (); //@@ Always fails hard.
- try_rmfile (monitor, true); // QEMU doesn't seem to remove it.
+ try_rmfile (monitor, true /* ignore_errors */); // QEMU doesn't do it.
}
return t;
diff --git a/bbot/agent/machine.hxx b/bbot/agent/machine.hxx
index 04da80e..46ad544 100644
--- a/bbot/agent/machine.hxx
+++ b/bbot/agent/machine.hxx
@@ -53,6 +53,12 @@ namespace bbot
return wait (sec, fail_hard);
}
+ // Clean up machine resources (logs, etc). Normally you would only call
+ // this after a successful (or successfully investigated) completion.
+ //
+ virtual void
+ cleanup () = 0;
+
// Print information about the machine (as info diagnostics) that can be
// useful for debugging (e.g., how to connect/login, etc).
//