From b99194f5fac913d745f9af3b9b7c24ad6a032c1a Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 24 May 2017 18:12:14 +0200 Subject: Some more TFTP upload robustness work --- bbot/agent.cli | 4 ++-- bbot/agent.cxx | 48 +++++++++++++++++++++++++++--------------------- bbot/bbot-agent@.service | 2 +- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/bbot/agent.cli b/bbot/agent.cli index b55379c..2f5ad65 100644 --- a/bbot/agent.cli +++ b/bbot/agent.cli @@ -106,11 +106,11 @@ namespace bbot by adding the toolchain number \c{--toolchain-num} to this value." } - size_t --bootstrap-timeout = 600 + size_t --bootstrap-timeout = 900 { "", "Maximum number of seconds to wait for machine bootstrap completion, - 600 (10 minutes) by default." + 900 (15 minutes) by default." } size_t --bootstrap-retries = 2 diff --git a/bbot/agent.cxx b/bbot/agent.cxx index ac4905d..234763f 100644 --- a/bbot/agent.cxx +++ b/bbot/agent.cxx @@ -8,7 +8,7 @@ #include // PATH_MAX #include // signal() #include // rand_r() -#include // sleep(), realink(), getuid() +#include // sleep(), realink(), getuid(), fsync() #include // ifreq #include // sockaddr_in @@ -53,6 +53,14 @@ namespace bbot string uname; } +static void +file_sync (const path& f) +{ + auto_fd fd (fdopen (f, fdopen_mode::in)); + if (fsync (fd.get ()) != 0) + throw_system_error (errno); +} + // The btrfs tool likes to print informational messages, like "Created // snapshot such and such". Luckily, it writes them to stdout while proper // diagnostics to stderr. @@ -211,20 +219,21 @@ bootstrap_machine (const dir_path& md, // our side we serve TFTP requests while periodically checking for the // manifest file. // - for (to = bootstrap_to; - to != 0 && (!file_exists (mf) || file_empty (mf)); - tftpd.serve (to)) ; + for (to = bootstrap_to; to != 0; tftpd.serve (to)) + { + if (file_exists (mf)) + { + file_sync (mf); + if (!file_empty (mf)) + break; + } + } if (to == 0) return soft_fail ("bootstrap timeout"); l3 ([&]{trace << "completed bootstrap in " << bootstrap_to - to << "s";}); - // The file may have been created but not yet (completely) uploaded. - // So we wait for a one second window without any TFTP activity. - // - while (tftpd.serve ((to = 1))) ; - // Shut the machine down cleanly. // if (!m->shutdown ((to = shutdown_to))) @@ -693,24 +702,21 @@ try // So on our side we serve TFTP requests while checking for the // manifest file. // - // There are seem to be some "anomalies" in the Windows network stack - // in that we get an empty file and then the upload process gets stuck - // for no apparent reason. - // - for (to = build_to; - to != 0 && (!file_exists (rf) || file_empty (rf)); - tftpd.serve (to)) ; + for (to = build_to; to != 0; tftpd.serve (to)) + { + if (file_exists (rf)) + { + file_sync (rf); + if (!file_empty (rf)) + break; + } + } if (to == 0) return soft_fail ("build timeout"); l3 ([&]{trace << "completed build in " << build_to - to << "s";}); - // The file may have been created but not yet (completely) uploaded. - // So we wait for a one second window without any TFTP activity. - // - while (tftpd.serve ((to = 1))) ; - // Parse the result manifest. // try diff --git a/bbot/bbot-agent@.service b/bbot/bbot-agent@.service index 66d46f4..7d8c250 100644 --- a/bbot/bbot-agent@.service +++ b/bbot/bbot-agent@.service @@ -10,7 +10,7 @@ Environment=VERBOSE=3 Environment=CPU=1 Environment=RAM=1048576 -Environment=BOOTSTRAP_TIMEOUT=600 +Environment=BOOTSTRAP_TIMEOUT=900 Environment=BOOTSTRAP_RETRIES=2 Environment=BUILD_TIMEOUT=1800 Environment=BUILD_RETRIES=2 -- cgit v1.1