From f58adbc887abd4f467b483be953ecce7794255fc Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 14 Jun 2017 17:04:57 +0200 Subject: Periodically recheck for manifest upload There seems to be some obscure race, perhaps related to tmpfs? --- bbot/agent.cxx | 12 ++++++++---- bbot/tftp.cxx | 15 ++++++++++----- bbot/tftp.hxx | 10 ++++++---- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/bbot/agent.cxx b/bbot/agent.cxx index 117840a..d71f7b4 100644 --- a/bbot/agent.cxx +++ b/bbot/agent.cxx @@ -219,9 +219,11 @@ bootstrap_machine (const dir_path& md, // Next the bootstrap process may download additional toolchain // archives, build things, and then upload the result manifest. So on // our side we serve TFTP requests while periodically checking for the - // manifest file. + // manifest file. To work around some obscure filesystem races (the + // file's mtime/size is updated several seconds later; maybe tmpfs + // issue?), we periodically re-check. // - for (to = bootstrap_to; to != 0; tftpd.serve (to)) + for (to = bootstrap_to; to != 0; tftpd.serve (to, 2)) { if (file_exists (mf)) { @@ -702,9 +704,11 @@ try // Next the worker builds things and then uploads the result manifest. // So on our side we serve TFTP requests while checking for the - // manifest file. + // manifest file. To work around some obscure filesystem races (the + // file's mtime/size is updated several seconds later; maybe tmpfs + // issue?), we periodically re-check. // - for (to = build_to; to != 0; tftpd.serve (to)) + for (to = build_to; to != 0; tftpd.serve (to, 2)) { if (file_exists (rf)) { diff --git a/bbot/tftp.cxx b/bbot/tftp.cxx index 70cc85d..0da6c24 100644 --- a/bbot/tftp.cxx +++ b/bbot/tftp.cxx @@ -67,15 +67,18 @@ namespace bbot } bool tftp_server:: - serve (size_t& sec) + serve (size_t& sec, size_t inc) { tracer trace ("tftp_server::serve"); + if (inc == 0 || inc > sec) + inc = sec; + int fd (fd_.get ()); // Note: Linux updates the timeout value which we rely upon. 
// - timeval timeout {static_cast<time_t> (sec), 0}; + timeval timeout {static_cast<time_t> (inc), 0}; fd_set rd; FD_ZERO (&rd); @@ -95,7 +98,7 @@ namespace bbot } else if (r == 0) // Timeout. { - sec = 0; + sec -= inc; return false; } @@ -122,9 +125,11 @@ namespace bbot "--secure", // Chroot to data directory. ops.tftp ()); - // @@ This is not really accurate. + // This is not really accurate since tftpd will, for example, serve + // an upload request until it is complete. But it's close enough for + // our needs. // - sec = static_cast<size_t> (timeout.tv_sec); + sec -= (inc - static_cast<size_t> (timeout.tv_sec)); return true; } } diff --git a/bbot/tftp.hxx b/bbot/tftp.hxx index c9a88c9..cdd0e3e 100644 --- a/bbot/tftp.hxx +++ b/bbot/tftp.hxx @@ -29,12 +29,14 @@ namespace bbot uint16_t port () const; - // Wait for a TFTP request for up to the specified number of seconds. - // Update the timeout value as well as return true if a request was - // served and false otherwise. + // Wait for a TFTP request for up to the specified number of seconds. If + // increment is not 0, then wait in the specified increments (i.e., wait + // for up to that number of seconds; useful when one needs to also + // periodically check for something else). Update the timeout value as + // well as return true if a request was served and false otherwise. // bool - serve (size_t& seconds); + serve (size_t& seconds, size_t increment = 0); private: auto_fd fd_; -- cgit v1.1