aboutsummaryrefslogtreecommitdiff
path: root/mod
diff options
context:
space:
mode:
Diffstat (limited to 'mod')
-rw-r--r--mod/mod-submit.cxx429
-rw-r--r--mod/options.cli8
2 files changed, 355 insertions, 82 deletions
diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx
index accd56d..c68b773 100644
--- a/mod/mod-submit.cxx
+++ b/mod/mod-submit.cxx
@@ -4,8 +4,16 @@
#include <mod/mod-submit.hxx>
-#include <cstdlib> // strtoul()
+#include <sys/time.h> // timeval
+#include <sys/select.h>
+
+#include <ratio> // ratio_greater_equal
+#include <chrono>
+#include <cstdlib> // strtoul()
#include <istream>
+#include <sstream>
+#include <type_traits> // static_assert
+#include <system_error> // error_code, generic_category()
#include <libbutl/sha256.mxx>
#include <libbutl/process.mxx>
@@ -501,10 +509,56 @@ handle (request& rq, response& rs)
}
// Given that the submission data is now successfully persisted we are no
- // longer in charge of removing it, even in case of a subsequent error.
+ // longer in charge of removing it, except for the cases when the submission
+ // handler terminates with an error (see below for details).
//
tdr.cancel ();
+ // If the handler terminates with non-zero exit status or specifies 5XX
+ // (HTTP server error) submission result manifest status value, then we
+ // stash the submission data directory for troubleshooting. Otherwise, if
+ // it's the 4XX (HTTP client error) status value, then we remove the
+ // directory.
+ //
+ // Note that leaving the directory in place in case of a submission error
+ // would have prevent the user from re-submitting until we research the
+ // issue and manually remove the directory.
+ //
+ auto stash_submit_dir = [&dd, error] ()
+ {
+ try
+ {
+ if (!dir_exists (dd))
+ return;
+
+ for (size_t n (1); true; ++n) // Eventually we should find the free one.
+ {
+ string ext ('.' + to_string (n));
+ dir_path d (dd + ext);
+
+ if (!dir_exists (d))
+ try
+ {
+ mvdir (dd, d);
+ break;
+ }
+ catch (const system_error& e)
+ {
+ int ec (e.code ().value ());
+ if (ec != ENOTEMPTY && ec != EEXIST) // Note: there can be a race.
+ throw;
+ }
+ }
+ }
+ catch (const system_error& e)
+ {
+ // Not much we can do here. Let's just log the issue and bail out
+ // leaving the directory in place.
+ //
+ error << "unable to rename directory '" << dd << "': " << e;
+ }
+ };
+
auto print_args = [&trace, this] (const char* args[], size_t n)
{
l2 ([&]{trace << process_args {args, n};});
@@ -518,13 +572,44 @@ handle (request& rq, response& rs)
// containing at least the status value. Thus, an empty cache indicates that
// the handler is not configured.
//
- status_code sc;
+ status_code sc (200);
vector<manifest_name_value> rvs;
if (options_->submit_handler_specified ())
{
+ // For the sake of the documentation we will call the handler's normal
+ // exit with 0 code "successful termination".
+ //
+ // To make sure the handler process execution doesn't exceed the specified
+ // timeout we set the non-blocking mode for the process stdout-reading
+ // stream, try to read from it with the 10 milliseconds timeout and check
+ // the process execution time between the reads. We then kill the process
+ // if the execution time is exceeded.
+ //
+ using namespace chrono;
+
+ using time_point = system_clock::time_point;
+ using duration = system_clock::duration;
+
+ // Make sure that the system clock has at least milliseconds resolution.
+ //
+ static_assert(
+ ratio_greater_equal<milliseconds::period, duration::period>::value,
+ "The system clock resolution is too low");
+
+ optional<milliseconds> timeout;
+
+ if (options_->submit_handler_timeout_specified ())
+ timeout = milliseconds (options_->submit_handler_timeout () * 1000);
+
const path& handler (options_->submit_handler ());
+ // Note that due to the non-blocking mode we cannot just pass the stream
+ // to the manifest parser constructor. So we buffer the data in the string
+ // stream and then parse that.
+ //
+ stringstream ss;
+
for (;;) // Breakout loop.
try
{
@@ -542,86 +627,177 @@ handle (request& rq, response& rs)
dd));
pipe.out.close ();
- try
+ auto kill = [&pr, &warn, &handler, &ac] ()
{
- ifdstream is (move (pipe.in));
-
- // Parse and verify the manifest. Obtain the HTTP status code (must go
- // first) and cache it for the subsequent responding to the client.
- //
- parser p (is, "handler");
- manifest_name_value nv (p.next ());
-
- auto bad_value ([&p, &nv] (const string& d) {
- throw parsing (p.name (), nv.value_line, nv.value_column, d);});
-
- if (nv.empty ())
- bad_value ("empty manifest");
-
- const string& n (nv.name);
- const string& v (nv.value);
-
- // The format version pair is verified by the parser.
- //
- assert (n.empty () && v == "1");
-
- // Cache the format version pair.
- //
- rvs.push_back (move (nv));
-
- // Get and verify the HTTP status.
- //
- nv = p.next ();
- if (n != "status")
- bad_value ("no status specified");
-
- char* e (nullptr);
- unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw.
-
- assert (e != nullptr);
-
- if (!(*e == '\0' && c >= 100 && c < 600))
- bad_value ("invalid http status '" + v + "'");
-
- // Cache the HTTP status.
+ // We may still end up well (see below), thus this is a warning.
//
- sc = static_cast<status_code> (c);
- rvs.push_back (move (nv));
+ warn << "ref " << ac << ": process " << handler
+ << " execution timeout expired";
- // Cache the remaining name/value pairs.
- //
- for (nv = p.next (); !nv.empty (); nv = p.next ())
- rvs.push_back (move (nv));
+ pr.kill ();
+ };
- // Cache end of manifest.
+ try
+ {
+ ifdstream is (move (pipe.in), fdstream_mode::non_blocking);
+
+ const size_t nbuf (8192);
+ char buf[nbuf];
+
+ while (is.is_open ())
+ {
+ time_point start;
+ milliseconds wd (10); // Max time to wait for the data portion.
+
+ if (timeout)
+ {
+ start = system_clock::now ();
+
+ if (*timeout < wd)
+ wd = *timeout;
+ }
+
+ timeval tm {wd.count () / 1000 /* seconds */,
+ wd.count () % 1000 * 1000 /* microseconds */};
+
+ fd_set rd;
+ FD_ZERO (&rd);
+ FD_SET (is.fd (), &rd);
+
+ int r (select (is.fd () + 1, &rd, nullptr, nullptr, &tm));
+
+ if (r == -1)
+ {
+ // Don't fail if the select() call was interrupted by the signal.
+ //
+ if (errno != EINTR)
+ throw io_error ("select failed",
+ error_code (errno, generic_category ()));
+ }
+ else if (r != 0) // Is data available?
+ {
+ assert (FD_ISSET (is.fd (), &rd));
+
+ // The only leagal way to read from non-blocking ifdstream.
+ //
+ streamsize n (is.readsome (buf, nbuf));
+
+ // Close the stream (and bail out) if the end of the data is
+ // reached. Otherwise cache the read data.
+ //
+ if (is.eof ())
+ is.close ();
+ else
+ {
+ // The data must be available.
+ //
+ // Note that we could keep reading until the readsome() call
+ // returns 0. However, this way we could potentially exceed the
+ // timeout significantly for some broken handler that floods us
+ // with data. So instead, we will be checking the process
+ // execution time after every data chunk read.
+ //
+ assert (n != 0);
+
+ ss.write (buf, n);
+ }
+ }
+ else // Timeout occured.
+ {
+ // Normally, we don't expect timeout to occur on the pipe read
+ // operation if the process has terminated successfully, as all its
+ // output must already be buffered (including eof). However, there
+ // can be some still running handler's child that has inherited
+ // the parent's stdout. In this case we assume that we have read
+ // all the handler's output, close the stream, log the warning and
+ // bail out.
+ //
+ if (pr.exit)
+ {
+ // We keep reading only upon successful handler termination.
+ //
+ assert (*pr.exit);
+
+ is.close ();
+
+ warn << "ref " << ac << ": process " << handler
+ << " stdout is not closed after termination (possibly "
+ << "handler's child still running)";
+ }
+ }
+
+ if (timeout)
+ {
+ time_point now (system_clock::now ());
+
+ // Assume we have waited the full amount if the time adjustment is
+ // detected.
+ //
+ duration d (now > start ? now - start : wd);
+
+ // If the timeout is not fully exhausted, then decrement it and
+ // try to read some more data from the handler' stdout. Otherwise,
+ // kill the process, if not done yet.
+ //
+ // Note that it may happen that we are killing an already
+ // terminated process, in which case kill() just sets the process
+ // exit information. On the other hand it's guaranteed that the
+ // process is terminated after the kill() call, and so the pipe is
+ // presumably closed on the write end (see above for details).
+ // Thus, if the process terminated successfully, we will continue
+ // reading until eof is reached or read timeout occurred. Yes, it
+ // may happen that we end up with a successful submission even
+ // with the kill.
+ //
+ if (*timeout > d)
+ *timeout -= duration_cast<milliseconds> (d);
+ else if (!pr.exit)
+ {
+ kill ();
+
+ assert (pr.exit);
+
+ // Close the stream (and bail out) if the process hasn't
+ // terminate successfully.
+ //
+ if (!*pr.exit)
+ is.close ();
+
+ *timeout = milliseconds::zero ();
+ }
+ }
+ }
+
+ assert (!is.is_open ());
+
+ if (!timeout)
+ pr.wait ();
+
+ // If the process is not terminated yet, then wait for its termination
+ // for the remaining time. Kill it if the timeout has been exceeded
+ // and the process still hasn't terminate.
//
- rvs.push_back (move (nv));
+ else if (!pr.exit && !pr.timed_wait (*timeout))
+ kill ();
- is.close ();
+ assert (pr.exit); // The process must finally be terminated.
- if (pr.wait ())
+ if (*pr.exit)
break; // Get out of the breakout loop.
- assert (pr.exit);
- error << "process " << handler << " " << *pr.exit;
-
- // Fall through.
- }
- catch (const parsing& e)
- {
- if (pr.wait ())
- error << "unable to parse handler's output: " << e;
+ error << "ref " << ac << ": process " << handler << " " << *pr.exit;
// Fall through.
}
catch (const io_error& e)
{
if (pr.wait ())
- error << "unable to read handler's output: " << e;
+ error << "ref " << ac << ": unable to read handler's output: " << e;
// Fall through.
}
+ stash_submit_dir ();
return respond_error ();
}
// Handle process_error and io_error (both derive from system_error).
@@ -629,6 +805,73 @@ handle (request& rq, response& rs)
catch (const system_error& e)
{
error << "unable to execute '" << handler << "': " << e;
+
+ stash_submit_dir ();
+ return respond_error ();
+ }
+
+ try
+ {
+ // Parse and verify the manifest. Obtain the HTTP status code (must go
+ // first) and cache it for the subsequent response to the client.
+ //
+ parser p (ss, "handler");
+ manifest_name_value nv (p.next ());
+
+ auto bad_value ([&p, &nv] (const string& d) {
+ throw parsing (p.name (), nv.value_line, nv.value_column, d);});
+
+ if (nv.empty ())
+ bad_value ("empty manifest");
+
+ const string& n (nv.name);
+ const string& v (nv.value);
+
+ // The format version pair is verified by the parser.
+ //
+ assert (n.empty () && v == "1");
+
+ // Cache the format version pair.
+ //
+ rvs.push_back (move (nv));
+
+ // Get and verify the HTTP status.
+ //
+ nv = p.next ();
+ if (n != "status")
+ bad_value ("no status specified");
+
+ char* e (nullptr);
+ unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw.
+
+ assert (e != nullptr);
+
+ if (!(*e == '\0' && c >= 100 && c < 600))
+ bad_value ("invalid HTTP status '" + v + "'");
+
+ // Cache the HTTP status.
+ //
+ sc = static_cast<status_code> (c);
+ rvs.push_back (move (nv));
+
+ // Cache the remaining name/value pairs.
+ //
+ for (nv = p.next (); !nv.empty (); nv = p.next ())
+ rvs.push_back (move (nv));
+
+ // Cache end of manifest.
+ //
+ rvs.push_back (move (nv));
+ }
+ catch (const parsing& e)
+ {
+ error << "ref " << ac << ": unable to parse handler's output: " << e;
+
+ // It appears the handler had misbehaved, so let's stash the submission
+ // directory for troubleshooting.
+ //
+ stash_submit_dir ();
+
return respond_error ();
}
}
@@ -637,7 +880,7 @@ handle (request& rq, response& rs)
// serialization error log the error description and return false, on the
// stream error pass through the io_error exception, otherwise return true.
//
- auto rsm = [&rvs, &error] (ostream& os) -> bool
+ auto rsm = [&rvs, &error, &ac] (ostream& os) -> bool
{
assert (!rvs.empty ());
@@ -651,30 +894,52 @@ handle (request& rq, response& rs)
}
catch (const serialization& e)
{
- error << "unable to serialize handler's output: " << e;
+ error << "ref " << ac << ": unable to serialize handler's output: " << e;
return false;
}
};
- // Save the result manifest, if generated, into the submission directory
- // if it still exists (note that the handler could move or remove it).
+ // If the submission data directory still exists then perform an appropriate
+ // action on it, depending on the submission result status. Note that the
+ // handler could move or remove the directory.
//
- path rsf (dd / "result.manifest");
-
- if (!rvs.empty () && dir_exists (dd))
- try
+ if (dir_exists (dd))
{
- ofdstream os (rsf);
- bool r (rsm (os));
- os.close ();
+ // Remove the directory if the client error is detected.
+ //
+ if (sc >= 400 && sc < 500)
+ rmdir_r (dd);
- if (!r)
- return respond_error (); // The error description is already logged.
- }
- catch (const io_error& e)
- {
- error << "unable to write to '" << rsf << "': " << e;
- return respond_error ();
+ // Otherwise, save the result manifest, if generated, into the directory.
+ // Also stash the directory for troubleshooting in case of the server
+ // error.
+ //
+ else
+ {
+ path rsf (dd / "result.manifest");
+
+ if (!rvs.empty ())
+ try
+ {
+ ofdstream os (rsf);
+
+ // Not being able to stash the result manifest is not a reason to
+ // claim the submission failed. The error is logged nevertheless.
+ //
+ rsm (os);
+
+ os.close ();
+ }
+ catch (const io_error& e)
+ {
+ // Not fatal (see above).
+ //
+ error << "unable to write to '" << rsf << "': " << e;
+ }
+
+ if (sc >= 500 && sc < 600)
+ stash_submit_dir ();
+ }
}
// Send email, if configured, and the submission is not simulated.
diff --git a/mod/options.cli b/mod/options.cli
index 992ffc4..59aceb6 100644
--- a/mod/options.cli
+++ b/mod/options.cli
@@ -458,6 +458,14 @@ namespace brep
(see \cb{submit-handler} for details). Repeat this option to specify
multiple arguments."
}
+
+ size_t submit-handler-timeout
+ {
+ "<seconds>",
+ "The handler program timeout in seconds. If specified and the handler
+ does not exit in the alloted time, then it is killed and its
+ termination is treated as abnormal."
+ }
};
class repository_root: handler