diff options
-rw-r--r-- | libbutl/curl.cxx | 120 | ||||
-rw-r--r-- | libbutl/curl.hxx | 34 | ||||
-rw-r--r-- | tests/curl/testscript | 6 |
3 files changed, 157 insertions, 3 deletions
diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx
index addba81..92baa4c 100644
--- a/libbutl/curl.cxx
+++ b/libbutl/curl.cxx
@@ -5,6 +5,7 @@
 
 #include <cassert>
 #include <utility>   // move()
+#include <cstdlib>   // strtoul(), size_t
 #include <exception> // invalid_argument
 
 #include <libbutl/utility.hxx>
@@ -175,4 +176,123 @@ namespace butl
 
     throw invalid_argument ("unsupported protocol");
   }
+
+  uint16_t curl::
+  parse_http_status_code (const string& s)
+  {
+    // Note that strtoul() skips leading whitespace and accepts a sign, so
+    // make sure the argument consists of decimal digits only.
+    //
+    bool d (s.find_first_not_of ("0123456789") == string::npos);
+    unsigned long c (strtoul (s.c_str (), nullptr, 10)); // Can't throw.
+
+    return d && c >= 100 && c < 600 ? static_cast<uint16_t> (c) : 0;
+  }
+
+  string curl::
+  read_http_response_line (ifdstream& is)
+  {
+    string r;
+    getline (is, r); // Strips the trailing LF (0xA).
+
+    // Note that on POSIX CRLF is not automatically translated into LF, so we
+    // need to strip CR (0xD) manually.
+    //
+    if (!r.empty () && r.back () == '\r')
+      r.pop_back ();
+
+    return r;
+  }
+
+  curl::http_status curl::
+  read_http_status (ifdstream& is, bool skip_headers)
+  {
+    // After getting the status line, if requested, we will read until the
+    // empty line (containing just CRLF). Not being able to reach such a line
+    // is an error, which is the reason for the exception mask choice. When
+    // done, we will restore the original exception mask.
+    //
+    ifdstream::iostate es (is.exceptions ());
+    is.exceptions (ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+    auto read_status = [&is, es] ()
+    {
+      string l (read_http_response_line (is));
+
+      for (;;) // Breakout loop.
+      {
+        if (l.compare (0, 5, "HTTP/") != 0)
+          break;
+
+        size_t p (l.find (' ', 5));           // The protocol end.
+        if (p == string::npos)
+          break;
+
+        p = l.find_first_not_of (' ', p + 1); // The code start.
+        if (p == string::npos)
+          break;
+
+        size_t e (l.find (' ', p + 1));       // The code end.
+        if (e == string::npos)
+          e = l.size (); // No reason phrase nor the space preceding it.
+
+        uint16_t c (parse_http_status_code (string (l, p, e - p)));
+        if (c == 0)
+          break;
+
+        string r;
+        p = l.find_first_not_of (' ', e + 1); // The reason start.
+        if (p != string::npos)
+        {
+          e = l.find_last_not_of (' ');       // The reason end.
+          assert (e != string::npos && e >= p);
+
+          r = string (l, p, e - p + 1);
+        }
+
+        return http_status {c, move (r)};
+      }
+
+      is.exceptions (es); // Restore the exception mask.
+
+      throw invalid_argument ("invalid status line '" + l + "'");
+    };
+
+    // The curl output for a successful request looks like this:
+    //
+    // HTTP/1.1 100 Continue
+    //
+    // HTTP/1.1 200 OK
+    // Content-Length: 83
+    // Content-Type: text/manifest;charset=utf-8
+    //
+    // <response-body>
+    //
+    // curl normally sends the 'Expect: 100-continue' header for uploads, so
+    // we need to handle the interim HTTP server response with the continue
+    // (100) status code.
+    //
+    // Interestingly, Apache can respond with the continue (100) code and with
+    // the not found (404) code afterwards.
+    //
+    http_status rs (read_status ());
+
+    if (rs.code == 100)
+    {
+      // Skips the interim response.
+      //
+      while (!read_http_response_line (is).empty ()) ;
+
+      rs = read_status (); // Reads the final status code.
+    }
+
+    if (skip_headers)
+    {
+      while (!read_http_response_line (is).empty ()) ; // Skips headers.
+    }
+
+    is.exceptions (es);
+
+    return rs;
+  }
 }
diff --git a/libbutl/curl.hxx b/libbutl/curl.hxx
index cd4ebd0..3fa7890 100644
--- a/libbutl/curl.hxx
+++ b/libbutl/curl.hxx
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <string>
+#include <cstdint>     // uint16_t
 #include <type_traits>
 
 #include <libbutl/path.hxx>
@@ -120,6 +121,39 @@ namespace butl
           const std::string& url,
           A&&... options);
 
+    // Read the HTTP response status from an input stream.
+    //
+    // Specifically, read and parse the HTTP status line, by default skip over
+    // the remaining headers (leaving the stream at the beginning of the
+    // response body), and return the status code and the reason phrase. Throw
+    // std::invalid_argument if the status line could not be parsed. Pass
+    // through the ios::failure exception on the stream error.
+    //
+    // Note that if ios::failure is thrown the stream's exception mask may not
+    // be preserved.
+    //
+    struct http_status
+    {
+      std::uint16_t code;
+      std::string reason;
+    };
+
+    static http_status
+    read_http_status (ifdstream&, bool skip_headers = true);
+
+    // Parse and return the HTTP status code. Return 0 if the argument is not
+    // a decimal number in the [100, 600) range.
+    //
+    static std::uint16_t
+    parse_http_status_code (const std::string&);
+
+    // Read the CRLF-terminated line from an input stream, stripping the
+    // trailing CRLF. Pass through the ios::failure exception on the stream
+    // error.
+    //
+    static std::string
+    read_http_response_line (ifdstream&);
+
   private:
     enum method_proto {ftp_get, ftp_put, http_get, http_post};
     using method_proto_options = small_vector<const char*, 2>;
diff --git a/tests/curl/testscript b/tests/curl/testscript
index 3da2306..453c09a 100644
--- a/tests/curl/testscript
+++ b/tests/curl/testscript
@@ -43,14 +43,14 @@ sudo /usr/sbin/in.tftpd \
 
 : http
 :
 {
-  $* 'http' 2>>EOE
+  $* 'http' 2>>~%EOE%
     curl -s -S --fail --location https://build2.org/bogus
-    curl: (22) The requested URL returned error: 404 Not Found
+    %curl: \(22\) The requested URL returned error: 404( Not Found)?%
 
     curl -s -S --fail --location https://build2.org
 
     curl -s -S --fail --location --data-binary @- https://build2.org/bogus
-    curl: (22) The requested URL returned error: 404 Not Found
+    %curl: \(22\) The requested URL returned error: 404( Not Found)?%
     EOE
 }