aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2024-02-23 17:50:38 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2024-02-26 11:52:12 +0300
commit4a3ec1c7e77e3da09d39ab57048bc6b1a39fb003 (patch)
treece74d09943dd33282a8aafeafa805c1583d9cdd4
parentea51b01330a96084f1b33cc4fef4d7eec6c8d170 (diff)
Add read_http_status(), parse_http_status_code(), and read_http_response_line() static functions to curl class
-rw-r--r--libbutl/curl.cxx120
-rw-r--r--libbutl/curl.hxx34
-rw-r--r--tests/curl/testscript6
3 files changed, 157 insertions, 3 deletions
diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx
index addba81..92baa4c 100644
--- a/libbutl/curl.cxx
+++ b/libbutl/curl.cxx
@@ -5,6 +5,7 @@
#include <cassert>
#include <utility> // move()
+#include <cstdlib> // strtoul(), size_t
#include <exception> // invalid_argument
#include <libbutl/utility.hxx>
@@ -175,4 +176,123 @@ namespace butl
throw invalid_argument ("unsupported protocol");
}
+
+  uint16_t curl::
+  parse_http_status_code (const string& s)
+  {
+    // Parse the HTTP status code and return it if it is in the [100, 600)
+    // range. Return 0 otherwise.
+    //
+    // The HTTP status code is exactly three decimal digits. Check this
+    // up-front since strtoul() on its own would also accept leading
+    // whitespaces, a sign (with the negated value wrapping around into the
+    // valid range), leading zeros, and would stop at an embedded NUL
+    // character.
+    //
+    if (s.size () != 3 || s.find_first_not_of ("0123456789") != string::npos)
+      return 0;
+
+    // Can't throw nor overflow for three digits.
+    //
+    unsigned long c (strtoul (s.c_str (), nullptr, 10));
+
+    return c >= 100 && c < 600
+      ? static_cast<uint16_t> (c)
+      : 0;
+  }
+
+  string curl::
+  read_http_response_line (ifdstream& is)
+  {
+    // Note that getline() extracts but does not store the trailing LF (0xA).
+    //
+    string l;
+    getline (is, l);
+
+    // There is no automatic CRLF to LF translation on POSIX, so if the line
+    // ends with CR (0xD), drop it manually.
+    //
+    if (l.empty () || l.back () != '\r')
+      return l;
+
+    l.pop_back ();
+    return l;
+  }
+
+  curl::http_status curl::
+  read_http_status (ifdstream& is, bool skip_headers)
+  {
+    // After getting the status line, if requested, we will read until the
+    // empty line (containing just CRLF). Not being able to reach such a line
+    // is an error, which is the reason for the exception mask choice (with
+    // it reaching EOF results in ios::failure). When done, we will restore
+    // the original exception mask. Note that it is not restored if
+    // ios::failure is thrown.
+    //
+    ifdstream::iostate es (is.exceptions ());
+    is.exceptions (ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+    // Read the next line and parse it as the status line:
+    //
+    // HTTP/<version> <code> [<reason>]
+    //
+    // If it cannot be parsed, restore the exception mask and throw
+    // invalid_argument.
+    //
+    auto read_status = [&is, es] ()
+    {
+      string l (read_http_response_line (is));
+
+      for (;;) // Breakout loop.
+      {
+        if (l.compare (0, 5, "HTTP/") != 0)
+          break;
+
+        size_t p (l.find (' ', 5));             // The protocol end.
+        if (p == string::npos)
+          break;
+
+        p = l.find_first_not_of (' ', p + 1);   // The code start.
+        if (p == string::npos)
+          break;
+
+        size_t e (l.find (' ', p + 1));         // The code end.
+        if (e == string::npos)
+          break;
+
+        uint16_t c (parse_http_status_code (string (l, p, e - p)));
+        if (c == 0)
+          break;
+
+        // The reason phrase is optional. If present, strip the trailing
+        // spaces.
+        //
+        string r;
+        p = l.find_first_not_of (' ', e + 1);   // The reason start.
+        if (p != string::npos)
+        {
+          e = l.find_last_not_of (' ');         // The reason end.
+          assert (e != string::npos && e >= p);
+
+          r = string (l, p, e - p + 1);
+        }
+
+        return http_status {c, move (r)};
+      }
+
+      is.exceptions (es); // Restore the exception mask.
+
+      throw invalid_argument ("invalid status line '" + l + "'");
+    };
+
+    // Skip the lines up to and including the empty line that terminates the
+    // header section.
+    //
+    auto skip_to_empty_line = [&is] ()
+    {
+      while (!read_http_response_line (is).empty ()) ;
+    };
+
+    // The curl output for a successful request looks like this:
+    //
+    // HTTP/1.1 100 Continue
+    //
+    // HTTP/1.1 200 OK
+    // Content-Length: 83
+    // Content-Type: text/manifest;charset=utf-8
+    //
+    // <response-body>
+    //
+    // curl normally sends the 'Expect: 100-continue' header for uploads, so
+    // we need to handle the interim HTTP server response with the continue
+    // (100) status code.
+    //
+    // Interestingly, Apache can respond with the continue (100) code and with
+    // the not found (404) code afterwards.
+    //
+    // Note also that a server may send more than one interim (1xx) response
+    // before the final one (for example, 100 followed by 103) and each of
+    // them consists of the status line and headers only. So we skip all of
+    // them rather than just a single 100 response. The only exception is the
+    // switching protocols (101) code, which is not followed by another HTTP
+    // response and so is returned as is.
+    //
+    http_status rs (read_status ());
+
+    while (rs.code >= 100 && rs.code < 200 && rs.code != 101)
+    {
+      skip_to_empty_line (); // Skips the interim response.
+      rs = read_status ();   // Reads the next (potentially final) status.
+    }
+
+    if (skip_headers)
+      skip_to_empty_line (); // Skips headers.
+
+    is.exceptions (es);
+
+    return rs;
+  }
}
diff --git a/libbutl/curl.hxx b/libbutl/curl.hxx
index cd4ebd0..3fa7890 100644
--- a/libbutl/curl.hxx
+++ b/libbutl/curl.hxx
@@ -4,6 +4,7 @@
#pragma once
#include <string>
+#include <cstdint> // uint16_t
#include <type_traits>
#include <libbutl/path.hxx>
@@ -120,6 +121,39 @@ namespace butl
const std::string& url,
A&&... options);
+ // Read the HTTP response status from an input stream.
+ //
+ // Specifically, read and parse the HTTP status line (skipping the interim
+ // response first if its status code is 100), by default skip over the
+ // remaining headers (leaving the stream at the beginning of the response
+ // body), and return the status code and the reason phrase (empty if
+ // absent). Throw std::invalid_argument if the status line could not be
+ // parsed. Pass through the ios::failure exception on the stream error
+ // (including EOF reached before the expected line is read), in which case
+ // the stream's exception mask may not be preserved.
+ //
+ struct http_status
+ {
+ std::uint16_t code; // Status code, for example, 200.
+ std::string reason; // Reason phrase, for example, OK; may be empty.
+ };
+
+ static http_status
+ read_http_status (ifdstream&, bool skip_headers = true);
+
+ // Parse the HTTP status code and return it if it is in the [100, 600)
+ // range. Return 0 if the argument is invalid or is out of this range.
+ //
+ static std::uint16_t
+ parse_http_status_code (const std::string&);
+
+ // Read the line terminated with CRLF (or just LF) from an input stream,
+ // stripping the terminator. Pass through the ios::failure exception on the
+ // stream error.
+ //
+ static std::string
+ read_http_response_line (ifdstream&);
+
private:
enum method_proto {ftp_get, ftp_put, http_get, http_post};
using method_proto_options = small_vector<const char*, 2>;
diff --git a/tests/curl/testscript b/tests/curl/testscript
index 3da2306..453c09a 100644
--- a/tests/curl/testscript
+++ b/tests/curl/testscript
@@ -43,14 +43,14 @@ sudo /usr/sbin/in.tftpd \
: http
:
{
- $* 'http' 2>>EOE
+ $* 'http' 2>>~%EOE%
curl -s -S --fail --location https://build2.org/bogus
- curl: (22) The requested URL returned error: 404 Not Found
+ %curl: \(22\) The requested URL returned error: 404( Not Found)?%
curl -s -S --fail --location https://build2.org
curl -s -S --fail --location --data-binary @- https://build2.org/bogus
- curl: (22) The requested URL returned error: 404 Not Found
+ %curl: \(22\) The requested URL returned error: 404( Not Found)?%
EOE
}