From 01c179eed3fcfccc7cdd262742935177dfcf5106 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 10 Jan 2023 23:33:31 +0300 Subject: Assume git repository protocol as smart if HTTP response code is 401 --- bpkg/fetch-git.cxx | 102 +++++++++++---- bpkg/fetch.cxx | 355 +++++++++++++++++++++++++++++++++++++++++++++-------- bpkg/fetch.hxx | 32 ++++- bpkg/utility.cxx | 19 +++ bpkg/utility.hxx | 6 + 5 files changed, 434 insertions(+), 80 deletions(-) diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index 79567e6..dc1e16a 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -285,20 +285,7 @@ namespace bpkg try { - ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); - - // We could probably write something like this, instead: - // - // *diag_stream << is.rdbuf () << flush; - // - // However, it would never throw and we could potentially miss the - // reading failure, unless we decide to additionally mess with the - // diagnostics stream exception mask. - // - for (string l; !eof (getline (is, l)); ) - *diag_stream << l << endl; - - is.close (); + dump_stderr (move (pipe.in)); // Fall through. } @@ -549,7 +536,11 @@ namespace bpkg // For HTTP(S) sense the protocol type by sending the first HTTP request of // the fetch operation handshake and analyzing the first line of the // response. Fail if connecting to the server failed, the response code - // differs from 200, or reading the response body failed. + // differs from 200 and 401, or reading the response body failed. If the + // response code is 401 (requires authentication), then consider protocol as + // smart. The thinking here is that a git repository with support for + // authentication is likely one of the hosting places (like git{hub,lab}) + // and is unlikely to be dumb. // // Note that, as a side-effect, this function checks the HTTP(S) server // availability and so must be called prior to any git command that involves @@ -574,12 +565,12 @@ namespace bpkg static capabilities sense_capabilities (const common_options& co, - repository_url url, + const repository_url& repo_url, const semantic_version& git_ver) { - assert (url.path); + assert (repo_url.path); - switch (url.scheme) + switch (repo_url.scheme) { case repository_protocol::git: case repository_protocol::ssh: @@ -588,6 +579,9 @@ namespace bpkg case repository_protocol::https: break; // Ask the server (see below). } + // Craft the URL for sensing the capabilities. + // + repository_url url (repo_url); path& up (*url.path); if (!up.to_directory ()) @@ -601,19 +595,69 @@ namespace bpkg url.query = "service=git-upload-pack"; string u (url.string ()); - process pr (start_fetch (co, - u, - path () /* out */, - "git/" + git_ver.string ())); + + // Start fetching, also trying to retrieve the HTTP status code. + // + // We unset failbit to properly handle an empty response (no refs) from + // the dumb server. + // + ifdstream is (ifdstream::badbit); + + pair ps ( + start_fetch_http (co, + u, + is /* out */, + fdstream_mode::skip | fdstream_mode::binary, + true /* quiet */, + "git/" + git_ver.string ())); + + process& pr (ps.first); + + // If the fetch program stderr is redirected, then read it out and pass + // through. + // + auto dump_stderr = [&pr] () + { + if (pr.in_efd != nullfd) + try + { + bpkg::dump_stderr (move (pr.in_efd)); + } + catch (const io_error&) + { + // Not much we can do here. + } + }; try { - // We unset failbit to properly handle an empty response (no refs) from - // the dumb server. + // If authentication is required (HTTP status code is 401), then + // consider the protocol as smart. Drop the diagnostics if that's the + // case and dump it otherwise. // - ifdstream is (move (pr.in_ofd), - fdstream_mode::skip | fdstream_mode::binary, - ifdstream::badbit); + if (ps.second == 401) + { + if (verb >= 2) + { + info << "smart git protocol assumed for repository " << repo_url + << " due to authentication requirement" << + info << "use --git-capabilities to override or suppress this " + << "diagnostics"; + } + + // Note that we don't care about the process exit code here and just + // silently wait for the process completion in the process object + // destructor. We, however, close the stream (reading out the + // content), so that the process won't get blocked writing to it. + // + // Also note that we drop the potentially redirected process stderr + // stream content. We even don't read it out, since we assume it fully + // fits into the pipe buffer. + // + is.close (); + + return capabilities::smart; + } string l; getline (is, l); // Is empty if no refs returned by the dumb server. @@ -667,6 +711,8 @@ namespace bpkg is.close (); + dump_stderr (); + if (pr.wait ()) return r; @@ -674,6 +720,8 @@ namespace bpkg } catch (const io_error&) { + dump_stderr (); + if (pr.wait ()) fail << "unable to read fetched " << url << endg; diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index 44f1ba4..a7fe8c9 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -84,13 +84,21 @@ namespace bpkg } } - static process + // Note that there is no easy way to retrieve the HTTP status code for wget + // (there is no reliable way to redirect the status line/headers to stdout) + // and thus we always return 0. Due to the status code unavailability there + // is no need to redirect stderr and thus we ignore the quiet mode. + // + static pair start_wget (const path& prog, const optional& timeout, bool progress, bool no_progress, + bool /* quiet */, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -108,8 +116,8 @@ namespace bpkg }; // Wget 1.16 introduced the --show-progress option which in the quiet mode - // shows a nice and tidy progress bar (if only it also showed errors, then - // it would have been perfect). + // (-q) shows a nice and tidy progress bar (if only it also showed errors, + // then it would have been perfect). // bool has_show_progress (wget_major > 1 || (wget_major == 1 && wget_minor >= 16)); @@ -222,12 +230,19 @@ namespace bpkg // just the file name (rather than the whole path) in the progress // report. Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data (), - 0, 1, 2, - out.directory ().string ().c_str (), - env.vars) - : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); + process pr (fo + ? process (pp, args.data (), + 0, 1, 2, + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), + 0, -1, 2, + nullptr /* cwd */, env.vars)); + + if (out_is != nullptr) + out_is->open (move (pr.in_ofd), out_ism); + + return make_pair (move (pr), 0); } // curl @@ -275,13 +290,21 @@ namespace bpkg return false; } - static process + // If HTTP status code needs to be retrieved, then open the passed stream + // and read out the status line(s) extracting the status code and the + // headers. Otherwise, return 0 indicating that the status code is not + // available. In the former case redirect stderr and respect the quiet mode. + // + static pair start_curl (const path& prog, const optional& timeout, bool progress, bool no_progress, + bool quiet, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -313,28 +336,31 @@ namespace bpkg // Higher than that -- run it verbose. Always show the progress // bar if requested explicitly, even in the quiet mode. // - if (verb < (fo ? 1 : 2)) + if (!quiet) { - if (!progress) + if (verb < (fo ? 1 : 2)) { - suppress_progress (); - no_progress = false; // Already suppressed. + if (!progress) + { + suppress_progress (); + no_progress = false; // Already suppressed. + } } + else if (fo && verb == 1) + { + if (!no_progress) + args.push_back ("--progress-bar"); + } + else if (verb > 3) + args.push_back ("-v"); } - else if (fo && verb == 1) - { - if (!no_progress) - args.push_back ("--progress-bar"); - } - else if (verb > 3) - args.push_back ("-v"); // Suppress progress. // // Note: the `-v -s` options combination is valid and results in a verbose // output without progress. // - if (no_progress) + if (no_progress || quiet) suppress_progress (); // Set download timeout if requested. @@ -369,11 +395,23 @@ namespace bpkg args.push_back (http_proxy.c_str ()); } + // Status code. + // + if (out_is != nullptr) + { + assert (!fo); // Currently unsupported (see start_fetch() for details). + + args.push_back ("-i"); + } + args.push_back (url.c_str ()); args.push_back (nullptr); process_path pp (process::path_search (args[0])); + // Let's still print the command line in the quiet mode to ease the + // troubleshooting. + // if (verb >= 2) print_process (args); else if (verb == 1 && fo && !no_progress) @@ -386,9 +424,143 @@ namespace bpkg // Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data ()) - : process (pp, args.data (), 0, -1); + process pr (fo + ? process (pp, args.data ()) + : process (pp, args.data (), + 0, -1, out_is != nullptr ? -1 : 2)); + + // Close the process stdout stream and read stderr stream out and dump. + // + // Needs to be called prior to failing, so that the process won't get + // blocked writing to stdout and so that stderr get dumped before the + // error message we issue. + // + auto close_streams = [&pr, out_is] () + { + try + { + out_is->close (); + + bpkg::dump_stderr (move (pr.in_efd)); + } + catch (const io_error&) + { + // Not much we can do here. + } + }; + + // If HTTP status code needs to be retrieved, then open the passed stream + // and read out the status line(s) and headers. + // + // Note that this implementation is inspired by the bdep's + // http_service::post() function. + // + uint16_t sc (0); + + if (out_is != nullptr) + try + { + // At this stage we will read until the empty line (containing just + // CRLF). Not being able to reach such a line is an error, which is the + // reason for the exception mask choice. When done, we will restore the + // original exception mask. + // + ifdstream::iostate es (out_is->exceptions ()); + + out_is->exceptions ( + ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); + + out_is->open (move (pr.in_ofd), out_ism); + + // Parse and return the HTTP status code. Return 0 if the argument is + // invalid. + // + auto status_code = [] (const string& s) + { + char* e (nullptr); + unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. + assert (e != nullptr); + + return *e == '\0' && c >= 100 && c < 600 + ? static_cast (c) + : 0; + }; + + // Read the CRLF-terminated line from the stream stripping the trailing + // CRLF. + // + auto read_line = [out_is] () + { + string l; + getline (*out_is, l); // Strips the trailing LF (0xA). + + // Note that on POSIX CRLF is not automatically translated into LF, so + // we need to strip CR (0xD) manually. + // + if (!l.empty () && l.back () == '\r') + l.pop_back (); + + return l; + }; + + auto read_status = [&read_line, &status_code, &url, &close_streams] () + -> uint16_t + { + string l (read_line ()); + + for (;;) // Breakout loop. + { + if (l.compare (0, 5, "HTTP/") != 0) + break; + + size_t p (l.find (' ', 5)); // The protocol end. + if (p == string::npos) + break; + + p = l.find_first_not_of (' ', p + 1); // The code start. + if (p == string::npos) + break; + + size_t e (l.find (' ', p + 1)); // The code end. + if (e == string::npos) + break; + + uint16_t c (status_code (string (l, p, e - p))); + if (c == 0) + break; + + return c; + } + + close_streams (); + + fail << "invalid HTTP response status line '" << l + << "' while fetching " << url; + + assert (false); // Can't be here. + return 0; + }; + + sc = read_status (); + + if (sc == 100) + { + while (!read_line ().empty ()) ; // Skips the interim response. + sc = read_status (); // Reads the final status code. + } + + while (!read_line ().empty ()) ; // Skips headers. + + out_is->exceptions (es); + } + catch (const io_error&) + { + close_streams (); + + fail << "unable to read HTTP response status line for " << url; + } + + return make_pair (move (pr), sc); } // fetch @@ -438,13 +610,24 @@ namespace bpkg return false; } - static process + // Note that there is no easy way to retrieve the HTTP status code for the + // fetch program and thus we always return 0. + // + // Also note that in the HTTP status code retrieval mode (out_is != NULL) we + // nevertheless redirect stderr to prevent the fetch program from + // interactively querying the user for the credentials. Thus, we also + // respect the quiet mode in contrast to start_wget(). + // + static pair start_fetch (const path& prog, const optional& timeout, bool progress, bool no_progress, + bool quiet, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -476,23 +659,26 @@ namespace bpkg // unless the verbosity level is greater than three, in which case we will // run verbose (and with progress). That's the best we can do. // - if (verb < (fo ? 1 : 2)) + if (!quiet) { - if (!progress) + if (verb < (fo ? 1 : 2)) { - args.push_back ("-q"); - no_progress = false; // Already suppressed with -q. + if (!progress) + { + args.push_back ("-q"); + no_progress = false; // Already suppressed with -q. + } + } + else if (verb > 3) + { + args.push_back ("-v"); + no_progress = false; // Don't be quiet in the verbose mode (see above). } - } - else if (verb > 3) - { - args.push_back ("-v"); - no_progress = false; // Don't be quiet in the verbose mode (see above). } // Suppress progress. // - if (no_progress) + if (no_progress || quiet) args.push_back ("-q"); // Set download timeout if requested. @@ -534,6 +720,9 @@ namespace bpkg env.vars = evars; } + // Let's still print the command line in the quiet mode to ease the + // troubleshooting. + // if (verb >= 2) print_process (env, args); @@ -542,12 +731,19 @@ namespace bpkg // just the file name (rather than the whole path) in the progress // report. Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data (), - 0, 1, 2, - out.directory ().string ().c_str (), - env.vars) - : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); + process pr (fo + ? process (pp, args.data (), + 0, 1, 2, + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), + 0, -1, out_is != nullptr ? -1 : 2, + nullptr /* cwd */, env.vars)); + + if (out_is != nullptr) + out_is->open (move (pr.in_ofd), out_ism); + + return make_pair (move (pr), 0); } // The dispatcher. @@ -670,22 +866,38 @@ namespace bpkg return kind_; } - process + static pair start_fetch (const common_options& o, const string& src, + ifdstream* out_is, + fdstream_mode out_ism, + bool quiet, const path& out, const string& user_agent, const url& proxy) { - process (*f) (const path&, - const optional&, - bool, - bool, - const strings&, - const string&, - const path&, - const string&, - const string&) = nullptr; + // Currently, for the sake of simplicity, we don't support retrieving the + // HTTP status code if we fetch into a file. + // + assert (out.empty () || out_is == nullptr); + + // Quiet mode is only meaningful if HTTP status code needs to be + // retrieved. + // + assert (!quiet || out_is != nullptr); + + pair (*f) (const path&, + const optional&, + bool, + bool, + bool, + const strings&, + const string&, + ifdstream*, + fdstream_mode, + const path&, + const string&, + const string&) = nullptr; fetch_kind fk (check (o)); switch (fk) @@ -793,8 +1005,11 @@ namespace bpkg timeout, o.progress (), o.no_progress (), + quiet, os, !http_url.empty () ? http_url : src, + out_is, + out_ism, out, user_agent, http_proxy); @@ -809,4 +1024,40 @@ namespace bpkg throw failed (); } } + + process + start_fetch (const common_options& o, + const string& src, + const path& out, + const string& user_agent, + const url& proxy) + { + return start_fetch (o, + src, + nullptr /* out_is */, + fdstream_mode::none, + false /* quiet */, + out, + user_agent, + proxy).first; + } + + pair + start_fetch_http (const common_options& o, + const string& src, + ifdstream& out, + fdstream_mode out_mode, + bool quiet, + const string& user_agent, + const url& proxy) + { + return start_fetch (o, + src, + &out, + out_mode, + quiet, + path () /* out */, + user_agent, + proxy); + } } diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx index c077079..78e77a7 100644 --- a/bpkg/fetch.hxx +++ b/bpkg/fetch.hxx @@ -138,11 +138,41 @@ namespace bpkg // option for details). // process - start_fetch (const common_options& o, + start_fetch (const common_options&, const string& url, const path& out = {}, const string& user_agent = {}, const butl::url& proxy = {}); + + // Similar to the above but can only be used for HTTP(S) URLs. Additionally + // return the HTTP status code, if the underlying fetch program provides an + // easy way to retrieve it, and 0 otherwise. + // + // In the former case redirect the fetch process stderr to a pipe, so that + // depending on the returned status code the caller can either drop or dump + // the fetch process diagnostics. May also redirect stderr in the latter + // case for some implementation-specific reasons (to prevent the underlying + // fetch program from interacting with the user, etc). The caller can detect + // whether stderr is redirected by checking process::in_efd. + // + // In contrast to start_fetch() always fetch to stdout, which can be read by + // the caller from the specified stream. + // + // If the quiet argument is true, then, if stderr is redirected, minimize + // the amount of diagnostics printed by the fetch program by only printing + // errors. That allows the caller to read stdout and stderr streams + // sequentially in the blocking mode by assuming that the diagnostics always + // fits into the pipe buffer. If stderr is not redirected, then the quiet + // argument is ignored in favor of the more informative diagnostics. + // + pair + start_fetch_http (const common_options&, + const string& url, + ifdstream& out, + fdstream_mode out_mode, + bool quiet, + const string& user_agent = {}, + const butl::url& proxy = {}); } #endif // BPKG_FETCH_HXX diff --git a/bpkg/utility.cxx b/bpkg/utility.cxx index 467e01f..52114df 100644 --- a/bpkg/utility.cxx +++ b/bpkg/utility.cxx @@ -366,4 +366,23 @@ namespace bpkg ? co.build ().string ().c_str () : BPKG_EXE_PREFIX "b" BPKG_EXE_SUFFIX; } + + void + dump_stderr (auto_fd&& fd) + { + ifdstream is (move (fd), fdstream_mode::skip, ifdstream::badbit); + + // We could probably write something like this, instead: + // + // *diag_stream << is.rdbuf () << flush; + // + // However, it would never throw and we could potentially miss the reading + // failure, unless we decide to additionally mess with the diagnostics + // stream exception mask. + // + for (string l; !eof (getline (is, l)); ) + *diag_stream << l << endl; + + is.close (); + } } diff --git a/bpkg/utility.hxx b/bpkg/utility.hxx index 1f5c725..8e7260a 100644 --- a/bpkg/utility.hxx +++ b/bpkg/utility.hxx @@ -246,6 +246,12 @@ namespace bpkg template void run_b (const common_options&, verb_b, A&&... args); + + // Read out the data from the specified file descriptor and dump it to + // stderr. Throw io_error on the underlying OS errors. + // + void + dump_stderr (auto_fd&&); } #include -- cgit v1.1