From fd4439e4d067f77642b3f3dfcaf50d605e69235f Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 21 Jul 2023 20:19:15 +0300 Subject: Advice user to re-fetch repositories if package fetch ends up with 404 HTTP status code (GH issue #299) --- bpkg/fetch-git.cxx | 2 +- bpkg/fetch-pkg.cxx | 76 +++++++++++++++++--- bpkg/fetch.cxx | 170 +++++++++++++++++++++++++++++++++------------ bpkg/fetch.hxx | 68 ++++++++++++------ bpkg/pkg-build-collect.cxx | 4 +- 5 files changed, 242 insertions(+), 78 deletions(-) diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index 44387de..d2c30a1 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -603,7 +603,7 @@ namespace bpkg u, is /* out */, fdstream_mode::skip | fdstream_mode::binary, - true /* quiet */, + stderr_mode::redirect_quiet, "git/" + git_ver.string ())); process& pr (ps.first); diff --git a/bpkg/fetch-pkg.cxx b/bpkg/fetch-pkg.cxx index ca4767f..721e4b8 100644 --- a/bpkg/fetch-pkg.cxx +++ b/bpkg/fetch-pkg.cxx @@ -97,20 +97,80 @@ namespace bpkg if (exists (df)) fail << "file " << df << " already exists"; + // Currently we only expect fetching a package archive via the HTTP(S) + // protocol. + // + switch (u.scheme) + { + case repository_protocol::git: + case repository_protocol::ssh: + case repository_protocol::file: assert (false); + case repository_protocol::http: + case repository_protocol::https: break; + } + auto_rmfile arm (df); - process pr (start_fetch (o, - u.string (), - df, - string () /* user_agent */, - o.pkg_proxy ())); - if (!pr.wait ()) + // Note that a package file may not be present in the repository due to + // outdated repository information. Thus, while fetching the file we also + // try to retrieve the HTTP status code. If the HTTP status code is + // retrieved and is 404 (not found) or the fetch program doesn't support + // its retrieval and fails, then we also advise the user to re-fetch the + // repositories. + // + pair ps ( + start_fetch_http (o, + u.string (), + df, + string () /* user_agent */, + o.pkg_proxy ())); + + process& pr (ps.first); + uint16_t sc (ps.second); + + // Fail if the fetch process didn't exit normally with 0 code or the HTTP + // status code is retrieved and differs from 200. + // + // Note that the diagnostics may potentially look as follows: + // + // foo-1.0.0.tar.gz: + // ###################################################### 100.0% + // error: unable to fetch package https://example.org/1/foo-1.0.0.tar.gz + // info: repository metadata could be stale + // info: run 'bpkg rep-fetch' (or equivalent) to update + // + // It's a bit unfortunate that the 100% progress indicator can be shown + // for a potential HTTP error and it doesn't seem that we can easily fix + // that. Note, however, that this situation is not very common and + // probably that's fine. + // + if (!pr.wait () || (sc != 0 && sc != 200)) { // While it is reasonable to assuming the child process issued // diagnostics, some may not mention the URL. // - fail << "unable to fetch " << u << - info << "re-run with -v for more information"; + diag_record dr (fail); + dr << "unable to fetch package " << u; + + // Print the HTTP status code in the diagnostics on the request failure, + // unless it cannot be retrieved or is 404. Note that the fetch program + // may even exit successfully on such a failure (see start_fetch_http() + // for details) and issue no diagnostics at all. + // + if (sc != 0 && sc != 200 && sc != 404) + dr << info << "HTTP status code " << sc; + + // If not found, advise the user to re-fetch the repositories. Note that + // if the status code cannot be retrieved, we assume it could be 404 and + // advise. + // + if (sc == 404 || sc == 0) + { + dr << info << "repository metadata could be stale" << + info << "run 'bpkg rep-fetch' (or equivalent) to update"; + } + else if (verb < 2) + dr << info << "re-run with -v for more information"; } arm.cancel (); diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index a9e4df2..4deee0a 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -6,6 +6,7 @@ #include using namespace std; +using namespace butl; namespace bpkg { @@ -87,14 +88,14 @@ namespace bpkg // Note that there is no easy way to retrieve the HTTP status code for wget // (there is no reliable way to redirect the status line/headers to stdout) // and thus we always return 0. Due to the status code unavailability there - // is no need to redirect stderr and thus we ignore the quiet mode. + // is no need to redirect stderr and thus we ignore the stderr mode. // static pair start_wget (const path& prog, const optional& timeout, bool progress, bool no_progress, - bool /* quiet */, + stderr_mode, const strings& ops, const string& url, ifdstream* out_is, @@ -239,7 +240,7 @@ namespace bpkg 0, -1, 2, nullptr /* cwd */, env.vars)); - if (out_is != nullptr) + if (!fo && out_is != nullptr) out_is->open (move (pr.in_ofd), out_ism); return make_pair (move (pr), 0); @@ -290,17 +291,19 @@ namespace bpkg return false; } - // If HTTP status code needs to be retrieved, then open the passed stream - // and read out the status line(s) extracting the status code and the - // headers. Otherwise, return 0 indicating that the status code is not - // available. In the former case redirect stderr and respect the quiet mode. + // If HTTP status code needs to be retrieved (out_is != NULL), then open the + // passed stream and read out the status line(s) extracting the status code + // and the headers. Otherwise, return 0 indicating that the status code is + // not available. In the former case if the output file is also specified, + // then read out and save the file if the status code is 200 and drop the + // HTTP response body otherwise. // static pair start_curl (const path& prog, const optional& timeout, bool progress, bool no_progress, - bool quiet, + stderr_mode err_mode, const strings& ops, const string& url, ifdstream* out_is, @@ -335,6 +338,8 @@ namespace bpkg // Higher than that -- run it verbose. Always show the progress // bar if requested explicitly, even in the quiet mode. // + bool quiet (err_mode == stderr_mode::redirect_quiet); + if (!quiet) { if (verb < (fo ? 1 : 2)) @@ -380,7 +385,7 @@ namespace bpkg // Output. By default curl writes to stdout. // - if (fo) + if (fo && out_is == nullptr) // Output to file and don't query HTTP status? { args.push_back ("-o"); args.push_back (out.string ().c_str ()); @@ -407,11 +412,7 @@ namespace bpkg // print the response status line and headers. // if (out_is != nullptr) - { - assert (!fo); // Currently unsupported (see start_fetch() for details). - args.push_back ("-i"); - } else args.push_back ("-f"); @@ -435,10 +436,10 @@ namespace bpkg // Process exceptions must be handled by the caller. // - process pr (fo + process pr (fo && out_is == nullptr ? process (pp, args.data ()) : process (pp, args.data (), - 0, -1, out_is != nullptr ? -1 : 2)); + 0, -1, err_mode == stderr_mode::pass ? 2 : -1)); // Close the process stdout stream and read stderr stream out and dump. // @@ -446,13 +447,16 @@ namespace bpkg // blocked writing to stdout and so that stderr get dumped before the // error message we issue. // - auto close_streams = [&pr, out_is] () + auto close_streams = [&pr, out_is, err_mode] () { try { + assert (out_is != nullptr); + out_is->close (); - bpkg::dump_stderr (move (pr.in_efd)); + if (err_mode != stderr_mode::pass) + bpkg::dump_stderr (move (pr.in_efd)); } catch (const io_error&) { @@ -476,12 +480,13 @@ namespace bpkg // reason for the exception mask choice. When done, we will restore the // original exception mask. // - ifdstream::iostate es (out_is->exceptions ()); + ifdstream& is (*out_is); + ifdstream::iostate es (is.exceptions ()); - out_is->exceptions ( + is.exceptions ( ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); - out_is->open (move (pr.in_ofd), out_ism); + is.open (move (pr.in_ofd), out_ism); // Parse and return the HTTP status code. Return 0 if the argument is // invalid. @@ -500,10 +505,10 @@ namespace bpkg // Read the CRLF-terminated line from the stream stripping the trailing // CRLF. // - auto read_line = [out_is] () + auto read_line = [&is] () { string l; - getline (*out_is, l); // Strips the trailing LF (0xA). + getline (is, l); // Strips the trailing LF (0xA). // Note that on POSIX CRLF is not automatically translated into LF, so // we need to strip CR (0xD) manually. @@ -559,7 +564,7 @@ namespace bpkg while (!read_line ().empty ()) ; // Skips headers. - out_is->exceptions (es); + is.exceptions (es); } catch (const io_error&) { @@ -568,6 +573,57 @@ namespace bpkg fail << "unable to read HTTP response status line for " << url; } + // If the output file is specified and the HTTP status code needs to also + // be retrieved, then read out and save the file if the status code is 200 + // and drop the HTTP response body otherwise. + // + bool io_read; // If true then io_error relates to a read operation. + if (fo && out_is != nullptr) + try + { + ifdstream& is (*out_is); + + // Read and save the file if the HTTP status code is 200. + // + if (sc == 200) + { + io_read = false; + ofdstream os (out, fdopen_mode::binary); + + bufstreambuf* buf (dynamic_cast (is.rdbuf ())); + assert (buf != nullptr); + + for (io_read = true; + is.peek () != istream::traits_type::eof (); // Potentially reads. + io_read = true) + { + size_t n (buf->egptr () - buf->gptr ()); + + io_read = false; + os.write (buf->gptr (), n); + + buf->gbump (static_cast (n)); + } + + io_read = false; + os.close (); + } + + // Close the stream, skipping the remaining content, if present. + // + io_read = true; + is.close (); + } + catch (const io_error& e) + { + close_streams (); + + if (io_read) + fail << "unable to read fetched " << url << ": " << e; + else + fail << "unable to write to " << out << ": " << e; + } + return make_pair (move (pr), sc); } @@ -621,17 +677,17 @@ namespace bpkg // Note that there is no easy way to retrieve the HTTP status code for the // fetch program and thus we always return 0. // - // Also note that in the HTTP status code retrieval mode (out_is != NULL) we - // nevertheless redirect stderr to prevent the fetch program from - // interactively querying the user for the credentials. Thus, we also - // respect the quiet mode in contrast to start_wget(). + // Also note that in the redirect* stderr modes we nevertheless redirect + // stderr to prevent the fetch program from interactively querying the user + // for the credentials. Thus, we also respect the redirect_quiet mode in + // contrast to start_wget(). // static pair start_fetch (const path& prog, const optional& timeout, bool progress, bool no_progress, - bool quiet, + stderr_mode err_mode, const strings& ops, const string& url, ifdstream* out_is, @@ -667,6 +723,8 @@ namespace bpkg // unless the verbosity level is greater than three, in which case we will // run verbose (and with progress). That's the best we can do. // + bool quiet (err_mode == stderr_mode::redirect_quiet); + if (!quiet) { if (verb < (fo ? 1 : 2)) @@ -745,10 +803,10 @@ namespace bpkg out.directory ().string ().c_str (), env.vars) : process (pp, args.data (), - 0, -1, out_is != nullptr ? -1 : 2, + 0, -1, err_mode == stderr_mode::pass ? 2 : -1, nullptr /* cwd */, env.vars)); - if (out_is != nullptr) + if (!fo && out_is != nullptr) out_is->open (move (pr.in_ofd), out_ism); return make_pair (move (pr), 0); @@ -879,26 +937,29 @@ namespace bpkg const string& src, ifdstream* out_is, fdstream_mode out_ism, - bool quiet, + stderr_mode err_mode, const path& out, const string& user_agent, const url& proxy) { - // Currently, for the sake of simplicity, we don't support retrieving the - // HTTP status code if we fetch into a file. + // Currently, for the sake of simplicity, we don't support redirecting + // stderr if we fetch into a file. // - assert (out.empty () || out_is == nullptr); - - // Quiet mode is only meaningful if HTTP status code needs to be - // retrieved. + assert (out.empty () || err_mode == stderr_mode::pass); + + // If out_is is not NULL and out is not empty, then the former argument is + // unused by the caller and only indicates that the HTTP status code still + // needs to be retrieved while the requested file needs to be saved. In + // this case if the fetch program doesn't provide an easy way to retrieve + // the HTTP status code, then the respective start_*() function can just + // ignore the referred stream. Otherwise, it may or may not use it for + // convenience but should close it before returning if it does. // - assert (!quiet || out_is != nullptr); - pair (*f) (const path&, const optional&, bool, bool, - bool, + stderr_mode, const strings&, const string&, ifdstream*, @@ -1013,7 +1074,7 @@ namespace bpkg timeout, o.progress (), o.no_progress (), - quiet, + err_mode, os, !http_url.empty () ? http_url : src, out_is, @@ -1044,7 +1105,7 @@ namespace bpkg src, nullptr /* out_is */, fdstream_mode::none, - false /* quiet */, + stderr_mode::pass, out, user_agent, proxy).first; @@ -1055,7 +1116,7 @@ namespace bpkg const string& src, ifdstream& out, fdstream_mode out_mode, - bool quiet, + stderr_mode err_mode, const string& user_agent, const url& proxy) { @@ -1063,9 +1124,30 @@ namespace bpkg src, &out, out_mode, - quiet, + err_mode, path () /* out */, user_agent, proxy); } + + pair + start_fetch_http (const common_options& o, + const string& src, + const path& out, + const string& user_agent, + const url& proxy) + { + assert (!out.empty ()); + + ifdstream is (ifdstream::badbit | ifdstream::failbit); + + return start_fetch (o, + src, + &is, + fdstream_mode::skip | fdstream_mode::binary, + stderr_mode::pass, + out, + user_agent, + proxy); + } } diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx index 6578f0d..daf1ffe 100644 --- a/bpkg/fetch.hxx +++ b/bpkg/fetch.hxx @@ -144,35 +144,57 @@ namespace bpkg const string& user_agent = {}, const butl::url& proxy = {}); - // Similar to the above but can only be used for HTTP(S) URLs. Additionally - // return the HTTP status code, if the underlying fetch program provides an - // easy way to retrieve it, and 0 otherwise. - // - // In the former case redirect the fetch process stderr to a pipe, so that - // depending on the returned status code the caller can either drop or dump - // the fetch process diagnostics. May also redirect stderr in the latter - // case for some implementation-specific reasons (to prevent the underlying - // fetch program from interacting with the user, etc). The caller can detect - // whether stderr is redirected by checking process::in_efd. - // - // In contrast to start_fetch() always fetch to stdout, which can be read by - // the caller from the specified stream. On HTTP errors (e.g., 404) this - // stream may contain the error description returned by the server and the - // process may exit with 0 code. - // - // If the quiet argument is true, then, if stderr is redirected, minimize - // the amount of diagnostics printed by the fetch program by only printing - // errors. That allows the caller to read stdout and stderr streams - // sequentially in the blocking mode by assuming that the diagnostics always - // fits into the pipe buffer. If stderr is not redirected, then the quiet - // argument is ignored in favor of the more informative diagnostics. + // Similar to the above but can only be used for fetching HTTP(S) URL to a + // file. Additionally return the HTTP status code, if the underlying fetch + // program provides an easy way to retrieve it, and 0 otherwise. // pair start_fetch_http (const common_options&, const string& url, + const path& out, + const string& user_agent = {}, + const butl::url& proxy = {}); + + // As above but fetches HTTP(S) URL to stdout, which can be read by the + // caller from the specified stream. On HTTP errors (e.g., 404) this stream + // may contain the error description returned by the server and the process + // may exit with 0 code. + // + // Fetch process stderr redirect mode. + // + enum class stderr_mode + { + // Don't redirect stderr. + // + pass, + + // If the underlying fetch program provides an easy way to retrieve the + // HTTP status code, then redirect the fetch process stderr to a pipe, so + // that depending on the returned status code the caller can either drop + // or dump the fetch process diagnostics. Otherwise, may still redirect + // stderr for some implementation-specific reasons (to prevent the + // underlying fetch program from interacting with the user, etc). The + // caller can detect whether stderr is redirected or not by checking + // process::in_efd. + // + redirect, + + // As above but if stderr is redirected, minimize the amount of + // diagnostics printed by the fetch program by only printing errors. That + // allows the caller to read stdout and stderr streams sequentially in the + // blocking mode by assuming that the diagnostics always fits into the + // pipe buffer. If stderr is not redirected, then ignore this mode in + // favor of the more informative diagnostics. + // + redirect_quiet + }; + + pair + start_fetch_http (const common_options&, + const string& url, ifdstream& out, fdstream_mode out_mode, - bool quiet, + stderr_mode, const string& user_agent = {}, const butl::url& proxy = {}); } diff --git a/bpkg/pkg-build-collect.cxx b/bpkg/pkg-build-collect.cxx index 9a60d4a..c581f5c 100644 --- a/bpkg/pkg-build-collect.cxx +++ b/bpkg/pkg-build-collect.cxx @@ -2586,8 +2586,8 @@ namespace bpkg (!dep_constr || system)) dr << info << "repository " << af->location << " appears " << "to be broken" << - info << "or the repository state could be stale" << - info << "run 'bpkg rep-fetch' to update"; + info << "or the repository metadata could be stale" << + info << "run 'bpkg rep-fetch' (or equivalent) to update"; } // If all that's available is a stub then we need to make sure -- cgit v1.1