aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2023-07-21 20:19:15 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2023-07-25 17:09:35 +0300
commitfd4439e4d067f77642b3f3dfcaf50d605e69235f (patch)
treee72cabbd3e931021a8ccbf80943fb6d9cc8531b6
parent8468f0beac9b792d7e2621f1a78a485ed542fb1d (diff)
Advice user to re-fetch repositories if package fetch ends up with 404 HTTP status code (GH issue #299)
-rw-r--r--bpkg/fetch-git.cxx2
-rw-r--r--bpkg/fetch-pkg.cxx76
-rw-r--r--bpkg/fetch.cxx170
-rw-r--r--bpkg/fetch.hxx68
-rw-r--r--bpkg/pkg-build-collect.cxx4
5 files changed, 242 insertions, 78 deletions
diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx
index 44387de..d2c30a1 100644
--- a/bpkg/fetch-git.cxx
+++ b/bpkg/fetch-git.cxx
@@ -603,7 +603,7 @@ namespace bpkg
u,
is /* out */,
fdstream_mode::skip | fdstream_mode::binary,
- true /* quiet */,
+ stderr_mode::redirect_quiet,
"git/" + git_ver.string ()));
process& pr (ps.first);
diff --git a/bpkg/fetch-pkg.cxx b/bpkg/fetch-pkg.cxx
index ca4767f..721e4b8 100644
--- a/bpkg/fetch-pkg.cxx
+++ b/bpkg/fetch-pkg.cxx
@@ -97,20 +97,80 @@ namespace bpkg
if (exists (df))
fail << "file " << df << " already exists";
+ // Currently we only expect fetching a package archive via the HTTP(S)
+ // protocol.
+ //
+ switch (u.scheme)
+ {
+ case repository_protocol::git:
+ case repository_protocol::ssh:
+ case repository_protocol::file: assert (false);
+ case repository_protocol::http:
+ case repository_protocol::https: break;
+ }
+
auto_rmfile arm (df);
- process pr (start_fetch (o,
- u.string (),
- df,
- string () /* user_agent */,
- o.pkg_proxy ()));
- if (!pr.wait ())
+ // Note that a package file may not be present in the repository due to
+ // outdated repository information. Thus, while fetching the file we also
+ // try to retrieve the HTTP status code. If the HTTP status code is
+ // retrieved and is 404 (not found) or the fetch program doesn't support
+ // its retrieval and fails, then we also advise the user to re-fetch the
+ // repositories.
+ //
+ pair<process, uint16_t> ps (
+ start_fetch_http (o,
+ u.string (),
+ df,
+ string () /* user_agent */,
+ o.pkg_proxy ()));
+
+ process& pr (ps.first);
+ uint16_t sc (ps.second);
+
+ // Fail if the fetch process didn't exit normally with 0 code or the HTTP
+ // status code is retrieved and differs from 200.
+ //
+ // Note that the diagnostics may potentially look as follows:
+ //
+ // foo-1.0.0.tar.gz:
+ // ###################################################### 100.0%
+ // error: unable to fetch package https://example.org/1/foo-1.0.0.tar.gz
+ // info: repository metadata could be stale
+ // info: run 'bpkg rep-fetch' (or equivalent) to update
+ //
+ // It's a bit unfortunate that the 100% progress indicator can be shown
+ // for a potential HTTP error and it doesn't seem that we can easily fix
+ // that. Note, however, that this situation is not very common and
+ // probably that's fine.
+ //
+ if (!pr.wait () || (sc != 0 && sc != 200))
{
// While it is reasonable to assuming the child process issued
// diagnostics, some may not mention the URL.
//
- fail << "unable to fetch " << u <<
- info << "re-run with -v for more information";
+ diag_record dr (fail);
+ dr << "unable to fetch package " << u;
+
+ // Print the HTTP status code in the diagnostics on the request failure,
+ // unless it cannot be retrieved or is 404. Note that the fetch program
+ // may even exit successfully on such a failure (see start_fetch_http()
+ // for details) and issue no diagnostics at all.
+ //
+ if (sc != 0 && sc != 200 && sc != 404)
+ dr << info << "HTTP status code " << sc;
+
+ // If not found, advise the user to re-fetch the repositories. Note that
+ // if the status code cannot be retrieved, we assume it could be 404 and
+ // advise.
+ //
+ if (sc == 404 || sc == 0)
+ {
+ dr << info << "repository metadata could be stale" <<
+ info << "run 'bpkg rep-fetch' (or equivalent) to update";
+ }
+ else if (verb < 2)
+ dr << info << "re-run with -v for more information";
}
arm.cancel ();
diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx
index a9e4df2..4deee0a 100644
--- a/bpkg/fetch.cxx
+++ b/bpkg/fetch.cxx
@@ -6,6 +6,7 @@
#include <bpkg/diagnostics.hxx>
using namespace std;
+using namespace butl;
namespace bpkg
{
@@ -87,14 +88,14 @@ namespace bpkg
// Note that there is no easy way to retrieve the HTTP status code for wget
// (there is no reliable way to redirect the status line/headers to stdout)
// and thus we always return 0. Due to the status code unavailability there
- // is no need to redirect stderr and thus we ignore the quiet mode.
+ // is no need to redirect stderr and thus we ignore the stderr mode.
//
static pair<process, uint16_t>
start_wget (const path& prog,
const optional<size_t>& timeout,
bool progress,
bool no_progress,
- bool /* quiet */,
+ stderr_mode,
const strings& ops,
const string& url,
ifdstream* out_is,
@@ -239,7 +240,7 @@ namespace bpkg
0, -1, 2,
nullptr /* cwd */, env.vars));
- if (out_is != nullptr)
+ if (!fo && out_is != nullptr)
out_is->open (move (pr.in_ofd), out_ism);
return make_pair (move (pr), 0);
@@ -290,17 +291,19 @@ namespace bpkg
return false;
}
- // If HTTP status code needs to be retrieved, then open the passed stream
- // and read out the status line(s) extracting the status code and the
- // headers. Otherwise, return 0 indicating that the status code is not
- // available. In the former case redirect stderr and respect the quiet mode.
+ // If HTTP status code needs to be retrieved (out_is != NULL), then open the
+ // passed stream and read out the status line(s) extracting the status code
+ // and the headers. Otherwise, return 0 indicating that the status code is
+ // not available. In the former case if the output file is also specified,
+ // then read out and save the file if the status code is 200 and drop the
+ // HTTP response body otherwise.
//
static pair<process, uint16_t>
start_curl (const path& prog,
const optional<size_t>& timeout,
bool progress,
bool no_progress,
- bool quiet,
+ stderr_mode err_mode,
const strings& ops,
const string& url,
ifdstream* out_is,
@@ -335,6 +338,8 @@ namespace bpkg
// Higher than that -- run it verbose. Always show the progress
// bar if requested explicitly, even in the quiet mode.
//
+ bool quiet (err_mode == stderr_mode::redirect_quiet);
+
if (!quiet)
{
if (verb < (fo ? 1 : 2))
@@ -380,7 +385,7 @@ namespace bpkg
// Output. By default curl writes to stdout.
//
- if (fo)
+ if (fo && out_is == nullptr) // Output to file and don't query HTTP status?
{
args.push_back ("-o");
args.push_back (out.string ().c_str ());
@@ -407,11 +412,7 @@ namespace bpkg
// print the response status line and headers.
//
if (out_is != nullptr)
- {
- assert (!fo); // Currently unsupported (see start_fetch() for details).
-
args.push_back ("-i");
- }
else
args.push_back ("-f");
@@ -435,10 +436,10 @@ namespace bpkg
// Process exceptions must be handled by the caller.
//
- process pr (fo
+ process pr (fo && out_is == nullptr
? process (pp, args.data ())
: process (pp, args.data (),
- 0, -1, out_is != nullptr ? -1 : 2));
+ 0, -1, err_mode == stderr_mode::pass ? 2 : -1));
// Close the process stdout stream and read stderr stream out and dump.
//
@@ -446,13 +447,16 @@ namespace bpkg
// blocked writing to stdout and so that stderr get dumped before the
// error message we issue.
//
- auto close_streams = [&pr, out_is] ()
+ auto close_streams = [&pr, out_is, err_mode] ()
{
try
{
+ assert (out_is != nullptr);
+
out_is->close ();
- bpkg::dump_stderr (move (pr.in_efd));
+ if (err_mode != stderr_mode::pass)
+ bpkg::dump_stderr (move (pr.in_efd));
}
catch (const io_error&)
{
@@ -476,12 +480,13 @@ namespace bpkg
// reason for the exception mask choice. When done, we will restore the
// original exception mask.
//
- ifdstream::iostate es (out_is->exceptions ());
+ ifdstream& is (*out_is);
+ ifdstream::iostate es (is.exceptions ());
- out_is->exceptions (
+ is.exceptions (
ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
- out_is->open (move (pr.in_ofd), out_ism);
+ is.open (move (pr.in_ofd), out_ism);
// Parse and return the HTTP status code. Return 0 if the argument is
// invalid.
@@ -500,10 +505,10 @@ namespace bpkg
// Read the CRLF-terminated line from the stream stripping the trailing
// CRLF.
//
- auto read_line = [out_is] ()
+ auto read_line = [&is] ()
{
string l;
- getline (*out_is, l); // Strips the trailing LF (0xA).
+ getline (is, l); // Strips the trailing LF (0xA).
// Note that on POSIX CRLF is not automatically translated into LF, so
// we need to strip CR (0xD) manually.
@@ -559,7 +564,7 @@ namespace bpkg
while (!read_line ().empty ()) ; // Skips headers.
- out_is->exceptions (es);
+ is.exceptions (es);
}
catch (const io_error&)
{
@@ -568,6 +573,57 @@ namespace bpkg
fail << "unable to read HTTP response status line for " << url;
}
+ // If the output file is specified and the HTTP status code needs to also
+ // be retrieved, then read out and save the file if the status code is 200
+ // and drop the HTTP response body otherwise.
+ //
+ bool io_read; // If true then io_error relates to a read operation.
+ if (fo && out_is != nullptr)
+ try
+ {
+ ifdstream& is (*out_is);
+
+ // Read and save the file if the HTTP status code is 200.
+ //
+ if (sc == 200)
+ {
+ io_read = false;
+ ofdstream os (out, fdopen_mode::binary);
+
+ bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ()));
+ assert (buf != nullptr);
+
+ for (io_read = true;
+ is.peek () != istream::traits_type::eof (); // Potentially reads.
+ io_read = true)
+ {
+ size_t n (buf->egptr () - buf->gptr ());
+
+ io_read = false;
+ os.write (buf->gptr (), n);
+
+ buf->gbump (static_cast<int> (n));
+ }
+
+ io_read = false;
+ os.close ();
+ }
+
+ // Close the stream, skipping the remaining content, if present.
+ //
+ io_read = true;
+ is.close ();
+ }
+ catch (const io_error& e)
+ {
+ close_streams ();
+
+ if (io_read)
+ fail << "unable to read fetched " << url << ": " << e;
+ else
+ fail << "unable to write to " << out << ": " << e;
+ }
+
return make_pair (move (pr), sc);
}
@@ -621,17 +677,17 @@ namespace bpkg
// Note that there is no easy way to retrieve the HTTP status code for the
// fetch program and thus we always return 0.
//
- // Also note that in the HTTP status code retrieval mode (out_is != NULL) we
- // nevertheless redirect stderr to prevent the fetch program from
- // interactively querying the user for the credentials. Thus, we also
- // respect the quiet mode in contrast to start_wget().
+ // Also note that in the redirect* stderr modes we nevertheless redirect
+ // stderr to prevent the fetch program from interactively querying the user
+ // for the credentials. Thus, we also respect the redirect_quiet mode in
+ // contrast to start_wget().
//
static pair<process, uint16_t>
start_fetch (const path& prog,
const optional<size_t>& timeout,
bool progress,
bool no_progress,
- bool quiet,
+ stderr_mode err_mode,
const strings& ops,
const string& url,
ifdstream* out_is,
@@ -667,6 +723,8 @@ namespace bpkg
// unless the verbosity level is greater than three, in which case we will
// run verbose (and with progress). That's the best we can do.
//
+ bool quiet (err_mode == stderr_mode::redirect_quiet);
+
if (!quiet)
{
if (verb < (fo ? 1 : 2))
@@ -745,10 +803,10 @@ namespace bpkg
out.directory ().string ().c_str (),
env.vars)
: process (pp, args.data (),
- 0, -1, out_is != nullptr ? -1 : 2,
+ 0, -1, err_mode == stderr_mode::pass ? 2 : -1,
nullptr /* cwd */, env.vars));
- if (out_is != nullptr)
+ if (!fo && out_is != nullptr)
out_is->open (move (pr.in_ofd), out_ism);
return make_pair (move (pr), 0);
@@ -879,26 +937,29 @@ namespace bpkg
const string& src,
ifdstream* out_is,
fdstream_mode out_ism,
- bool quiet,
+ stderr_mode err_mode,
const path& out,
const string& user_agent,
const url& proxy)
{
- // Currently, for the sake of simplicity, we don't support retrieving the
- // HTTP status code if we fetch into a file.
+ // Currently, for the sake of simplicity, we don't support redirecting
+ // stderr if we fetch into a file.
//
- assert (out.empty () || out_is == nullptr);
-
- // Quiet mode is only meaningful if HTTP status code needs to be
- // retrieved.
+ assert (out.empty () || err_mode == stderr_mode::pass);
+
+ // If out_is is not NULL and out is not empty, then the former argument is
+ // unused by the caller and only indicates that the HTTP status code still
+ // needs to be retrieved while the requested file needs to be saved. In
+ // this case if the fetch program doesn't provide an easy way to retrieve
+ // the HTTP status code, then the respective start_*() function can just
+ // ignore the referred stream. Otherwise, it may or may not use it for
+ // convenience but should close it before returning if it does.
//
- assert (!quiet || out_is != nullptr);
-
pair<process, uint16_t> (*f) (const path&,
const optional<size_t>&,
bool,
bool,
- bool,
+ stderr_mode,
const strings&,
const string&,
ifdstream*,
@@ -1013,7 +1074,7 @@ namespace bpkg
timeout,
o.progress (),
o.no_progress (),
- quiet,
+ err_mode,
os,
!http_url.empty () ? http_url : src,
out_is,
@@ -1044,7 +1105,7 @@ namespace bpkg
src,
nullptr /* out_is */,
fdstream_mode::none,
- false /* quiet */,
+ stderr_mode::pass,
out,
user_agent,
proxy).first;
@@ -1055,7 +1116,7 @@ namespace bpkg
const string& src,
ifdstream& out,
fdstream_mode out_mode,
- bool quiet,
+ stderr_mode err_mode,
const string& user_agent,
const url& proxy)
{
@@ -1063,9 +1124,30 @@ namespace bpkg
src,
&out,
out_mode,
- quiet,
+ err_mode,
path () /* out */,
user_agent,
proxy);
}
+
+ pair<process, uint16_t>
+ start_fetch_http (const common_options& o,
+ const string& src,
+ const path& out,
+ const string& user_agent,
+ const url& proxy)
+ {
+ assert (!out.empty ());
+
+ ifdstream is (ifdstream::badbit | ifdstream::failbit);
+
+ return start_fetch (o,
+ src,
+ &is,
+ fdstream_mode::skip | fdstream_mode::binary,
+ stderr_mode::pass,
+ out,
+ user_agent,
+ proxy);
+ }
}
diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx
index 6578f0d..daf1ffe 100644
--- a/bpkg/fetch.hxx
+++ b/bpkg/fetch.hxx
@@ -144,35 +144,57 @@ namespace bpkg
const string& user_agent = {},
const butl::url& proxy = {});
- // Similar to the above but can only be used for HTTP(S) URLs. Additionally
- // return the HTTP status code, if the underlying fetch program provides an
- // easy way to retrieve it, and 0 otherwise.
- //
- // In the former case redirect the fetch process stderr to a pipe, so that
- // depending on the returned status code the caller can either drop or dump
- // the fetch process diagnostics. May also redirect stderr in the latter
- // case for some implementation-specific reasons (to prevent the underlying
- // fetch program from interacting with the user, etc). The caller can detect
- // whether stderr is redirected by checking process::in_efd.
- //
- // In contrast to start_fetch() always fetch to stdout, which can be read by
- // the caller from the specified stream. On HTTP errors (e.g., 404) this
- // stream may contain the error description returned by the server and the
- // process may exit with 0 code.
- //
- // If the quiet argument is true, then, if stderr is redirected, minimize
- // the amount of diagnostics printed by the fetch program by only printing
- // errors. That allows the caller to read stdout and stderr streams
- // sequentially in the blocking mode by assuming that the diagnostics always
- // fits into the pipe buffer. If stderr is not redirected, then the quiet
- // argument is ignored in favor of the more informative diagnostics.
+ // Similar to the above but can only be used for fetching HTTP(S) URL to a
+ // file. Additionally return the HTTP status code, if the underlying fetch
+ // program provides an easy way to retrieve it, and 0 otherwise.
//
pair<process, uint16_t>
start_fetch_http (const common_options&,
const string& url,
+ const path& out,
+ const string& user_agent = {},
+ const butl::url& proxy = {});
+
+ // As above but fetches HTTP(S) URL to stdout, which can be read by the
+ // caller from the specified stream. On HTTP errors (e.g., 404) this stream
+ // may contain the error description returned by the server and the process
+ // may exit with 0 code.
+ //
+ // Fetch process stderr redirect mode.
+ //
+ enum class stderr_mode
+ {
+ // Don't redirect stderr.
+ //
+ pass,
+
+ // If the underlying fetch program provides an easy way to retrieve the
+ // HTTP status code, then redirect the fetch process stderr to a pipe, so
+ // that depending on the returned status code the caller can either drop
+ // or dump the fetch process diagnostics. Otherwise, may still redirect
+ // stderr for some implementation-specific reasons (to prevent the
+ // underlying fetch program from interacting with the user, etc). The
+ // caller can detect whether stderr is redirected or not by checking
+ // process::in_efd.
+ //
+ redirect,
+
+ // As above but if stderr is redirected, minimize the amount of
+ // diagnostics printed by the fetch program by only printing errors. That
+ // allows the caller to read stdout and stderr streams sequentially in the
+ // blocking mode by assuming that the diagnostics always fits into the
+ // pipe buffer. If stderr is not redirected, then ignore this mode in
+ // favor of the more informative diagnostics.
+ //
+ redirect_quiet
+ };
+
+ pair<process, uint16_t>
+ start_fetch_http (const common_options&,
+ const string& url,
ifdstream& out,
fdstream_mode out_mode,
- bool quiet,
+ stderr_mode,
const string& user_agent = {},
const butl::url& proxy = {});
}
diff --git a/bpkg/pkg-build-collect.cxx b/bpkg/pkg-build-collect.cxx
index 9a60d4a..c581f5c 100644
--- a/bpkg/pkg-build-collect.cxx
+++ b/bpkg/pkg-build-collect.cxx
@@ -2586,8 +2586,8 @@ namespace bpkg
(!dep_constr || system))
dr << info << "repository " << af->location << " appears "
<< "to be broken" <<
- info << "or the repository state could be stale" <<
- info << "run 'bpkg rep-fetch' to update";
+ info << "or the repository metadata could be stale" <<
+ info << "run 'bpkg rep-fetch' (or equivalent) to update";
}
// If all that's available is a stub then we need to make sure