aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2015-09-25 07:10:57 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2015-09-25 07:10:57 +0200
commit287c71ddc00f0db1436d557042b69c03dc448b13 (patch)
tree824d66203c59530b856151c94c4bd322eff28562
parent5ab55b3efa7be536af146c778ebe457462a886a6 (diff)
Add support for fetching remote repositories
Wget, curl, and (FreeBSD) fetch are supported.
-rw-r--r--bpkg/bpkg-version77
-rw-r--r--bpkg/bpkg.cxx4
-rw-r--r--bpkg/common-options.cli28
-rw-r--r--bpkg/fetch7
-rw-r--r--bpkg/fetch.cxx453
-rw-r--r--bpkg/rep-fetch.cxx21
-rwxr-xr-xbpkg/test.sh6
7 files changed, 576 insertions, 20 deletions
diff --git a/bpkg/bpkg-version b/bpkg/bpkg-version
new file mode 100644
index 0000000..381fa56
--- /dev/null
+++ b/bpkg/bpkg-version
@@ -0,0 +1,77 @@
+// file : bpkg/bpkg-version -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BPKG_VERSION // Note: using the version macro itself.
+
+#include <bpkg/version> // LIBBPKG_VERSION
+
+// Version format is AABBCCDD where
+//
+// AA - major version number
+// BB - minor version number
+// CC - bugfix version number
+// DD - alpha / beta (DD + 50) version number
+//
+// When DD is not 00, 1 is subtracted from AABBCC. For example:
+//
+// Version AABBCCDD
+// 2.0.0 02000000
+// 2.1.0 02010000
+// 2.1.1 02010100
+// 2.2.0.a1 02019901
+// 3.0.0.b2 02999952
+//
+
+// Generally, we expect minor versions to be source code backwards-
+// compatible, though we might have a minimum version requirement.
+//
+// AABBCCDD
+#if (LIBBPKG_VERSION < 10000 || \
+ LIBBPKG_VERSION > 990000)
+# error incompatible libbpkg version
+#endif
+
+// AABBCCDD
+#define BPKG_VERSION 10000
+#define BPKG_VERSION_STR "0.1.0"
+
+// User agent.
+//
+// BPKG_OS is a short name for the platform we are being compiled for. It
+// is selected by the first predefined compiler macro that matches in the
+// chain below ("Other" if none does). BPKG_USER_AGENT is a string literal
+// assembled from the bpkg version, BPKG_OS, and the libbpkg version:
+//
+// bpkg/<BPKG_VERSION_STR> (<BPKG_OS>; +http://build2.org) libbpkg/<LIBBPKG_VERSION_STR>
+//
+#if defined(_WIN32)
+# if defined(__MINGW32__)
+# define BPKG_OS "MinGW"
+# else
+# define BPKG_OS "Windows"
+# endif
+#elif defined(__linux)
+# define BPKG_OS "GNU/Linux"
+#elif defined(__APPLE__)
+# define BPKG_OS "MacOS"
+#elif defined(__CYGWIN__)
+# define BPKG_OS "Cygwin"
+#elif defined(__FreeBSD__)
+# define BPKG_OS "FreeBSD"
+#elif defined(__OpenBSD__)
+# define BPKG_OS "OpenBSD"
+#elif defined(__NetBSD__)
+# define BPKG_OS "NetBSD"
+#elif defined(__sun)
+# define BPKG_OS "Solaris"
+#elif defined(__hpux)
+# define BPKG_OS "HP-UX"
+#elif defined(_AIX)
+# define BPKG_OS "AIX"
+#elif defined(__unix)
+# define BPKG_OS "Unix"
+#elif defined(__posix)
+# define BPKG_OS "Posix"
+#else
+# define BPKG_OS "Other"
+#endif
+
+#define BPKG_USER_AGENT \
+ "bpkg/" BPKG_VERSION_STR " (" BPKG_OS "; +http://build2.org) " \
+ "libbpkg/" LIBBPKG_VERSION_STR
+
+#endif // BPKG_VERSION
diff --git a/bpkg/bpkg.cxx b/bpkg/bpkg.cxx
index ef74144..75684cb 100644
--- a/bpkg/bpkg.cxx
+++ b/bpkg/bpkg.cxx
@@ -10,6 +10,7 @@
#include <bpkg/diagnostics>
#include <bpkg/bpkg-options>
+#include <bpkg/bpkg-version>
// Commands.
//
@@ -72,7 +73,8 @@ try
if (bo.version ())
{
- cout << "bpkg 0.0.0" << endl
+ cout << "bpkg " << BPKG_VERSION_STR << "; " <<
+ "libbpkg " << LIBBPKG_VERSION_STR << endl
<< "Copyright (c) 2014-2015 Code Synthesis Ltd" << endl
<< "This is free software released under the MIT license." << endl;
return 0;
diff --git a/bpkg/common-options.cli b/bpkg/common-options.cli
index 25db69c..afeec77 100644
--- a/bpkg/common-options.cli
+++ b/bpkg/common-options.cli
@@ -2,6 +2,7 @@
// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
// license : MIT; see accompanying LICENSE file
+include <vector>;
include <cstdint>;
include <bpkg/types>;
@@ -29,10 +30,35 @@ namespace bpkg
further."
};
+ path --fetch
+ {
+ "<path>",
+ "The fetch program that should be used to download remote resources.
+ Currently, \cb{bpkg} recognizes \cb{curl}, \cb{wget}, and \cb{fetch}.
+ Note that the last component of <path> must contain one of these
+ names as a substring in order for \cb{bpkg} to recognize which
+ program is being used. You can also specify additional options that
+ should be passed to the fetch program with \cb{--fetch-option}.
+
+ If the fetch program is not specified, then \cb{bpkg} will try to
+ discover if one of the above programs is available and use that.
+ Currently, \cb{bpkg} has the following preference order: \cb{wget}
+ 1.16 or higher (supports \cb{--show-progress}), \cb{curl},
+ \cb{wget}, and \cb{fetch}."
+ };
+
+ strings --fetch-option
+ {
+ "<opt>",
+ "Additional option that should be passed to the fetch program. See
+ \cb{--fetch} for more information on the fetch program. Repeat this
+ option to specify multiple fetch options."
+ };
+
// The following option is "fake" in that it is actually handled by
// argv_file_scanner. We have it here for documentation.
//
- std::string --options-file
+ string --options-file
{
"<file>",
"Read additional options from <file> with each option appearing on a
diff --git a/bpkg/fetch b/bpkg/fetch
index 5290107..49d8cd9 100644
--- a/bpkg/fetch
+++ b/bpkg/fetch
@@ -9,14 +9,17 @@
#include <bpkg/types>
#include <bpkg/utility>
+#include <bpkg/common-options>
namespace bpkg
{
repository_manifests fetch_repositories (const dir_path&);
- repository_manifests fetch_repositories (const repository_location&);
+ repository_manifests fetch_repositories (const common_options&,
+ const repository_location&);
package_manifests fetch_packages (const dir_path&);
- package_manifests fetch_packages (const repository_location&);
+ package_manifests fetch_packages (const common_options&,
+ const repository_location&);
}
#endif // BPKG_FETCH
diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx
index 34445a8..10acf1f 100644
--- a/bpkg/fetch.cxx
+++ b/bpkg/fetch.cxx
@@ -5,6 +5,7 @@
#include <bpkg/fetch>
#include <fstream>
+#include <cstdint> // uint16_t
#include <butl/process>
#include <butl/fdstream>
@@ -13,12 +14,444 @@
#include <bpkg/manifest-parser>
#include <bpkg/diagnostics>
+#include <bpkg/bpkg-version>
using namespace std;
using namespace butl;
namespace bpkg
{
+ // wget
+ //
+ static uint16_t wget_major;
+ static uint16_t wget_minor;
+
+ static bool
+ check_wget (const path& prog)
+ {
+   tracer trace ("check_wget");
+
+   // Run "<prog> --version". wget prints its version to STDOUT and exits
+   // with the 0 status. The first line starts with "GNU Wget X.Y[.Z]".
+   //
+   // Returns true if the signature matched and the process exited
+   // successfully. As a side effect, wget_major and wget_minor are set to
+   // the detected version.
+   //
+   const char* args[] = {prog.string ().c_str (), "--version", nullptr};
+
+   if (verb >= 3)
+     print_process (args);
+
+   try
+   {
+     process pr (args, 0, -1); // STDOUT is redirected to a pipe.
+
+     ifdstream is (pr.in_ofd);
+     string line;
+     getline (is, line);
+
+     const string sig ("GNU Wget ");
+
+     if (line.compare (0, sig.size (), sig) != 0)
+       return false;
+
+     // Parse the version that follows the signature. Should anything go
+     // wrong, fall back to version 0.0 so that this wget is treated as a
+     // really old one.
+     //
+     try
+     {
+       const string ver (line.substr (sig.size ()));
+       size_t pos;
+       wget_major = static_cast<uint16_t> (stoul (ver, &pos));
+
+       // The minor component is only there if the major one is followed
+       // by a dot.
+       //
+       if (pos != ver.size () && ver[pos] == '.')
+         wget_minor = static_cast<uint16_t> (stoul (ver.substr (pos + 1)));
+
+       level4 ([&]{trace << "version " << wget_major << '.' << wget_minor;});
+     }
+     catch (const std::exception&)
+     {
+       wget_major = 0;
+       wget_minor = 0;
+
+       level4 ([&]{trace << "unable to extract version from '" << line << "'";});
+     }
+
+     return pr.wait ();
+   }
+   catch (const process_error& e)
+   {
+     // If we are in the child, terminate it; otherwise report that this
+     // is not a usable wget.
+     //
+     if (e.child ())
+       exit (1);
+
+     return false;
+   }
+ }
+
+ static process
+ start_wget (const path& prog, const strings& ops, const string& url)
+ {
+   // User agent: ours followed by the detected wget version.
+   //
+   string ua (BPKG_USER_AGENT " wget/");
+   ua += to_string (wget_major);
+   ua += ".";
+   ua += to_string (wget_minor);
+
+   cstrings args {
+     prog.string ().c_str (),
+     "-U", ua.c_str ()
+   };
+
+   // Translate our verbosity level into wget's. Quiet and level 1 map to
+   // wget's quiet mode. Levels 2 and 3 use wget's default (we print the
+   // command line and wget shows progress, error messages, etc). Anything
+   // above that enables wget's debug output.
+   //
+   // Note that quiet in wget means no output at all, not even error
+   // messages. The alternative -nv ("non-verbose") mode does print errors
+   // but also a useless info line. So we run it quiet and hope for the
+   // best; if the fetch fails, the caller suggests re-running with -v to
+   // see all the details.
+   //
+   if (const char* v = (verb < 2 ? "-q" : verb > 3 ? "-d" : nullptr))
+     args.push_back (v);
+
+   // Extra options come next. The idea is that they may override what we
+   // have set before this point but not after (like -O below).
+   //
+   for (const string& opt: ops)
+     args.push_back (opt.c_str ());
+
+   // Write the result to STDOUT.
+   //
+   args.push_back ("-O");
+   args.push_back ("-");
+
+   args.push_back (url.c_str ());
+   args.push_back (nullptr);
+
+   if (verb >= 2)
+     print_process (args);
+
+   // STDOUT is redirected to a pipe. Failure to start the process is
+   // handled by the caller.
+   //
+   return process (args.data (), 0, -1);
+ }
+
+ // curl
+ //
+ static bool
+ check_curl (const path& prog)
+ {
+   // Run "<prog> --version". curl prints its version to STDOUT and exits
+   // with the 0 status. The first line starts with "curl X.Y.Z".
+   //
+   const char* args[] = {prog.string ().c_str (), "--version", nullptr};
+
+   if (verb >= 3)
+     print_process (args);
+
+   try
+   {
+     process pr (args, 0, -1); // STDOUT is redirected to a pipe.
+
+     ifdstream is (pr.in_ofd);
+     string first;
+     getline (is, first);
+
+     // Not curl if the signature is missing. Otherwise the verdict is the
+     // exit status of the process.
+     //
+     if (first.compare (0, 5, "curl ") != 0)
+       return false;
+
+     return pr.wait ();
+   }
+   catch (const process_error& e)
+   {
+     // If we are in the child, terminate it; otherwise report that this
+     // is not a usable curl.
+     //
+     if (e.child ())
+       exit (1);
+
+     return false;
+   }
+ }
+
+ static process
+ start_curl (const path& prog, const strings& ops, const string& url)
+ {
+   cstrings args {
+     prog.string ().c_str (),
+     "-f",                            // Fail on HTTP errors (e.g., 404).
+     "-L",                            // Follow redirects.
+     "-A", (BPKG_USER_AGENT " curl")  // Our user agent.
+   };
+
+   // Translate our verbosity level into curl's. Quiet and level 1 map to
+   // curl's silent mode (but still showing errors). Levels 2 and 3 use
+   // curl's default (we print the command line and curl shows progress).
+   // Anything above that runs curl verbose.
+   //
+   if (verb > 3)
+     args.push_back ("-v");
+   else if (verb < 2)
+   {
+     args.push_back ("-s");
+     args.push_back ("-S"); // But show errors.
+   }
+
+   // Extra options come next. The idea is that they may override what we
+   // have set before this point but not after.
+   //
+   for (const string& opt: ops)
+     args.push_back (opt.c_str ());
+
+   args.push_back (url.c_str ());
+   args.push_back (nullptr);
+
+   if (verb >= 2)
+     print_process (args);
+
+   // STDOUT is redirected to a pipe. Failure to start the process is
+   // handled by the caller.
+   //
+   return process (args.data (), 0, -1);
+ }
+
+ // fetch
+ //
+ static bool
+ check_fetch (const path& prog)
+ {
+   // fetch has neither --version nor --help. When run without arguments
+   // it dumps its usage and exits with the error status, so the exit
+   // status is not checked here. The usage starts with "usage: fetch "
+   // and that is what we use as the signature.
+   //
+   const char* args[] = {prog.string ().c_str (), nullptr};
+
+   if (verb >= 3)
+     print_process (args);
+
+   try
+   {
+     process pr (args, 0, -1, 1); // Both STDOUT and STDERR go to a pipe.
+
+     ifdstream is (pr.in_ofd);
+     string first;
+     getline (is, first);
+
+     const string sig ("usage: fetch ");
+
+     return first.compare (0, sig.size (), sig) == 0;
+   }
+   catch (const process_error& e)
+   {
+     // If we are in the child, terminate it; otherwise report that this
+     // is not a usable fetch.
+     //
+     if (e.child ())
+       exit (1);
+
+     return false;
+   }
+ }
+
+ static process
+ start_fetch (const path& prog, const strings& ops, const string& url)
+ {
+   // Options that we may want to consider in the future:
+   //
+   // -T|--timeout 120 seconds by default, leave it at that for now.
+   // -n|--no-mtime
+   //
+   cstrings args {
+     prog.string ().c_str (),
+     "--user-agent", (BPKG_USER_AGENT " fetch")
+   };
+
+   // Translate our verbosity level into fetch's. Quiet and level 1 map to
+   // fetch's quiet mode. Levels 2 and 3 use fetch's default (we print the
+   // command line and fetch shows progress). Anything above that runs
+   // fetch verbose.
+   //
+   if (const char* v = (verb < 2 ? "-q" : verb > 3 ? "-v" : nullptr))
+     args.push_back (v);
+
+   // Extra options come next. The idea is that they may override what we
+   // have set before this point but not after (like -o below).
+   //
+   for (const string& opt: ops)
+     args.push_back (opt.c_str ());
+
+   // Write the result to STDOUT.
+   //
+   args.push_back ("-o");
+   args.push_back ("-");
+
+   args.push_back (url.c_str ());
+   args.push_back (nullptr);
+
+   if (verb >= 2)
+     print_process (args);
+
+   // STDOUT is redirected to a pipe. Failure to start the process is
+   // handled by the caller.
+   //
+   return process (args.data (), 0, -1);
+ }
+
+ // The dispatcher.
+ //
+ // Cache the result of finding/testing the fetch program. Sometimes
+ // a simple global variable is really the right solution...
+ //
+ enum kind {wget, curl, fetch};
+
+ static path fetch_path;
+ static kind fetch_kind;
+
+ // Determine (and cache in fetch_path/fetch_kind) which fetch program to
+ // use and of what kind it is.
+ //
+ // If --fetch was specified, then the kind is derived from the last path
+ // component (which must contain "wget", "curl", or "fetch" as a
+ // substring) and the program is verified with the corresponding
+ // check_*() function. Otherwise, the available programs are probed in the
+ // preference order described below.
+ //
+ // Issues diagnostics and fails if no suitable program can be found.
+ //
+ static kind
+ check (const common_options& o)
+ {
+   if (!fetch_path.empty ())
+     return fetch_kind; // Cached.
+
+   // Do the detection on local variables and only store the result in the
+   // cache once a program has been positively identified. A non-empty
+   // fetch_path is what marks the cache as valid, so assigning it before
+   // the verification would leave an unverified path/kind pair behind if
+   // we bail out below (fail is not expected to return normally;
+   // presumably it throws failed).
+   //
+   path p;
+   kind k (wget);
+
+   if (o.fetch_specified ())
+   {
+     p = o.fetch ();
+
+     // Figure out which one it is.
+     //
+     const path& n (p.leaf ());
+     const string& s (n.string ());
+
+     if (s.find ("wget") != string::npos)
+     {
+       if (!check_wget (p))
+         fail << p << " does not appear to be the 'wget' program";
+
+       k = wget;
+     }
+     else if (s.find ("curl") != string::npos)
+     {
+       if (!check_curl (p))
+         fail << p << " does not appear to be the 'curl' program";
+
+       k = curl;
+     }
+     else if (s.find ("fetch") != string::npos)
+     {
+       if (!check_fetch (p))
+         fail << p << " does not appear to be the 'fetch' program";
+
+       k = fetch;
+     }
+     else
+       fail << "unknown fetch program " << p;
+   }
+   else
+   {
+     // See if any is available. The preference order is:
+     //
+     // wget 1.16 or up
+     // curl
+     // wget
+     // fetch
+     //
+     bool wg (check_wget (p = path ("wget")));
+
+     if (wg && (wget_major > 1 || (wget_major == 1 && wget_minor >= 16)))
+     {
+       k = wget;
+     }
+     else if (check_curl (p = path ("curl")))
+     {
+       k = curl;
+     }
+     else if (wg)
+     {
+       // An older wget: p was overwritten by the curl probe so restore it.
+       //
+       p = path ("wget");
+       k = wget;
+     }
+     else if (check_fetch (p = path ("fetch")))
+     {
+       k = fetch;
+     }
+     else
+       fail << "unable to find 'wget', 'curl', or 'fetch'" <<
+         info << "use --fetch to specify the fetch program location";
+
+     if (verb > 1)
+       info << "using '" << p << "' as the fetch program, "
+            << "use --fetch to override";
+   }
+
+   // Success: publish the result to the cache.
+   //
+   fetch_path = p;
+   fetch_kind = k;
+
+   return fetch_kind;
+ }
+
+ // Start the fetch program (as determined by check()) for the specified
+ // URL, passing it any extra --fetch-option values. The returned process
+ // has its STDOUT redirected to a pipe. Issues diagnostics and throws
+ // failed if the program cannot be executed.
+ //
+ static process
+ start (const common_options& o, const string& url)
+ {
+   using start_function = process (const path&,
+                                   const strings&,
+                                   const string&);
+
+   // Pick the starter that corresponds to the kind of fetch program. Note
+   // that check() also makes sure fetch_path is set.
+   //
+   start_function* sf (nullptr);
+
+   switch (check (o))
+   {
+   case wget:
+     sf = &start_wget;
+     break;
+   case curl:
+     sf = &start_curl;
+     break;
+   case fetch:
+     sf = &start_fetch;
+     break;
+   }
+
+   try
+   {
+     return sf (fetch_path, o.fetch_option (), url);
+   }
+   catch (const process_error& e)
+   {
+     error << "unable to execute " << fetch_path << ": " << e.what ();
+
+     // If we are in the child, terminate it; otherwise fail.
+     //
+     if (e.child ())
+       exit (1);
+
+     throw failed ();
+   }
+ }
+
+ // Fetch the manifest file at http://<host>[:<port>]<file> by running the
+ // fetch program (see start()) and parsing its STDOUT as a manifest of
+ // type M. The port is omitted from the URL if it is 0. The file path
+ // must be absolute.
+ //
+ // Returns the parsed manifest if the fetch program exited successfully.
+ // Otherwise issues diagnostics and fails (by throwing failed).
+ //
+ template <typename M>
+ static M
+ fetch_url (const common_options& o,
+ const string& host,
+ uint16_t port,
+ const path& file)
+ {
+ // Assemble the URL.
+ //
+ //@@ Absolute path in URL: how is this going to work on Windows?
+ // Change to relative: watch for empty path.
+ //
+ assert (file.absolute ());
+
+ string url ("http://");
+ url += host;
+
+ if (port != 0)
+ url += ":" + to_string (port);
+
+ url += file.posix_string ();
+
+ process pr (start (o, url));
+
+ try
+ {
+ // Have the stream throw on read errors so that a truncated or broken
+ // download is not silently accepted.
+ //
+ ifdstream is (pr.in_ofd);
+ is.exceptions (ifdstream::badbit | ifdstream::failbit);
+
+ manifest_parser mp (is, url);
+ M m (mp);
+ is.close ();
+
+ if (pr.wait ())
+ return m;
+
+ // Child exited with an error, fall through.
+ }
+ // Ignore these exceptions if the child process exited with
+ // an error status since that's the source of the failure.
+ //
+ catch (const manifest_parsing& e)
+ {
+ if (pr.wait ())
+ fail (e.name, e.line, e.column) << e.description;
+ }
+ catch (const ifdstream::failure&)
+ {
+ if (pr.wait ())
+ fail << "unable to read fetched " << url;
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ // While it is reasonable to assume the child process issued
+ // diagnostics, some may not mention the URL.
+ //
+ error << "unable to fetch " << url <<
+ info << "re-run with -v for more information";
+ throw failed ();
+ }
+
template <typename M>
static M
fetch_file (const path& f)
@@ -56,13 +489,15 @@ namespace bpkg
}
repository_manifests
- fetch_repositories (const repository_location& rl)
+ fetch_repositories (const common_options& o, const repository_location& rl)
{
- assert (/*rl.remote () ||*/ rl.absolute ());
+ assert (rl.remote () || rl.absolute ());
+
+ path f (rl.path () / repositories);
return rl.remote ()
- ? repository_manifests ()
- : fetch_file<repository_manifests> (rl.path () / repositories);
+ ? fetch_url<repository_manifests> (o, rl.host (), rl.port (), f)
+ : fetch_file<repository_manifests> (f);
}
static const path packages ("packages");
@@ -74,12 +509,14 @@ namespace bpkg
}
package_manifests
- fetch_packages (const repository_location& rl)
+ fetch_packages (const common_options& o, const repository_location& rl)
{
- assert (/*rl.remote () ||*/ rl.absolute ());
+ assert (rl.remote () || rl.absolute ());
+
+ path f (rl.path () / packages);
return rl.remote ()
- ? package_manifests ()
- : fetch_file<package_manifests> (rl.path () / packages);
+ ? fetch_url<package_manifests> (o, rl.host (), rl.port (), f)
+ : fetch_file<package_manifests> (f);
}
}
diff --git a/bpkg/rep-fetch.cxx b/bpkg/rep-fetch.cxx
index 89107aa..e112bf7 100644
--- a/bpkg/rep-fetch.cxx
+++ b/bpkg/rep-fetch.cxx
@@ -23,26 +23,31 @@ using namespace butl;
namespace bpkg
{
static void
- rep_fetch (transaction& t, const shared_ptr<repository>& r)
+ rep_fetch (const common_options& co,
+ transaction& t,
+ const shared_ptr<repository>& r)
{
tracer trace ("rep_fetch(rep)");
database& db (t.database ());
tracer_guard tg (db, trace);
- if (verb >= 2)
- text << "fetching " << r->name ();
-
const repository_location& rl (r->location);
level4 ([&]{trace << r->name () << " " << rl;});
assert (rl.absolute () || rl.remote ());
+ // The fetch_*() functions below will be quiet at level 1, which
+ // can be quite confusing if the download hangs.
+ //
+ if (verb >= (rl.remote () ? 1 : 2))
+ text << "fetching " << r->name ();
+
r->fetched = true; // Mark as being fetched.
// Load the 'repositories' file and use it to populate the
// prerequisite and complement repository sets.
//
- repository_manifests rms (fetch_repositories (rl));
+ repository_manifests rms (fetch_repositories (co, rl));
for (repository_manifest& rm: rms)
{
@@ -82,7 +87,7 @@ namespace bpkg
// (or is already being) fetched.
//
if (!pr->fetched)
- rep_fetch (t, pr);
+ rep_fetch (co, t, pr);
level4 ([&]{trace << pr->name () << " prerequisite of " << r->name ();});
@@ -98,7 +103,7 @@ namespace bpkg
// @@ We need to check that that 'repositories' file hasn't
// changed since.
//
- package_manifests pms (fetch_packages (rl));
+ package_manifests pms (fetch_packages (co, rl));
// "Suspend" session while persisting packages to reduce memory
// consumption.
@@ -196,7 +201,7 @@ namespace bpkg
// their packages.
//
for (const lazy_shared_ptr<repository>& lp: ua)
- rep_fetch (t, lp.load ());
+ rep_fetch (o, t, lp.load ());
size_t rcount, pcount;
if (verb)
diff --git a/bpkg/test.sh b/bpkg/test.sh
index c252a8a..75555f8 100755
--- a/bpkg/test.sh
+++ b/bpkg/test.sh
@@ -147,6 +147,12 @@ test rep-add ../tests/repository/1/math/unstable
test rep-fetch
test rep-fetch
+# remote
+#
+test cfg-create --wipe
+test rep-add http://pkg.cppget.org/1/hello
+test rep-fetch
+
## @@
##
##