diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2015-09-25 07:10:57 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2015-09-25 07:10:57 +0200 |
commit | 287c71ddc00f0db1436d557042b69c03dc448b13 (patch) | |
tree | 824d66203c59530b856151c94c4bd322eff28562 | |
parent | 5ab55b3efa7be536af146c778ebe457462a886a6 (diff) |
Add support for fetching remote repositories
Wget, curl, and (FreeBSD) fetch are supported.
-rw-r--r-- | bpkg/bpkg-version | 77 | ||||
-rw-r--r-- | bpkg/bpkg.cxx | 4 | ||||
-rw-r--r-- | bpkg/common-options.cli | 28 | ||||
-rw-r--r-- | bpkg/fetch | 7 | ||||
-rw-r--r-- | bpkg/fetch.cxx | 453 | ||||
-rw-r--r-- | bpkg/rep-fetch.cxx | 21 | ||||
-rwxr-xr-x | bpkg/test.sh | 6 |
7 files changed, 576 insertions, 20 deletions
diff --git a/bpkg/bpkg-version b/bpkg/bpkg-version new file mode 100644 index 0000000..381fa56 --- /dev/null +++ b/bpkg/bpkg-version @@ -0,0 +1,77 @@ +// file : bpkg/bpkg-version -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BPKG_VERSION // Note: using the version macro itself. + +#include <bpkg/version> // LIBBPKG_VERSION + +// Version format is AABBCCDD where +// +// AA - major version number +// BB - minor version number +// CC - bugfix version number +// DD - alpha / beta (DD + 50) version number +// +// When DD is not 00, 1 is subtracted from AABBCC. For example: +// +// Version AABBCCDD +// 2.0.0 02000000 +// 2.1.0 02010000 +// 2.1.1 02010100 +// 2.2.0.a1 02019901 +// 3.0.0.b2 02999952 +// + +// Generally, we expect minor versions to be source code backwards- +// compatible, though we might have a minimum version requirement. +// +// AABBCCDD +#if (LIBBPKG_VERSION < 10000 || \ + LIBBPKG_VERSION > 990000) +# error incompatible libbpkg version +#endif + +// AABBCCDD +#define BPKG_VERSION 10000 +#define BPKG_VERSION_STR "0.1.0" + +// User agent. 
+// +#if defined(_WIN32) +# if defined(__MINGW32__) +# define BPKG_OS "MinGW" +# else +# define BPKG_OS "Windows" +# endif +#elif defined(__linux) +# define BPKG_OS "GNU/Linux" +#elif defined(__APPLE__) +# define BPKG_OS "MacOS" +#elif defined(__CYGWIN__) +# define BPKG_OS "Cygwin" +#elif defined(__FreeBSD__) +# define BPKG_OS "FreeBSD" +#elif defined(__OpenBSD__) +# define BPKG_OS "OpenBSD" +#elif defined(__NetBSD__) +# define BPKG_OS "NetBSD" +#elif defined(__sun) +# define BPKG_OS "Solaris" +#elif defined(__hpux) +# define BPKG_OS "HP-UX" +#elif defined(_AIX) +# define BPKG_OS "AIX" +#elif defined(__unix) +# define BPKG_OS "Unix" +#elif defined(__posix) +# define BPKG_OS "Posix" +#else +# define BPKG_OS "Other" +#endif + +#define BPKG_USER_AGENT \ + "bpkg/" BPKG_VERSION_STR " (" BPKG_OS "; +http://build2.org) " \ + "libbpkg/" LIBBPKG_VERSION_STR + +#endif // BPKG_VERSION diff --git a/bpkg/bpkg.cxx b/bpkg/bpkg.cxx index ef74144..75684cb 100644 --- a/bpkg/bpkg.cxx +++ b/bpkg/bpkg.cxx @@ -10,6 +10,7 @@ #include <bpkg/diagnostics> #include <bpkg/bpkg-options> +#include <bpkg/bpkg-version> // Commands. // @@ -72,7 +73,8 @@ try if (bo.version ()) { - cout << "bpkg 0.0.0" << endl + cout << "bpkg " << BPKG_VERSION_STR << "; " << + "libbpkg " << LIBBPKG_VERSION_STR << endl << "Copyright (c) 2014-2015 Code Synthesis Ltd" << endl << "This is free software released under the MIT license." << endl; return 0; diff --git a/bpkg/common-options.cli b/bpkg/common-options.cli index 25db69c..afeec77 100644 --- a/bpkg/common-options.cli +++ b/bpkg/common-options.cli @@ -2,6 +2,7 @@ // copyright : Copyright (c) 2014-2015 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file +include <vector>; include <cstdint>; include <bpkg/types>; @@ -29,10 +30,35 @@ namespace bpkg further." }; + path --fetch + { + "<path>", + "The fetch program that should be used to download remote resources. + Currently, \cb{bpkg} recognizes \cb{curl}, \cb{wget}, and \cb{fetch}. 
+ Note that the last component of <path> must contain one of these + names as a substring in order for \cb{bpkg} to recognize which + program is being used. You can also specify additional options that + should be passed to the fetch program with \cb{--fetch-option}. + + If the fetch program is not specified, then \cb{bpkg} will try to + discover if one of the above program is available and use that. + Currently, \cb{bpkg} has the following preference order: \cb{wget} + 1.16 or higher (supports \cb{--show-progress}), \cb{curl}, + \cb{wget}, and \cb{fetch}." + }; + + strings --fetch-option + { + "<opt>", + "Additional option that should be passed to the fetch program. See + \cb{--fetch} for more information on the fetch program. Repeat this + option to specify multiple fetch options." + }; + // The following option is "fake" in that it is actually handled by // argv_file_scanner. We have it here for documentation. // - std::string --options-file + string --options-file { "<file>", "Read additional options from <file> with each option appearing on a @@ -9,14 +9,17 @@ #include <bpkg/types> #include <bpkg/utility> +#include <bpkg/common-options> namespace bpkg { repository_manifests fetch_repositories (const dir_path&); - repository_manifests fetch_repositories (const repository_location&); + repository_manifests fetch_repositories (const common_options&, + const repository_location&); package_manifests fetch_packages (const dir_path&); - package_manifests fetch_packages (const repository_location&); + package_manifests fetch_packages (const common_options&, + const repository_location&); } #endif // BPKG_FETCH diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index 34445a8..10acf1f 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -5,6 +5,7 @@ #include <bpkg/fetch> #include <fstream> +#include <cstdint> // uint16_t #include <butl/process> #include <butl/fdstream> @@ -13,12 +14,444 @@ #include <bpkg/manifest-parser> #include <bpkg/diagnostics> +#include 
<bpkg/bpkg-version> using namespace std; using namespace butl; namespace bpkg { + // wget + // + static uint16_t wget_major; + static uint16_t wget_minor; + + static bool + check_wget (const path& prog) + { + tracer trace ("check_wget"); + + // wget --version prints the version to STDOUT and exits with 0 + // status. The first line starts with "GNU Wget X.Y[.Z]. + // + const char* args[] = {prog.string ().c_str (), "--version", nullptr}; + + if (verb >= 3) + print_process (args); + + try + { + process pr (args, 0, -1); // Redirect STDOUT to a pipe. + + ifdstream is (pr.in_ofd); + string l; + getline (is, l); + + if (l.compare (0, 9, "GNU Wget ") != 0) + return false; + + // Extract the version. If something goes wrong, set the version + // to 0 so that we treat it as a really old wget. + // + try + { + //l = "GNU Wget 1.8.1"; + string s (l, 9); + size_t p; + wget_major = static_cast<uint16_t> (stoul (s, &p)); + + if (p != s.size () && s[p] == '.') + wget_minor = static_cast<uint16_t> (stoul (string (s, p + 1))); + + level4 ([&]{trace << "version " << wget_major << '.' << wget_minor;}); + } + catch (const std::exception&) + { + wget_major = 0; + wget_minor = 0; + + level4 ([&]{trace << "unable to extract version from '" << l << "'";}); + } + + return pr.wait (); + } + catch (const process_error& e) + { + if (e.child ()) + exit (1); + + return false; + } + } + + static process + start_wget (const path& prog, const strings& ops, const string& url) + { + string ua (BPKG_USER_AGENT " wget/" + to_string (wget_major) + "." + + to_string (wget_minor)); + + cstrings args { + prog.string ().c_str (), + "-U", ua.c_str () + }; + + // Map verbosity level. If we are running quiet or at level 1, + // then run wget quiet. At level 2 and 3 run it at the default + // level (so we will print the command line and it will display + // the progress, error messages, etc). Higher than that -- run + // it with debug output. 
+ // + // In the wget world quiet means don't print anything, not even + // error messages. There is also the -nv mode (aka "non-verbose") + // which prints error messages but also a useless info-line. So + // what we are going to do is run it quiet and hope for the best. + // If things go south, we suggest (in fetch_url()) below that the + // user re-runs the command with -v to see all the gory details. + // + if (verb < 2) + args.push_back ("-q"); + else if (verb > 3) + args.push_back ("-d"); + + // Add extra options. The idea is that they may override what + // we have set before this point but not after (like -O below). + // + for (const string& o: ops) + args.push_back (o.c_str ()); + + args.push_back ("-O"); // Output to... + args.push_back ("-"); // ...STDOUT. + args.push_back (url.c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + return process (args.data (), 0, -1); // Failure handled by the caller. + } + + // curl + // + static bool + check_curl (const path& prog) + { + // curl --version prints the version to STDOUT and exits with 0 + // status. The first line starts with "curl X.Y.Z" + // + const char* args[] = {prog.string ().c_str (), "--version", nullptr}; + + if (verb >= 3) + print_process (args); + + try + { + process pr (args, 0, -1); // Redirect STDOUT to a pipe. + + ifdstream is (pr.in_ofd); + string l; + getline (is, l); + + return l.compare (0, 5, "curl ") == 0 && pr.wait (); + } + catch (const process_error& e) + { + if (e.child ()) + exit (1); + + return false; + } + } + + static process + start_curl (const path& prog, const strings& ops, const string& url) + { + cstrings args { + prog.string ().c_str (), + "-f", // Fail on HTTP errors (e.g., 404). + "-L", // Follow redirects. + "-A", (BPKG_USER_AGENT " curl") + }; + + // Map verbosity level. If we are running quiet or at level 1, + // then run curl quiet. 
At level 2 and 3 run it at the default + // level (so we will print the command line and it will display + // the progress). Higher than that -- run it verbose. + // + if (verb < 2) + { + args.push_back ("-s"); + args.push_back ("-S"); // But show errors. + } + else if (verb > 3) + args.push_back ("-v"); + + // Add extra options. The idea is that they may override what + // we have set before this point but not after. + // + for (const string& o: ops) + args.push_back (o.c_str ()); + + args.push_back (url.c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + return process (args.data (), 0, -1); // Failure handled by the caller. + } + + // fetch + // + static bool + check_fetch (const path& prog) + { + // This one doesn't have --version or --help. Running it without + // any arguments causes it to dump usage and exit with the error + // status. The usage starts with "usage: fetch " which will be + // our signature. + // + const char* args[] = {prog.string ().c_str (), nullptr}; + + if (verb >= 3) + print_process (args); + + try + { + process pr (args, 0, -1, 1); // Redirect STDOUT and STDERR to a pipe. + + ifdstream is (pr.in_ofd); + string l; + getline (is, l); + + return l.compare (0, 13, "usage: fetch ") == 0; + } + catch (const process_error& e) + { + if (e.child ()) + exit (1); + + return false; + } + } + + static process + start_fetch (const path& prog, const strings& ops, const string& url) + { + // -T|--timeout 120 seconds by default, leave it at that for now. + // -n|--no-mtime + // + cstrings args { + prog.string ().c_str (), + "--user-agent", (BPKG_USER_AGENT " fetch") + }; + + // Map verbosity level. If we are running quiet or at level 1, + // then run fetch quiet. At level 2 and 3 run it at the default + // level (so we will print the command line and it will display + // the progress). Higher than that -- run it verbose. 
+ // + if (verb < 2) + args.push_back ("-q"); + else if (verb > 3) + args.push_back ("-v"); + + // Add extra options. The idea is that they may override what + // we have set before this point but not after (like -o below). + // + for (const string& o: ops) + args.push_back (o.c_str ()); + + args.push_back ("-o"); // Output to... + args.push_back ("-"); // ...STDOUT. + args.push_back (url.c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + return process (args.data (), 0, -1); // Failure handled by the caller. + } + + // The dispatcher. + // + // Cache the result of finding/testing the fetch program. Sometimes + // a simple global variable is really the right solution... + // + enum kind {wget, curl, fetch}; + + static path fetch_path; + static kind fetch_kind; + + kind + check (const common_options& o) + { + if (!fetch_path.empty ()) + return fetch_kind; // Cached. + + if (o.fetch_specified ()) + { + const path& p (fetch_path = o.fetch ()); + + // Figure out which one it is. + // + const path& n (p.leaf ()); + const string& s (n.string ()); + + if (s.find ("wget") != string::npos) + { + if (!check_wget (p)) + fail << p << " does not appear to be the 'wget' program"; + + fetch_kind = wget; + } + else if (s.find ("curl") != string::npos) + { + if (!check_curl (p)) + fail << p << " does not appear to be the 'curl' program"; + + fetch_kind = curl; + } + else if (s.find ("fetch") != string::npos) + { + if (!check_fetch (p)) + fail << p << " does not appear to be the 'fetch' program"; + + fetch_kind = fetch; + } + else + fail << "unknown fetch program " << p; + } + else + { + // See if any is available. 
The preference order is: + // + // wget 1.16 or up + // curl + // wget + // fetch + // + bool wg (check_wget (fetch_path = path ("wget"))); + + if (wg && (wget_major > 1 || (wget_major == 1 && wget_minor >= 16))) + { + fetch_kind = wget; + } + else if (check_curl (fetch_path = path ("curl"))) + { + fetch_kind = curl; + } + else if (wg) + { + fetch_path = path ("wget"); + fetch_kind = wget; + } + else if (check_fetch (fetch_path = path ("fetch"))) + { + fetch_kind = fetch; + } + else + fail << "unable to find 'wget', 'curl', or 'fetch'" << + info << "use --fetch to specify the fetch program location"; + + if (verb > 1) + info << "using '" << fetch_path << "' as the fetch program, " + << "use --fetch to override"; + } + + return fetch_kind; + } + + static process + start (const common_options& o, const string& url) + { + process (*start) (const path&, const strings&, const string&) = nullptr; + + switch (check (o)) + { + case wget: start = &start_wget; break; + case curl: start = &start_curl; break; + case fetch: start = &start_fetch; break; + } + + try + { + return start (fetch_path, o.fetch_option (), url); + } + catch (const process_error& e) + { + error << "unable to execute " << fetch_path << ": " << e.what (); + + if (e.child ()) + exit (1); + + throw failed (); + } + } + + template <typename M> + static M + fetch_url (const common_options& o, + const string& host, + uint16_t port, + const path& file) + { + // Assemble the URL. + // + //@@ Absolute path in URL: how is this going to work on Windows? + // Change to relative: watch for empty path. 
+ // + assert (file.absolute ()); + + string url ("http://"); + url += host; + + if (port != 0) + url += ":" + to_string (port); + + url += file.posix_string (); + + process pr (start (o, url)); + + try + { + ifdstream is (pr.in_ofd); + is.exceptions (ifdstream::badbit | ifdstream::failbit); + + manifest_parser mp (is, url); + M m (mp); + is.close (); + + if (pr.wait ()) + return m; + + // Child exited with an error, fall through. + } + // Ignore these exceptions if the child process exited with + // an error status since that's the source of the failure. + // + catch (const manifest_parsing& e) + { + if (pr.wait ()) + fail (e.name, e.line, e.column) << e.description; + } + catch (const ifdstream::failure&) + { + if (pr.wait ()) + fail << "unable to read fetched " << url; + } + + // We should only get here if the child exited with an error status. + // + assert (!pr.wait ()); + + // While it is reasonable to assume the child process issued + // diagnostics, some may not mention the URL. + // + error << "unable to fetch " << url << + info << "re-run with -v for more information"; + throw failed (); + } + template <typename M> static M fetch_file (const path& f) @@ -56,13 +489,15 @@ namespace bpkg } repository_manifests - fetch_repositories (const repository_location& rl) + fetch_repositories (const common_options& o, const repository_location& rl) { - assert (/*rl.remote () ||*/ rl.absolute ()); + assert (rl.remote () || rl.absolute ()); + + path f (rl.path () / repositories); return rl.remote () - ? repository_manifests () - : fetch_file<repository_manifests> (rl.path () / repositories); + 
fetch_url<repository_manifests> (o, rl.host (), rl.port (), f) + : fetch_file<repository_manifests> (f); } static const path packages ("packages"); @@ -74,12 +509,14 @@ namespace bpkg } package_manifests - fetch_packages (const repository_location& rl) + fetch_packages (const common_options& o, const repository_location& rl) { - assert (/*rl.remote () ||*/ rl.absolute ()); + assert (rl.remote () || rl.absolute ()); + + path f (rl.path () / packages); return rl.remote () - ? package_manifests () - : fetch_file<package_manifests> (rl.path () / packages); + ? fetch_url<package_manifests> (o, rl.host (), rl.port (), f) + : fetch_file<package_manifests> (f); } } diff --git a/bpkg/rep-fetch.cxx b/bpkg/rep-fetch.cxx index 89107aa..e112bf7 100644 --- a/bpkg/rep-fetch.cxx +++ b/bpkg/rep-fetch.cxx @@ -23,26 +23,31 @@ using namespace butl; namespace bpkg { static void - rep_fetch (transaction& t, const shared_ptr<repository>& r) + rep_fetch (const common_options& co, + transaction& t, + const shared_ptr<repository>& r) { tracer trace ("rep_fetch(rep)"); database& db (t.database ()); tracer_guard tg (db, trace); - if (verb >= 2) - text << "fetching " << r->name (); - const repository_location& rl (r->location); level4 ([&]{trace << r->name () << " " << rl;}); assert (rl.absolute () || rl.remote ()); + // The fetch_*() functions below will be quiet at level 1, which + // can be quite confusing if the download hangs. + // + if (verb >= (rl.remote () ? 1 : 2)) + text << "fetching " << r->name (); + r->fetched = true; // Mark as being fetched. // Load the 'repositories' file and use it to populate the // prerequisite and complement repository sets. // - repository_manifests rms (fetch_repositories (rl)); + repository_manifests rms (fetch_repositories (co, rl)); for (repository_manifest& rm: rms) { @@ -82,7 +87,7 @@ namespace bpkg // (or is already being) fetched. 
// if (!pr->fetched) - rep_fetch (t, pr); + rep_fetch (co, t, pr); level4 ([&]{trace << pr->name () << " prerequisite of " << r->name ();}); @@ -98,7 +103,7 @@ namespace bpkg // @@ We need to check that that 'repositories' file hasn't // changed since. // - package_manifests pms (fetch_packages (rl)); + package_manifests pms (fetch_packages (co, rl)); // "Suspend" session while persisting packages to reduce memory // consumption. @@ -196,7 +201,7 @@ namespace bpkg // their packages. // for (const lazy_shared_ptr<repository>& lp: ua) - rep_fetch (t, lp.load ()); + rep_fetch (o, t, lp.load ()); size_t rcount, pcount; if (verb) diff --git a/bpkg/test.sh b/bpkg/test.sh index c252a8a..75555f8 100755 --- a/bpkg/test.sh +++ b/bpkg/test.sh @@ -147,6 +147,12 @@ test rep-add ../tests/repository/1/math/unstable test rep-fetch test rep-fetch +# remote +# +test cfg-create --wipe +test rep-add http://pkg.cppget.org/1/hello +test rep-fetch + ## @@ ## ## |