From 2936f9c7c9eb330e71cb28c1493761310330b420 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 18 Jul 2018 22:44:03 +0300 Subject: Implement package archive upload --- bdep/common.cli | 19 +++ bdep/publish.cli | 10 ++ bdep/publish.cxx | 483 ++++++++++++++++++++++++++++++++++++++++++++++++++-- bdep/utility.txx | 2 +- bdep/version.hxx.in | 39 +++++ tests/common.test | 19 ++- tests/publish.test | 210 +++++++++++++++++++++++ 7 files changed, 762 insertions(+), 20 deletions(-) create mode 100644 tests/publish.test diff --git a/bdep/common.cli b/bdep/common.cli index 471f5a2..eb9c6ec 100644 --- a/bdep/common.cli +++ b/bdep/common.cli @@ -123,6 +123,25 @@ namespace bdep multiple build options." } + path --curl = "curl" + { + "", + "The curl program to be used for network operations. You can also + specify additional options that should be passed to the curl program + with \cb{--curl-option}. + + If the curl program is not explicitly specified, then \cb{bdep} will use + \cb{curl} by default." + } + + strings --curl-option + { + "", + "Additional option to be passed to the curl program. See \cb{--curl} for + more information on the curl program. Repeat this option to specify + multiple curl options." + } + string --pager // String to allow empty value. { "", diff --git a/bdep/publish.cli b/bdep/publish.cli index f44ecc0..121218a 100644 --- a/bdep/publish.cli +++ b/bdep/publish.cli @@ -119,6 +119,16 @@ namespace bdep obtained from the environment and/or version control system. See the ENVIRONMENT section for details." } + + string --simulate + { + "", + "Simulate the specified outcome of the submission process without + actually performing any externally visible actions (such as publishing + the package). The commonly used outcome value is \cb{success}. For + other recognized outcomes refer to the submission service + documentation." 
+ } }; "\h|ENVIRONMENT| diff --git a/bdep/publish.cxx b/bdep/publish.cxx index 2a74262..49ba0b8 100644 --- a/bdep/publish.cxx +++ b/bdep/publish.cxx @@ -4,6 +4,9 @@ #include +#include // strtoul() + +#include #include #include @@ -52,7 +55,7 @@ namespace bdep return parse_url (*l, "remote.origin.build2ControlUrl"); } - // Otherwise, get remote.origin.url and try to derive an https URL from + // Otherwise, get remote.origin.url and try to derive an HTTPS URL from // it. // if (optional l = git_line (prj, @@ -224,6 +227,444 @@ namespace bdep } } + // Submit package archive using the curl program and parse the response + // manifest. On success, return the submission reference (first) and message + // (second). Issue diagnostics and fail if anything goes wrong. + // + static pair + submit (const cmd_publish_options& o, + const path& archive, + const string& checksum, + const string& section, + const string& email, + const optional& ctrl) + { + using parser = manifest_parser; + using parsing = manifest_parsing; + using name_value = manifest_name_value; + + // The overall plan is to post the archive using the curl program, read + // the HTTP response status and content type, read and parse the body + // according to the content type, and obtain the result reference and + // message in case of both the submission success and failure. + // + // The successful submission response (HTTP status code 200) is expected + // to contain the submission result manifest (text/manifest content type). + // The faulty response (HTTP status code other than 200) can either + // contain the result manifest or a plain text error description + // (text/plain content type) or some other content (for example + // text/html). We will print the manifest message value, if available or + // the first line of the plain text error description or, as a last + // resort, construct the message from the HTTP status code and reason + // phrase. 
+ // + string message; + optional reference; // Must be present on the submission success. + + // None of the 3XX redirect code semantics assume automatic re-posting. We + // will treat all such codes as failures, additionally printing the + // location header value to advise the user to try the other URL for the + // package submission. + // + // Note that repositories that move to a new URL may well be responding + // with the moved permanently (301) code. + // + optional location; + + // Note that it's a bad idea to issue the diagnostics while curl is + // running, as it will be messed up with the progress output. Thus, we + // throw the runtime_error exception on the HTTP response parsing error + // (rather than use our fail stream) and issue the diagnostics after curl + // finishes. + // + // Also note that we prefer the start/finish process facility for running + // curl over using butl::curl because in this context it is restrictive + // and inconvenient. + // + process pr; + bool io (false); + try + { + url u (o.repository ()); + u.query = "submit"; + + // Map the verbosity level. + // + cstrings v; + if (verb < 1) + { + v.push_back ("-s"); + v.push_back ("-S"); // But show errors. + } + else if (verb == 1) + v.push_back ("--progress-bar"); + else if (verb > 3) + v.push_back ("-v"); + + // Start curl program. + // + fdpipe pipe (fdopen_pipe ()); // Text mode seems appropriate. + + // Note that we don't specify any default timeouts, assuming that bdep + // is an interactive program and the user can always interrupt the + // command (or pass the timeout with --curl-option). + // + pr = start (0 /* stdin */, + pipe /* stdout */, + 2 /* stderr */, + o.curl (), + v, + "-A", (BDEP_USER_AGENT " curl"), + + o.curl_option (), + + // Include the response headers in the output so we can get + // the status code/reason, content type, and the redirect + // location. 
+ // + "--include", + + "--form", "archive=@" + archive.string (), + "--form-string", "sha256sum=" + checksum, + "--form-string", "section=" + section, + "--form-string", "email=" + email, + + ctrl + ? strings ({"--form-string", "control=" + ctrl->string ()}) + : strings (), + + o.simulate_specified () + ? strings ({"--form-string", "simulate=" + o.simulate ()}) + : strings (), + + u.string ()); + + pipe.out.close (); + + // First we read the HTTP response status line and headers. At this + // stage we will read until the empty line (containing just CRLF). Not + // being able to reach such a line is an error, which is the reason for + // the exception mask choice. + // + ifdstream is ( + move (pipe.in), + fdstream_mode::skip, + ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); + + // Parse and return the HTTP status code. Return 0 if the argument is + // invalid. + // + auto status_code = [] (const string& s) + { + char* e (nullptr); + unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. + assert (e != nullptr); + + return *e == '\0' && c >= 100 && c < 600 + ? static_cast (c) + : 0; + }; + + // Read the CRLF-terminated line from the stream stripping the trailing + // CRLF. + // + auto read_line = [&is] () + { + string l; + getline (is, l); // Strips the trailing LF (0xA). + + // Note that on POSIX CRLF is not automatically translated into LF, + // so we need to strip CR (0xD) manually. + // + if (!l.empty () && l.back () == '\r') + l.pop_back (); + + return l; + }; + + auto bad_response = [] (const string& d) {throw runtime_error (d);}; + + // Read and parse the HTTP response status line, return the status code + // and the reason phrase. + // + struct http_status + { + uint16_t code; + string reason; + }; + + auto read_status = [&read_line, &status_code, &bad_response] () + { + string l (read_line ()); + + for (;;) // Breakout loop. + { + if (l.compare (0, 5, "HTTP/") != 0) + break; + + size_t p (l.find (' ', 5)); // Finds the protocol end. 
+ if (p == string::npos) + break; + + p = l.find_first_not_of (' ', p + 1); // Finds the code start. + if (p == string::npos) + break; + + size_t e (l.find (' ', p + 1)); // Finds the code end. + if (e == string::npos) + break; + + uint16_t c (status_code (string (l, p, e - p))); + if (c == 0) + break; + + string r; + p = l.find_first_not_of (' ', e + 1); // Finds the reason start. + if (p != string::npos) + { + e = l.find_last_not_of (' '); // Finds the reason end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return http_status {c, move (r)}; + } + + bad_response ("invalid HTTP response status line '" + l + "'"); + + assert (false); // Can't be here. + return http_status {}; + }; + + // The curl output for a successful submission looks like this: + // + // HTTP/1.1 100 Continue + // + // HTTP/1.1 200 OK + // Content-Length: 83 + // Content-Type: text/manifest;charset=utf-8 + // + // : 1 + // status: 200 + // message: submission queued + // reference: 256910ca46d5 + // + // curl normally sends the 'Expect: 100-continue' header for uploads, + // so we need to handle the interim HTTP server response with the + // continue (100) status code. + // + // Interestingly, Apache can respond with the continue (100) code and + // with the not found (404) code afterwards. Can it be configured to + // just respond with 404? + // + http_status rs (read_status ()); + + if (rs.code == 100) + { + while (!read_line ().empty ()) ; // Skips the interim response. + rs = read_status (); // Reads the final status code. + } + + // Read through the response headers until the empty line is encountered + // and obtain the content type and/or the redirect location, if present. + // + optional ctype; + + // Check if the line contains the specified header and return its value + // if that's the case. Return nullopt otherwise. + // + // Note that we don't expect the header values that we are interested in + // to span over multiple lines. 
+ // + string l; + auto header = [&l] (const char* name) -> optional + { + size_t n (string::traits_type::length (name)); + if (!(casecmp (name, l, n) == 0 && l[n] == ':')) + return nullopt; + + string r; + size_t p (l.find_first_not_of (' ', n + 1)); // Finds value begin. + if (p != string::npos) + { + size_t e (l.find_last_not_of (' ')); // Finds value end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return optional (move (r)); + }; + + while (!(l = read_line ()).empty ()) + { + if (optional v = header ("Content-Type")) + ctype = move (v); + else if (optional v = header ("Location")) + { + if ((rs.code >= 301 && rs.code <= 303) || rs.code == 307) + try + { + location = url (*v); + location->query = nullopt; // Can possibly contain '?submit'. + } + catch (const invalid_argument&) + { + // Let's just ignore invalid locations. + // + } + } + } + + assert (!eof (is)); // Would have already failed otherwise. + + // Now parse the response payload if the content type is specified and + // is recognized (text/manifest or text/plain), skip it (with the + // ifdstream's close() function) otherwise. + // + // Note that eof and getline() fail conditions are not errors anymore, + // so we adjust the exception mask accordingly. + // + is.exceptions (ifdstream::badbit); + + bool manifest (false); + + if (ctype) + { + if (casecmp ("text/manifest", *ctype, 13) == 0) + { + parser p (is, "manifest"); + name_value nv (p.next ()); + + if (nv.empty ()) + bad_response ("empty manifest"); + + const string& n (nv.name); + string& v (nv.value); + + // The format version pair is verified by the parser. + // + assert (n.empty () && v == "1"); + + auto bad_value = [&p, &nv] (const string& d) { + throw parsing (p.name (), nv.value_line, nv.value_column, d);}; + + // Get and verify the HTTP status. 
+ // + nv = p.next (); + if (n != "status") + bad_value ("no status specified"); + + uint16_t c (status_code (v)); + if (c == 0) + bad_value ("invalid HTTP status '" + v + "'"); + + if (c != rs.code) + bad_value ("status " + v + " doesn't match HTTP response " + "code " + to_string (rs.code)); + + // Get the message. + // + nv = p.next (); + if (n != "message" || v.empty ()) + bad_value ("no message specified"); + + message = move (v); + + // Get the reference if the submission succeeded. + // + if (c == 200) + { + nv = p.next (); + if (n != "reference" || v.empty ()) + bad_value ("no reference specified"); + + reference = move (v); + } + + // Skip the remaining name/value pairs. + // + for (nv = p.next (); !nv.empty (); nv = p.next ()) ; + + manifest = true; + } + else if (casecmp ("text/plain", *ctype, 10) == 0) + getline (is, message); // Can result in the empty message. + } + + is.close (); // Detect errors. + + // The meaningful result we expect is either manifest (status code is + // not necessarily 200) or HTTP redirect (location is present). We + // are unable to interpret any other cases and so report them as a bad + // response. + // + if (!manifest) + { + if (rs.code == 200) + bad_response ("manifest expected"); + + if (message.empty ()) + { + message = "HTTP status code " + to_string (rs.code); + + if (!rs.reason.empty ()) + message += " (" + lcase (rs.reason) + ")"; + } + + if (!location) + bad_response (message); + } + } + catch (const io_error&) + { + // Presumably the child process failed and issued diagnostics so let + // finish() try to deal with that first. + // + io = true; + } + // Handle all parsing errors, including the manifest_parsing exception that + // inherits from the runtime_error exception. + // + // Note that the io_error class inherits from the runtime_error class, so + // this catch-clause must go last. + // + catch (const runtime_error& e) + { + finish (o.curl (), pr); // Throws on process failure. 
+ + // Finally we can safely issue the diagnostics (see above for details). + // + diag_record dr (fail); + dr << e << + info << "consider reporting this to " << o.repository () + << " repository maintainers"; + + if (reference) + dr << info << "reference: " << *reference; + else + dr << info << "checksum: " << checksum; + } + + finish (o.curl (), pr, io); + + assert (!message.empty ()); + + // Print the submission failure reason and fail. + // + if (!reference) + { + diag_record dr (fail); + dr << message; + + if (location) + dr << info << "new repository location: " << *location; + } + + return make_pair (move (*reference), message); + } + static int cmd_publish (const cmd_publish_options& o, const dir_path& prj, @@ -250,7 +691,6 @@ namespace bdep fail << "unable to obtain publisher's email" << info << "use --email to specify explicitly"; - // Collect package information (version, project, section). // // @@ It would have been nice to publish them in the dependency order. @@ -264,8 +704,8 @@ namespace bdep string project; string section; // alpha|beta|stable (or --section) - path archive; - string checksum; + path archive; + string checksum; }; vector pkgs; @@ -296,7 +736,7 @@ namespace bdep // if (!o.yes ()) { - text << "publishing:" << '\n' + text << "publishing:" << '\n' << " to: " << repo << '\n' << " as: " << email << '\n'; @@ -361,9 +801,25 @@ namespace bdep if (!exists (c)) fail << "package distribution did not produce expected checksum " << c; - //@@ TODO: call bpkg-pkg-verify to verify archive name/content all match. + // Verify that archive name/content all match. + // + run_bpkg (2 /* verbosity */, o, "pkg-verify", a); - //@@ TODO: read checksum from .sha256 file and store in p.checksum. + // Read the checksum. 
+ // + try + { + ifdstream is (c); + string l; + getline (is, l); + is.close (); + + p.checksum = string (l, 0, 64); + } + catch (const io_error& e) + { + fail << "unable to read " << c << ": " << e; + } p.archive = move (a); } @@ -372,10 +828,17 @@ namespace bdep // for (const package& p: pkgs) { - //@@ TODO: call curl to upload the archive, parse response manifest, - // and print message/reference. + // The path points into the temporary directory so let's omit the + // directory part. + // + if (verb) + text << "submitting " << p.archive.leaf (); + + pair r ( + submit (o, p.archive, p.checksum, p.section, email, ctrl)); - text << "submitting " << p.archive; + if (verb) + text << r.second << " (" << r.first << ")"; //@@ TODO [phase 2]: add checksum file to build2-control branch, commit // and push (this will need some more discussion). diff --git a/bdep/utility.txx b/bdep/utility.txx index 84248d0..9926e12 100644 --- a/bdep/utility.txx +++ b/bdep/utility.txx @@ -17,7 +17,7 @@ namespace bdep { try { - return process_start_callback ( + return butl::process_start_callback ( [] (const char* const args[], size_t n) { if (verb >= 2) diff --git a/bdep/version.hxx.in b/bdep/version.hxx.in index bd29a8c..f3507ec 100644 --- a/bdep/version.hxx.in +++ b/bdep/version.hxx.in @@ -53,4 +53,43 @@ $libodb.check(LIBODB_VERSION, LIBODB_SNAPSHOT)$ $libodb-sqlite.check(LIBODB_SQLITE_VERSION, LIBODB_SQLITE_SNAPSHOT)$ +// User agent. 
+// +#if defined(_WIN32) +# if defined(__MINGW32__) +# define BDEP_OS "MinGW" +# else +# define BDEP_OS "Windows" +# endif +#elif defined(__linux) +# define BDEP_OS "GNU/Linux" +#elif defined(__APPLE__) +# define BDEP_OS "MacOS" +#elif defined(__CYGWIN__) +# define BDEP_OS "Cygwin" +#elif defined(__FreeBSD__) +# define BDEP_OS "FreeBSD" +#elif defined(__OpenBSD__) +# define BDEP_OS "OpenBSD" +#elif defined(__NetBSD__) +# define BDEP_OS "NetBSD" +#elif defined(__sun) +# define BDEP_OS "Solaris" +#elif defined(__hpux) +# define BDEP_OS "HP-UX" +#elif defined(_AIX) +# define BDEP_OS "AIX" +#elif defined(__unix) +# define BDEP_OS "Unix" +#elif defined(__posix) +# define BDEP_OS "Posix" +#else +# define BDEP_OS "Other" +#endif + +#define BDEP_USER_AGENT \ + "bdep/" BDEP_VERSION_ID " (" BDEP_OS "; +https://build2.org)" \ + " libbpkg/" LIBBPKG_VERSION_ID \ + " libbutl/" LIBBUTL_VERSION_ID + #endif // BDEP_VERSION diff --git a/tests/common.test b/tests/common.test index d85e038..8de78d7 100644 --- a/tests/common.test +++ b/tests/common.test @@ -23,15 +23,16 @@ test.options += --build $build # (for example, to make sure that configuration post-test state is valid and is # as expected). # -clean = $* clean -deinit = $* deinit -init = $* init -fetch = $* fetch -new = $* new --no-checks -status = $* status -sync = $* sync -update = $* update -config = $* config +clean = $* clean +deinit = $* deinit +init = $* init +fetch = $* fetch +new = $* new --no-checks +status = $* status +sync = $* sync +update = $* update +config = $* config +publish = $* publish # All testscripts are named after bdep commands, for example sync.test. So the # testscript scope id is a name of the command being tested. 
diff --git a/tests/publish.test b/tests/publish.test new file mode 100644 index 0000000..fcab32b --- /dev/null +++ b/tests/publish.test @@ -0,0 +1,210 @@ +# file : tests/publish.test +# copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +.include common.test project.test + +# By default simulate the package submissions to the stage repository. +# +repository = ($config.bdep.test.repository != [null] \ + ? "$config.bdep.test.repository" \ + : 'https://stage.build2.org') + +test.arguments += --repository "$repository" --control 'none' --yes + +cxx = cc "config.cxx=$config.cxx" + +new += 2>! +init += $cxx -d prj 2>! &prj/**/bootstrap/*** + +# Note that using the same package name and version for tests may result in +# duplicate submissions. We will use a unique version for each test, +# incrementing the patch version for 1.0.X. +# +# Next version to use: 1.0.8 +# +: single-pkg +: +{ + test.arguments += --simulate 'success' + + : basic + : + { + $clone_root_prj; + $init -C @cfg &prj-cfg/***; + sed -i -e 's/^(version:) .*$/\1 1.0.1/' prj/manifest; + + $* 2>>~%EOE% + synchronizing: + upgrade prj/1.0.1 + submitting prj-1.0.1.tar.gz + %. 
+ %.*prj/1.0.1 submission is queued \(.{12}\)% + EOE + } + + : no-cfg + : + { + $clone_root_prj; + + $* 2>>~%EOE% != 0 + %error: no default configuration in project .+% + info: use (@ | --config|-c | --all|-a) to specify configuration explicitly + EOE + } + + : multi-cfg + : + { + $clone_root_prj; + $init -C @cfg1 &prj-cfg1/***; + $init -C @cfg2 &prj-cfg2/***; + + $* --all 2>'error: multiple configurations specified for publish' != 0 + } +} + +: multi-pkg +: +{ + test.arguments += --simulate 'success' + + +$new -t empty prj &prj/*** + +$new --package -t lib libprj -d prj + +$new --package -t exe prj -d prj + + : both + : + { + $clone_prj; + sed -i -e 's/^(version:) .*$/\1 1.0.2/' prj/libprj/manifest; + sed -i -e 's/^(version:) .*$/\1 1.0.2/' prj/prj/manifest; + $init -C @cfg &prj-cfg/***; + + $* 2>>~%EOE% + submitting libprj-1.0.2.tar.gz + %. + %.*libprj/1.0.2 submission is queued \(.{12}\)% + submitting prj-1.0.2.tar.gz + %. + %.*prj/1.0.2 submission is queued \(.{12}\)% + EOE + } + + : single + : + { + $clone_prj; + sed -i -e 's/^(version:) .*$/\1 1.0.3/' prj/libprj/manifest; + $init -C @cfg &prj-cfg/***; + + # Publish the single libprj package rather than the whole prj project. + # + test.arguments = $regex.apply($test.arguments, '^(prj)$', '\1/libprj'); + + $* 2>>~%EOE% + submitting libprj-1.0.3.tar.gz + %. + %.*libprj/1.0.3 submission is queued \(.{12}\)% + EOE + } + + : prompt + : + { + $clone_prj; + sed -i -e 's/^(version:) .*$/\1 1.0.4/' prj/libprj/manifest; + sed -i -e 's/^(version:) .*$/\1 1.0.4/' prj/prj/manifest; + $init -C @cfg &prj-cfg/***; + + # Suppress the --yes option. + # + test.arguments = $regex.apply($test.arguments, '^(--yes)$', ''); + + $* <'y' 2>>~"%EOE%" + publishing: + to: $repository + % as: .+@.+% + + package: libprj + version: 1.0.4 + project: prj + section: stable + + package: prj + version: 1.0.4 + project: prj + section: stable + continue? [y/n] submitting libprj-1.0.4.tar.gz + %. 
+ %.*libprj/1.0.4 submission is queued \\\(.{12}\\\)% + submitting prj-1.0.4.tar.gz + %. + %.*prj/1.0.4 submission is queued \\\(.{12}\\\)% + EOE + } +} + +: failure +: +{ + : duplicate-archive + : + { + test.arguments += --simulate 'duplicate-archive' + + $clone_root_prj; + $init -C @cfg &prj-cfg/***; + sed -i -e 's/^(version:) .*$/\1 1.0.5/' prj/manifest; + + $* 2>>~%EOE% != 0 + synchronizing: + upgrade prj/1.0.5 + submitting prj-1.0.5.tar.gz + %. + %.*error: duplicate submission% + EOE + } + + : internal-error-text + : + { + test.arguments += --simulate 'internal-error-text' + + $clone_root_prj; + $init -C @cfg &prj-cfg/***; + sed -i -e 's/^(version:) .*$/\1 1.0.6/' prj/manifest; + + $* 2>>~%EOE% != 0 + synchronizing: + upgrade prj/1.0.6 + submitting prj-1.0.6.tar.gz + %. + %.*error: submission handling failed% + % info: consider reporting this to .+ repository maintainers% + % info: checksum: .{64}% + EOE + } + + : internal-error-html + : + { + test.arguments += --simulate 'internal-error-html' + + $clone_root_prj; + $init -C @cfg &prj-cfg/***; + sed -i -e 's/^(version:) .*$/\1 1.0.7/' prj/manifest; + + $* 2>>~%EOE% != 0 + synchronizing: + upgrade prj/1.0.7 + submitting prj-1.0.7.tar.gz + %. + %.*error: HTTP status code 500 \(internal server error\)% + % info: consider reporting this to .+ repository maintainers% + % info: checksum: .{64}% + EOE + } +} -- cgit v1.1