Diffstat (limited to 'mod')
-rw-r--r--  mod/mod-ci-github-gh.cxx           | 338
-rw-r--r--  mod/mod-ci-github-gh.hxx           | 163
-rw-r--r--  mod/mod-ci-github-gq.cxx           | 574
-rw-r--r--  mod/mod-ci-github-gq.hxx           |  67
-rw-r--r--  mod/mod-ci-github-post.hxx         | 161
-rw-r--r--  mod/mod-ci-github-service-data.cxx | 140
-rw-r--r--  mod/mod-ci-github-service-data.hxx |  92
-rw-r--r--  mod/mod-ci-github.cxx              | 819
-rw-r--r--  mod/mod-ci-github.hxx              | 140
-rw-r--r--  mod/mod-repository-root.cxx        |   4
-rw-r--r--  mod/tenant-service.hxx             |  15
11 files changed, 2049 insertions, 464 deletions
diff --git a/mod/mod-ci-github-gh.cxx b/mod/mod-ci-github-gh.cxx new file mode 100644 index 0000000..0bc6595 --- /dev/null +++ b/mod/mod-ci-github-gh.cxx @@ -0,0 +1,338 @@ +// file : mod/mod-ci-github-gh.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github-gh.hxx> + +#include <libbutl/json/parser.hxx> + +namespace brep +{ + // Return the GitHub check run status corresponding to a build_state. Throw + // invalid_argument if the build_state value was invalid. + // + string + gh_to_status (build_state st) + { + // Just return by value (small string optimization). + // + switch (st) + { + case build_state::queued: return "QUEUED"; + case build_state::building: return "IN_PROGRESS"; + case build_state::built: return "COMPLETED"; + default: + throw invalid_argument ("invalid build_state value: " + + to_string (static_cast<int> (st))); + } + } + + // Return the build_state corresponding to a GitHub check run status + // string. Throw invalid_argument if the passed status was invalid. + // + build_state + gh_from_status (const string& s) + { + if (s == "QUEUED") return build_state::queued; + else if (s == "IN_PROGRESS") return build_state::building; + else if (s == "COMPLETED") return build_state::built; + else + throw invalid_argument ("invalid GitHub check run status: '" + s + + '\''); + } + + string + gh_check_run_name (const build& b, const build_queued_hints* bh) + { + string r; + + if (bh == nullptr || !bh->single_package_version) + { + r += b.package_name.string (); + r += '/'; + r += b.package_version.string (); + r += '/'; + } + + r += b.target_config_name; + r += '/'; + r += b.target.string (); + r += '/'; + + if (bh == nullptr || !bh->single_package_config) + { + r += b.package_config_name; + r += '/'; + } + + r += b.toolchain_name; + r += '-'; + r += b.toolchain_version.string (); + + return r; + } + + // Throw invalid_json_input when a required member `m` is missing from a + // JSON object `o`. + // + [[noreturn]] static void + missing_member (const json::parser& p, const char* o, const char* m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + o + string (" object is missing member '") + m + '\''); + } + + using event = json::event; + + // gh_check_suite + // + gh_check_suite:: + gh_check_suite (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), hb (false), hs (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (hb, "head_branch")) head_branch = p.next_expect_string (); + else if (c (hs, "head_sha")) head_sha = p.next_expect_string (); + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_suite", "node_id"); + if (!hb) missing_member (p, "gh_check_suite", "head_branch"); + if (!hs) missing_member (p, "gh_check_suite", "head_sha"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite& cs) + { + os << "node_id: " << cs.node_id + << ", head_branch: " << cs.head_branch + << ", head_sha: " << cs.head_sha; + + return os; + } + + // gh_check_run + // + gh_check_run:: + gh_check_run (json::parser& p) + { + p.next_expect (event::begin_object); + + // We always ask for this exact set of fields to be returned in GraphQL + // requests. 
+ // + node_id = p.next_expect_member_string ("id"); + name = p.next_expect_member_string ("name"); + status = p.next_expect_member_string ("status"); + + p.next_expect (event::end_object); + } + + ostream& + operator<< (ostream& os, const gh_check_run& cr) + { + os << "node_id: " << cr.node_id + << ", name: " << cr.name + << ", status: " << cr.status; + + return os; + } + + // gh_repository + // + gh_repository:: + gh_repository (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), nm (false), fn (false), db (false), cu (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (nm, "name")) name = p.next_expect_string (); + else if (c (fn, "full_name")) full_name = p.next_expect_string (); + else if (c (db, "default_branch")) default_branch = p.next_expect_string (); + else if (c (cu, "clone_url")) clone_url = p.next_expect_string (); + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_repository", "node_id"); + if (!nm) missing_member (p, "gh_repository", "name"); + if (!fn) missing_member (p, "gh_repository", "full_name"); + if (!db) missing_member (p, "gh_repository", "default_branch"); + if (!cu) missing_member (p, "gh_repository", "clone_url"); + } + + ostream& + operator<< (ostream& os, const gh_repository& rep) + { + os << "node_id: " << rep.node_id + << ", name: " << rep.name + << ", full_name: " << rep.full_name + << ", default_branch: " << rep.default_branch + << ", clone_url: " << rep.clone_url; + + return os; + } + + // gh_installation + // + gh_installation:: + gh_installation (json::parser& p) + { + p.next_expect (event::begin_object); + + bool i (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (i, "id")) id = p.next_expect_number<uint64_t> (); + else p.next_expect_value_skip (); + } + + if (!i) missing_member (p, "gh_installation", "id"); + } + + ostream& + operator<< (ostream& os, const gh_installation& i) + { + os << "id: " << i.id; + + return os; + } + + // gh_check_suite_event + // + gh_check_suite_event:: + gh_check_suite_event (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ac (false), cs (false), rp (false), in (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (ac, "action")) action = p.next_expect_string (); + else if (c (cs, "check_suite")) check_suite = gh_check_suite (p); + else if (c (rp, "repository")) repository = gh_repository (p); + else if (c (in, "installation")) installation = gh_installation (p); + else p.next_expect_value_skip (); + } + + if (!ac) missing_member (p, "gh_check_suite_event", "action"); + if (!cs) missing_member (p, "gh_check_suite_event", "check_suite"); + if (!rp) missing_member (p, "gh_check_suite_event", "repository"); + if (!in) missing_member (p, "gh_check_suite_event", "installation"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite_event& cs) + { + os << "action: " << cs.action; + os << ", check_suite { " << cs.check_suite << " }"; + os << ", repository { " << cs.repository << " }"; + os << ", installation { " << cs.installation << " }"; + + return os; + } + + // gh_installation_access_token + // + // Example JSON: + // + // { + // "token": "ghs_Py7TPcsmsITeVCAWeVtD8RQs8eSos71O5Nzp", + // "expires_at": "2024-02-15T16:16:38Z", + // ... + // } + // + gh_installation_access_token:: + gh_installation_access_token (json::parser& p) + { + p.next_expect (event::begin_object); + + bool tk (false), ea (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (tk, "token")) token = p.next_expect_string (); + else if (c (ea, "expires_at")) expires_at = gh_from_iso8601 (p.next_expect_string ()); + else p.next_expect_value_skip (); + } + + if (!tk) missing_member (p, "gh_installation_access_token", "token"); + if (!ea) missing_member (p, "gh_installation_access_token", "expires_at"); + } + + gh_installation_access_token:: + gh_installation_access_token (string tk, timestamp ea) + : token (move (tk)), expires_at (ea) + { + } + + ostream& + operator<< (ostream& os, const gh_installation_access_token& t) + { + os << "token: " << t.token << ", expires_at: "; + butl::operator<< (os, t.expires_at); + + return os; + } + + string + gh_to_iso8601 (timestamp t) + { + return butl::to_string (t, + "%Y-%m-%dT%TZ", + false /* special */, + false /* local */); + } + + timestamp + gh_from_iso8601 (const string& s) + { + return butl::from_string (s.c_str (), "%Y-%m-%dT%TZ", false /* local */); + } +} diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx new file mode 100644 index 0000000..6fa8590 --- /dev/null +++ b/mod/mod-ci-github-gh.hxx @@ -0,0 +1,163 @@ +// file : mod/mod-ci-github-gh.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_GH_HXX +#define MOD_MOD_CI_GITHUB_GH_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <libbrep/build.hxx> + +#include <mod/tenant-service.hxx> // build_hints + +namespace butl +{ + namespace json + { + class parser; + } +} + +namespace brep +{ + using build_queued_hints = tenant_service_build_queued::build_queued_hints; + + // GitHub request/response types (all start with gh_). + // + // Note that the GitHub REST and GraphQL APIs use different ID types and + // values. In the REST API they are usually integers (but sometimes + // strings!) whereas in GraphQL they are always strings (note: + // base64-encoded and opaque, not just the REST ID value as a string). 
+ // + // In both APIs the ID field is called `id`, but REST responses and webhook + // events also contain the corresponding GraphQL object's ID in the + // `node_id` field. + // + // In the structures below we always use the RESP API/webhook names for ID + // fields. I.e., `id` always refers to the REST/webhook ID, and `node_id` + // always refers to the GraphQL ID. + // + namespace json = butl::json; + + // The "check_suite" object within a check_suite webhook event request. + // + struct gh_check_suite + { + string node_id; + string head_branch; + string head_sha; + + explicit + gh_check_suite (json::parser&); + + gh_check_suite () = default; + }; + + struct gh_check_run + { + string node_id; + string name; + string status; + + explicit + gh_check_run (json::parser&); + + gh_check_run () = default; + }; + + // Return the GitHub check run status corresponding to a build_state. + // + string + gh_to_status (build_state st); + + // Return the build_state corresponding to a GitHub check run status + // string. Throw invalid_argument if the passed status was invalid. + // + build_state + gh_from_status (const string&); + + // Create a check_run name from a build. If the second argument is not + // NULL, return an abbreviated id if possible. + // + string + gh_check_run_name (const build&, const build_queued_hints* = nullptr); + + struct gh_repository + { + string node_id; + string name; + string full_name; + string default_branch; + string clone_url; + + explicit + gh_repository (json::parser&); + + gh_repository () = default; + }; + + struct gh_installation + { + uint64_t id; // Note: used for installation access token (REST API). + + explicit + gh_installation (json::parser&); + + gh_installation () = default; + }; + + // The check_suite webhook event request. + // + struct gh_check_suite_event + { + string action; + gh_check_suite check_suite; + gh_repository repository; + gh_installation installation; + + explicit + gh_check_suite_event (json::parser&); + + gh_check_suite_event () = default; + }; + + struct gh_installation_access_token + { + string token; + timestamp expires_at; + + explicit + gh_installation_access_token (json::parser&); + + gh_installation_access_token (string token, timestamp expires_at); + + gh_installation_access_token () = default; + }; + + string + gh_to_iso8601 (timestamp); + + timestamp + gh_from_iso8601 (const string&); + + ostream& + operator<< (ostream&, const gh_check_suite&); + + ostream& + operator<< (ostream&, const gh_check_run&); + + ostream& + operator<< (ostream&, const gh_repository&); + + ostream& + operator<< (ostream&, const gh_installation&); + + ostream& + operator<< (ostream&, const gh_check_suite_event&); + + ostream& + operator<< (ostream&, const gh_installation_access_token&); +} + +#endif // MOD_MOD_CI_GITHUB_GH_HXX diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx new file mode 100644 index 0000000..7fbbb4b --- /dev/null +++ b/mod/mod-ci-github-gq.cxx @@ -0,0 +1,574 @@ +// file : mod/mod-ci-github-gq.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github-gq.hxx> + +#include <libbutl/json/parser.hxx> +#include <libbutl/json/serializer.hxx> + +#include <mod/mod-ci-github-post.hxx> + +using namespace std; +using namespace butl; + +namespace brep +{ + // GraphQL serialization functions (see definitions and documentation at the + // bottom). 
+ // + static const string& gq_name (const string&); + static string gq_str (const string&); + static string gq_bool (bool); + static const string& gq_enum (const string&); + + [[noreturn]] static void + throw_json (json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + + // Parse a JSON-serialized GraphQL response. + // + // Throw runtime_error if the response indicated errors and + // invalid_json_input if the GitHub response contained invalid JSON. + // + // The parse_data function should not throw anything but invalid_json_input. + // + // The response format is defined in the GraphQL spec: + // https://spec.graphql.org/October2021/#sec-Response. + // + // Example response: + // + // { + // "data": {...}, + // "errors": {...} + // } + // + // The contents of `data`, including its opening and closing braces, are + // parsed by the `parse_data` function. + // + // If the `errors` field is present in the response, error(s) occurred + // before or during execution of the operation. + // + // If the `data` field is not present the errors are request errors which + // occur before execution and are typically the client's fault. + // + // If the `data` field is also present in the response the errors are field + // errors which occur during execution and are typically the GraphQL + // endpoint's fault, and some fields in `data` that should not be are likely + // to be null. + // + // Although the spec recommends that the errors field (if present) should + // come before the data field, GitHub places data before errors. Therefore + // we need to check that the errors field is not present before parsing the + // data field as it might contain nulls if errors is present. + // + static void + gq_parse_response (json::parser& p, + function<void (json::parser&)> parse_data) + { + using event = json::event; + + // True if the data/errors fields are present. + // + bool dat (false), err (false); + + // Because the errors field is likely to come before the data field, + // serialize data to a stringstream and only parse it later once we're + // sure there are no errors. + // + stringstream data; // The value of the data field. + + p.next_expect (event::begin_object); + + while (p.next_expect (event::name, event::end_object)) + { + if (p.name () == "data") + { + dat = true; + + // Serialize the data field to a string. + // + // Note that the JSON payload sent by GitHub is not pretty-printed so + // there is no need to worry about that. + // + json::stream_serializer s (data, 0 /* indentation */); + + try + { + for (event e: p) + { + if (!s.next (e, p.data ())) + break; // Stop if data object is complete. + } + } + catch (const json::invalid_json_output& e) + { + throw_json (p, + string ("serializer rejected response 'data' field: ") + + e.what ()); + } + } + else if (p.name () == "errors") + { + // Don't stop parsing because the error semantics depends on whether + // or not `data` is present. + // + err = true; // Handled below. + } + else + { + // The spec says the response will never contain any top-level fields + // other than data, errors, and extensions. + // + if (p.name () != "extensions") + { + throw_json (p, + "unexpected top-level GraphQL response field: '" + + p.name () + '\''); + } + + p.next_expect_value_skip (); + } + } + + if (!err) + { + if (!dat) + throw runtime_error ("no data received from GraphQL endpoint"); + + // Parse the data field now that we know there are no errors. 
+ // + json::parser dp (data, p.input_name); + + parse_data (dp); + } + else + { + if (dat) + { + throw runtime_error ("field error(s) received from GraphQL endpoint; " + "incomplete data received"); + } + else + throw runtime_error ("request error(s) received from GraphQL endpoint"); + } + } + + // Parse a response to a check_run GraphQL mutation such as `createCheckRun` + // or `updateCheckRun`. + // + // Example response (only the part we need to parse here): + // + // { + // "cr0": { + // "checkRun": { + // "id": "CR_kwDOLc8CoM8AAAAFQ5GqPg", + // "name": "libb2/0.98.1+2/x86_64-linux-gnu/linux_debian_12-gcc_13.1-O3/default/dev/0.17.0-a.1", + // "status": "QUEUED" + // } + // }, + // "cr1": { + // "checkRun": { + // "id": "CR_kwDOLc8CoM8AAAAFQ5GqhQ", + // "name": "libb2/0.98.1+2/x86_64-linux-gnu/linux_debian_12-gcc_13.1/default/dev/0.17.0-a.1", + // "status": "QUEUED" + // } + // } + // } + // + static vector<gh_check_run> + gq_parse_response_check_runs (json::parser& p) + { + using event = json::event; + + vector<gh_check_run> r; + + gq_parse_response (p, [&r] (json::parser& p) + { + p.next_expect (event::begin_object); + + // Parse the "cr0".."crN" members (field aliases). + // + while (p.next_expect (event::name, event::end_object)) + { + // Parse `"crN": { "checkRun":`. + // + if (p.name () != "cr" + to_string (r.size ())) + throw_json (p, "unexpected field alias: '" + p.name () + '\''); + p.next_expect (event::begin_object); + p.next_expect_name ("checkRun"); + + r.emplace_back (p); // Parse the check_run object. + + p.next_expect (event::end_object); // Parse end of crN object. + } + }); + + // Our requests always operate on at least one check run so if there were + // none in the data field something went wrong. + // + if (r.empty ()) + throw_json (p, "data object is empty"); + + return r; + } + + // Send a GraphQL mutation request `rq` that operates on one or more check + // runs. Update the check runs in `crs` with the new state and the node ID + // if unset. Return false and issue diagnostics if the request failed. + // + static bool + gq_mutate_check_runs (vector<check_run>& crs, + const string& iat, + string rq, + build_state st, + const basic_mark& error) noexcept + { + vector<gh_check_run> rcrs; + + try + { + // Response type which parses a GraphQL response containing multiple + // check_run objects. + // + struct resp + { + vector<gh_check_run> check_runs; // Received check runs. + + resp (json::parser& p): check_runs (gq_parse_response_check_runs (p)) {} + + resp () = default; + } rs; + + uint16_t sc (github_post (rs, + "graphql", // API Endpoint. + strings {"Authorization: Bearer " + iat}, + move (rq))); + + if (sc == 200) + { + rcrs = move (rs.check_runs); + + if (rcrs.size () == crs.size ()) + { + for (size_t i (0); i != rcrs.size (); ++i) + { + // Validate the check run in the response against the build. + // + const gh_check_run& rcr (rcrs[i]); // Received check run. + + build_state rst (gh_from_status (rcr.status)); // Received state. + + if (rst != build_state::built && rst != st) + { + error << "unexpected check_run status: received '" << rcr.status + << "' but expected '" << gh_to_status (st) << '\''; + + return false; // Fail because something is clearly very wrong. 
+ } + else + { + check_run& cr (crs[i]); + + if (!cr.node_id) + cr.node_id = move (rcr.node_id); + + cr.state = gh_from_status (rcr.status); + cr.state_synced = true; + } + } + + return true; + } + else + error << "unexpected number of check_run objects in response"; + } + else + error << "failed to update check run: error HTTP response status " + << sc; + } + catch (const json::invalid_json_input& e) + { + // Note: e.name is the GitHub API endpoint. + // + error << "malformed JSON in response from " << e.name << ", line: " + << e.line << ", column: " << e.column << ", byte offset: " + << e.position << ", error: " << e; + } + catch (const invalid_argument& e) + { + error << "malformed header(s) in response: " << e; + } + catch (const system_error& e) + { + error << "unable to mutate check runs (errno=" << e.code () << "): " + << e.what (); + } + catch (const runtime_error& e) // From gq_parse_response_check_runs(). + { + // GitHub response contained error(s) (could be ours or theirs at this + // point). + // + error << "unable to mutate check runs: " << e; + } + + return false; + } + + // Serialize a GraphQL operation (query/mutation) into a GraphQL request. + // + // This is essentially a JSON object with a "query" string member containing + // the GraphQL operation. For example: + // + // { "query": "mutation { cr0:createCheckRun(... }" } + // + static string + gq_serialize_request (const string& o) + { + string b; + json::buffer_serializer s (b); + + s.begin_object (); + s.member ("query", o); + s.end_object (); + + return b; + } + + // Serialize `createCheckRun` mutations for one or more builds to GraphQL. + // + static string + gq_mutation_create_check_runs ( + const string& ri, // Repository ID + const string& hs, // Head SHA + const vector<reference_wrapper<const build>>& bs, + build_state st, + const build_queued_hints* bh) + { + ostringstream os; + + os << "mutation {" << '\n'; + + // Serialize a `createCheckRun` for each build. + // + for (size_t i (0); i != bs.size (); ++i) + { + const build& b (bs[i]); + + string al ("cr" + to_string (i)); // Field alias. + + // Check run name. + // + string nm (gh_check_run_name (b, bh)); + + os << gq_name (al) << ":createCheckRun(input: {" << '\n' + << " name: " << gq_str (nm) << ',' << '\n' + << " repositoryId: " << gq_str (ri) << ',' << '\n' + << " headSha: " << gq_str (hs) << ',' << '\n' + << " status: " << gq_enum (gh_to_status (st)) << '\n' + << "})" << '\n' + // Specify the selection set (fields to be returned). + // + << "{" << '\n' + << " checkRun {" << '\n' + << " id," << '\n' + << " name," << '\n' + << " status" << '\n' + << " }" << '\n' + << "}" << '\n'; + } + + os << "}" << '\n'; + + return os.str (); + } + + // Serialize an `updateCheckRun` mutation for one build to GraphQL. + // + static string + gq_mutation_update_check_run (const string& ri, // Repository ID. + const string& ni, // Node ID. + build_state st) + { + ostringstream os; + + os << "mutation {" << '\n' + << "cr0:updateCheckRun(input: {" << '\n' + << " checkRunId: " << gq_str (ni) << ',' << '\n' + << " repositoryId: " << gq_str (ri) << ',' << '\n' + << " status: " << gq_enum (gh_to_status (st)) << '\n' + << "})" << '\n' + // Specify the selection set (fields to be returned). 
+ // + << "{" << '\n' + << " checkRun {" << '\n' + << " id," << '\n' + << " name," << '\n' + << " status" << '\n' + << " }" << '\n' + << "}" << '\n' + << "}" << '\n'; + + return os.str (); + } + + bool + gq_create_check_runs (vector<check_run>& crs, + const string& iat, + const string& rid, + const string& hs, + const vector<reference_wrapper<const build>>& bs, + build_state st, + const build_queued_hints& bh, + const basic_mark& error) + { + string rq (gq_serialize_request ( + gq_mutation_create_check_runs (rid, hs, bs, st, &bh))); + + return gq_mutate_check_runs (crs, iat, move (rq), st, error); + } + + bool + gq_create_check_run (check_run& cr, + const string& iat, + const string& rid, + const string& hs, + const build& b, + build_state st, + const build_queued_hints& bh, + const basic_mark& error) + { + vector<check_run> crs {move (cr)}; + + bool r (gq_create_check_runs (crs, iat, rid, hs, {b}, st, bh, error)); + + cr = move (crs[0]); + + return r; + } + + bool + gq_update_check_run (check_run& cr, + const string& iat, + const string& rid, + const string& nid, + build_state st, + const basic_mark& error) + { + string rq ( + gq_serialize_request (gq_mutation_update_check_run (rid, nid, st))); + + vector<check_run> crs {move (cr)}; + + bool r (gq_mutate_check_runs (crs, iat, move (rq), st, error)); + + cr = move (crs[0]); + + return r; + } + + // GraphQL serialization functions. + // + // The GraphQL spec: + // https://spec.graphql.org/ + // + // The GitHub GraphQL API reference: + // https://docs.github.com/en/graphql/reference/ + // + + // Check that a string is a valid GraphQL name. + // + // GraphQL names can contain only alphanumeric characters and underscores + // and cannot begin with a digit (so basically a C identifier). + // + // Return the name or throw invalid_argument if it is invalid. + // + // @@ TODO: dangerous API. + // + static const string& + gq_name (const string& v) + { + if (v.empty () || digit (v[0])) + throw invalid_argument ("invalid GraphQL name: '" + v + '\''); + + for (char c: v) + { + if (!alnum (c) && c != '_') + { + throw invalid_argument ("invalid character in GraphQL name: '" + c + + '\''); + } + } + + return v; + } + + // Serialize a string to GraphQL. + // + // Return the serialized string or throw invalid_argument if the string is + // invalid. + // + static string + gq_str (const string& v) + { + // GraphQL strings are the same as JSON strings so we use the JSON + // serializer. + // + string b; + json::buffer_serializer s (b); + + try + { + s.value (v); + } + catch (const json::invalid_json_output&) + { + throw invalid_argument ("invalid GraphQL string: '" + v + '\''); + } + + return b; + } + + // Serialize an int to GraphQL. + // +#if 0 + static string + gq_int (uint64_t v) + { + string b; + json::buffer_serializer s (b); + s.value (v); + return b; + } +#endif + + // Serialize a boolean to GraphQL. + // + static inline string + gq_bool (bool v) + { + return v ? "true" : "false"; + } + + // Check that a string is a valid GraphQL enum value. + // + // GraphQL enum values can be any GraphQL name except for `true`, `false`, + // or `null`. + // + // Return the enum value or throw invalid_argument if it is invalid. + // + // @@ TODO: dangerous API. 
+ // + static const string& + gq_enum (const string& v) + { + if (v == "true" || v == "false" || v == "null") + throw invalid_argument ("invalid GraphQL enum value: '" + v + '\''); + + return gq_name (v); + } +} diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx new file mode 100644 index 0000000..3d8c6cc --- /dev/null +++ b/mod/mod-ci-github-gq.hxx @@ -0,0 +1,67 @@ +// file : mod/mod-ci-github-gq.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_GQ_HXX +#define MOD_MOD_CI_GITHUB_GQ_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <libbrep/build.hxx> + +#include <mod/tenant-service.hxx> // build_hints + +#include <mod/mod-ci-github-gh.hxx> +#include <mod/mod-ci-github-service-data.hxx> + +namespace brep +{ + // GraphQL functions (all start with gq_). + // + + // Create a new check run on GitHub for each build. Update `check_runs` with + // the new states and node IDs. Return false and issue diagnostics if the + // request failed. + // + bool + gq_create_check_runs (vector<check_run>& check_runs, + const string& installation_access_token, + const string& repository_id, + const string& head_sha, + const vector<reference_wrapper<const build>>&, + build_state, + const build_queued_hints&, + const basic_mark& error); + + // Create a new check run on GitHub for a build. Update `cr` with the new + // state and the node ID. Return false and issue diagnostics if the request + // failed. + // + bool + gq_create_check_run (check_run& cr, + const string& installation_access_token, + const string& repository_id, + const string& head_sha, + const build&, + build_state, + const build_queued_hints&, + const basic_mark& error); + + // Update a check run on GitHub. + // + // Send a GraphQL request that updates an existing check run. Update `cr` + // with the new state. Return false and issue diagnostics if the request + // failed. + // + // @@ TODO Support conclusion, output, etc. + // + bool + gq_update_check_run (check_run& cr, + const string& installation_access_token, + const string& repository_id, + const string& node_id, + build_state, + const basic_mark& error); +} + +#endif // MOD_MOD_CI_GITHUB_GQ_HXX diff --git a/mod/mod-ci-github-post.hxx b/mod/mod-ci-github-post.hxx new file mode 100644 index 0000000..d278ae0 --- /dev/null +++ b/mod/mod-ci-github-post.hxx @@ -0,0 +1,161 @@ +// file : mod/mod-ci-github-post.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_POST_HXX +#define MOD_MOD_CI_GITHUB_POST_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <libbutl/curl.hxx> + +namespace brep +{ + // Send a POST request to the GitHub API endpoint `ep`, parse GitHub's JSON + // response into `rs` (only for 200 codes), and return the HTTP status code. + // + // The endpoint `ep` should not have a leading slash. + // + // Pass additional HTTP headers in `hdrs`. For example: + // + // "HeaderName: header value" + // + // Throw invalid_argument if unable to parse the response headers, + // invalid_json_input (derived from invalid_argument) if unable to parse the + // response body, and system_error in other cases. + // + template <typename T> + uint16_t + github_post (T& rs, + const string& ep, + const strings& hdrs, + const string& body = "") + { + using namespace butl; + + // Convert the header values to curl header option/value pairs. 
+ // + strings hdr_opts; + + for (const string& h: hdrs) + { + hdr_opts.push_back ("--header"); + hdr_opts.push_back (h); + } + + // Run curl. + // + try + { + // Pass --include to print the HTTP status line (followed by the response + // headers) so that we can get the response status code. + // + // Suppress the --fail option which causes curl to exit with status 22 + // in case of an error HTTP response status code (>= 400) otherwise we + // can't get the status code. + // + // Note that butl::curl also adds --location to make curl follow redirects + // (which is recommended by GitHub). + // + // The API version `2022-11-28` is the only one currently supported. If + // the X-GitHub-Api-Version header is not passed this version will be + // chosen by default. + // + fdpipe errp (fdopen_pipe ()); // stderr pipe. + + curl c (path ("-"), // Read input from curl::out. + path ("-"), // Write response to curl::in. + process::pipe (errp.in.get (), move (errp.out)), + curl::post, + curl::flags::no_fail, + "https://api.github.com/" + ep, + "--no-fail", // Don't fail if response status code >= 400. + "--include", // Output response headers for status code. + "--header", "Accept: application/vnd.github+json", + "--header", "X-GitHub-Api-Version: 2022-11-28", + move (hdr_opts)); + + ifdstream err (move (errp.in)); + + // Parse the HTTP response. + // + uint16_t sc; // Status code. + try + { + // Note: re-open in/out so that they get automatically closed on + // exception. + // + ifdstream in (c.in.release (), fdstream_mode::skip); + ofdstream out (c.out.release ()); + + // Write request body to out. + // + if (!body.empty ()) + out << body; + out.close (); + + sc = curl::read_http_status (in).code; // May throw invalid_argument. + + // Parse the response body if the status code is in the 200 range. + // + if (sc >= 200 && sc < 300) + { + // Use endpoint name as input name (useful to have it propagated + // in exceptions). + // + json::parser p (in, ep /* name */); + rs = T (p); + } + + in.close (); + } + catch (const io_error& e) + { + // If the process exits with non-zero status, assume the IO error is due + // to that and fall through. + // + if (c.wait ()) + { + throw_generic_error ( + e.code ().value (), + (string ("unable to read curl stdout: ") + e.what ()).c_str ()); + } + } + catch (const json::invalid_json_input&) + { + // If the process exits with non-zero status, assume the JSON error is + // due to that and fall through. + // + if (c.wait ()) + throw; + } + + if (!c.wait ()) + { + string et (err.read_text ()); + throw_generic_error (EINVAL, + ("non-zero curl exit status: " + et).c_str ()); + } + + err.close (); + + return sc; + } + catch (const process_error& e) + { + throw_generic_error ( + e.code ().value (), + (string ("unable to execute curl:") + e.what ()).c_str ()); + } + catch (const io_error& e) + { + // Unable to read diagnostics from stderr. 
+ // + throw_generic_error ( + e.code ().value (), + (string ("unable to read curl stderr : ") + e.what ()).c_str ()); + } + } +} + +#endif // MOD_MOD_CI_GITHUB_POST_HXX diff --git a/mod/mod-ci-github-service-data.cxx b/mod/mod-ci-github-service-data.cxx new file mode 100644 index 0000000..ff2af5d --- /dev/null +++ b/mod/mod-ci-github-service-data.cxx @@ -0,0 +1,140 @@ +// file : mod/mod-ci-github-service-data.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github-service-data.hxx> + +#include <libbutl/json/parser.hxx> +#include <libbutl/json/serializer.hxx> + +namespace brep +{ + using event = json::event; + + service_data:: + service_data (const string& json) + { + json::parser p (json.data (), json.size (), "service_data"); + + p.next_expect (event::begin_object); + + // Throw if the schema version is not supported. + // + version = p.next_expect_member_number<uint64_t> ("version"); + if (version != 1) + { + throw invalid_argument ("unsupported service_data schema version: " + + to_string (version)); + } + + // Installation access token. + // + p.next_expect_name ("installation_access"); + installation_access = gh_installation_access_token (p); + + installation_id = + p.next_expect_member_number<uint64_t> ("installation_id"); + repository_node_id = p.next_expect_member_string ("repository_node_id"); + head_sha = p.next_expect_member_string ("head_sha"); + + p.next_expect_member_array ("check_runs"); + while (p.next_expect (event::begin_object, event::end_array)) + { + string bid (p.next_expect_member_string ("build_id")); + + optional<string> nid; + { + string* v (p.next_expect_member_string_null ("node_id")); + if (v != nullptr) + nid = *v; + } + + build_state s (to_build_state (p.next_expect_member_string ("state"))); + bool ss (p.next_expect_member_boolean<bool> ("state_synced")); + + check_runs.emplace_back (move (bid), move (nid), s, ss); + + p.next_expect (event::end_object); + } + + p.next_expect (event::end_object); + } + + service_data:: + service_data (string iat_tok, + timestamp iat_ea, + uint64_t iid, + string rid, + string hs) + : installation_access (move (iat_tok), iat_ea), + installation_id (iid), + repository_node_id (move (rid)), + head_sha (move (hs)) + { + } + + string service_data:: + json () const + { + string b; + json::buffer_serializer s (b); + + s.begin_object (); + + s.member ("version", 1); + + // Installation access token. 
+ // + s.member_begin_object ("installation_access"); + s.member ("token", installation_access.token); + s.member ("expires_at", gh_to_iso8601 (installation_access.expires_at)); + s.end_object (); + + s.member ("installation_id", installation_id); + s.member ("repository_node_id", repository_node_id); + s.member ("head_sha", head_sha); + + s.member_begin_array ("check_runs"); + for (const check_run& cr: check_runs) + { + s.begin_object (); + s.member ("build_id", cr.build_id); + + s.member_name ("node_id"); + if (cr.node_id) + s.value (*cr.node_id); + else + s.value (nullptr); + + s.member ("state", to_string (cr.state)); + s.member ("state_synced", cr.state_synced); + + s.end_object (); + } + s.end_array (); + + s.end_object (); + + return b; + } + + check_run* service_data:: + find_check_run (const string& bid) + { + for (check_run& cr: check_runs) + { + if (cr.build_id == bid) + return &cr; + } + return nullptr; + } + + ostream& + operator<< (ostream& os, const check_run& cr) + { + os << "node_id: " << cr.node_id.value_or ("null") + << ", build_id: " << cr.build_id + << ", state: " << cr.state_string (); + + return os; + } +} diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx new file mode 100644 index 0000000..0d94b55 --- /dev/null +++ b/mod/mod-ci-github-service-data.hxx @@ -0,0 +1,92 @@ +// file : mod/mod-ci-github-service-data.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_SERVICE_DATA_HXX +#define MOD_MOD_CI_GITHUB_SERVICE_DATA_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/mod-ci-github-gh.hxx> + +namespace brep +{ + // Service data associated with the tenant (corresponds to GH check suite). + // + // It is always a top-level JSON object and the first member is always the + // schema version. + + // Unsynchronized state means we were unable to (conclusively) notify + // GitHub about the last state transition (e.g., due to a transient + // network error). The "conclusively" part means that the notification may + // or may not have gone through. Note: node_id can be absent for the same + // reason. + // + struct check_run + { + string build_id; // Full build id. + optional<string> node_id; // GitHub id. + + build_state state; + bool state_synced; + + string + state_string () const + { + string r (to_string (state)); + if (!state_synced) + r += "(unsynchronized)"; + return r; + } + }; + + struct service_data + { + // The data schema version. Note: must be first member in the object. + // + uint64_t version = 1; + + // Check suite-global data. + // + gh_installation_access_token installation_access; + + uint64_t installation_id; + + string repository_node_id; // GitHub-internal opaque repository id. + + string head_sha; + + vector<check_run> check_runs; + + // Return the check run with the specified build ID or nullptr if not + // found. + // + check_run* + find_check_run (const string& build_id); + + // Construct from JSON. + // + // Throw invalid_argument if the schema version is not supported. + // + explicit + service_data (const string& json); + + service_data (string iat_token, + timestamp iat_expires_at, + uint64_t installation_id, + string repository_node_id, + string head_sha); + + service_data () = default; + + // Serialize to JSON. 
+ // + string + json () const; + }; + + ostream& + operator<< (ostream&, const check_run&); +} + +#endif // MOD_MOD_CI_GITHUB_SERVICE_DATA_HXX diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index c4aaec1..defb74f 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -3,15 +3,17 @@ #include <mod/mod-ci-github.hxx> -#include <libbutl/curl.hxx> #include <libbutl/json/parser.hxx> #include <mod/jwt.hxx> #include <mod/hmac.hxx> #include <mod/module-options.hxx> +#include <mod/mod-ci-github-gq.hxx> +#include <mod/mod-ci-github-post.hxx> +#include <mod/mod-ci-github-service-data.hxx> + #include <stdexcept> -#include <iostream> // @@ TODO Remove once debug output has been removed. // @@ TODO // @@ -35,6 +37,10 @@ // https://en.wikipedia.org/wiki/HMAC#Definition. A suitable implementation // is provided by OpenSSL. +// @@ TODO Centralize exception/error handling around calls to +// github_post(). Currently it's mostly duplicated and there is quite +// a lot of it. +// using namespace std; using namespace butl; using namespace web; @@ -42,20 +48,42 @@ using namespace brep::cli; namespace brep { - using namespace gh; + ci_github:: + ci_github (tenant_service_map& tsm) + : tenant_service_map_ (tsm) + { + } ci_github:: - ci_github (const ci_github& r) + ci_github (const ci_github& r, tenant_service_map& tsm) : handler (r), - options_ (r.initialized_ ? r.options_ : nullptr) + ci_start (r), + options_ (r.initialized_ ? r.options_ : nullptr), + tenant_service_map_ (tsm) { } void ci_github:: init (scanner& s) { + { + shared_ptr<tenant_service_base> ts ( + dynamic_pointer_cast<tenant_service_base> (shared_from_this ())); + + assert (ts != nullptr); // By definition. + + tenant_service_map_["ci-github"] = move (ts); + } + options_ = make_shared<options::ci_github> ( s, unknown_mode::fail, unknown_mode::fail); + + // Prepare for the CI requests handling, if configured. + // + if (options_->ci_github_app_webhook_secret_specified ()) + { + ci_start::init (make_shared<options::ci_start> (*options_)); + } } bool ci_github:: @@ -210,12 +238,12 @@ namespace brep // if (event == "check_suite") { - check_suite_event cs; + gh_check_suite_event cs; try { json::parser p (body.data (), body.size (), "check_suite event"); - cs = check_suite_event (p); + cs = gh_check_suite_event (p); } catch (const json::invalid_json_input& e) { @@ -277,154 +305,457 @@ namespace brep } bool ci_github:: - handle_check_suite_request (check_suite_event cs) const + handle_check_suite_request (gh_check_suite_event cs) { - cout << "<check_suite event>" << endl << cs << endl; + HANDLER_DIAG; - installation_access_token iat ( - obtain_installation_access_token (cs.installation.id, generate_jwt ())); + l3 ([&]{trace << "check_suite event { " << cs << " }";}); - cout << endl << "<installation_access_token>" << endl << iat << endl; + optional<string> jwt (generate_jwt (trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (cs.installation.id, + move (*jwt), + error)); + + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // Submit the CI request. 
+ // + repository_location rl (cs.repository.clone_url + '#' + + cs.check_suite.head_branch, + repository_type::git); + + string sd (service_data (move (iat->token), + iat->expires_at, + cs.installation.id, + move (cs.repository.node_id), + move (cs.check_suite.head_sha)) + .json ()); + + optional<start_result> r ( + start (error, + warn, + verb_ ? &trace : nullptr, + tenant_service (move (cs.check_suite.node_id), + "ci-github", + move (sd)), + move (rl), + vector<package> {}, + nullopt, // client_ip + nullopt // user_agent + )); + + if (!r) + fail << "unable to submit CI request"; return true; } - // Send a POST request to the GitHub API endpoint `ep`, parse GitHub's JSON - // response into `rs` (only for 200 codes), and return the HTTP status code. + // Build state change notifications (see tenant-services.hxx for + // background). Mapping our state transitions to GitHub pose multiple + // problems: + // + // 1. In our model we have the building->queued (interrupted) and + // built->queued (rebuild) transitions. We are going to ignore both of + // them when notifying GitHub. The first is not important (we expect the + // state to go back to building shortly). The second should normally not + // happen and would mean that a completed check suite may go back on its + // conclusion (which would be pretty confusing for the user). + // + // So, for GitHub notifications, we only have the following linear + // transition sequence: + // + // -> queued -> building -> built + // + // Note, however, that because we ignore certain transitions, we can now + // observe "degenerate" state changes that we need to ignore: + // + // building -> [queued] -> building + // built -> [queued] -> ... + // + // 2. As mentioned in tenant-services.hxx, we may observe the notifications + // as arriving in the wrong order. Unfortunately, GitHub provides no + // mechanisms to help with that. In fact, GitHub does not even prevent + // the creation of multiple check runs with the same name (it will always + // use the last created instance, regardless of the status, timestamps, + // etc). As a result, we cannot, for example, rely on the failure to + // create a new check run in response to the queued notification as an + // indication of a subsequent notification (e.g., building) having + // already occurred. + // + // The only aid in this area that GitHub provides is that it prevents + // updating a check run in the built state to a former state (queued or + // building). But one can still create a new check run with the same name + // and a former state. + // + // (Note that we should also be careful if trying to take advantage of + // this "check run override" semantics: each created check run gets a new + // URL and while the GitHub UI will always point to the last created when + // showing the list of check runs, if the user is already on the previous + // check run's URL, nothing will automatically cause them to be + // redirected to the new URL. And so the user may sit on the abandoned + // check run waiting forever for it to be completed.) + // + // As a result, we will deal with the out of order problem differently + // depending on the notification: + // + // queued Skip if there is already a check run in service data, + // otherwise create new. // - // The endpoint `ep` should not have a leading slash. + // building Skip if there is no check run in service data or it's + // not in the queued state, otherwise update. // - // Pass additional HTTP headers in `hdrs`. 
For example: + // built Update if there is check run in service data and its + // state is not built, otherwise create new. // - // "HeaderName: header value" + // The rationale for this semantics is as follows: the building + // notification is a "nice to have" and can be skipped if things are not + // going normally. In contrast, the built notification cannot be skipped + // and we must either update the existing check run or create a new one + // (hopefully overriding the one created previously, if any). Note that + // the likelihood of the built notification being performed at the same + // time as queued/building is quite low (unlike queued and building). // - // Throw invalid_argument if unable to parse the response headers, - // invalid_json_input (derived from invalid_argument) if unable to parse the - // response body, and system_error in other cases. + // Note also that with this semantics it's unlikely but possible that we + // attempt to update the service data in the wrong order. Specifically, it + // feels like this should not be possible in the ->building transition + // since we skip the building notification unless the check run in the + // service data is already in the queued state. But it is theoretically + // possible in the ->built transition. For example, we may be updating + // the service data for the queued notification after it has already been + // updated by the built notification. In such cases we should not be + // overriding the latter state (built) with the former (queued). // - template<typename T> - static uint16_t - github_post (T& rs, const string& ep, const strings& hdrs) + // 3. We may not be able to "conclusively" notify GitHub, for example, due + // to a transient network error. The "conclusively" part means that the + // notification may or may not have gone through (though it feels the + // common case will be the inability to send the request rather than + // receive the reply). + // + // In such cases, we record in the service data that the notification was + // not synchronized and in subsequent notifications we do the best we can: + // if we have node_id, then we update, otherwise, we create (potentially + // overriding the check run created previously). + // + function<optional<string> (const tenant_service&)> ci_github:: + build_queued (const tenant_service& ts, + const vector<build>& builds, + optional<build_state> istate, + const build_queued_hints& hs, + const diag_epilogue& log_writer) const noexcept { - // Convert the header values to curl header option/value pairs. - // - strings hdr_opts; + NOTIFICATION_DIAG (log_writer); - for (const string& h: hdrs) + service_data sd; + try { - hdr_opts.push_back ("--header"); - hdr_opts.push_back (h); + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; } - // Run curl. + // The builds for which we will be creating check runs. // - try - { - // Pass --include to print the HTTP status line (followed by the response - // headers) so that we can get the response status code. - // - // Suppress the --fail option which causes curl to exit with status 22 - // in case of an error HTTP response status code (>= 400) otherwise we - // can't get the status code. - // - // Note that butl::curl also adds --location to make curl follow redirects - // (which is recommended by GitHub). - // - // The API version `2022-11-28` is the only one currently supported. 
If - // the X-GitHub-Api-Version header is not passed this version will be - // chosen by default. - // - fdpipe errp (fdopen_pipe ()); // stderr pipe. - - curl c (nullfd, - path ("-"), // Write response to curl::in. - process::pipe (errp.in.get (), move (errp.out)), - curl::post, - curl::flags::no_fail, - "https://api.github.com/" + ep, - "--no-fail", // Don't fail if response status code >= 400. - "--include", // Output response headers for status code. - "--header", "Accept: application/vnd.github+json", - "--header", "X-GitHub-Api-Version: 2022-11-28", - move (hdr_opts)); - - ifdstream err (move (errp.in)); - - // Parse the HTTP response. - // - int sc; // Status code. - try - { - // Note: re-open in/out so that they get automatically closed on - // exception. - // - ifdstream in (c.in.release (), fdstream_mode::skip); + vector<reference_wrapper<const build>> bs; + vector<check_run> crs; // Parallel to bs. - sc = curl::read_http_status (in).code; // May throw invalid_argument. + // Exclude the builds for which we won't be creating check runs. + // + for (const build& b: builds) + { + string bid (gh_check_run_name (b)); // Full build ID. - // Parse the response body if the status code is in the 200 range. + if (const check_run* scr = sd.find_check_run (bid)) + { + // Another notification has already stored this check run. // - if (sc >= 200 && sc < 300) + if (!istate) { - // Use endpoint name as input name (useful to have it propagated - // in exceptions). + // Out of order queued notification. // - json::parser p (in, ep /* name */); - rs = T (p); + warn << "check run " << bid << ": out of order queued " + << "notification; existing state: " << scr->state_string (); + } + else if (*istate == build_state::built) + { + // Unexpected built->queued transition (rebuild). + // + warn << "check run " << bid << ": unexpected rebuild"; + } + else + { + // Ignore interrupted. } - - in.close (); } - catch (const io_error& e) + else { - // If the process exits with non-zero status, assume the IO error is due - // to that and fall through. + // No stored check run for this build so prepare to create one. // - if (c.wait ()) + bs.push_back (b); + + crs.emplace_back (move (bid), + gh_check_run_name (b, &hs), + nullopt, /* node_id */ + build_state::queued, + false /* state_synced */); + } + } + + if (bs.empty ()) // Nothing to do. + return nullptr; + + // Get a new installation access token if the current one has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub (state is updated by not marked + // synced). + // + if (iat != nullptr) + { + // Create a check_run for each build. 
+ // + if (gq_create_check_runs (error, + crs, + iat->token, + sd.repository_node_id, sd.head_sha, + build_state::queued)) + { + for (const check_run& cr: crs) { - throw_generic_error ( - e.code ().value (), - (string ("unable to read curl stdout: ") + e.what ()).c_str ()); + assert (cr.state == build_state::queued); + l3 ([&]{trace << "created check_run { " << cr << " }";}); } } - catch (const json::invalid_json_input&) + } + + return [bs = move (bs), + iat = move (new_iat), + crs = move (crs), + error = move (error), + warn = move (warn)] (const tenant_service& ts) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + service_data sd; + try { - // If the process exits with non-zero status, assume the JSON error is - // due to that and fall through. - // - if (c.wait ()) - throw; + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; } - if (!c.wait ()) + if (iat) + sd.installation_access = *iat; + + for (size_t i (0); i != bs.size (); ++i) { - string et (err.read_text ()); - throw_generic_error (EINVAL, - ("non-zero curl exit status: " + et).c_str ()); + const check_run& cr (crs[i]); + + // Note that this service data may not be the same as what we observed + // in the build_queued() function above. For example, some check runs + // that we have queued may have already transitioned to built. So we + // skip any check runs that are already present. + // + if (const check_run* scr = sd.find_check_run (cr.build_id)) + { + // Doesn't looks like printing new/existing check run node_id will + // be of any help. + // + warn << "check run " << cr.build_id << ": out of order queued " + << "notification service data update; existing state: " + << scr->state_string (); + } + else + sd.check_runs.push_back (cr); } - err.close (); + return sd.json (); + }; + } + + function<optional<string> (const tenant_service&)> ci_github:: + build_building (const tenant_service& ts, + const build& b, + const diag_epilogue& log_writer) const noexcept + { + NOTIFICATION_DIAG (log_writer); - return sc; + service_data sd; + try + { + sd = service_data (*ts.data); } - catch (const process_error& e) + catch (const invalid_argument& e) { - throw_generic_error ( - e.code ().value (), - (string ("unable to execute curl:") + e.what ()).c_str ()); + error << "failed to parse service data: " << e; + return nullptr; } - catch (const io_error& e) + + optional<check_run> cr; // Updated check run. + string bid (gh_check_run_name (b)); // Full Build ID. + + if (check_run* scr = sd.find_check_run (bid)) // Stored check run. { - // Unable to read diagnostics from stderr. + // Update the check run if it exists on GitHub and the queued + // notification succeeded and updated the service data, otherwise do + // nothing. // - throw_generic_error ( - e.code ().value (), - (string ("unable to read curl stderr : ") + e.what ()).c_str ()); + if (scr->state == build_state::queued) + { + if (scr->node_id) + { + cr = move (*scr); + cr->state_synced = false; + } + else + { + // Network error during queued notification, ignore. 
+ } + } + else + warn << "check run " << bid << ": out of order building " + << "notification; existing state: " << scr->state_string (); + } + else + warn << "check run " << bid << ": out of order building " + << "notification; no check run state in service data"; + + if (!cr) + return nullptr; + + // Get a new installation access token if the current one has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub (state is updated but not marked + // synced). + // + if (iat != nullptr) + { + if (gq_update_check_run (error, + *cr, + iat->token, + sd.repository_node_id, + *cr->node_id, + details_url (b), + build_state::building)) + { + // Do nothing further if the state was already built on GitHub (note + // that this is based on the above-mentioned special GitHub semantics + // of preventing changes to the built status). + // + if (cr->state == build_state::built) + { + warn << "check run " << bid << ": already in built state on GitHub"; + + return nullptr; + } + + assert (cr->state == build_state::building); + + l3 ([&]{trace << "updated check_run { " << *cr << " }";}); + } + } + + return [iat = move (new_iat), + cr = move (*cr), + error = move (error), + warn = move (warn)] (const tenant_service& ts) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + if (iat) + sd.installation_access = *iat; + + // Update the check run only if it is in the queued state. + // + if (check_run* scr = sd.find_check_run (cr.build_id)) + { + if (scr->state == build_state::queued) + *scr = cr; + else + { + warn << "check run " << cr.build_id << ": out of order building " + << "notification service data update; existing state: " + << scr->state_string (); + } + } + else + warn << "check run " << cr.build_id << ": service data state has " + << "disappeared"; + + return sd.json (); + }; } - string ci_github:: - generate_jwt () const + function<optional<string> (const tenant_service&)> ci_github:: + build_built (const tenant_service&, const build&, + const diag_epilogue& /* log_writer */) const noexcept + { + return nullptr; + } + + optional<string> ci_github:: + generate_jwt (const basic_mark& trace, + const basic_mark& error) const { string jwt; try @@ -439,13 +770,12 @@ namespace brep chrono::seconds (options_->ci_github_jwt_validity_period ()), chrono::seconds (60)); - cout << "JWT: " << jwt << endl; + l3 ([&]{trace << "JWT: " << jwt;}); } catch (const system_error& e) { - HANDLER_DIAG; - - fail << "unable to generate JWT (errno=" << e.code () << "): " << e; + error << "unable to generate JWT (errno=" << e.code () << "): " << e; + return nullopt; } return jwt; @@ -491,19 +821,20 @@ namespace brep // repos covered by the installation if installed on an organisation for // example. 
// - installation_access_token ci_github:: - obtain_installation_access_token (uint64_t iid, string jwt) const + optional<gh_installation_access_token> ci_github:: + obtain_installation_access_token (uint64_t iid, + string jwt, + const basic_mark& error) const { - HANDLER_DIAG; - - installation_access_token iat; + gh_installation_access_token iat; try { // API endpoint. // string ep ("app/installations/" + to_string (iid) + "/access_tokens"); - int sc (github_post (iat, ep, strings {"Authorization: Bearer " + jwt})); + uint16_t sc ( + github_post (iat, ep, strings {"Authorization: Bearer " + jwt})); // Possible response status codes from the access_tokens endpoint: // @@ -517,252 +848,36 @@ namespace brep // if (sc != 201) { - fail << "unable to get installation access token: " - << "error HTTP response status " << sc; + error << "unable to get installation access token: error HTTP " + << "response status " << sc; + return nullopt; } + + // Create a clock drift safety window. + // + iat.expires_at -= chrono::minutes (5); } catch (const json::invalid_json_input& e) { // Note: e.name is the GitHub API endpoint. // - fail << "malformed JSON in response from " << e.name << ", line: " - << e.line << ", column: " << e.column << ", byte offset: " - << e.position << ", error: " << e; + error << "malformed JSON in response from " << e.name << ", line: " + << e.line << ", column: " << e.column << ", byte offset: " + << e.position << ", error: " << e; + return nullopt; } catch (const invalid_argument& e) { - fail << "malformed header(s) in response: " << e; + error << "malformed header(s) in response: " << e; + return nullopt; } catch (const system_error& e) { - fail << "unable to get installation access token (errno=" << e.code () - << "): " << e.what (); + error << "unable to get installation access token (errno=" << e.code () + << "): " << e.what (); + return nullopt; } return iat; } - - // The rest is GitHub request/response type parsing and printing. - // - using event = json::event; - - // Throw invalid_json_input when a required member `m` is missing from a - // JSON object `o`. - // - [[noreturn]] static void - missing_member (const json::parser& p, const char* o, const char* m) - { - throw json::invalid_json_input ( - p.input_name, - p.line (), p.column (), p.position (), - o + string (" object is missing member '") + m + '\''); - } - - // check_suite - // - gh::check_suite:: - check_suite (json::parser& p) - { - p.next_expect (event::begin_object); - - bool i (false), hb (false), hs (false), bf (false), at (false); - - // Skip unknown/uninteresting members. - // - while (p.next_expect (event::name, event::end_object)) - { - auto c = [&p] (bool& v, const char* s) - { - return p.name () == s ? 
(v = true) : false; - }; - - if (c (i, "id")) id = p.next_expect_number<uint64_t> (); - else if (c (hb, "head_branch")) head_branch = p.next_expect_string (); - else if (c (hs, "head_sha")) head_sha = p.next_expect_string (); - else if (c (bf, "before")) before = p.next_expect_string (); - else if (c (at, "after")) after = p.next_expect_string (); - else p.next_expect_value_skip (); - } - - if (!i) missing_member (p, "check_suite", "id"); - if (!hb) missing_member (p, "check_suite", "head_branch"); - if (!hs) missing_member (p, "check_suite", "head_sha"); - if (!bf) missing_member (p, "check_suite", "before"); - if (!at) missing_member (p, "check_suite", "after"); - } - - ostream& - gh::operator<< (ostream& os, const check_suite& cs) - { - os << "id: " << cs.id << endl - << "head_branch: " << cs.head_branch << endl - << "head_sha: " << cs.head_sha << endl - << "before: " << cs.before << endl - << "after: " << cs.after << endl; - - return os; - } - - // repository - // - gh::repository:: - repository (json::parser& p) - { - p.next_expect (event::begin_object); - - bool nm (false), fn (false), db (false); - - // Skip unknown/uninteresting members. - // - while (p.next_expect (event::name, event::end_object)) - { - auto c = [&p] (bool& v, const char* s) - { - return p.name () == s ? (v = true) : false; - }; - - if (c (nm, "name")) name = p.next_expect_string (); - else if (c (fn, "full_name")) full_name = p.next_expect_string (); - else if (c (db, "default_branch")) default_branch = p.next_expect_string (); - else p.next_expect_value_skip (); - } - - if (!nm) missing_member (p, "repository", "name"); - if (!fn) missing_member (p, "repository", "full_name"); - if (!db) missing_member (p, "repository", "default_branch"); - } - - ostream& - gh::operator<< (ostream& os, const repository& rep) - { - os << "name: " << rep.name << endl - << "full_name: " << rep.full_name << endl - << "default_branch: " << rep.default_branch << endl; - - return os; - } - - // installation - // - gh::installation:: - installation (json::parser& p) - { - p.next_expect (event::begin_object); - - bool i (false); - - // Skip unknown/uninteresting members. - // - while (p.next_expect (event::name, event::end_object)) - { - auto c = [&p] (bool& v, const char* s) - { - return p.name () == s ? (v = true) : false; - }; - - if (c (i, "id")) id = p.next_expect_number<uint64_t> (); - else p.next_expect_value_skip (); - } - - if (!i) missing_member (p, "installation", "id"); - } - - ostream& - gh::operator<< (ostream& os, const installation& i) - { - os << "id: " << i.id << endl; - - return os; - } - - // check_suite_event - // - gh::check_suite_event:: - check_suite_event (json::parser& p) - { - p.next_expect (event::begin_object); - - bool ac (false), cs (false), rp (false), in (false); - - // Skip unknown/uninteresting members. - // - while (p.next_expect (event::name, event::end_object)) - { - auto c = [&p] (bool& v, const char* s) - { - return p.name () == s ? 
(v = true) : false; - }; - - if (c (ac, "action")) action = p.next_expect_string (); - else if (c (cs, "check_suite")) check_suite = gh::check_suite (p); - else if (c (rp, "repository")) repository = gh::repository (p); - else if (c (in, "installation")) installation = gh::installation (p); - else p.next_expect_value_skip (); - } - - if (!ac) missing_member (p, "check_suite_event", "action"); - if (!cs) missing_member (p, "check_suite_event", "check_suite"); - if (!rp) missing_member (p, "check_suite_event", "repository"); - if (!in) missing_member (p, "check_suite_event", "installation"); - } - - ostream& - gh::operator<< (ostream& os, const check_suite_event& cs) - { - os << "action: " << cs.action << endl; - os << "<check_suite>" << endl << cs.check_suite; - os << "<repository>" << endl << cs.repository; - os << "<installation>" << endl << cs.installation; - - return os; - } - - // installation_access_token - // - // Example JSON: - // - // { - // "token": "ghs_Py7TPcsmsITeVCAWeVtD8RQs8eSos71O5Nzp", - // "expires_at": "2024-02-15T16:16:38Z", - // ... - // } - // - gh::installation_access_token:: - installation_access_token (json::parser& p) - { - p.next_expect (event::begin_object); - - bool tk (false), ea (false); - - // Skip unknown/uninteresting members. - // - while (p.next_expect (event::name, event::end_object)) - { - auto c = [&p] (bool& v, const char* s) - { - return p.name () == s ? (v = true) : false; - }; - - if (c (tk, "token")) token = p.next_expect_string (); - else if (c (ea, "expires_at")) - { - const string& s (p.next_expect_string ()); - expires_at = from_string (s.c_str (), "%Y-%m-%dT%TZ", false /* local */); - } - else p.next_expect_value_skip (); - } - - if (!tk) missing_member (p, "installation_access_token", "token"); - if (!ea) missing_member (p, "installation_access_token", "expires_at"); - } - - ostream& - gh::operator<< (ostream& os, const installation_access_token& t) - { - os << "token: " << t.token << endl; - os << "expires_at: "; - butl::operator<< (os, t.expires_at) << endl; - - return os; - } } diff --git a/mod/mod-ci-github.hxx b/mod/mod-ci-github.hxx index 9731881..07feca8 100644 --- a/mod/mod-ci-github.hxx +++ b/mod/mod-ci-github.hxx @@ -10,115 +10,28 @@ #include <mod/module.hxx> #include <mod/module-options.hxx> -namespace butl -{ - namespace json - { - class parser; - } -} +#include <mod/ci-common.hxx> +#include <mod/tenant-service.hxx> + +#include <mod/mod-ci-github-gh.hxx> namespace brep { - // GitHub request/response types. - // - // Note that having this types directly in brep causes clashes (e.g., for - // the repository name). - // - namespace gh - { - namespace json = butl::json; - - // The "check_suite" object within a check_suite webhook event request. - // - struct check_suite - { - uint64_t id; - string head_branch; - string head_sha; - string before; - string after; - - explicit - check_suite (json::parser&); - - check_suite () = default; - }; - - struct repository - { - string name; - string full_name; - string default_branch; - - explicit - repository (json::parser&); - - repository () = default; - }; - - struct installation - { - uint64_t id; - - explicit - installation (json::parser&); - - installation () = default; - }; - - // The check_suite webhook event request. 
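The parsers being removed here (and their relocated counterparts) all follow the same libbutl JSON pattern: expect the object start, loop over member names while skipping unknown ones, and verify at the end that every required member was seen. A self-contained sketch of adding one more object in this style; gh_sender and its members are made up for illustration, while the parser calls are the ones used throughout this diff:

  #include <libbutl/json/parser.hxx>

  #include <string>
  #include <cstdint>

  namespace json = butl::json;

  struct gh_sender
  {
    std::string login;
    std::uint64_t id;

    explicit
    gh_sender (json::parser& p)
    {
      using event = json::event;

      p.next_expect (event::begin_object);

      bool lg (false), i (false);

      // Skip unknown/uninteresting members.
      //
      while (p.next_expect (event::name, event::end_object))
      {
        auto c = [&p] (bool& v, const char* s)
        {
          return p.name () == s ? (v = true) : false;
        };

        if (c (lg, "login")) login = p.next_expect_string ();
        else if (c (i, "id")) id = p.next_expect_number<std::uint64_t> ();
        else p.next_expect_value_skip ();
      }

      // Report a missing required member the same way missing_member()
      // does above.
      //
      if (!lg || !i)
        throw json::invalid_json_input (
          p.input_name,
          p.line (), p.column (), p.position (),
          "gh_sender object is missing a required member");
    }
  };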
- // - struct check_suite_event - { - string action; - gh::check_suite check_suite; - gh::repository repository; - gh::installation installation; - - explicit - check_suite_event (json::parser&); - - check_suite_event () = default; - }; - - struct installation_access_token - { - string token; - timestamp expires_at; - - explicit - installation_access_token (json::parser&); - - installation_access_token () = default; - }; - - ostream& - operator<< (ostream&, const check_suite&); - - ostream& - operator<< (ostream&, const repository&); - - ostream& - operator<< (ostream&, const installation&); - - ostream& - operator<< (ostream&, const check_suite_event&); - - ostream& - operator<< (ostream&, const installation_access_token&); - } - - class ci_github: public handler + class ci_github: public handler, + private ci_start, + public tenant_service_build_queued, + public tenant_service_build_building, + public tenant_service_build_built { public: - ci_github () = default; + explicit + ci_github (tenant_service_map&); // Create a shallow copy (handling instance) if initialized and a deep // copy (context exemplar) otherwise. // explicit - ci_github (const ci_github&); + ci_github (const ci_github&, tenant_service_map&); virtual bool handle (request&, response&); @@ -126,6 +39,21 @@ namespace brep virtual const cli::options& cli_options () const {return options::ci_github::description ();} + virtual function<optional<string> (const tenant_service&)> + build_queued (const tenant_service&, + const vector<build>&, + optional<build_state> initial_state, + const build_queued_hints&, + const diag_epilogue& log_writer) const noexcept override; + + virtual function<optional<string> (const tenant_service&)> + build_building (const tenant_service&, const build&, + const diag_epilogue& log_writer) const noexcept override; + + virtual function<optional<string> (const tenant_service&)> + build_built (const tenant_service&, const build&, + const diag_epilogue& log_writer) const noexcept override; + private: virtual void init (cli::scanner&); @@ -133,18 +61,22 @@ namespace brep // Handle the check_suite event `requested` and `rerequested` actions. // bool - handle_check_suite_request (gh::check_suite_event) const; + handle_check_suite_request (gh_check_suite_event); - string - generate_jwt () const; + optional<string> + generate_jwt (const basic_mark& trace, const basic_mark& error) const; // Authenticate to GitHub as an app installation. // - gh::installation_access_token - obtain_installation_access_token (uint64_t install_id, string jwt) const; + optional<gh_installation_access_token> + obtain_installation_access_token (uint64_t install_id, + string jwt, + const basic_mark& error) const; private: shared_ptr<options::ci_github> options_; + + tenant_service_map& tenant_service_map_; }; } diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx index ee2e9ce..b0d5e0e 100644 --- a/mod/mod-repository-root.cxx +++ b/mod/mod-repository-root.cxx @@ -137,7 +137,7 @@ namespace brep ci_ (make_shared<ci> ()), #endif ci_cancel_ (make_shared<ci_cancel> ()), - ci_github_ (make_shared<ci_github> ()), + ci_github_ (make_shared<ci_github> (*tenant_service_map_)), upload_ (make_shared<upload> ()) { } @@ -217,7 +217,7 @@ namespace brep ci_github_ ( r.initialized_ ? r.ci_github_ - : make_shared<ci_github> (*r.ci_github_)), + : make_shared<ci_github> (*r.ci_github_, *tenant_service_map_)), upload_ ( r.initialized_ ? 
 r.upload_
diff --git a/mod/tenant-service.hxx b/mod/tenant-service.hxx
index b7f5c02..c46cb7b 100644
--- a/mod/tenant-service.hxx
+++ b/mod/tenant-service.hxx
@@ -39,11 +39,13 @@ namespace brep
   // While the implementation tries to make sure the notifications arrive in
   // the correct order, this is currently done by imposing delays (some
   // natural, such as building->built, and some artificial, such as
-  // queued->building). As result, it is unlikely but possible to be notified
-  // about the state transitions in the wrong order, especially if the
-  // notifications take a long time. To minimize the chance of this happening,
-  // the service implementation should strive to batch the queued state
-  // notifications (or which there could be hundreds) in a single request if
+  // queued->building). As a result, it is unlikely but possible to observe
+  // the state transition notifications in the wrong order, especially if
+  // processing notifications can take a long time. For example, while
+  // processing the queued notification, the building notification may arrive
+  // in a different thread. To minimize the chance of this happening, the
+  // service implementation should strive to batch the queued state
+  // notifications (of which there could be hundreds) in a single request if
   // at all possible. Also, if supported by the third-party API, it makes
   // sense for the implementation to protect against overwriting later states
   // with earlier. For example, if it's possible to place a condition on a
@@ -71,7 +73,8 @@ namespace brep
   // service data. It should return the new data or nullopt if no update is
   // necessary. Note: tenant_service::data passed to the callback and to the
   // returned function may not be the same. Also, the returned function may
-  // be called multiple times (on transaction retries).
+  // be called multiple times (on transaction retries). Note that the passed
+  // log_writer is valid during the calls to the returned function.
   //
   // The passed initial_state indicates the logical initial state and is
   // either absent, `building` (interrupted), or `built` (rebuild). Note |
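Every such notification handler ends up with the same shape as ci_github::build_building() above: parse the service data, decide what needs to change, talk to the third-party API, and return a closure that re-applies the change to whatever service data is current at commit time. A stripped-down sketch of that shape against this interface (my_service is made up; only the tenant_service_build_building signature and the returned-closure contract come from this header):

  #include <mod/tenant-service.hxx>

  namespace brep
  {
    class my_service: public tenant_service_build_building
    {
    public:
      virtual function<optional<string> (const tenant_service&)>
      build_building (const tenant_service& ts,
                      const build&,
                      const diag_epilogue& log_writer) const noexcept override
      {
        // Parse ts.data, notify the third-party service, etc., issuing any
        // diagnostics through log_writer.

        return [] (const tenant_service&) -> optional<string>
        {
          // May be called multiple times (transaction retries), so compute
          // the new data from the passed tenant_service rather than from
          // anything moved out of the captures. Return nullopt if no update
          // is necessary.
          //
          return nullopt;
        };
      }
    };
  }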