diff options
author | Francois Kritzinger <francois@codesynthesis.com> | 2024-12-02 13:36:45 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2024-12-10 16:44:55 +0200 |
commit | c05051582afa6d778edf544bf8ccd9392ef64bb0 (patch) | |
tree | b3a3536f19c738071141d01791e06659e9213254 /mod/mod-ci-github.cxx | |
parent | 3871a466fa21ed7ecb6a7b1d1d5ef4d14b736a48 (diff) |
Handle replaced tenants and clean up code/comments
Diffstat (limited to 'mod/mod-ci-github.cxx')
-rw-r--r-- | mod/mod-ci-github.cxx | 143 |
1 files changed, 77 insertions, 66 deletions
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index 113da2e..394638a 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -19,26 +19,6 @@ #include <stdexcept> -// @@ Remaining TODOs -// -// - Rerequested checks -// -// - check_suite (action: rerequested): received when user re-runs all -// checks. -// -// - check_run (action: rerequested): received when user re-runs a -// specific check or all failed checks. -// -// @@ TMP I have confirmed that the above is accurate. -// -// Will need to extract a few more fields from check_runs, but the layout -// is very similar to that of check_suite. -// -// - Choose strong webhook secret (when deploying). -// -// - Check that delivery UUID has not been received before (replay attack). -// - // Resources: // // Creating an App: @@ -280,9 +260,6 @@ namespace brep // is that we want be "notified" of new actions at which point we can // decide whether to ignore them or to handle. // - // @@ There is also check_run even (re-requested by user, either - // individual check run or all the failed check runs). - // if (event == "check_suite") { gh_check_suite_event cs; @@ -316,13 +293,10 @@ namespace brep else if (cs.action == "completed") { // GitHub thinks that "all the check runs in this check suite have - // completed and a conclusion is available". Looks like this one we - // ignore? + // completed and a conclusion is available". Check with our own + // bookkeeping and log an error if there is a mismatch. // - // What if our bookkeeping says otherwise? But then we can't even - // access the service data easily here. @@ TODO: maybe/later. - // - return true; + return handle_check_suite_completed (move (cs), warning_success); } else { @@ -558,10 +532,6 @@ namespace brep l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - // @@ What happens if we call this functions with an already existing - // node_id (e.g., replay attack). See the UUID header above. - // - // While it would have been nice to cancel CIs of PRs with this branch as // base not to waste resources, there are complications: Firstly, we can // only do this for remote PRs (since local PRs will most likely share the @@ -690,6 +660,23 @@ namespace brep return true; } + bool ci_github:: + handle_check_suite_completed (gh_check_suite_event cs, bool warning_success) + { + // The plans is as follows: + // + // 1. Load the service data. + // + // 2. Verify it is completed. + // + // 3. Verify (like in build_built()) that all the check runs are + // completed. + // + // 4. Verify the result matches what GitHub thinks it is (if easy). + + return true; + } + // Create a gq_built_result. // // Throw invalid_argument in case of invalid result_status. @@ -795,6 +782,7 @@ namespace brep // archived. // service_data sd; + string tenant_id; { // Service id that uniquely identifies the CI tenant. // @@ -868,6 +856,8 @@ namespace brep { fail << "failed to parse service data: " << e; } + + tenant_id = d->tenant_id; } else { @@ -1015,7 +1005,8 @@ namespace brep // only if the build is actually restarted). // auto update_sd = [&error, &new_iat, &race, - &cr, &bcr, &ccr] (const string& /*tenant_id*/, + tenant_id = move (tenant_id), + &cr, &bcr, &ccr] (const string& ti, const tenant_service& ts, build_state) -> optional<string> { @@ -1024,6 +1015,16 @@ namespace brep race = false; // Reset. + if (tenant_id != ti) + { + // The tenant got replaced since we loaded it but we managed to + // trigger a rebuild in the new tenant. Who knows whose check runs are + // visible, so let's fail ours similar to the cases below. + // + race = true; + return nullopt; + } + service_data sd; try { @@ -1320,9 +1321,8 @@ namespace brep return true; } - function<optional<string> (const string& tenant_id, - const tenant_service&)> ci_github:: - build_unloaded (const string& /*tenant_id*/, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded (const string& ti, tenant_service&& ts, const diag_epilogue& log_writer) const noexcept { @@ -1342,12 +1342,11 @@ namespace brep } return sd.pre_check - ? build_unloaded_pre_check (move (ts), move (sd), log_writer) - : build_unloaded_load (move (ts), move (sd), log_writer); + ? build_unloaded_pre_check (move (ts), move (sd), log_writer) + : build_unloaded_load (ti, move (ts), move (sd), log_writer); } - function<optional<string> (const string& tenant_id, - const tenant_service&)> ci_github:: + function<optional<string> (const string&, const tenant_service&)> ci_github:: build_unloaded_pre_check (tenant_service&& ts, service_data&& sd, const diag_epilogue& log_writer) const noexcept @@ -1547,9 +1546,9 @@ namespace brep return nullptr; } - function<optional<string> (const string& tenant_id, - const tenant_service&)> ci_github:: - build_unloaded_load (tenant_service&& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded_load (const string& tenant_id, + tenant_service&& ts, service_data&& sd, const diag_epilogue& log_writer) const noexcept try @@ -1751,15 +1750,19 @@ namespace brep return nullptr; // Nothing to save (but potentially retry on next call). return [&error, + tenant_id, iat = move (new_iat), cni = move (conclusion_node_id)] - (const string& /*tenant_id*/, + (const string& ti, const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to // transaction being aborted) and so should not move out of its // captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -1812,9 +1815,9 @@ namespace brep // them when notifying GitHub. The first is not important (we expect the // state to go back to building shortly). The second should normally not // happen and would mean that a completed check suite may go back on its - // conclusion (which would be pretty confusing for the user). @@@ This - // can/will happen on check run rebuild. Distinguish between internal - // and external rebuilds? + // conclusion (which would be pretty confusing for the user). Note that + // the ->queued state transition of a check run rebuild triggered by + // us is handled directly in handle_check_run_rerequest(). // // So, for GitHub notifications, we only have the following linear // transition sequence: @@ -1891,9 +1894,8 @@ namespace brep // if we have node_id, then we update, otherwise, we create (potentially // overriding the check run created previously). // - function<optional<string> (const string& tenant_id, - const tenant_service&)> ci_github:: - build_queued (const string& /*tenant_id*/, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_queued (const string& tenant_id, const tenant_service& ts, const vector<build>& builds, optional<build_state> istate, @@ -2019,16 +2021,20 @@ namespace brep } } - return [bs = move (bs), + return [tenant_id, + bs = move (bs), iat = move (new_iat), crs = move (crs), error = move (error), - warn = move (warn)] (const string& /*tenant_id*/, + warn = move (warn)] (const string& ti, const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -2077,9 +2083,8 @@ namespace brep return nullptr; } - function<optional<string> (const string& tenant_id, - const tenant_service&)> ci_github:: - build_building (const string& /*tenant_id*/, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_building (const string& tenant_id, const tenant_service& ts, const build& b, const diag_epilogue& log_writer) const noexcept @@ -2187,15 +2192,19 @@ namespace brep } } - return [iat = move (new_iat), + return [tenant_id, + iat = move (new_iat), cr = move (*cr), error = move (error), - warn = move (warn)] (const string& /*tenant_id*/, + warn = move (warn)] (const string& ti, const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -2241,9 +2250,8 @@ namespace brep return nullptr; } - function<optional<string> (const string& tenant_id, - const tenant_service&)> ci_github:: - build_built (const string& /*tenant_id*/, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_built (const string& tenant_id, const tenant_service& ts, const build& b, const diag_epilogue& log_writer) const noexcept @@ -2253,9 +2261,8 @@ namespace brep NOTIFICATION_DIAG (log_writer); - // @@ TODO Include service_data::event_node_id and perhaps ts.id in - // diagnostics? E.g. when failing to update check runs we print the - // build ID only. + // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem + // kind of meaningless. Log lines get pretty long this way however. service_data sd; try @@ -2569,16 +2576,20 @@ namespace brep } } - return [iat = move (new_iat), + return [tenant_id, + iat = move (new_iat), cr = move (cr), completed = completed, error = move (error), - warn = move (warn)] (const string& /*tenant_id*/, + warn = move (warn)] (const string& ti, const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { |