aboutsummaryrefslogtreecommitdiff
path: root/mod/mod-ci-github.cxx
diff options
context:
space:
mode:
authorFrancois Kritzinger <francois@codesynthesis.com>2024-12-02 13:36:45 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2024-12-10 16:44:55 +0200
commitc05051582afa6d778edf544bf8ccd9392ef64bb0 (patch)
treeb3a3536f19c738071141d01791e06659e9213254 /mod/mod-ci-github.cxx
parent3871a466fa21ed7ecb6a7b1d1d5ef4d14b736a48 (diff)
Handle replaced tenants and clean up code/comments
Diffstat (limited to 'mod/mod-ci-github.cxx')
-rw-r--r--mod/mod-ci-github.cxx143
1 files changed, 77 insertions, 66 deletions
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index 113da2e..394638a 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -19,26 +19,6 @@
#include <stdexcept>
-// @@ Remaining TODOs
-//
-// - Rerequested checks
-//
-// - check_suite (action: rerequested): received when user re-runs all
-// checks.
-//
-// - check_run (action: rerequested): received when user re-runs a
-// specific check or all failed checks.
-//
-// @@ TMP I have confirmed that the above is accurate.
-//
-// Will need to extract a few more fields from check_runs, but the layout
-// is very similar to that of check_suite.
-//
-// - Choose strong webhook secret (when deploying).
-//
-// - Check that delivery UUID has not been received before (replay attack).
-//
-
// Resources:
//
// Creating an App:
@@ -280,9 +260,6 @@ namespace brep
// is that we want be "notified" of new actions at which point we can
// decide whether to ignore them or to handle.
//
- // @@ There is also check_run even (re-requested by user, either
- // individual check run or all the failed check runs).
- //
if (event == "check_suite")
{
gh_check_suite_event cs;
@@ -316,13 +293,10 @@ namespace brep
else if (cs.action == "completed")
{
// GitHub thinks that "all the check runs in this check suite have
- // completed and a conclusion is available". Looks like this one we
- // ignore?
+ // completed and a conclusion is available". Check with our own
+ // bookkeeping and log an error if there is a mismatch.
//
- // What if our bookkeeping says otherwise? But then we can't even
- // access the service data easily here. @@ TODO: maybe/later.
- //
- return true;
+ return handle_check_suite_completed (move (cs), warning_success);
}
else
{
@@ -558,10 +532,6 @@ namespace brep
l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
- // @@ What happens if we call this functions with an already existing
- // node_id (e.g., replay attack). See the UUID header above.
- //
-
// While it would have been nice to cancel CIs of PRs with this branch as
// base not to waste resources, there are complications: Firstly, we can
// only do this for remote PRs (since local PRs will most likely share the
@@ -690,6 +660,23 @@ namespace brep
return true;
}
+ bool ci_github::
+ handle_check_suite_completed (gh_check_suite_event cs, bool warning_success)
+ {
+ // The plans is as follows:
+ //
+ // 1. Load the service data.
+ //
+ // 2. Verify it is completed.
+ //
+ // 3. Verify (like in build_built()) that all the check runs are
+ // completed.
+ //
+ // 4. Verify the result matches what GitHub thinks it is (if easy).
+
+ return true;
+ }
+
// Create a gq_built_result.
//
// Throw invalid_argument in case of invalid result_status.
@@ -795,6 +782,7 @@ namespace brep
// archived.
//
service_data sd;
+ string tenant_id;
{
// Service id that uniquely identifies the CI tenant.
//
@@ -868,6 +856,8 @@ namespace brep
{
fail << "failed to parse service data: " << e;
}
+
+ tenant_id = d->tenant_id;
}
else
{
@@ -1015,7 +1005,8 @@ namespace brep
// only if the build is actually restarted).
//
auto update_sd = [&error, &new_iat, &race,
- &cr, &bcr, &ccr] (const string& /*tenant_id*/,
+ tenant_id = move (tenant_id),
+ &cr, &bcr, &ccr] (const string& ti,
const tenant_service& ts,
build_state) -> optional<string>
{
@@ -1024,6 +1015,16 @@ namespace brep
race = false; // Reset.
+ if (tenant_id != ti)
+ {
+ // The tenant got replaced since we loaded it but we managed to
+ // trigger a rebuild in the new tenant. Who knows whose check runs are
+ // visible, so let's fail ours similar to the cases below.
+ //
+ race = true;
+ return nullopt;
+ }
+
service_data sd;
try
{
@@ -1320,9 +1321,8 @@ namespace brep
return true;
}
- function<optional<string> (const string& tenant_id,
- const tenant_service&)> ci_github::
- build_unloaded (const string& /*tenant_id*/,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded (const string& ti,
tenant_service&& ts,
const diag_epilogue& log_writer) const noexcept
{
@@ -1342,12 +1342,11 @@ namespace brep
}
return sd.pre_check
- ? build_unloaded_pre_check (move (ts), move (sd), log_writer)
- : build_unloaded_load (move (ts), move (sd), log_writer);
+ ? build_unloaded_pre_check (move (ts), move (sd), log_writer)
+ : build_unloaded_load (ti, move (ts), move (sd), log_writer);
}
- function<optional<string> (const string& tenant_id,
- const tenant_service&)> ci_github::
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
build_unloaded_pre_check (tenant_service&& ts,
service_data&& sd,
const diag_epilogue& log_writer) const noexcept
@@ -1547,9 +1546,9 @@ namespace brep
return nullptr;
}
- function<optional<string> (const string& tenant_id,
- const tenant_service&)> ci_github::
- build_unloaded_load (tenant_service&& ts,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded_load (const string& tenant_id,
+ tenant_service&& ts,
service_data&& sd,
const diag_epilogue& log_writer) const noexcept
try
@@ -1751,15 +1750,19 @@ namespace brep
return nullptr; // Nothing to save (but potentially retry on next call).
return [&error,
+ tenant_id,
iat = move (new_iat),
cni = move (conclusion_node_id)]
- (const string& /*tenant_id*/,
+ (const string& ti,
const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to
// transaction being aborted) and so should not move out of its
// captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -1812,9 +1815,9 @@ namespace brep
// them when notifying GitHub. The first is not important (we expect the
// state to go back to building shortly). The second should normally not
// happen and would mean that a completed check suite may go back on its
- // conclusion (which would be pretty confusing for the user). @@@ This
- // can/will happen on check run rebuild. Distinguish between internal
- // and external rebuilds?
+ // conclusion (which would be pretty confusing for the user). Note that
+ // the ->queued state transition of a check run rebuild triggered by
+ // us is handled directly in handle_check_run_rerequest().
//
// So, for GitHub notifications, we only have the following linear
// transition sequence:
@@ -1891,9 +1894,8 @@ namespace brep
// if we have node_id, then we update, otherwise, we create (potentially
// overriding the check run created previously).
//
- function<optional<string> (const string& tenant_id,
- const tenant_service&)> ci_github::
- build_queued (const string& /*tenant_id*/,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_queued (const string& tenant_id,
const tenant_service& ts,
const vector<build>& builds,
optional<build_state> istate,
@@ -2019,16 +2021,20 @@ namespace brep
}
}
- return [bs = move (bs),
+ return [tenant_id,
+ bs = move (bs),
iat = move (new_iat),
crs = move (crs),
error = move (error),
- warn = move (warn)] (const string& /*tenant_id*/,
+ warn = move (warn)] (const string& ti,
const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -2077,9 +2083,8 @@ namespace brep
return nullptr;
}
- function<optional<string> (const string& tenant_id,
- const tenant_service&)> ci_github::
- build_building (const string& /*tenant_id*/,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_building (const string& tenant_id,
const tenant_service& ts,
const build& b,
const diag_epilogue& log_writer) const noexcept
@@ -2187,15 +2192,19 @@ namespace brep
}
}
- return [iat = move (new_iat),
+ return [tenant_id,
+ iat = move (new_iat),
cr = move (*cr),
error = move (error),
- warn = move (warn)] (const string& /*tenant_id*/,
+ warn = move (warn)] (const string& ti,
const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -2241,9 +2250,8 @@ namespace brep
return nullptr;
}
- function<optional<string> (const string& tenant_id,
- const tenant_service&)> ci_github::
- build_built (const string& /*tenant_id*/,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_built (const string& tenant_id,
const tenant_service& ts,
const build& b,
const diag_epilogue& log_writer) const noexcept
@@ -2253,9 +2261,8 @@ namespace brep
NOTIFICATION_DIAG (log_writer);
- // @@ TODO Include service_data::event_node_id and perhaps ts.id in
- // diagnostics? E.g. when failing to update check runs we print the
- // build ID only.
+ // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem
+ // kind of meaningless. Log lines get pretty long this way however.
service_data sd;
try
@@ -2569,16 +2576,20 @@ namespace brep
}
}
- return [iat = move (new_iat),
+ return [tenant_id,
+ iat = move (new_iat),
cr = move (cr),
completed = completed,
error = move (error),
- warn = move (warn)] (const string& /*tenant_id*/,
+ warn = move (warn)] (const string& ti,
const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{