author    Boris Kolpackov <boris@codesynthesis.com>  2024-11-25 12:57:26 +0200
committer Boris Kolpackov <boris@codesynthesis.com>  2024-11-25 12:57:26 +0200
commit    a4ffb1aea2982f6c53838fda8ed18bf0b1a6f645 (patch)
tree      3ddbfdae7eff2c124a4ae6c28e23279260716364
parent    78ffae24b5e134c613f54defa65128e11a26013b (diff)
-rw-r--r--  mod/mod-ci-github-gq.cxx            |   4
-rw-r--r--  mod/mod-ci-github-service-data.hxx  |   4
-rw-r--r--  mod/mod-ci-github.cxx               | 402
3 files changed, 204 insertions, 206 deletions
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx
index 3805445..10ffa78 100644
--- a/mod/mod-ci-github-gq.cxx
+++ b/mod/mod-ci-github-gq.cxx
@@ -436,7 +436,7 @@ namespace brep
ostringstream os;
- os << "mutation {" << '\n';
+ os << "mutation {" << '\n';
// Serialize a `createCheckRun` for the build.
//
@@ -472,7 +472,7 @@ namespace brep
<< " }" << '\n'
<< "}" << '\n';
- os << "}" << '\n';
+ os << "}" << '\n';
return os.str ();
}
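For context, the code above assembles a single GraphQL request containing one
`createCheckRun` mutation per check run. The generated text looks roughly like
this (a sketch only: `createCheckRun` and its `repositoryId`, `headSha`,
`name`, and `status` inputs are GitHub's documented GraphQL API, but the alias
and selection set shown here are illustrative):

  mutation {
    cr0: createCheckRun (input: {repositoryId: "...",
                                 headSha: "...",
                                 name: "...",
                                 status: QUEUED}) {
      checkRun { id name }
    }
  }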
diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx
index 93a8895..cabd19a 100644
--- a/mod/mod-ci-github-service-data.hxx
+++ b/mod/mod-ci-github-service-data.hxx
@@ -35,8 +35,8 @@ namespace brep
optional<result_status> status; // Only if state is built & synced.
- // Never serialized. Only used by some of the GraphQL functions in
- // mod-ci-github-gq.hxx.
+ // Note: never serialized (only used to pass information to the GraphQL
+ // functions).
//
optional<string> details_url;
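For context, a minimal sketch of the distinction this comment documents
(values hypothetical):

  check_run cr;
  cr.state = build_state::queued; // Serialized to the service data JSON.
  cr.details_url = "https://..."; // Not serialized; only carries the URL to
                                  // the GraphQL functions for the duration
                                  // of the call.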
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index 022abac..76d6411 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -653,7 +653,8 @@ namespace brep
// Note that GitHub always posts a message to their GUI saying "You have
// successfully requested <check_run_name> be rerun", regardless of what
// HTTP status code we respond with. However, we do return error status codes
- // when appropriate in case they start handling them someday.
+ // when there is no better option (like failing the conclusion) in case they
+ // start handling them someday.
//
bool ci_github::
handle_check_run_rerequest (const gh_check_run_event& cr,
@@ -663,38 +664,21 @@ namespace brep
l3 ([&]{trace << "check_run event { " << cr << " }";});
- // Get a new installation access token.
- //
- auto get_iat = [this, &trace, &error, &cr] ()
- -> optional<gh_installation_access_token>
- {
- optional<string> jwt (generate_jwt (trace, error));
- if (!jwt)
- return nullopt;
-
- optional<gh_installation_access_token> iat (
- obtain_installation_access_token (cr.installation.id,
- move (*jwt),
- error));
-
- if (iat)
- l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
-
- return iat;
- };
-
// The overall plan is as follows:
//
// 1. Load service data.
//
- // 2. If the tenant is archived, then fail (re-create) conclusion with
- // appropriate diagnostics.
+ // 2. If the tenant is archived, then fail (re-create) both the check run
+ // and the conclusion with appropriate diagnostics.
//
// @@ TMP A PR might get blocked indefinitely by this. If the user
// re-runs a single CR, the "re-run all check runs" option disappears
// from the UI. So the single re-run will keep failing but there
// will be no way to start a new CI from scratch.
//
+ // @@ Can we confirm that if we also fail the check run, then the
+ // re-run check suite becomes possible again?
+ //
// 3. If the check run is in the queued state, then do nothing.
//
// 4. Re-create the check run in the queued state and the conclusion in
@@ -711,12 +695,9 @@ namespace brep
//
// c. Clear the completed flag if true.
//
- // d. "Return" the service data to be used after the call.
- //
- // @@ TMP Not currently returning anything.
- //
// 6. If the result of rebuild() indicates the tenant is archived, then
- // fail (update) the conclusion check run with appropriate diagnostics.
+ // fail (update) both the check run and conclusion with appropriate
+ // diagnostics.
//
// 7. If original state is queued (no rebuild was scheduled), then fail
// (update) both the check run and the conclusion.
@@ -725,13 +706,32 @@ namespace brep
// practice we have to re-create as new check runs in order to replace the
// existing ones because GitHub does not allow transitioning out of the
// built state.
+
+ // Get a new installation access token.
+ //
+ auto get_iat = [this, &trace, &error, &cr] ()
+ -> optional<gh_installation_access_token>
+ {
+ optional<string> jwt (generate_jwt (trace, error));
+ if (!jwt)
+ return nullopt;
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (cr.installation.id,
+ move (*jwt),
+ error));
+
+ if (iat)
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+ return iat;
+ };
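For reference, this lambda wraps GitHub's two-step App authentication flow: a
JWT signed with the application's private key is exchanged for an installation
access token (IAT) scoped to the installation that delivered the event. A
minimal usage sketch (the lambda is from this diff; the error handling shown
is hypothetical):

  optional<gh_installation_access_token> iat (get_iat ());

  if (!iat)
    throw server_error (); // Unable to authenticate to GitHub.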
const string& repo_node_id (cr.repository.node_id);
const string& head_sha (cr.check_run.check_suite.head_sha);
- // The build and conclusion check runs. They are sent to GitHub in a
- // single request (unless something goes wrong) so store them together
+ // Prepare the build and conclusion check runs. They are sent to GitHub in
+ // a single request (unless something goes wrong) so store them together
// from the outset.
//
vector<check_run> check_runs (2);
@@ -740,8 +740,8 @@ namespace brep
ccr.name = conclusion_check_run_name;
- // Load the service data, failing the conclusion check run if the tenant
- // has been archived.
+ // Load the service data, failing the check runs if the tenant has been
+ // archived.
//
service_data sd;
{
@@ -761,9 +761,13 @@ namespace brep
throw server_error ();
gq_built_result br (
- make_built_result (result_status::error, warning_success,
- "Unable to rebuild: tenant has been archived"));
+ make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has "
+ "been archived"));
+ // @@ Let's fail both check runs.
+ //
if (gq_create_check_run (error, ccr, iat->token,
repo_node_id, head_sha,
nullopt /* details_url */,
@@ -795,10 +799,9 @@ namespace brep
{
// No such tenant.
//
- error << "check run " << cr.check_run.node_id
- << " re-requested but tenant_service with id " << sid
- << " does not exist";
- return true;
+ fail << "check run " << cr.check_run.node_id
+ << " re-requested but tenant_service with id " << sid
+ << " does not exist";
}
}
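The `@@ Let's fail both check runs` note above suggests also re-creating the
build check run (bcr) in the failed state here, not just the conclusion. A
sketch of what that could look like (the br copy and the bcr call are
hypothetical; the call shape mirrors the ccr call above):

  gq_built_result bbr (br); // Copy: br itself is moved into the ccr call.

  if (!gq_create_check_run (error, bcr, iat->token,
                            repo_node_id, head_sha,
                            nullopt /* details_url */,
                            build_state::built, move (bbr)))
    error << "failed to re-create build check run";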
@@ -821,11 +824,10 @@ namespace brep
//
if (cr.check_run.name == conclusion_check_run_name)
{
- // Must exist if it can be re-requested.
- //
- assert (sd.conclusion_node_id);
+ l3 ([&]{trace << "re-requested conclusion check_run";});
- l3 ([&]{trace << "ignoring re-requested conclusion check_run";});
+ if (!sd.conclusion_node_id)
+ fail << "no conclusion node id for check run " << cr.check_run.node_id;
gq_built_result br (
make_built_result (result_status::error, warning_success,
@@ -864,44 +866,50 @@ namespace brep
<< ": failed to extract build id from details_url";
}
- // Initialize `bcr` with the check run from the service data.
+ // Initialize the check run (`bcr`) with state from the service data.
//
{
// Search for the check run in the service data.
//
bool found (false);
- for (const check_run& scr: sd.check_runs)
- {
- if (scr.node_id && *scr.node_id == cr.check_run.node_id)
- {
- found = true;
-
- // Do nothing if the build is already queued.
- //
- if (scr.state == build_state::queued)
- {
- l3 ([&]{trace << "ignoring already-queued check run";});
- return true;
- }
- bcr.name = scr.name;
- bcr.build_id = scr.build_id;
- bcr.state = scr.state;
+ // Note that we look by name in case the node id got replaced by a racing
+ // re-request (in which case we ignore this request).
+ //
+ auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ // @@ TODO
+ }));
+
+ if (i == sd.check_runs.end ())
+ fail << "check_run " << cr.check_run.node_id // @@ name?
+ << ": re-requested but does not exist in service data";
- break;
- }
+ // Do nothing if node ids don't match.
+ //
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ // @@ trace
+ return true;
}
- if (!found)
+ // Do nothing if the build is already queued.
+ //
+ if (i->state == build_state::queued)
{
- fail << "check_run " << cr.check_run.node_id
- << ": re-requested but does not exist in service data";
+ l3 ([&]{trace << "ignoring already-queued check run";});
+ return true;
}
+
+ bcr.name = i->name;
+ bcr.build_id = i->build_id;
+ bcr.state = i->state;
}
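Presumably the `@@ TODO` predicate above matches by check run name, per the
surrounding comments (a sketch, not the committed code; the same shape would
apply to the second lookup inside update_sd below):

  auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (),
                   [&cr] (const check_run& scr)
                   {
                     // Match by name: a racing re-request may have replaced
                     // the node id.
                     //
                     return scr.name == cr.check_run.name;
                   }));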
// Transition the build and conclusion check runs out of the built state
- // by re-creating them.
+ // (or any other state) by re-creating them.
//
bcr.state = build_state::queued;
bcr.state_synced = false;
@@ -925,51 +933,59 @@ namespace brep
<< ": unable to re-create build and conclusion check runs";
}
- // Request the rebuild.
+ // Request the rebuild and update service data.
- // Function called by rebuild() to update the service data (but only if
- // the build is actually restarted).
+ // Callback function called by rebuild() to update the service data (but
+ // only if the build is actually restarted).
//
auto update_sd = [&error, &new_iat,
&cr, &bcr, &ccr] (const tenant_service& ts,
build_state) -> optional<string>
- {
- // NOTE: this lambda may be called repeatedly (e.g., due to transaction
- // being aborted) and so should not move out of its captures.
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
- service_data sd;
- try
- {
- sd = service_data (*ts.data);
- }
- catch (const invalid_argument& e)
- {
- error << "failed to parse service data: " << e;
- return nullopt;
- }
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
- for (check_run& scr: sd.check_runs)
- {
- if (scr.node_id && *scr.node_id == cr.check_run.node_id)
- {
- scr = bcr; // Update with new node_id, state, state_synced.
- sd.conclusion_node_id = ccr.node_id;
+ // Note that we again look by name in case the node id got replaced by a
+ // racing re-request. In this case, however, it's impossible to decide
+ // who won that race, so let's assume it's us since we are called last.
+ //
+ auto i (find_if (
+ sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ // @@ TODO
+ }));
- sd.completed = false;
+ if (i == sd.check_runs.end ())
+ {
+ error << "check_run " << cr.check_run.node_id // @@ name?
+ << ": re-requested but does not exist in service data";
+ return nullopt;
+ }
- // Save the IAT if we created a new one.
- //
- if (new_iat)
- sd.installation_access = *new_iat;
+ *i = bcr; // Update with new node_id, state, state_synced.
- return sd.json ();
- }
- }
+ sd.conclusion_node_id = ccr.node_id;
+ sd.completed = false;
- assert (false); // Check run must exist because we found it earlier.
+ // Save the IAT if we created a new one.
+ //
+ if (new_iat)
+ sd.installation_access = *new_iat;
- return nullopt;
- };
+ return sd.json ();
+ };
optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd));
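As the NOTE above says, update_sd may be invoked repeatedly if the database
transaction is aborted and retried, so it must not move out of its captures.
A minimal illustration of the hazard (hypothetical):

  *i = bcr;        // OK: bcr remains intact for a possible re-invocation.
  *i = move (bcr); // Wrong: a retried invocation would read a moved-from bcr.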
@@ -978,108 +994,86 @@ namespace brep
// conclusion check run. Otherwise the build has been successfully
// re-enqueued so do nothing further.
//
- if (!bs || *bs == build_state::queued)
- {
- // @@ TMP Diverging from the plan here to fail both CRs in the
- // already-archived case as well. Seems better to not leave the
- // build CR in the queued state.
- //
- gq_built_result bbr; // Built result for the build check run.
- gq_built_result cbr; // Built result for the conclusion check run.
-
- if (!bs) // Archived
- {
- // The build has expired since we loaded the service data. Most likely
- // the tenant has been archived.
- //
- bbr = make_built_result (
- result_status::error, warning_success,
- "Unable to rebuild: tenant has been archived or no such build");
-
- cbr = bbr;
- }
- else // *bs == build_state::queued
- {
- // This build has been re-enqueued since we first loaded the service
- // data. This could happen if the user clicked "re-run" multiple times
- // and another handler won the rebuild() race.
- //
- // However the winner of the check runs race cannot be determined.
- //
- // Best case the other handler won the check runs race as well and
- // thus everything will proceed normally. Our check runs will be
- // invisible and disregarded.
- //
- // Worst case we won the check runs race and the other handler's check
- // runs -- the ones that will be updated by the build_*()
- // notifications -- are no longer visible, leaving things quite
- // broken.
- //
- // Either way, we fail our check runs. In the best case scenario it
- // will have no effect; in the worst case scenario it lets the user
- // know something has gone wrong.
- //
- // @@ TMP In an attempt to recover, the user might end up clicking
- // re-run a few times but nothing will change in the UI because the
- // new check runs will only be created once the re-enqueued build
- // has entered the building state.
- //
- bbr = make_built_result (
- result_status::error, warning_success,
- "Unable to rebuild at this time, please try again in a few minutes");
+ if (bs && *bs != build_state::queued)
+ return true;
- cbr = make_built_result (result_status::error, warning_success,
- "Failed to rebuild check run(s)");
- }
+ gq_built_result bbr; // Built result for the build check run.
+ gq_built_result cbr; // Built result for the conclusion check run.
- // True if we successfully updated both of the check runs.
+ if (!bs) // Archived
+ {
+ // The build has expired since we loaded the service data. Most likely
+ // the tenant has been archived.
//
- bool bu (true); // Both updated
-
- // Fail the build check run.
+ bbr = cbr = make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has been archived");
+ }
+ else // Re-enqueued.
+ {
+ // This build has been re-enqueued since we first loaded the service
+ // data. This could happen if the user clicked "re-run" multiple times
+ // and another handler won the rebuild() race.
//
- if (gq_update_check_run (error, bcr, iat->token,
- repo_node_id, *bcr.node_id,
- nullopt /* details_url */,
- build_state::built, move (bbr)))
- {
- l3 ([&]{trace << "updated check_run { " << bcr << " }";});
- }
- else
- {
- bu = false;
-
- error << "check run " << cr.check_run.node_id
- << ": unable to update (replacement) check run "
- << *bcr.node_id;
- }
-
- // Fail the conclusion check run.
+ // However the winner of the check runs race cannot be determined.
//
- if (gq_update_check_run (error, ccr, iat->token,
- repo_node_id, *ccr.node_id,
- nullopt /* details_url */,
- build_state::built, move (cbr)))
- {
- l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
- }
- else
- {
- bu = false;
+ // Best case the other handler won the check runs race as well and
+ // thus everything will proceed normally. Our check runs will be
+ // invisible and disregarded.
+ //
+ // Worst case we won the check runs race and the other handler's check
+ // runs -- the ones that will be updated by the build_*() notifications
+ // -- are no longer visible, leaving things quite broken.
+ //
+ // Either way, we fail our check runs. In the best case scenario it
+ // will have no effect; in the worst case scenario it lets the user
+ // know something has gone wrong.
+ //
+ bbr = cbr = make_built_result (result_status::error, warning_success,
+ "Unable to rebuild, try again");
+ }
- error << "check run " << cr.check_run.node_id
- << ": unable to update conclusion check run " << *ccr.node_id;
- }
+ // Try to update the conclusion check run even if the first update fails.
+ //
+ bool f (false); // Failed.
- // Fail the handler if either of the check runs could not be updated.
- //
- if (!bu)
- throw server_error ();
+ // Fail the build check run.
+ //
+ if (gq_update_check_run (error, bcr, iat->token,
+ repo_node_id, *bcr.node_id,
+ nullopt /* details_url */,
+ build_state::built, move (bbr)))
+ {
+ l3 ([&]{trace << "updated check_run { " << bcr << " }";});
+ }
+ else
+ {
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update (replacement) check run "
+ << *bcr.node_id;
+ f = true;
+ }
- return true;
+ // Fail the conclusion check run.
+ //
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *ccr.node_id,
+ nullopt /* details_url */,
+ build_state::built, move (cbr)))
+ {
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update conclusion check run " << *ccr.node_id;
+ f = true;
}
- // The build has successfully been re-enqueued.
+ // Fail the handler if either of the check runs could not be updated.
+ //
+ if (f)
+ throw server_error ();
return true;
}
@@ -2713,22 +2707,22 @@ namespace brep
url u (details_url);
- if (!u.query || !u.path || u.path->size () <= 1)
- return nullopt;
-
build_id r;
// Extract the tenant from the URL path.
//
// Example path: @d2586f57-21dc-40b7-beb2-6517ad7917dd
//
- r.package.tenant = u.path->substr (1);
-
- if (r.package.tenant.empty ())
+ if (!u.path || u.path->size () != 37 || (*u.path)[0] != '@')
return nullopt;
+ r.package.tenant = u.path->substr (1);
+
// Extract the rest of the build_id members from the URL query.
//
+ if (!u.query)
+ return nullopt;
+
bool pn (false), pv (false), tg (false), tc (false), pc (false),
th (false);
@@ -2747,18 +2741,22 @@ namespace brep
++vp; // Skip '='
- const char* ve (ep ? ep : vp + strlen (vp)); // Value end pointer.
+ const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end.
// Get the value as-is or URL-decode it.
//
- auto getval = [vp, ve] () { return string (vp, ve); };
+ auto rawval = [vp, ve] () { return string (vp, ve); };
auto decval = [vp, ve] () { return mime_url_decode (vp, ve); };
auto make_version = [] (string&& v)
- { return canonical_version (brep::version (move (v))); };
+ {
+ return canonical_version (brep::version (move (v)));
+ };
auto c = [&n] (bool& b, const char* s)
- { return n == s ? (b = true) : false; };
+ {
+ return n == s ? (b = true) : false;
+ };
if (c (pn, "builds")) r.package.name = package_name (decval ());
else if (c (pv, "pv")) r.package.version = make_version (rawval ());
@@ -2781,7 +2779,7 @@ namespace brep
r.toolchain_version = make_version (v.substr (p + 1));
}
- qp = ep ? ep + 1 : nullptr;
+ qp = ep != nullptr ? ep + 1 : nullptr;
}
if (!pn || !pv || !tg || !tc || !pc || !th)
@@ -2789,7 +2787,7 @@ namespace brep
return r;
}
- catch (const invalid_argument&)
+ catch (const invalid_argument&) // Invalid url, brep::version, etc.
{
return nullopt;
}
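Putting it together, a details URL this parser accepts would look something
like the following (host and values hypothetical; the parameter names are from
the code above):

  https://ci.example.org/@d2586f57-21dc-40b7-beb2-6517ad7917dd?builds=hello&pv=1.2.3&tg=x86_64-linux-gnu&tc=linux-gcc&pc=default&th=public-0.17.0

The path carries `@` plus the 36-character tenant id, `builds` the URL-encoded
package name, `pv` the package version, `tg`/`tc` the target and target
configuration, `pc` the package configuration, and `th` the toolchain name
and version (split apart in code outside these hunks).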