diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2017-04-06 12:02:08 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2017-04-06 12:02:08 +0200 |
commit | 6af0f124675b6741dc8f683df902628dfc7e8eb7 (patch) | |
tree | 8dfb973f5f4f756bd471844e01eb8c7a595987e7 | |
parent | 4fb0c6eacbfcee14b36d09d3f89f665bd2c70f36 (diff) |
Implement bbot agent monitoring
-rwxr-xr-x | buildos | 42 |
1 files changed, 28 insertions, 14 deletions
@@ -640,9 +640,10 @@ function bbot_start () # <toolchain-name> info "failed to stop bbot-agent@$tn service, assuming not running" fi + # We may not be able to uninstall if we previously failed to build. + # if ! bpkg uninstall "${vars[@]}" bbot; then - info "failed to uninstall bbot agent" - break + info "failed to uninstall bbot agent, assuming not installed" fi fi @@ -692,19 +693,14 @@ function bbot_start () # <toolchain-name> fi # Start the service. With Type=simple start returns as soon as the process - # has forked. To see if the service actually started we wait a bit and - # check with status. + # has forked. To see if the service actually started is done as part of + # service monitoring. # if ! sudo systemctl start "bbot-agent@$tn"; then info "failed to start bbot-agent@$tn service" break fi - if ! sudo systemctl status "bbot-agent@$tn"; then - info "failed to start bbot-agent@$tn service" - break - fi - r=0 break done @@ -779,7 +775,7 @@ EOF # info "bootstrapping $tn toolchain..." - toolchain_bootstrap "$tn" 2>&1 | tee "$tr/bootstrap-$count.log" 1>&2 + toolchain_bootstrap "$tn" 2>&1 | tee "$tr/toolchain-$count.log" 1>&2 if [ "${PIPESTATUS[0]}" -eq 0 ]; then v="$(cat $tr/version)" @@ -794,7 +790,7 @@ EOF info "$s" email "$s" <<EOF -$tn.bootstrap_log: tftp://$hname/toolchain/$tn/bootstrap-$count.log +$tn.toolchain_log: tftp://$hname/toolchain/$tn/toolchain-$count.log EOF fi else @@ -804,7 +800,7 @@ EOF done # If we have boostrapped all the toolchains, (re)build and (re)start their - # bbot agents. + # bbot agents and then monitor them. # if [ "${#toolchain_names[@]}" -eq "${#toolchain_boots[@]}" ]; then @@ -822,7 +818,25 @@ EOF case "${PIPESTATUS[0]}" in 0) rm -f "$tr/bbot-$count.log" - continue # Nothing to do. + + # Check if the service has failed. + # + if sudo systemctl is-failed --quiet "bbot-agent@$tn"; then + s="bbot-agent@$tn service has failed, stopping" + + # Note: ignore errors. + # + sudo systemctl status "bbot-agent@$tn" 2>&1 | \ + tee "$tr/bbot-$count.log" 1>&2 + + # Reset it so that we don't keep sending the log on each + # iteration. Note: ignore errors. + # + sudo systemctl reset-failed "bbot-agent@$tn" 2>&1 | \ + tee -a "$tr/bbot-$count.log" 1>&2 + else + continue # Nothing to do. + fi ;; 1) s="re" @@ -847,7 +861,7 @@ EOF info "$s" email "$s" <<EOF -$tn.start_log: tftp://$hname/toolchain/$tn/bbot-$count.log +$tn.bbot_log: tftp://$hname/toolchain/$tn/bbot-$count.log EOF done fi |