diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2017-05-24 15:51:50 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2017-05-24 15:51:50 +0200 |
commit | 45c9fbacc4b21efd54173bad11998ceffd175676 (patch) | |
tree | efb8876ca8ee7554d94140aa1791991b104a4b49 | |
parent | 49cce16baa7ec26af089a50c9bcb6d2eebbcaf2a (diff) |
Improve bbot diagnostics email
-rwxr-xr-x | buildos | 86 |
1 files changed, 63 insertions, 23 deletions
@@ -976,6 +976,9 @@ EOF # (not even the cursor) if there are no new entries. The new # versions output the old cursor. # + # Plus, it sometimes changes the cursor even without any errors in + # it (journal rewind/truncation maybe?) so we have to detect that. + # c=(sudo journalctl --unit "bbot-agent@$tn") # Get the last cursor if any. @@ -985,36 +988,73 @@ EOF c+=("--after-cursor" "$oc") fi - # Get the log range: the first line is the date of the first error - # and the second line is the end cursor. + # Get the "log range": the first line is the date of the first + # error, the second line is the date of the last error, and the + # third line is the end cursor. It can also be just one line in + # which case it is the new cursor (that rewind stuff). + # + # Here is what's going on in that sed script: + # + # The first chunk matches the first line. We first put it into + # the hold space (in case that's the only line) and then extract + # and print the date. + # + # The second chunk matches the last line. We first handle the hold + # space which by now should contain the last error line and then + # the cursor. + # + # The last chunk matches every other line. We simply replace the + # hold space with the next line so that at the end we have the + # last line there. # lr="$("${c[@]}" --no-pager --quiet --output short-full \ ---priority 4 --show-cursor | \ -sed -n -re '1s/^... ([^ ]+ [^ ]+) .*$/\1/p;s/^-- cursor: (.+)$/\1/p')" - - nc="$(sed -n -e '2p' <<<"$lr")" +--priority 4 --show-cursor | sed -n -r \ +-e '1{h;s/^[MTWFS].. ([^ ]+ [^ ]+) .*$/\1/p;t}' \ +-e '${x;s/^[MTWFS].. ([^ ]+ [^ ]+) .*$/\1/p;x;s/^-- cursor: (.+)$/\1/p;t}' \ +-e 'h')" + lc="$(wc -l <<<"$lr")" + nc="$(sed -n -e "${lc}p" <<<"$lr")" # If we have no new entries, then nothing to do. # - if [ -n "$nc" -a "$nc" != "$oc" ]; then + if [ "$nc" != "$oc" ]; then - # Try to get some context for the first error entry. This is - # unexpectedly hard in systemd. So we are going to get all the - # entries that start a minute ago. + # We may have no actual entries (cursor rewind). # - sd="$(sed -n -e '1p' <<<"$lr")" - sd="$(date '+%Y-%m-%d %H:%M:%S' -d "$sd +00:01")" - - s="bbot-agent@$tn service issued new diagnostics" - - info "$s" - { - echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}"; - echo; - echo; - "${c[@]}" --no-pager --quiet --output short-full \ - --since "$sd" --lines 100 - } | email "$s" + if [ "$lc" -ne 1 ]; then + + # Try to get some context before the first error and after the + # last. This is unexpectedly hard in systemd. + # + # This can be a lot of output which makes it hard to spot the + # error so we are going to print just the error summary first. + # Quite a mess, I agree. + # + sd="$(sed -n -e '1p' <<<"$lr")" + sd="$(date '+%s' -d "$sd")" # sec + sd="$(date '+%Y-%m-%d %H:%M:%S' -d "@$(($sd - 10))")" # -10sec + + ed="$(sed -n -e '2p' <<<"$lr")" + ed="$(date '+%s' -d "$ed")" # sec + ed="$(date '+%Y-%m-%d %H:%M:%S' -d "@$(($ed + 10))")" # +10sec + + s="bbot-agent@$tn service issued new diagnostics" + + info "$s" + { + echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}"; + echo; + echo "summary:"; + echo; + "${c[@]}" --no-pager --quiet --output short-full \ + --priority 4 | head -n 200; + echo; + echo "context [$sd -- $ed]:"; + echo; + "${c[@]}" --no-pager --quiet --output short-full \ + --since "$sd" --until "$ed" | head -n 200 + } | email "$s" + fi toolchain_cursors["$tn"]="$nc" fi |