diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2017-04-04 14:03:28 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2017-04-04 14:03:28 +0200 |
commit | 1fbd27f288cf31625cff567788aaa0be66caffbd (patch) | |
tree | 82436078a7d247f798fa4b2029a3a3dcf6cabe32 | |
parent | e3889e5ccde6ef7b8a1377ced008252c3a13831b (diff) |
Implement bbot agent startup and monitoring
-rwxr-xr-x | bootstrap | 4 | ||||
-rwxr-xr-x | buildos | 569 | ||||
-rw-r--r-- | doc/manual.cli | 16 |
3 files changed, 400 insertions, 189 deletions
@@ -153,7 +153,7 @@ if [ "$stage" -eq "1" ]; then pkgs+=",tftp-hpa,tftpd-hpa" pkgs+=",bzip2,xz-utils" - pkgs+=",less" + pkgs+=",less,nano" pkgs+=",g++,pkg-config" @@ -428,7 +428,7 @@ fi # To test PXE boot, replace -kernel/-initrd/-append with '-boot n'. # sudo kvm \ - -m 8G \ + -m 16G \ -cpu host -smp "sockets=1,cores=4,threads=2" \ -device "e1000,netdev=net0,mac=$macaddr" \ -netdev "tap,id=net0,script=./qemu-ifup" \ @@ -95,7 +95,7 @@ function restart () sudo systemctl reboot } -# Toolchain-related funtions. +# Process toolchains. # # Return the value of one of the toolchain_* variables for this toolchain. @@ -106,172 +106,74 @@ function tc_value () # <toolchain-prefix> <variable> echo "${!n}" } -# Calculate the file checksum using the shaNNNsum utility. -# -function tc_checksum () # <toolchain-prefix> <file> -{ - "$(tc_value "$1" toolchain_csum)sum" -b "$2" | \ - sed -n -re 's/^([^ ]+) .+$/\1/p' -} - -# Fetch a file from the sums file into $toolchain_root, verify its checksum, -# and make a predictable name (without version) symlink. -# -function tc_fetch () # <toolchain-prefix> <line> -{ - local s p f u l tp tu tr tv - - tp="$1" +toolchain_names=() +for tn in "${!toolchains[@]}"; do + tp="${toolchains["$tn"]}" tu="$(tc_value "$tp" toolchain_url)" - tr="$(tc_value "$tp" toolchain_root)" - s="$(sed -n -re 's/^([^ ]+) .+$/\1/p' <<<"$2")" # Checksum. - p="$(sed -n -re 's/^[^ ]+ \*([^ ]+)$/\1/p' <<<"$2")" # File path (relative). - f="$(sed -n -re 's%^(.+/)?([^/]+)$%\2%p' <<<"$p")" # File name. - u="$(sed -n -re 's%^(.+)/[^/]+$%\1%p' <<<"$tu")/$p" # File URL. - - if [ -z "$s" -o -z "$p" -o -z "$f" -o -z "$u" ]; then - info "invalid sum line '$2'" - return 1 - fi - - # Extract the version. - # - tv="$(tc_value "$tp" toolchain_ver)" - - if [ -z "$tv" ]; then - tv="$(sed -n -re 's/build2-toolchain-(.+)\.tar.*/\1/p' <<<"$f")" - - if [ -z "$tv" ]; then - info "unable to extract toolchain version from '$f'" - return 1 - fi - - declare -g "${tp}toolchain_ver=$tv" - - info "toolchain version $tv" - echo "$tv" >"$tr/version" + if [ -z "$tu" ]; then + continue fi - # Derive a predictable name link. - # - l="$(sed -n -re "s/^(.+)-$tv(.*)$/\1\2/p" <<<"$f")" - - if [ -z "$l" ]; then - info "unable to derive predicatable name from '$f' and '$tv'" - return 1 - fi + toolchain_names+=("$tn") - # Fetch the file. + # The toolchain "sums" file (a list of SHA sums and relative file names, as + # produced by shaNNNsum). The first entry should always be build2-toolchain + # tar archive itself (which we use to figure out the version). Blank lines + # and lines that start with '#' are ignored. # - info "fetching $u [$l]" + tf="$(sed -n -re 's%^.+/([^/]+)$%\1%p' <<<"$tu")" - if ! curl -f -L -s -S -o "$tr/$f" "$u"; then - info "unable to fetch $u" - return 1 - fi + declare "${tp}toolchain_file=$tf" + declare "${tp}toolchain_csum=$(sed -n -re 's%^.+\.([^.]+)$%\1%p' <<<"$tf")" + declare "${tp}toolchain_root=/build/tftp/toolchain/$tn" + declare "${tp}toolchain_ver=" - # Verify the checksum. + # If buildos.toolchain_trust was not specified, set it to "no" so that + # we don't prompt if the repository happens to be signed. # - info "verifying checksum for $f" - - local cs - cs="$(tc_checksum "$tp" "$tr/$f")" - - if [ "$cs" != "$s" ]; then - info "checksum mismatch for $u" - info " expected: $s" - info " calculated: $cs" - return 1 + if [ -z "$(tc_value "$tp" toolchain_trust)" ]; then + declare "${tp}toolchain_trust=no" fi +done - # Make the link. - # - ln -s "$f" "$tr/$l" -} - -# Bootstrap the toolchain. +# Divide CPUs and memory (in kB) among the toolchains. # -function tc_bootstrap () # <toolchain-name> -{ - local tn="$1" - local tp="${toolchains["$tn"]}" - local tr="$(tc_value "$tp" toolchain_root)" - local tf="$(tc_value "$tp" toolchain_file)" - - # Fetch files according to the sums file. Skip empty lines and those that - # start with '#'. - # - local l ls=() - - readarray -t ls < <(sed -e '/^\s*#/d;/^\s*$/d' "$tr/$tf") - - for l in "${ls[@]}"; do - if ! tc_fetch "$tp" "$l"; then - return 1 # Diagnostics has already been issued. - fi - done - - local tv="$(tc_value "$tp" toolchain_ver)" # Should be set by tc_fetch(). - local tt="$(tc_value "$tp" toolchain_trust)" - - # Bootstrap in /tmp/toolchains/$tn/, install to /build/toolchains/$tn/. - # - local wd="/tmp/toolchains/$tn" - local id="/build/toolchains/$tn" - - mkdir -p "$wd" - mkdir -p "$id" - - local r=1 - - cd "$wd" - while true; do # The "breakout loop". - - # Extract the toolchain. - # - if ! tar -xf "$tr/build2-toolchain.tar.xz"; then - info "unable to extract $tr/build2-toolchain.tar.xz" - break - fi - - cd "build2-toolchain-$tv" - - # Bootstrap, stage, and install using the provided build.sh script. - # - if ! ./build.sh --install-dir "$id" --trust "$tt" g++; then - info "failed to build $(pwd)" - break - fi +# Reserve 4G of RAM for ourselves (rootfs, tmpfs). +# +mem_total="$(sed -n -re 's/^MemTotal: *([0-9]+) *kB$/\1/p' </proc/meminfo)" +cpu_total="$(lscpu | sed -n -re 's/^CPU\(s\): *([0-9]+)$/\1/p')" - r=0 - break - done - cd "$owd" +mem_slice=$(("$mem_total" - 4 * 1024 * 1024)) +cpu_slice="$cpu_total" - # Clean up. - # - rm -r "$wd" +if [ "${#toolchain_names[@]}" -gt 1 ]; then + mem_slice=$(("$mem_slice" / "${#toolchain_names[@]}")) + cpu_slice=$(("$cpu_slice" / "${#toolchain_names[@]}")) - return "$r" -} + if [ "$cpu_slice" -eq 0 ]; then + cpu_slice=1 + fi +fi # Print monitor configuration as email body. # function print () { + echo "cpu_total: $cpu_total" + echo "cpu_slice: $cpu_slice" + + echo "mem_total: $mem_total kB" + echo "mem_slice: $mem_slice kB" + echo + echo "buildid: $buildid" echo "buildid_url: $buildid_url" echo - for tn in "${!toolchains[@]}"; do + for tn in "${toolchain_names[@]}"; do tp="${toolchains["$tn"]}" tu="$(tc_value "$tp" toolchain_url)" - - if [ -z "$tu" ]; then - continue - fi - tt="$(tc_value "$tp" toolchain_trust)" echo "$tn.toolchain_url: $tu" @@ -286,38 +188,7 @@ if [ -z "$buildid_url" ]; then info "no buildos.buildid_url specified, not monitoring for new os builds" fi -tc= -for tn in "${!toolchains[@]}"; do - tp="${toolchains["$tn"]}" - tu="$(tc_value "$tp" toolchain_url)" - - if [ -z "$tu" ]; then - continue - fi - - tc="true" - - # The toolchain "sums" file (a list of SHA sums and relative file names, as - # produced by shaNNNsum). The first entry should always be build2-toolchain - # tar archive itself (which we use to figure out the version). Blank lines - # and lines that start with '#' are ignored. - # - tf="$(sed -n -re 's%^.+/([^/]+)$%\1%p' <<<"$tu")" - - declare "${tp}toolchain_file=$tf" - declare "${tp}toolchain_csum=$(sed -n -re 's%^.+\.([^.]+)$%\1%p' <<<"$tf")" - declare "${tp}toolchain_root=/build/tftp/toolchains/$tn" - declare "${tp}toolchain_ver=" - - # If buildos.toolchain_trust was not specified, set it to "no" so that - # we don't prompt if the repository happens to be signed. - # - if [ -z "$(tc_value "$tp" toolchain_trust)" ]; then - declare "${tp}toolchain_trust=no" - fi -done - -if [ -z "$tc" ]; then +if [ "${#toolchain_names[@]}" -eq 0 ]; then info "no buildos.toolchain_url specified, not bootstrapping" fi @@ -400,7 +271,7 @@ for v in /build/machines/*; do # <name>-<toolchain> (bootstrapped image) # f= - for tn in "${!toolchains[@]}"; do + for tn in "${toolchain_names[@]}"; do if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then f="true" break @@ -471,20 +342,303 @@ if [ "${#diag[@]}" -gt 0 ]; then fi fi +# Toolchain-related funtions. +# + +# Calculate the file checksum using the shaNNNsum utility. +# +function tc_checksum () # <toolchain-prefix> <file> +{ + "$(tc_value "$1" toolchain_csum)sum" -b "$2" | \ + sed -n -re 's/^([^ ]+) .+$/\1/p' +} + +# Fetch a file from the sums file into $toolchain_root, verify its checksum, +# and make a predictable name (without version) symlink. +# +function tc_fetch () # <toolchain-prefix> <line> +{ + local s p f u l tp tu tr tv + + tp="$1" + tu="$(tc_value "$tp" toolchain_url)" + tr="$(tc_value "$tp" toolchain_root)" + + s="$(sed -n -re 's/^([^ ]+) .+$/\1/p' <<<"$2")" # Checksum. + p="$(sed -n -re 's/^[^ ]+ \*([^ ]+)$/\1/p' <<<"$2")" # File path (relative). + f="$(sed -n -re 's%^(.+/)?([^/]+)$%\2%p' <<<"$p")" # File name. + u="$(sed -n -re 's%^(.+)/[^/]+$%\1%p' <<<"$tu")/$p" # File URL. + + if [ -z "$s" -o -z "$p" -o -z "$f" -o -z "$u" ]; then + info "invalid sum line '$2'" + return 1 + fi + + # Extract the version. + # + tv="$(tc_value "$tp" toolchain_ver)" + + if [ -z "$tv" ]; then + tv="$(sed -n -re 's/build2-toolchain-(.+)\.tar.*/\1/p' <<<"$f")" + + if [ -z "$tv" ]; then + info "unable to extract toolchain version from '$f'" + return 1 + fi + + declare -g "${tp}toolchain_ver=$tv" + + info "toolchain version $tv" + echo "$tv" >"$tr/version" + fi + + # Derive a predictable name link. + # + l="$(sed -n -re "s/^(.+)-$tv(.*)$/\1\2/p" <<<"$f")" + + if [ -z "$l" ]; then + info "unable to derive predicatable name from '$f' and '$tv'" + return 1 + fi + + # Fetch the file. + # + info "fetching $u [$l]" + + if ! curl -f -L -s -S -o "$tr/$f" "$u"; then + info "unable to fetch $u" + return 1 + fi + + # Verify the checksum. + # + info "verifying checksum for $f" + + local cs + cs="$(tc_checksum "$tp" "$tr/$f")" + + if [ "$cs" != "$s" ]; then + info "checksum mismatch for $u" + info " expected: $s" + info " calculated: $cs" + return 1 + fi + + # Make the link. + # + ln -s "$f" "$tr/$l" +} + +# Bootstrap the toolchain. +# +function tc_bootstrap () # <toolchain-name> +{ + local tn="$1" + local tp="${toolchains["$tn"]}" + local tr="$(tc_value "$tp" toolchain_root)" + local tf="$(tc_value "$tp" toolchain_file)" + + # Fetch files according to the sums file. Skip empty lines and those that + # start with '#'. + # + local l ls=() + + readarray -t ls < <(sed -e '/^\s*#/d;/^\s*$/d' "$tr/$tf") + + for l in "${ls[@]}"; do + if ! tc_fetch "$tp" "$l"; then + return 1 # Diagnostics has already been issued. + fi + done + + local tv="$(tc_value "$tp" toolchain_ver)" # Should be set by tc_fetch(). + local tt="$(tc_value "$tp" toolchain_trust)" + + # Bootstrap in /tmp/toolchain/$tn/, install to /build/toolchain/$tn/. + # + local wd="/tmp/toolchain/$tn" + local id="/build/toolchain/$tn" + + mkdir -p "$wd" + mkdir -p "$id" + + local r=1 + + cd "$wd" + while true; do # The "breakout loop". + + # Extract the toolchain. + # + if ! tar -xf "$tr/build2-toolchain.tar.xz"; then + info "unable to extract $tr/build2-toolchain.tar.xz" + break + fi + + cd "build2-toolchain-$tv" + + # Bootstrap, stage, and install using the provided build.sh script. + # + if ! ./build.sh --install-dir "$id" --trust "$tt" g++; then + info "failed to build $(pwd)" + break + fi + + cd "$wd" + rm -r "build2-toolchain-$tv" + mv -T build2-toolchain-* build2-toolchain # Strip version. + + r=0 + break + done + cd "$owd" + + return "$r" +} + +# Check if we need to build/start or rebuild/restart the bbot agent. Return +# 0 if nothing to do, 1 for upgrades, 2 for first build, and 3 for failure. +# +function bb_check () # <toolchain-name> +{ + local tn="$1" + + export PATH="/build/toolchain/$tn/bin:$PATH" # Running in subshell. + + cd "/tmp/toolchain/$tn/build2-toolchain" + + local r=3 + + local l_stat b_stat + while true; do # The "breakout loop". + + l_stat="$(bpkg status libbbot)" + b_stat="$(bpkg status bbot)" + + if ! bpkg fetch -q; then + info "failed to fetch package information" + break + fi + + # See if this is the first time or if we need to upgrade. + # + if [ "$(cut -d ' ' -f 1 <<<"$b_stat")" = "configured" ]; then + + # We assume that if anything has changed in the status line, then we + # have a new version. + # + if [ "$b_stat" = "$(bpkg status bbot)" -a \ + "$l_stat" = "$(bpkg status libbbot)" ]; then + r=0 + break + fi + + r=1 + break + fi + + r=2 + break + done + cd "$owd" + + return "$r" +} + +# Build and start bbot agent using the bpkg configuration created by +# tc_bootstrap(). +# +function bb_start () # <toolchain-name> +{ + local tn="$1" + + local id="/build/bbot/$tn" + mkdir -p "$id" + + # Install/uninstall vars. + # + local vars=(config.install.root="$id" config.bin.rpath="$id/lib") + + export PATH="/build/toolchain/$tn/bin:$PATH" # Running in subshell. + + cd "/tmp/toolchain/$tn/build2-toolchain" + + local r=1 + + local b_word + while true; do # The "breakout loop". + + b_word="$(bpkg status bbot | cut -d ' ' -f 1)" + + # If upgrading, stop the service and uninstall. + # + if [ "$b_word" = "configured" ]; then + + if ! sudo systemctl stop "bbot-agent@$tn"; then + info "failed to stop bbot-agent@$tn service, assuming not running" + fi + + if ! bpkg uninstall "${vars[@]}" bbot; then + info "failed to uninstall bbot agent" + break + fi + fi + + # Build and install the bbot agent. + # + if ! bpkg build --build-option --jobs --build-option "$cpu_slice" \ + --yes libbbot bbot; then + info "failed to build bbot agent" + break + fi + + if ! bpkg install "${vars[@]}" bbot; then + info "failed to install bbot agent" + break + fi + + # Post-process and install systemd .service file. Note that we cannot use + # the systemd pattern machinery since each version of bbot can have its + # own version of the .service file. + # + sed -i -re "s/%[iI]/$tn/g" "$id/lib/systemd/system/bbot-agent@.service" + sudo ln -sf "$id/lib/systemd/system/bbot-agent@.service" \ + "/usr/lib/systemd/system/bbot-agent@$tn.service" + + # Start the service. + # + if ! sudo systemctl start "bbot-agent@$tn"; then + info "failed to start bbot-agent@$tn service" + break + fi + + r=0 + break + done + cd "$owd" + + return "$r" +} + +# Array of bootstrapped toolchains. +# +# The idea is to collect them until we bootstrap all of them and only then +# start their bbot agents. +# +toolchain_boots=() + # Monitoring loop. # +count=0 while true; do + count=$(($count + 1)) + # Check for toolchain changes. If this is the first run, bootstrap them. # - for tn in "${!toolchains[@]}"; do + for tn in "${toolchain_names[@]}"; do tp="${toolchains["$tn"]}" tu="$(tc_value "$tp" toolchain_url)" - if [ -z "$tu" ]; then - continue - fi - tr="$(tc_value "$tp" toolchain_root)" tf="$(tc_value "$tp" toolchain_file)" p="$tr/$tf" @@ -531,20 +685,22 @@ EOF # info "bootstrapping $tn toolchain..." - tc_bootstrap "$tn" 2>&1 | tee "$tr/bootstrap.log" 1>&2 + tc_bootstrap "$tn" 2>&1 | tee "$tr/bootstrap-$count.log" 1>&2 if [ "${PIPESTATUS[0]}" -eq 0 ]; then v="$(cat $tr/version)" declare "${tp}toolchain_ver=$v" s="bootstrapped $tn toolchain $v" + toolchain_boots+=("$tn") else s="failed to bootstrap $tn toolchain, waiting for new version" + toolchain_boots+=("") # Skip. fi info "$s" email "$s" <<EOF -$tn.bootstrap_log: tftp://$hname/toolchains/$tn/bootstrap.log +$tn.bootstrap_log: tftp://$hname/toolchain/$tn/bootstrap-$count.log EOF fi else @@ -553,6 +709,55 @@ EOF fi done + # If we have boostrapped all the toolchains, (re)build and (re)start their + # bbot agents. + # + if [ "${#toolchain_names[@]}" -eq "${#toolchain_boots[@]}" ]; then + + for tn in "${toolchain_boots[@]}"; do + + # Skip those that failed to bootstrap. + # + if [ -z "$tn" ]; then + continue + fi + + s= + bb_check "$tn" 2>&1 | tee "$tr/bbot-$count.log" 1>&2 + + case "${PIPESTATUS[0]}" in + 0) + rm -f "$tr/bbot-$count.log" + continue # Nothing to do. + ;; + 1) + s="re" + ;& + 2) + info "${s}starting bbot-agent@$tn..." + + # Append to the same log. + # + bb_start "$tn" 2>&1 | tee -a "$tr/bbot-$count.log" 1>&2 + + if [ "${PIPESTATUS[0]}" -eq 0 ]; then + s="${s}started bbot-agent@$tn" + else + s="failed to ${s}start bbot-agent@$tn, waiting for new version" + fi + ;; + *) + s="failed to fetch package information for $tn, will try again" + ;; + esac + + info "$s" + email "$s" <<EOF +$tn.start_log: tftp://$hname/toolchain/$tn/bbot-$count.log +EOF + done + fi + # Check for OS changes. # if [ -n "$buildid_url" ]; then @@ -574,5 +779,5 @@ EOF fi info "monitoring..." - sleep 10 + sleep 20 done diff --git a/doc/manual.cli b/doc/manual.cli index 9e90297..e3b3bd1 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -66,12 +66,18 @@ the following subdirectories: temporarily write-accessible to build machines by running custom instances of the TFTP server on other ports.| -\li|\n\c{/build/toolchains/}\n +\li|\n\c{/build/toolchain/}\n - Contains \c{build2} toolchain installations after bootstrap. Each toolchain - is installed into a subdirectory names as the toolchain name. See + Contains \c{build2} toolchain installations after bootstrap. Each version + is installed into a subdirectory named as the toolchain name. See \l{#config-toolchains Toolchains} for details.| +\li|\n\c{/build/bbot/}\n + + Contains \c{bbot} installations. Each version is installed into a + subdirectory names as the toolchain name. See \l{#config-toolchains + Toolchains} for details.| + \li|\n\c{/build/machines/}\n Contains virtual machines and containers. See \l{#config-storage-machines @@ -330,10 +336,10 @@ e723[...]c305 *0.4.0/build2-mingw-0.4.0-x86_64-windows.tar.xz \ Based on the checksums file the monitor downloads each file into -\c{/build/tftp/toolchains/<name>/} (the file path is taken as relative to +\c{/build/tftp/toolchain/<name>/} (the file path is taken as relative to \c{toolchain_url}), verifies their checksums, and creates \i{predictable name} symlinks (names without the version). Continuing with the above example, the -contents of \c{/build/tftp/toolchains/default/} would be: +contents of \c{/build/tftp/toolchain/default/} would be: \ build2-toolchain-0.4.0.tar.xz |