#!/bin/bash

# Init script for build2 Build OS.
#
# Loosely based on the one that comes in Debian initrd.img (since we are
# using its kernel image).
#

# Any failing simple command terminates init with status 1. The errtrace
# option makes the ERR trap apply inside functions as well.
#
trap "exit 1" ERR
set -o errtrace # Trap in functions.

# Print all the arguments as a single line.
#
# Note: diagnostics goes to stdout.
#
function info () { echo "$*"; }

# Print the optional message (all the arguments), then start an interactive
# shell on /dev/tty1 and force a reboot once that shell exits.
#
function error ()
{
  if [ "$#" -gt 0 ]; then
    info "$*";
  fi

  # The setsid voodoo (taken from Debian init's panic()) is to enable job
  # control.
  #
  info "type Ctrl-D to exit shell and reboot"
  setsid /bin/bash -c "exec /bin/bash -i <>/dev/tty1 1>&0 2>&1"
  reboot -f
}

# Some pre-systemd utilities (like reboot) come from klibc-utils.
#
export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/lib/klibc/bin/

# One would expect rootflags=size=Xg to work but it doesn't (perhaps init
# is expected to interpret it)?
#
mount -o remount,size=2G /

mkdir -p /sys /proc
mount -t sysfs -o nodev,noexec,nosuid sysfs /sys
mount -t proc -o nodev,noexec,nosuid proc /proc

info "init starting up..."

mount -t devtmpfs -o nosuid,mode=0755 udev /dev

# Prepare the /dev directory.
#
ln -s /proc/self/fd /dev/fd
ln -s /proc/self/fd/0 /dev/stdin
ln -s /proc/self/fd/1 /dev/stdout
ln -s /proc/self/fd/2 /dev/stderr

# Failure to mount devpts is deliberately tolerated.
#
mkdir -p /dev/pts
mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true

# If tmpfs size is unspecified, then the default is 50%. Note that for /tmp in
# particular we may (temporarily) utilize several gigabytes while building the
# toolchain.
#
mkdir -p /run
mount -t tmpfs -o nodev,noexec,nosuid,size=512M,mode=0755 tmpfs /run

mkdir -p /tmp
mount -t tmpfs -o nodev,nosuid,size=2G,mode=1777 tmpfs /tmp

# Start udev.
#
# Based on Debian initrd's init-{top,bottom}/udev
#
info "starting udev..."
# Clear the kernel's uevent helper (if it is writable) before starting the
# udev daemon.
#
if [ -w /sys/kernel/uevent_helper ]; then
  echo >/sys/kernel/uevent_helper
fi

SYSTEMD_LOG_LEVEL=info /lib/systemd/systemd-udevd --daemon --resolve-names=never

udevadm trigger --type=subsystems --action=add
udevadm trigger --type=devices --action=add
udevadm settle || true

# On 6-series kernels we seem to be executed a lot earlier (or a lot faster)
# with many devices (Ethernet, USB storage) not being discovered yet (and
# devices that require firmware generally taking a while). So let's wait a
# bit for things to settle down.
#
for s in 5 4 3 2 1; do
  info "waiting for devices ${s}s..."
  sleep 1
done

# Detect hardware sensors.
#
sensors-detect --auto

# Initialize KVM.
#
#if ! (/sbin/modprobe kvm_intel || /sbin/modprobe kvm_amd); then
#  error "no virtualization support available (is it disabled in BIOS?)"
#fi

# Parse the kernel command line. This is complicated by the fact that the
# values can be quoted, for example:
#
# foo='foo fox'
# bar="bar 'box'"
#
# Or (as rewritten by GRUB):
#
# 'foo=foo fox'
# "bar=bar 'box'"
#
# First we separate quoted variables and arguments with newlines (giving
# priority to assignments). Then we replace whitespaces with newline on
# lines that don't contain quotes. Finally, clean up by removing blank
# lines.
#
# Note: the same code as in buildos.
#
readarray -t cmdline < <(cat /proc/cmdline | \
  sed -r -e "s/([^ ]+=)?('[^']*'|\"[^\"]*\")/\n\1\2\n/g" | \
  sed -r -e "/['\"]/!s/ /\n/g" | sed -r -e '/^\s*$/d')

# Enter all buildos variables as bash variables.
#
info "command line:"

for v in "${cmdline[@]}"; do

  # Rewrite "x=y" as x="y" (as well as the single-quote variant).
  #
  v1="$(sed -n -re "s/^\"([^= ]+)=(.*)\"\$/\1=\"\2\"/p" <<<"$v")"
  if [ -n "$v1" ]; then
    v="$v1"
  else
    v1="$(sed -n -re "s/^'([^= ]+)=(.*)'\$/\1='\2'/p" <<<"$v")"
    if [ -n "$v1" ]; then
      v="$v1"
    fi
  fi

  var="$(sed -n -re 's/^buildos\.([^= ]+)=.*$/\1/p' <<<"$v")" # Extract name.

  if [ -n "$var" ]; then
    val="$(sed -re 's/^[^= ]+=(.*)$/\1/' <<<"$v")" # Extract value.
    val="$(sed -re "s/^('(.*)'|\"(.*)\")\$/\2\3/" <<<"$val")" # Strip quoted.

    info " $var=$val"

    # If the variable contains a dot, then it is a toolchain variable and we
    # don't care about those in init.
    #
    if [[ "$var" != *.* ]]; then
      declare "$var=$val"
    fi
  fi
done

# Figure out network configuration and generate the corresponding
# /etc/network/interfaces.
#
info "starting network..."

# We are using udev's predictable interface names. The two character prefixes
# based on the type of interface:
#
# en -- ethernet
# sl -- serial line IP (slip)
# wl -- wlan
# ww -- wwan
#
# Note that we collect the names with a glob rather than with ls: if there
# are no matching interfaces, ls exits with non-zero status which would fire
# the ERR trap (and thus exit) before we get a chance to issue diagnostics
# below. An unmatched glob pattern is left as is and is skipped by the
# existence test.
#
eth_all=
for p in /sys/class/net/en?*; do
  if [ -e "$p" ]; then
    eth_all+=" ${p##*/}"
  fi
done

if [ -z "$eth_all" ]; then
  info "no ethernet interfaces found among:"
  ip link show
  error
fi

eth=
eth_up=
for s in 1 2 4 8; do

  # Try to bring them all up and find the one that has carrier.
  #
  for i in $eth_all; do
    ip link set "$i" up || true
  done

  sleep "$s"

  for i in $eth_all; do

    # Reading carrier may fail (for example, if the interface could not be
    # brought up) in which case we treat it as no carrier.
    #
    c="$(cat "/sys/class/net/$i/carrier" 2>/dev/null || true)"

    if [ "$c" = "1" ]; then
      info "detected carrier on $i"

      # Don't add the same interface again on subsequent passes (otherwise
      # we would be running dhclient on it multiple times below).
      #
      case " $eth_up " in
        *" $i "*) ;;
        *) eth_up+=" $i" ;;
      esac
    fi
  done

  # Bring them all down.
  #
  for i in $eth_all; do
    ip link set "$i" down || true
  done

  # If we didn't find anything, try to wait for carrier longer.
  #
  if [ -z "$eth_up" ]; then
    continue
  fi

  # If we end up with several interfaces we simply unleash dhcp on all of
  # them and use the first that gets configured.
  #
  # Note also that it's possible the interface that we want is not yet ready
  # in which case we will try to wait for carrier a bit longer.
  #
  for i in $eth_up; do
    if dhclient -v "$i"; then
      eth="$i"
      break
    fi
  done

  if [ -n "$eth" ]; then
    break
  fi
done

if [ -z "$eth_up" ]; then
  info "no ethernet interfaces with carrier among:"
  ip link show
  error
fi

if [ -z "$eth" ]; then
  info "no ethernet interfaces with DHCP among:"
  ip link show
  error
fi

# Global and local MAC addresses (used below for br0 and br1, respectively).
# Derive the local address from the global by fixing the first octet to 02
# (locally-assigned).
#
gmac="$(cat "/sys/class/net/$eth/address")"
lmac="$(sed -re 's/..:(.+)/02:\1/g' <<<"$gmac")"

info "configured $eth ($gmac)"

# Machine id (the global MAC address with the colons removed).
#
mid="$(sed -re 's/://g' <<<"$gmac")"

# Set the hostname.
#
hname="$(hostname)"

if [ "$hname" = "(none)" ]; then
  hname="build-$mid"
  hostname "$hname"
fi

echo "$hname" >/etc/hostname
info "hostname $hname"

# Stop DHCP client without releasing the lease and deconfigure the interface.
# The plan is to generate a bridge-based /etc/network/interfaces configuration
# based on what we have discovered and then let the systemd networking bringup
# to configure everything (at which point we will hopefully reuse the lease).
#
dhclient -x 2>/dev/null

# @@ Needs to be made configurable. Something like 172.23.0.0/16.
#
priv_network="172.23.0.0"
priv_netmask="255.255.0.0"
priv_netbase="$(sed -e 's/^\(.*\)\.0\.0$/\1/' <<<"$priv_network")"

# Note that if we don't assign the bridge MAC address, then it will keep
# changing every time an interface with a greater address (e.g., a tap)
# joins the bridge. Needless to say, constantly changing MAC will wreak
# all kinds of networking havoc.
#
cat <<EOF >/etc/network/interfaces
auto lo
iface lo inet loopback

# Public bridge.
#
auto br0
iface br0 inet dhcp
  bridge_ports $eth
  bridge_stp off
  bridge_maxwait 0
  bridge_fd 0
  bridge_hw $gmac
  post-up ip link set $eth txqueuelen 4000
  post-up ip link set br0 txqueuelen 4000

# Private bridge with NAT to br0.
#
auto br1
iface br1 inet static
  address ${priv_netbase}.0.1
  netmask $priv_netmask
  bridge_ports none
  bridge_stp off
  bridge_maxwait 0
  bridge_fd 0
  bridge_hw $lmac
  #post-up ip link set br1 address $lmac
  post-up ip link set br1 txqueuelen 4000
  post-up iptables -t nat -A POSTROUTING -o br0 -j MASQUERADE
  post-up iptables -A FORWARD -i br0 -o br1 -m state --state RELATED,ESTABLISHED -j ACCEPT
  post-up iptables -A FORWARD -i br1 -o br0 -j ACCEPT
EOF

cat <<EOF >/etc/dnsmasq.d/br1-dhcp
interface=br1
bind-interfaces
dhcp-range=${priv_netbase}.1.1,${priv_netbase}.255.255,$priv_netmask,2h
EOF

# Figure out disk configuration and generate the corresponding /etc/fstab.
#
fstab=/etc/fstab
#fstab=/dev/stdout

echo -n '' >$fstab

# State:
#
# l             -- current lsblk line and then the (stripped) label
# state         -- non-empty once the buildos.state disk has been mounted
# machines      -- mount point to space-separated device list map
# machines_mode -- "single" (buildos.machines) or "multiple"
#                  (buildos.machines.*); the two cannot be mixed
#
l=
state=
declare -A machines
machines_mode=

while read l || [ -n "$l" ]; do
  d="$(sed -re 's/.*NAME=\"([^\"]+)\".*/\1/' <<<"$l")"
  t="$(sed -re 's/.*FSTYPE=\"([^\"]*)\".*/\1/' <<<"$l")"
  l="$(sed -re 's/.*LABEL=\"([^\"]*)\".*/\1/' <<<"$l")"

  # Strip the buildos prefix from the label. If the result is empty then this
  # disk/partition hasn't been labeled for use by us.
  #
  l="$(sed -n -re 's/^buildos\.([^ ]+)$/\1/p' <<<"$l")"

  if [ -z "$l" ]; then
    continue
  fi

  # Handle buildos.state.
  #
  if [ "$l" == "state" ]; then

    if [ -n "$state" ]; then
      error "multiple disks labeled with buildos.state"
    fi

    if [ -z "$t" ]; then
      error "no filesystem on $d labeled with buildos.state"
    fi

    info "mounting $d (buildos.state) on /state as $t"

    # Check it.
    #
    if ! fsck -n -t "$t" "$d"; then
      info "$d (buildos.state) has errors; run fsck -t $t $d"
      error
    fi

    o="defaults,noatime,nodiratime"
    echo "$d /state $t $o 0 0" >>$fstab

    # Mount it now since we need it below.
    #
    mkdir -p "/state"
    mount -t "$t" -o "$o" "$d" /state

    state="true"
    continue
  fi

  # Handle buildos.machines and buildos.machines.* mounts.
  #
  if [[ "$l" == "machines" ]] || [[ "$l" =~ "machines.".+ ]]; then

    if [ "$t" != "btrfs" ]; then
      error "non-btrfs filesystem on $d labeled with buildos.machines"
    fi

    if [ "$l" = "machines" ]; then

      # Single mount.
      #
      if [ "$machines_mode" = "multiple" ]; then
        error "multiple disks labeled with buildos.machines/machines.*"
      fi

      m=/build/machines/default
      machines["$m"]="${machines["$m"]} $d"
      machines_mode="single"
    else

      # Multiple mounts.
      #
      if [ "$machines_mode" = "single" ]; then
        error "multiple disks labeled with buildos.machines/machines.*"
      fi

      n="$(sed -n -re 's/^machines\.([^ ]+)$/\1/p' <<<"$l")"
      m="/build/machines/$n"
      machines["$m"]="${machines["$m"]} $d"
      machines_mode="multiple"
    fi

    info "will be mounting $d (buildos.$l) on $m"
    continue
  fi
done < <(lsblk --pairs --paths --output NAME,FSTYPE,LABEL)

# NOTE(review): the text that followed the lsblk loop (a commented-out
# here-document alternative input for the loop and everything up to the
# fstab append below) did not survive in this copy of the script. The loop
# header and the computation of fd (the first device) and o (the mount
# options) are reconstructed from the surviving statements and from the way
# the machines map is populated above. Confirm against the upstream script,
# in particular the exact mount options and any sanity checks (such as
# missing buildos.state or buildos.machines disks) that may have preceded
# this loop.
#
for m in "${!machines[@]}"; do

  # Split the space-separated device list (unquoted on purpose).
  #
  ds=(${machines["$m"]})
  fd="${ds[0]}"

  # Presumably the same base options as for /state plus all the devices that
  # belong to this (potentially multi-device) filesystem -- TODO confirm.
  #
  o="defaults,noatime,nodiratime"
  for d in "${ds[@]}"; do
    o+=",device=$d"
  done

  echo "$fd $m btrfs $o 0 0" >>$fstab

  # Mount it and change the owner of the filesystem root.
  #
  mkdir -p "$m"
  mount -t btrfs -o "$o" "$fd" "$m"
  chown build:build "$m"
done

# Create /build/tftp. We make it a size-limited tmpfs since potentially-
# compromised VMs will be able to upload to.
#
mkdir -p /build/tftp

o="nodev,noexec,nosuid,size=512M"
o+=",mode=0755,uid=$(id -u build),gid=$(id -g build)"
echo "tmpfs /build/tftp tmpfs $o 0 0" >>$fstab

# Generate a host key. Used, for example, by the bbot agent to authenticate
# with a controller.
#
mkdir -p /state/etc

if [ ! -e /state/etc/host-key.pem ]; then
  openssl genrsa 4096 >/state/etc/host-key.pem
fi

# Configure Postfix.
#
# Note: smtp_relay is not assigned in this script; presumably it comes from
# the buildos.smtp_relay kernel command line variable -- TODO confirm.
#
cat <<<"$hname" >/etc/mailname

sed -r -i \
  -e "s%^(myhostname).*%\1 = $hname%" \
  -e 's%^(mydestination).*%\1 = $myhostname, localhost.localdomain, localhost%' \
  -e 's%^(mynetworks).*%\1 = 127.0.0.0/8 [::ffff:127.0.0.0]/104 [::1]/128%' \
  -e "s%^(relayhost).*%\1 = $smtp_relay%" \
  /etc/postfix/main.cf

# Make admin alias for buildos.admin_email, alias root as admin.
#
cat <<EOF >>/etc/aliases
admin: $admin_email
root: admin
EOF

newaliases

# Configure OpenSSH server. Things that we do:
#
# - Change host key locations to (persistent) /state/etc/ssh/ and remove
#   existing keys. If no corresponding key exists in /state, generate it.
#
# - Disable root login.
#
# - Disable password authentication.
#
sed -r -i \
  -e "s%^#?HostKey +(.+)%HostKey /state\1%" \
  -e "s%^#?PermitRootLogin.*%PermitRootLogin no%" \
  -e "s%^#?PasswordAuthentication.*%PasswordAuthentication no%" \
  /etc/ssh/sshd_config

# Generate missing keys.
#
# The key types are derived from the names of the host keys that exist in
# /etc/ssh (ssh_host_<type>_key).
#
mkdir -p /state/etc/ssh

for k in $(echo /etc/ssh/ssh_host_*_key | \
             sed -re 's%/etc/ssh/ssh_host_([^_]+)_key%\1%g'); do
  if [ ! -e "/state/etc/ssh/ssh_host_${k}_key" ]; then
    ssh-keygen -N "" -t "$k" -f "/state/etc/ssh/ssh_host_${k}_key"
  fi
done

rm -f /etc/ssh/ssh_host_*_key*

# Add buildos.ssh_key to build's authorized_keys.
#
if [ -n "$ssh_key" ]; then
  info "adding buildos.ssh_key to ~build/.ssh/authorized_keys"

  mkdir -p /build/.ssh
  echo "$ssh_key" >>/build/.ssh/authorized_keys

  chown build:build /build/.ssh /build/.ssh/authorized_keys
  chmod 700 /build/.ssh
  chmod 600 /build/.ssh/authorized_keys
fi

# Configure the TFTP server (tftpd-hpa).
#
# We could have ran it as user 'build' but since it is read-only, the default
# user 'tftp' works just as well. Note that by default it is running chroot'ed
# (--secure) so no symlinks pointing outside /build/tftp will work.
#
sed -r -i \
  -e "s%^(TFTP_DIRECTORY).*%\1=\"/build/tftp\"%" \
  /etc/default/tftpd-hpa

# Hand off to systemd. But first arrange to keep console output (which
# becomes tty1).
#
mkdir -p /etc/systemd/system/getty@tty1.service.d

cat <<EOF >/etc/systemd/system/getty@tty1.service.d/noclear.conf
[Service]
TTYVTDisallocate=no
EOF

# Stop udev daemon (systemd will start its own).
#
udevadm control --exit

# Get rid of klibc tools.
#
export PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Replace this process with systemd, attaching its stdin/stdout/stderr to the
# console. The machine id is the MAC-derived id left-padded with zeros.
#
exec /lib/systemd/systemd \
  --show-status=1 \
  --machine-id="00000000000000000000$mid" \
  </dev/console >/dev/console 2>&1