40 files changed, 3139 insertions, 438 deletions
@@ -99,7 +99,7 @@ $ bpkg create \
 $ bpkg add https://pkg.cppget.org/1/alpha
 $ bpkg fetch
-$ bpkg build brep ?sys:libapr1 ?sys:libapreq2 ?sys:libpq
+$ bpkg build brep ?sys:libapr1/* ?sys:libapreq2/* ?sys:libpq/*
 $ bpkg install brep
 $ cd .. # Back to brep home.
@@ -218,7 +218,21 @@ $ cp install/share/brep/etc/brep-module.conf config/
 $ edit config/brep-module.conf # Adjust default values if required.
 
 To enable the build2 build bot controller functionality you will need to set
-the build-config option in brep-module.conf.
+the build-config option in brep-module.conf. To also enable the build
+artifacts upload functionality you will need to specify the upload-data
+directory for the desired upload types in brep-module.conf. For example, for
+generated binary distribution packages it can be as follows:
+
+upload-data bindist=/home/brep/bindist-data
+
+Note that this directory must exist and have read, write, and execute
+permissions granted to the www-data user. This, for example, can be achieved
+with the following commands:
+
+$ mkdir /home/brep/bindist-data
+$ setfacl -m g:www-data:rwx /home/brep/bindist-data
+
+For sample upload handler implementations see brep/handler/upload/.
 
 To enable the package submission functionality you will need to specify the
 submit-data and submit-temp directories in brep-module.conf. Note that these
@@ -330,8 +344,12 @@ user/group. Otherwise, a cron job is a natural choice.
 
 Note that the builds cleaner execution is optional and is only required if the
 build2 build bot functionality is enabled (see the build bot documentation for
-details). If it is disabled in you setup, then skip the cleaner-related
-parts in the subsequent subsections.
+details). If it is disabled in your setup, then skip the cleaner-related parts
+in the subsequent subsections.
+
+If the build artifacts upload functionality is enabled in addition to the
+build2 build bot functionality, you most likely will want to additionally set
+up the cleanup of the outdated build artifacts.
 
 If the CI request functionality is enabled you most likely will want to
 additionally setup the tenants cleanup.
@@ -346,8 +364,9 @@ infrastructure.
 8.a Setup Periodic Loader, Cleaner, and Monitor Execution with cron
 
 The following crontab entries will execute the loader every five minutes, the
-tenants and builds cleaners once a day at midnight, and the monitor every hour
-(all shifted by a few minutes in order not to clash with other jobs):
+tenants, builds, and binary distribution cleaners once a day at midnight, and
+the monitor every hour (all shifted by a few minutes in order not to clash
+with other jobs):
 
 $ crontab -l
 MAILTO=<brep-admin-email>
@@ -355,7 +374,8 @@ PATH=/usr/local/bin:/bin:/usr/bin
 */5 * * * * $HOME/install/bin/brep-load $HOME/config/loadtab
 1 0 * * * $HOME/install/bin/brep-clean tenants 240
 2 0 * * * $HOME/install/bin/brep-clean builds $HOME/config/buildtab
-3 * * * * $HOME/install/bin/brep-monitor --report-timeout 86400 --clean $HOME/config/brep-module.conf public
+3 0 * * * $HOME/install/bin/brep-upload-bindist-clean /var/bindist 2880
+4 * * * * $HOME/install/bin/brep-monitor --report-timeout 86400 --clean $HOME/config/brep-module.conf public
 ^D
 
 Note that here we assume that bpkg (which is executed by brep-load) is in one
diff --git a/brep/handler/buildfile b/brep/handler/buildfile
index b76b465..cd11231 100644
--- a/brep/handler/buildfile
+++ b/brep/handler/buildfile
@@ -5,6 +5,6 @@ import mods = libbutl.bash%bash{manifest-parser}
 import mods += libbutl.bash%bash{manifest-serializer}
 import mods += bpkg-util%bash{package-archive}
 
-./: bash{handler} submit/ ci/
+./: bash{handler} submit/ ci/ upload/
 
 bash{handler}: in{handler} $mods
diff --git a/brep/handler/submit/submit-pub.in b/brep/handler/submit/submit-pub.in
index f4a3c2d..42d478d 100644
--- a/brep/handler/submit/submit-pub.in
+++ b/brep/handler/submit/submit-pub.in
@@ -12,7 +12,7 @@
 #
 # Specifically, the handler performs the following steps:
 #
-# - Lock the repository directory for the duraton of the package submission.
+# - Lock the repository directory for the duration of the package submission.
 #
 # - Check for the package duplicate.
 #
diff --git a/brep/handler/upload/.gitignore b/brep/handler/upload/.gitignore
new file mode 100644
index 0000000..da4dc5a
--- /dev/null
+++ b/brep/handler/upload/.gitignore
@@ -0,0 +1,2 @@
+brep-upload-bindist
+brep-upload-bindist-clean
diff --git a/brep/handler/upload/buildfile b/brep/handler/upload/buildfile
new file mode 100644
index 0000000..ca52ddd
--- /dev/null
+++ b/brep/handler/upload/buildfile
@@ -0,0 +1,13 @@
+# file      : brep/handler/upload/buildfile
+# license   : MIT; see accompanying LICENSE file
+
+./: exe{brep-upload-bindist} exe{brep-upload-bindist-clean}
+
+include ../
+
+exe{brep-upload-bindist}: in{upload-bindist} bash{upload} ../bash{handler}
+
+[rule_hint=bash] \
+exe{brep-upload-bindist-clean}: in{upload-bindist-clean}
+
+bash{upload}: in{upload} ../bash{handler}
diff --git a/brep/handler/upload/upload-bindist-clean.in b/brep/handler/upload/upload-bindist-clean.in
new file mode 100644
index 0000000..20c2b00
--- /dev/null
+++ b/brep/handler/upload/upload-bindist-clean.in
@@ -0,0 +1,212 @@
+#!/usr/bin/env bash
+
+# file      : brep/handler/upload/upload-bindist-clean.in
+# license   : MIT; see accompanying LICENSE file
+
+# Remove expired package configuration directories created by the
+# upload-bindist handler.
+#
+# Specifically, perform the following steps:
+#
+# - Recursively scan the specified root directory and collect the package
+#   configuration directories that are older than the specified timeout (in
+#   minutes). Recognize the package configuration directories by matching the
+#   *-????-??-??T??:??:??Z* pattern and calculate their age based on the
+#   modification time of the packages.sha256 file they may contain. If
+#   packages.sha256 doesn't exist in the configuration directory, then
+#   consider it as still being prepared and skip it.
+#
+# - Iterate over the expired package configuration directories and for each
+#   of them:
+#
+#   - Lock the root directory.
+#
+#   - Re-check the expiration criteria.
+#
+#   - Remove the package configuration symlink if it refers to this
+#     directory.
+#
+#   - Remove this directory.
+#
+#   - Remove all the parent directories of this directory which become
+#     empty, up to (but excluding) the root directory.
+#
+#   - Unlock the root directory.
+#
+usage="usage: $0 <root> <timeout>"
+
+# Diagnostics.
+#
+verbose= #true
+
+# The root directory lock timeout (in seconds).
+#
+lock_timeout=60
+
+trap "{ exit 1; }" ERR
+set -o errtrace  # Trap in functions and subshells.
+set -o pipefail  # Fail if any pipeline command fails.
+shopt -s lastpipe # Execute last pipeline command in the current shell.
+shopt -s nullglob # Expand no-match globs to nothing rather than themselves.
+
+function info () { echo "$*" 1>&2; }
+function error () { info "$*"; exit 1; }
+function trace () { if [ "$verbose" ]; then info "$*"; fi }
+
+# Trace a command line, quoting empty arguments as well as those that contain
+# spaces.
+#
+function trace_cmd () # <cmd> <arg>...
+{
+  if [[ "$verbose" ]]; then
+    local s="+"
+    while [ $# -gt 0 ]; do
+      if [ -z "$1" -o -z "${1##* *}" ]; then
+        s="$s '$1'"
+      else
+        s="$s $1"
+      fi
+
+      shift
+    done
+
+    info "$s"
+  fi
+}
+
+# Trace and run a command.
+#
+function run () # <cmd> <arg>...
+{
+  trace_cmd "$@"
+  "$@"
+}
+
+if [[ "$#" -ne 2 ]]; then
+  error "$usage"
+fi
+
+# Package configurations root directory.
+#
+root_dir="${1%/}"
+shift
+
+if [[ -z "$root_dir" ]]; then
+  error "$usage"
+fi
+
+if [[ ! -d "$root_dir" ]]; then
+  error "'$root_dir' does not exist or is not a directory"
+fi
+
+# Package configuration directories timeout.
+#
+timeout="$1"
+shift
+
+if [[ ! "$timeout" =~ ^[0-9]+$ ]]; then
+  error "$usage"
+fi
+
+# Note that while the '%s' date format is not POSIX, it is supported on both
+# Linux and FreeBSD.
+#
+expiration=$(($(date -u +"%s") - $timeout * 60))
+
+# Collect the list of expired package configuration directories.
+#
+expired_dirs=()
+
+run find "$root_dir" -type d -name "*-????-??-??T??:??:??Z*" | while read d; do
+  f="$d/packages.sha256"
+
+  # Note that while the -r date option is not POSIX, it is supported on both
+  # Linux and FreeBSD.
+  #
+  trace_cmd date -u -r "$f" +"%s"
+  if t="$(date -u -r "$f" +"%s" 2>/dev/null)" && (($t <= $expiration)); then
+    expired_dirs+=("$d")
+  fi
+done
+
+if [[ "${#expired_dirs[@]}" -eq 0 ]]; then
+  exit 0 # Nothing to do.
+fi
+
+# Make sure the root directory lock file exists.
+#
+lock="$root_dir/upload.lock"
+run touch "$lock"
+
+# Remove the expired package configuration directories, symlinks which refer
+# to them, and the parent directories which become empty.
+#
+for d in "${expired_dirs[@]}"; do
+  # Deduce the path of the potential package configuration symlink that may
+  # refer to this package configuration directory by stripping the
+  # -<timestamp>[-<number>] suffix.
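+  #
+  # For example (a hypothetical path, purely for illustration):
+  #
+  #   .../foo/libfoo/1.0.0/release-2024-01-02T03:04:05Z-1
+  #   -> .../foo/libfoo/1.0.0/release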
+ # + l="$(sed -n -re 's/^(.+)-[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z(-[0-9]+)?$/\1/p' <<<"$d")" + if [[ -z "$l" ]]; then + error "invalid name '$d' for package configuration directory" + fi + + f="$d/packages.sha256" + + # Open the reading file descriptor and lock the root directory. Fail if + # unable to lock before timeout. + # + trace "+ exec {lfd}<$lock" + exec {lfd}<"$lock" + + if ! run flock -w "$lock_timeout" "$lfd"; then + error "unable to lock root directory" + fi + + # Now, as the lock is acquired, recheck the package configuration directory + # expiration criteria (see above) and, if it still holds, remove this + # directory, the package configuration symlink if it refers to it, and all + # the parent directories which become empty up to (but excluding) the root + # directory. + # + trace_cmd date -u -r "$f" +"%s" + if t="$(date -u -r "$f" +"%s" 2>/dev/null)" && (($t <= $expiration)); then + # Remove the package configuration symlink. + # + # Do this first to avoid dangling symlinks which may potentially be + # exposed by brep. + # + # Note that while the realpath utility is not POSIX, it is present on + # both Linux and FreeBSD. + # + if [[ -L "$l" ]]; then + p="$(realpath "$l")" + if [[ "$p" == "$d" ]]; then + run rm "$l" + fi + fi + + # Remove the package configuration directory. + # + run rm -r "$d" + + # Remove the empty parent directories. + # + # Note that we iterate until the rmdir command fails, presumably because a + # directory is not empty. + # + d="$(dirname "$d")" + while [[ "$d" != "$root_dir" ]]; do + trace_cmd rmdir "$d" + if rmdir "$d" 2>/dev/null; then + d="$(dirname "$d")" + else + break + fi + done + fi + + # Close the file descriptor and unlock the root directory. + # + trace "+ exec {lfd}<&-" + exec {lfd}<&- +done diff --git a/brep/handler/upload/upload-bindist.in b/brep/handler/upload/upload-bindist.in new file mode 100644 index 0000000..ba05bc3 --- /dev/null +++ b/brep/handler/upload/upload-bindist.in @@ -0,0 +1,531 @@ +#!/usr/bin/env bash + +# file : brep/handler/upload/upload-bindist.in +# license : MIT; see accompanying LICENSE file + +# Binary distribution packages upload handler which places the uploaded +# packages under the following filesystem hierarchy: +# +# <root>/[<tenant>/]<instance>/<os-release-name-id><os-release-version-id>/<project>/<package>/<version>/<package-config> +# +# The overall idea behind this handler is to create a uniquely named package +# configuration directory for each upload and maintain the package +# configuration symlink at the above path to refer to the directory of the +# latest upload. +# +# The root directory is passed as an argument (via upload-handler-argument). +# All the remaining directory components are retrieved from the respective +# manifest values of request.manifest created by brep and +# bindist-result.manifest contained in the uploaded archive. +# +# Note that the leaf component of the package configuration symlink path is +# sanitized, having the "bindist", <instance>, <os-release-name-id>, and +# <os-release-name-id><os-release-version-id> dash-separated sub-components +# removed. If the component becomes empty as a result of the sanitization, +# then "default" is assumed. 
For example, the following symlink paths: +# +# .../archive/windows10/foo/libfoo/1.0.0/bindist-archive-windows10-release +# .../archive/windows10/foo/libfoo/1.0.0/bindist-archive-windows10 +# +# are reduced to: +# +# .../archive/windows10/foo/libfoo/1.0.0/release +# .../archive/windows10/foo/libfoo/1.0.0/default +# +# To achieve this the handler performs the following steps (<dir> is passed as +# last argument by brep and is a subdirectory of upload-data): +# +# - Parse <dir>/request.manifest to retrieve the upload archive path, +# timestamp, and the values which are required to compose the package +# configuration symlink path. +# +# - Extract files from the upload archive. +# +# - Parse <dir>/<instance>/bindist-result.manifest to retrieve the values +# required to compose the package configuration symlink path and the package +# file paths. +# +# - Compose the package configuration symlink path. +# +# - Compose the package configuration directory path by appending the +# -<timestamp>[-<number>] suffix to the package configuration symlink path. +# +# - Create the package configuration directory. +# +# - Copy the uploaded package files into the package configuration directory. +# +# - Generate the packages.sha256 file in the package configuration directory, +# which lists the SHA256 checksums of the files contained in this directory. +# +# - Switch the package configuration symlink to refer to the newly created +# package configuration directory. +# +# - If the --keep-previous option is not specified, then remove the previous +# target of the package configuration symlink, if exists. +# +# Notes: +# +# - There could be a race both with upload-bindist-clean and other +# upload-bindist instances while creating the package version/configuration +# directories, querying the package configuration symlink target, switching +# the symlink, and removing the symlink's previous target. To avoid it, the +# root directory needs to be locked for the duration of these operations. +# This, however, needs to be done granularly to perform the time consuming +# operations (files copying, etc) while not holding the lock. +# +# - The brep module doesn't acquire the root directory lock. Thus, the package +# configuration symlink during its lifetime should always refer to a +# valid/complete package configuration directory. +# +# - Filesystem entries that exist or are created in the data directory: +# +# <archive> saved by brep +# request.manifest created by brep +# <instance>/* extracted by the handler (bindist-result.manifest, etc) +# result.manifest saved by brep +# +# Options: +# +# --keep-previous +# +# Don't remove the previous target of the package configuration symlink. +# +usage="usage: $0 [<options>] <root> <dir>" + +# Diagnostics. +# +verbose= #true + +# The root directory lock timeout (in seconds). +# +lock_timeout=60 + +# If the package configuration directory already exists (may happen due to the +# low timestamp resolution), then re-try creating the configuration directory +# by adding the -<number> suffix and incrementing it until the creation +# succeeds or the retries limit is reached. +# +create_dir_retries=99 + +trap "{ exit 1; }" ERR +set -o errtrace # Trap in functions and subshells. +set -o pipefail # Fail if any pipeline command fails. +shopt -s lastpipe # Execute last pipeline command in the current shell. +shopt -s nullglob # Expand no-match globs to nothing rather than themselves. 
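+
+# Note: the root directory locking below follows the usual flock(1)
+# on-a-dedicated-lock-file pattern. A minimal sketch of the idea (not part of
+# the handler logic proper):
+#
+#   exec {lfd}<"$root_dir/upload.lock" # Open a reading fd on the lock file.
+#   flock -w "$lock_timeout" "$lfd"    # Acquire the lock or fail on timeout.
+#   ...                                # Operate on the root directory.
+#   exec {lfd}<&-                      # Close the fd, releasing the lock.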
+
+@import brep/handler/handler@
+@import brep/handler/upload/upload@
+
+# Parse the command line options.
+#
+keep_previous=
+
+while [[ "$#" -gt 0 ]]; do
+  case $1 in
+    --keep-previous)
+      shift
+      keep_previous=true
+      ;;
+    *)
+      break
+      ;;
+  esac
+done
+
+if [[ "$#" -ne 2 ]]; then
+  error "$usage"
+fi
+
+# Destination root directory.
+#
+root_dir="${1%/}"
+shift
+
+if [[ -z "$root_dir" ]]; then
+  error "$usage"
+fi
+
+if [[ ! -d "$root_dir" ]]; then
+  error "'$root_dir' does not exist or is not a directory"
+fi
+
+# Upload data directory.
+#
+data_dir="${1%/}"
+shift
+
+if [[ -z "$data_dir" ]]; then
+  error "$usage"
+fi
+
+if [[ ! -d "$data_dir" ]]; then
+  error "'$data_dir' does not exist or is not a directory"
+fi
+
+reference="$(basename "$data_dir")" # Upload request reference.
+
+# Parse the upload request manifest.
+#
+manifest_parser_start "$data_dir/request.manifest"
+
+archive=
+instance=
+timestamp=
+name=
+version=
+project=
+package_config=
+tenant=
+
+while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
+  case "$n" in
+    archive)        archive="$v"        ;;
+    instance)       instance="$v"       ;;
+    timestamp)      timestamp="$v"      ;;
+    name)           name="$v"           ;;
+    version)        version="$v"        ;;
+    project)        project="$v"        ;;
+    package-config) package_config="$v" ;;
+    tenant)         tenant="$v"         ;;
+  esac
+done
+
+manifest_parser_finish
+
+if [[ -z "$archive" ]]; then
+  error "archive manifest value expected"
+fi
+
+if [[ -z "$instance" ]]; then
+  error "instance manifest value expected"
+fi
+
+if [[ -z "$timestamp" ]]; then
+  error "timestamp manifest value expected"
+fi
+
+if [[ -z "$name" ]]; then
+  error "name manifest value expected"
+fi
+
+if [[ -z "$version" ]]; then
+  error "version manifest value expected"
+fi
+
+if [[ -z "$project" ]]; then
+  error "project manifest value expected"
+fi
+
+if [[ -z "$package_config" ]]; then
+  error "package-config manifest value expected"
+fi
+
+# Let's disallow dots in the package-config manifest value since the latter
+# serves as the package configuration symlink name and the dot can be
+# misinterpreted by brep as an extension separator, which the implementation
+# relies upon.
+#
+if [[ "$package_config" == *"."* ]]; then
+  exit_with_manifest 400 "package-config manifest value may not contain dot"
+fi
+
+# Unpack the archive.
+#
+run tar -xf "$data_dir/$archive" -C "$data_dir"
+
+# Parse the bindist result manifest list.
+#
+f="$data_dir/$instance/bindist-result.manifest"
+
+if [[ ! -f "$f" ]]; then
+  exit_with_manifest 400 "$instance/bindist-result.manifest not found"
+fi
+
+manifest_parser_start "$f"
+
+# Parse the distribution manifest.
+#
+# Note that we need to skip the first manifest version value and parse until
+# the next one is encountered, which introduces the first package file
+# manifest.
+#
+os_release_name_id=
+os_release_version_id=
+
+first=true
+more=
+while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
+  case "$n" in
+    "") if [[ "$first" ]]; then # Start of the first (distribution) manifest?
+          first=
+        else                    # Start of the second (package file) manifest.
+          more=true
+          break
+        fi
+        ;;
+
+    os-release-name-id)    os_release_name_id="$v"    ;;
+    os-release-version-id) os_release_version_id="$v" ;;
+  esac
+done
+
+if [[ -z "$os_release_name_id" ]]; then
+  exit_with_manifest 400 "os-release-name-id bindist result manifest value expected"
+fi
+
+if [[ -z "$os_release_version_id" ]]; then
+  exit_with_manifest 400 "os-release-version-id bindist result manifest value expected"
+fi
+
+if [[ ! "$more" ]]; then
+  exit_with_manifest 400 "no package file manifests in bindist result manifest list"
+fi
+
+# Parse the package file manifest list and cache the file paths.
+#
+# Note that while we currently only need the package file paths, we can make
+# use of their types and system names in the future. Thus, let's verify that
+# all the required package file values are present and, while at it, cache
+# them all in the parallel arrays.
+#
+package_file_paths=()
+package_file_types=()
+package_file_system_names=()
+
+# The outer loop iterates over package file manifests while the inner loop
+# iterates over manifest values in each such manifest.
+#
+while [[ "$more" ]]; do
+  more=
+  type=
+  path=
+  system_name=
+
+  while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
+    case "$n" in
+      "") # Start of the next package file manifest.
+        more=true
+        break
+        ;;
+
+      package-file-path)        path="$v"        ;;
+      package-file-type)        type="$v"        ;;
+      package-file-system-name) system_name="$v" ;;
+    esac
+  done
+
+  if [[ -z "$path" ]]; then
+    exit_with_manifest 400 "package-file-path bindist result manifest value expected"
+  fi
+
+  if [[ -z "$type" ]]; then
+    exit_with_manifest 400 "package-file-type bindist result manifest value expected"
+  fi
+
+  package_file_paths+=("$path")
+  package_file_types+=("$type")
+  package_file_system_names+=("$system_name") # Note: system name can be empty.
+done
+
+manifest_parser_finish
+
+# Sanitize the package configuration name.
+#
+config=
+for c in $(sed 's/-/ /g' <<<"$package_config"); do
+  if [[ "$c" != "bindist"             &&
+        "$c" != "$instance"           &&
+        "$c" != "$os_release_name_id" &&
+        "$c" != "$os_release_name_id$os_release_version_id" ]]; then
+    if [[ -z "$config" ]]; then
+      config="$c"
+    else
+      config="$config-$c"
+    fi
+  fi
+done
+
+if [[ -z "$config" ]]; then
+  config="default"
+fi
+
+# Compose the package configuration symlink path.
+#
+config_link="$root_dir"
+
+if [[ -n "$tenant" ]]; then
+  config_link="$config_link/$tenant"
+fi
+
+config_link="$config_link/$instance/$os_release_name_id$os_release_version_id"
+config_link="$config_link/$project/$name/$version/$config"
+
+# Compose the package configuration directory path.
+#
+config_dir="$config_link-$timestamp"
+
+# Create the package configuration directory.
+#
+# Note that it is highly unlikely that multiple uploads for the same package
+# configuration/distribution occur at the same time (with the seconds
+# resolution) making the directory name not unique. If that still happens,
+# let's retry for some reasonable number of times to create the directory,
+# while adding the -<number> suffix to its path on each iteration. If that
+# also fails, then we assume that there is some issue with the handler setup
+# and fail, printing the cached mkdir diagnostics to stderr.
+#
+# Note that we need to prevent removal of the potentially empty package
+# version directory by the upload-bindist-clean script before we create the
+# configuration directory. To achieve that, we lock the root directory for
+# the duration of the package version/configuration directories creation.
+#
+# Open the reading file descriptor and lock the root directory. Fail if
+# unable to lock before timeout.
+#
+lock="$root_dir/upload.lock"
+run touch "$lock"
+trace "+ exec {lfd}<$lock"
+exec {lfd}<"$lock"
+
+if ! run flock -w "$lock_timeout" "$lfd"; then
+  exit_with_manifest 503 "upload service is busy"
+fi
+
+# Create parent (doesn't fail if directory exists).
+#
+run mkdir -p "$(dirname "$config_dir")"
+
+created=
+
+trace_cmd mkdir "$config_dir"
+if ! e="$(mkdir "$config_dir" 2>&1)"; then # Note: fails if directory exists.
+  for ((i=0; i != $create_dir_retries; ++i)); do
+    d="$config_dir-$i"
+    trace_cmd mkdir "$d"
+    if e="$(mkdir "$d" 2>&1)"; then
+      config_dir="$d"
+      created=true
+      break
+    fi
+  done
+else
+  created=true
+fi
+
+# Close the file descriptor and unlock the root directory.
+#
+trace "+ exec {lfd}<&-"
+exec {lfd}<&-
+
+if [[ ! "$created" ]]; then
+  echo "$e" 1>&2
+  error "unable to create package configuration directory"
+fi
+
+# On exit, remove the newly created package configuration directory, unless
+# its removal is canceled (for example, the symlink is switched to refer to
+# it). Also remove the new symlink, if already created.
+#
+# Make sure we don't fail if the entries are already removed, for example,
+# by the upload-bindist-clean script.
+#
+config_link_new=
+function exit_trap ()
+{
+  if [[ -n "$config_dir" && -d "$config_dir" ]]; then
+    if [[ -n "$config_link_new" && -L "$config_link_new" ]]; then
+      run rm -f "$config_link_new"
+    fi
+    run rm -rf "$config_dir"
+  fi
+}
+
+trap exit_trap EXIT
+
+# Copy all the extracted package files to the package configuration
+# directory.
+#
+for ((i=0; i != "${#package_file_paths[@]}"; ++i)); do
+  run cp "$data_dir/$instance/${package_file_paths[$i]}" "$config_dir"
+done
+
+# Generate the packages.sha256 file.
+#
+# Note that since we don't hold the root directory lock at this time, we
+# temporarily "hide" the resulting file from the upload-bindist-clean script
+# (which uses it for the upload age calculation) by adding the leading dot to
+# its name. Otherwise, we may potentially end up with upload-bindist-clean
+# removing the half-cooked directory and so breaking the upload handling.
+#
+trace "+ (cd $config_dir && exec sha256sum -b ${package_file_paths[@]} >.packages.sha256)"
+(cd "$config_dir" && exec sha256sum -b "${package_file_paths[@]}" >".packages.sha256")
+
+# Create the new package configuration symlink.
+#
+config_link_new="$config_dir.symlink"
+run ln -s "$(basename "$config_dir")" "$config_link_new"
+
+# Switch the package configuration symlink atomically. But first, cache the
+# previous package configuration symlink target if the --keep-previous
+# option is not specified and "unhide" the packages.sha256 file.
+#
+# Note that to avoid a race with upload-bindist-clean and other
+# upload-bindist instances, we need to perform all the mentioned operations
+# as well as removing the previous package configuration directory while
+# holding the root directory lock.
+
+# Lock the root directory.
+#
+trace "+ exec {lfd}<$lock"
+exec {lfd}<"$lock"
+
+if ! run flock -w "$lock_timeout" "$lfd"; then
+  exit_with_manifest 503 "upload service is busy"
+fi
+
+# Note that while the realpath utility is not POSIX, it is present on both
+# Linux and FreeBSD.
+#
+config_dir_prev=
+if [[ ! "$keep_previous" && -L "$config_link" ]]; then
+  config_dir_prev="$(realpath "$config_link")"
+fi
+
+# "Unhide" the packages.sha256 file.
+#
+run mv "$config_dir/.packages.sha256" "$config_dir/packages.sha256"
+
+# Note that since brep doesn't acquire the root directory lock, we need to
+# switch the symlink as the final step, when the package directory is fully
+# prepared and can be exposed.
+#
+# @@ Also note that the -T option is Linux-specific. To add support for
+#    FreeBSD we need to use -h option there (but maybe -T also works, who
+#    knows).
+#
+run mv -T "$config_link_new" "$config_link"
+
+# Now that the package configuration symlink is switched, disable removal of
+# the newly created package configuration directory.
+#
+# Note that we can still respond with an error status. However, the
+# remaining operations are all cleanups and thus unlikely to fail.
+#
+config_dir=
+
+# Remove the previous package configuration directory, if requested.
+#
+if [[ -n "$config_dir_prev" ]]; then
+  run rm -r "$config_dir_prev"
+fi
+
+# Unlock the root directory.
+#
+trace "+ exec {lfd}<&-"
+exec {lfd}<&-
+
+# Remove the no longer needed upload data directory.
+#
+run rm -r "$data_dir"
+
+trace "binary distribution packages are published"
+exit_with_manifest 200 "binary distribution packages are published"
diff --git a/brep/handler/upload/upload.bash.in b/brep/handler/upload/upload.bash.in
new file mode 100644
index 0000000..9acead9
--- /dev/null
+++ b/brep/handler/upload/upload.bash.in
@@ -0,0 +1,40 @@
+# file      : brep/handler/upload/upload.bash.in
+# license   : MIT; see accompanying LICENSE file
+
+# Utility functions useful for implementing upload handlers.
+
+if [ "$brep_handler_upload" ]; then
+  return 0
+else
+  brep_handler_upload=true
+fi
+
+@import brep/handler/handler@
+
+# Serialize the upload result manifest to stdout and exit the (sub-)shell
+# with the zero status.
+#
+reference= # Should be assigned later by the handler, when it becomes available.
+
+function exit_with_manifest () # <status> <message>
+{
+  trace_func "$@"
+
+  local sts="$1"
+  local msg="$2"
+
+  manifest_serializer_start
+
+  manifest_serialize ""        "1"    # Start of manifest.
+  manifest_serialize "status"  "$sts"
+  manifest_serialize "message" "$msg"
+
+  if [ -n "$reference" ]; then
+    manifest_serialize "reference" "$reference"
+  elif [ "$sts" == "200" ]; then
+    error "no reference for code $sts"
+  fi
+
+  manifest_serializer_finish
+  run exit 0
+}
@@ -62,6 +62,7 @@ function compile ()
 --man-epilogue-file man-epilogue.1 \
 --link-regex '%bpkg(#.+)?%$1%' \
 --link-regex '%brep(#.+)?%$1%' \
+--link-regex '%bbot(#.+)?%$1%' \
 ../$n.cli
 }
@@ -100,6 +101,7 @@
 cli -I .. \
 --link-regex '%b([-.].+)%../../build2/doc/b$1%' \
 --link-regex '%bpkg([-.].+)%../../bpkg/doc/bpkg$1%' \
 --link-regex '%bpkg(#.+)?%../../bpkg/doc/build2-package-manager-manual.xhtml$1%' \
+--link-regex '%bbot(#.+)?%../../bbot/doc/build2-build-bot-manual.xhtml$1%' \
 --output-prefix build2-repository-interface- \
 manual.cli
diff --git a/doc/manual.cli b/doc/manual.cli
index 9529dac..6dab404 100644
--- a/doc/manual.cli
+++ b/doc/manual.cli
@@ -121,7 +121,6 @@ reference: <abbrev-checksum>
 |
 
-
 \li|Send the submission email.
 
 If \c{submit-email} is configured, send an email to this address containing
@@ -400,4 +399,182 @@ message: <string>
 [reference]: <string>
 \
+
+\h1#upload|Build Artifacts Upload|
+
+The build artifacts upload functionality allows uploading archives of files
+generated as a byproduct of the package builds. Such archives as well as
+additional, repository-specific information can optionally be uploaded by the
+automated build bots via the HTTP \c{POST} method using the
+\c{multipart/form-data} content type (see the \l{bbot \c{bbot} documentation}
+for details). The implementation in \c{brep} only handles uploading as well as
+basic actions and verification (build session resolution, agent
+authentication, checksum verification), expecting the rest of the upload logic
+to be handled by a separate entity according to the repository policy. Such an
+entity can be notified by \c{brep} about a new upload as an invocation of the
+\i{handler program} (as part of the HTTP request) and/or via email. It could
+also be a separate process that monitors the upload data directory.
+
+For each upload request \c{brep} performs the following steps.
+
+\ol|
+
+\li|Determine upload type.
+
+The upload type must be passed via the \c{upload} parameter in the query
+component of the request URL.|
+
+\li|Verify upload size limit.
+
+The upload form-data payload size must not exceed \c{upload-max-size} specific
+for this upload type.|
+
+\li|Verify the required \c{session}, \c{instance}, \c{archive}, and
+\c{sha256sum} parameters are present. If \c{brep} is configured to perform
+agent authentication, then verify that the \c{challenge} parameter is also
+present. See the \l{bbot#arch-result-req Result Request Manifest} for the
+semantics of the \c{session} and \c{challenge} parameters.
+
+The \c{archive} parameter must be the build artifacts archive upload while
+\c{sha256sum} must be its 64-character SHA256 checksum calculated in the
+binary mode.|
+
+\li|Verify other parameters are valid manifest name/value pairs.
+
+The value can only contain UTF-8 encoded Unicode graphic characters as well as
+tab (\c{\\t}), carriage return (\c{\\r}), and line feed (\c{\\n}).|
+
+\li|Resolve the session.
+
+Resolve the \c{session} parameter value to the actual package build
+information.|
+
+\li|Authenticate the build bot agent.
+
+Use the \c{challenge} parameter value and the resolved package build
+information to authenticate the agent, if configured to do so.|
+
+\li|Generate upload request id and create request directory.
+
+For each upload request a unique id (UUID) is generated and a request
+subdirectory is created in the \c{upload-data} directory with this id as its
+name.|
+
+\li|Save the upload archive into the request directory and verify its
+checksum.
+
+The archive is saved using the submitted name, and its checksum is calculated
+and compared to the submitted checksum.|
+
+\li|Save the upload request manifest into the request directory.
+
+The upload request manifest is saved as \c{request.manifest} into the request
+subdirectory next to the archive.|
+
+\li|Invoke the upload handler program.
+
+If \c{upload-handler} is configured, invoke the handler program passing to it
+additional arguments specified with \c{upload-handler-argument} (if any)
+followed by the absolute path to the upload request directory.
+
+The handler program is expected to write the upload result manifest to
+\c{stdout} and terminate with the zero exit status. A non-zero exit status is
+treated as an internal error. The handler program's \c{stderr} is logged.
+
+Note that the handler program should report temporary server errors (service
+overload, network connectivity loss, etc.) via the upload result manifest
+status values in the [500-599] range (HTTP server error) rather than via a
+non-zero exit status.
+
+The handler program assumes ownership of the upload request directory and can
+move/remove it. If after the handler program terminates the request directory
+still exists, then it is handled by \c{brep} depending on the handler process
+exit status and the upload result manifest status value. If the process has
+terminated abnormally or with a non-zero exit status or the result manifest
+status is in the [500-599] range (HTTP server error), then the directory is
+saved for troubleshooting by appending the \c{.fail} extension to its name.
+Otherwise, if the status is in the [400-499] range (HTTP client error), then
+the directory is removed. If the directory is left in place by the handler or
+is saved for troubleshooting, then the upload result manifest is saved as
+\c{result.manifest} into this directory, next to the request manifest.
+
+If \c{upload-handler-timeout} is configured and the handler program does not
+exit in the allotted time, then it is killed and its termination is treated as
+abnormal.
+
+If the handler program is not specified, then the following upload result
+manifest is implied:
+
+\
+status: 200
+message: <upload-type> upload is queued
+reference: <request-id>
+\
+
+|
+
+\li|Send the upload email.
+
+If \c{upload-email} is configured, send an email to this address containing
+the upload request manifest and the upload result manifest.|
+
+\li|Respond to the client.
+
+Respond to the client with the upload result manifest and its \c{status} value
+as the HTTP status code.|
+
+|
+
+Violations of the checks explicitly mentioned above (max size, etc) are
+always reported with the upload result manifest. Other errors (for example,
+internal server errors) might be reported with unformatted text, including
+HTML.
+
+
+\h#upload-request-manifest|Upload Request Manifest|
+
+The upload request manifest starts with the below values, in that order,
+optionally followed by additional values, in unspecified order, that
+correspond to the custom request parameters.
+
+\
+id: <request-id>
+session: <session-id>
+instance: <name>
+archive: <name>
+sha256sum: <sum>
+timestamp: <date-time>
+
+name: <name>
+version: <version>
+project: <name>
+target-config: <name>
+package-config: <name>
+target: <target-triplet>
+[tenant]: <tenant-id>
+toolchain-name: <name>
+toolchain-version: <standard-version>
+repository-name: <canonical-name>
+machine-name: <name>
+machine-summary: <text>
+\
+
+The \c{timestamp} value is in the ISO-8601
+\c{<YYYY>-<MM>-<DD>T<hh>:<mm>:<ss>Z} form (always UTC).
+
+
+\h#upload-result-manifest|Upload Result Manifest|
+
+The upload result manifest starts with the below values, in that order,
+optionally followed by additional values if returned by the handler program.
+If the upload request is successful, then the \c{reference} value must be
+present and contain a string that can be used to identify this request (for
+example, the upload request id).
+
+\
+status: <http-code>
+message: <string>
+[reference]: <string>
+\
+
+"
diff --git a/etc/brep-module.conf b/etc/brep-module.conf
index 31e3e11..ece1a05 100644
--- a/etc/brep-module.conf
+++ b/etc/brep-module.conf
@@ -260,6 +260,24 @@ menu About=?about
 # build-db-retry 10
 
+# The root directory where the uploaded binary distribution artifacts are
+# saved under the following directory hierarchy:
+#
+# [<tenant>/]<distribution>/<os-release>/<project>/<package>/<version>/<package-config>
+#
+# The package configuration directory symlinks at these paths are displayed as
+# web links on the package version details page. If specified, then
+# bindist-url must be specified as well.
+#
+# bindist-root
+
+
+# The URL of the directory specified with the bindist-root option. Must be
+# specified if bindist-root is specified.
+#
+# bindist-url
+
+
 # The openssl program to be used for crypto operations. You can also specify
 # additional options that should be passed to the openssl program with
 # openssl-option. If the openssl program is not explicitly specified, then brep
@@ -336,10 +354,9 @@ menu About=?about
 
 # The handler program to be executed on package submission. The handler is
-# executed as part of the submission request and is passed additional
-# arguments that can be specified with submit-handler-argument followed by
-# the absolute path to the submission directory. Note that the program path
-# must be absolute.
+# executed as part of the HTTP request and is passed additional arguments that
+# can be specified with submit-handler-argument followed by the absolute path
+# to the submission directory. Note that the program path must be absolute.
 #
 # submit-handler
@@ -403,6 +420,66 @@ menu About=?about
 # ci-handler-timeout
 
+# The directory to save upload data to for the specified upload type. If
+# unspecified, the build artifacts upload functionality will be disabled for
+# this type.
+#
+# Note that the directory path must be absolute and the directory itself must
+# exist and have read, write, and execute permissions granted to the user that
+# runs the web server.
+#
+# upload-data <type>=<dir>
+
+
+# The maximum size of the upload data accepted for the specified upload type.
+# Note that currently the entire upload request is read into memory. The
+# default is 10M.
+#
+# upload-max-size <type>=10485760
+
+
+# The build artifacts upload email. If specified, the upload request and
+# result manifests will be sent to this address.
+#
+# upload-email <type>=<email>
+
+
+# The handler program to be executed on build artifacts upload of the
+# specified type. The handler is executed as part of the HTTP request and is
+# passed additional arguments that can be specified with
+# upload-handler-argument followed by the absolute path to the upload
+# directory. Note that the program path must be absolute.
+#
+# upload-handler <type>=<path>
+
+
+# Additional arguments to be passed to the upload handler program for the
+# specified upload type (see upload-handler for details). Repeat this option
+# to specify multiple arguments.
+#
+# upload-handler-argument <type>=<arg>
+
+
+# The upload handler program timeout in seconds for the specified upload type.
+# If specified and the handler does not exit in the allotted time, then it is
+# killed and its termination is treated as abnormal.
+#
+# upload-handler-timeout <type>=<seconds>
+
+
+# Disable upload of the specified type for the specified toolchain name.
+# Repeat this option to disable uploads for multiple toolchains.
+#
+# upload-toolchain-exclude <type>=<name>
+
+
+# Disable upload of the specified type for packages from the repository with
+# the specified canonical name. Repeat this option to disable uploads for
+# multiple repositories.
+#
+# upload-repository-exclude <type>=<name>
+
+
 # The default view to display for the global repository root. The value is one
 # of the supported services (packages, builds, submit, ci, etc). Default is
 # packages.
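As an illustration only, with the bindist upload type enabled the relevant
part of brep-module.conf could look along these lines (the paths, timeout,
and URL here are made-up examples; /var/bindist is passed to the
brep-upload-bindist handler as its root directory and is also exposed via
bindist-root/bindist-url):

upload-data bindist=/home/brep/bindist-data
upload-handler bindist=/home/brep/install/bin/brep-upload-bindist
upload-handler-argument bindist=/var/bindist
upload-handler-timeout bindist=600

bindist-root /var/bindist
bindist-url https://example.org/bindist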
diff --git a/etc/private/install/brep-module.conf b/etc/private/install/brep-module.conf
index 832b8c1..525316d 100644
--- a/etc/private/install/brep-module.conf
+++ b/etc/private/install/brep-module.conf
@@ -260,6 +260,24 @@ menu About=?about
 # build-db-retry 10
 
+# The root directory where the uploaded binary distribution artifacts are
+# saved under the following directory hierarchy:
+#
+# [<tenant>/]<distribution>/<os-release>/<project>/<package>/<version>/<package-config>
+#
+# The package configuration directory symlinks at these paths are displayed as
+# web links on the package version details page. If specified, then
+# bindist-url must be specified as well.
+#
+# bindist-root
+
+
+# The URL of the directory specified with the bindist-root option. Must be
+# specified if bindist-root is specified.
+#
+# bindist-url
+
+
 # The openssl program to be used for crypto operations. You can also specify
 # additional options that should be passed to the openssl program with
 # openssl-option. If the openssl program is not explicitly specified, then brep
@@ -338,10 +356,9 @@ submit-form /home/brep/install/share/brep/www/submit.xhtml
 
 # The handler program to be executed on package submission. The handler is
-# executed as part of the submission request and is passed additional
-# arguments that can be specified with submit-handler-argument followed by
-# the absolute path to the submission directory. Note that the program path
-# must be absolute.
+# executed as part of the HTTP request and is passed additional arguments that
+# can be specified with submit-handler-argument followed by the absolute path
+# to the submission directory. Note that the program path must be absolute.
 #
 submit-handler /home/brep/install/bin/brep-submit-pub
@@ -411,6 +428,66 @@ submit-handler-timeout 120
 # ci-handler-timeout
 
+# The directory to save upload data to for the specified upload type. If
+# unspecified, the build artifacts upload functionality will be disabled for
+# this type.
+#
+# Note that the directory path must be absolute and the directory itself must
+# exist and have read, write, and execute permissions granted to the user that
+# runs the web server.
+#
+# upload-data <type>=<dir>
+
+
+# The maximum size of the upload data accepted for the specified upload type.
+# Note that currently the entire upload request is read into memory. The
+# default is 10M.
+#
+# upload-max-size <type>=10485760
+
+
+# The build artifacts upload email. If specified, the upload request and
+# result manifests will be sent to this address.
+#
+# upload-email <type>=<email>
+
+
+# The handler program to be executed on build artifacts upload of the
+# specified type. The handler is executed as part of the HTTP request and is
+# passed additional arguments that can be specified with
+# upload-handler-argument followed by the absolute path to the upload
+# directory. Note that the program path must be absolute.
+#
+# upload-handler <type>=<path>
+
+
+# Additional arguments to be passed to the upload handler program for the
+# specified upload type (see upload-handler for details). Repeat this option
+# to specify multiple arguments.
+#
+# upload-handler-argument <type>=<arg>
+
+
+# The upload handler program timeout in seconds for the specified upload type.
+# If specified and the handler does not exit in the allotted time, then it is
+# killed and its termination is treated as abnormal.
+#
+# upload-handler-timeout <type>=<seconds>
+
+
+# Disable upload of the specified type for the specified toolchain name.
+# Repeat this option to disable uploads for multiple toolchains.
+#
+# upload-toolchain-exclude <type>=<name>
+
+
+# Disable upload of the specified type for packages from the repository with
+# the specified canonical name. Repeat this option to disable uploads for
+# multiple repositories.
+#
+# upload-repository-exclude <type>=<name>
+
+
 # The default view to display for the global repository root. The value is one
 # of the supported services (packages, builds, submit, ci, etc). Default is
 # packages.
diff --git a/etc/systemd/brep-clean.service b/etc/systemd/brep-clean.service
index 739a54a..d2e5630 100644
--- a/etc/systemd/brep-clean.service
+++ b/etc/systemd/brep-clean.service
@@ -1,5 +1,5 @@
 [Unit]
-Description=brep build database cleaner service
+Description=brep build database and artifacts cleaner service
 
 [Service]
 Type=oneshot
@@ -7,9 +7,12 @@ Type=oneshot
 #Group=brep
 
 # Run both tenants and builds cleaners if CI request functionality is enabled.
+# Also run the outdated build artifacts cleaner if the build artifacts upload
+# functionality is enabled.
 #
 #ExecStart=/home/brep/install/bin/brep-clean tenants 240
 ExecStart=/home/brep/install/bin/brep-clean builds /home/brep/config/buildtab
+#ExecStart=/home/brep/install/bin/brep-upload-bindist-clean /var/bindist 2880
 
 [Install]
 WantedBy=default.target
diff --git a/libbrep/build-extra.sql b/libbrep/build-extra.sql
index 7331ab1..b1bd04f 100644
--- a/libbrep/build-extra.sql
+++ b/libbrep/build-extra.sql
@@ -60,6 +60,7 @@ CREATE FOREIGN TABLE build_package (
   version_revision INTEGER NOT NULL,
   version_upstream TEXT NOT NULL,
   version_release TEXT NULL,
+  project CITEXT NOT NULL,
   internal_repository_tenant TEXT NULL,
   internal_repository_canonical_name TEXT NULL,
   buildable BOOLEAN NOT NULL)
diff --git a/libbrep/build-package.hxx b/libbrep/build-package.hxx
index 6ca8702..e9104e0 100644
--- a/libbrep/build-package.hxx
+++ b/libbrep/build-package.hxx
@@ -111,6 +111,8 @@ namespace brep
     package_id id;
     upstream_version version;
 
+    package_name project;
+
     // Mapped to the package object requirements and tests members using the
     // PostgreSQL foreign table mechanism.
     //
diff --git a/libbrep/build.hxx b/libbrep/build.hxx
index eaceebc..717029d 100644
--- a/libbrep/build.hxx
+++ b/libbrep/build.hxx
@@ -28,7 +28,7 @@
 //
 #define LIBBREP_BUILD_SCHEMA_VERSION_BASE 20
 
-#pragma db model version(LIBBREP_BUILD_SCHEMA_VERSION_BASE, 20, closed)
+#pragma db model version(LIBBREP_BUILD_SCHEMA_VERSION_BASE, 21, closed)
 
 // We have to keep these mappings at the global scope instead of inside the
 // brep namespace because they need to be also effective in the bbot namespace
diff --git a/libbrep/build.xml b/libbrep/build.xml
index d1969f1..0374b05 100644
--- a/libbrep/build.xml
+++ b/libbrep/build.xml
@@ -1,4 +1,6 @@
 <changelog xmlns="http://www.codesynthesis.com/xmlns/odb/changelog" database="pgsql" schema-name="build" version="1">
+  <changeset version="21"/>
+
   <model version="20">
     <table name="build" kind="object">
       <column name="package_tenant" type="TEXT" null="false"/>
diff --git a/libbrep/package.hxx b/libbrep/package.hxx
index cf6ae64..bba38b7 100644
--- a/libbrep/package.hxx
+++ b/libbrep/package.hxx
@@ -494,7 +494,7 @@ namespace brep
     // Matches the package name if the project name is not specified in
     // the manifest.
     //
-    package_name project;
+    package_name project; // Note: foreign-mapped in build.
priority_type priority; string summary; diff --git a/mod/build-config-module.cxx b/mod/build-config-module.cxx index 6ad2d73..97c9f9e 100644 --- a/mod/build-config-module.cxx +++ b/mod/build-config-module.cxx @@ -108,7 +108,7 @@ namespace brep catch (const system_error& e) { ostringstream os; - os<< "unable to iterate over agents keys directory '" << d << "'"; + os << "unable to iterate over agents keys directory '" << d << "'"; throw_generic_error (e.code ().value (), os.str ().c_str ()); } diff --git a/mod/build-result-module.cxx b/mod/build-result-module.cxx new file mode 100644 index 0000000..7823e3a --- /dev/null +++ b/mod/build-result-module.cxx @@ -0,0 +1,284 @@ +// file : mod/build-result-module.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/build-result-module.hxx> + +#include <libbutl/openssl.hxx> +#include <libbutl/process-io.hxx> + +namespace brep +{ + using namespace std; + using namespace butl; + + // While currently the user-defined copy constructor is not required (we + // don't need to deep copy nullptr's), it is a good idea to keep the + // placeholder ready for less trivial cases. + // + build_result_module:: + build_result_module (const build_result_module& r) + : database_module (r), + build_config_module (r), + use_openssl_pkeyutl_ (r.initialized_ ? r.use_openssl_pkeyutl_ : false) + { + } + + void build_result_module:: + init (const options::build& bo, const options::build_db& bdo) + { + HANDLER_DIAG; + + build_config_module::init (bo); + database_module::init (bdo, bdo.build_db_retry ()); + + try + { + optional<openssl_info> oi ( + openssl::info ([&trace, this] (const char* args[], size_t n) + { + l2 ([&]{trace << process_args {args, n};}); + }, + 2, + bo.openssl ())); + + use_openssl_pkeyutl_ = oi && + oi->name == "OpenSSL" && + oi->version >= semantic_version {3, 0, 0}; + } + catch (const system_error& e) + { + fail << "unable to obtain openssl version: " << e; + } + } + + build_result_module::parse_session_result build_result_module:: + parse_session (const string& s) const + { + using brep::version; // Not to confuse with module::version. + + parse_session_result r; + + size_t p (s.find ('/')); // End of tenant. + + if (p == string::npos) + throw invalid_argument ("no package name"); + + if (tenant.compare (0, tenant.size (), s, 0, p) != 0) + throw invalid_argument ("tenant mismatch"); + + size_t b (p + 1); // Start of package name. + p = s.find ('/', b); // End of package name. + + if (p == b) + throw invalid_argument ("empty package name"); + + if (p == string::npos) + throw invalid_argument ("no package version"); + + package_name name; + + try + { + name = package_name (string (s, b, p - b)); + } + catch (const invalid_argument& e) + { + throw invalid_argument ( + string ("invalid package name : ") + e.what ()); + } + + b = p + 1; // Start of version. + p = s.find ('/', b); // End of version. + + if (p == string::npos) + throw invalid_argument ("no target"); + + auto parse_version = [&s, &b, &p] (const char* what) -> version + { + // Intercept exception handling to add the parsing error attribution. + // + try + { + return brep::version (string (s, b, p - b)); + } + catch (const invalid_argument& e) + { + throw invalid_argument ( + string ("invalid ") + what + ": " + e.what ()); + } + }; + + r.package_version = parse_version ("package version"); + + b = p + 1; // Start of target. + p = s.find ('/', b); // End of target. 
+ + if (p == string::npos) + throw invalid_argument ("no target configuration name"); + + target_triplet target; + try + { + target = target_triplet (string (s, b, p - b)); + } + catch (const invalid_argument& e) + { + throw invalid_argument (string ("invalid target: ") + e.what ()); + } + + b = p + 1; // Start of target configuration name. + p = s.find ('/', b); // End of target configuration name. + + if (p == string::npos) + throw invalid_argument ("no package configuration name"); + + string target_config (s, b, p - b); + + if (target_config.empty ()) + throw invalid_argument ("empty target configuration name"); + + b = p + 1; // Start of package configuration name. + p = s.find ('/', b); // End of package configuration name. + + if (p == string::npos) + throw invalid_argument ("no toolchain name"); + + string package_config (s, b, p - b); + + if (package_config.empty ()) + throw invalid_argument ("empty package configuration name"); + + b = p + 1; // Start of toolchain name. + p = s.find ('/', b); // End of toolchain name. + + if (p == string::npos) + throw invalid_argument ("no toolchain version"); + + string toolchain_name (s, b, p - b); + + if (toolchain_name.empty ()) + throw invalid_argument ("empty toolchain name"); + + b = p + 1; // Start of toolchain version. + p = s.find ('/', b); // End of toolchain version. + + if (p == string::npos) + throw invalid_argument ("no timestamp"); + + r.toolchain_version = parse_version ("toolchain version"); + + r.id = build_id (package_id (move (tenant), move (name), r.package_version), + move (target), + move (target_config), + move (package_config), + move (toolchain_name), + r.toolchain_version); + + try + { + size_t tsn; + string ts (s, p + 1); + + r.timestamp = timestamp (chrono::duration_cast<timestamp::duration> ( + chrono::nanoseconds (stoull (ts, &tsn)))); + + if (tsn != ts.size ()) + throw invalid_argument ("trailing junk"); + } + // Handle invalid_argument or out_of_range (both derive from logic_error), + // that can be thrown by stoull(). + // + catch (const logic_error& e) + { + throw invalid_argument (string ("invalid timestamp: ") + e.what ()); + } + + return r; + } + + bool build_result_module:: + authenticate_session (const options::build& o, + const optional<vector<char>>& challenge, + const build& b, + const string& session) const + { + HANDLER_DIAG; + + auto warn_auth = [&session, &warn] (const string& d) + { + warn << "session '" << session << "' authentication failed: " << d; + }; + + bool r (false); + + // Must both be present or absent. + // + if (!b.agent_challenge != !challenge) + { + warn_auth (challenge ? "unexpected challenge": "challenge is expected"); + } + else if (bot_agent_key_map_ == nullptr) // Authentication is disabled. + { + r = true; + } + else if (!b.agent_challenge) // Authentication is recently enabled. + { + warn_auth ("challenge is required now"); + } + else + { + assert (b.agent_fingerprint && challenge); + auto i (bot_agent_key_map_->find (*b.agent_fingerprint)); + + // The agent's key is recently replaced. + // + if (i == bot_agent_key_map_->end ()) + { + warn_auth ("agent's public key not found"); + } + else + try + { + openssl os ([&trace, this] (const char* args[], size_t n) + { + l2 ([&]{trace << process_args {args, n};}); + }, + path ("-"), fdstream_mode::text, 2, + process_env (o.openssl (), o.openssl_envvar ()), + use_openssl_pkeyutl_ ? "pkeyutl" : "rsautl", + o.openssl_option (), + use_openssl_pkeyutl_ ? 
"-verifyrecover" : "-verify", + "-pubin", + "-inkey", + i->second); + + for (const auto& c: *challenge) + os.out.put (c); // Sets badbit on failure. + + os.out.close (); + + string s; + getline (os.in, s); + + bool v (os.in.eof ()); + os.in.close (); + + if (os.wait () && v) + { + r = (s == *b.agent_challenge); + + if (!r) + warn_auth ("challenge mismatched"); + } + else // The signature is presumably meaningless. + warn_auth ("unable to verify challenge"); + } + catch (const system_error& e) + { + fail << "unable to verify challenge: " << e; + } + } + + return r; + } +} diff --git a/mod/build-result-module.hxx b/mod/build-result-module.hxx new file mode 100644 index 0000000..e0f1eef --- /dev/null +++ b/mod/build-result-module.hxx @@ -0,0 +1,76 @@ +// file : mod/build-result-module.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_BUILD_RESULT_MODULE_HXX +#define MOD_BUILD_RESULT_MODULE_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <libbrep/build.hxx> + +#include <mod/module-options.hxx> +#include <mod/database-module.hxx> +#include <mod/build-config-module.hxx> + +namespace brep +{ + // Base class for modules that handle the build task results. + // + // Specifically, it loads build controller configuration, initializes the + // build database instance, and provides utilities for parsing and + // authenticating the build task session. + // + class build_result_module: public database_module, + protected build_config_module + { + protected: + build_result_module () = default; + + // Create a shallow copy (handling instance) if initialized and a deep + // copy (context exemplar) otherwise. + // + explicit + build_result_module (const build_result_module&); + + void + init (const options::build&, const options::build_db&); + + // Parse the build task session and verify that the session matches the + // tenant. Throw invalid_argument on errors. + // + struct parse_session_result + { + build_id id; + brep::version package_version; + brep::version toolchain_version; + brep::timestamp timestamp; + }; + + parse_session_result + parse_session (const string&) const; + + // Return true if bbot agent authentication is disabled or the agent is + // recognized and challenge matches. If the session authentication fails + // (challenge is not expected, expected but doesn't match, etc), then log + // the failure reason with the warning severity and return false. + // + // Note that the session argument is used only for logging. + // + bool + authenticate_session (const options::build&, + const optional<vector<char>>& challenge, + const build&, + const string& session) const; + + protected: + // True if the openssl version is greater or equal to 3.0.0 and so pkeyutl + // needs to be used instead of rsautl. + // + // Note that openssl 3.0.0 deprecates rsautl in favor of pkeyutl. + // + bool use_openssl_pkeyutl_; + }; +} + +#endif // MOD_BUILD_RESULT_MODULE_HXX diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index 99bdc8d..7806cda 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -6,7 +6,6 @@ #include <odb/database.hxx> #include <odb/transaction.hxx> -#include <libbutl/openssl.hxx> #include <libbutl/sendmail.hxx> #include <libbutl/fdstream.hxx> #include <libbutl/process-io.hxx> @@ -38,10 +37,8 @@ using namespace odb::core; // brep::build_result:: build_result (const build_result& r) - : database_module (r), - build_config_module (r), - options_ (r.initialized_ ? 
r.options_ : nullptr), - use_openssl_pkeyutl_ (r.initialized_ ? r.use_openssl_pkeyutl_ : false) + : build_result_module (r), + options_ (r.initialized_ ? r.options_ : nullptr) { } @@ -53,34 +50,12 @@ init (scanner& s) options_ = make_shared<options::build_result> ( s, unknown_mode::fail, unknown_mode::fail); - database_module::init (static_cast<const options::package_db&> (*options_), - options_->package_db_retry ()); - if (options_->build_config_specified ()) { - database_module::init (static_cast<const options::build_db&> (*options_), - options_->build_db_retry ()); - - build_config_module::init (*options_); - } + build_result_module::init (*options_, *options_); - try - { - optional<openssl_info> oi ( - openssl::info ([&trace, this] (const char* args[], size_t n) - { - l2 ([&]{trace << process_args {args, n};}); - }, - 2, - options_->openssl ())); - - use_openssl_pkeyutl_ = oi && - oi->name == "OpenSSL" && - oi->version >= semantic_version {3, 0, 0}; - } - catch (const system_error& e) - { - fail << "unable to obtain openssl version: " << e; + database_module::init (static_cast<const options::package_db&> (*options_), + options_->package_db_retry ()); } if (options_->root ().empty ()) @@ -129,149 +104,23 @@ handle (request& rq, response&) throw invalid_request (400, e.what ()); } - // Parse the task response session to obtain the build id and the timestamp, - // and to make sure the session matches tenant and the result manifest's - // package name, and version. + // Parse the task response session and make sure the session matches tenant + // and the result manifest's package name, and version. // - build_id id; - timestamp session_timestamp; + parse_session_result session; + const build_id& id (session.id); try { - const string& s (rqm.session); - - size_t p (s.find ('/')); // End of tenant. - - if (p == string::npos) - throw invalid_argument ("no package name"); - - if (tenant.compare (0, tenant.size (), s, 0, p) != 0) - throw invalid_argument ("tenant mismatch"); - - size_t b (p + 1); // Start of package name. - p = s.find ('/', b); // End of package name. - - if (p == b) - throw invalid_argument ("empty package name"); - - if (p == string::npos) - throw invalid_argument ("no package version"); - - package_name& name (rqm.result.name); - { - const string& n (name.string ()); - if (n.compare (0, n.size (), s, b, p - b) != 0) - throw invalid_argument ("package name mismatch"); - } - - b = p + 1; // Start of version. - p = s.find ('/', b); // End of version. - - if (p == string::npos) - throw invalid_argument ("no configuration name"); - - auto parse_version = [&s, &b, &p] (const char* what) -> version - { - // Intercept exception handling to add the parsing error attribution. - // - try - { - return brep::version (string (s, b, p - b)); - } - catch (const invalid_argument& e) - { - throw invalid_argument (string ("invalid ") + what + ": " + e.what ()); - } - }; + // Note: also verifies that the tenant matches the session. + // + session = parse_session (rqm.session); - version package_version (parse_version ("package version")); + if (rqm.result.name != id.package.name) + throw invalid_argument ("package name mismatch"); - if (package_version != rqm.result.version) + if (rqm.result.version != session.package_version) throw invalid_argument ("package version mismatch"); - - b = p + 1; // Start of target. - p = s.find ('/', b); // End of target. 
-
-    if (p == string::npos)
-      throw invalid_argument ("no target configuration name");
-
-    target_triplet target;
-    try
-    {
-      target = target_triplet (string (s, b, p - b));
-    }
-    catch (const invalid_argument& e)
-    {
-      throw invalid_argument (string ("invalid target: ") + e.what ());
-    }
-
-    b = p + 1;           // Start of target configuration name.
-    p = s.find ('/', b); // End of target configuration name.
-
-    if (p == string::npos)
-      throw invalid_argument ("no package configuration name");
-
-    string target_config (s, b, p - b);
-
-    if (target_config.empty ())
-      throw invalid_argument ("empty target configuration name");
-
-    b = p + 1;           // Start of package configuration name.
-    p = s.find ('/', b); // End of package configuration name.
-
-    if (p == string::npos)
-      throw invalid_argument ("no toolchain name");
-
-    string package_config (s, b, p - b);
-
-    if (package_config.empty ())
-      throw invalid_argument ("empty package configuration name");
-
-    b = p + 1;           // Start of toolchain name.
-    p = s.find ('/', b); // End of toolchain name.
-
-    if (p == string::npos)
-      throw invalid_argument ("no toolchain version");
-
-    string toolchain_name (s, b, p - b);
-
-    if (toolchain_name.empty ())
-      throw invalid_argument ("empty toolchain name");
-
-    b = p + 1;           // Start of toolchain version.
-    p = s.find ('/', b); // End of toolchain version.
-
-    if (p == string::npos)
-      throw invalid_argument ("no timestamp");
-
-    version toolchain_version (parse_version ("toolchain version"));
-
-    id = build_id (package_id (move (tenant), move (name), package_version),
-                   move (target),
-                   move (target_config),
-                   move (package_config),
-                   move (toolchain_name),
-                   toolchain_version);
-
-    try
-    {
-      size_t tsn;
-      string ts (s, p + 1);
-
-      session_timestamp = timestamp (
-        chrono::duration_cast<timestamp::duration> (
-          chrono::nanoseconds (stoull (ts, &tsn))));
-
-      if (tsn != ts.size ())
-        throw invalid_argument ("trailing junk");
-    }
-    // Handle invalid_argument or out_of_range (both derive from logic_error),
-    // that can be thrown by stoull().
-    //
-    catch (const logic_error& e)
-    {
-      throw invalid_argument (string ("invalid timestamp: ") + e.what ());
-    }
  }
  catch (const invalid_argument& e)
  {
@@ -341,6 +190,12 @@ handle (request& rq, response&)
   bool build_notify (false);
   bool unforced (true);
 
+  // Note that if the session authentication fails (probably due to the
+  // authentication settings change), then we log this case with the warning
+  // severity and respond with the 200 HTTP code as if the challenge is
+  // valid. The thinking is that we shouldn't alarm a law-abiding agent and
+  // shouldn't provide any information to a malicious one.
+  //
   {
     transaction t (build_db_->begin ());
 
@@ -348,251 +203,174 @@ handle (request& rq, response&)
     shared_ptr<build> b;
     if (!build_db_->query_one<package_build> (
           query<package_build>::build::id == id, pb))
+    {
       warn_expired ("no package build");
+    }
     else if ((b = move (pb.build))->state != build_state::building)
+    {
       warn_expired ("package configuration state is " + to_string (b->state));
-    else if (b->timestamp != session_timestamp)
+    }
+    else if (b->timestamp != session.timestamp)
+    {
       warn_expired ("non-matching timestamp");
-    else
+    }
+    else if (authenticate_session (*options_, rqm.challenge, *b, rqm.session))
     {
-      // Check the challenge.
+      // If the build is interrupted, then revert it to the original built
+      // state if this is a rebuild and delete it from the database otherwise.
// - // If the challenge doesn't match expectations (probably due to the - // authentication settings change), then we log this case with the - // warning severity and respond with the 200 HTTP code as if the - // challenge is valid. The thinking is that we shouldn't alarm a - // law-abaiding agent and shouldn't provide any information to a - // malicious one. - // - auto warn_auth = [&rqm, &warn] (const string& d) + if (rqm.result.status == result_status::interrupt) { - warn << "session '" << rqm.session << "' authentication failed: " << d; - }; + if (b->status) // Is this a rebuild? + { + b->state = build_state::built; - bool auth (false); + // Keep the force rebuild indication. Note that the forcing state is + // only valid for the building state. + // + if (b->force == force_state::forcing) + b->force = force_state::forced; - // Must both be present or absent. - // - if (!b->agent_challenge != !rqm.challenge) - warn_auth (rqm.challenge - ? "unexpected challenge" - : "challenge is expected"); - else if (bot_agent_key_map_ == nullptr) // Authentication is disabled. - auth = true; - else if (!b->agent_challenge) // Authentication is recently enabled. - warn_auth ("challenge is required now"); - else - { - assert (b->agent_fingerprint && rqm.challenge); - auto i (bot_agent_key_map_->find (*b->agent_fingerprint)); + // Cleanup the interactive build login information. + // + b->interactive = nullopt; - // The agent's key is recently replaced. - // - if (i == bot_agent_key_map_->end ()) - warn_auth ("agent's public key not found"); - else - { - try - { - openssl os (print_args, - path ("-"), fdstream_mode::text, 2, - process_env (options_->openssl (), - options_->openssl_envvar ()), - use_openssl_pkeyutl_ ? "pkeyutl" : "rsautl", - options_->openssl_option (), - use_openssl_pkeyutl_ ? "-verifyrecover" : "-verify", - "-pubin", - "-inkey", - i->second); - - for (const auto& c: *rqm.challenge) - os.out.put (c); // Sets badbit on failure. - - os.out.close (); - - string s; - getline (os.in, s); - - bool v (os.in.eof ()); - os.in.close (); - - if (os.wait () && v) - { - auth = s == *b->agent_challenge; - - if (!auth) - warn_auth ("challenge mismatched"); - } - else // The signature is presumably meaningless. - warn_auth ("unable to verify challenge"); - } - catch (const system_error& e) - { - fail << "unable to verify challenge: " << e; - } + // Cleanup the authentication data. + // + b->agent_fingerprint = nullopt; + b->agent_challenge = nullopt; + + // Note that we are unable to restore the pre-rebuild timestamp + // since it has been overwritten when the build task was issued. + // That, however, feels ok and we just keep it unchanged. + + build_db_->update (b); } + else + build_db_->erase (b); } - - if (auth) + else { - // If the build is interrupted, then revert it to the original built - // state if this is a rebuild and delete it from the database - // otherwise. + // Verify the result status/checksums. // - if (rqm.result.status == result_status::interrupt) + // Specifically, if the result status is skip, then it can only be in + // response to the soft rebuild task (all checksums are present in the + // build object) and the result checksums must match the build object + // checksums. On verification failure respond with the bad request + // HTTP code (400). + // + if (rqm.result.status == result_status::skip) { - if (b->status) // Is this a rebuild? 
- { - b->state = build_state::built; + if (!b->agent_checksum || + !b->worker_checksum || + !b->dependency_checksum) + throw invalid_request (400, "unexpected skip result status"); + + // Can only be absent for initial build, in which case the + // checksums are also absent and we would end up with the above + // 400 response. + // + assert (b->status); - // Keep the force rebuild indication. Note that the forcing state - // is only valid for the building state. - // - if (b->force == force_state::forcing) - b->force = force_state::forced; + // Verify that the result checksum matches the build checksum and + // throw invalid_request(400) if that's not the case. + // + auto verify = [] (const string& build_checksum, + const optional<string>& result_checksum, + const char* what) + { + if (!result_checksum) + throw invalid_request ( + 400, + string (what) + " checksum is expected for skip result status"); + + if (*result_checksum != build_checksum) + throw invalid_request ( + 400, + string (what) + " checksum '" + build_checksum + + "' is expected instead of '" + *result_checksum + + "' for skip result status"); + }; + + verify (*b->agent_checksum, rqm.agent_checksum, "agent"); + + verify (*b->worker_checksum, + rqm.result.worker_checksum, + "worker"); + + verify (*b->dependency_checksum, + rqm.result.dependency_checksum, + "dependency"); + } - // Cleanup the interactive build login information. - // - b->interactive = nullopt; + unforced = b->force == force_state::unforced; - // Cleanup the authentication data. - // - b->agent_fingerprint = nullopt; - b->agent_challenge = nullopt; + // Don't send email to the build-email address for the + // success-to-success status change, unless the build was forced. + // + build_notify = !(rqm.result.status == result_status::success && + b->status && + *b->status == rqm.result.status && + unforced); - // Note that we are unable to restore the pre-rebuild timestamp - // since it has been overwritten when the build task was issued. - // That, however, feels ok and we just keep it unchanged. + b->state = build_state::built; + b->force = force_state::unforced; - build_db_->update (b); - } - else - build_db_->erase (b); - } - else - { - // Verify the result status/checksums. - // - // Specifically, if the result status is skip, then it can only be - // in response to the soft rebuild task (all checksums are present - // in the build object) and the result checksums must match the - // build object checksums. On verification failure respond with the - // bad request HTTP code (400). - // - if (rqm.result.status == result_status::skip) - { - if (!b->agent_checksum || - !b->worker_checksum || - !b->dependency_checksum) - throw invalid_request (400, "unexpected skip result status"); - - // Can only be absent for initial build, in which case the - // checksums are also absent and we would end up with the above - // 400 response. - // - assert (b->status); - - // Verify that the result checksum matches the build checksum and - // throw invalid_request(400) if that's not the case. 
- // - auto verify = [] (const string& build_checksum, - const optional<string>& result_checksum, - const char* what) - { - if (!result_checksum) - throw invalid_request ( - 400, - string (what) + - " checksum is expected for skip result status"); - - if (*result_checksum != build_checksum) - throw invalid_request ( - 400, - string (what) + " checksum '" + build_checksum + - "' is expected instead of '" + *result_checksum + - "' for skip result status"); - }; - - verify (*b->agent_checksum, rqm.agent_checksum, "agent"); - - verify (*b->worker_checksum, - rqm.result.worker_checksum, - "worker"); - - verify (*b->dependency_checksum, - rqm.result.dependency_checksum, - "dependency"); - } + // Cleanup the interactive build login information. + // + b->interactive = nullopt; - unforced = b->force == force_state::unforced; + // Cleanup the authentication data. + // + b->agent_fingerprint = nullopt; + b->agent_challenge = nullopt; - // Don't send email to the build-email address for the - // success-to-success status change, unless the build was forced. - // - build_notify = !(rqm.result.status == result_status::success && - b->status && - *b->status == rqm.result.status && - unforced); + b->timestamp = system_clock::now (); + b->soft_timestamp = b->timestamp; - b->state = build_state::built; - b->force = force_state::unforced; + // If the result status is other than skip, then save the status, + // results, and checksums and update the hard timestamp. + // + if (rqm.result.status != result_status::skip) + { + b->status = rqm.result.status; + b->hard_timestamp = b->soft_timestamp; - // Cleanup the interactive build login information. + // Mark the section as loaded, so results are updated. // - b->interactive = nullopt; + b->results_section.load (); + b->results = move (rqm.result.results); - // Cleanup the authentication data. + // Save the checksums. // - b->agent_fingerprint = nullopt; - b->agent_challenge = nullopt; + b->agent_checksum = move (rqm.agent_checksum); + b->worker_checksum = move (rqm.result.worker_checksum); + b->dependency_checksum = move (rqm.result.dependency_checksum); + } - b->timestamp = system_clock::now (); - b->soft_timestamp = b->timestamp; + build_db_->update (b); - // If the result status is other than skip, then save the status, - // results, and checksums and update the hard timestamp. - // - if (rqm.result.status != result_status::skip) - { - b->status = rqm.result.status; - b->hard_timestamp = b->soft_timestamp; - - // Mark the section as loaded, so results are updated. - // - b->results_section.load (); - b->results = move (rqm.result.results); - - // Save the checksums. - // - b->agent_checksum = move (rqm.agent_checksum); - b->worker_checksum = move (rqm.result.worker_checksum); - b->dependency_checksum = move (rqm.result.dependency_checksum); - } - - build_db_->update (b); + // Don't send the build notification email if the task result is + // `skip`, the configuration is hidden, or is now excluded by the + // package. + // + if (rqm.result.status != result_status::skip && belongs (*tc, "all")) + { + shared_ptr<build_package> p ( + build_db_->load<build_package> (b->id.package)); - // Don't send the build notification email if the task result is - // `skip`, the configuration is hidden, or is now excluded by the - // package. + // The package configuration should be present (see mod-builds.cxx + // for details) but if it is not, let's log the warning. 
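+          // For example, such a warning could look as follows (illustrative
+          // values taken from this commit's tests):
+          //
+          //   cannot find configuration 'default' for package libhello/1.1.0+10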
// - if (rqm.result.status != result_status::skip && belongs (*tc, "all")) + if (const build_package_config* pc = find (b->package_config_name, + p->configs)) { - shared_ptr<build_package> p ( - build_db_->load<build_package> (b->id.package)); - - // The package configuration should be present (see mod-builds.cxx - // for details) but if it is not, let's log the warning. - // - if (const build_package_config* pc = find (b->package_config_name, - p->configs)) - { - if (!exclude (*pc, p->builds, p->constraints, *tc)) - bld = move (b); - } - else - warn << "cannot find configuration '" << b->package_config_name - << "' for package " << p->id.name << '/' << p->version; + if (!exclude (*pc, p->builds, p->constraints, *tc)) + bld = move (b); } + else + warn << "cannot find configuration '" << b->package_config_name + << "' for package " << p->id.name << '/' << p->version; } } } diff --git a/mod/mod-build-result.hxx b/mod/mod-build-result.hxx index 1b32ad4..87ef1f2 100644 --- a/mod/mod-build-result.hxx +++ b/mod/mod-build-result.hxx @@ -8,12 +8,11 @@ #include <libbrep/utility.hxx> #include <mod/module-options.hxx> -#include <mod/database-module.hxx> -#include <mod/build-config-module.hxx> +#include <mod/build-result-module.hxx> namespace brep { - class build_result: public database_module, private build_config_module + class build_result: public build_result_module { public: build_result () = default; @@ -36,13 +35,6 @@ namespace brep private: shared_ptr<options::build_result> options_; - - // True if the openssl version is greater or equal to 3.0.0 and so pkeyutl - // needs to be used instead of rsautl. - // - // Note that openssl 3.0.0 deprecates rsautl in favor of pkeyutl. - // - bool use_openssl_pkeyutl_; }; } diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index 33a7f58..4b0165f 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -286,15 +286,14 @@ handle (request& rq, response& rs) b->toolchain_version.string () + '/' + to_string (ts)); - string result_url (options_->host () + - tenant_dir (options_->root (), b->tenant).string () + - "?build-result"); + string tenant (tenant_dir (options_->root (), b->tenant).string ()); + string result_url (options_->host () + tenant + "?build-result"); assert (transaction::has_current ()); assert (p->internal ()); // The package is expected to be buildable. - lazy_shared_ptr<build_repository> r (p->internal_repository.load ()); + shared_ptr<build_repository> r (p->internal_repository.load ()); strings fps; if (r->certificate_fingerprint) @@ -369,9 +368,43 @@ handle (request& rq, response& rs) move (t->interactive), move (b->worker_checksum)); + // Collect the build artifacts upload URLs, skipping those which are + // excluded with the upload-*-exclude configuration options. 
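+    // For example, for the bindist upload type and an empty tenant the
+    // resulting URL could look like (illustrative host):
+    //
+    //   https://example.org/?upload=bindist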
+ // + vector<upload_url> upload_urls; + + for (const auto& ud: options_->upload_data ()) + { + const string& t (ud.first); + + auto exclude = [&t] (const multimap<string, string>& mm, + const string& v) + { + auto range (mm.equal_range (t)); + + for (auto i (range.first); i != range.second; ++i) + { + if (i->second == v) + return true; + } + + return false; + }; + + if (!exclude (options_->upload_toolchain_exclude (), + b->toolchain_name) && + !exclude (options_->upload_repository_exclude (), + r->canonical_name)) + { + upload_urls.emplace_back (options_->host () + tenant + "?upload=" + t, + t); + } + } + return task_response_manifest (move (session), move (b->agent_challenge), move (result_url), + move (upload_urls), move (b->agent_checksum), move (task)); }; diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx index 68bb9be..df2365a 100644 --- a/mod/mod-ci.cxx +++ b/mod/mod-ci.cxx @@ -498,10 +498,6 @@ handle (request& rq, response& rs) // it's the 4XX (HTTP client error) status value, then we remove the // directory. // - // Note that leaving the directory in place in case of a submission error - // would have prevent the user from re-submitting until we research the - // issue and manually remove the directory. - // auto stash_submit_dir = [&dd, error] () { if (dir_exists (dd)) @@ -520,7 +516,7 @@ handle (request& rq, response& rs) // Run the submission handler, if specified, reading the result manifest // from its stdout and caching it as a name/value pair list for later use - // (forwarding to the client, sending via email, etc.). Otherwise, create + // (forwarding to the client, sending via email, etc). Otherwise, create // implied result manifest. // status_code sc; @@ -598,8 +594,10 @@ handle (request& rq, response& rs) // Remove the directory if the client error is detected. // if (sc >= 400 && sc < 500) + { rmdir_r (dd); - + } + // // Otherwise, save the result manifest, into the directory. Also stash the // directory for troubleshooting in case of the server error. // diff --git a/mod/mod-package-version-details.cxx b/mod/mod-package-version-details.cxx index 3a6ef1f..e47d9b4 100644 --- a/mod/mod-package-version-details.cxx +++ b/mod/mod-package-version-details.cxx @@ -9,6 +9,8 @@ #include <odb/database.hxx> #include <odb/transaction.hxx> +#include <libbutl/filesystem.hxx> // dir_iterator, dir_entry + #include <web/server/module.hxx> #include <web/server/mime-url-encoding.hxx> @@ -47,6 +49,12 @@ init (scanner& s) options_ = make_shared<options::package_version_details> ( s, unknown_mode::fail, unknown_mode::fail); + // Verify that the bindist-url option is specified when necessary. + // + if (options_->bindist_root_specified () && + !options_->bindist_url_specified ()) + fail << "bindist-url must be specified if bindist-root is specified"; + database_module::init (static_cast<const options::package_db&> (*options_), options_->package_db_retry ()); @@ -187,8 +195,8 @@ handle (request& rq, response& rs) const string what (title + " description"); s << (full - ? DIV_TEXT (*d, * - pkg->description_type, + ? DIV_TEXT (*d, + *pkg->description_type, true /* strip_title */, id, what, @@ -526,6 +534,168 @@ handle (request& rq, response& rs) t.commit (); + // Display the binary distribution packages for this tenant, package, and + // version, if present. Print the archive distributions last. 
+  //
+  if (options_->bindist_root_specified ())
+  {
+    // Collect all the available package configurations by iterating over the
+    // <distribution> and <os-release> subdirectories and the <package-config>
+    // symlinks in the following filesystem hierarchy:
+    //
+    // [<tenant>/]<distribution>/<os-release>/<project>/<package>/<version>/<package-config>
+    //
+    // Note that it is possible that new directories and symlinks are created
+    // and/or removed while we iterate over the filesystem entries in the
+    // above hierarchy, which may result in system_error exceptions. If that
+    // happens, we just ignore such exceptions, trying to collect what we can.
+    //
+    const dir_path& br (options_->bindist_root ());
+
+    dir_path d (br);
+
+    if (!tenant.empty ())
+      d /= tenant;
+
+    // Note that distribution and os_release are simple paths and the
+    // config_symlink and config_dir are relative to the bindist root
+    // directory.
+    //
+    struct bindist_config
+    {
+      dir_path distribution; // debian, fedora, archive
+      dir_path os_release;   // fedora37, windows10
+      path     symlink;      // .../default, .../release
+      dir_path directory;    // .../default-2023-05-11T10:13:43Z
+
+      bool
+      operator< (const bindist_config& v)
+      {
+        if (int r = distribution.compare (v.distribution))
+          return distribution.string () == "archive" ? false :
+                 v.distribution.string () == "archive" ? true :
+                 r < 0;
+
+        if (int r = os_release.compare (v.os_release))
+          return r < 0;
+
+        return symlink < v.symlink;
+      }
+    };
+
+    vector<bindist_config> configs;
+
+    if (dir_exists (d))
+    try
+    {
+      for (const dir_entry& de: dir_iterator (d, dir_iterator::ignore_dangling))
+      {
+        if (de.type () != entry_type::directory)
+          continue;
+
+        // Distribution directory.
+        //
+        dir_path dd (path_cast<dir_path> (de.path ()));
+
+        try
+        {
+          dir_path fdd (d / dd);
+
+          for (const dir_entry& re:
+                 dir_iterator (fdd, dir_iterator::ignore_dangling))
+          {
+            if (re.type () != entry_type::directory)
+              continue;
+
+            // OS release directory.
+            //
+            dir_path rd (path_cast<dir_path> (re.path ()));
+
+            // Package version directory.
+            //
+            dir_path vd (fdd /
+                         rd /
+                         dir_path (pkg->project.string ()) /
+                         dir_path (pn.string ()) /
+                         dir_path (sver));
+
+            try
+            {
+              for (const dir_entry& ce:
+                     dir_iterator (vd, dir_iterator::ignore_dangling))
+              {
+                if (ce.ltype () != entry_type::symlink)
+                  continue;
+
+                // Skip symlinks which have extensions. Note that upload
+                // handlers may add an extension to a newly created symlink to
+                // atomically replace an old symlink with the new one.
+                //
+                const path& cl (ce.path ());
+                if (cl.extension_cstring () != nullptr)
+                  continue;
+
+                try
+                {
+                  path fcl (vd / cl);
+                  dir_path cd (path_cast<dir_path> (followsymlink (fcl)));
+
+                  if (cd.sub (br))
+                    configs.push_back (
+                      bindist_config {dd, rd, fcl.leaf (br), cd.leaf (br)});
+                }
+                catch (const system_error&) {}
+              }
+            }
+            catch (const system_error&) {}
+          }
+        }
+        catch (const system_error&) {}
+      }
+    }
+    catch (const system_error&) {}
+
+    // Sort and print collected package configurations, if any.
+ // + if (!configs.empty ()) + { + sort (configs.begin (), configs.end ()); + + s << H3 << "Binaries" << ~H3 + << TABLE(ID="binaries") + << TBODY; + + for (const bindist_config& c: configs) + { + s << TR(CLASS="binaries") + << TD << SPAN(CLASS="value") << c.distribution << ~SPAN << ~TD + << TD << SPAN(CLASS="value") << c.os_release << ~SPAN << ~TD + << TD + << SPAN(CLASS="value") + << A + << HREF + << options_->bindist_url () << '/' << c.symlink + << ~HREF + << c.symlink.leaf () + << ~A + << " (" + << A + << HREF + << options_->bindist_url () << '/' << c.directory + << ~HREF + << "snapshot" + << ~A + << ")" + << ~SPAN + << ~TD + << ~TR; + } + + s << ~TBODY + << ~TABLE; + } + } + if (builds) { s << H3 << "Builds" << ~H3 diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx index f00e80e..1b18996 100644 --- a/mod/mod-repository-root.cxx +++ b/mod/mod-repository-root.cxx @@ -16,6 +16,7 @@ #include <mod/mod-ci.hxx> #include <mod/mod-submit.hxx> +#include <mod/mod-upload.hxx> #include <mod/mod-builds.hxx> #include <mod/mod-packages.hxx> #include <mod/mod-build-log.hxx> @@ -118,7 +119,8 @@ namespace brep builds_ (make_shared<builds> ()), build_configs_ (make_shared<build_configs> ()), submit_ (make_shared<submit> ()), - ci_ (make_shared<ci> ()) + ci_ (make_shared<ci> ()), + upload_ (make_shared<upload> ()) { } @@ -178,6 +180,10 @@ namespace brep r.initialized_ ? r.ci_ : make_shared<ci> (*r.ci_)), + upload_ ( + r.initialized_ + ? r.upload_ + : make_shared<upload> (*r.upload_)), options_ ( r.initialized_ ? r.options_ @@ -204,6 +210,7 @@ namespace brep append (r, build_configs_->options ()); append (r, submit_->options ()); append (r, ci_->options ()); + append (r, upload_->options ()); return r; } @@ -249,6 +256,7 @@ namespace brep sub_init (*build_configs_, "build_configs"); sub_init (*submit_, "submit"); sub_init (*ci_, "ci"); + sub_init (*upload_, "upload"); // Parse own configuration options. // @@ -444,6 +452,13 @@ namespace brep return handle ("ci", param); } + else if (func == "upload") + { + if (handler_ == nullptr) + handler_.reset (new upload (*upload_)); + + return handle ("upload", param); + } else return nullopt; }; diff --git a/mod/mod-repository-root.hxx b/mod/mod-repository-root.hxx index 9e28797..4f40c94 100644 --- a/mod/mod-repository-root.hxx +++ b/mod/mod-repository-root.hxx @@ -24,6 +24,7 @@ namespace brep class build_configs; class submit; class ci; + class upload; class repository_root: public handler { @@ -70,6 +71,7 @@ namespace brep shared_ptr<build_configs> build_configs_; shared_ptr<submit> submit_; shared_ptr<ci> ci_; + shared_ptr<upload> upload_; shared_ptr<options::repository_root> options_; diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx index b73c96e..5ee358a 100644 --- a/mod/mod-submit.cxx +++ b/mod/mod-submit.cxx @@ -163,7 +163,7 @@ handle (request& rq, response& rs) if (!options_->submit_data_specified ()) return respond_manifest (404, "submission disabled"); - // Parse the request form data and verifying the submission size limit. + // Parse the request form data and verify the submission size limit. // // Note that if it is exceeded, then there are parameters and this is the // submission rather than the form request, and so we respond with the @@ -292,8 +292,8 @@ handle (request& rq, response& rs) // However, using the abbreviated checksum can be helpful for // troubleshooting. 
// - td = dir_path (options_->submit_temp () / - dir_path (path::traits_type::temp_name (ref))); + td = options_->submit_temp () / + dir_path (path::traits_type::temp_name (ref)); // It's highly unlikely but still possible that the temporary directory // already exists. This can only happen due to the unclean web server @@ -553,7 +553,7 @@ handle (request& rq, response& rs) // Run the submission handler, if specified, reading the result manifest // from its stdout and caching it as a name/value pair list for later use - // (forwarding to the client, sending via email, etc.). Otherwise, create + // (forwarding to the client, sending via email, etc). Otherwise, create // implied result manifest. // status_code sc; diff --git a/mod/mod-upload.cxx b/mod/mod-upload.cxx new file mode 100644 index 0000000..1474363 --- /dev/null +++ b/mod/mod-upload.cxx @@ -0,0 +1,766 @@ +// file : mod/mod-upload.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-upload.hxx> + +#include <odb/database.hxx> +#include <odb/transaction.hxx> + +#include <libbutl/uuid.hxx> +#include <libbutl/base64.hxx> +#include <libbutl/sha256.hxx> +#include <libbutl/sendmail.hxx> +#include <libbutl/fdstream.hxx> +#include <libbutl/timestamp.hxx> +#include <libbutl/filesystem.hxx> +#include <libbutl/process-io.hxx> // operator<<(ostream, process_args) +#include <libbutl/manifest-types.hxx> +#include <libbutl/manifest-serializer.hxx> + +#include <web/server/module.hxx> + +#include <libbrep/build.hxx> +#include <libbrep/build-odb.hxx> +#include <libbrep/build-package.hxx> +#include <libbrep/build-package-odb.hxx> + +#include <mod/module-options.hxx> +#include <mod/external-handler.hxx> + +using namespace std; +using namespace butl; +using namespace brep::cli; +using namespace odb::core; + +// While currently the user-defined copy constructor is not required (we don't +// need to deep copy nullptr's), it is a good idea to keep the placeholder +// ready for less trivial cases. +// +brep::upload:: +upload (const upload& r) + : build_result_module (r), + options_ (r.initialized_ ? r.options_ : nullptr) +{ +} + +void brep::upload:: +init (scanner& s) +{ + HANDLER_DIAG; + + options_ = make_shared<options::upload> ( + s, unknown_mode::fail, unknown_mode::fail); + + // Verify that the upload handling is setup properly, if configured. + // + for (const auto& ud: options_->upload_data ()) + { + const string& t (ud.first); + + if (t.empty ()) + fail << "empty upload type in upload-data configuration option"; + + if (ud.second.relative ()) + fail << t << " upload-data path '" << ud.second << "' is relative"; + + if (!dir_exists (ud.second)) + fail << t << " upload-data directory '" << ud.second + << "' does not exist"; + + const map<string, path>& uh (options_->upload_handler ()); + auto i (uh.find (t)); + + if (i != uh.end () && i->second.relative ()) + fail << t << " upload-handler path '" << i->second << "' is relative"; + } + + if (options_->upload_data_specified ()) + { + if (!options_->build_config_specified ()) + fail << "upload functionality is enabled but package building " + << "functionality is disabled"; + + build_result_module::init (*options_, *options_); + } + + if (options_->root ().empty ()) + options_->root (dir_path ("/")); +} + +bool brep::upload:: +handle (request& rq, response& rs) +{ + using brep::version; // Not to confuse with module::version. 
+
+  using serializer = manifest_serializer;
+  using serialization = manifest_serialization;
+
+  HANDLER_DIAG;
+
+  // We will respond with the manifest to the upload protocol violations and
+  // with a plain text message on the internal errors. In the latter case we
+  // will always respond with the same neutral message for security reasons,
+  // logging the error details. Note that descriptions of exceptions caught by
+  // the web server are returned to the client (see web/module.hxx for
+  // details), and we want to avoid this when there is a danger of exposing
+  // sensitive data.
+  //
+  // Also we will pass through exceptions thrown by the underlying API, unless
+  // we need to handle them or add details for the description, in which case
+  // we will fall back to one of the above-mentioned response methods.
+  //
+  // Note that both respond_manifest() and respond_error() are normally called
+  // right before the end of the request handling. They both always return
+  // true to allow bailing out with a single line, for example:
+  //
+  // return respond_error (); // Request is handled with an error.
+  //
+  string request_id; // Will be set later.
+  auto respond_manifest = [&rs, &request_id] (status_code status,
+                                              const string& message) -> bool
+  {
+    serializer s (rs.content (status, "text/manifest;charset=utf-8"),
+                  "response");
+
+    s.next ("", "1"); // Start of manifest.
+    s.next ("status", to_string (status));
+    s.next ("message", message);
+
+    if (!request_id.empty ())
+      s.next ("reference", request_id);
+
+    s.next ("", ""); // End of manifest.
+    return true;
+  };
+
+  auto respond_error = [&rs] (status_code status = 500) -> bool
+  {
+    rs.content (status, "text/plain;charset=utf-8")
+      << "upload handling failed" << endl;
+
+    return true;
+  };
+
+  // Check if the upload functionality is enabled.
+  //
+  // Note that this is not an upload protocol violation but it feels right to
+  // respond with the manifest, to help the client a bit.
+  //
+  if (!options_->upload_data_specified ())
+    return respond_manifest (404, "upload disabled");
+
+  // Parse the request data and verify the upload size limit.
+  //
+  // Note that the size limit is upload type-specific. Thus, first, we need to
+  // determine the upload type which we expect to be specified in the URL as a
+  // value of the upload parameter.
+  //
+  string type;
+  dir_path dir;
+
+  try
+  {
+    name_value_scanner s (rq.parameters (0 /* limit */, true /* url_only */));
+
+    // We only expect the upload=<type> parameter in URL.
+    //
+    params::upload params (
+      params::upload (s, unknown_mode::fail, unknown_mode::fail));
+
+    type = move (params.type ());
+
+    if (type.empty ())
+      return respond_manifest (400, "upload type expected");
+
+    // Check if this upload type is enabled. While at it, cache the upload
+    // data directory path.
+    //
+    const map<string, dir_path>& ud (options_->upload_data ());
+    auto i (ud.find (type));
+
+    if (i == ud.end ())
+      return respond_manifest (404, type + " upload disabled");
+
+    dir = i->second;
+  }
+  catch (const cli::exception&)
+  {
+    return respond_manifest (400, "invalid parameter");
+  }
+
+  try
+  {
+    const map<string, size_t>& us (options_->upload_max_size ());
+    auto i (us.find (type));
+    rq.parameters (i != us.end () ? i->second : 10485760); // 10M by default.
+  }
+  catch (const invalid_request& e)
+  {
+    if (e.status == 413) // Payload too large?
+ return respond_manifest (e.status, type + " upload size exceeds limit"); + + throw; + } + + // The request parameters are now parsed and the limit doesn't really matter. + // + const name_values& rps (rq.parameters (0 /* limit */)); + + // Verify the upload parameters we expect. The unknown ones will be + // serialized to the upload manifest. + // + params::upload params; + + try + { + name_value_scanner s (rps); + params = params::upload (s, unknown_mode::skip, unknown_mode::skip); + } + catch (const cli::exception&) + { + return respond_manifest (400, "invalid parameter"); + } + + const string& session (params.session ()); + const string& instance (params.instance ()); + const string& archive (params.archive ()); + const string& sha256sum (params.sha256sum ()); + + if (session.empty ()) + return respond_manifest (400, "upload session expected"); + + optional<vector<char>> challenge; + + if (params.challenge_specified ()) + try + { + challenge = base64_decode (params.challenge ()); + } + catch (const invalid_argument&) + { + return respond_manifest (400, "invalid challenge"); + } + + if (instance.empty ()) + return respond_manifest (400, "upload instance expected"); + + if (archive.empty ()) + return respond_manifest (400, "upload archive expected"); + + if (sha256sum.empty ()) + return respond_manifest (400, "upload archive checksum expected"); + + if (sha256sum.size () != 64) + return respond_manifest (400, "invalid upload archive checksum"); + + // Verify that unknown parameter values satisfy the requirements (contain + // only UTF-8 encoded graphic characters plus '\t', '\r', and '\n'). + // + // Actually, the expected ones must satisfy too, so check them as well. + // + string what; + for (const name_value& nv: rps) + { + if (nv.value && + !utf8 (*nv.value, what, codepoint_types::graphic, U"\n\r\t")) + return respond_manifest (400, + "invalid parameter " + nv.name + ": " + what); + } + + parse_session_result sess; + + try + { + sess = parse_session (session); + } + catch (const invalid_argument& e) + { + return respond_manifest (400, string ("invalid session: ") + e.what ()); + } + + // If the session expired (no such configuration, etc) then, similar to the + // build result module, we log this case with the warning severity and + // respond with manifest with the 200 status as if the session is valid (see + // the build result module for the reasoning). + // + auto warn_expired = [&session, &warn] (const string& d) + { + warn << "session '" << session << "' expired: " << d; + }; + + const build_id& id (sess.id); + + // Make sure the build configuration still exists. + // + const build_target_config* tc; + { + auto i (target_conf_map_->find ( + build_target_config_id {id.target, id.target_config_name})); + + if (i == target_conf_map_->end ()) + { + warn_expired ("no build configuration"); + return respond_manifest (200, type + " upload is queued"); + } + + tc = i->second; + } + + // Note that if the session authentication fails (probably due to the + // authentication settings change), then we log this case with the warning + // severity and respond with manifest with the 200 status as if the + // challenge is valid (see the build result module for the reasoning). 
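+  // For example, for the bindist upload type the response manifest for an
+  // expired session could look as follows:
+  //
+  //   : 1
+  //   status: 200
+  //   message: bindist upload is queued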
+ // + shared_ptr<build> bld; + shared_ptr<build_package> pkg; + shared_ptr<build_repository> rep; + { + transaction t (build_db_->begin ()); + + package_build pb; + shared_ptr<build> b; + if (!build_db_->query_one<package_build> ( + query<package_build>::build::id == id, pb)) + { + warn_expired ("no package build"); + } + else if ((b = move (pb.build))->state != build_state::building) + { + warn_expired ("package configuration state is " + to_string (b->state)); + } + else if (b->timestamp != sess.timestamp) + { + warn_expired ("non-matching timestamp"); + } + else if (authenticate_session (*options_, challenge, *b, session)) + { + bld = move (b); + pkg = build_db_->load<build_package> (id.package); + rep = pkg->internal_repository.load (); + } + + t.commit (); + } + + // Note that from now on the result manifest we respond with will contain + // the reference value. + // + try + { + request_id = uuid::generate ().string (); + } + catch (const system_error& e) + { + error << "unable to generate request id: " << e; + return respond_error (); + } + + if (bld == nullptr) + return respond_manifest (200, type + " upload is queued"); + + // Create the upload data directory. + // + dir_path dd (dir / dir_path (request_id)); + + try + { + // It's highly unlikely but still possible that the directory already + // exists. This can only happen if the generated uuid is not unique. + // + if (try_mkdir (dd) == mkdir_status::already_exists) + throw_generic_error (EEXIST); + } + catch (const system_error& e) + { + error << "unable to create directory '" << dd << "': " << e; + return respond_error (); + } + + auto_rmdir ddr (dd); + + // Save the package archive into the temporary directory and verify its + // checksum. + // + // Note that the archive file name can potentially contain directory path in + // the POSIX form, so let's strip it if that's the case. + // + path a; + path af; + + try + { + size_t n (archive.find_last_of ('/')); + a = path (n != string::npos ? string (archive, n + 1) : archive); + af = dd / a; + } + catch (const invalid_path&) + { + return respond_manifest (400, "invalid package archive name"); + } + + try + { + istream& is (rq.open_upload ("archive")); + + // Note that istream::read() sets failbit if unable to read the requested + // number of bytes. + // + is.exceptions (istream::badbit); + + sha256 sha; + char buf[8192]; + ofdstream os (af, fdopen_mode::binary); + + while (!eof (is)) + { + is.read (buf, sizeof (buf)); + + if (size_t n = is.gcount ()) + { + sha.append (buf, n); + os.write (buf, n); + } + } + + os.close (); + + // Respond with the unprocessable entity (422) code for the archive + // checksum mismatch. + // + if (sha.string () != sha256sum) + return respond_manifest (422, "upload archive checksum mismatch"); + } + // Note that invalid_argument (thrown by open_upload() function call) can + // mean both no archive upload or multiple archive uploads. + // + catch (const invalid_argument&) + { + return respond_manifest (400, "archive upload expected"); + } + catch (const io_error& e) + { + error << "unable to write package archive '" << af << "': " << e; + return respond_error (); + } + + // Serialize the upload request manifest to a stream. On the serialization + // error respond to the client with the manifest containing the bad request + // (400) code and return false, on the stream error pass through the + // io_error exception, otherwise return true. 
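+  // For a complete example of such a serialized request manifest see
+  // tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/request.manifest.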
+  //
+  timestamp ts (system_clock::now ());
+
+  auto rqm = [&request_id,
+              &ts,
+              &rps,
+              &session,
+              &instance,
+              &a,
+              &sha256sum,
+              &id,
+              &bld,
+              &pkg,
+              &rep,
+              &tc,
+              &sess,
+              &respond_manifest,
+              this] (ostream& os, bool long_lines = false) -> bool
+  {
+    try
+    {
+      serializer s (os, "request", long_lines);
+
+      // Serialize the upload manifest header.
+      //
+      s.next ("", "1"); // Start of manifest.
+      s.next ("id", request_id);
+      s.next ("session", session);
+      s.next ("instance", instance);
+      s.next ("archive", a.string ());
+      s.next ("sha256sum", sha256sum);
+
+      s.next ("timestamp",
+              butl::to_string (ts,
+                               "%Y-%m-%dT%H:%M:%SZ",
+                               false /* special */,
+                               false /* local */));
+
+      s.next ("name", id.package.name.string ());
+      s.next ("version", pkg->version.string ());
+      s.next ("project", pkg->project.string ());
+      s.next ("target-config", tc->name);
+      s.next ("package-config", id.package_config_name);
+      s.next ("target", tc->target.string ());
+
+      if (!tenant.empty ())
+        s.next ("tenant", tenant);
+
+      s.next ("toolchain-name", id.toolchain_name);
+      s.next ("toolchain-version", sess.toolchain_version.string ());
+      s.next ("repository-name", rep->canonical_name);
+
+      s.next ("machine-name", bld->machine);
+      s.next ("machine-summary", bld->machine_summary);
+
+      // Serialize the request parameters.
+      //
+      // Note that the serializer constrains the parameter names (can't start
+      // with '#', can't contain ':' or whitespaces, etc.).
+      //
+      for (const name_value& nv: rps)
+      {
+        // Note that the upload parameter is renamed to '_' by the root
+        // handler (see the request_proxy class for details).
+        //
+        const string& n (nv.name);
+        if (n != "_" &&
+            n != "session" &&
+            n != "challenge" &&
+            n != "instance" &&
+            n != "archive" &&
+            n != "sha256sum")
+          s.next (n, nv.value ? *nv.value : "");
+      }
+
+      s.next ("", ""); // End of manifest.
+      return true;
+    }
+    catch (const serialization& e)
+    {
+      respond_manifest (400, string ("invalid parameter: ") + e.what ());
+      return false;
+    }
+  };
+
+  // Serialize the upload request manifest to the upload directory.
+  //
+  path rqf (dd / "request.manifest");
+
+  try
+  {
+    ofdstream os (rqf);
+    bool r (rqm (os));
+    os.close ();
+
+    if (!r)
+      return true; // The client has already been sent the response manifest.
+  }
+  catch (const io_error& e)
+  {
+    error << "unable to write to '" << rqf << "': " << e;
+    return respond_error ();
+  }
+
+  // Given that the upload data is now successfully persisted we are no longer
+  // in charge of removing it, except for the cases when the upload
+  // handler terminates with an error (see below for details).
+  //
+  ddr.cancel ();
+
+  // If the handler terminates with non-zero exit status or specifies 5XX
+  // (HTTP server error) upload result manifest status value, then we stash
+  // the upload data directory for troubleshooting. Otherwise, if it's the 4XX
+  // (HTTP client error) status value, then we remove the directory.
+  //
+  auto stash_upload_dir = [&dd, error] ()
+  {
+    if (dir_exists (dd))
+    try
+    {
+      mvdir (dd, dir_path (dd + ".fail"));
+    }
+    catch (const system_error& e)
+    {
+      // Not much we can do here. Let's just log the issue and bail out
+      // leaving the directory in place.
+      //
+      error << "unable to rename directory '" << dd << "': " << e;
+    }
+  };
+
+  // Run the upload handler, if specified, reading the result manifest from
+  // its stdout and caching it as a name/value pair list for later use
+  // (forwarding to the client, sending via email, etc). Otherwise, create
+  // implied result manifest.
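+  // For example, the bindist upload handler could be configured as follows
+  // (illustrative paths; the handler is executed with these arguments
+  // followed by the upload data directory path):
+  //
+  //   upload-handler          bindist=/home/brep/install/bin/brep-upload-bindist
+  //   upload-handler-argument bindist=--keep-previous
+  //   upload-handler-argument bindist=/var/bindist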
+  //
+  status_code sc;
+  vector<manifest_name_value> rvs;
+
+  const map<string, path>& uh (options_->upload_handler ());
+  auto hi (uh.find (type));
+
+  if (hi != uh.end ())
+  {
+    auto range (options_->upload_handler_argument ().equal_range (type));
+
+    strings args;
+    for (auto i (range.first); i != range.second; ++i)
+      args.push_back (i->second);
+
+    const map<string, size_t>& ht (options_->upload_handler_timeout ());
+    auto i (ht.find (type));
+
+    optional<external_handler::result_manifest> r (
+      external_handler::run (hi->second,
+                             args,
+                             dd,
+                             i != ht.end () ? i->second : 0,
+                             error,
+                             warn,
+                             verb_ ? &trace : nullptr));
+
+    if (!r)
+    {
+      stash_upload_dir ();
+      return respond_error (); // The diagnostics is already issued.
+    }
+
+    sc = r->status;
+    rvs = move (r->values);
+  }
+  else // Create the implied result manifest.
+  {
+    sc = 200;
+
+    auto add = [&rvs] (string n, string v)
+    {
+      manifest_name_value nv {
+        move (n), move (v),
+        0 /* name_line */, 0 /* name_column */,
+        0 /* value_line */, 0 /* value_column */,
+        0 /* start_pos */, 0 /* colon_pos */, 0 /* end_pos */};
+
+      rvs.emplace_back (move (nv));
+    };
+
+    add ("status", "200");
+    add ("message", type + " upload is queued");
+    add ("reference", request_id);
+  }
+
+  assert (!rvs.empty ()); // Produced by the handler or is implied.
+
+  // Serialize the upload result manifest to a stream. On the serialization
+  // error log the error description and return false, on the stream error
+  // pass through the io_error exception, otherwise return true.
+  //
+  auto rsm = [&rvs,
+              &error,
+              &request_id,
+              &type] (ostream& os, bool long_lines = false) -> bool
+  {
+    try
+    {
+      serializer s (os, "result", long_lines);
+      serialize_manifest (s, rvs);
+      return true;
+    }
+    catch (const serialization& e)
+    {
+      error << "ref " << request_id << ": unable to serialize " << type
+            << " upload handler's output: " << e;
+      return false;
+    }
+  };
+
+  // If the upload data directory still exists then perform an appropriate
+  // action on it, depending on the upload result status. Note that the
+  // handler could move or remove the directory.
+  //
+  if (dir_exists (dd))
+  {
+    // Remove the directory if the client error is detected.
+    //
+    if (sc >= 400 && sc < 500)
+    {
+      rmdir_r (dd);
+    }
+    //
+    // Otherwise, save the result manifest into the directory. Also stash the
+    // directory for troubleshooting in case of the server error.
+    //
+    else
+    {
+      path rsf (dd / "result.manifest");
+
+      try
+      {
+        ofdstream os (rsf);
+
+        // Not being able to stash the result manifest is not a reason to
+        // claim the upload failed. The error is logged nevertheless.
+        //
+        rsm (os);
+
+        os.close ();
+      }
+      catch (const io_error& e)
+      {
+        // Not fatal (see above).
+        //
+        error << "unable to write to '" << rsf << "': " << e;
+      }
+
+      if (sc >= 500 && sc < 600)
+        stash_upload_dir ();
+    }
+  }
+
+  // Send email, if configured. Use the long lines manifest serialization mode
+  // for the convenience of copying/clicking URLs they contain.
+  //
+  // Note that we don't consider the email sending failure to be an upload
+  // failure as the upload data is successfully persisted and the handler is
+  // successfully executed, if configured. One can argue that email can be
+  // essential for the upload processing and missing it would result in the
+  // incomplete upload. In this case it's natural to assume that the web
+  // server error log is monitored and the email sending failure will be
+  // noticed.
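+  // For example, for the bindist upload type the email subject could look
+  // like "bindist upload (<request-id>)".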
+ // + const map<string, string>& ue (options_->upload_email ()); + auto ei (ue.find (type)); + + if (ei != ue.end ()) + try + { + // Redirect the diagnostics to the web server error log. + // + sendmail sm ([&trace, this] (const char* args[], size_t n) + { + l2 ([&]{trace << process_args {args, n};}); + }, + 2 /* stderr */, + options_->email (), + type + " upload (" + request_id + ')', + {ei->second}); + + // Write the upload request manifest. + // + bool r (rqm (sm.out, true /* long_lines */)); + assert (r); // The serialization succeeded once, so can't fail now. + + // Write the upload result manifest. + // + sm.out << "\n\n"; + + // We don't care about the result (see above). + // + rsm (sm.out, true /* long_lines */); + + sm.out.close (); + + if (!sm.wait ()) + error << "sendmail " << *sm.exit; + } + // Handle process_error and io_error (both derive from system_error). + // + catch (const system_error& e) + { + error << "sendmail error: " << e; + } + + if (!rsm (rs.content (sc, "text/manifest;charset=utf-8"))) + return respond_error (); // The error description is already logged. + + return true; +} diff --git a/mod/mod-upload.hxx b/mod/mod-upload.hxx new file mode 100644 index 0000000..6cc723b --- /dev/null +++ b/mod/mod-upload.hxx @@ -0,0 +1,41 @@ +// file : mod/mod-upload.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_UPLOAD_HXX +#define MOD_MOD_UPLOAD_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/module-options.hxx> +#include <mod/build-result-module.hxx> + +namespace brep +{ + class upload: public build_result_module + { + public: + upload () = default; + + // Create a shallow copy (handling instance) if initialized and a deep + // copy (context exemplar) otherwise. + // + explicit + upload (const upload&); + + virtual bool + handle (request&, response&); + + virtual const cli::options& + cli_options () const {return options::upload::description ();} + + private: + virtual void + init (cli::scanner&); + + private: + shared_ptr<options::upload> options_; + }; +} + +#endif // MOD_MOD_UPLOAD_HXX diff --git a/mod/module.cli b/mod/module.cli index 3d4ea00..4c10826 100644 --- a/mod/module.cli +++ b/mod/module.cli @@ -343,6 +343,82 @@ namespace brep } }; + class build_upload + { + std::map<string, dir_path> upload-data + { + "<type>=<dir>", + "The directory to save upload data to for the specified upload type. + If unspecified, the build artifacts upload functionality will be + disabled for this type. See \l{brep The \cb{build2} Repository + Interface Manual} for more information on build artifacts upload. + + Note that the directory path must be absolute and the directory + itself must exist and have read, write, and execute permissions + granted to the user that runs the web server." + } + + std::map<string, size_t> upload-max-size + { + "<type>=<bytes>", + "The maximum size of the upload data accepted for the specified upload + type. Note that currently the entire upload request is read into + memory. The default is 10M." + } + + std::map<string, string> upload-email + { + "<type>=<email>", + "The build artifacts upload email. If specified, the upload request + and result manifests will be sent to this address. See \l{brep The + \cb{build2} Repository Interface Manual} for more information." + } + + std::map<string, path> upload-handler + { + "<type>=<path>", + "The handler program to be executed on build artifacts upload of the + specified type. 
The handler is executed as part of the HTTP request + and is passed additional arguments that can be specified with + \cb{upload-handler-argument} followed by the absolute path to the + upload directory (\cb{upload-data}). See \l{brep The \cb{build2} + Repository Interface Manual} for more information. Note that the + program path must be absolute." + } + + std::multimap<string, string> upload-handler-argument + { + "<type>=<arg>", + "Additional arguments to be passed to the upload handler program for + the specified upload type (see \cb{upload-handler} for details). + Repeat this option to specify multiple arguments." + } + + std::map<string, size_t> upload-handler-timeout + { + "<type>=<seconds>", + "The upload handler program timeout in seconds for the specified + upload type. If specified and the handler does not exit in the + allotted time, then it is killed and its termination is treated as + abnormal." + } + + std::multimap<string, string> upload-toolchain-exclude + { + "<type>=<name>", + "Disable upload of the specified type for the specified toolchain + name. Repeat this option to disable uploads for multiple toolchains." + } + + std::multimap<string, string> upload-repository-exclude + { + "<type>=<name>", + "Disable upload of the specified type for packages from the repository + with the specified canonical name. Repeat this option to disable + uploads for multiple repositories." + } + }; + class page { web::xhtml::fragment logo @@ -416,13 +492,36 @@ namespace brep page, handler { + dir_path bindist-root + { + "<dir>", + "The root directory where the uploaded binary distribution packages + are saved to under the following directory hierarchy: + + \ + [<tenant>/]<distribution>/<os-release>/<project>/<package>/<version>/<package-config> + \ + + The package configuration directory symlinks that match these paths + are mapped to web URLs based on the \cb{bindist-url} value and + displayed on the package version details page. If this option is + specified, then \cb{bindist-url} must be specified as well." + } + + string bindist-url + { + "<url>", + "The root URL of the directory specified with the \cb{bindist-root} + option. This option must be specified if \cb{bindist-root} is + specified." + } }; class repository_details: package_db, page, handler { }; - class build_task: build, build_db, handler + class build_task: build, build_db, build_upload, handler { size_t build-task-request-max-size = 102400 { @@ -666,6 +765,10 @@ namespace brep } }; + class upload: build, build_db, build_upload, handler + { + }; + class repository_root: handler { string root-global-view = "packages" @@ -930,5 +1033,45 @@ namespace brep // string simulate; }; + + // Parameters other than challenge must be all present. + // + // Note also that besides these parameters there can be others. We don't + // recognize their semantics and just save them to the upload request + // manifest. + // + class upload + { + // Upload type. + // + // Note that the upload parameter is renamed to '_' by the root handler + // (see the request_proxy class for details). + // + string type | _; + + // Session id as returned by brep in the task response. + // + string session; + + // Answer to the private key challenge as posed by brep in the task + // response. It must be present only if the challenge value was present + // in the task response. + // + string challenge; + + // Upload instance name. + // + string instance; + + // Package archive file name. Must be <input type="file"/>. 
+      //
+      // Note that it can potentially be not just a name but a file path.
+      //
+      string archive;
+
+      // Package archive file SHA256 checksum.
+      //
+      string sha256sum;
+    };
  }
 }
diff --git a/tests/ci/data.testscript b/tests/ci/data.testscript
index e269d0d..6f44c85 100644
--- a/tests/ci/data.testscript
+++ b/tests/ci/data.testscript
@@ -3,11 +3,11 @@
 # Pre-created CI request data directory that will be copied by subsequent
 # tests and scope setup commands. The common approach will be that group
-# scopes copy and modify the parent scope submission directory as required
+# scopes copy and modify the parent scope request data directory as required
 # by the nested tests and scopes. Tests will also clone the parent scope
-# submission data directory to optionally modify it, use and cleanup at the
-# end. Note that configuration can not be shared between multiple submission
-# handler processes. Also we need to make sure that submission data
+# request data directory to optionally modify it, use and cleanup at the
+# end. Note that request data directory can not be shared between multiple
+# submission handler processes. Also we need to make sure that request data
 # directories are not cloned while being used by submission handler scripts.
 #
 data_dir = $regex.replace($path_search('*/request.manifest', $src_base), \
                           '(.*)/.*', \
diff --git a/tests/submit/data.testscript b/tests/submit/data.testscript
index 29b607f..b0fe8f0 100644
--- a/tests/submit/data.testscript
+++ b/tests/submit/data.testscript
@@ -3,12 +3,13 @@
 # Pre-created submission data directory that will be copied by subsequent
 # tests and scope setup commands. The common approach will be that group
-# scopes copy and modify the parent scope submission directory as required by
-# the nested tests and scopes. Tests will also clone the parent scope
-# submission data directory to optionally modify it, use and cleanup at the
-# end. Note that configuration can not be shared between multiple submission
-# handler processes. Also we need to make sure that submission data
-# directories are not cloned while being used by submission handler scripts.
+# scopes copy and modify the parent scope submission data directory as
+# required by the nested tests and scopes. Tests will also clone the parent
+# scope submission data directory to optionally modify it, use and cleanup at
+# the end. Note that submission data directory can not be shared between
+# multiple submission handler processes. Also we need to make sure that
+# submission data directories are not cloned while being used by submission
+# handler scripts.
# data_dir = $regex.replace($path_search('*/request.manifest', $src_base), \ '(.*)/.*', \ diff --git a/tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/archive.tar b/tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/archive.tar Binary files differnew file mode 100644 index 0000000..d3b5b17 --- /dev/null +++ b/tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/archive.tar diff --git a/tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/request.manifest b/tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/request.manifest new file mode 100644 index 0000000..c59303b --- /dev/null +++ b/tests/upload/545f1f78-63ea-4acf-bcb8-37b2513a78c8/request.manifest @@ -0,0 +1,22 @@ +: 1 +id: 545f1f78-63ea-4acf-bcb8-37b2513a78c8 +session: 4d4c8b36-56c5-42e0-91d1-58bfd1228002/libhello/1.1.0+10/x86_64-linux-\ +gnu/linux_fedora_37-gcc_12.2-bindist/default/queue/0.3.0/1683122318585120886 +instance: archive +archive: archive.tar +sha256sum: 4fa79e4e11a03db321514800806a2b0a3a8eef9375dc22963f4e5a16764c0d5e +timestamp: 2023-05-08T09:18:20Z +name: libhello +version: 1.1.0+10 +project: hello +target-config: linux_fedora_37-gcc_12.2-bindist +package-config: default +target: x86_64-linux-gnu +tenant: 4d4c8b36-56c5-42e0-91d1-58bfd1228002 +toolchain-name: queue +toolchain-version: 0.3.0 +repository-name: git:build2.org/var/scm/hello/libhello#master@7f62790591b66bd\ +a248140013bdbd12bf078c2a2 +machine-name: linux_fedora_37-bindist-gcc_12.2 +machine-summary: Fedora Linux 37 with system-default GCC 12.2.1 and bpkg-pkg-\ +bindist prerequisites diff --git a/tests/upload/buildfile b/tests/upload/buildfile new file mode 100644 index 0000000..32d7720 --- /dev/null +++ b/tests/upload/buildfile @@ -0,0 +1,13 @@ +# file : tests/upload/buildfile +# license : MIT; see accompanying LICENSE file + +dir = ../../brep/handler/upload/ + +include $dir + +commons = data + +./: testscript{* -{$commons}} common_testscript{$commons} {*/ -test/}{**} \ + $dir/exe{brep-upload-bindist} + +testscript{upload-bindist}@./: test = $out_base/$dir/brep-upload-bindist diff --git a/tests/upload/data.testscript b/tests/upload/data.testscript new file mode 100644 index 0000000..3d3eede --- /dev/null +++ b/tests/upload/data.testscript @@ -0,0 +1,34 @@ +# file : tests/upload/data.testscript +# license : MIT; see accompanying LICENSE file + +# Pre-created upload data directory that will be copied by subsequent tests +# and scope setup commands. The common approach will be that group scopes copy +# and modify the parent scope upload data directory as required by the nested +# tests and scopes. Tests will also clone the parent scope upload data +# directory to optionally modify it, use and cleanup at the end. Note that +# upload data directory can not be shared between multiple upload handler +# processes. Also we need to make sure that upload data directories are not +# cloned while being used by upload handler scripts. +# +data_dir = $regex.replace($path_search('*/request.manifest', $src_base), \ + '(.*)/.*', \ + '\1') + +request_id = "$data_dir" + +# Copy the original upload data directory to the root scope. +# ++cp -r $src_base/$data_dir ./ + +root_data_dir = $~/$data_dir + +# The most commonly used upload data directory cloning command that copies it +# from the parent scope working directory. +# +clone_data = [cmdline] cp --no-cleanup -r ../$data_dir ./ +clone_data_clean = [cmdline] cp --no-cleanup -r ../$data_dir ./ &$data_dir/*** + +# Clones the original upload data directory. 
+#
+clone_root_data = [cmdline] cp --no-cleanup -r $root_data_dir ./
+clone_root_data_clean = [cmdline] cp --no-cleanup -r $root_data_dir ./ &$data_dir/***
diff --git a/tests/upload/upload-bindist.testscript b/tests/upload/upload-bindist.testscript
new file mode 100644
index 0000000..d43c567
--- /dev/null
+++ b/tests/upload/upload-bindist.testscript
@@ -0,0 +1,126 @@
+# file : tests/upload/upload-bindist.testscript
+# license : MIT; see accompanying LICENSE file
+
+.include data.testscript
+
+: args
+{
+  : no-dir
+  :
+  $* 2>>~%EOE% != 0
+  %\[.+\] \[brep:error\] \[ref \] \[brep-upload-bindist\]: usage: .+brep-upload-bindist \[<options>\] <root> <dir>%
+  EOE
+
+  : no-root
+  :
+  $* $~/dir 2>>~%EOE% != 0
+  %\[.+\] \[brep:error\] \[ref dir\] \[brep-upload-bindist\]: usage: .+brep-upload-bindist \[<options>\] <root> <dir>%
+  EOE
+
+  : root-not-exist
+  :
+  : While at it, also test that the trailing slash is stripped from the
+  : directory paths.
+  :
+  $* $~/root/ $~/dir/ 2>>~%EOE% != 0
+  %\[.+\] \[brep:error\] \[ref dir\] \[brep-upload-bindist\]: '.+root' does not exist or is not a directory%
+  EOE
+
+  : data-not-exist
+  :
+  mkdir root;
+  $* $~/root $~/dir 2>>~%EOE% != 0
+  %\[.+\] \[brep:error\] \[ref dir\] \[brep-upload-bindist\]: '.+dir' does not exist or is not a directory%
+  EOE
+}
+
+: success
+:
+{
+  mkdir --no-cleanup bindist-root/ &bindist-root/***;
+
+  # Test the first upload.
+  #
+  $clone_data;
+
+  $* $~/bindist-root/ $~/$data_dir >>"EOO";
+    : 1
+    status: 200
+    message: binary distribution packages are published
+    reference: $request_id
+    EOO
+
+  timestamp = '2023-05-08T09:18:20Z';
+  tenant = '4d4c8b36-56c5-42e0-91d1-58bfd1228002';
+  dir = [dir_path] bindist-root/$tenant/archive/fedora35/hello/libhello/1.1.0+10/;
+
+  test -f $dir/default/libhello-1.0.0+10.tar.xz;
+  test -f $dir/default-$timestamp/libhello-1.0.0+10.tar.xz;
+
+  # Repeat the upload using the same timestamp to make sure that we properly
+  # handle this situation (by adding the retry number as a suffix to the
+  # package configuration directory name).
+  #
+  $clone_data;
+
+  $* $~/bindist-root/ $~/$data_dir >>"EOO" &bindist-root/***;
+    : 1
+    status: 200
+    message: binary distribution packages are published
+    reference: $request_id
+    EOO
+
+  test -f $dir/default/libhello-1.0.0+10.tar.xz;
+  test -f $dir/default-$timestamp-0/libhello-1.0.0+10.tar.xz;
+  test -d $dir/default-$timestamp/ != 0;
+
+  # Test the second upload without --keep-previous option.
+  #
+  data_dir2 = 22222222-2222-2222-2222-222222222222;
+  request_id2 = $data_dir2;
+  timestamp2 = '2023-05-09T09:18:20Z';
+
+  cp --no-cleanup -r ../$data_dir ./$data_dir2;
+
+  sed -i -e "s%^\(id:\) .+\$%\\1 $request_id2%" \
+      $data_dir2/request.manifest;
+
+  sed -i -e "s%^\(timestamp:\) .+\$%\\1 $timestamp2%" \
+      $data_dir2/request.manifest;
+
+  $* $~/bindist-root/ $~/$data_dir2 >>"EOO";
+    : 1
+    status: 200
+    message: binary distribution packages are published
+    reference: $request_id2
+    EOO
+
+  test -f $dir/default/libhello-1.0.0+10.tar.xz;
+  test -f $dir/default-$timestamp2/libhello-1.0.0+10.tar.xz;
+  test -d $dir/default-$timestamp-0/ != 0;
+
+  # Test the third upload with --keep-previous option.
+  #
+  data_dir3 = 33333333-3333-3333-3333-333333333333;
+  request_id3 = $data_dir3;
+  timestamp3 = '2023-05-10T09:18:20Z';
+
+  cp --no-cleanup -r ../$data_dir ./$data_dir3;
+
+  sed -i -e "s%^\(id:\) .+\$%\\1 $request_id3%" \
+      $data_dir3/request.manifest;
+
+  sed -i -e "s%^\(timestamp:\) .+\$%\\1 $timestamp3%" \
+      $data_dir3/request.manifest;
+
+  $* --keep-previous $~/bindist-root/ $~/$data_dir3 >>"EOO";
+    : 1
+    status: 200
+    message: binary distribution packages are published
+    reference: $request_id3
+    EOO
+
+  test -f $dir/default/libhello-1.0.0+10.tar.xz;
+  test -f $dir/default-$timestamp3/libhello-1.0.0+10.tar.xz;
+  test -f $dir/default-$timestamp2/libhello-1.0.0+10.tar.xz
+}
diff --git a/www/package-version-details-body.css b/www/package-version-details-body.css
index dec5652..7ef4486 100644
--- a/www/package-version-details-body.css
+++ b/www/package-version-details-body.css
@@ -289,6 +289,54 @@ h1, h2, h3
}

/*
+ * Binaries.
+ */
+#binaries
+{
+  width: calc(100% + .8rem);
+  margin-left: -.4rem;
+  border: none;
+  border-spacing: 0 0;
+
+  margin-top: .4em;
+  margin-bottom: 1em;
+  border-collapse: collapse;
+}
+
+#binaries tr:nth-child(even) td {background-color: rgba(0, 0, 0, 0.07);}
+
+#binaries td
+{
+  padding: .08em .4rem;
+}
+
+#binaries td:last-child {width: 100%;}
+
+#binaries td .value
+{
+  display: inline-block;
+  white-space: nowrap;
+
+  /* <code> style. */
+  font-family: monospace;
+  font-size: 0.94em;
+}
+
+/* Re-styling for full page variant. */
+
+.full #binaries td
+{
+  vertical-align: top;
+}
+
+.full #binaries td .value
+{
+  margin-right: 1em;
+
+  white-space: normal;
+}
+
+/*
 * Builds.
 */
#builds {margin-bottom: 1em;}
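The upload-bindist tests above encode the directory layout the handler is
expected to produce under the bindist root: the latest upload is published
into a <config>/ directory, each upload is also kept under its request
timestamp, and a retry number is appended when the same timestamp is used
twice. Purely as an illustration of that directory selection logic (a sketch,
not the actual handler code), the following bash fragment uses the root,
tenant, distribution, package, and timestamp values from the test data above:

#!/usr/bin/env bash
# Sketch only: choose the package configuration directory the way the
# upload-bindist tests above expect.

root=bindist-root                            # Assumed bindist root directory.
tenant=4d4c8b36-56c5-42e0-91d1-58bfd1228002  # From the test manifest.
config=default
timestamp=2023-05-08T09:18:20Z

dir=$root/$tenant/archive/fedora35/hello/libhello/1.1.0+10

# Save the upload under its timestamp, appending the retry number if this
# timestamp was already used (the tests expect the -0 suffix on the first
# retry).
#
target=$dir/$config-$timestamp
n=0
while [ -d "$target" ]; do
  target=$dir/$config-$timestamp-$n
  n=$((n + 1))
done

mkdir -p "$target"
echo "publishing into $target"

Running this twice prints the base timestamped directory first and its -0
variant second, matching the duplicate-timestamp test above.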
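The request manifest above also shows what an upload handler has to work with
before publishing: the archive file name and its SHA256 checksum. A minimal
bash sketch of the corresponding verification step, assuming the archive: and
sha256sum: values are single-line as in the test manifest above (other values,
such as session:, can be wrapped with a trailing backslash, which this naive
extraction does not handle); this is not taken from the actual handler
sources:

#!/usr/bin/env bash
# Sketch only: verify the uploaded archive against the manifest checksum.

data_dir=545f1f78-63ea-4acf-bcb8-37b2513a78c8  # Upload data directory.
manifest=$data_dir/request.manifest

archive=$(sed -n 's/^archive: //p' "$manifest")
checksum=$(sed -n 's/^sha256sum: //p' "$manifest")

if [ "$(sha256sum "$data_dir/$archive" | cut -d' ' -f1)" != "$checksum" ]; then
  echo "error: archive checksum mismatch" >&2
  exit 1
fi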