#!/usr/bin/env bash

# file      : brep/handler/submit/submit-git.in
# license   : MIT; see accompanying LICENSE file

# Package submission handler with git repository storage.
#
# The overall idea behind this handler is to add the package archive into a
# git repository. Another entity (for example, a human or a script) can then
# pull this change and do something about it (for example, review it and/or
# add it to an archive-based repository). In other words, git is used as a
# kind of transport that is easy enough to access for both humans and scripts.
#
# The handler also implements the project/package name ownership verification
# by performing the submitter authentication/authorization based on the
# control repository mechanism described in bdep-publish(1). This
# functionality is optional.
#
# The handler can operate with a single git repository, called "target", or
# with two git repositories, in which case the first is the target and the
# second is called "reference". The reference repository access is read-only
# and it is only consulted for duplicate package suppression and name
# ownership verification. The dual repository mode is normally used to
# implement a two-stage queue/public setup where the package is first queued
# for review and/or testing and then moved (for example, by a moderator) to a
# public repository.
#
# The target repository argument (<tgt-repo>) should be a read-write git
# repository URL. It is cloned (shallow) into the submission directory on
# each submission.
#
# If specified, the reference repository argument (<ref-repo>) should be a
# directory with a pre-cloned read-only reference repository. This directory
# is shared between all instances of the handler. On each submission, the
# handler will flock(1) this directory, git-pull, obtain the information it
# needs, and release the lock.
#
# Both the target and, if specified, reference repositories should contain the
# submit.config.bash repository configuration file in the root directory.
# The configuration file is a bash fragment and is sourced by the handler
# script. It provides the following information:
#
# - Mapping of section names to repository subdirectories in the 'sections'
#   variable (declare -A sections; values are relative to the repository
#   root).
#
#   If there is no key for the submitted section name, then the entry with the
#   special '*' key is used. If there is no such entry, then the submission is
#   invalid. For example:
#
#   sections[alpha]=1/alpha
#   sections[beta]=1/beta
#   sections[stable]=1/testing
#
# - Optional owners subdirectory in the 'owners' variable (relative to the
#   repository root). If not specified, then no ownership verification is
#   performed. For example:
#
#   owners=owners
#
# If the ownership directory is specified, then the handler script maintains
# the project/package name ownership information in this directory. It has the
# following structure:
#
# <owners>/
# |-- <project1>/
# |   |-- <package1>/
# |   |   `-- package-owner.manifest
# |   |-- <package2>/
# |   |   `-- package-owner.manifest
# |   |-- ...
# |   `-- project-owner.manifest
# |-- <project2>/
# |   `-- ...
# `-- ...
#
# If the submitted project name is not yet known, then the handler script
# creates a new project subdirectory and saves project-owner.manifest. The
# project owner manifest contains the following values in the specified order:
#
# name: <project-name>
# author-name: <name>
# author-email: <email>
# control: <url-prefix>
#
# The 'control' value is the control repository URL prefix and there can be
# multiple such values in a single manifest. The handler script derives it
# from the submitted control repository URL by removing the last path
# component. So, for example, https://github.com/build2/libbutl.git becomes
# https://github.com/build2/.
#
# If the submitted project name is already known, then the handler script
# loads its project-owner.manifest and verifies that at least one of the
# 'control' values is a prefix of the submitted control repository URL.
#
# Similarly, if the submitted package name is not yet known, then the handler
# script creates a new package subdirectory and saves package-owner.manifest.
# The package owner manifest contains the following values in the specified
# order:
#
# name: <package-name>
# author-name: <name>
# author-email: <email>
# control: <url>
#
# The 'control' value is the control repository URL and there can be multiple
# such values in a single manifest.
#
# If the submitted package is already known, then the handler script loads
# its package-owner.manifest and verifies that at least one of the 'control'
# values matches the submitted control repository URL. Note that the URL is
# matched without regard to the potential .git extension.
#
# If all these ownership authentication tests pass, the handler script clones
# (shallow) the build2-control branch of the control repository and verifies
# that the submission authorization file is present (see bdep-publish(1) for
# details).
#
# If the submission authorization test passes, then the handler script adds
# the package archives to the target repository, commits this change, and
# then pushes the commit to the remote.
#
# Notes:
#
# - It is possible that a submitted package name already exists in another
#   project. In this case, such a submission is accepted only if the package
#   already exists in the requested project. This allows the moderator to
#   manually permit such multi-project packages (for example, to allow moving
#   packages between projects).
#
# - There could be a race when moving package and ownership information from
#   target to reference. To avoid it, the protocol for such a move is to first
#   add, commit, and push to reference and then remove, commit, and push to
#   target.
#
#   On the handler side, the script acts in the opposite order, cloning the
#   target prior to pulling the reference, in order not to get into the
#   situation where it misses the ownership info that is not in the reference
#   yet but no longer in the target.
#   Note that if some move happens after the cloning,
#   then the script will be unable to push the target modification and will
#   re-try the whole authentication procedure from scratch.
#
# - Filesystem entries that exist or are created in the data directory:
#
#   <pkg>-<ver>.tar.gz      saved by brep (could be other archives in the future)
#   request.manifest        created by brep
#   package.manifest        extracted by the handler
#   target/                 cloned by the handler
#   control/                cloned by the handler
#   result.manifest         saved by brep
#
# Options:
#
# --committer-name <name>
#
#   Name to use for the target repository commits. "Submission Handler" if
#   unspecified.
#
# --committer-email <email>
#
#   Email to use for the target repository commits. noreply@example.com if
#   unspecified.
#
# --result-url <url>
#
#   Result URL base for the response. If specified, the handler will append
#   the <package>/<version> to this value and include the resulting URL in the
#   response message.
#
usage="usage: $0 [<options>] <tgt-repo> [<ref-repo>] <dir>"

# Diagnostics.
#
verbose= #true

# Git network operations timeout (seconds).
#
# Note that we don't cover protocols other than HTTP(S) since for them git
# doesn't support any timeouts (though we may be able to cobble something
# up for SSH).
#
git_timeout=10

# The reference repository lock timeout (seconds).
#
ref_lock_timeout=30

# Terminate with a non-zero exit status on any unhandled command failure.
#
trap "{ exit 1; }" ERR
set -o errtrace # Trap ERR in functions.
set -o pipefail # Return the rightmost non-zero exit status in a pipeline.

# Build-time substitutions that pull in the shared handler libraries.
#
@import brep/handler/handler@
@import brep/handler/submit/submit@
@import brep/handler/submit/submit-git@

# Parse the command line options.
#
committer_name="Submission Handler"
committer_email="noreply@example.com"
result_url=

while [ $# -gt 0 ]; do
  case $1 in
    --committer-name)
      shift
      committer_name="$1"
      shift
      ;;
    --committer-email)
      shift
      committer_email="$1"
      shift
      ;;
    --result-url)
      shift
      result_url="${1%/}" # Drop a single trailing slash, if any.
      shift
      ;;
    *)
      break; # The end of options is encountered.
      ;;
  esac
done

# An explicitly specified empty committer name or email is a usage error.
#
if [ -z "$committer_name" -o -z "$committer_email" ]; then
  error "$usage"
fi

# Parse the command line arguments.
#
# Expect <tgt-repo> [<ref-repo>] <dir>, that is, two or three arguments.
#
if [ "$#" -lt 2 -o "$#" -gt 3 ]; then
  error "$usage"
fi

# Target repository URL.
#
tgt_repo="$1"
shift

if [ -z "$tgt_repo" ]; then
  error "$usage"
fi

# Reference repository directory.
#
# Note that the last argument is always the submission data directory.
#
ref_repo=

if [ "$#" -gt 1 ]; then
  ref_repo="$1"
  shift

  if [ -z "$ref_repo" ]; then
    error "$usage"
  fi

  if [ ! -d "$ref_repo" ]; then
    error "'$ref_repo' does not exist or is not a directory"
  fi
fi

# Submission data directory.
#
data_dir="${1%/}" # Drop a single trailing slash, if any.
shift

if [ -z "$data_dir" ]; then
  error "$usage"
fi

if [ ! -d "$data_dir" ]; then
  error "'$data_dir' does not exist or is not a directory"
fi

# The data directory leaf name. It is not used further in this script
# (presumably consumed by the imported handler library -- confirm).
#
reference="$(basename "$data_dir")"

# Git verbosity options.
#
# Note that not all git commands support the -q/-v options. Also note that
# these variable expansions should not be quoted.
#
if [ "$verbose" ]; then
  gqo=
  gvo="-v"
else
  gqo="-q"
  gvo=
fi

# Git doesn't support the connection timeout option. The options we use are
# just an approximation of the former, that, in particular, don't cover the
# connection establishing. To work around this problem, before running a git
# command that assumes the remote repository communication we manually check
# connectivity with the remote repository.
#
git_http_timeout=("-c" "http.lowSpeedLimit=1" \
                  "-c" "http.lowSpeedTime=$git_timeout")

# Parse the submission request manifest and obtain the required values.
#
manifest_parser_start "$data_dir/request.manifest"

archive=
sha256sum=
section=
author_name=
author_email=
control=
simulate=

# Read the NUL-terminated name:value records from the parser's output file
# descriptor, keeping only the values this handler uses.
#
while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
  case "$n" in
    archive)      archive="$v"      ;;
    sha256sum)    sha256sum="$v"    ;;
    section)      section="$v"      ;;
    author-name)  author_name="$v"  ;;
    author-email) author_email="$v" ;;
    control)      control="$v"      ;;
    simulate)     simulate="$v"     ;;
  esac
done

manifest_parser_finish

if [ -z "$archive" ]; then
  error "archive manifest value expected"
fi

if [ -z "$sha256sum" ]; then
  error "sha256sum manifest value expected"
fi

# The only recognized simulation outcome is 'success'.
#
if [ -n "$simulate" -a "$simulate" != "success" ]; then
  exit_with_manifest 400 "unrecognized simulation outcome '$simulate'"
fi

# The author-* manifest values should both be present or absent.
#
if [ -z "$author_name" -a -n "$author_email" ]; then
  exit_with_manifest 400 "author-name manifest value expected"
fi

if [ -z "$author_email" -a -n "$author_name" ]; then
  exit_with_manifest 400 "author-email manifest value expected"
fi

# Note: checking for section, author-*, and control later.

m="$data_dir/package.manifest"
extract_package_manifest "$data_dir/$archive" "$m"

# Parse the package manifest and obtain the package name and version.
#
manifest_parser_start "$m"

name=
version=
project=

while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
  case "$n" in
    name)    name="$v"    ;;
    version) version="$v" ;;
    project) project="$v" ;;
  esac
done

manifest_parser_finish

if [ -z "$name" ]; then
  error "name manifest value expected"
fi

if [ -z "$version" ]; then
  error "version manifest value expected"
fi

# The project name defaults to the package name.
#
if [ -z "$project" ]; then
  project="$name"
fi

# Suffix for the response messages: the package result URL if the result URL
# base is specified and just <name>/<version> otherwise.
#
if [ -n "$result_url" ]; then
  message_suffix=": $result_url/$name/$version"
else
  message_suffix=": $name/$version"
fi

# Exit with the 'submission is queued' result manifest if simulating.
#
# Note that we can't expect a real control repository URL to be specified for
# simulating, so trying to authenticate/authorize would be meaningless.
#
if [ -n "$simulate" ]; then
  run rm -r -f "$data_dir"

  trace "package submission is simulated$message_suffix"
  exit_with_manifest 200 "package submission is queued$message_suffix"
fi

# Stage the specified paths in the git repository located in <repo-dir>.
# Paths are passed to git-add as is and so should be relative to <repo-dir>.
# Git's output is redirected to stderr.
#
function git_add () # <repo-dir> <path>...
{
  local d="$1"
  shift

  run git -C "$d" add $gvo "$@" >&2
}

# For now we make 10 re-tries to add the package and push to target. Push can
# fail due to the target-to-reference information move race (see the above
# notes for details) or because of concurrent submissions. We may want to make
# it configurable in the future.
#
pkg_added=

for i in {1..11}; do

  # Clone the target repository.
  #
  tgt_dir="$data_dir/target"

  check_connectivity "$tgt_repo" "$git_timeout" true

  run git "${git_http_timeout[@]}" clone $gqo $gvo --single-branch --depth 1 \
      "$tgt_repo" "$tgt_dir" >&2

  check_package_duplicate "$name" "$version" "$tgt_dir"

  # Check for duplicates and try to authenticate the package ownership using
  # information in the reference repository, if specified.
  #
  if [ -n "$ref_repo" ]; then

    remote_url=$(git -C "$ref_repo" config --get remote.origin.url)

    # Open the reading file descriptor and lock the repository. Fail if unable
    # to lock before timeout.
    #
    l="$ref_repo/submit.config.bash"
    trace "+ exec {fd}<$l"
    exec {fd}<"$l"

    # Note that on the locking failure we don't suggest the user to try again,
    # since the client program may suggest to re-try later for all server
    # errors (as bdep-publish(1) does).
    #
    if ! run flock -w "$ref_lock_timeout" "$fd"; then
      exit_with_manifest 503 "submission service is busy"
    fi

    # Pull the reference repository.
    #
    check_connectivity "$remote_url" "$git_timeout" true
    run git "${git_http_timeout[@]}" -C "$ref_repo" pull $gqo $gvo >&2

    # Check the package duplicate.
    #
    check_package_duplicate "$name" "$version" "$ref_repo"

    # Authenticate the project ownership.
    #
    auth="$(auth_project "$project" "$control" "$ref_repo")"

    # Try to authenticate the package ownership if the project ownership was
    # authenticated successfully.
    #
    if [ "$auth" == "project" ]; then
      a="$(auth_package "$project" "$name" "$control" "$ref_repo")"

      # If the package is unknown to this project, we will try to authenticate
      # the package name with the target repository later and so we keep the
      # 'project' auth state.
      #
      if [ "$a" != "unknown" ]; then
        auth="$a"
      fi
    fi

    trace "reference auth: $auth"

    if [ "${auth:0:1}" == ":" ]; then # Authentication error?
      echo "$auth"
      exit 0
    fi

    # If the package is not present in the specified project then we need to
    # make sure it is also not present in any other project.
    #
    if [ "$auth" == "project" -o "$auth" == "unknown" ]; then
      auth_package_unknown "$name" "$ref_repo"
    fi

    trace "+ exec {fd}<&-"
    exec {fd}<&- # Close the file descriptor and unlock the repository.
  else
    auth="disabled"
  fi

  ref_auth="$auth"

  # Now try to authenticate the package ownership using information in the
  # target repository unless already authenticated with reference.
  #
  if [ "$auth" != "package" ]; then

    # Don't authenticate the project ownership if this is already done with
    # the reference repository.
    #
    if [ "$auth" != "project" ]; then
      auth="$(auth_project "$project" "$control" "$tgt_dir")"
    fi

    # Try to authenticate the package ownership if the project ownership was
    # authenticated successfully.
    #
    if [ "$auth" == "project" ]; then
      auth="$(auth_package "$project" "$name" "$control" "$tgt_dir")"
    fi

    trace "target auth: $auth"

    if [ "${auth:0:1}" == ":" ]; then # Authentication error?
      echo "$auth"
      exit 0
    fi
  fi

  trace "resulting auth: $auth"

  # Sanity check the auth variable value.
  #
  case "$auth" in
    package)  ;;
    unknown)  ;;
    disabled) ;;

    *) error "unexpected resulting auth '$auth'";;
  esac

  # Establish ownership of the package name unless already done.
  #
  if [ "$auth" == "unknown" ]; then

    # Check that the necessary request manifest values are specified.
    #
    if [ -z "$author_name" ]; then
      exit_with_manifest 400 "author-name manifest value expected"
    fi

    # Check that the package doesn't belong yet to some other project.
    #
    auth_package_unknown "$name" "$tgt_dir"

    # First the project name.
    #
    # Note that owners_dir() shouldn't return an empty string at this stage.
    #
    d="$(owners_dir "$tgt_dir")/$project"

    # Establish ownership of the project name unless already done. Note that
    # it can only be owned by the submitter at this stage.
    #
    prj_man="$d/project-owner.manifest"

    if [ ! -f "$prj_man" -a "$ref_auth" != "project" ]; then
      run mkdir -p "$d" # Also creates the owners directory if not exist.

      ctl="$(repository_base "$control")"

      create_owner_manifest \
        "$project" "$author_name" "$author_email" "$ctl" "$prj_man"

      # Add the project owners manifest file to git repository using the path
      # relative to the repository directory. The prefix is quoted so that it
      # is stripped literally rather than as a glob pattern.
      #
      git_add "$tgt_dir" "${prj_man#"$tgt_dir"/}"
    fi

    # Now the package name.
    #
    d="$d/$name"
    run mkdir -p "$d" # Also creates the project directory if not exist.

    pkg_man="$d/package-owner.manifest"

    create_owner_manifest \
      "$name" "$author_name" "$author_email" "$control" "$pkg_man"

    # Add the package owners manifest file using path relative to the
    # repository directory.
    #
    git_add "$tgt_dir" "${pkg_man#"$tgt_dir"/}"

    auth="package"
  fi

  # Respond with the 'unauthorized' manifest if we failed to authenticate the
  # submitter as the package owner, unless both the reference and target
  # repositories have the ownership authentication disabled. In the latter
  # case no authorization is required.
  #
  if [ "$auth" != "disabled" -o "$ref_auth" != "disabled" ]; then

    # Respond with the 'unauthorized' manifest if not the package owner.
    #
    if [ "$auth" != "package" ]; then
      if [ "$auth" == "project" -o "$ref_auth" == "project" ]; then
        exit_with_manifest 401 "package owner authentication failed"
      else
        exit_with_manifest 401 "project owner authentication failed"
      fi
    fi

    # Authorize the submission.
    #
    ctl_dir="$data_dir/control"

    check_connectivity "$control" "$git_timeout" ""

    if ! run_silent git "${git_http_timeout[@]}" clone $gqo $gvo --depth 1 \
         --single-branch --branch "build2-control" "$control" "$ctl_dir" >&2; then
      exit_with_manifest 422 "failed to git-clone $control"
    fi

    # The authorization file is named after the first 16 characters of the
    # package archive checksum.
    #
    if [ ! -f "$ctl_dir/submit/${sha256sum:0:16}" ]; then
      exit_with_manifest 401 "package publishing authorization failed"
    fi
  fi

  # Remove the package version revision archives that may exist in the section
  # directory of the target repository.
  #
  # Note that it could be desirable to keep multiple revisions in different
  # sections. However, we need to remove a package revision that may
  # potentially exist in a different project.
  #
  s="$(section_dir "$section" "$tgt_dir")"

  if [ -z "$s" ]; then
    exit_with_manifest 400 "unrecognized section '$section'"
  fi

  # Strips the version revision part, if present.
  #
  v="$(sed -n -re 's%^(\+?[^+]+)(\+[0-9]+)?$%\1%p' <<<"$version")"

  # Make sure the section directory exists before we run find in it.
  #
  d="$tgt_dir/$s/$project"
  run mkdir -p "$d" # Create all the parent directories as well.

  # Go through the potentially matching archives (for example, for
  # foo-1.2.3+2: foo-1.2.3.tar.gz, foo-1.2.3+1.tar.gz, foo-1.2.30.tar.gz, etc)
  # and remove those that match exactly.
  #
  # Change CWD to the section directory to make sure that the found archive
  # paths don't contain spaces.
  #
  fs=($(run cd "$tgt_dir/$s" && run find -name "$name-$v*"))

  for f in "${fs[@]}"; do
    if [[ "$f" =~ ^\./[^/]+/"$name-$v"(\+[0-9]+)?\.[^/]+$ ]]; then
      run git -C "$tgt_dir" rm $gqo "$s/$f" >&2
    fi
  done

  # Finally, add the package archive to the target repository.
  #
  # Copy the archive rather than move it since we may need it for a re-try.
  # Make sure the project directory exists before we copy the archive into it.
  # Note that it was removed by git-rm if it became empty.
  #
  a="$d/$archive"
  run mkdir -p "$d" # Create all the parent directories as well.
  run cp "$data_dir/$archive" "$a"

  git_add "$tgt_dir" "${a#"$tgt_dir"/}"

  # Only override the commit author if the submitter is known.
  #
  author=()
  if [ -n "$author_name" ]; then
    author=(--author="$author_name <$author_email>")
  fi

  # Commit with the message read from stdin: the summary line, a blank line,
  # and then the request manifest as the body.
  #
  run git -c "user.name=$committer_name" -c "user.email=$committer_email" \
      -C "$tgt_dir" commit "${author[@]}" $gqo $gvo -F - <<EOF >&2
Add $name/$version to $s/$project

$(cat "$data_dir/request.manifest")
EOF

  check_connectivity "$tgt_repo" "$git_timeout" true

  # Try to push the target modifications. If this succeeds then we are done.
  # Otherwise, drop the target directory and re-try the whole
  # authentication/authorization procedure, unless we are out of attempts.
  #
  if run_silent git "${git_http_timeout[@]}" -C "$tgt_dir" push >&2; then
    pkg_added=true
    break
  else
    run rm -r -f "$tgt_dir" "$ctl_dir"
  fi
done

if [ ! "$pkg_added" ]; then
  exit_with_manifest 503 "submission service temporarily unavailable"
fi

# Remove the no longer needed submission data directory.
#
run rm -r -f "$data_dir"

trace "package submission is queued$message_suffix"
exit_with_manifest 200 "package submission is queued$message_suffix"