From b5eebbfc42a2f207a15762e3a480eed47e71c345 Mon Sep 17 00:00:00 2001 From: Francois Kritzinger Date: Thu, 4 Mar 2021 09:42:27 +0200 Subject: Fix manage script to only show ownership if related to archive(s) --- bpkg-util/manage.in | 292 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 178 insertions(+), 114 deletions(-) diff --git a/bpkg-util/manage.in b/bpkg-util/manage.in index aa59d3b..b2a8119 100644 --- a/bpkg-util/manage.in +++ b/bpkg-util/manage.in @@ -53,8 +53,10 @@ # or package archives (not ownership manifests) migrated to its counterpart # section (also in the destination repository). These mode-specific source # directories are called "managed directories" and the files they contain -# "managed files", excluding those with an invalid path (project component is -# missed, etc); any other directory or file is "unmanaged". +# "managed files". The "unmanaged files" include any file outside of a managed +# directory, any ownership manifest not related (by package name or project +# name) to a managed package archive, and any file inside a managed directory +# with an invalid path (project component is missed, etc). # # The destination-management mode is actually just a name used to refer to a # number of more specific, near-identical modes distinguished only by the @@ -81,7 +83,8 @@ # # Enter the stable-management mode: manage the stable->legacy transitions in # the destination repository. Note that commits/files cannot be dropped in -# this mode. +# this mode and, by extension, it does not support any operations on +# ownership manifests. # # --alpha[=<filter>] # @@ -308,6 +311,88 @@ if [[ ("$mode" != "source") && ! -v "src_sections[$mode]" ]]; then error "section '$mode' not configured in the destination repository" fi +# Print information about the path of a source repository file.
+# +# The information includes its class (package archive, ownership manifest, or +# unmanaged), the project directory, and in the case of package archives, the +# section directory. +# +# <path> must be relative to the source repository directory (`src_dir`). +# +# If the path refers to a managed archive file in the source repository, then +# print `archive\n<project>\n<section>\n`. The section directory will be +# relative to the source repository directory. +# +# Otherwise, if the path refers to a managed ownership manifest file in the +# source repository, then print `ownership\n<project>\n`. +# +# Otherwise the file is unmanaged; print `unmanaged\n`. +# +# Note that the function doesn't validate the file path exhaustively and may +# classify improperly named file (invalid base name, etc) as an archive or +# ownership. +# +function src_path_info () # <path> +{ + local f="$1" + + # Check whether the file is a package archive. Valid source repository + # package paths start with one of the section directories in `src_sections` + # which is followed by a single subdirectory (which would be the project + # directory, but this is not checked). + # + local s + for s in "${src_sections[@]}"; do + if [[ "$f" =~ ^"$s"/([^/]+)/[^/]+$ ]]; then + echo "archive" + echo "${BASH_REMATCH[1]}" + echo "$s" + return + fi + done + + # Not a managed archive path, so check whether it's an ownership + # manifest. Valid source repository ownership manifest paths start with the + # directory in `src_owners` and is followed by at least one subdirectory + # (the project directory, again). + # + if [[ -n "$src_owners" && ("$f" =~ ^"$src_owners"/([^/]+)/.+$) ]]; then + echo "ownership" + echo "${BASH_REMATCH[1]}" + else + echo "unmanaged" + fi +} + +# Extract the package name, version, and project from a package archive's +# manifest and print it to stdout in the `<name>\n<version>\n<project>\n` +# form. If the manifest does not specify the project name, the package name is +# returned as the project name.
+# +function extract_pkg_info () # <archive> +{ + local arc="$1" + + local r # (<name> <version> <project>) + bpkg_util_pkg_verify_archive "$arc" | readarray -t r + + if [[ -z "${r[2]}" ]]; then + r[2]="${r[0]}" + fi + + # Verify that the archive parent directory name matches the project. + # + local p="${r[2]}" + if [[ "$p" != "$(basename "$(dirname "$arc")")" ]]; then + error "'$arc' archive directory name does not match package project '$p'" + fi + + local e + for e in "${r[@]}"; do + echo "$e" + done +} + # Contains the hashes of the pending commits in chronological order. # pending_seq=() @@ -356,7 +441,11 @@ function init_globals () fi done - if [[ -n "$src_owners" && -d "$src_dir/$src_owners" ]]; then + # Don't load ownership manifests if in stable-management mode because it + # does not support any operations on them. + # + if [[ ("$mode" != "stable") && + -n "$src_owners" && -d "$src_dir/$src_owners" ]]; then local f while read f; do src_files+=("${f#$src_dir/}") @@ -366,14 +455,39 @@ function init_globals () # Build the set of pending commit hashes ("pending set"). # # For each file in the source repository, find the commit it belongs to and - # store its abbreviated hash (as key) inside the `pending_set` associative - # array (note: unordered) and (as value) inside the `file_commits` + # store its abbreviated hash (as value) inside the `file_commits` + # associative array (note: unordered). With some exceptions (see below), + # these files' commits will also be stored (as key) inside the `pending_set` # associative array. # - # If in destination-management mode, exclude from `pending_set` those - # commits without any package archives that match the pattern in `filter`. - # - local -A pending_set=() + # If in destination-management mode: + # + # - Unless in stable-management mode (which does not operate on ownership + # manifests at all), do not add unmanaged ownership manifests' commits + # to `pending_set`.
(Note that these commits could be added as a result + # of other files, and that, in source-management mode, all ownership + # manifests are managed.) + # + # The `managed_projs` and `managed_pkgnames` associative arrays store + # the project and package names, respectively, of every package archive + # in the managed section and are used to identify unmanaged ownership + # manifests (see below). (Both are empty in source-management and + # stable-management modes.) + # + # - Exclude from `pending_set` those commits without any package archives + # that match the pattern in `filter`. + # + # Every file in `src_files` is added to `file_commits` without exception + # (that is, regardless of whether or not its commit was added to + # `pending_set`) otherwise it could potentially get incorrectly attributed + # to an earlier, unfiltered commit (consider the edge case example under the + # general migration comments (above migrate_src()) with a filter of "bar.*") + # or caused to look like it's been deleted from disk when displayed to the + # user. + # + local -A pending_set=() # Hashes of the pending commits. + local -A managed_projs=() # Project names of all managed archives. + local -A managed_pkgnames=() # Package names of all managed archives. local h for f in "${src_files[@]}"; do @@ -390,27 +504,54 @@ function init_globals () # the top, every file on disk must have been added by some commit (that # is, there can be no untracked files). + file_commits["$f"]="$h" + + # Don't add unmanaged ownership manifests to `pending_set` (by skipping + # them) if in any destination-management mode but stable-management (in + # which case there are none in `src_files`). An ownership manifest is + # unmanaged if its project or package name has not been seen (archives + # come before ownership manifests in `src_files`). To this end, also + # record the package and project names of every package archive. 
+ # + if [[ ("$mode" != "source") && ("$mode" != "stable") ]]; then + local fi + src_path_info "$f" | readarray -t fi + local ftype="${fi[0]}" # File type. + + case "$ftype" in + "archive") + # Record the package and project names of this package archive. + # + local p + extract_pkg_info "$src_dir/$f" | readarray -t p # (name ver proj) + managed_pkgnames["${p[0]}"]=true + managed_projs["${p[2]}"]=true + ;; + "ownership") + # Skip this ownership manifest if its package or project name has + # not been seen (which means it's unmanaged). + # + local k="$(basename $(dirname "$f"))" + if [[ (("$f" == */package-owner.manifest) && + ! "${managed_pkgnames[$k]}") || + (("$f" == */project-owner.manifest) && + ! "${managed_projs[$k]}") ]]; then + continue + fi + ;; + esac + fi + # Add the commit to the pending set unless the current file is filtered # out. # - # Note: `src_files` can contain only package archives if we're in - # destination-management mode so there's no need to check the file type. - # # Note: $filter must be unquoted to match as a wildcard pattern. # - if [[ "$mode" == "source" || + if [[ ("$mode" == "source") || ("$(basename "$(dirname "$f")")" == $filter) || # Project name? ("$(basename "$f")" == $filter.*) ]]; then # Filename (with ext)? pending_set["$h"]=true fi - - # Add the file and commit to `file_commits` even if the current commit was - # not added to `pending_set` above otherwise this file could get - # incorrectly attributed to an earlier, unfiltered commit (consider the - # edge case example under the general migration comments (above - # migrate_src()) with a filter of "bar.*"). - # - file_commits["$f"]="$h" done # Arrange the pending commits in the chronological order. @@ -477,88 +618,6 @@ function commit_files () # "$h" } -# Print information about the path of a source repository file. 
-# -# The information includes its class (package archive, ownership manifest, or -# unmanaged), the project directory, and in the case of package archives, the -# section directory. -# -# <path> must be relative to the source repository directory (`src_dir`). -# -# If the path refers to a managed archive file in the source repository, then -# print `archive\n<project>\n<section>\n`. The section directory will be -# relative to the source repository directory. -# -# Otherwise, if the path refers to a managed ownership manifest file in the -# source repository, then print `ownership\n<project>\n`. -# -# Otherwise the file is unmanaged; print `unmanaged\n`. -# -# Note that the function doesn't validate the file path exhaustively and may -# classify improperly named file (invalid base name, etc) as an archive or -# ownership. -# -function src_path_info () # <path> -{ - local f="$1" - - # Check whether the file is a package archive. Valid source repository - # package paths start with one of the section directories in `src_sections` - # which is followed by a single subdirectory (which would be the project - # directory, but this is not checked). - # - local s - for s in "${src_sections[@]}"; do - if [[ "$f" =~ ^"$s"/([^/]+)/[^/]+$ ]]; then - echo "archive" - echo "${BASH_REMATCH[1]}" - echo "$s" - return - fi - done - - # Not a managed archive path, so check whether it's an ownership - # manifest. Valid source repository ownership manifest paths start with the - # directory in `src_owners` and is followed by at least one subdirectory - # (the project directory, again). - # - if [[ -n "$src_owners" && ("$f" =~ ^"$src_owners"/([^/]+)/.+$) ]]; then - echo "ownership" - echo "${BASH_REMATCH[1]}" - else - echo "unmanaged" - fi -} - -# Extract the package name, version, and project from a package archive's -# manifest and print it to stdout in the `<name>\n<version>\n<project>\n` -# form. If the manifest does not specify the project name, the package name is -# returned as the project name.
-# -function extract_pkg_info () # <archive> -{ - local arc="$1" - - local r # (<name> <version> <project>) - bpkg_util_pkg_verify_archive "$arc" | readarray -t r - - if [[ -z "${r[2]}" ]]; then - r[2]="${r[0]}" - fi - - # Verify that the archive parent directory name matches the project. - # - local p="${r[2]}" - if [[ "$p" != "$(basename "$(dirname "$arc")")" ]]; then - error "'$arc' archive directory name does not match package project '$p'" - fi - - local e - for e in "${r[@]}"; do - echo "$e" - done -} - # Exit with an error if a package which is a duplicate of or is in conflict # with the given package exists in any of the destination sections. # @@ -2037,19 +2096,24 @@ while true; do sz="$(file_size "$f")" fi - # Note that, in destination-management mode, there can be no ownership - # manifests in `file_commits`. - # if [[ "${file_commits[$f]}" == "$h" ]]; then - info " $f $sz" # Last added or moved by the current commit. + # Last added or moved by the current commit. + # + # If a commit included some managed and some unmanaged ownership + # manifests then it will not have been filtered out at the top and + # the unmanaged ownerships will also be shown here. (For an example, + # see the first commit output by `bpkg-util-manage --alpha` on the + # real cppget.org repos.) I don't think it's worth checking here + # that ownerships are managed because those sorts of commits should + # be rare. This script also refuses to operate on packages from + # different projects or sections. + # + info " $f $sz" elif [[ -v "file_commits[$f]" ]]; then info "! $f $sz" # Deleted and added back by subsequent commits. - elif [[ ("$mode" == "source") || ("$ftype" != "ownership") ]]; then - # File was deleted and never added again and, if we're in - # destination-management mode, is not an ownership manifest. - # - # Note that actioned files of partially actioned commits will also - # appear this way. + else + # File was deleted and never added again.
Note that actioned files + # of partially actioned commits will also appear this way. # info "* $f" fi -- cgit v1.1