aboutsummaryrefslogtreecommitdiff
path: root/brep/handler/submit/submit-git.in
blob: 347ca1931bdfa9638c3d891a03fed442da348626 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
#!/usr/bin/env bash

# file      : brep/handler/submit/submit-git.in
# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
# license   : MIT; see accompanying LICENSE file

# Package submission handler with git repository storage.
#
# The overall idea behind this handler is to add the package archive into a
# git repository. Another entity (for example, a human or a script) can then
# pull this change and do something about it (for example, review it and/or
# add it to an archive-based repository). In other words, git is used as a
# kind of transport that is easy enough to access for both humans and scripts.
#
# The handler also implements the project/package name ownership verification
# by performing the submitter authentication/authorization based on the
# control repository mechanism described in bdep-publish(1). This
# functionality is optional.
#
# The handler can operate with a single git repository, called "target", or
# with two git repositories, in which case the first is the target and the
# second is called "reference". The reference repository access is read-only
# and it is only consulted for duplicate package suppression and name
# ownership verification. The dual repository mode is normally used to
# implement a two-stage queue/public setup where the package is first queued
# for review and/or testing and then moved (for example, by a moderator) to a
# public repository.
#
# The target repository argument (<tgt-repo>) should be a read-write git
# repository URL. It is cloned (shallow) into the submission directory on
# each submission.
#
# If specified, the reference repository argument (<ref-repo>) should be a
# directory with a pre-cloned read-only reference repository. This directory
# is shared between all instances of the handler. On each submission, the
# handler will flock(1) this directory, git-pull, obtain the information it
# needs, and release the lock.
#
# Both the target and, if specified, reference repositories should contain the
# submit.config.bash repository configuration file in the root directory. The
# configuration file is a bash fragment and is sourced by the handler script.
# It provides the following information:
#
# - Mapping of section names to repository subdirectories in the 'sections'
#   variable (declare -A sections; values are relative to the repository
#   root).
#
#   If there is no key for the submitted section name, then the entry with the
#   special '*' key is used. If there is no such entry, then the submission is
#   invalid. For example:
#
#     sections[alpha]=1/alpha
#     sections[beta]=1/beta
#     sections[stable]=1/testing
#
# - Optional owners subdirectory in the 'owners' variable (relative to the
#   repository root). If not specified, then no ownership verification is
#   performed. For example:
#
#     owners=owners
#
# If the ownership directory is specified, then the handler script maintains
# the project/package name ownership information in this directory. It has the
# following structure:
#
# <owners>/
# |-- <project1>/
# |   |-- <package1>/
# |   |   `-- package-owner.manifest
# |   |-- <package2>/
# |   |   `-- package-owner.manifest
# |   |-- ...
# |   `-- project-owner.manifest
# |-- <project2>/
# |   `-- ...
# `-- ...
#
# If the submitted project name is not yet known, then the handler script
# creates a new project subdirectory and saves project-owner.manifest. The
# project owner manifest contains the following values in the specified order:
#
#   name: <project-name>
#   author-name: <name>
#   author-email: <email>
#   control: <url-prefix>
#
# The 'control' value is the control repository URL prefix and there can be
# multiple such values in a single manifest. The handler script derives it
# from the submitted control repository URL by removing the last path
# component. So, for example, https://github.com/build2/libbutl.git becomes
# https://github.com/build2/.
#
# If the submitted project name is already known, then the handler script
# loads its project-owner.manifest and verifies that at least one of the
# 'control' values is a prefix of the submitted control repository URL.
#
# Similarly, if the submitted package name is not yet known, then the handler
# script creates a new package subdirectory and saves package-owner.manifest.
# The package owner manifest contains the following values in the specified
# order:
#
#   name: <package-name>
#   author-name: <name>
#   author-email: <email>
#   control: <url>
#
# The 'control' value is the control repository URL and there can be multiple
# such values in a single manifest.
#
# If the submitted package name is already known, then the handler script
# loads its package-owner.manifest and verifies that at least one of the
# 'control' values matches the submitted control repository URL. Note that the
# URL is matched without regard to the potential .git extension.
#
# If all these ownership authentication tests pass, the handler script clones
# (shallow) the build2-control branch of the control repository and verifies
# that the submission authorization file is present (see bdep-publish(1) for
# details).
#
# If the submission authorization test passes, then the handler script adds
# the package archives to the target repository, commits this change, and
# then pushes the commit to the remote.
#
# Notes:
#
# - It is possible that a submitted package name already exists in another
#   project. In this case, such a submission is accepted only if the package
#   already exists in the requested project. This allows the moderator to
#   manually permit such multi-project packages (for example, to allow moving
#   packages between projects).
#
# - There could be a race when moving package and ownership information from
#   target to reference. To avoid it, the protocol for such a move is to first
#   add, commit, and push to reference and then remove, commit, and push to
#   target.
#
#   On the handler side, the script acts in the opposite order, cloning the
#   target prior to pulling the reference, in order not to get into the
#   situation where it misses the ownership info that is not in the reference
#   yet but is no longer in the target. Note that if some move happens after
#   the cloning, then the script will be unable to push the target
#   modification and will re-try the whole authentication procedure from
#   scratch.
#
# - Filesystem entries that exist or are created in the data directory:
#
#   <pkg>-<ver>.tar.gz   saved by brep (could be other archives in the future)
#   request.manifest     created by brep
#   package.manifest     extracted by the handler
#   target/              cloned by the handler
#   control/             cloned by the handler
#   result.manifest      saved by brep
#
# Options:
#
# --committer-name <name>
#
#   Name to use for the target repository commits. "Submission Handler" if
#   unspecified.
#
# --committer-email <email>
#
#   Email to use for the target repository commits. noreply@example.com if
#   unspecified.
#
# --result-url <url>
#
#   Result URL base for the response. If specified, the handler will append
#   the <package>/<version> to this value and include the resulting URL in the
#   response message.
#
usage="usage: $0 [<options>] <tgt-repo> [<ref-repo>] <dir>"

# Diagnostics.
#
verbose= #true

# Git network operations timeout (seconds).
#
# Note that we don't cover protocols other than HTTP(S) since for them git
# doesn't support any timeouts (though we may be able to cobble something
# up for SSH).
#
git_timeout=10

# The reference repository lock timeout (seconds).
#
ref_lock_timeout=30

trap "{ exit 1; }" ERR
set -o errtrace # Trap ERR in functions.
set -o pipefail # Return the rightmost non-zero exit status in a pipeline.

@import brep/handler/handler@
@import brep/handler/submit/submit@
@import brep/handler/submit/submit-git@

# Parse the command line options.
#
committer_name="Submission Handler"
committer_email="noreply@example.com"
result_url=

while [ $# -gt 0 ]; do
  case $1 in
    --committer-name)
      shift
      committer_name="$1"
      shift
      ;;
    --committer-email)
      shift
      committer_email="$1"
      shift
      ;;
    --result-url)
      shift
      result_url="${1%/}"
      shift
      ;;
    *)
      break; # The end of options is encountered.
      ;;
  esac
done

if [ -z "$committer_name" -o -z "$committer_email" ]; then
  error "$usage"
fi

# Parse the command line arguments.
#
if [ "$#" -lt 2 -o "$#" -gt 3 ]; then
  error "$usage"
fi

# Target repository URL.
#
tgt_repo="$1"
shift

if [ -z "$tgt_repo" ]; then
  error "$usage"
fi

# Reference repository directory.
#
# Note that the last argument is always the submission data directory.
#
ref_repo=

if [ "$#" -gt 1 ]; then
  ref_repo="$1"
  shift

  if [ -z "$ref_repo" ]; then
    error "$usage"
  fi

  if [ ! -d "$ref_repo" ]; then
    error "'$ref_repo' does not exist or is not a directory"
  fi
fi

# Submission data directory.
#
data_dir="${1%/}"
shift

if [ -z "$data_dir" ]; then
  error "$usage"
fi

if [ ! -d "$data_dir" ]; then
  error "'$data_dir' does not exist or is not a directory"
fi

reference="$(basename "$data_dir")"

# Git verbosity options.
#
# Note that not all git commands support the -q/-v options. Also note that
# these variable expansions should not be quoted.
#
if [ "$verbose" ]; then
  gqo=
  gvo="-v"
else
  gqo="-q"
  gvo=
fi

# Git doesn't support the connection timeout option. The options we use are
# just an approximation of the former, that, in particular, don't cover the
# connection establishing. To work around this problem, before running a git
# command that assumes the remote repository communication we manually check
# connectivity with the remote repository.
#
git_http_timeout=("-c" "http.lowSpeedLimit=1" \
                  "-c" "http.lowSpeedTime=$git_timeout")

# Parse the submission request manifest and obtain the required values.
#
manifest_parser_start "$data_dir/request.manifest"

archive=
sha256sum=
section=
author_name=
author_email=
control=
simulate=

while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
  case "$n" in
    archive)      archive="$v"      ;;
    sha256sum)    sha256sum="$v"    ;;
    section)      section="$v"      ;;
    author-name)  author_name="$v"  ;;
    author-email) author_email="$v" ;;
    control)      control="$v"      ;;
    simulate)     simulate="$v"     ;;
  esac
done

manifest_parser_finish

if [ -z "$archive" ]; then
  error "archive manifest value expected"
fi

if [ -z "$sha256sum" ]; then
  error "sha256sum manifest value expected"
fi

if [ -n "$simulate" -a "$simulate" != "success" ]; then
  exit_with_manifest 400 "unrecognized simulation outcome '$simulate'"
fi

# The author-* manifest values should both be present or absent.
#
if [ -z "$author_name" -a -n "$author_email" ]; then
  exit_with_manifest 400 "author-name manifest value expected"
fi

if [ -z "$author_email" -a -n "$author_name" ]; then
  exit_with_manifest 400 "author-email manifest value expected"
fi

# Note: checking for section, author-*, and control later.

m="$data_dir/package.manifest"
extract_package_manifest "$data_dir/$archive" "$m"

# Parse the package manifest and obtain the package name and version.
#
manifest_parser_start "$m"

name=
version=
project=

while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
  case "$n" in
    name)    name="$v"    ;;
    version) version="$v" ;;
    project) project="$v" ;;
  esac
done

manifest_parser_finish

if [ -z "$name" ]; then
  error "name manifest value expected"
fi

if [ -z "$version" ]; then
  error "version manifest value expected"
fi

if [ -z "$project" ]; then
  project="$name"
fi

if [ -n "$result_url" ]; then
  message_suffix=": $result_url/$name/$version"
else
  message_suffix=": $name/$version"
fi

# Exit with the 'submission is queued' result manifest if simulating.
#
# Note that we can't expect a real control repository URL to be specified for
# simulating, so trying to authenticate/authorize would be meaningless.
#
if [ -n "$simulate" ]; then
  run rm -r -f "$data_dir"

  trace "package submission is simulated$message_suffix"
  exit_with_manifest 200 "package submission is queued$message_suffix"
fi

function git_add () # <repo-dir> <path>...
{
  local d="$1"
  shift

  run git -C "$d" add $gvo "$@" >&2
}

# For now we make 10 re-tries to add the package and push to target. Push can
# fail due to the target-to-reference information move race (see the above
# notes for details) or because concurrent submissions. We may want to make it
# configurable in the future.
#
pkg_added=

for i in {1..11}; do

  # Clone the target repository.
  #
  tgt_dir="$data_dir/target"
  check_connectivity "$tgt_repo" "$git_timeout" true

  run git "${git_http_timeout[@]}" clone $gqo $gvo --single-branch --depth 1 \
"$tgt_repo" "$tgt_dir" >&2

  check_package_duplicate "$name" "$version" "$tgt_dir"

  # Check for duplicates and try to authenticate the package ownership using
  # information in the reference repository, if specified.
  #
  if [ -n "$ref_repo" ]; then

    remote_url=$(git -C "$ref_repo" config --get remote.origin.url)

    # Open the reading file descriptor and lock the repository. Fail if unable
    # to lock before timeout.
    #
    l="$ref_repo/submit.config.bash"
    trace "+ exec {fd}<$l"
    exec {fd}<"$l"

    if ! run flock -w "$ref_lock_timeout" "$fd"; then
      exit_with_manifest 503 "submission service temporarily unavailable"
    fi

    # Pull the reference repository.
    #
    check_connectivity "$remote_url" "$git_timeout" true
    run git "${git_http_timeout[@]}" -C "$ref_repo" pull $gqo $gvo >&2

    # Check the package duplicate.
    #
    check_package_duplicate "$name" "$version" "$ref_repo"

    # Authenticate the project ownership.
    #
    auth="$(auth_project "$project" "$control" "$ref_repo")"

    # Try to authenticate the package ownership if the project ownership was
    # authenticated successfully.
    #
    if [ "$auth" == "project" ]; then
      a="$(auth_package "$project" "$name" "$control" "$ref_repo")"

      # If the package is unknown to this project, we will try to authenticate
      # the package name with the target repository later and so we keep the
      # 'project' auth state.
      #
      if [ "$a" != "unknown" ]; then
        auth="$a"
      fi
    fi

    trace "reference auth: $auth"

    if [ "${auth:0:1}" == ":" ]; then # Authentication error?
      echo "$auth"
      exit 0
    fi

    # If the package is not present in the specified project then we need to
    # make sure it is also not present in any other project.
    #
    if [ "$auth" == "project" -o "$auth" == "unknown" ]; then
      auth_package_unknown "$name" "$ref_repo"
    fi

    trace "+ exec {fd}<&-"
    exec {fd}<&- # Close the file descriptor and unlock the repository.
  else
    auth="disabled"
  fi

  ref_auth="$auth"

  # Now try to authenticate the package ownership using information in the
  # target repository unless already authenticated with reference.
  #
  if [ "$auth" != "package" ]; then

    # Don't authenticate the project ownership if this is already done with
    # the reference repository.
    #
    if [ "$auth" != "project" ]; then
      auth="$(auth_project "$project" "$control" "$tgt_dir")"
    fi

    # Try to authenticate the package ownership if the project ownership was
    # authenticated successfully.
    #
    if [ "$auth" == "project" ]; then
      auth="$(auth_package "$project" "$name" "$control" "$tgt_dir")"
    fi

    trace "target auth: $auth"

    if [ "${auth:0:1}" == ":" ]; then # Authentication error?
      echo "$auth"
      exit 0
    fi
  fi

  trace "resulting auth: $auth"

  # Sanity check the auth variable value.
  #
  case "$auth" in
    package)  ;;
    unknown)  ;;
    disabled) ;;

    *) error "unexpected resulting auth '$auth'";;
  esac

  # Establish ownership of the package name unless already done.
  #
  if [ "$auth" == "unknown" ]; then

    # Check that the necessary request manifest values are specified.
    #
    if [ -z "$author_name" ]; then
      exit_with_manifest 400 "author-name manifest value expected"
    fi

    # Check that the package doesn't belong yet to some other project.
    #
    auth_package_unknown "$name" "$tgt_dir"

    # First the project name.
    #
    # Note that owners_dir() shouldn't return an empty string at this stage.
    #
    d="$(owners_dir "$tgt_dir")/$project"

    # Establish ownership of the project name unless already done. Note that
    # it can only be owned by the submitter at this stage.
    #
    prj_man="$d/project-owner.manifest"

    if [ ! -f "$prj_man" ]; then
      run mkdir -p "$d" # Also creates the owners directory if not exist.

      ctl="$(repository_base "$control")"

      create_owner_manifest \
        "$project" "$author_name" "$author_email" "$ctl" "$prj_man"

      # Add the project owners manifest file to git repository using the path
      # relative to the repository directory.
      #
      git_add "$tgt_dir" "${prj_man#$tgt_dir/}"
    fi

    # Now the package name.
    #
    d="$d/$name"
    run mkdir "$d"

    pkg_man="$d/package-owner.manifest"

    create_owner_manifest \
      "$name" "$author_name" "$author_email" "$control" "$pkg_man"

    # Add the package owners manifest file using path relative to the
    # repository directory.
    #
    git_add "$tgt_dir" "${pkg_man#$tgt_dir/}"

    auth="package"
  fi

  # Respond with the 'unauthorized' manifest if we failed to authenticate the
  # submitter as the package owner, unless both the reference and target
  # repositories have the ownership authentication disabled. In the latter
  # case no authorization is required.
  #
  if [ "$auth" != "disabled" -o "$ref_auth" != "disabled" ]; then

    # Respond with the 'unauthorized' manifest if not the package owner.
    #
    if [ "$auth" != "package" ]; then
      if [ "$auth" == "project" -o "$ref_auth" == "project" ]; then
        exit_with_manifest 401 "package owner authentication failed"
      else
        exit_with_manifest 401 "project owner authentication failed"
      fi
    fi

    # Authorize the submission.
    #
    ctl_dir="$data_dir/control"

    check_connectivity "$control" "$git_timeout" ""

    if ! run_silent git "${git_http_timeout[@]}" clone $gqo $gvo --depth 1 \
--single-branch --branch "build2-control" "$control" "$ctl_dir" >&2; then
      exit_with_manifest 422 "failed to git-clone $control"
    fi

    if [ ! -f "$ctl_dir/submit/${sha256sum:0:16}" ]; then
      exit_with_manifest 401 "package publishing authorization failed"
    fi
  fi

  # Remove the package version revision archives that may exist in the section
  # directory of the target repository.
  #
  # Note that it could be desirable to keep multiple revisions in different
  # sections. However, we need to remove a package revision that may
  # potentially exist in a different project.
  #
  s="$(section_dir "$section" "$tgt_dir")"

  if [ -z "$s" ]; then
    exit_with_manifest 400 "unrecognized section '$section'"
  fi

  # Strips the version revision part, if present.
  #
  v="$(sed -n -re 's%^(\+?[^+]+)(\+[0-9]+)?$%\1%p' <<<"$version")"

  # Make sure the section directory exists before we run find in it.
  #
  d="$tgt_dir/$s/$project"
  run mkdir -p "$d" # Create all the parent directories as well.

  # Go through the potentially matching archives (for example, for
  # foo-1.2.3+2: foo-1.2.3.tar.gz, foo-1.2.3+1.tar.gz, foo-1.2.30.tar.gz, etc)
  # and remove those that match exactly.
  #
  # Change CWD to the section directory to make sure that the found archive
  # paths doesn't contain spaces.
  #
  fs=($(run cd "$tgt_dir/$s" && run find -name "$name-$v*"))

  for f in "${fs[@]}"; do
    if [[ "$f" =~ ^\./[^/]+/"$name-$v"(\+[0-9]+)?\.[^/]+$ ]]; then
      run git -C "$tgt_dir" rm $gqo "$s/$f" >&2
    fi
  done

  # Finally, add the package archive to the target repository.
  #
  # We copy the archive rather than move it since we may need it for a re-try.
  #
  a="$d/$archive"
  run cp "$data_dir/$archive" "$a"

  git_add "$tgt_dir" "${a#$tgt_dir/}"

  author=()
  if [ -n "$author_name" ]; then
    author=(--author="$author_name <$author_email>")
  fi

  run git -c "user.name=$committer_name" -c "user.email=$committer_email" \
-C "$tgt_dir" commit "${author[@]}" $gqo $gvo -F - <<EOF >&2
Add $name/$version to $s/$project

$(cat "$data_dir/request.manifest")
EOF

  check_connectivity "$tgt_repo" "$git_timeout" true

  # Try to push the target modifications. If this succeeds then we are done.
  # Otherwise, drop the target directory and re-try the whole
  # authentication/authorization procedure, unless we are out of attempts.
  #
  if run_silent git "${git_http_timeout[@]}" -C "$tgt_dir" push >&2; then
    pkg_added=true
    break
  else
    run rm -r -f "$tgt_dir" "$ctl_dir"
  fi
done

if [ ! "$pkg_added" ]; then
  exit_with_manifest 503 "submission service temporarily unavailable"
fi

# Remove the no longer needed submission data directory.
#
run rm -r -f "$data_dir"

trace "package submission is queued$message_suffix"
exit_with_manifest 200 "package submission is queued$message_suffix"