aboutsummaryrefslogtreecommitdiff
path: root/brep/handler/upload/upload-bindist.in
blob: 05d0bcf9cefbe09c35a39ac6737abf03e6847bec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
#!/usr/bin/env bash

# file      : brep/handler/upload/upload-bindist.in
# license   : MIT; see accompanying LICENSE file

# Binary distribution packages upload handler which places the uploaded
# packages under the following filesystem hierarchy:
#
# <root>/[<tenant>/]<instance>/<os-release-name-id><os-release-version-id>/<project>/<package>/<version>/<package-config>
#
# The overall idea behind this handler is to create a uniquely named package
# configuration directory for each upload and maintain the package
# configuration symlink at the above path to refer to the directory of the
# latest upload.
#
# The root directory is passed as an argument (via upload-handler-argument).
# All the remaining directory components are retrieved from the respective
# manifest values of request.manifest created by brep and
# bindist-result.manifest contained in the uploaded archive.
#
# Note that the leaf component of the package configuration symlink path is
# sanitized, having the "bindist", <instance>, <os-release-name-id>, and
# <os-release-name-id><os-release-version-id> dash-separated sub-components
# removed. If the component becomes empty as a result of the sanitization,
# then the target CPU is assumed, if the package is not architecture-
# independent, and "noarch" otherwise. If the sanitized component is not
# empty, the package is not architecture-independent, and the resulting
# component doesn't contain the target CPU, then prepend it with the <cpu>-
# prefix. For example, the following symlink paths:
#
# .../archive/windows10/foo/libfoo/1.0.0/bindist-archive-windows10-release
# .../archive/windows10/foo/libfoo/1.0.0/bindist-archive-windows10
#
# are reduced to:
#
# .../archive/windows10/foo/libfoo/1.0.0/x86_64-release
# .../archive/windows10/foo/libfoo/1.0.0/x86_64
#
# To achieve this the handler performs the following steps (<dir> is passed as
# the last argument by brep and is a subdirectory of upload-data):
#
# - Parse <dir>/request.manifest to retrieve the upload archive path,
#   timestamp, and the values which are required to compose the package
#   configuration symlink path.
#
# - Extract files from the upload archive.
#
# - Parse <dir>/<instance>/bindist-result.manifest to retrieve the values
#   required to compose the package configuration symlink path and the package
#   file paths.
#
# - Compose the package configuration symlink path.
#
# - Compose the package configuration directory path by appending the
#   -<timestamp>[-<number>] suffix to the package configuration symlink path.
#
# - Create the package configuration directory.
#
# - Copy the uploaded package files into the package configuration directory.
#
# - Generate the packages.sha256 file in the package configuration directory,
#   which lists the SHA256 checksums of the files contained in this directory.
#
# - Switch the package configuration symlink to refer to the newly created
#   package configuration directory.
#
# - If the --keep-previous option is not specified, then remove the previous
#   target of the package configuration symlink, if it exists.
#
# Notes:
#
# - There could be a race both with upload-bindist-clean and other
#   upload-bindist instances while creating the package version/configuration
#   directories, querying the package configuration symlink target, switching
#   the symlink, and removing the symlink's previous target. To avoid it, the
#   root directory needs to be locked for the duration of these operations.
#   This, however, needs to be done granularly to perform the time consuming
#   operations (files copying, etc) while not holding the lock.
#
# - The brep module doesn't acquire the root directory lock. Thus, the package
#   configuration symlink during its lifetime should always refer to a
#   valid/complete package configuration directory.
#
# - Filesystem entries that exist or are created in the data directory:
#
#   <archive>           saved by brep
#   request.manifest    created by brep
#   <instance>/*        extracted by the handler (bindist-result.manifest, etc)
#   result.manifest     saved by brep
#
# Options:
#
# --keep-previous
#
#   Don't remove the previous target of the package configuration symlink.
#
# Usage message which is reported via error if the command line is invalid.
#
usage="usage: $0 [<options>] <root> <dir>"

# Diagnostics.
#
# Disabled by default (empty value); assign true to enable.
#
# NOTE(review): presumably consulted by the trace helpers of the imported
# handler modules -- confirm.
#
verbose= #true

# The root directory lock timeout (in seconds).
#
# Passed to flock -w when locking <root>/upload.lock.
#
lock_timeout=60

# If the package configuration directory already exists (may happen due to the
# low timestamp resolution), then re-try creating the configuration directory
# by adding the -<number> suffix and incrementing it until the creation
# succeeds or the retries limit is reached.
#
create_dir_retries=99

# Terminate the script with the exit status 1 as soon as any command fails
# (outside of the conditions, where failures are expected and handled).
#
trap "{ exit 1; }" ERR
set -o errtrace     # Trap in functions and subshells.
set -o pipefail     # Fail if any pipeline command fails.
shopt -s lastpipe   # Execute last pipeline command in the current shell.
shopt -s nullglob   # Expand no-match globs to nothing rather than themselves.

@import brep/handler/handler@
@import brep/handler/upload/upload@

# Consume the leading command line options, stopping at the first argument
# which is not a recognized option.
#
keep_previous=

while (( $# > 0 )); do
  if [[ "$1" == "--keep-previous" ]]; then
    keep_previous=true
    shift
  else
    break
  fi
done

# Exactly two arguments (<root> and <dir>) must remain.
#
if (( $# != 2 )); then
  error "$usage"
fi

# Take the destination root directory and the upload data directory off the
# command line, dropping a single trailing slash from each of them.
#
root_dir="${1%/}"
data_dir="${2%/}"
shift 2

# Each of them must be a non-empty path which refers to an existing directory
# (the root directory is verified first).
#
for d in "$root_dir" "$data_dir"; do
  [[ -n "$d" ]] || error "$usage"
  [[ -d "$d" ]] || error "'$d' does not exist or is not a directory"
done

# Upload request reference (the data directory name).
#
reference="$(basename "$data_dir")"

# Read the upload request manifest, caching the values we are interested in
# in the same-named variables (with dashes replaced by underscores) and
# ignoring the rest.
#
manifest_parser_start "$data_dir/request.manifest"

archive=
instance=
timestamp=
name=
version=
project=
package_config=
target=
tenant=

while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
  case "$n" in
    archive | instance | timestamp | name | version | project | target | tenant)
      printf -v "$n" '%s' "$v" # The variable is named as the manifest value.
      ;;
    package-config)
      package_config="$v"
      ;;
  esac
done

manifest_parser_finish

# Verify that all the required values are present (non-empty). Note that
# tenant is optional.
#
for mn in archive instance timestamp name version project package-config target; do
  mv="${mn//-/_}" # Name of the variable which caches the value.

  if [[ -z "${!mv}" ]]; then
    error "$mn manifest value expected"
  fi
done

# The package-config value ends up being the package configuration symlink
# name. Since brep treats symlinks with the leading dots as hidden and skips
# them (see mod/mod-package-version-details.cxx for details), refuse such
# values.
#
if [[ "${package_config:0:1}" == "." ]]; then
  exit_with_manifest 400 "package-config manifest value may not start with dot"
fi

# The CPU is the first dash-separated component of the target triplet (which
# must be followed by at least one more character after the dash).
#
cpu="$(sed -n -re 's/^([^-]+)-.+/\1/p' <<<"$target")"

[[ -n "$cpu" ]] || error "CPU expected in target triplet '$target'"

# Deduce the CPU representation which is normally used in the package file
# names of the binary distribution. That's the CPU as it appears in the target
# triplet, except for some of the CPUs on Debian and Fedora (see bpkg's
# system-package-manager-{fedora,debian}.cxx for details).
#
cpu_dist="$cpu"

case "$instance:$cpu" in
  debian:x86_64)    cpu_dist="amd64" ;;
  debian:aarch64)   cpu_dist="arm64" ;;
  debian:i[3456]86) cpu_dist="i386"  ;;
  fedora:i[3456]86) cpu_dist="i686"  ;;
esac

# Unpack the archive.
#
# The archive is extracted into the data directory and is expected to contain
# the <instance>/ subdirectory with the bindist-result.manifest file in it.
#
# NOTE(review): the archive is uploaded content and the extraction relies on
# tar's own handling of absolute and parent-referring member names as well as
# of symlinks -- confirm this is sufficient for the deployment.
#
run tar -xf "$data_dir/$archive" -C "$data_dir"

# Parse the bindist result manifest list.
#
f="$data_dir/$instance/bindist-result.manifest"

# Missing manifest is the uploader's error, so respond with 400 rather than
# failing the handler.
#
if [[ ! -f "$f" ]]; then
  exit_with_manifest 400 "$instance/bindist-result.manifest not found"
fi

manifest_parser_start "$f"

# Parse the distribution manifest.
#
# Note that we need to skip the first manifest version value and parse until
# the next one is encountered, which introduces the first package file
# manifest.
#
# Also note that the parser is not finished after this loop: the package file
# manifests which follow are read from the same file descriptor later.
#
os_release_name_id=
os_release_version_id=

first=true # True until the first (distribution) manifest start is seen.
more=      # True if the start of a package file manifest is encountered.
while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
  case "$n" in
    "") if [[ "$first" ]]; then # Start of the first (distribution) manifest?
          first=
        else                    # Start of the second (package file) manifest.
          more=true
          break
        fi
        ;;

    os-release-name-id)    os_release_name_id="$v"    ;;
    os-release-version-id) os_release_version_id="$v" ;;
  esac
done

# Both OS release values are required since they are used to compose the
# package configuration symlink path and to sanitize its leaf component.
#
if [[ -z "$os_release_name_id" ]]; then
  exit_with_manifest 400 "os-release-name-id bindist result manifest value expected"
fi

if [[ -z "$os_release_version_id" ]]; then
  exit_with_manifest 400 "os-release-version-id bindist result manifest value expected"
fi

# The manifest list must contain at least one package file manifest.
#
if [[ ! "$more" ]]; then
  exit_with_manifest 400 "no package file manifests in bindist result manifest list"
fi

# Parse the package file manifest list and cache the file paths.
#
# While at it, detect if the package is architecture-specific or not by
# checking if any package file names contain the distribution-specific CPU
# representation (as a sub-string).
#
# Note that while we currently only need the package file paths, we can make
# use of their types and system names in the future. Thus, let's verify that
# all the required package file values are present and, while at it, cache
# them all in the parallel arrays.
#
# Also note that the manifest comes from the uploaded archive and the package
# file paths are later appended to the <dir>/<instance>/ directory to locate
# the files to copy. Thus, to make sure that only the files from this
# directory can be published, we reject the paths which contain the '..'
# components.
#
package_file_paths=()
package_file_types=()
package_file_system_names=()

arch_specific=

# The outer loop iterates over package file manifests while the inner loop
# iterates over manifest values in each such manifest.
#
while [[ "$more" ]]; do
  more=
  type=
  path=
  system_name=

  while IFS=: read -ru "$manifest_parser_ofd" -d '' n v; do
    case "$n" in
      "") # Start of the next package file manifest.
        more=true
        break
        ;;

      package-file-path)        path="$v"        ;;
      package-file-type)        type="$v"        ;;
      package-file-system-name) system_name="$v" ;;
    esac
  done

  if [[ -z "$path" ]]; then
    exit_with_manifest 400 "package-file-path bindist result manifest value expected"
  fi

  if [[ -z "$type" ]]; then
    exit_with_manifest 400 "package-file-type bindist result manifest value expected"
  fi

  # Wrap the path into slashes so that the leading, trailing, and sole '..'
  # components are matched by the same pattern as the inner ones.
  #
  if [[ "/$path/" == *"/../"* ]]; then
    exit_with_manifest 400 "package-file-path bindist result manifest value may not contain '..'"
  fi

  package_file_paths+=("$path")
  package_file_types+=("$type")
  package_file_system_names+=("$system_name") # Note: system name can be empty.

  if [[ "$path" == *"$cpu_dist"* ]]; then
    arch_specific=true
  fi
done

manifest_parser_finish

# Sanitize the package configuration name.
#
# Print the <package-config> value with the "bindist", <instance>,
# <os-release-name-id>, and <os-release-name-id><os-release-version-id>
# dash-separated sub-components removed. Note that the result can be empty.
#
# Note that the value is split into the components with read rather than by
# relying on the word splitting of an unquoted command substitution. The
# latter would also subject the components to the pathname expansion which,
# given that the nullglob option is in effect, would silently drop the
# components containing the glob characters or replace them with names of
# files in the current directory.
#
function sanitize_package_config () # <package-config> <instance> <os-release-name-id> <os-release-version-id>
{
  local pc="$1"
  local inst="$2"
  local os_id="$3"
  local os_ver="$4"

  # Split on dashes (and on whitespaces), ignoring the empty components.
  #
  # Note that read returns non-zero on reaching EOF before encountering the
  # NUL delimiter, which is always the case here.
  #
  local cs=()
  IFS=$' \t\n' read -r -d '' -a cs <<<"${pc//-/ }" || true

  local r=
  local c
  for c in "${cs[@]}"; do
    if [[ "$c" != "bindist" &&
          "$c" != "$inst"   &&
          "$c" != "$os_id"  &&
          "$c" != "$os_id$os_ver" ]]; then
      r="${r:+$r-}$c"
    fi
  done

  printf '%s\n' "$r"
}

config="$(sanitize_package_config "$package_config"     \
                                  "$instance"           \
                                  "$os_release_name_id" \
                                  "$os_release_version_id")"

# Make sure the architecture is reflected in the sanitized configuration name:
# for an architecture-specific package the name must mention the target CPU
# (which is used as the name if the latter is empty and is prepended to it
# otherwise) while for an architecture-independent package an empty name is
# replaced with "noarch".
#
if [[ -n "$arch_specific" ]]; then
  case "$config" in
    "")       config="$cpu"         ;;
    *"$cpu"*)                       ;; # Already contains the CPU.
    *)        config="$cpu-$config" ;;
  esac
elif [[ -z "$config" ]]; then
  config="noarch"
fi

# Compose the package configuration symlink path:
#
# <root>/[<tenant>/]<instance>/<os-release-name-id><os-release-version-id>/<project>/<package>/<version>/<package-config>
#
# Note that the tenant component is only present for a non-empty tenant.
#
config_link="$root_dir${tenant:+/$tenant}"
config_link+="/$instance"
config_link+="/$os_release_name_id$os_release_version_id"
config_link+="/$project"
config_link+="/$name"
config_link+="/$version"
config_link+="/$config"

# The package configuration directory path is the symlink path with the
# -<timestamp> suffix.
#
config_dir="$config_link-$timestamp"

# Create the package configuration directory.
#
# Note that it is highly unlikely that multiple uploads for the same package
# configuration/distribution occur at the same time (with the seconds
# resolution) making the directory name not unique. If that still happens,
# let's retry for some reasonable number of times to create the directory,
# while adding the -<number> suffix to its path on each iteration. If
# that also fails, then we assume that there is some issue with the handler
# setup and fail, printing the cached mkdir diagnostics to stderr.
#
# Note that we need to prevent removing of the potentially empty package
# version directory by the upload-bindist-clean script before we create
# configuration directory. To achieve that, we lock the root directory for the
# duration of the package version/configuration directories creation.
#
# Open the reading file descriptor and lock the root directory. Fail if
# unable to lock before timeout.
#
# Note that the lock file is created if it doesn't exist and the descriptor
# number is allocated by the shell and saved in lfd.
#
lock="$root_dir/upload.lock"
run touch "$lock"
trace "+ exec {lfd}<$lock"
exec {lfd}<"$lock"

if ! run flock -w "$lock_timeout" "$lfd"; then
  exit_with_manifest 503 "upload service is busy"
fi

# Create parent (doesn't fail if directory exists).
#
config_parent_dir="$(dirname "$config_dir")"
run mkdir -p "$config_parent_dir"

created= # True if the configuration directory is created.

# Note that mkdir is not executed via run since we need to cache its
# diagnostics (in e) and to handle its failure. On success config_dir refers
# to the created directory, potentially with the -<number> suffix added (from
# -0 and up to but not including -<create_dir_retries>).
#
trace_cmd mkdir "$config_dir"
if ! e="$(mkdir "$config_dir" 2>&1)"; then # Note: fails if directory exists.
  for ((i=0; i != $create_dir_retries; ++i)); do
    d="$config_dir-$i"
    trace_cmd mkdir "$d"
    if e="$(mkdir "$d" 2>&1)"; then
      config_dir="$d"
      created=true
      break
    fi
  done
else
  created=true
fi

# Close the file descriptor and unlock the root directory.
#
trace "+ exec {lfd}<&-"
exec {lfd}<&-

# Print the diagnostics of the last failed mkdir attempt and fail.
#
if [[ ! "$created" ]]; then
  echo "$e" 1>&2
  error "unable to create package configuration directory"
fi

# Cleanup to perform on exit: unless canceled by clearing config_dir (which is
# done once the symlink is switched to refer to the new package configuration
# directory), remove this directory together with the new symlink, if the
# latter is already created.
#
# Note that the entries may already be removed by someone else (for example,
# by the upload-bindist-clean script), which must not be treated as an error.
#
config_link_new=
function exit_trap ()
{
  # Bail out if the removal is canceled or there is nothing to remove.
  #
  if [[ -z "$config_dir" || ! -d "$config_dir" ]]; then
    return 0
  fi

  if [[ -n "$config_link_new" && -L "$config_link_new" ]]; then
    run rm -f "$config_link_new"
  fi

  run rm -rf "$config_dir"
}

trap exit_trap EXIT

# Copy all the extracted package files to the package configuration directory.
#
# Note that cp places each file directly into the configuration directory
# under its leaf name, regardless of any directory components in the package
# file path. Thus, while at it, collect these leaf names for the checksums
# generation below.
#
package_file_names=()
for p in "${package_file_paths[@]}"; do
  run cp "$data_dir/$instance/$p" "$config_dir"
  package_file_names+=("${p##*/}")
done

# Generate the packages.sha256 file.
#
# Note that since we don't hold the root directory lock at this time, we
# temporarily "hide" the resulting file from the upload-bindist-clean script
# (which uses it for the upload age calculation) by adding the leading dot to
# its name. Not doing so we may potentially end up with upload-bindist-clean
# removing the half-cooked directory and so breaking the upload handling.
#
# Also note that the checksums are calculated for the files as they are named
# in the configuration directory (and not as they are located in the archive)
# and that the names are separated from the options with -- since they come
# from the uploaded manifest and can potentially start with a dash.
#
trace "+ (cd $config_dir && exec sha256sum -b -- ${package_file_names[*]} >.packages.sha256)"
(cd "$config_dir" && exec sha256sum -b -- "${package_file_names[@]}" >".packages.sha256")

# Create the new package configuration "hidden" symlink. Construct its name by
# prepending the configuration directory name with a dot.
#
# Note that the symlink target is relative (just the directory name), since
# the symlink and the directory share the parent directory.
#
config_dir_name="$(basename "$config_dir")"
config_link_new="$config_parent_dir/.$config_dir_name"
run ln -s "$config_dir_name" "$config_link_new"

# Switch the package configuration symlink atomically. But first, cache the
# previous package configuration symlink target if the --keep-previous option
# is not specified and "unhide" the packages.sha256 file.
#
# Note that to avoid a race with upload-bindist-clean and other upload-bindist
# instances, we need to perform all the mentioned operations as well as
# removing the previous package configuration directory while holding the root
# directory lock.

# Lock the root directory.
#
# Fail with the 503 status if unable to lock before timeout. Note that in this
# case the new package configuration directory and symlink are removed by the
# exit trap.
#
trace "+ exec {lfd}<$lock"
exec {lfd}<"$lock"

if ! run flock -w "$lock_timeout" "$lfd"; then
  exit_with_manifest 503 "upload service is busy"
fi

# Note that while the realpath utility is not POSIX, it is present on both
# Linux and FreeBSD.
#
# Also note that config_dir_prev is left empty if there is no previous symlink
# or if its target needs to be kept.
#
config_dir_prev=
if [[ ! "$keep_previous" && -L "$config_link" ]]; then
  config_dir_prev="$(realpath "$config_link")"
fi

# "Unhide" the packages.sha256 file.
#
run mv "$config_dir/.packages.sha256" "$config_dir/packages.sha256"

# Note that since brep doesn't acquire the root directory lock, we need to
# switch the symlink as the final step, when the package directory is fully
# prepared and can be exposed.
#
# @@ Also note that the -T option is Linux-specific. To add support for
#    FreeBSD we need to use -h option there (but maybe -T also works,
#    who knows).
#
run mv -T "$config_link_new" "$config_link"

# Now, when the package configuration symlink is switched, disable removal of
# the newly created package configuration directory.
#
# Note that we still can respond with an error status. However, the remaining
# operations are all cleanups and thus unlikely to fail.
#
config_dir=

# Remove the previous package configuration directory, if requested.
#
# NOTE(review): unlike in the exit trap, rm is called without -f here and so
# fails (with the symlink already switched) if the previous symlink target no
# longer exists -- confirm this is intended.
#
if [[ -n "$config_dir_prev" ]]; then
  run rm -r "$config_dir_prev"
fi

# Unlock the root directory.
#
trace "+ exec {lfd}<&-"
exec {lfd}<&-

# Remove the no longer needed upload data directory.
#
run rm -r "$data_dir"

# Report success in both the trace and the result manifest.
#
trace "binary distribution packages are published"
exit_with_manifest 200 "binary distribution packages are published"