bbot/agent/agent.cli


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404

// file      : bbot/agent.cli
// license   : MIT; see accompanying LICENSE file

include <libbbot/manifest.hxx>;

include <bbot/common.cli>;

"\section=1"
"\name=bbot-agent"
"\summary=build bot agent"

namespace bbot
{
  {
    "<options> <priority> <url>",

    "
    \h|SYNOPSIS|

    \c{\b{bbot-agent --help}\n
       \b{bbot-agent --version}\n
       \b{bbot-agent} [<options>] [<priority>=]<url>...}

    \h|DESCRIPTION|

    \cb{bbot-agent} @@ TODO.

    The controller URL <priority> is a four or five-digit decimal value. If it
    is absent, then \cb{0} (lowest priority) is assumed. URLs with equal
    priority are queried at random.

    The <priority> value has the \c{[\i{F}]\i{DCBA}} form which encodes four
    priority levels (\ci{DCBA}) each occupying one decimal digit (so there are
    10 distinct priorities in each level) plus the optional boost flag
    (\ci{F}). These levels offer different trade-offs between the speed of
    completing a higher priority task and potentially discarding work that has
    already been done.

    The first priority level (\ci{A}) is a simple preference: among the URLs
    with equal values for other levels (\ci{DCB}), those with higher first
    level priorities are queried first.

    The second priority level (\ci{B}) has the semantics of the first level
    plus it prevents URLs with lower second priority level from being
    queried until the task with a higher second priority level has completed,
    effectively conserving the resources for the higher priority task.

    The third priority level (\ci{C}) has the semantics of the second level
    plus it may interrupt one lower third priority level task in order to
    perform the higher third priority task (the interrupt is necessary if the
    desired machine is used by the lower priority task or the number of tasks
    already being performed is the maximum allowed to be performed
    concurrently; see \cb{--instance-max}).

    Finally, the fourth priority level (\ci{D}) has the semantics of the third
    level except that not one but all the lower fourth priority level tasks
    are interrupted, effectively dedicating all the available resources to
    the higher priority task. This level can also be combined with the boost
    flag \ci{F}. If this flag is \cb{1} then the higher priority task's CPU
    number (\cb{--cpu}) is boosted to the full number of available hardware
    threads (or, to view it another way, the fourth priority level has 20
    possible values, not 10, with the first 0-9 being without the boost while
    the last 10-19 being with the boost). Note that this boosting semantics
    may not be accurate if the agent is executed with CPU affinity. Also note
    that there is no corresponding RAM boosting and it's possible that in some
    configurations the amount of RAM will be insufficient for the boosted CPU
    count.

    Note that the priority levels are hierarchical in the sense that within a
    given higher level URLs can be further prioritized using the lower
    levels. As an example, consider a deployment with three controller URLs:
    background package rebuilds (\cb{pkg.example.org}), user-initiated CI
    (\cb{ci.example.org}), and user-initiated interactive CI
    (\cb{ici.example.org}). Given the following priorities:

    \
    0000=https://pkg.example.org
    0100=https://ci.example.org
    0101=https://ici.example.org
    \

    Both types of CI tasks will interrupt one background rebuild task if
    necessary while the interactive CI tasks will be merely preferred over
    non-interactive.

    Note that on termination \cb{bbot-agent} may leave behind a machine lock
    and working machine snapshot. It is expected that the caller (normally
    Build OS monitor) cleans them up before restarting the agent.
    "
  }

  // Command line options recognized by bbot-agent.
  //
  // Note that the documentation strings are part of the user-facing
  // interface (usage/man page text), so keep them in sync with the behavior.
  //
  class agent_options
  {
    "\h|OPTIONS|"

    bool --help {"Print usage information and exit."}
    bool --version {"Print version and exit."}

    uint16_t --verbose = 1
    {
      "<level>",
      "Set the diagnostics verbosity to <level> between 0 and 6 with level 1
       being the default."
    }

    bool --systemd-daemon
    {
      "Run as a simple systemd daemon."
    }

    string --toolchain-name = "default"
    {
      "<str>",
      "Toolchain name, \cb{default} by default."
    }

    uint16_t --toolchain-num = 1
    {
      "<num>",
      "Toolchain number, 1 by default. If agents are running for several
       toolchains, then each of them should have a unique toolchain number
       between 1 and 99. This number is used as an offset for network ports,
       interfaces, etc."
    }

    string --toolchain-lock // Note: string to allow empty path.
    {
      "<path>",
      "Absolute path to the global toolchain lock file. If unspecified, then
       \c{\b{/var/lock/bbot-agent-}\i{toolchain-name}\b{.lock}} is used by
       default. If empty path is specified then no global locking is
       performed. If one of the \cb{--fake-*} options is specified, then no
       locking is performed by default."
    }

    standard_version --toolchain-ver
    {
      "<stdver>",
      "Toolchain version. If unspecified, then the agent's version will be
       used (which will be imprecise for snapshot versions)."
    }

    string --toolchain-id
    {
      "<str>",
      "Toolchain id. If unspecified or empty, then no re-bootstrapping on
       toolchain changes will be performed (which is primarily useful for
       testing)."
    }

    interactive_mode --interactive = interactive_mode::false_
    {
      "<mode>",
      "Interactive build support. Valid values for this option are \cb{false}
       (only non-interactive), \cb{true} (only interactive), and \cb{both}.
       If this option is not specified, then only non-interactive builds
       are supported."
    }

    // We reserve 0 in case in the future we want to distinguish a single-
    // instance mode or some such.
    //
    uint16_t --instance = 1
    {
      "<num>",
      "Instance number, 1 by default. If several instances of an agent are
       running for the same toolchain, then each of them should have a unique
       instance number between 1 and 99. This number is used as an offset for
       network ports, interfaces, etc."
    }

    uint16_t --instance-max = 0
    {
      "<num>",
      "Maximum number of instances that can perform tasks concurrently. If the
       number of instances that have been started is greater than this number
       (normally by just one), then when the maximum number of tasks is
       already being performed, the extra instances operate in the \i{priority
       monitor} mode: they only query controller URLs with priorities higher
       than those of the existing tasks and can only perform a task by
       interrupting one of them. If the maximum number of instances is \cb{0}
       (default), then it is assumed the number of instances started is the
       maximum number, essentially disabling the priority monitor
       functionality."
    }

    size_t --cpu = 1
    {
      "<num>",
      "Number of CPUs (threads) to use, 1 by default."
    }

    size_t --build-ram (4 * 1024 * 1024) // 4GiB
    {
      "<num>",
      "Amount of RAM (in KiB) to use for the build machine, 4GiB by default."
    }

    size_t --auxiliary-ram
    {
      "<num>",
      "Amount of RAM (in KiB) to use for auxiliary machines. To disable
       running auxiliary machines, specify \cb{0}. If unspecified, then
       currently the behavior is the same as specifying \cb{0} but this
       may change in the future (for example, to support a more dynamic
       allocation strategy)."

      // Note: it's not going to be easy to set it to unspecified in
      // bbot-agent@.service so we may have to invent some special value,
      // like `auto`.
    }

    string --bridge = "br1"
    {
      "<iface>",
      "Bridge interface to use for machine networking, \cb{br1} by default."
    }

    path --auth-key
    {
      "<file>",
      "Private key for the public key-based agent authentication. If not
       specified, then the agent will not be able to request tasks from
       controllers that require authentication.

       The file is expected to contain a single PEM-encoded private key
       without a password. A suitable key can be generated using the
       following command:

       \
       $ openssl genrsa 4096 >key.pem
       \
       "
    }

    strings --trust
    {
      "<fingerprint>",
      "Trust repository certificate with a SHA256 <fingerprint>."
    }

    dir_path --machines = "/build/machines/"
    {
      "<dir>",
      "The location of the build machines, \cb{/build/machines/} by default."
    }

    dir_path --tftp = "/build/tftp/"
    {
      "<dir>",
      "The location of the TFTP server root, \cb{/build/tftp/} by default."
    }

    // Low 23401+, 23501+, 23601+, etc., all look good collision-wise with
    // anything useful.
    //
    uint16_t --tftp-port = 23400
    {
      "<num>",
      "TFTP server port base, 23400 by default. The actual port is calculated
       by adding an offset calculated based on the toolchain and instance
       numbers."
    }

    size_t --bootstrap-startup = 300
    {
      "<sec>",
      "Maximum number of seconds to wait for machine bootstrap startup,
       300 (5 minutes) by default."
    }

    size_t --bootstrap-timeout = 3600
    {
      "<sec>",
      "Maximum number of seconds to wait for machine bootstrap completion,
       3600 (60 minutes) by default."
    }

    size_t --bootstrap-retries = 2
    {
      "<num>",
      "Number of times to retry a mis-booted bootstrap, 2 (3 attempts total)
       by default."
    }

    size_t --build-startup = 240
    {
      "<sec>",
      "Maximum number of seconds to wait for build startup, 240 (4 minutes) by
       default."
    }

    size_t --build-timeout = 5400
    {
      "<sec>",
      "Maximum number of seconds to wait for build completion, 5400 (90
       minutes) by default."
    }

    size_t --build-retries = 2
    {
      "<num>",
      "Number of times to retry a mis-booted build, 2 (3 attempts total) by
       default."
    }

    // Note: the primary name is misspelled (intactive) but is kept first so
    // that existing command lines and the accessor name derived from it
    // continue to work. The correctly spelled name is accepted as an alias.
    //
    size_t --intactive-timeout|--interactive-timeout = 10800
    {
      "<sec>",
      "Maximum number of seconds to wait for interactive build completion,
       10800 (3 hours) by default."
    }

    size_t --connect-timeout = 60
    {
      "<sec>",
      "Maximum number of seconds to wait for controller request connection,
       60 (1 minute) by default."
    }

    size_t --request-timeout = 300
    {
      "<sec>",
      "Maximum number of seconds to wait for controller request completion,
       300 (5 minutes) by default."
    }

    size_t --request-retries = 4
    {
      "<num>",
      "Number of times to retry a controller request, 4 (5 attempts total) by
       default. Note that both the total time for all retries as well as the
       time of each retry are limited by the same \cb{--request-timeout}
       value. This means that a successful request may take up to twice as
       long if a connection was established at the end of the retry window
       and took just as long to complete."
    }

    path --openssl = "openssl"
    {
      "<path>",
      "The openssl program to be used for crypto operations. You can also
       specify additional options that should be passed to the openssl program
       with \cb{--openssl-option}. If the openssl program is not explicitly
       specified, then \cb{bbot-agent} will use \cb{openssl} by default."
    }

    strings --openssl-option
    {
      "<opt>",
      "Additional option to be passed to the openssl program (see
       \cb{--openssl} for details). Repeat this option to specify multiple
       openssl options."
    }

    // Testing options.
    //
    bool --dump-machines
    {
      "Dump the available machines to \cb{stdout}, (re)-bootstrapping any if
       necessary, and exit."
    }

    bool --dump-task
    {
      "Dump the received build task to \cb{stdout} and exit."
    }

    bool --dump-result
    {
      "Dump the obtained build result to \cb{stdout} and exit."
    }

    bool --fake-bootstrap
    {
      "Fake the machine bootstrap process by creating the expected bootstrapped
       machine manifest."
    }

    bool --fake-build
    {
      "Fake the package building process by creating the aborted build result."
    }

    path --fake-machine
    {
      "<file>",
      "Fake the machine enumeration process by reading the machine header
       manifest from <file> (or \cb{stdin} if <file> is '\cb{-}')."
    }

    path --fake-request
    {
      "<file>",
      "Fake the task request process by reading the task manifest from <file>
       (or \cb{stdin} if <file> is '\cb{-}')."
    }
  };

  "
  \h|EXIT STATUS|

  Non-zero exit status is returned in case of an error.
  "
}