aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/script/script.hxx
blob: f9e0fa901d974215835355d6bcb1a3306784cce5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
// file      : libbuild2/script/script.hxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#ifndef LIBBUILD2_SCRIPT_SCRIPT_HXX
#define LIBBUILD2_SCRIPT_SCRIPT_HXX

#include <libbuild2/types.hxx>
#include <libbuild2/forward.hxx>
#include <libbuild2/utility.hxx>

#include <libbuild2/token.hxx>
#include <libbuild2/variable.hxx>

namespace build2
{
  namespace script
  {
    // Pre-parsed representation.
    //

    // Type of a pre-parsed script line (stored in the type member of the
    // line struct below).
    //
    // NOTE(review): judging by the names, the cmd_if*/cmd_elif*/cmd_else,
    // cmd_while, and cmd_for_* values introduce flow control constructs that
    // are terminated by cmd_end -- confirm against the parser.
    //
    enum class line_type
    {
      var,
      cmd,
      cmd_if,
      cmd_ifn,
      cmd_elif,
      cmd_elifn,
      cmd_else,
      cmd_while,
      cmd_for_args,   // `for x: ...`
      cmd_for_stream, // `... | for x` and `for x <...`
      cmd_end
    };

    // Stream insertion operator for the line type.
    //
    // NOTE(review): the exact textual form is defined out of line -- check
    // the implementation before relying on it.
    //
    ostream&
    operator<< (ostream&, line_type);

    // A single pre-parsed script line: its type, the saved tokens, and, for
    // some line types, the pre-entered variable.
    //
    struct line
    {
      line_type type;

      // NOTE(review): presumably the tokens of this line saved so that it can
      // be re-parsed (replayed) later -- confirm against the parser.
      //
      replay_tokens tokens;

      union
      {
        const variable* var; // Pre-entered for line_type::{var,cmd_for_*}.
      };
    };

    // Sequence of pre-parsed script lines.
    //
    // Most of the time we will have just one line (a command), thus the
    // small_vector size of 1.
    //
    using lines = small_vector<line, 1>;

    // Print the script lines, trying to reproduce their original (non-
    // expanded) representation.
    //
    // Note that the exact spacing and partial quoting may not be restored due
    // to the information loss.
    //
    // NOTE(review): ind is presumably the indentation string that prefixes
    // each printed line -- confirm against the implementation.
    //
    void
    dump (ostream&, const string& ind, const lines&);

    // As above but print a single line and without the trailing newline token
    // by default. Pass true as the newline argument to print this token as
    // well.
    //
    void
    dump (ostream&, const line&, bool newline = false);

    // Parse object model.
    //

    // redirect
    //
    // Type of a stream redirect (stored in the type member of the redirect
    // struct below).
    //
    enum class redirect_type
    {
      // No data is allowed to be read or written.
      //
      // Note that redirect of this type cannot be currently specified on the
      // script command line and can only be set via the environment object
      // as a default redirect (see below).
      //
      none,

      pass,
      null,
      trace,
      merge,            // Merge to another descriptor (see redirect::fd).
      here_str_literal,
      here_str_regex,
      here_doc_literal,
      here_doc_regex,
      here_doc_ref,     // Reference to here_doc literal or regex.
      file,             // File redirect (see redirect::file).
    };

    // Pre-parsed (but not instantiated) regex lines. The idea here is that
    // we should be able to re-create their (more or less) exact text
    // representation for diagnostics but also instantiate without any
    // re-parsing.
    //
    // A single pre-parsed line of a regex redirect together with its
    // location (line and column).
    //
    struct regex_line
    {
      // If regex is true, then value is the regex expression. Otherwise, it
      // is a literal. Note that special characters can be present in both
      // cases. For example, //+ is a regex, while /+ is a literal, both
      // with '+' as a special character. Flags are only valid for regex.
      //
      // Literals fall into two categories: textual (have no special
      // characters) and special (have just special characters instead). For
      // example, foo is a textual literal, while /.+ is a special one. Note
      // that a literal must not have both value and special non-empty.
      //
      bool regex;

      string value;
      string flags;
      string special;

      uint64_t line;
      uint64_t column;

      // Create a regex line with the specified expression and flags as well
      // as the optional special characters.
      //
      regex_line (uint64_t l, uint64_t c,
                  string v, string f, string s = string ())
          : regex (true),
            value (move (v)), flags (move (f)), special (move (s)),
            line (l), column (c)
      {
      }

      // Create a literal line. If s is true, then v is stored as the special
      // characters (special literal) and as the value (textual literal)
      // otherwise. The other string members are left empty.
      //
      regex_line (uint64_t l, uint64_t c, string v, bool s)
          : regex (false), line (l), column (c)
      {
        // All the string members are default-constructed (empty) at this
        // point, so it is enough to move the text into the selected one.
        //
        (s ? special : value) = move (v);
      }
    };

    // A sequence of pre-parsed regex lines together with the introducer
    // character and the global flags.
    //
    struct regex_lines
    {
      char intro;   // Introducer character.
      string flags; // Global flags (here-document).

      small_vector<regex_line, 8> lines;
    };

    // Output file redirect mode (stored in redirect::file_type::mode, where
    // it is meaningless for an input redirect).
    //
    // NOTE(review): judging by the names, compare means comparing the output
    // to the file contents while overwrite/append mean writing the output to
    // the file -- confirm against the runner.
    //
    enum class redirect_fmode
    {
      compare,
      overwrite,
      append
    };

    // Stream redirect: its type, the type-specific data, and the original
    // representation.
    //
    // Note that the type-specific data is kept in an anonymous union with
    // non-trivial members. As a result, the special member functions
    // declared below (and defined out of line) are responsible for
    // constructing/destroying the union member that corresponds to the type.
    //
    struct redirect
    {
      redirect_type type;

      struct file_type
      {
        using path_type = build2::path;
        path_type path;
        redirect_fmode mode; // Meaningless for input redirect.
      };

      union
      {
        int         fd;    // Merge-to descriptor.
        string      str;   // Note: with trailing newline, if requested.
        regex_lines regex; // Note: with trailing blank, if requested.
        file_type   file;
        reference_wrapper<const redirect> ref; // Note: no chains.
      };

      // Modifiers and the original representation (potentially an alias).
      //
      build2::token token;

      // Here-document end marker (no regex intro/flags) and its location.
      //
      // Note that the location is zero-initialized by default so that the
      // constructors which don't deal with the end marker (for example, the
      // reference and merge ones below) don't leave it indeterminate: reading
      // an indeterminate value (say, while copying the members one by one)
      // is undefined behavior.
      //
      string end;
      uint64_t end_line = 0;
      uint64_t end_column = 0;

      // Create redirect of a type other than reference.
      //
      explicit
      redirect (redirect_type);

      // Create redirect of the reference type.
      //
      // The type argument must be here_doc_ref and the referenced redirect
      // must not be a reference itself (both are asserted). Note that only
      // the reference to r is stored, so r must outlive this redirect.
      //
      redirect (redirect_type t, const redirect& r, build2::token tk)
          : type (redirect_type::here_doc_ref),
            ref (r),
            token (move (tk))
      {
        // There is no support (and need) for reference chains.
        //
        assert (t == redirect_type::here_doc_ref &&
                r.type != redirect_type::here_doc_ref);
      }

      // Create redirect of the merge type.
      //
      // The type argument must be merge and the descriptor must be either 1
      // or 2 (both are asserted).
      //
      // Note that it's the caller's responsibility to make sure that the file
      // descriptor is valid for this redirect (2 for stdout, etc).
      //
      redirect (redirect_type t, int f)
          : type (redirect_type::merge), fd (f)
      {
        assert (t == redirect_type::merge && (f == 1 || f == 2));
      }

      // Movable-only type.
      //
      redirect (redirect&&) noexcept;
      redirect& operator= (redirect&&) noexcept;

      redirect (const redirect&) = delete;
      redirect& operator= (const redirect&) = delete;

      ~redirect ();

      // Return the referenced redirect for the reference type and this
      // redirect otherwise.
      //
      const redirect&
      effective () const noexcept
      {
        return type == redirect_type::here_doc_ref ? ref.get () : *this;
      }

      // Return the redirect modifiers, which are stored as the token value.
      //
      const string&
      modifiers () const noexcept
      {
        return token.value;
      }
    };

    // cleanup
    //
    // Type of a cleanup, which determines whether the filesystem entry is
    // removed and how its absence is treated (see the per-value comments for
    // the corresponding syntax).
    //
    enum class cleanup_type
    {
      always, // &foo  - cleanup, fail if does not exist.
      maybe,  // &?foo - cleanup, ignore if does not exist.
      never   // &!foo - don't cleanup, ignore if doesn't exist.
    };

    // File or directory to be automatically cleaned up at the end of the
    // script execution. If the path ends with a trailing slash, then it is
    // assumed to be a directory, otherwise -- a file. A directory that is
    // about to be cleaned up must be empty.
    //
    // The last component in the path may contain a wildcard that has the
    // following semantics:
    //
    // dir/*   - remove all immediate files
    // dir/*/  - remove all immediate sub-directories (must be empty)
    // dir/**  - remove all files recursively
    // dir/**/ - remove all sub-directories recursively (must be empty)
    // dir/*** - remove directory dir with all files and sub-directories
    //           recursively
    //
    struct cleanup
    {
      cleanup_type type;
      build2::path path;
    };

    // List of cleanups.
    //
    using cleanups = vector<cleanup>; // @@ Make it small_vector<..., 1>?

    // command_exit
    //
    // How the exit code is compared (stored in command_exit::comparison).
    //
    // NOTE(review): eq/ne presumably stand for equal/not equal to
    // command_exit::code -- confirm against the runner.
    //
    enum class exit_comparison {eq, ne};

    // Expected command exit status: the exit code and the comparison to
    // use.
    //
    struct command_exit
    {
      // C/C++ don't apply constraints on program exit code other than it
      // being of type int.
      //
      // POSIX specifies that only the least significant 8 bits shall be
      // available from wait() and waitpid(); the full value shall be
      // available from waitid() (read more at _Exit, _exit Open Group
      // spec).
      //
      // While the Linux man page for waitid() doesn't mention any
      // deviations from the standard, the FreeBSD implementation (as of
      // version 11.0) only returns 8 bits like the other wait*() calls.
      //
      // Windows supports 32-bit exit codes.
      //
      // Note that in shells some exit values can have special meaning so
      // using them can be a source of confusion. For bash, the values in the
      // [126, 255] range are such special ones (see Appendix E, "Exit
      // Codes With Special Meanings" in the Advanced Bash-Scripting Guide).
      //
      exit_comparison comparison;
      uint8_t code;
    };

    // command
    //
    // Assume it is not very common to (un)set more than a few environment
    // variables in the script.
    //
    struct environment_vars: small_vector<string, 4>
    {
      // List of environment variable (un)sets, one string per variable.
      //
      // NOTE(review): presumably a set is represented as `name=value` and an
      // unset as just `name` -- confirm against the implementation.

      // Find a variable (un)set.
      //
      // Note that only the variable name is considered for both arguments. In
      // other words, passing a variable set as a first argument can result
      // with a variable unset being found and vice versa.
      //
      // NOTE(review): presumably returns end() if no match is found --
      // confirm against the implementation.
      //
      environment_vars::iterator
      find (const string&);

      // Add or overwrite an existing variable (un)set.
      //
      void
      add (string);
    };

    // A single command: the program with its arguments and environment, the
    // stream redirects, the cleanups, and the expected exit status.
    //
    // @@ For better diagnostics we may want to store an individual location
    //    of each command in the pipeline (maybe we can share the file part
    //    somehow since a pipeline cannot span multiple files).
    //
    struct command
    {
      // We use NULL initial as an indication that the path stored in recall
      // is a program name that still needs to be resolved into the builtin
      // function or the process path.
      //
      process_path program;

      strings            arguments;
      optional<dir_path> cwd;       // From env builtin.
      environment_vars   variables; // From env builtin.
      optional<duration> timeout;   // From env builtin.

      // Stream redirects.
      //
      // NOTE(review): an absent redirect presumably means that the default
      // one from the environment is used (see the comment on the in, out,
      // and err members of the environment class) -- confirm.
      //
      optional<redirect> in;
      optional<redirect> out;
      optional<redirect> err;

      script::cleanups cleanups;

      // If nullopt, then the command is expected to succeed (0 exit code).
      //
      optional<command_exit> exit;
    };

    // Bit flags that select which parts of a command are printed by the
    // to_stream() functions declared below.
    //
    enum class command_to_stream: uint16_t
    {
      header   = 0x01,
      here_doc = 0x02,              // Note: printed on a new line.
      all      = header | here_doc
    };

    // Print the parts of the command selected with the command_to_stream
    // flags.
    //
    void
    to_stream (ostream&, const command&, command_to_stream);

    // Stream insertion operator for the command.
    //
    // NOTE(review): presumably equivalent to to_stream() with
    // command_to_stream::all -- confirm against the implementation.
    //
    ostream&
    operator<< (ostream&, const command&);

    // command_pipe
    //
    // Sequence of commands that form a pipeline.
    //
    using command_pipe = vector<command>; // @@ Make it small_vector<..., 1>?

    // Print the parts of the pipeline selected with the command_to_stream
    // flags.
    //
    void
    to_stream (ostream&, const command_pipe&, command_to_stream);

    // Stream insertion operator for the pipeline.
    //
    // NOTE(review): presumably equivalent to to_stream() with
    // command_to_stream::all -- confirm against the implementation.
    //
    ostream&
    operator<< (ostream&, const command_pipe&);

    // command_expr
    //
    // Logical operator that joins an expression term to the preceding ones.
    //
    enum class expr_operator {log_or, log_and};

    // Expression term: a pipeline together with the operator that joins it
    // to the preceding terms.
    //
    struct expr_term
    {
      expr_operator op;  // OR-ed to an implied false for the first term.
      command_pipe pipe;
    };

    // Command expression: a sequence of terms joined with logical operators.
    //
    using command_expr = vector<expr_term>; // @@ Make it small_vector<..., 1>?

    // Print the parts of the expression selected with the command_to_stream
    // flags.
    //
    void
    to_stream (ostream&, const command_expr&, command_to_stream);

    // Stream insertion operator for the expression.
    //
    // NOTE(review): presumably equivalent to to_stream() with
    // command_to_stream::all -- confirm against the implementation.
    //
    ostream&
    operator<< (ostream&, const command_expr&);

    // Stack-allocated linked list of iteration indexes of the nested loops.
    //
    // Each node holds the iteration index of one loop and points (via prev)
    // to the node of the enclosing loop, if any.
    //
    struct iteration_index
    {
      size_t index; // 1-based.

      const iteration_index* prev; // NULL for the top-most loop.
    };

    // Timeout as a duration together with the success flag.
    //
    // NOTE(review): the success flag presumably has the semantics described
    // for environment::set_timeout() (missing the deadline is not treated as
    // a failure) -- confirm against the runner.
    //
    struct timeout
    {
      duration value;
      bool success;

      // Store the duration and the success flag as is.
      //
      timeout (duration d, bool s)
          : value (d),
            success (s)
      {
      }
    };

    // Deadline as a point in time together with the success flag.
    //
    // NOTE(review): the success flag presumably has the semantics described
    // for environment::set_timeout() (missing the deadline is not treated as
    // a failure) -- confirm against the runner.
    //
    struct deadline
    {
      timestamp value;
      bool success;

      // Store the timestamp and the success flag as is.
      //
      deadline (timestamp t, bool s)
          : value (t),
            success (s)
      {
      }
    };

    // Compare deadlines/timeouts.
    //
    // If timestamps/durations are equal, the failure is less than the
    // success.
    //
    bool
    operator< (const deadline&, const deadline&);

    bool
    operator< (const timeout&, const timeout&);

    // Convert an optional timestamp/duration to an optional deadline/timeout
    // with the specified success flag.
    //
    // NOTE(review): presumably nullopt is returned for the nullopt argument
    // -- confirm against the implementation.
    //
    optional<deadline>
    to_deadline (const optional<timestamp>&, bool success);

    optional<timeout>
    to_timeout (const optional<duration>&, bool success);

    // Script execution environment.
    //
    // This is an abstract base class: the script implementation is expected
    // to derive from it and implement the pure virtual functions declared
    // below (set_variable(), set_timeout(), effective_deadline(), and
    // create_temp_dir()).
    //
    class environment
    {
    public:
      build2::context& context;

      // The platform script programs run on.
      //
      const target_triplet& host;

      // The work directory is used as the builtin/process CWD and to complete
      // relative paths. Any attempt to remove or move this directory (or its
      // parent directory) using the rm or mv builtins will fail. Must be an
      // absolute path.
      //
      const dir_name_view work_dir;

      // If the sandbox directory is not NULL, then any attempt to remove or
      // move a filesystem entry outside this directory using an explicit
      // cleanup or the rm/mv builtins will fail, unless the --force option is
      // specified for the builtin. Must be an absolute path.
      //
      const dir_name_view sandbox_dir;

      // The temporary directory is used by the script running machinery to
      // create special files. Must be an absolute path, unless empty. Can be
      // empty until the create_temp_dir() function call, which can be used
      // for creating this directory on demand.
      //
      const dir_path& temp_dir;

      // If true, the temporary directory will not be removed on the script
      // failure. In particular, this allows the script running machinery to
      // refer to the special files in diagnostics.
      //
      const bool temp_dir_keep;

      // Default process streams redirects.
      //
      // If a stream redirect is not specified on the script command line,
      // then the respective redirect data member will be used as the default.
      //
      const redirect in;
      const redirect out;
      const redirect err;

      // Create environment with the sandbox.
      //
      // Note that the context (ctx), host (h), and temporary directory (td)
      // arguments are stored as references and thus the referenced objects
      // must outlive the environment. The default redirects, if not
      // specified, are of the pass type.
      //
      environment (build2::context& ctx,
                   const target_triplet& h,
                   const dir_name_view& wd,
                   const dir_name_view& sd,
                   const dir_path& td, bool tk,
                   redirect&& i = redirect (redirect_type::pass),
                   redirect&& o = redirect (redirect_type::pass),
                   redirect&& e = redirect (redirect_type::pass))
          : context (ctx), host (h),
            work_dir (wd), sandbox_dir (sd), temp_dir (td), temp_dir_keep (tk),
            in (move (i)), out (move (o)), err (move (e))
      {
      }

      // Create environment without the sandbox.
      //
      // Delegates to the above constructor passing the default-constructed
      // view as the sandbox directory.
      //
      environment (build2::context& ctx,
                   const target_triplet& h,
                   const dir_name_view& wd,
                   const dir_path& td, bool tk,
                   redirect&& i = redirect (redirect_type::pass),
                   redirect&& o = redirect (redirect_type::pass),
                   redirect&& e = redirect (redirect_type::pass))
          : environment (ctx, h,
                         wd, dir_name_view (), td, tk,
                         move (i), move (o), move (e))
      {
      }

      // Cleanup.
      //
    public:
      script::cleanups cleanups;
      paths special_cleanups;

      // Register a cleanup. If the cleanup is explicit, then override the
      // cleanup type if this path is already registered. Ignore implicit
      // registration of a path outside sandbox directory, if specified (see
      // above).
      //
      void
      clean (cleanup, bool implicit);

      // Register cleanup of a special file. Such files are created to
      // maintain the script running machinery and must be removed first, not
      // to interfere with the user-defined wildcard cleanups if the working
      // and temporary directories are the same.
      //
      void
      clean_special (path);

      // Command execution environment variables.
      //
    public:
      // Environment variable (un)sets from the export builtin call.
      //
      // Each variable in the list can only be present once.
      //
      environment_vars exported_vars;

      // Return the environment variable (un)sets which can potentially rely
      // on factors besides the export builtin call sequence (scoping,
      // etc). The default implementation returns exported_vars.
      //
      // NOTE(review): the storage argument is presumably used by overrides
      // to hold the resulting list if it needs to be calculated -- confirm.
      //
      virtual const environment_vars&
      exported_variables (environment_vars& storage);

      // Merge the own environment variable (un)sets with the specified ones,
      // overriding the former with the latter.
      //
      const environment_vars&
      merge_exported_variables (const environment_vars&,
                                environment_vars& storage);

    public:
      // Set variable value with optional (non-empty) attributes.
      //
      virtual void
      set_variable (string name,
                    names&&,
                    const string& attrs,
                    const location&) = 0;

      // Set the script execution timeout from the timeout builtin call.
      //
      // The builtin argument semantics is script implementation-dependent. If
      // success is true then a process missing this deadline should not be
      // considered as failed unless it didn't terminate gracefully and had to
      // be killed.
      //
      virtual void
      set_timeout (const string& arg, bool success, const location&) = 0;

      // Return the script execution deadline which can potentially rely on
      // factors besides the latest timeout builtin call (variables, scoping,
      // etc).
      //
      virtual optional<deadline>
      effective_deadline () = 0;

      // Create the temporary directory and set the temp_dir reference target
      // to its path. Must only be called if temp_dir is empty.
      //
      virtual void
      create_temp_dir () = 0;

    public:
      virtual
      ~environment () = default;
    };

    // Custom command function that can be executed at the end of the
    // pipeline. Should throw io_error on the underlying OS error.
    //
    // Note: the pipeline can be NULL (think of `for x <<<='foo'`).
    //
    // NOTE(review): the in argument is presumably the descriptor the function
    // reads its input from (the function takes its ownership since auto_fd is
    // passed by value) and the deadline, if present, is the one the function
    // is expected to honor -- confirm against the runner.
    //
    struct pipe_command;

    using command_function = void (environment&,
                                   const strings& args,
                                   auto_fd in,
                                   pipe_command* pipeline,
                                   const optional<deadline>&,
                                   const location&);

    // Helpers.
    //
    // Issue diagnostics with the specified prefix and fail if the string
    // (potentially an option value) is not a valid variable name or
    // assignment (empty, etc).
    //
    // NOTE(review): the opt argument is presumably the name of the option
    // the string is a value of (NULL if it is not an option value) -- confirm
    // against the implementation.
    //
    void
    verify_environment_var_name (const string&,
                                 const char* prefix,
                                 const location&,
                                 const char* opt = nullptr);

    void
    verify_environment_var_assignment (const string&,
                                       const char* prefix,
                                       const location&);

    // "Unhide" operator<< from the build2 namespace.
    //
    using build2::operator<<;
  }
}

#include <libbuild2/script/script.ixx>

#endif // LIBBUILD2_SCRIPT_SCRIPT_HXX