aboutsummaryrefslogtreecommitdiff
path: root/libbuild2
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2020-04-28 08:48:53 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2020-05-27 15:47:28 +0200
commitb808c255b6a9ddba085bf5646e7d20ec344f2e2d (patch)
tree32730291f7e6de8ef0a227905520dd66fb4ec0f3 /libbuild2
parent3552356a87402727e663131994fa87f48b3cd4fb (diff)
Initial support for ad hoc recipes (still work in progress)
Diffstat (limited to 'libbuild2')
-rw-r--r--libbuild2/action.hxx21
-rw-r--r--libbuild2/algorithm.cxx58
-rw-r--r--libbuild2/algorithm.ixx4
-rw-r--r--libbuild2/build/script/lexer+command-line.test.testscript164
-rw-r--r--libbuild2/build/script/lexer+first-token.test.testscript30
-rw-r--r--libbuild2/build/script/lexer+second-token.test.testscript53
-rw-r--r--libbuild2/build/script/lexer+variable-line.test.testscript12
-rw-r--r--libbuild2/build/script/lexer+variable.test.testscript25
-rw-r--r--libbuild2/build/script/lexer.cxx270
-rw-r--r--libbuild2/build/script/lexer.hxx80
-rw-r--r--libbuild2/build/script/lexer.test.cxx77
-rw-r--r--libbuild2/build/script/parser+cleanup.test.testscript57
-rw-r--r--libbuild2/build/script/parser+command-if.test.testscript395
-rw-r--r--libbuild2/build/script/parser+command-re-parse.test.testscript11
-rw-r--r--libbuild2/build/script/parser+exit.test.testscript26
-rw-r--r--libbuild2/build/script/parser+expansion.test.testscript35
-rw-r--r--libbuild2/build/script/parser+here-document.test.testscript272
-rw-r--r--libbuild2/build/script/parser+here-string.test.testscript34
-rw-r--r--libbuild2/build/script/parser+line.test.testscript72
-rw-r--r--libbuild2/build/script/parser+pipe-expr.test.testscript132
-rw-r--r--libbuild2/build/script/parser+pre-parse.test.testscript22
-rw-r--r--libbuild2/build/script/parser+redirect.test.testscript525
-rw-r--r--libbuild2/build/script/parser+regex.test.testscript225
-rw-r--r--libbuild2/build/script/parser+variable.test.testscript41
-rw-r--r--libbuild2/build/script/parser.cxx391
-rw-r--r--libbuild2/build/script/parser.hxx96
-rw-r--r--libbuild2/build/script/parser.test.cxx224
-rw-r--r--libbuild2/build/script/runner.cxx133
-rw-r--r--libbuild2/build/script/runner.hxx84
-rw-r--r--libbuild2/build/script/script.cxx236
-rw-r--r--libbuild2/build/script/script.hxx156
-rw-r--r--libbuild2/build/script/token.cxx23
-rw-r--r--libbuild2/build/script/token.hxx36
-rw-r--r--libbuild2/buildfile16
-rw-r--r--libbuild2/cc/compile-rule.cxx14
-rw-r--r--libbuild2/cc/init.cxx6
-rw-r--r--libbuild2/cc/lexer.cxx13
-rw-r--r--libbuild2/cc/link-rule.cxx24
-rw-r--r--libbuild2/config/operation.cxx4
-rw-r--r--libbuild2/context.cxx12
-rw-r--r--libbuild2/context.hxx19
-rw-r--r--libbuild2/dist/operation.cxx2
-rw-r--r--libbuild2/dump.cxx199
-rw-r--r--libbuild2/file.cxx65
-rw-r--r--libbuild2/file.hxx10
-rw-r--r--libbuild2/functions-name.cxx108
-rw-r--r--libbuild2/lexer+foreign.test.testscript96
-rw-r--r--libbuild2/lexer+normal.test.testscript54
-rw-r--r--libbuild2/lexer+quoting.test.testscript2
-rw-r--r--libbuild2/lexer.cxx172
-rw-r--r--libbuild2/lexer.hxx64
-rw-r--r--libbuild2/lexer.test.cxx16
-rw-r--r--libbuild2/module.cxx318
-rw-r--r--libbuild2/name.hxx12
-rw-r--r--libbuild2/parser.cxx544
-rw-r--r--libbuild2/parser.hxx43
-rw-r--r--libbuild2/recipe.hxx15
-rw-r--r--libbuild2/rule.cxx892
-rw-r--r--libbuild2/rule.hxx142
-rw-r--r--libbuild2/script/builtin-options.cxx661
-rw-r--r--libbuild2/script/builtin-options.hxx339
-rw-r--r--libbuild2/script/builtin-options.ixx182
-rw-r--r--libbuild2/script/builtin.cli (renamed from libbuild2/test/script/builtin.cli)22
-rw-r--r--libbuild2/script/lexer+command-expansion.test.testscript (renamed from libbuild2/test/script/lexer+command-expansion.test.testscript)104
-rw-r--r--libbuild2/script/lexer.cxx431
-rw-r--r--libbuild2/script/lexer.hxx139
-rw-r--r--libbuild2/script/lexer.test.cxx76
-rw-r--r--libbuild2/script/parser.cxx2015
-rw-r--r--libbuild2/script/parser.hxx189
-rw-r--r--libbuild2/script/regex.cxx436
-rw-r--r--libbuild2/script/regex.hxx678
-rw-r--r--libbuild2/script/regex.ixx31
-rw-r--r--libbuild2/script/regex.test.cxx (renamed from libbuild2/test/script/regex.test.cxx)8
-rw-r--r--libbuild2/script/run.cxx2020
-rw-r--r--libbuild2/script/run.hxx75
-rw-r--r--libbuild2/script/script.cxx659
-rw-r--r--libbuild2/script/script.hxx471
-rw-r--r--libbuild2/script/script.ixx56
-rw-r--r--libbuild2/script/token.cxx53
-rw-r--r--libbuild2/script/token.hxx66
-rw-r--r--libbuild2/target-key.hxx12
-rw-r--r--libbuild2/target.cxx14
-rw-r--r--libbuild2/target.hxx11
-rw-r--r--libbuild2/target.ixx17
-rw-r--r--libbuild2/test/init.cxx4
-rw-r--r--libbuild2/test/script/builtin-options.cxx667
-rw-r--r--libbuild2/test/script/builtin-options.hxx345
-rw-r--r--libbuild2/test/script/builtin-options.ixx188
-rw-r--r--libbuild2/test/script/lexer.cxx251
-rw-r--r--libbuild2/test/script/lexer.hxx40
-rw-r--r--libbuild2/test/script/lexer.test.cxx13
-rw-r--r--libbuild2/test/script/parser+exit.test.testscript2
-rw-r--r--libbuild2/test/script/parser+redirect.test.testscript8
-rw-r--r--libbuild2/test/script/parser+regex.test.testscript5
-rw-r--r--libbuild2/test/script/parser+variable.test.testscript19
-rw-r--r--libbuild2/test/script/parser.cxx2071
-rw-r--r--libbuild2/test/script/parser.hxx124
-rw-r--r--libbuild2/test/script/regex.cxx439
-rw-r--r--libbuild2/test/script/regex.hxx684
-rw-r--r--libbuild2/test/script/regex.ixx34
-rw-r--r--libbuild2/test/script/runner.cxx2026
-rw-r--r--libbuild2/test/script/runner.hxx15
-rw-r--r--libbuild2/test/script/script.cxx514
-rw-r--r--libbuild2/test/script/script.hxx378
-rw-r--r--libbuild2/test/script/script.ixx59
-rw-r--r--libbuild2/test/script/token.cxx35
-rw-r--r--libbuild2/test/script/token.hxx31
-rw-r--r--libbuild2/token.cxx109
-rw-r--r--libbuild2/token.hxx48
-rw-r--r--libbuild2/types.hxx8
-rw-r--r--libbuild2/types.ixx21
-rw-r--r--libbuild2/utility.cxx4
-rw-r--r--libbuild2/utility.hxx1
113 files changed, 15653 insertions, 8137 deletions
diff --git a/libbuild2/action.hxx b/libbuild2/action.hxx
index c1e4697..906d7eb 100644
--- a/libbuild2/action.hxx
+++ b/libbuild2/action.hxx
@@ -11,11 +11,11 @@
namespace build2
{
- // While we are using uint8_t for the meta/operation ids, we assume
- // that each is limited to 4 bits (max 128 entries) so that we can
- // store the combined action id in uint8_t as well. This makes our
- // life easier when it comes to defining switch labels for action
- // ids (no need to mess with endian-ness).
+ // While we are using uint8_t for the meta/operation ids, we assume that
+ // each is limited to 4 bits (max 15 entries @@ this is probably too low) so
+ // that we can store the combined action id in uint8_t as well. This makes
+ // our life easier when it comes to defining switch labels for action ids
+ // (no need to mess with endian-ness).
//
// Note that 0 is not a valid meta/operation/action id.
//
@@ -61,6 +61,8 @@ namespace build2
{
action (): inner_id (0), outer_id (0) {} // Invalid action.
+ action (action_id a): action (a >> 4, a & 0xF) {}
+
// If this is not a nested operation, then outer should be 0.
//
action (meta_operation_id m, operation_id inner, operation_id outer = 0)
@@ -103,6 +105,11 @@ namespace build2
inline bool
operator!= (action x, action y) {return !(x == y);}
+ inline bool operator== (action x, action_id y) {return x == action (y);}
+ inline bool operator!= (action x, action_id y) {return x != action (y);}
+ inline bool operator== (action_id x, action y) {return action (x) == y;}
+ inline bool operator!= (action_id x, action y) {return action (x) != y;}
+
bool operator> (action, action) = delete;
bool operator< (action, action) = delete;
bool operator>= (action, action) = delete;
@@ -140,6 +147,8 @@ namespace build2
// Id constants for build-in and pre-defined meta/operations.
//
+ // Note: currently max 15 (see above).
+ //
const meta_operation_id noop_id = 1; // nomop?
const meta_operation_id perform_id = 2;
const meta_operation_id configure_id = 3;
@@ -152,6 +161,8 @@ namespace build2
// that no operation was explicitly specified by the user. If adding
// something here remember to update the man page.
//
+ // Note: currently max 15 (see above).
+ //
const operation_id default_id = 1; // Shall be first.
const operation_id update_id = 2; // Shall be second.
const operation_id clean_id = 3;
diff --git a/libbuild2/algorithm.cxx b/libbuild2/algorithm.cxx
index ef1a78d..11f2a56 100644
--- a/libbuild2/algorithm.cxx
+++ b/libbuild2/algorithm.cxx
@@ -318,12 +318,55 @@ namespace build2
// Return the matching rule or NULL if no match and try_match is true.
//
const rule_match*
- match_impl (action a, target& t, const rule* skip, bool try_match)
+ match_rule (action a, target& t, const rule* skip, bool try_match)
{
+ // First check for an ad hoc recipe.
+ //
+ if (!t.adhoc_recipes.empty ())
+ {
+ auto df = make_diag_frame (
+ [a, &t](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while matching ad hoc recipe to " << diag_do (a, t);
+ });
+
+ // @@ TODO:
+ //
+ // If action is Y-for-X, how would we distinguish between X and Y-for-X?
+ // See match_rule() for the hairy details. We could start with
+ // supporting just the inner case. Or we could try to just match an
+ // inner rule by default? I think need a clear use-case to see what's
+ // the correct semantics.
+
+ auto b (t.adhoc_recipes.begin ()), e (t.adhoc_recipes.end ());
+ auto i (find_if (b, e,
+ [a, &t] (const adhoc_recipe& r)
+ {
+ return r.action == a &&
+ r.rule->match (a, t, string () /* hint */, nullopt);
+ }));
+
+ if (i == e)
+ i = find_if (b, e,
+ [a, &t] (const adhoc_recipe& r)
+ {
+ return r.action != a &&
+ r.rule->match (a, t, string () /* hint */, r.action);
+ });
+ if (i != e)
+ return &i->rule->rule_match;
+ }
+
// If this is an outer operation (Y-for-X), then we look for rules
- // registered for the outer id (X). Note that we still pass the original
- // action to the rule's match() function so that it can distinguish
- // between a pre/post operation (Y-for-X) and the actual operation (X).
+ // registered for the outer id (X; yes, it's really outer). Note that we
+ // still pass the original action to the rule's match() function so that
+ // it can distinguish between a pre/post operation (Y-for-X) and the
+ // actual operation (X).
+ //
+ // If you are then wondering how would a rule for Y ever match in case of
+ // Y-for-X, the answer is via a rule that matches for X and then, in case
+ // of Y-for-X, matches an inner rule for just Y (see match_inner()).
//
meta_operation_id mo (a.meta_operation ());
operation_id o (a.inner () ? a.operation () : a.outer_operation ());
@@ -561,7 +604,7 @@ namespace build2
t.prerequisite_targets[a].clear ();
if (a.inner ()) t.clear_data ();
- const rule_match* r (match_impl (a, t, nullptr, try_match));
+ const rule_match* r (match_rule (a, t, nullptr, try_match));
assert (l.offset != target::offset_tried); // Should have failed.
@@ -972,8 +1015,11 @@ namespace build2
if (r != nullptr)
{
+ // Make it ad hoc so that it doesn't end up in prerequisite_targets
+ // after execution.
+ //
match (a, *r);
- t.prerequisite_targets[a].emplace_back (r);
+ t.prerequisite_targets[a].emplace_back (r, include_type::adhoc);
}
return r;
diff --git a/libbuild2/algorithm.ixx b/libbuild2/algorithm.ixx
index 7231fec..5f9143a 100644
--- a/libbuild2/algorithm.ixx
+++ b/libbuild2/algorithm.ixx
@@ -263,7 +263,7 @@ namespace build2
}
LIBBUILD2_SYMEXPORT const rule_match*
- match_impl (action, target&, const rule* skip, bool try_match = false);
+ match_rule (action, target&, const rule* skip, bool try_match = false);
LIBBUILD2_SYMEXPORT recipe
apply_impl (action, target&, const rule_match&);
@@ -424,7 +424,7 @@ namespace build2
// Note: we don't touch any of the t[a] state since that was/will be set
// for the delegating rule.
//
- const rule_match* r (match_impl (a, t, &dr, try_match));
+ const rule_match* r (match_rule (a, t, &dr, try_match));
return r != nullptr ? apply_impl (a, t, *r) : empty_recipe;
}
diff --git a/libbuild2/build/script/lexer+command-line.test.testscript b/libbuild2/build/script/lexer+command-line.test.testscript
new file mode 100644
index 0000000..3eceae8
--- /dev/null
+++ b/libbuild2/build/script/lexer+command-line.test.testscript
@@ -0,0 +1,164 @@
+# file : libbuild2/build/script/lexer+command-line.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = command-line
+
+: redirect
+:
+{
+ : pass
+ :
+ $* <"cmd <| 1>|" >>EOO
+ 'cmd'
+ <|
+ '1'
+ >|
+ <newline>
+ EOO
+
+ : null
+ :
+ $* <"cmd <- 1>-" >>EOO
+ 'cmd'
+ <-
+ '1'
+ >-
+ <newline>
+ EOO
+
+ : trace
+ :
+ $* <"cmd 1>!" >>EOO
+ 'cmd'
+ '1'
+ >!
+ <newline>
+ EOO
+
+ : merge
+ :
+ $* <"cmd 1>&2" >>EOO
+ 'cmd'
+ '1'
+ >&
+ '2'
+ <newline>
+ EOO
+
+ : str
+ :
+ $* <"cmd <<<=a 1>>>?b" >>EOO
+ 'cmd'
+ <<<=
+ 'a'
+ '1'
+ >>>?
+ 'b'
+ <newline>
+ EOO
+
+ : str-nn
+ :
+ $* <"cmd <<<=:a 1>>>?:b" >>EOO
+ 'cmd'
+ <<<=:
+ 'a'
+ '1'
+ >>>?:
+ 'b'
+ <newline>
+ EOO
+
+ : str-nn-alias
+ :
+ $* <"cmd <<<:a 1>>>?:b" >>EOO
+ 'cmd'
+ <<<:
+ 'a'
+ '1'
+ >>>?:
+ 'b'
+ <newline>
+ EOO
+
+ : doc
+ :
+ $* <"cmd <<EOI 1>>EOO" >>EOO
+ 'cmd'
+ <<
+ 'EOI'
+ '1'
+ >>
+ 'EOO'
+ <newline>
+ EOO
+
+ : doc-nn
+ :
+ $* <"cmd <<:EOI 1>>?:EOO" >>EOO
+ 'cmd'
+ <<:
+ 'EOI'
+ '1'
+ >>?:
+ 'EOO'
+ <newline>
+ EOO
+
+ : file-cmp
+ :
+ $* <"cmd <=in >?out 2>?err" >>EOO
+ 'cmd'
+ <=
+ 'in'
+ >?
+ 'out'
+ '2'
+ >?
+ 'err'
+ <newline>
+ EOO
+
+ : file-write
+ :
+ $* <"cmd >=out 2>+err" >>EOO
+ 'cmd'
+ >=
+ 'out'
+ '2'
+ >+
+ 'err'
+ <newline>
+ EOO
+}
+
+: cleanup
+:
+{
+ : always
+ :
+ $* <"cmd &file" >>EOO
+ 'cmd'
+ &
+ 'file'
+ <newline>
+ EOO
+
+ : maybe
+ :
+ $* <"cmd &?file" >>EOO
+ 'cmd'
+ &?
+ 'file'
+ <newline>
+ EOO
+
+ : never
+ :
+ $* <"cmd &!file" >>EOO
+ 'cmd'
+ &!
+ 'file'
+ <newline>
+ EOO
+}
diff --git a/libbuild2/build/script/lexer+first-token.test.testscript b/libbuild2/build/script/lexer+first-token.test.testscript
new file mode 100644
index 0000000..6709e60
--- /dev/null
+++ b/libbuild2/build/script/lexer+first-token.test.testscript
@@ -0,0 +1,30 @@
+# file : libbuild2/build/script/lexer+first-token.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+# Note: this mode auto-expires after each token.
+#
+test.arguments = first-token
+
+: assign
+:
+$* <"foo=" >>EOO
+'foo'
+'='
+<newline>
+EOO
+
+: append
+:
+$* <"foo+=" >>EOO
+'foo'
+'+='
+<newline>
+EOO
+
+: prepend
+:
+$* <"foo=+" >>EOO
+'foo'
+'=+'
+<newline>
+EOO
diff --git a/libbuild2/build/script/lexer+second-token.test.testscript b/libbuild2/build/script/lexer+second-token.test.testscript
new file mode 100644
index 0000000..d5f3329
--- /dev/null
+++ b/libbuild2/build/script/lexer+second-token.test.testscript
@@ -0,0 +1,53 @@
+# file : libbuild2/build/script/lexer+second-token.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+# Note: this mode auto-expires after each token.
+#
+test.arguments = second-token
+
+: assign
+:
+$* <"=foo" >>EOO
+=
+'foo'
+<newline>
+EOO
+
+: append
+:
+$* <"+= foo" >>EOO
++=
+'foo'
+<newline>
+EOO
+
+: prepend
+:
+$* <" =+ foo" >>EOO
+=+
+'foo'
+<newline>
+EOO
+
+: assign-leading
+:
+$* <"foo=bar" >>EOO
+'foo=bar'
+<newline>
+EOO
+
+: append-leading
+:
+$* <"foo+= bar" >>EOO
+'foo+='
+'bar'
+<newline>
+EOO
+
+: prepend-leading
+:
+$* <"foo =+bar" >>EOO
+'foo'
+'=+bar'
+<newline>
+EOO
diff --git a/libbuild2/build/script/lexer+variable-line.test.testscript b/libbuild2/build/script/lexer+variable-line.test.testscript
new file mode 100644
index 0000000..e4b5adb
--- /dev/null
+++ b/libbuild2/build/script/lexer+variable-line.test.testscript
@@ -0,0 +1,12 @@
+# file : libbuild2/build/script/lexer+variable-line.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = variable-line
+
+: basic
+:
+$* <"a 'b c'" >>EOO
+'a'
+'b c'
+<newline>
+EOO
diff --git a/libbuild2/build/script/lexer+variable.test.testscript b/libbuild2/build/script/lexer+variable.test.testscript
new file mode 100644
index 0000000..54b0a30
--- /dev/null
+++ b/libbuild2/build/script/lexer+variable.test.testscript
@@ -0,0 +1,25 @@
+# file : libbuild2/build/script/lexer+variable.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+# Test handling custom variable names ($>, $<, $~).
+#
+test.arguments = variable
+
+: primary-target
+:
+{
+ : only
+ :
+ $* <">" >>EOO
+ '>'
+ <newline>
+ EOO
+
+ : followed
+ :
+ $* <">abc" >>EOO
+ '>'
+ 'abc'
+ <newline>
+ EOO
+}
diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx
new file mode 100644
index 0000000..7b8bdd4
--- /dev/null
+++ b/libbuild2/build/script/lexer.cxx
@@ -0,0 +1,270 @@
+// file : libbuild2/build/script/lexer.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/build/script/lexer.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ using type = token_type;
+
+ build2::script::redirect_aliases lexer::redirect_aliases {
+ type (type::in_file),
+ type (type::in_doc),
+ type (type::in_str),
+ type (type::out_file_ovr),
+ type (type::out_file_app),
+ nullopt};
+
+ void lexer::
+ mode (build2::lexer_mode m,
+ char ps,
+ optional<const char*> esc,
+ uintptr_t data)
+ {
+ bool a (false); // attributes
+
+ const char* s1 (nullptr);
+ const char* s2 (nullptr);
+
+ bool s (true); // space
+ bool n (true); // newline
+ bool q (true); // quotes
+
+ if (!esc)
+ {
+ assert (!state_.empty ());
+ esc = state_.top ().escapes;
+ }
+
+ switch (m)
+ {
+ case lexer_mode::command_line:
+ {
+ s1 = "=!|&<> $(#\t\n";
+ s2 = "== ";
+ break;
+ }
+ case lexer_mode::first_token:
+ {
+ // First token on the script line. Like command_line but
+ // recognizes variable assignments as separators.
+ //
+ s1 = "=+!|&<> $(#\t\n";
+ s2 = " == ";
+ break;
+ }
+ case lexer_mode::second_token:
+ {
+ // Second token on the script line. Like command_line but
+ // recognizes leading variable assignments.
+ //
+ // Note that to recognize only leading assignments we shouldn't
+ // add them to the separator strings (so this is identical to
+ // command_line).
+ //
+ s1 = "=!|&<> $(#\t\n";
+ s2 = "== ";
+ break;
+ }
+ case lexer_mode::variable_line:
+ {
+ // Like value except we don't recognize '{'.
+ //
+ s1 = " $(#\t\n";
+ s2 = " ";
+ break;
+ }
+ default:
+ {
+ base_lexer::mode (m, ps, esc);
+ return;
+ }
+ }
+
+ assert (ps == '\0');
+ state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
+ }
+
+ token lexer::
+ next ()
+ {
+ token r;
+
+ switch (state_.top ().mode)
+ {
+ case lexer_mode::command_line:
+ case lexer_mode::first_token:
+ case lexer_mode::second_token:
+ case lexer_mode::variable_line:
+ r = next_line ();
+ break;
+ default: return base_lexer::next ();
+ }
+
+ if (r.qtype != quote_type::unquoted)
+ ++quoted_;
+
+ return r;
+ }
+
+ token lexer::
+ next_line ()
+ {
+ bool sep (skip_spaces ().first);
+
+ xchar c (get ());
+ uint64_t ln (c.line), cn (c.column);
+
+ state st (state_.top ()); // Make copy (see first/second_token).
+ lexer_mode m (st.mode);
+
+ auto make_token = [&sep, ln, cn] (type t)
+ {
+ return token (t, sep, ln, cn, token_printer);
+ };
+
+ // Handle attributes (do it first to make sure the flag is cleared
+ // regardless of what we return).
+ //
+ if (st.attributes)
+ {
+ assert (m == lexer_mode::variable_line);
+
+ state_.top ().attributes = false;
+
+ if (c == '[')
+ return make_token (type::lsbrace);
+ }
+
+ if (eos (c))
+ return make_token (type::eos);
+
+ // Expire certain modes at the end of the token. Do it early in case
+ // we push any new mode (e.g., double quote).
+ //
+ if (m == lexer_mode::first_token || m == lexer_mode::second_token)
+ state_.pop ();
+
+ // NOTE: remember to update mode() if adding new special characters.
+
+ switch (c)
+ {
+ case '\n':
+ {
+ // Expire variable value mode at the end of the line.
+ //
+ if (m == lexer_mode::variable_line)
+ state_.pop ();
+
+ sep = true; // Treat newline as always separated.
+ return make_token (type::newline);
+ }
+
+ // Variable expansion, function call, and evaluation context.
+ //
+ case '$': return make_token (type::dollar);
+ case '(': return make_token (type::lparen);
+ }
+
+ // Command line operator/separators.
+ //
+ if (m == lexer_mode::command_line ||
+ m == lexer_mode::first_token ||
+ m == lexer_mode::second_token)
+ {
+ switch (c)
+ {
+ // Comparison (==, !=).
+ //
+ case '=':
+ case '!':
+ {
+ if (peek () == '=')
+ {
+ get ();
+ return make_token (c == '=' ? type::equal : type::not_equal);
+ }
+ }
+ }
+ }
+
+ // Command operators.
+ //
+ if (m == lexer_mode::command_line ||
+ m == lexer_mode::first_token ||
+ m == lexer_mode::second_token)
+ {
+ if (optional<token> t = next_cmd_op (c, sep))
+ return move (*t);
+ }
+
+ // Variable assignment (=, +=, =+).
+ //
+ if (m == lexer_mode::second_token)
+ {
+ switch (c)
+ {
+ case '=':
+ {
+ if (peek () == '+')
+ {
+ get ();
+ return make_token (type::prepend);
+ }
+ else
+ return make_token (type::assign);
+ }
+ case '+':
+ {
+ if (peek () == '=')
+ {
+ get ();
+ return make_token (type::append);
+ }
+ }
+ }
+ }
+
+ // Otherwise it is a word.
+ //
+ unget (c);
+ return word (st, sep);
+ }
+
+ token lexer::
+ word (state st, bool sep)
+ {
+ lexer_mode m (st.mode);
+
+ // Customized implementation that handles special variable names ($>,
+ // $<, $~).
+ //
+ // @@ TODO: $(<), $(>): feels like this will have to somehow be
+ // handled at the top-level lexer level. Maybe provide a
+ // string of one-char special variable names as state::data?
+ //
+ if (m != lexer_mode::variable)
+ return base_lexer::word (st, sep);
+
+ xchar c (peek ());
+
+ if (c != '>' && c != '<' && c != '~')
+ return base_lexer::word (st, sep);
+
+ get ();
+
+ state_.pop (); // Expire the variable mode.
+ return token (string (1, c),
+ sep,
+ quote_type::unquoted, false,
+ c.line, c.column);
+ }
+ }
+ }
+}
diff --git a/libbuild2/build/script/lexer.hxx b/libbuild2/build/script/lexer.hxx
new file mode 100644
index 0000000..7d919e5
--- /dev/null
+++ b/libbuild2/build/script/lexer.hxx
@@ -0,0 +1,80 @@
+// file : libbuild2/build/script/lexer.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_BUILD_SCRIPT_LEXER_HXX
+#define LIBBUILD2_BUILD_SCRIPT_LEXER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/script/lexer.hxx>
+
+#include <libbuild2/build/script/token.hxx>
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ struct lexer_mode: build2::script::lexer_mode
+ {
+ using base_type = build2::script::lexer_mode;
+
+ enum
+ {
+ command_line = base_type::value_next,
+ first_token, // Expires at the end of the token.
+ second_token, // Expires at the end of the token.
+ variable_line // Expires at the end of the line.
+ };
+
+ lexer_mode () = default;
+ lexer_mode (value_type v): base_type (v) {}
+ lexer_mode (build2::lexer_mode v): base_type (v) {}
+ };
+
+ class lexer: public build2::script::lexer
+ {
+ public:
+ using base_lexer = build2::script::lexer;
+
+ // Note that neither the name nor escape arguments are copied.
+ //
+ lexer (istream& is,
+ const path_name& name,
+ uint64_t line, // Start line in the stream.
+ lexer_mode m,
+ const char* escapes = nullptr)
+ : base_lexer (is, name, line,
+ nullptr /* escapes */,
+ false /* set_mode */,
+ redirect_aliases)
+ {
+ mode (m, '\0', escapes);
+ }
+
+ virtual void
+ mode (build2::lexer_mode,
+ char = '\0',
+ optional<const char*> = nullopt,
+ uintptr_t = 0) override;
+
+ virtual token
+ next () override;
+
+ public:
+ static redirect_aliases_type redirect_aliases;
+
+ private:
+ token
+ next_line ();
+
+ virtual token
+ word (state, bool) override;
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_BUILD_SCRIPT_LEXER_HXX
diff --git a/libbuild2/build/script/lexer.test.cxx b/libbuild2/build/script/lexer.test.cxx
new file mode 100644
index 0000000..1c47442
--- /dev/null
+++ b/libbuild2/build/script/lexer.test.cxx
@@ -0,0 +1,77 @@
+// file : libbuild2/build/script/lexer.test.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/build/script/token.hxx>
+#include <libbuild2/build/script/lexer.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ // Usage: argv[0] <lexer-mode>
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ lexer_mode m;
+ {
+ assert (argc == 2);
+ string s (argv[1]);
+
+ if (s == "command-line") m = lexer_mode::command_line;
+ else if (s == "first-token") m = lexer_mode::first_token;
+ else if (s == "second-token") m = lexer_mode::second_token;
+ else if (s == "variable-line") m = lexer_mode::variable_line;
+ else if (s == "variable") m = lexer_mode::variable;
+ else assert (false);
+ }
+
+ try
+ {
+ cin.exceptions (istream::failbit | istream::badbit);
+
+ // Some modes auto-expire so we need something underneath.
+ //
+ bool u (m != lexer_mode::command_line);
+
+ path_name in ("<stdin>");
+ lexer l (cin, in, 1 /* line */, lexer_mode::command_line);
+ if (u)
+ l.mode (m);
+
+ // No use printing eos since we will either get it or loop forever.
+ //
+ for (token t (l.next ()); t.type != token_type::eos; t = l.next ())
+ {
+ // Print each token on a separate line without quoting operators.
+ //
+ t.printer (cout, t, print_mode::normal);
+ cout << endl;
+ }
+ }
+ catch (const failed&)
+ {
+ return 1;
+ }
+
+ return 0;
+ }
+ }
+ }
+}
+
+int
+main (int argc, char* argv[])
+{
+ return build2::build::script::main (argc, argv);
+}
diff --git a/libbuild2/build/script/parser+cleanup.test.testscript b/libbuild2/build/script/parser+cleanup.test.testscript
new file mode 100644
index 0000000..9a5af3d
--- /dev/null
+++ b/libbuild2/build/script/parser+cleanup.test.testscript
@@ -0,0 +1,57 @@
+# file : libbuild2/build/script/parser+cleanup.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: always
+:
+$* <<EOI >>EOO
+cmd &file
+EOI
+cmd &file
+EOO
+
+: maybe
+:
+$* <<EOI >>EOO
+cmd &?file
+EOI
+cmd &?file
+EOO
+
+: never
+:
+$* <<EOI >>EOO
+cmd &!file
+EOI
+cmd &!file
+EOO
+
+: empty
+:
+$* <<EOI 2>>EOE != 0
+cmd &""
+EOI
+buildfile:11:6: error: empty cleanup path
+EOE
+
+: missed-before
+:
+{
+ : token
+ :
+ : Path missed before command next token
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd & >file
+ EOI
+ buildfile:11:7: error: missing cleanup path
+ EOE
+
+ : end
+ : Test path missed before end of command
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd &
+ EOI
+ buildfile:11:6: error: missing cleanup path
+ EOE
+}
diff --git a/libbuild2/build/script/parser+command-if.test.testscript b/libbuild2/build/script/parser+command-if.test.testscript
new file mode 100644
index 0000000..a18a885
--- /dev/null
+++ b/libbuild2/build/script/parser+command-if.test.testscript
@@ -0,0 +1,395 @@
+# file : libbuild2/build/script/parser+command-if.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: if
+:
+{
+ : true
+ :
+ $* <<EOI >>EOO
+ if true foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? true foo
+ cmd1
+ cmd2
+ EOO
+
+ : false
+ :
+ $* <<EOI >>EOO
+ if false foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? false foo
+ EOO
+
+ : not-true
+ :
+ $* <<EOI >>EOO
+ if! true foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? true foo
+ EOO
+
+ : not-false
+ :
+ $* <<EOI >>EOO
+ if! false foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? false foo
+ cmd1
+ cmd2
+ EOO
+
+ : without-command
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ cmd
+ end
+ EOI
+ buildfile:11:3: error: missing program
+ EOE
+}
+
+: elif
+:
+{
+ : true
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif true
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? true
+ cmd3
+ cmd4
+ EOO
+
+ : false
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif false
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? false
+ EOO
+
+ : not-true
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif! true
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? true
+ EOO
+
+ : not-false
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif! false
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? false
+ cmd3
+ cmd4
+ EOO
+
+ : without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ elif true
+ cmd
+ end
+ EOI
+ buildfile:12:1: error: 'elif' without preceding 'if'
+ EOE
+
+ : not-without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ elif! true
+ cmd
+ end
+ EOI
+ buildfile:12:1: error: 'elif!' without preceding 'if'
+ EOE
+
+ : after-else
+ :
+ $* <<EOI 2>>EOE != 0
+ if false
+ cmd
+ else
+ cmd
+ elif true
+ cmd
+ end
+ EOI
+ buildfile:15:1: error: 'elif' after 'else'
+ EOE
+}
+
+: else
+:
+{
+ : true
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ else
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ cmd3
+ cmd4
+ EOO
+
+ : false
+ :
+ $* <<EOI >>EOO
+ if true
+ cmd1
+ cmd2
+ else
+ cmd3
+ cmd4
+ end
+ EOI
+ ? true
+ cmd1
+ cmd2
+ EOO
+
+ : chain
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd
+ cmd
+ elif false
+ cmd
+ cmd
+ elif false
+ cmd
+ cmd
+ elif true
+ cmd1
+ cmd2
+ elif false
+ cmd
+ cmd
+ else
+ cmd
+ cmd
+ end
+ EOI
+ ? false
+ ? false
+ ? false
+ ? true
+ cmd1
+ cmd2
+ EOO
+
+ : command-after
+ :
+ $* <<EOI 2>>EOE != 0
+ if true
+ cmd
+ else cmd
+ cmd
+ end
+ EOI
+ buildfile:13:6: error: expected newline instead of 'cmd'
+ EOE
+
+ : without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ else
+ cmd
+ end
+ EOI
+ buildfile:12:1: error: 'else' without preceding 'if'
+ EOE
+
+ : after-else
+ :
+ $* <<EOI 2>>EOE != 0
+ if false
+ cmd
+ else
+ cmd
+ else
+ cmd
+ end
+ EOI
+ buildfile:15:1: error: 'else' after 'else'
+ EOE
+}
+
+: end
+{
+ : without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ end
+ EOI
+ buildfile:12:1: error: 'end' without preceding 'if'
+ EOE
+
+ : before
+ {
+ : command
+ :
+ $* <<EOI 2>>EOE != 0
+ if true
+ cmd
+ end cmd
+ EOI
+ buildfile:13:5: error: expected newline instead of 'cmd'
+ EOE
+ }
+}
+
+: nested
+:
+{
+ : take
+ :
+ $* <<EOI >>EOO
+ if true
+ cmd1
+ if false
+ cmd
+ elif false
+ if true
+ cmd
+ end
+ else
+ cmd2
+ end
+ cmd3
+ end
+ EOI
+ ? true
+ cmd1
+ ? false
+ ? false
+ cmd2
+ cmd3
+ EOO
+
+ : skip
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ if false
+ cmd
+ elif false
+ if true
+ cmd
+ end
+ else
+ cmd2
+ end
+ cmd3
+ else
+ cmd
+ end
+ EOI
+ ? false
+ cmd
+ EOO
+}
+
+: contained
+:
+{
+ : eos
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ EOI
+ buildfile:12:1: error: expected closing 'end'
+ EOE
+}
+
+: line-index
+:
+$* -l <<EOI >>EOO
+if false
+ cmd
+ if true
+ cmd
+ end
+ cmd
+elif false
+ cmd
+else
+ cmd
+end
+EOI
+? false # 1
+? false # 6
+cmd # 8
+EOO
+
+: var
+:
+$* <<EOI >>EOO
+if true
+ x = foo
+else
+ x = bar
+end
+cmd $x
+EOI
+? true
+cmd foo
+EOO
diff --git a/libbuild2/build/script/parser+command-re-parse.test.testscript b/libbuild2/build/script/parser+command-re-parse.test.testscript
new file mode 100644
index 0000000..a59b49c
--- /dev/null
+++ b/libbuild2/build/script/parser+command-re-parse.test.testscript
@@ -0,0 +1,11 @@
+# file : libbuild2/build/script/parser+command-re-parse.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: double-quote
+:
+$* <<EOI >>EOO
+x = cmd \">-\" "'<-'"
+$x
+EOI
+cmd '>-' '<-'
+EOO
diff --git a/libbuild2/build/script/parser+exit.test.testscript b/libbuild2/build/script/parser+exit.test.testscript
new file mode 100644
index 0000000..53ee1b9
--- /dev/null
+++ b/libbuild2/build/script/parser+exit.test.testscript
@@ -0,0 +1,26 @@
+# file : libbuild2/build/script/parser+exit.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: eq
+:
+$* <<EOI >>EOO
+cmd == 1
+EOI
+cmd == 1
+EOO
+
+: ne
+:
+$* <<EOI >>EOO
+cmd!=1
+EOI
+cmd != 1
+EOO
+
+: end
+:
+$* <<EOI 2>>EOE != 0
+cmd != 1 <"foo"
+EOI
+buildfile:11:10: error: expected newline instead of '<'
+EOE
diff --git a/libbuild2/build/script/parser+expansion.test.testscript b/libbuild2/build/script/parser+expansion.test.testscript
new file mode 100644
index 0000000..9f1e774
--- /dev/null
+++ b/libbuild2/build/script/parser+expansion.test.testscript
@@ -0,0 +1,35 @@
+# file : libbuild2/build/script/parser+expansion.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: quote
+:
+: Make sure everything is expanded as strings.
+:
+$* <<EOI >>EOO
+x = dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+cmd $x
+EOI
+cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+EOO
+
+: unterm-quoted-seq
+:
+$* <<EOI 2>>EOE != 0
+x = "'a bc"
+cmd xy$x
+EOI
+<string>:1:8: error: unterminated single-quoted sequence
+ buildfile:12:5: info: while parsing string 'xy'a bc'
+EOE
+
+: invalid-redirect
+:
+$* <<EOI 2>>EOE != 0
+x = "1>&a"
+cmd $x
+EOI
+<string>:1:4: error: stdout merge redirect file descriptor must be 2
+ buildfile:12:5: info: while parsing string '1>&a'
+EOE
diff --git a/libbuild2/build/script/parser+here-document.test.testscript b/libbuild2/build/script/parser+here-document.test.testscript
new file mode 100644
index 0000000..f56a5e1
--- /dev/null
+++ b/libbuild2/build/script/parser+here-document.test.testscript
@@ -0,0 +1,272 @@
+# file : libbuild2/build/script/parser+here-document.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: end-marker
+:
+{
+ : missing-newline
+ :
+ $* <'cmd <<=' 2>>EOE != 0
+ buildfile:11:8: error: expected here-document end marker
+ EOE
+
+ : missing-newline-alias
+ :
+ $* <'cmd <<' 2>>EOE != 0
+ buildfile:11:7: error: expected here-document end marker
+ EOE
+
+ : missing-exit
+ :
+ $* <'cmd <<= != 0' 2>>EOE != 0
+ buildfile:11:9: error: expected here-document end marker
+ EOE
+
+ : missing-exit-alias
+ :
+ $* <'cmd << != 0' 2>>EOE != 0
+ buildfile:11:8: error: expected here-document end marker
+ EOE
+
+ : missing-empty
+ :
+ $* <'cmd <<=""' 2>>EOE != 0
+ buildfile:11:8: error: expected here-document end marker
+ EOE
+
+ : missing-empty-alias
+ :
+ $* <'cmd <<""' 2>>EOE != 0
+ buildfile:11:7: error: expected here-document end marker
+ EOE
+
+ : unseparated-expansion
+ :
+ $* <'cmd <<=FOO$foo' 2>>EOE != 0
+ buildfile:11:11: error: here-document end marker must be literal
+ EOE
+
+ : unseparated-expansion-alias
+ :
+ $* <'cmd <<FOO$foo' 2>>EOE != 0
+ buildfile:11:10: error: here-document end marker must be literal
+ EOE
+
+ : quoted-single-partial
+ :
+ $* <"cmd <<=F'O'O" 2>>EOE != 0
+ buildfile:11:8: error: partially-quoted here-document end marker
+ EOE
+
+ : quoted-double-partial
+ :
+ $* <'cmd <<="FO"O' 2>>EOE != 0
+ buildfile:11:8: error: partially-quoted here-document end marker
+ EOE
+
+ : quoted-mixed
+ :
+ $* <"cmd <<=\"FO\"'O'" 2>>EOE != 0
+ buildfile:11:8: error: partially-quoted here-document end marker
+ EOE
+
+ : unseparated
+ :
+ $* <<EOI >>EOO
+ cmd <<=EOF!=0
+ foo
+ EOF
+ EOI
+ cmd <<=EOF != 0
+ foo
+ EOF
+ EOO
+
+ : unseparated-alias
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF!=0
+ foo
+ EOF
+ EOI
+ cmd <<EOF != 0
+ foo
+ EOF
+ EOO
+
+ : quoted-single
+ :
+ $* <<EOI >>EOO
+ cmd <<='EOF'
+ foo
+ EOF
+ EOI
+ cmd <<=EOF
+ foo
+ EOF
+ EOO
+
+ : quoted-single-alias
+ :
+ $* <<EOI >>EOO
+ cmd <<'EOF'
+ foo
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+ EOF
+ EOO
+
+ : quoted-double
+ :
+ $* <<EOI >>EOO
+ cmd <<="EOF"
+ foo
+ EOF
+ EOI
+ cmd <<=EOF
+ foo
+ EOF
+ EOO
+
+ : quoted-double-alias
+ :
+ $* <<EOI >>EOO
+ cmd <<"EOF"
+ foo
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+ EOF
+ EOO
+}
+
+: indent
+:
+{
+ : basic
+ :
+ $* <<EOI >>EOO
+ cmd <<=EOF
+ foo
+ bar
+ baz
+ EOF
+ EOI
+ cmd <<=EOF
+ foo
+ bar
+ baz
+ EOF
+ EOO
+
+ : blank
+ :
+ $* <<EOI >>EOO
+ cmd <<=EOF
+ foo
+
+
+ bar
+ EOF
+ EOI
+ cmd <<=EOF
+ foo
+
+
+ bar
+ EOF
+ EOO
+
+ : non-ws-prefix
+ :
+ $* <<EOI >>EOO
+ cmd <<=EOF
+ x EOF
+ EOF
+ EOI
+ cmd <<=EOF
+ x EOF
+ EOF
+ EOO
+
+ : whole-token
+ : Test the case where the indentation is a whole token
+ :
+ $* <<EOI >>EOO
+ x = foo bar
+ cmd <<="EOF"
+ $x
+ EOF
+ EOI
+ cmd <<=EOF
+ foo bar
+ EOF
+ EOO
+
+ : long-line
+ : Test the case where the line contains multiple tokens
+ :
+ $* <<EOI >>EOO
+ x = foo
+ cmd <<="EOF"
+ $x bar $x
+ EOF
+ EOI
+ cmd <<=EOF
+ foo bar foo
+ EOF
+ EOO
+
+ : unindented
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<=EOF
+ bar
+ EOF
+ EOI
+ buildfile:12:1: error: unindented here-document line
+ EOE
+}
+
+: blank
+:
+$* <<EOI >>EOO
+cmd <<=EOF
+
+foo
+
+bar
+
+EOF
+EOI
+cmd <<=EOF
+
+foo
+
+bar
+
+EOF
+EOO
+
+: quote
+:
+: Note: they are still recognized in eval contexts.
+:
+$* <<EOI >>EOO
+cmd <<="EOF"
+'single'
+"double"
+b'o't"h"
+('single' "double")
+EOF
+EOI
+cmd <<=EOF
+'single'
+"double"
+b'o't"h"
+single double
+EOF
+EOO
diff --git a/libbuild2/build/script/parser+here-string.test.testscript b/libbuild2/build/script/parser+here-string.test.testscript
new file mode 100644
index 0000000..f857c57
--- /dev/null
+++ b/libbuild2/build/script/parser+here-string.test.testscript
@@ -0,0 +1,34 @@
+# file : libbuild2/build/script/parser+here-string.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: empty
+:
+$* <<EOI >>EOO
+cmd <<<=""
+EOI
+cmd <<<=''
+EOO
+
+: empty-nn
+:
+$* <<EOI >>EOO
+cmd <<<=:""
+EOI
+cmd <<<=:''
+EOO
+
+: empty-alias
+:
+$* <<EOI >>EOO
+cmd <<<""
+EOI
+cmd <<<''
+EOO
+
+: empty-nn-alias
+:
+$* <<EOI >>EOO
+cmd <<<:""
+EOI
+cmd <<<:''
+EOO
diff --git a/libbuild2/build/script/parser+line.test.testscript b/libbuild2/build/script/parser+line.test.testscript
new file mode 100644
index 0000000..6401d91
--- /dev/null
+++ b/libbuild2/build/script/parser+line.test.testscript
@@ -0,0 +1,72 @@
+# file : libbuild2/build/script/parser+line.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+test.options += -d
+
+: command
+:
+$* <<EOF >>EOF
+ foo >| 2>- &a &?b
+ foo >=c 2>~/error:.*/ &!c
+ foo >>:/~%EOS%
+ %.*
+ abc
+ %xyz.*%
+ EOS
+ EOF
+
+: if-else
+:
+$* <<EOF >>EOF
+ if foo
+ bar
+ elif fox
+ if fix
+ baz
+ end
+ biz
+ end
+ if! foo
+ bar
+ elif! fox
+ baz
+ end
+ EOF
+
+: quoting
+:
+$* <<EOI >>EOO
+ foo 'bar' "baz" '' ""
+ "$foo"
+ "foo$"
+ "fo"o
+ "foo"\"
+ "foo\\"
+ "foo\"<"
+ fo\"o
+ fo\\o
+ fo\<o
+ "fo<o"
+ 'fo\"o'
+ f"oo" "ba"r
+ f"oo" 'ba'r
+ "fo"'o'
+ 'foo b"ar baz'
+ EOI
+ foo 'bar' "baz" '' ""
+ "$foo"
+ "foo$"
+ "foo"
+ "foo\""
+ "foo\\"
+ "foo\"<"
+ fo\"o
+ fo\\o
+ fo\<o
+ "fo<o"
+ 'fo\"o'
+ "foo bar"
+ "foo" 'bar'
+ "foo"
+ 'foo b"ar baz'
+ EOO
diff --git a/libbuild2/build/script/parser+pipe-expr.test.testscript b/libbuild2/build/script/parser+pipe-expr.test.testscript
new file mode 100644
index 0000000..a6ca12e
--- /dev/null
+++ b/libbuild2/build/script/parser+pipe-expr.test.testscript
@@ -0,0 +1,132 @@
+# file : libbuild2/build/script/parser+pipe-expr.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: pipe
+:
+$* <<EOI >>EOO
+cmd1 | cmd2|cmd3
+EOI
+cmd1 | cmd2 | cmd3
+EOO
+
+: log
+:
+$* <<EOI >>EOO
+cmd1 || cmd2&&cmd3
+EOI
+cmd1 || cmd2 && cmd3
+EOO
+
+: pipe-log
+:
+$* <<EOI >>EOO
+cmd1 | cmd2 && cmd3 | cmd4
+EOI
+cmd1 | cmd2 && cmd3 | cmd4
+EOO
+
+: exit
+:
+$* <<EOI >>EOO
+cmd1|cmd2==1&&cmd3!=0|cmd4
+EOI
+cmd1 | cmd2 == 1 && cmd3 != 0 | cmd4
+EOO
+
+: here-doc
+:
+$* <<EOI >>EOO
+cmd1 <<=EOI1 | cmd2 >>?EOO2 && cmd3 <<=EOI3 2>&1 | cmd4 2>>?EOE4 >>?EOO4
+input
+one
+EOI1
+ouput
+two
+EOO2
+input
+three
+EOI3
+error
+four
+EOE4
+output
+four
+EOO4
+EOI
+cmd1 <<=EOI1 | cmd2 >>?EOO2 && cmd3 <<=EOI3 2>&1 | cmd4 >>?EOO4 2>>?EOE4
+input
+one
+EOI1
+ouput
+two
+EOO2
+input
+three
+EOI3
+output
+four
+EOO4
+error
+four
+EOE4
+EOO
+
+: leading
+:
+$* <<EOI 2>>EOE != 0
+| cmd
+EOI
+buildfile:11:1: error: missing program
+EOE
+
+: trailing
+:
+$* <<EOI 2>>EOE != 0
+cmd &&
+EOI
+buildfile:11:7: error: missing program
+EOE
+
+: redirected
+:
+{
+ : input
+ :
+ {
+ : first
+ :
+ $* <<EOI >>EOO
+ cmd1 <foo | cmd2
+ EOI
+ cmd1 <foo | cmd2
+ EOO
+
+ : non-first
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd1 | cmd2 <foo
+ EOI
+ buildfile:11:13: error: stdin is both piped and redirected
+ EOE
+ }
+
+ : output
+ :
+ {
+ : last
+ :
+ $* <<EOI >>EOO
+ cmd1 | cmd2 >foo
+ EOI
+ cmd1 | cmd2 >foo
+ EOO
+
+ : non-last
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd1 >foo | cmd2
+ EOI
+ buildfile:11:11: error: stdout is both redirected and piped
+ EOE
+ }
+}
diff --git a/libbuild2/build/script/parser+pre-parse.test.testscript b/libbuild2/build/script/parser+pre-parse.test.testscript
new file mode 100644
index 0000000..4aff3e8
--- /dev/null
+++ b/libbuild2/build/script/parser+pre-parse.test.testscript
@@ -0,0 +1,22 @@
+# file : libbuild2/build/script/parser+pre-parse.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: attribute
+:
+{
+ : name
+ :
+ $* <<EOI 2>>EOE != 0
+ x = [foo]
+ EOI
+ buildfile:11:5: error: unknown value attribute foo
+ EOE
+
+ : name-value
+ :
+ $* <<EOI 2>>EOE != 0
+ x = [foo=bar]
+ EOI
+ buildfile:11:5: error: unknown value attribute foo=bar
+ EOE
+}
diff --git a/libbuild2/build/script/parser+redirect.test.testscript b/libbuild2/build/script/parser+redirect.test.testscript
new file mode 100644
index 0000000..82c04ea
--- /dev/null
+++ b/libbuild2/build/script/parser+redirect.test.testscript
@@ -0,0 +1,525 @@
+# file : libbuild2/build/script/parser+redirect.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+# @@ Add tests for redirects other than trace, here-*, file and merge.
+# @@ Does it make sense to split into separate files - one per redirect type?
+#
+
+: trace
+:
+{
+ $* <'cmd >!' >'cmd >!' : out
+ $* <'cmd 2>!' >'cmd 2>!' : err
+}
+
+: str
+:
+{
+ : literal
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd <<<=/foo >>>?/bar 2>>>?/baz
+ EOI
+ cmd <<<=/foo >>>?/bar 2>>>?/baz
+ EOO
+ }
+
+ : regex
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd >>>?/~%foo% 2>>>?/~%bar%
+ EOI
+ cmd >>>?/~%foo% 2>>>?/~%bar%
+ EOO
+ }
+}
+
+: doc
+:
+{
+ : literal
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd <<=/EOI_ >>?/EOO_ 2>>?/EOE_
+ foo
+ EOI_
+ bar
+ EOO_
+ baz
+ EOE_
+ EOI
+ cmd <<=/EOI_ >>?/EOO_ 2>>?/EOE_
+ foo
+ EOI_
+ bar
+ EOO_
+ baz
+ EOE_
+ EOO
+
+ : sharing
+ :
+ {
+ : in-out
+ :
+ $* <<EOI >>EOO
+ cmd <<=:/EOF >>?:/EOF
+ foo
+ EOF
+ EOI
+ cmd <<=:/EOF >>?:/EOF
+ foo
+ EOF
+ EOO
+
+ : in-alias-out
+ :
+ $* <<EOI >>EOO
+ cmd <<:/EOF >>?:/EOF
+ foo
+ EOF
+ EOI
+ cmd <<:/EOF >>?:/EOF
+ foo
+ EOF
+ EOO
+
+ : out-in-alias
+ :
+ $* <<EOI >>EOO
+ cmd >>?:/EOF <<:/EOF
+ foo
+ EOF
+ EOI
+ cmd <<:/EOF >>?:/EOF
+ foo
+ EOF
+ EOO
+
+ : different
+ :
+ {
+ : modifiers
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<=:/EOF >>?:EOF
+ foo
+ EOF
+ EOI
+ buildfile:11:18: error: different modifiers for shared here-document 'EOF'
+ EOE
+
+ : quoting
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<=EOF >>?"EOF"
+ foo
+ EOF
+ EOI
+ buildfile:11:15: error: different quoting for shared here-document 'EOF'
+ EOE
+ }
+ }
+ }
+
+ : regex
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd >>?/~%EOF% 2>>?/~%EOE%
+ foo
+ EOF
+ bar
+ EOE
+ EOI
+ cmd >>?/~%EOF% 2>>?/~%EOE%
+ foo
+ EOF
+ bar
+ EOE
+ EOO
+
+ : sharing
+ :
+ {
+ : in-out
+ :
+ $* <<EOI >>EOO
+ cmd >>?~/EOF/ 2>>?~/EOF/
+ foo
+ EOF
+ EOI
+ cmd >>?~/EOF/ 2>>?~/EOF/
+ foo
+ EOF
+ EOO
+
+ : different
+ :
+ {
+ : introducers
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>?~/EOF/ 2>>?~%EOF%
+ foo
+ EOF
+ EOI
+ buildfile:11:20: error: different introducers for shared here-document regex 'EOF'
+ EOE
+
+ : flags
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>?~/EOF/ 2>>?~/EOF/i
+ foo
+ EOF
+ EOI
+ buildfile:11:20: error: different global flags for shared here-document regex 'EOF'
+ EOE
+ }
+ }
+ }
+
+ : overriding
+ :
+ {
+ : literal
+ :
+ {
+ : with
+ :
+ {
+ : string
+ :
+ $* <<EOI >>EOO
+ cmd >>?EOF >>>?bar
+ foo
+ EOF
+ EOI
+ cmd >>>?bar
+ EOO
+
+ : regex
+ :
+ $* <<EOI >>EOO
+ cmd >>?FOO >>?~/BAR/
+ foo
+ FOO
+ bar
+ BAR
+ EOI
+ cmd >>?~/BAR/
+ bar
+ BAR
+ EOO
+
+ : self
+ :
+ $* <<EOI >>EOO
+ cmd >>EOF >>EOF
+ foo
+ EOF
+ EOI
+ cmd >>EOF
+ foo
+ EOF
+ EOO
+
+ : different-modifiers
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>?EOF >>?/EOF
+ foo
+ EOF
+ EOI
+ buildfile:11:16: error: different modifiers for shared here-document 'EOF'
+ EOE
+ }
+ }
+
+ : shared
+ :
+ {
+ : after-sharing
+ :
+ $* <<EOI >>EOO
+ cmd >>EOF 2>>EOF >bar
+ foo
+ EOF
+ EOI
+ cmd >bar 2>>EOF
+ foo
+ EOF
+ EOO
+
+ : before-sharing
+ :
+ $* <<EOI >>EOO
+ cmd >>EOF >bar 2>>EOF
+ foo
+ EOF
+ EOI
+ cmd >bar 2>>EOF
+ foo
+ EOF
+ EOO
+ }
+ }
+}
+
+: file
+:
+{
+ : cmp
+ :
+ $* <<EOI >>EOO
+ cmd 0<=a 1>?b 2>?c
+ EOI
+ cmd <=a >?b 2>?c
+ EOO
+
+ : write
+ :
+ $* <<EOI >>EOO
+ cmd 1>=b 2>+c
+ EOI
+ cmd >=b 2>+c
+ EOO
+
+ : quote
+ :
+ $* <<EOI >>EOO
+ cmd 0<="a f" 1>="b f" 2>+"c f"
+ EOI
+ cmd <='a f' >='b f' 2>+'c f'
+ EOO
+
+ : in
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd <=
+ EOI
+ buildfile:11:7: error: missing stdin file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd <=""
+ EOI
+ buildfile:11:7: error: empty stdin redirect path
+ EOE
+ }
+
+ : in-alias
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd <
+ EOI
+ buildfile:11:6: error: missing stdin file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd <""
+ EOI
+ buildfile:11:6: error: empty stdin redirect path
+ EOE
+ }
+
+ : out
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd >=
+ EOI
+ buildfile:11:7: error: missing stdout file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd >=""
+ EOI
+ buildfile:11:7: error: empty stdout redirect path
+ EOE
+ }
+
+ : out-alias
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd >
+ EOI
+ buildfile:11:6: error: missing stdout file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd >""
+ EOI
+ buildfile:11:6: error: empty stdout redirect path
+ EOE
+ }
+
+ : err
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd 2>=
+ EOI
+ buildfile:11:8: error: missing stderr file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd 2>=""
+ EOI
+ buildfile:11:8: error: empty stderr redirect path
+ EOE
+ }
+
+ : err-alias
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd 2>
+ EOI
+ buildfile:11:7: error: missing stderr file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd 2>""
+ EOI
+ buildfile:11:7: error: empty stderr redirect path
+ EOE
+ }
+}
+
+: merge
+{
+ : out
+ :
+ {
+ : err
+ :
+ $* <<EOI >>EOO
+ cmd 1>&2
+ EOI
+ cmd >&2
+ EOO
+
+ : no-mutual
+ :
+ $* <<EOI >>EOO
+ cmd 1>&2 2>&1 2>a
+ EOI
+ cmd >&2 2>a
+ EOO
+
+ : not-descriptor
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&a
+ EOI
+ buildfile:11:8: error: stdout merge redirect file descriptor must be 2
+ EOE
+
+ : self
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&1
+ EOI
+ buildfile:11:8: error: stdout merge redirect file descriptor must be 2
+ EOE
+
+ : missed
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&
+ EOI
+ buildfile:11:8: error: missing stdout file descriptor
+ EOE
+ }
+
+ : err
+ {
+ : out
+ :
+ $* <<EOI >>EOO
+ cmd 2>&1
+ EOI
+ cmd 2>&1
+ EOO
+
+ : no-mutual
+ :
+ $* <<EOI >>EOO
+ cmd 1>&2 2>&1 >a
+ EOI
+ cmd >a 2>&1
+ EOO
+
+ : not-descriptor
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 2>&a
+ EOI
+ buildfile:11:8: error: stderr merge redirect file descriptor must be 1
+ EOE
+
+ : self
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 2>&2
+ EOI
+ buildfile:11:8: error: stderr merge redirect file descriptor must be 1
+ EOE
+
+ : missed
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 2>&
+ EOI
+ buildfile:11:8: error: missing stderr file descriptor
+ EOE
+ }
+
+ : mutual
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&2 2>&1
+ EOI
+ buildfile:11:14: error: stdout and stderr redirected to each other
+ EOE
+}
diff --git a/libbuild2/build/script/parser+regex.test.testscript b/libbuild2/build/script/parser+regex.test.testscript
new file mode 100644
index 0000000..625bfdf
--- /dev/null
+++ b/libbuild2/build/script/parser+regex.test.testscript
@@ -0,0 +1,225 @@
+# file : libbuild2/build/script/parser+regex.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: here-string
+:
+{
+ : stdout
+ :
+ {
+ : missed
+ :
+ $* <'cmd >>>?~' 2>>EOE != 0
+ buildfile:11:10: error: missing stdout here-string regex
+ EOE
+
+ : no-introducer
+ :
+ $* <'cmd >>>?~""' 2>>EOE != 0
+ buildfile:11:10: error: no introducer character in stdout regex redirect
+ EOE
+
+ : no-term-introducer
+ :
+ $* <'cmd >>>?~/' 2>>EOE != 0
+ buildfile:11:10: error: no closing introducer character in stdout regex redirect
+ EOE
+
+ : portable-path-introducer
+ :
+ $* <'cmd >>>?/~/foo/' 2>>EOE != 0
+ buildfile:11:11: error: portable path modifier and '/' introducer in stdout regex redirect
+ EOE
+
+ : empty
+ :
+ $* <'cmd >>>?~//' 2>>EOE != 0
+ buildfile:11:10: error: stdout regex redirect is empty
+ EOE
+
+ : no-flags
+ :
+ $* <'cmd >>>?~/fo*/' >'cmd >>>?~/fo*/'
+
+ : idot
+ :
+ $* <'cmd >>>?~/fo*/d' >'cmd >>>?~/fo*/d'
+
+ : icase
+ :
+ $* <'cmd >>>?~/fo*/i' >'cmd >>>?~/fo*/i'
+
+ : invalid-flags1
+ :
+ $* <'cmd >>>?~/foo/z' 2>>EOE != 0
+ buildfile:11:10: error: junk at the end of stdout regex redirect
+ EOE
+
+ : invalid-flags2
+ :
+ $* <'cmd >>>?~/foo/iz' 2>>EOE != 0
+ buildfile:11:10: error: junk at the end of stdout regex redirect
+ EOE
+
+ : no-newline
+ :
+ $* <'cmd >>>?:~/fo*/' >'cmd >>>?:~/fo*/'
+ }
+
+ : stderr
+ :
+ {
+ : missed
+ :
+ $* <'cmd 2>>>?~' 2>>EOE != 0
+ buildfile:11:11: error: missing stderr here-string regex
+ EOE
+
+ : no-introducer
+ :
+ : Note that there is no need to reproduce all the errors as for stdout.
+ : All we need is to make sure that the proper description is passed to
+ : the parse_regex() function.
+ :
+ $* <'cmd 2>>>?~""' 2>>EOE != 0
+ buildfile:11:11: error: no introducer character in stderr regex redirect
+ EOE
+ }
+
+ : modifier-last
+ :
+ $* <'cmd >>>?~/x' 2>>EOE != 0
+ buildfile:11:10: error: no closing introducer character in stdout regex redirect
+ EOE
+}
+
+: here-doc
+:
+{
+ : stdout
+ :
+ {
+ : missed
+ :
+ $* <'cmd >>?~' 2>>EOE != 0
+ buildfile:11:9: error: expected here-document regex end marker
+ EOE
+
+ : portable-path-introducer
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>?/~/EOO/
+ foo
+ EOO
+ EOI
+ buildfile:11:5: error: portable path modifier and '/' introducer in here-document regex end marker
+ EOE
+
+ : unterminated-line-char
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>?~/EOO/
+ /
+ EOO
+ EOI
+ buildfile:12:1: error: no syntax line characters
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>?:~/EOO/
+ EOO
+ EOI
+ buildfile:12:1: error: empty here-document regex
+ EOE
+
+ : no-flags
+ :
+ $* <<EOI >>EOO
+ cmd 2>>?~/EOE/
+ foo
+ /?
+ /foo/
+ /foo/*
+ /foo/i
+ /foo/i*
+
+ //
+ //*
+ EOE
+ EOI
+ cmd 2>>?~/EOE/
+ foo
+ /?
+ /foo/
+ /foo/*
+ /foo/i
+ /foo/i*
+
+ //
+ //*
+ EOE
+ EOO
+
+ : no-newline-str
+ :
+ $* <'cmd >>>?:~/fo*/' >'cmd >>>?:~/fo*/'
+
+ : no-newline-doc
+ :
+ $* <<EOI >>EOO
+ cmd 2>>?:~/EOE/
+ foo
+ EOE
+ EOI
+ cmd 2>>?:~/EOE/
+ foo
+ EOE
+ EOO
+
+ : end-marker-restore
+ :
+ {
+ : idot
+ :
+ $* <<EOI >>EOO
+ cmd 2>>?~/EOE/d
+ foo
+ EOE
+ EOI
+ cmd 2>>?~/EOE/d
+ foo
+ EOE
+ EOO
+
+ : icase
+ :
+ $* <<EOI >>EOO
+ cmd 2>>?~/EOE/i
+ foo
+ EOE
+ EOI
+ cmd 2>>?~/EOE/i
+ foo
+ EOE
+ EOO
+ }
+ }
+
+ : stderr
+ :
+ {
+ : missed
+ :
+ $* <'cmd 2>>?~' 2>>EOE != 0
+ buildfile:11:10: error: expected here-document regex end marker
+ EOE
+ }
+
+ : modifier-last
+ :
+ $* <'cmd >>?~:/FOO/' 2>>EOE != 0
+ buildfile:11:5: error: no closing introducer character in here-document regex end marker
+ EOE
+}
diff --git a/libbuild2/build/script/parser+variable.test.testscript b/libbuild2/build/script/parser+variable.test.testscript
new file mode 100644
index 0000000..5040e66
--- /dev/null
+++ b/libbuild2/build/script/parser+variable.test.testscript
@@ -0,0 +1,41 @@
+# file : libbuild2/build/script/parser+variable.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: assignment
+:
+$* <<EOI >>EOO
+a = b
+echo $a
+EOI
+echo b
+EOO
+
+: primary-target
+:
+$* <<EOI >>EOO
+echo $name($>)
+EOI
+echo driver
+EOO
+
+: no-newline
+:
+$* <:'echo a' 2>>EOE != 0
+buildfile:11:7: error: expected newline instead of <end of file>
+EOE
+
+: set-primary-target
+:
+$* <<EOI 2>>EOE != 0
+> = a
+EOI
+buildfile:11:1: error: missing program
+EOE
+
+: empty-name
+:
+$* <<EOI 2>>EOE != 0
+= b
+EOI
+buildfile:11:1: error: missing variable name
+EOE
diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx
new file mode 100644
index 0000000..e64db91
--- /dev/null
+++ b/libbuild2/build/script/parser.cxx
@@ -0,0 +1,391 @@
+// file : libbuild2/build/script/parser.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/build/script/parser.hxx>
+
+#include <libbuild2/build/script/lexer.hxx>
+#include <libbuild2/build/script/runner.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ using type = token_type;
+
+ //
+ // Pre-parse.
+ //
+
+ script parser:: // Pre-parse the script read from is and return it.
+ pre_parse (istream& is, const path_name& pn, uint64_t line)
+ {
+ path_ = &pn; // Pointer to the caller's object, not a copy.
+
+ pre_parse_ = true; // Selects the pre-parse branch in lookup_variable().
+
+ lexer l (is, *path_, line, lexer_mode::command_line);
+ set_lexer (&l);
+
+ script s;
+ script_ = &s; // Populated by the pre_parse_*() calls below.
+ runner_ = nullptr; // Execute state, cleared for pre-parse.
+ environment_ = nullptr;
+
+ s.start_loc = location (*path_, line, 1); // Column 1 of the start line.
+
+ token t (pre_parse_script ());
+
+ assert (t.type == type::eos); // pre_parse_script() only returns on eos.
+
+ s.end_loc = get_location (t); // Location of the eos token.
+
+ return s;
+ }
+
+ token parser:: // Pre-parse every line of the script; return the eos token.
+ pre_parse_script ()
+ {
+ // enter: next token is first token of the script
+ // leave: eos (returned)
+
+ token t;
+ type tt;
+
+ // Parse lines until we see eos.
+ //
+ for (;;)
+ {
+ // Start lexing each line in the first_token mode.
+ //
+ tt = peek (lexer_mode::first_token);
+
+ // Only eos is special here; any other token starts a line.
+ //
+ switch (tt)
+ {
+ case type::eos:
+ {
+ next (t, tt); // Consume the peeked eos so it can be returned.
+ return t;
+ }
+ default:
+ {
+ pre_parse_line (t, tt); // if_line is false: stray elif/else/end fail.
+ assert (tt == type::newline);
+ break;
+ }
+ }
+ }
+ }
+
+ void parser:: // Pre-parse one line and append it to script_->lines.
+ pre_parse_line (token& t, type& tt, bool if_line)
+ {
+ // Determine the line type/start token.
+ //
+ line_type lt (
+ pre_parse_line_start (t, tt, lexer_mode::second_token));
+
+ line ln;
+ switch (lt)
+ {
+ case line_type::var:
+ {
+ // Check if we are trying to modify any of the special variables.
+ //
+ if (special_variable (t.value))
+ fail (t) << "attempt to set '" << t.value << "' special "
+ << "variable";
+
+ // We don't pre-enter variables.
+ //
+ ln.var = nullptr;
+
+ next (t, tt); // Assignment kind.
+
+ mode (lexer_mode::variable_line);
+ parse_variable_line (t, tt); // Result is not used during pre-parse.
+
+ if (tt != type::newline)
+ fail (t) << "expected newline instead of " << t;
+
+ break;
+ }
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ case line_type::cmd_end:
+ {
+ if (!if_line) // if_line is only passed as true by pre_parse_if_else().
+ {
+ fail (t) << lt << " without preceding 'if'";
+ }
+ }
+ // Fall through.
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ next (t, tt); // Skip to start of command.
+ // Fall through.
+ case line_type::cmd:
+ {
+ pair<command_expr, here_docs> p; // Left empty for else/end.
+
+ if (lt != line_type::cmd_else && lt != line_type::cmd_end)
+ p = parse_command_expr (t, tt, lexer::redirect_aliases);
+
+ if (tt != type::newline)
+ fail (t) << "expected newline instead of " << t;
+
+ parse_here_documents (t, tt, p);
+ break;
+ }
+ }
+
+ assert (tt == type::newline);
+
+ ln.type = lt;
+ ln.tokens = replay_data (); // Presumably this line's tokens -- confirm.
+ script_->lines.push_back (move (ln));
+
+ if (lt == line_type::cmd_if || lt == line_type::cmd_ifn)
+ {
+ tt = peek (lexer_mode::first_token);
+
+ pre_parse_if_else (t, tt); // Pre-parses lines through the closing 'end'.
+ }
+ }
+
+ void parser:: // Pre-parse the lines of an if-else block up to its 'end'.
+ pre_parse_if_else (token& t, type& tt)
+ {
+ // enter: peeked first token of next line (type in tt)
+ // leave: newline
+
+ // Parse lines until we see closing 'end'. Nested if-else blocks are
+ // handled recursively.
+ //
+ for (line_type bt (line_type::cmd_if); // Current block.
+ ;
+ tt = peek (lexer_mode::first_token))
+ {
+ const location ll (get_location (peeked ())); // For diagnostics below.
+
+ if (tt == type::eos)
+ fail (ll) << "expected closing 'end'";
+
+ // Parse one line. Note that this one line can still be multiple
+ // lines in case of if-else. In this case we want to view it as
+ // cmd_if, not cmd_end. Thus remember the start position of the
+ // next logical line.
+ //
+ size_t i (script_->lines.size ());
+
+ pre_parse_line (t, tt, true /* if_line */);
+ assert (tt == type::newline);
+
+ line_type lt (script_->lines[i].type); // First entry pushed by the call.
+
+ // First take care of 'end'.
+ //
+ if (lt == line_type::cmd_end)
+ return;
+
+ // Check if-else block sequencing: nothing but 'end' may follow 'else'.
+ //
+ if (bt == line_type::cmd_else)
+ {
+ if (lt == line_type::cmd_else ||
+ lt == line_type::cmd_elif ||
+ lt == line_type::cmd_elifn)
+ fail (ll) << lt << " after " << bt;
+ }
+
+ // Update current if-else block (elif and elif! both map to cmd_elif).
+ //
+ switch (lt)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn: bt = line_type::cmd_elif; break;
+ case line_type::cmd_else: bt = line_type::cmd_else; break;
+ default: break;
+ }
+ }
+ }
+
+ command_expr parser:: // Parse a command line with its here-documents.
+ parse_command_line (token& t, type& tt)
+ {
+ // enter: first token of the command line
+ // leave: <newline>
+
+ // Note: this one is only used during execution.
+ //
+ assert (!pre_parse_);
+
+ pair<command_expr, here_docs> p (
+ parse_command_expr (t, tt, lexer::redirect_aliases));
+
+ assert (tt == type::newline);
+
+ parse_here_documents (t, tt, p);
+ assert (tt == type::newline);
+
+ return move (p.first); // Only the expression is returned, not p.second.
+ }
+
+ //
+ // Execute.
+ //
+
+ void parser:: // Set up the execute state and run the pre-parsed script.
+ execute (const scope& rs, const scope& bs,
+ environment& e, const script& s, runner& r)
+ {
+ path_ = nullptr; // Set by replays.
+
+ pre_parse_ = false;
+
+ set_lexer (nullptr); // No lexer here; presumably tokens are replayed.
+
+ // The script shouldn't be able to modify the scopes.
+ //
+ root_ = const_cast<scope*> (&rs);
+ scope_ = const_cast<scope*> (&bs);
+ pbase_ = scope_->src_path_;
+
+ script_ = const_cast<script*> (&s); // exec_script() reads it as const.
+ runner_ = &r;
+ environment_ = &e;
+
+ exec_script ();
+ }
+
+ void parser:: // Run the lines of *script_ through exec_lines() and runner_.
+ exec_script ()
+ {
+ const script& s (*script_);
+
+ runner_->enter (*environment_, s.start_loc);
+
+ // Note that we rely on "small function object" optimization for the
+ // exec_*() lambdas.
+ //
+ auto exec_set = [this] (const variable& var,
+ token& t, build2::script::token_type& tt,
+ const location&)
+ {
+ next (t, tt);
+ type kind (tt); // Assignment kind.
+
+ mode (lexer_mode::variable_line);
+ value rhs (parse_variable_line (t, tt));
+
+ assert (tt == type::newline);
+
+ // Assign (any kind other than type::assign goes through append()).
+ //
+ value& lhs (kind == type::assign
+ ? environment_->assign (var)
+ : environment_->append (var));
+
+ apply_value_attributes (&var, lhs, move (rhs), kind);
+ };
+
+ auto exec_cmd = [this] (token& t, build2::script::token_type& tt,
+ size_t li,
+ bool single,
+ const location& ll)
+ {
+ // We use the 0 index to signal that this is the only command.
+ //
+ if (single)
+ li = 0;
+
+ command_expr ce (
+ parse_command_line (t, static_cast<token_type&> (tt)));
+
+ runner_->run (*environment_, ce, li, ll);
+ };
+
+ auto exec_if = [this] (token& t, build2::script::token_type& tt,
+ size_t li,
+ const location& ll)
+ {
+ command_expr ce (
+ parse_command_line (t, static_cast<token_type&> (tt)));
+
+ // Assume if-else always involves multiple commands.
+ //
+ return runner_->run_if (*environment_, ce, li, ll);
+ };
+
+ size_t li (1); // Initial line index passed to exec_lines().
+
+ exec_lines (s.lines.begin (), s.lines.end (),
+ exec_set, exec_cmd, exec_if,
+ li,
+ &environment_->var_pool);
+
+ runner_->leave (*environment_, s.end_loc);
+ }
+
+ // When adding a special variable don't forget to update lexer::word().
+ //
+ bool parser:: // True if n is one of the special names: >, <, or ~.
+ special_variable (const string& n) noexcept
+ {
+ return n == ">" || n == "<" || n == "~";
+ }
+
+ lookup parser:: // Pre-parse: record the name. Execute: look it up.
+ lookup_variable (name&& qual, string&& name, const location& loc)
+ {
+ // In the pre-parse mode collect the referenced variable names for the
+ // script semantics change tracking.
+ //
+ if (pre_parse_)
+ {
+ // Add the variable name skipping special variables and suppressing
+ // duplicates. While at it, check if the script temporary directory
+ // is referenced and set the flag, if that's the case.
+ //
+ if (special_variable (name))
+ {
+ if (name == "~")
+ script_->temp_dir = true;
+ }
+ else if (!name.empty ())
+ {
+ auto& vars (script_->vars);
+
+ if (find (vars.begin (), vars.end (), name) == vars.end ())
+ vars.push_back (move (name)); // name is moved-from after this.
+ }
+
+ return lookup (); // Pre-parse always returns a default lookup.
+ }
+
+ if (!qual.empty ()) // Only checked during execution, not pre-parse.
+ fail (loc) << "qualified variable name";
+
+ lookup r (environment_->lookup (name));
+
+ // Fail if non-script-local variable with an untracked name.
+ //
+ if (r.defined () && !r.belongs (*environment_))
+ {
+ const auto& vars (script_->vars); // Names collected during pre-parse.
+
+ if (find (vars.begin (), vars.end (), name) == vars.end ())
+ fail (loc) << "use of untracked variable '" << name << "'";
+ }
+
+ return r;
+ }
+ }
+ }
+}
diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx
new file mode 100644
index 0000000..27e7f49
--- /dev/null
+++ b/libbuild2/build/script/parser.hxx
@@ -0,0 +1,96 @@
+// file : libbuild2/build/script/parser.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_BUILD_SCRIPT_PARSER_HXX
+#define LIBBUILD2_BUILD_SCRIPT_PARSER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/forward.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/script/parser.hxx>
+
+#include <libbuild2/build/script/token.hxx>
+#include <libbuild2/build/script/script.hxx>
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ class runner;
+
+ class parser: public build2::script::parser
+ {
+ // Pre-parse. Issue diagnostics and throw failed in case of an error.
+ //
+ public:
+ parser (context& c): build2::script::parser (c) {}
+
+ // Note that the returned script object references the passed path
+ // name.
+ //
+ script
+ pre_parse (istream&, const path_name&, uint64_t line);
+
+ // Recursive descent parser.
+ //
+ // Usually (but not always) parse functions receive the token/type
+ // from which they should start consuming and in return the token/type
+ // should contain the first token that has not been consumed.
+ //
+ // Functions that are called parse_*() rather than pre_parse_*() are
+ // used for both stages.
+ //
+ protected:
+ token
+ pre_parse_script ();
+
+ void
+ pre_parse_line (token&, token_type&, bool if_line = false);
+
+ void
+ pre_parse_if_else (token&, token_type&);
+
+ command_expr
+ parse_command_line (token&, token_type&);
+
+ // Execute. Issue diagnostics and throw failed in case of an error.
+ //
+ public:
+ void
+ execute (const scope& root, const scope& base,
+ environment&, const script&, runner&);
+
+ protected:
+ void
+ exec_script ();
+
+ // Helpers.
+ //
+ public:
+ static bool
+ special_variable (const string&) noexcept;
+
+ // Customization hooks.
+ //
+ protected:
+ virtual lookup
+ lookup_variable (name&&, string&&, const location&) override;
+
+ protected:
+ script* script_; // Script being pre-parsed or executed.
+
+ // Execute state (both are set to NULL by pre_parse()).
+ //
+ runner* runner_;
+ environment* environment_;
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_BUILD_SCRIPT_PARSER_HXX
diff --git a/libbuild2/build/script/parser.test.cxx b/libbuild2/build/script/parser.test.cxx
new file mode 100644
index 0000000..9046312
--- /dev/null
+++ b/libbuild2/build/script/parser.test.cxx
@@ -0,0 +1,224 @@
+// file : libbuild2/build/script/parser.test.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/scheduler.hxx>
+
+#include <libbuild2/build/script/script.hxx> // line
+#include <libbuild2/build/script/parser.hxx>
+#include <libbuild2/build/script/runner.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ class print_runner: public runner
+ {
+ public:
+ print_runner (bool line): line_ (line) {}
+
+ virtual void
+ enter (environment&, const location&) override {}
+
+ virtual void
+ run (environment&,
+ const command_expr& e,
+ size_t i,
+ const location&) override
+ {
+ cout << e;
+
+ if (line_)
+ cout << " # " << i;
+
+ cout << endl;
+ }
+
+ virtual bool
+ run_if (environment&,
+ const command_expr& e,
+ size_t i,
+ const location&) override
+ {
+ cout << "? " << e;
+
+ if (line_)
+ cout << " # " << i;
+
+ cout << endl;
+
+ return e.back ().pipe.back ().program.string () == "true";
+ }
+
+ virtual void
+ leave (environment&, const location&) override {}
+
+ private:
+ bool line_;
+ };
+
+ // Usages:
+ //
+ // argv[0] [-l]
+ // argv[0] -d
+ // argv[0] -p
+ //
+ // In the first form read the script from stdin and trace the script
+ // execution to stdout using the custom print runner.
+ //
+ // In the second form read the script from stdin, parse it and dump the
+ // resulting lines to stdout.
+ //
+ // In the third form read the script from stdin, parse it and print
+ // line tokens quoting information to stdout.
+ //
+ // -l
+ // Print the script line number for each executed expression.
+ //
+ // -d
+ // Dump the parsed script to stdout.
+ //
+ // -p
+ // Print the parsed script tokens quoting information to stdout. If a
+ // token is quoted follow its representation with its quoting
+ // information in the [<quoting>/<completeness>] form, where:
+ //
+ // <quoting> := 'S' | 'D' | 'M'
+ // <completeness> := 'C' | 'P'
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ tracer trace ("main");
+
+ enum class mode
+ {
+ run,
+ dump,
+ print
+ } m (mode::run);
+
+ bool print_line (false);
+
+ for (int i (1); i != argc; ++i)
+ {
+ string a (argv[i]);
+
+ if (a == "-l")
+ print_line = true;
+ else if (a == "-d")
+ m = mode::dump;
+ else if (a == "-p")
+ m = mode::print;
+ else
+ assert (false);
+ }
+
+ assert (m == mode::run || !print_line);
+
+ // Fake build system driver, default verbosity.
+ //
+ init_diag (1);
+ init (nullptr, argv[0]);
+
+ // Serial execution.
+ //
+ scheduler sched (1);
+ global_mutexes mutexes (1);
+ context ctx (sched, mutexes);
+
+ try
+ {
+ cin.exceptions (istream::failbit | istream::badbit);
+
+ // Enter mock target. Use fixed name and path so that we can use
+ // them in expected results. Strictly speaking target path should
+ // be absolute. However, the buildscript implementation doesn't
+ // really care.
+ //
+ file& tt (
+ ctx.targets.insert<file> (work,
+ dir_path (),
+ "driver",
+ string (),
+ trace));
+
+ tt.path (path ("driver"));
+
+ // Parse and run.
+ //
+ parser p (ctx);
+ path_name nm ("buildfile");
+ script s (p.pre_parse (cin, nm, 11 /* line */));
+
+ switch (m)
+ {
+ case mode::run:
+ {
+ environment e (perform_update_id, tt, false /* temp_dir */);
+ print_runner r (print_line);
+ p.execute (ctx.global_scope, ctx.global_scope, e, s, r);
+ break;
+ }
+ case mode::dump:
+ {
+ dump (cout, "", s.lines);
+ break;
+ }
+ case mode::print:
+ {
+ for (const line& l: s.lines)
+ {
+ for (const replay_token& rt: l.tokens)
+ {
+ if (&rt != &l.tokens[0])
+ cout << ' ';
+
+ const token& t (rt.token);
+ cout << t;
+
+ char q ('\0');
+ switch (t.qtype)
+ {
+ case quote_type::single: q = 'S'; break;
+ case quote_type::double_: q = 'D'; break;
+ case quote_type::mixed: q = 'M'; break;
+ case quote_type::unquoted: break;
+ }
+
+ if (q != '\0')
+ cout << " [" << q << (t.qcomp ? "/C" : "/P") << ']';
+ }
+ }
+
+ cout << endl;
+ }
+ }
+ }
+ catch (const failed&)
+ {
+ return 1;
+ }
+
+ return 0;
+ }
+ }
+ }
+}
+
+int
+main (int argc, char* argv[])
+{
+ return build2::build::script::main (argc, argv);
+}
diff --git a/libbuild2/build/script/runner.cxx b/libbuild2/build/script/runner.cxx
new file mode 100644
index 0000000..315a248
--- /dev/null
+++ b/libbuild2/build/script/runner.cxx
@@ -0,0 +1,133 @@
+// file : libbuild2/build/script/runner.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/build/script/runner.hxx>
+
+#include <libbutl/filesystem.mxx> // try_rmdir()
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/script/run.hxx>
+
+using namespace butl;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ void default_runner::
+ enter (environment&, const location&)
+ {
+ }
+
+ void default_runner::
+ leave (environment& env, const location& ll)
+ {
+ // Drop cleanups of target paths (primary target and ad hoc members).
+ //
+ for (auto i (env.cleanups.begin ()); i != env.cleanups.end (); )
+ {
+ const target* m (&env.target);
+ for (; m != nullptr; m = m->adhoc_member)
+ {
+ if (const path_target* pm = m->is_a<path_target> ())
+ if (i->path == pm->path ())
+ break;
+ }
+
+ if (m != nullptr)
+ i = env.cleanups.erase (i);
+ else
+ ++i;
+ }
+
+ clean (env, ll);
+
+ // Remove the temporary directory, if created.
+ //
+ const dir_path& td (env.temp_dir.path);
+
+ if (!td.empty ())
+ {
+ // Note that since the temporary directory may only contain special
+ // files that are created and registered for cleanup by the script
+ // running machinery and should all be removed by the above clean()
+ // function call, its removal failure may not be the script's fault
+ // but potentially a bug or a filesystem problem. Thus, we don't
+ // ignore the errors and report them.
+ //
+ env.temp_dir.cancel ();
+
+ try
+ {
+ // Note that the temporary directory must be empty by now.
+ //
+ rmdir_status r (try_rmdir (td));
+
+ if (r != rmdir_status::success)
+ {
+ // While there can be no fault of the script being currently
+ // executed let's add the location anyway to ease the
+ // troubleshooting. And let's stick to that principle down the
+ // road.
+ //
+ diag_record dr (fail (ll));
+ dr << "temporary directory '" << td
+ << (r == rmdir_status::not_exist
+ ? "' does not exist"
+ : "' is not empty");
+
+ if (r == rmdir_status::not_empty)
+ build2::script::print_dir (dr, td, ll);
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail (ll) << "unable to remove temporary directory '" << td
+ << "': " << e;
+ }
+
+ if (verb >= 3)
+ text << "rmdir " << td;
+ }
+ }
+
+ void default_runner::
+ run (environment& env,
+ const command_expr& expr,
+ size_t li,
+ const location& ll)
+ {
+ if (verb >= 3)
+ text << ": " << expr;
+
+ // Run the expression if we are not in the dry-run mode or if it
+ // executes the set or exit builtin and just print the expression
+ // otherwise at verbosity level 2 and up.
+ //
+ if (!env.context.dry_run ||
+ find_if (expr.begin (), expr.end (),
+ [] (const expr_term& et)
+ {
+ const string& p (et.pipe.back ().program.string ());
+ return p == "set" || p == "exit";
+ }) != expr.end ())
+ build2::script::run (env, expr, li, ll);
+ else if (verb >= 2)
+ text << expr;
+ }
+
+ bool default_runner::
+ run_if (environment& env,
+ const command_expr& expr,
+ size_t li, const location& ll)
+ {
+ if (verb >= 3)
+ text << ": ?" << expr;
+
+ return build2::script::run_if (env, expr, li, ll);
+ }
+ }
+ }
+}
diff --git a/libbuild2/build/script/runner.hxx b/libbuild2/build/script/runner.hxx
new file mode 100644
index 0000000..431c446
--- /dev/null
+++ b/libbuild2/build/script/runner.hxx
@@ -0,0 +1,84 @@
+// file : libbuild2/build/script/runner.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_BUILD_SCRIPT_RUNNER_HXX
+#define LIBBUILD2_BUILD_SCRIPT_RUNNER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/build/script/script.hxx>
+
+namespace build2
+{
+ namespace build
+ {
+ struct common;
+
+ namespace script
+ {
+ class runner
+ {
+ public:
+ // Location is the script start location (for diagnostics, etc).
+ //
+ virtual void
+ enter (environment&, const location&) = 0;
+
+ // Index is the 1-based index of this command line in the command list.
+ // If it is 0 then it means there is only one command. This
+ // information can be used, for example, to derive file names.
+ //
+ // Location is the start position of this command line in the script.
+ // It can be used in diagnostics.
+ //
+ virtual void
+ run (environment&,
+ const command_expr&,
+ size_t index,
+ const location&) = 0;
+
+ virtual bool
+ run_if (environment&,
+ const command_expr&,
+ size_t,
+ const location&) = 0;
+
+ // Location is the script end location (for diagnostics, etc).
+ //
+ virtual void
+ leave (environment&, const location&) = 0;
+ };
+
+ // Run command expressions.
+ //
+ // In dry-run mode don't run the expressions unless they are if-
+ // conditions or execute the set or exit builtins, but print them at
+ // verbosity level 2 and up.
+ //
+ class default_runner: public runner
+ {
+ public:
+ virtual void
+ enter (environment&, const location&) override;
+
+ virtual void
+ run (environment&,
+ const command_expr&,
+ size_t,
+ const location&) override;
+
+ virtual bool
+ run_if (environment&,
+ const command_expr&,
+ size_t,
+ const location&) override;
+
+ virtual void
+ leave (environment&, const location&) override;
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_BUILD_SCRIPT_RUNNER_HXX
diff --git a/libbuild2/build/script/script.cxx b/libbuild2/build/script/script.cxx
new file mode 100644
index 0000000..3485f54
--- /dev/null
+++ b/libbuild2/build/script/script.cxx
@@ -0,0 +1,236 @@
+// file : libbuild2/build/script/script.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/build/script/script.hxx>
+
+#include <libbutl/filesystem.mxx>
+
+#include <libbuild2/target.hxx>
+
+#include <libbuild2/build/script/parser.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ // environment
+ //
+ static const optional<string> wd_name ("current directory");
+
+ environment::
+ environment (action a, const target_type& t, bool temp)
+ : build2::script::environment (
+ t.ctx,
+ cast<target_triplet> (t.ctx.global_scope["build.host"]),
+ dir_name_view (&work, &wd_name),
+ temp_dir.path, false /* temp_dir_keep */,
+ redirect (redirect_type::none),
+ redirect (redirect_type::merge, 2),
+ redirect (redirect_type::pass)),
+ target (t),
+ vars (context, false /* global */)
+ {
+ // Set special variables.
+ //
+ {
+ // $>
+ //
+ names ns;
+ for (const target_type* m (&t); m != nullptr; m = m->adhoc_member)
+ m->as_name (ns);
+
+ assign (var_pool.insert (">")) = move (ns);
+ }
+
+ {
+ // $<
+ //
+ // Note that at this stage (after execute_prerequisites()) ad hoc
+ // prerequisites are no longer in prerequisite_targets which means
+ // they won't end up in $< either. While at first thought ad hoc
+ // prerequisites in ad hoc recipes don't seem to make much sense,
+ // they could be handy to exclude certain prerequisites from $<
+ // while still treating them as such.
+ //
+ names ns;
+ for (const target_type* pt: t.prerequisite_targets[a])
+ if (pt != nullptr)
+ pt->as_name (ns);
+
+ assign (var_pool.insert ("<")) = move (ns);
+ }
+
+ // Set the $~ special variable.
+ //
+ if (temp)
+ {
+ create_temp_dir ();
+ assign (var_pool.insert<dir_path> ("~")) = temp_dir.path;
+ }
+ }
+
+ void environment::
+ create_temp_dir ()
+ {
+ // Create the temporary directory for this run regardless of the
+ // dry-run mode, since some commands still can be executed (see run()
+ // for details). This is also the reason why we are not using the
+ // build2 filesystem API that considers the dry-run mode.
+ //
+ // Note that the directory auto-removal is active.
+ //
+ dir_path& td (temp_dir.path);
+
+ assert (td.empty ()); // Must be called once.
+
+ try
+ {
+ td = dir_path::temp_path ("buildscript");
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to obtain temporary directory for buildscript "
+ << "execution: " << e;
+ }
+
+ mkdir_status r;
+
+ try
+ {
+ r = try_mkdir (td);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to create temporary directory '" << td << "': "
+ << e << endf;
+ }
+
+ // Note that the temporary directory can potentially stay after some
+ // abnormally terminated script run. Clean it up and reuse if that's
+ // the case.
+ //
+ if (r == mkdir_status::already_exists)
+ try
+ {
+ butl::rmdir_r (td, false /* dir */);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to cleanup temporary directory '" << td << "': "
+ << e;
+ }
+
+ if (verb >= 3)
+ text << "mkdir " << td;
+ }
+
+ void environment::
+ set_variable (string&& nm,
+ names&& val,
+ const string& attrs,
+ const location& ll)
+ {
+ // Check if we are trying to modify any of the special variables.
+ //
+ if (parser::special_variable (nm))
+ fail (ll) << "attempt to set '" << nm << "' special variable";
+
+ // Set the variable value and attributes.
+ //
+ const variable& var (var_pool.insert (move (nm)));
+
+ value& lhs (assign (var));
+
+ // If there are no attributes specified then the variable assignment
+ // is straightforward. Otherwise we will use the build2 parser helper
+ // function.
+ //
+ if (attrs.empty ())
+ lhs.assign (move (val), &var);
+ else
+ {
+ // If there is an error in the attributes string, our diagnostics
+ // will look like this:
+ //
+ // <attributes>:1:1 error: unknown value attribute x
+ // buildfile:10:1 info: while parsing attributes '[x]'
+ //
+ // Note that the attributes parsing error is the only reason for a
+ // failure.
+ //
+ auto df = make_diag_frame (
+ [attrs, &ll](const diag_record& dr)
+ {
+ dr << info (ll) << "while parsing attributes '" << attrs << "'";
+ });
+
+ parser p (context);
+ p.apply_value_attributes (&var,
+ lhs,
+ value (move (val)),
+ attrs,
+ token_type::assign,
+ path_name ("<attributes>"));
+ }
+ }
+
+ lookup environment::
+ lookup (const variable& var) const
+ {
+ auto p (vars.lookup (var));
+ if (p.first != nullptr)
+ return lookup_type (*p.first, p.second, vars);
+
+ return lookup_in_buildfile (var.name);
+ }
+
+ lookup environment::
+ lookup (const string& name) const
+ {
+ // Every variable that is ever set in a script has been added during
+ // variable line execution or introduced with the set builtin. Which
+ // means that if one is not found in the environment pool then it can
+ // only possibly be set in the buildfile.
+ //
+ const variable* pvar (var_pool.find (name));
+ return pvar != nullptr ? lookup (*pvar) : lookup_in_buildfile (name);
+ }
+
+ lookup environment::
+ lookup_in_buildfile (const string& n) const
+ {
+ // Switch to the corresponding buildfile variable. Note that we don't
+ // want to insert a new variable into the pool (we might be running
+ // in parallel). Plus, if there is no such variable, then we cannot
+ // possibly find any value.
+ //
+ const variable* pvar (context.var_pool.find (n));
+
+ if (pvar == nullptr)
+ return lookup_type ();
+
+ return target[*pvar];
+ }
+
+ value& environment::
+ append (const variable& var)
+ {
+ auto l (lookup (var));
+
+ if (l.defined () && l.belongs (*this)) // Existing var.
+ return vars.modify (l);
+
+ value& r (assign (var)); // NULL.
+
+ if (l.defined ())
+ r = *l; // Copy value (and type) from the outer scope.
+
+ return r;
+ }
+ }
+ }
+}
diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx
new file mode 100644
index 0000000..2118568
--- /dev/null
+++ b/libbuild2/build/script/script.hxx
@@ -0,0 +1,156 @@
+// file : libbuild2/build/script/script.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_BUILD_SCRIPT_SCRIPT_HXX
+#define LIBBUILD2_BUILD_SCRIPT_SCRIPT_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/forward.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/variable.hxx>
+#include <libbuild2/filesystem.hxx> // auto_rmdir
+
+#include <libbuild2/script/script.hxx>
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ using build2::script::line;
+ using build2::script::line_type;
+ using build2::script::redirect;
+ using build2::script::redirect_type;
+ using build2::script::expr_term;
+ using build2::script::command_expr;
+
+ // Notes:
+ //
+ // - Once parsed, the script can be executed in multiple threads with
+ // the state (variable values, etc) maintained in the environment.
+ //
+ // - The default script command redirects semantics is 'none' for stdin,
+ // 'merge' into stderr for stdout, and 'pass' for stderr.
+ //
+ class script
+ {
+ public:
+ // Note that the variables are not pre-entered into a pool during the
+ // parsing phase, so the line variable pointers are NULL.
+ //
+ build2::script::lines lines;
+
+ // Referenced ordinary (non-special) variables.
+ //
+ // Used for the script semantics change tracking. The variable list is
+ // filled during the pre-parsing phase and is checked against during
+ // the execution phase. If during execution some non-script-local
+ // variable is not found in the list (may happen for a computed name),
+ // then the execution fails since the script semantics may not be
+ // properly tracked (the variable value change will not trigger the
+ // target rebuild).
+ //
+ small_vector<string, 2> vars; // 2 for command and options.
+
+ // True if script references the $~ special variable.
+ //
+ bool temp_dir = false;
+
+ location start_loc;
+ location end_loc;
+ };
+
+ class environment: public build2::script::environment
+ {
+ public:
+ using target_type = build2::target;
+
+ environment (action, const target_type&, bool temp_dir);
+
+ environment (environment&&) = delete;
+ environment (const environment&) = delete;
+ environment& operator= (environment&&) = delete;
+ environment& operator= (const environment&) = delete;
+
+ public:
+ // Primary target this environment is for.
+ //
+ const target_type& target;
+
+ // Script-local variable pool and map.
+ //
+ // Note that if we lookup the variable by passing name as a string,
+ // then it will be looked up in the wrong pool.
+ //
+ variable_pool var_pool;
+ variable_map vars;
+
+ // Temporary directory for the script run.
+ //
+ // Currently this directory is removed regardless of the script
+ // execution success or failure. Later, to help with troubleshooting,
+ // we may invent an option that suppresses the removal of temporary
+ // files in general.
+ //
+ // This directory is available to the user via the $~ special
+ // variable. Note, however, that the following filesystem entry
+ // prefixes are reserved:
+ //
+ // stdin*
+ // stdout*
+ // stderr*
+ //
+ auto_rmdir temp_dir;
+
+ virtual void
+ set_variable (string&& name,
+ names&&,
+ const string& attrs,
+ const location&) override;
+
+ virtual void
+ create_temp_dir () override;
+
+ // Variables.
+ //
+ public:
+ // Lookup the variable starting from this environment, then the
+ // primary target, and then outer buildfile scopes.
+ //
+ using lookup_type = build2::lookup;
+
+ lookup_type
+ lookup (const variable&) const;
+
+ lookup_type
+ lookup (const string&) const;
+
+ // As above but only look for buildfile variables.
+ //
+ lookup_type
+ lookup_in_buildfile (const string&) const;
+
+ // Return a value suitable for assignment. If the variable does not
+ // exist in this environment's variable map, then a new one with the
+ // NULL value is added and returned. Otherwise the existing value is
+ // returned.
+ //
+ value&
+ assign (const variable& var) {return vars.assign (var);}
+
+ // Return a value suitable for append/prepend. If the variable does
+ // not exist in this environment's variable map, then outer scopes are
+ // searched for the same variable. If found then a new variable with
+ // the found value is added to the environment and returned. Otherwise
+ // this function proceeds as assign() above.
+ //
+ value&
+ append (const variable&);
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_BUILD_SCRIPT_SCRIPT_HXX
diff --git a/libbuild2/build/script/token.cxx b/libbuild2/build/script/token.cxx
new file mode 100644
index 0000000..8f8477b
--- /dev/null
+++ b/libbuild2/build/script/token.cxx
@@ -0,0 +1,23 @@
+// file : libbuild2/build/script/token.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/build/script/token.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ void
+ token_printer (ostream& os, const token& t, print_mode m)
+ {
+ // No buildscript-specific tokens so far.
+ //
+ build2::script::token_printer (os, t, m);
+ }
+ }
+ }
+}
diff --git a/libbuild2/build/script/token.hxx b/libbuild2/build/script/token.hxx
new file mode 100644
index 0000000..954b412
--- /dev/null
+++ b/libbuild2/build/script/token.hxx
@@ -0,0 +1,36 @@
+// file : libbuild2/build/script/token.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_BUILD_SCRIPT_TOKEN_HXX
+#define LIBBUILD2_BUILD_SCRIPT_TOKEN_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/script/token.hxx>
+
+namespace build2
+{
+ namespace build
+ {
+ namespace script
+ {
+ struct token_type: build2::script::token_type
+ {
+ using base_type = build2::script::token_type;
+
+ // No buildscript-specific tokens so far.
+ //
+
+ token_type () = default;
+ token_type (value_type v): base_type (v) {}
+ token_type (build2::token_type v): base_type (v) {}
+ };
+
+ void
+ token_printer (ostream&, const token&, print_mode);
+ }
+ }
+}
+
+#endif // LIBBUILD2_BUILD_SCRIPT_TOKEN_HXX
diff --git a/libbuild2/buildfile b/libbuild2/buildfile
index edd0570..5f7bc11 100644
--- a/libbuild2/buildfile
+++ b/libbuild2/buildfile
@@ -27,6 +27,11 @@ lib{build2}: libul{build2}: \
{hxx ixx txx cxx}{* -utility-*installed -config -version -*.test...} \
{hxx}{config version}
+libul{build2}: script/{hxx ixx txx cxx}{** -*-options -**.test...} \
+ script/{hxx ixx cxx}{builtin-options}
+
+libul{build2}: build/{hxx ixx txx cxx}{** -**.test...}
+
# Note that this won't work in libul{} since it's not installed.
#
lib{build2}: cxx{utility-installed}: for_install = true
@@ -78,8 +83,7 @@ libul{build2}: dist/{hxx ixx txx cxx}{** -**.test...}
libul{build2}: install/{hxx ixx txx cxx}{** -**.test...}
-libul{build2}: test/{hxx ixx txx cxx}{** -**-options -**.test...} \
- test/script/{hxx ixx cxx}{builtin-options}
+libul{build2}: test/{hxx ixx txx cxx}{** -**.test...}
libul{build2}: $int_libs
@@ -105,6 +109,8 @@ exe{*.test}:
}
for t: cxx{ *.test...} \
+ script/cxx{**.test...} \
+ build/cxx{**.test...} \
config/cxx{**.test...} \
dist/cxx{**.test...} \
install/cxx{**.test...} \
@@ -191,15 +197,15 @@ else
# Generated options parser.
#
-test/script/
+script/
{
if $cli.configured
{
cli.cxx{builtin-options}: cli{builtin}
cli.options += --std c++11 -I $src_root --include-with-brackets \
---include-prefix libbuild2/test/script --guard-prefix LIBBUILD2_TEST_SCRIPT \
---cli-namespace build2::test::script::cli --generate-vector-scanner \
+--include-prefix libbuild2/script --guard-prefix LIBBUILD2_SCRIPT \
+--cli-namespace build2::script::cli --generate-vector-scanner \
--generate-specifier --suppress-usage
cli.cxx{*}:
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx
index 8b082cc..6b9104f 100644
--- a/libbuild2/cc/compile-rule.cxx
+++ b/libbuild2/cc/compile-rule.cxx
@@ -5247,6 +5247,8 @@ namespace build2
dir_path compile_rule::
find_modules_sidebuild (const scope& rs) const
{
+ context& ctx (rs.ctx);
+
// First figure out where we are going to build. We want to avoid
// multiple sidebuilds so the outermost scope that has loaded the
// cc.config module and that is within our amalgmantion seems like a
@@ -5284,18 +5286,18 @@ namespace build2
modules_sidebuild_dir /=
x);
- const scope* ps (&rs.ctx.scopes.find (pd));
+ const scope* ps (&ctx.scopes.find (pd));
if (ps->out_path () != pd)
{
// Switch the phase to load then create and load the subproject.
//
- phase_switch phs (rs.ctx, run_phase::load);
+ phase_switch phs (ctx, run_phase::load);
// Re-test again now that we are in exclusive phase (another thread
// could have already created and loaded the subproject).
//
- ps = &rs.ctx.scopes.find (pd);
+ ps = &ctx.scopes.find (pd);
if (ps->out_path () != pd)
{
@@ -5322,15 +5324,13 @@ namespace build2
{string (x) + '.'}, /* root_modules */
"", /* root_post */
nullopt, /* config_module */
+ nullopt, /* config_file */
false, /* buildfile */
"the cc module",
2); /* verbosity */
}
- ps = &load_project (as->rw () /* lock */,
- pd,
- pd,
- false /* forwarded */);
+ ps = &load_project (ctx, pd, pd, false /* forwarded */);
}
}
diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx
index 2a0dbd2..07051c5 100644
--- a/libbuild2/cc/init.cxx
+++ b/libbuild2/cc/init.cxx
@@ -142,9 +142,9 @@ namespace build2
// Register scope operation callback.
//
- // It feels natural to do clean up sidebuilds as a post operation but
- // that prevents the (otherwise-empty) out root directory to be cleaned
- // up (via the standard fsdir{} chain).
+ // It feels natural to clean up sidebuilds as a post operation but that
+ // prevents the (otherwise-empty) out root directory to be cleaned up
+ // (via the standard fsdir{} chain).
//
rs.operation_callbacks.emplace (
perform_clean_id,
diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx
index d57f5eb..d2be3d8 100644
--- a/libbuild2/cc/lexer.cxx
+++ b/libbuild2/cc/lexer.cxx
@@ -48,8 +48,8 @@ namespace build2
auto lexer::
peek (bool e) -> xchar
{
- if (unget_)
- return ungetc_;
+ if (ungetn_ != 0)
+ return ungetb_[ungetn_ - 1];
if (unpeek_)
return unpeekc_;
@@ -98,11 +98,8 @@ namespace build2
inline auto lexer::
get (bool e) -> xchar
{
- if (unget_)
- {
- unget_ = false;
- return ungetc_;
- }
+ if (ungetn_ != 0)
+ return ungetb_[--ungetn_];
else
{
xchar c (peek (e));
@@ -117,7 +114,7 @@ namespace build2
// Increment the logical line similar to how base will increment the
// physical (the column counts are the same).
//
- if (log_line_ && c == '\n' && !unget_)
+ if (log_line_ && c == '\n' && ungetn_ == 0)
++*log_line_;
base::get (c);
diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx
index b11ee42..bc8eb8e 100644
--- a/libbuild2/cc/link-rule.cxx
+++ b/libbuild2/cc/link-rule.cxx
@@ -2145,22 +2145,8 @@ namespace build2
// 1 is resource ID, 24 is RT_MANIFEST. We also need to
// escape Windows path backslashes.
//
- os << "1 24 \"";
-
- const string& s (mf.string ());
- for (size_t i (0), j;; i = j + 1)
- {
- j = s.find ('\\', i);
- os.write (s.c_str () + i,
- (j == string::npos ? s.size () : j) - i);
-
- if (j == string::npos)
- break;
-
- os.write ("\\\\", 2);
- }
-
- os << "\"" << endl;
+ os << "1 24 \"" << sanitize_strlit (mf.string ()) << '"'
+ << endl;
os.close ();
rm.cancel ();
@@ -3035,14 +3021,14 @@ namespace build2
auto_rmfile trm;
string targ;
{
- // Calculate the would-be command line length similar to how process'
- // implementation does it.
- //
auto quote = [s = string ()] (const char* a) mutable -> const char*
{
return process::quote_argument (a, s);
};
+ // Calculate the would-be command line length similar to how process'
+ // implementation does it.
+ //
size_t n (0);
for (const char* a: args)
{
diff --git a/libbuild2/config/operation.cxx b/libbuild2/config/operation.cxx
index 17eb99a..41d982b 100644
--- a/libbuild2/config/operation.cxx
+++ b/libbuild2/config/operation.cxx
@@ -1103,8 +1103,7 @@ namespace build2
// this information is stored). So what we are going to do is bootstrap
// the newly created project, similar to the way main() does it.
//
- scope& gs (ctx.global_scope.rw ());
- scope& rs (load_project (gs, d, d, false /* fwd */, false /* load */));
+ scope& rs (load_project (ctx, d, d, false /* fwd */, false /* load */));
// Add the default config.config.persist value unless there is a custom
// one (specified as a command line override).
@@ -1223,6 +1222,7 @@ namespace build2
rmod,
"", /* root_post */
string ("config"), /* config_module */
+ nullopt, /* config_file */
true, /* buildfile */
"the create meta-operation");
diff --git a/libbuild2/context.cxx b/libbuild2/context.cxx
index 0be0046..a3455ea 100644
--- a/libbuild2/context.cxx
+++ b/libbuild2/context.cxx
@@ -340,8 +340,10 @@ namespace build2
// And so the first token should be a word which can be either a
// variable name (potentially with the directory qualification) or just
// the directory, in which case it should be followed by another word
- // (unqualified variable name).
+ // (unqualified variable name). To avoid treating any of the visibility
+ // modifiers as special we use the cmdvar mode.
//
+ l.mode (lexer_mode::cmdvar);
token t (l.next ());
optional<dir_path> dir;
@@ -892,6 +894,14 @@ namespace build2
}
phase_switch::
+ phase_switch (phase_unlock&& u, phase_lock&& l)
+ : old_phase (u.l->phase), new_phase (l.phase)
+ {
+ phase_lock_instance = u.l; // Disarms phase_lock
+ u.l = nullptr; // Disarms phase_unlock
+ }
+
+ phase_switch::
~phase_switch () noexcept (false)
{
phase_lock* pl (phase_lock_instance);
diff --git a/libbuild2/context.hxx b/libbuild2/context.hxx
index 573b8d1..2a9c93e 100644
--- a/libbuild2/context.hxx
+++ b/libbuild2/context.hxx
@@ -428,7 +428,7 @@ namespace build2
//
const loaded_modules_lock* modules_lock;
- // Nested context for updating build system modules.
+ // Nested context for updating build system modules and ad hoc recipes.
//
// Note that such a context itself should normally have modules_context
// setup to point to itself (see import_module() for details).
@@ -438,9 +438,10 @@ namespace build2
public:
// If module_context is absent, then automatic updating of build system
- // modules is disabled. If it is NULL, then the context will be created
- // lazily if and when necessary. Otherwise, it should be a properly setup
- // context (including, normally, a self-reference in modules_context).
+ // modules and ad hoc recipes is disabled. If it is NULL, then the context
+ // will be created lazily if and when necessary. Otherwise, it should be a
+ // properly setup context (including, normally, a self-reference in
+ // modules_context).
//
explicit
context (scheduler&,
@@ -553,9 +554,17 @@ namespace build2
// Assuming we have a lock on the current phase, temporarily switch to a
// new phase and switch back on destruction.
//
+ // The second constructor can be used for a switch with an intermittent
+ // unlock:
+ //
+ // phase_unlock pu;
+ // phase_lock pl;
+ // phase_switch ps (move (pu), move (pl));
+ //
struct LIBBUILD2_SYMEXPORT phase_switch
{
- explicit phase_switch (context&, run_phase);
+ phase_switch (context&, run_phase);
+ phase_switch (phase_unlock&&, phase_lock&&);
~phase_switch () noexcept (false);
run_phase old_phase, new_phase;
diff --git a/libbuild2/dist/operation.cxx b/libbuild2/dist/operation.cxx
index 34dc747..8dd8a6e 100644
--- a/libbuild2/dist/operation.cxx
+++ b/libbuild2/dist/operation.cxx
@@ -319,7 +319,7 @@ namespace build2
ctx.current_operation (op_update, nullptr, false /* diag_noise */);
- action a (perform_id, update_id);
+ action a (perform_update_id);
mo_perform.match (params, a, files,
1 /* diag (failures only) */,
diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx
index 8ee68b7..9f60900 100644
--- a/libbuild2/dump.cxx
+++ b/libbuild2/dump.cxx
@@ -3,6 +3,7 @@
#include <libbuild2/dump.hxx>
+#include <libbuild2/rule.hxx>
#include <libbuild2/scope.hxx>
#include <libbuild2/target.hxx>
#include <libbuild2/context.hxx>
@@ -213,7 +214,90 @@ namespace build2
os << ind << t << ':';
- // First print target/rule-specific variables, if any.
+ // First check if this is the simple case where we can print everything
+ // as a single declaration.
+ //
+ const prerequisites& ps (t.prerequisites ());
+ bool simple (true);
+ for (const prerequisite& p: ps)
+ {
+ if (!p.vars.empty ()) // Has prerequisite-specific vars.
+ {
+ simple = false;
+ break;
+ }
+ }
+
+ // If the target has been matched to a rule, we also print resolved
+ // prerequisite targets.
+ //
+ // Note: running serial and task_count is 0 before any operation has
+ // started.
+ //
+ const prerequisite_targets* pts (nullptr);
+ {
+ action inner; // @@ Only for the inner part of the action currently.
+
+ if (size_t c = t[inner].task_count.load (memory_order_relaxed))
+ {
+ if (c == t.ctx.count_applied () || c == t.ctx.count_executed ())
+ {
+ pts = &t.prerequisite_targets[inner];
+
+ bool f (false);
+ for (const target* pt: *pts)
+ {
+ if (pt != nullptr)
+ {
+ f = true;
+ break;
+ }
+ }
+
+ if (!f)
+ pts = nullptr;
+ }
+ }
+ }
+
+ auto print_pts = [&os, &ps, pts] ()
+ {
+ for (const target* pt: *pts)
+ {
+ if (pt != nullptr)
+ os << ' ' << *pt;
+ }
+
+ // Only omit '|' if we have no prerequisites nor targets.
+ //
+ if (!ps.empty ())
+ {
+ os << " |";
+ return true;
+ }
+
+ return false;
+ };
+
+ if (simple)
+ {
+ if (pts != nullptr)
+ print_pts ();
+
+ for (const prerequisite& p: ps)
+ {
+ // Print it as a target if one has been cached.
+ //
+ if (const target* t = p.target.load (memory_order_relaxed)) // Serial.
+ os << ' ' << *t;
+ else
+ os << ' ' << p;
+ }
+ }
+
+ bool used (false); // Target header has been used.
+
+ // Print target/rule-specific variables, if any.
//
{
bool tv (!t.vars.empty ());
@@ -258,87 +342,78 @@ namespace build2
if (rel)
stream_verb (os, nsv);
- os << endl
- << ind << t << ':';
+ used = true;
}
}
- bool used (false); // Target header has been used to display prerequisites.
-
- // If the target has been matched to a rule, first print resolved
- // prerequisite targets.
+ // Then ad hoc recipes, if any.
//
- // Note: running serial and task_count is 0 before any operation has
- // started.
- //
- action inner; // @@ Only for the inner part of the action currently.
-
- if (size_t c = t[inner].task_count.load (memory_order_relaxed))
+ if (!t.adhoc_recipes.empty ())
{
- if (c == t.ctx.count_applied () || c == t.ctx.count_executed ())
+ for (const adhoc_recipe r: t.adhoc_recipes)
{
- bool f (false);
- for (const target* pt: t.prerequisite_targets[inner])
- {
- if (pt == nullptr) // Skipped.
- continue;
-
- os << ' ' << *pt;
- f = true;
- }
-
- // Only omit '|' if we have no prerequisites nor targets.
- //
- if (f || !t.prerequisites ().empty ())
- {
- os << " |";
- used = true;
- }
+ os << endl;
+ r.rule->dump (os, ind); // @@ TODO: pass action(s).
}
+
+ used = true;
}
- // Print prerequisites. Those that have prerequisite-specific variables
- // have to be printed as a separate dependency.
- //
- const prerequisites& ps (t.prerequisites ());
- for (auto i (ps.begin ()), e (ps.end ()); i != e; )
+ if (!simple)
{
- const prerequisite& p (*i++);
- bool ps (!p.vars.empty ()); // Has prerequisite-specific vars.
-
- if (ps && used) // If it has been used, get a new header.
+ if (used)
+ {
os << endl
<< ind << t << ':';
- // Print it as a target if one has been cached.
- //
- if (const target* t = p.target.load (memory_order_relaxed)) // Serial.
- os << ' ' << *t;
- else
- os << ' ' << p;
+ used = false;
+ }
+
+ if (pts != nullptr)
+ used = print_pts () || used;
- if (ps)
+ // Print prerequisites. Those that have prerequisite-specific variables
+ // have to be printed as a separate dependency.
+ //
+ for (auto i (ps.begin ()), e (ps.end ()); i != e; )
{
- if (rel)
- stream_verb (os, osv); // We want variable values in full.
+ const prerequisite& p (*i++);
+ bool ps (!p.vars.empty ()); // Has prerequisite-specific vars.
- os << ':' << endl
- << ind << '{';
- ind += " ";
- dump_variables (os, ind, p.vars, s, variable_kind::prerequisite);
- ind.resize (ind.size () - 2);
- os << endl
- << ind << '}';
+ if (ps && used) // If it has been used, get a new header.
+ os << endl
+ << ind << t << ':';
- if (rel)
- stream_verb (os, nsv);
+ // Print it as a target if one has been cached.
+ //
+ if (const target* t = p.target.load (memory_order_relaxed)) // Serial.
+ os << ' ' << *t;
+ else
+ os << ' ' << p;
- if (i != e) // If we have another, get a new header.
+ if (ps)
+ {
+ if (rel)
+ stream_verb (os, osv); // We want variable values in full.
+
+ os << ':' << endl
+ << ind << '{';
+ ind += " ";
+ dump_variables (os, ind, p.vars, s, variable_kind::prerequisite);
+ ind.resize (ind.size () - 2);
os << endl
- << ind << t << ':';
- }
+ << ind << '}';
+
+ if (rel)
+ stream_verb (os, nsv);
- used = !ps;
+ if (i != e) // If we have another, get a new header.
+ os << endl
+ << ind << t << ':';
+ }
+
+ used = !ps;
+ }
}
if (rel)
diff --git a/libbuild2/file.cxx b/libbuild2/file.cxx
index 0bcb198..571980e 100644
--- a/libbuild2/file.cxx
+++ b/libbuild2/file.cxx
@@ -17,7 +17,8 @@
#include <libbuild2/lexer.hxx>
#include <libbuild2/parser.hxx>
-#include <libbuild2/config/utility.hxx> // lookup_config()
+#include <libbuild2/config/module.hxx> // config::module::version
+#include <libbuild2/config/utility.hxx> // config::lookup_config()
using namespace std;
using namespace butl;
@@ -310,13 +311,13 @@ namespace build2
}
scope_map::iterator
- create_root (scope& s, const dir_path& out_root, const dir_path& src_root)
+ create_root (context& ctx,
+ const dir_path& out_root,
+ const dir_path& src_root)
{
- auto i (s.ctx.scopes.rw (s).insert (out_root, true /* root */));
+ auto i (ctx.scopes.rw ().insert (out_root, true /* root */));
scope& rs (i->second);
- context& ctx (rs.ctx);
-
// Set out_path. Note that src_path is set in setup_root() below.
//
if (rs.out_path_ != &i->first)
@@ -1208,7 +1209,7 @@ namespace build2
// probably be tried first since that src_root was explicitly configured
// by the user. After that, #2 followed by #1 seems reasonable.
//
- scope& rs (create_root (root, out_root, dir_path ())->second);
+ scope& rs (create_root (ctx, out_root, dir_path ())->second);
bool bstrapped (bootstrapped (rs));
@@ -1275,7 +1276,7 @@ namespace build2
// The same logic to src_root as in create_bootstrap_outer().
//
- scope& rs (create_root (root, out_root, dir_path ())->second);
+ scope& rs (create_root (ctx, out_root, dir_path ())->second);
optional<bool> altn;
if (!bootstrapped (rs))
@@ -1466,17 +1467,16 @@ namespace build2
}
scope&
- load_project (scope& s,
+ load_project (context& ctx,
const dir_path& out_root,
const dir_path& src_root,
bool forwarded,
bool load)
{
+ assert (ctx.phase == run_phase::load);
assert (!forwarded || out_root != src_root);
- context& ctx (s.ctx);
-
- auto i (create_root (s, out_root, src_root));
+ auto i (create_root (ctx, out_root, src_root));
scope& rs (i->second);
if (!bootstrapped (rs))
@@ -2065,13 +2065,11 @@ namespace build2
fwd = (src_root != out_root);
}
- scope& gs (ctx.global_scope.rw ());
-
for (const scope* proot (nullptr); ; proot = root)
{
bool top (proot == nullptr);
- root = &create_root (gs, out_root, src_root)->second;
+ root = &create_root (ctx, out_root, src_root)->second;
bool bstrapped (bootstrapped (*root));
@@ -2153,6 +2151,8 @@ namespace build2
//
load_root (*root);
+ scope& gs (ctx.global_scope.rw ());
+
// Use a temporary scope so that the export stub doesn't mess anything up.
//
temp_scope ts (gs);
@@ -2555,11 +2555,14 @@ namespace build2
const string& rpre,
const strings& rmod,
const string& rpos,
- const optional<string>& config,
+ const optional<string>& config_mod,
+ const optional<string>& config_file,
bool buildfile,
const char* who,
uint16_t verbosity)
{
+ assert (!config_file || (config_mod && *config_mod == "config"));
+
string hdr ("# Generated by " + string (who) + ". Edit if you know"
" what you are doing.\n"
"#");
@@ -2610,12 +2613,12 @@ namespace build2
ofs << endl;
- if (config)
- ofs << "using " << *config << endl;
+ if (config_mod)
+ ofs << "using " << *config_mod << endl;
for (const string& m: bmod)
{
- if (!config || m != *config)
+ if (!config_mod || m != *config_mod)
ofs << "using " << m << endl;
}
@@ -2675,6 +2678,32 @@ namespace build2
}
}
+ // Write build/config.build.
+ //
+ if (config_file)
+ {
+ path f (d / std_build_dir / "config.build"); // std_config_file
+
+ if (verb >= verbosity)
+ text << (verb >= 2 ? "cat >" : "save ") << f;
+
+ try
+ {
+ ofdstream ofs (f);
+
+ ofs << hdr << endl
+ << "config.version = " << config::module::version << endl
+ << endl
+ << *config_file << endl;
+
+ ofs.close ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to write to " << f << ": " << e;
+ }
+ }
+
// Write root buildfile.
//
if (buildfile)
diff --git a/libbuild2/file.hxx b/libbuild2/file.hxx
index 0123591..78be600 100644
--- a/libbuild2/file.hxx
+++ b/libbuild2/file.hxx
@@ -106,11 +106,10 @@ namespace build2
source_once (scope& root, scope& base, const path&, scope& once);
// Create project's root scope. Only set the src_root variable if the passed
- // src_root value is not empty. The scope argument is only used for context
- // and as a proof of lock.
+ // src_root value is not empty.
//
LIBBUILD2_SYMEXPORT scope_map::iterator
- create_root (scope&, const dir_path& out_root, const dir_path& src_root);
+ create_root (context&, const dir_path& out_root, const dir_path& src_root);
// Setup root scope. Note that it assumes the src_root variable has already
// been set.
@@ -142,10 +141,8 @@ namespace build2
// loaded and currently we do not add the newly loaded subproject to the
// outer project's subprojects map.
//
- // The scope argument is only used as proof of lock.
- //
LIBBUILD2_SYMEXPORT scope&
- load_project (scope&,
+ load_project (context&,
const dir_path& out_root,
const dir_path& src_root,
bool forwarded,
@@ -441,6 +438,7 @@ namespace build2
const strings& root_modules, // Root modules.
const string& root_post, // Extra root.build text.
const optional<string>& config_module, // Config module to load.
+ const optional<string>& config_file, // Ad hoc config.build contents.
bool buildfile, // Create root buildfile.
const char* who, // Who is creating it.
uint16_t verbosity = 1); // Diagnostic verbosity.
diff --git a/libbuild2/functions-name.cxx b/libbuild2/functions-name.cxx
index 283b1a6..70659ee 100644
--- a/libbuild2/functions-name.cxx
+++ b/libbuild2/functions-name.cxx
@@ -4,6 +4,7 @@
#include <libbuild2/scope.hxx>
#include <libbuild2/function.hxx>
#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
using namespace std;
@@ -14,7 +15,7 @@ namespace build2
// out of scope). See scope::find_target_type() for details.
//
static pair<name, optional<string>>
- to_target (const scope* s, name&& n)
+ to_target_name (const scope* s, name&& n)
{
optional<string> e;
@@ -31,74 +32,129 @@ namespace build2
return make_pair (move (n), move (e));
}
+ static const target&
+ to_target (const scope& s, name&& n, name&& o)
+ {
+ if (const target* r = search_existing (n, s, o.dir))
+ return *r;
+
+ fail << "target "
+ << (n.pair ? names {move (n), move (o)} : names {move (n)})
+ << " not found" << endf;
+ }
+
void
name_functions (function_map& m)
{
- function_family f (m, "name");
-
// These functions treat a name as a target/prerequisite name.
//
// While on one hand it feels like calling them target.name(), etc., would
// have been more appropriate, on the other hand they can also be called
// on prerequisite names. They also won't always return the same result as
// if we were interrogating an actual target (e.g., the directory may be
- // relative).
+ // relative). Plus we now have functions that can only be called on
+ // targets (see below).
//
- f["name"] = [](const scope* s, name n)
+ function_family fn (m, "name");
+
+ fn["name"] = [](const scope* s, name n)
{
- return to_target (s, move (n)).first.value;
+ return to_target_name (s, move (n)).first.value;
};
- f["name"] = [](const scope* s, names ns)
+ fn["name"] = [](const scope* s, names ns)
{
- return to_target (s, convert<name> (move (ns))).first.value;
+ return to_target_name (s, convert<name> (move (ns))).first.value;
};
// Note: returns NULL if extension is unspecified (default) and empty if
// specified as no extension.
//
- f["extension"] = [](const scope* s, name n)
+ fn["extension"] = [](const scope* s, name n)
{
- return to_target (s, move (n)).second;
+ return to_target_name (s, move (n)).second;
};
- f["extension"] = [](const scope* s, names ns)
+ fn["extension"] = [](const scope* s, names ns)
{
- return to_target (s, convert<name> (move (ns))).second;
+ return to_target_name (s, convert<name> (move (ns))).second;
};
- f["directory"] = [](const scope* s, name n)
+ fn["directory"] = [](const scope* s, name n)
{
- return to_target (s, move (n)).first.dir;
+ return to_target_name (s, move (n)).first.dir;
};
- f["directory"] = [](const scope* s, names ns)
+ fn["directory"] = [](const scope* s, names ns)
{
- return to_target (s, convert<name> (move (ns))).first.dir;
+ return to_target_name (s, convert<name> (move (ns))).first.dir;
};
- f["target_type"] = [](const scope* s, name n)
+ fn["target_type"] = [](const scope* s, name n)
{
- return to_target (s, move (n)).first.type;
+ return to_target_name (s, move (n)).first.type;
};
- f["target_type"] = [](const scope* s, names ns)
+ fn["target_type"] = [](const scope* s, names ns)
{
- return to_target (s, convert<name> (move (ns))).first.type;
+ return to_target_name (s, convert<name> (move (ns))).first.type;
};
// Note: returns NULL if no project specified.
//
- f["project"] = [](const scope* s, name n)
+ fn["project"] = [](const scope* s, name n)
{
- return to_target (s, move (n)).first.proj;
+ return to_target_name (s, move (n)).first.proj;
};
- f["project"] = [](const scope* s, names ns)
+ fn["project"] = [](const scope* s, names ns)
{
- return to_target (s, convert<name> (move (ns))).first.proj;
+ return to_target_name (s, convert<name> (move (ns))).first.proj;
+ };
+
+ // Functions that can be called only on real targets.
+ //
+ function_family ft (m, "target");
+
+ // Return the path of one or more path-based targets. Fails if called out
+ // of scope, if a target is not found or is not path-based, or if its path
+ // has not been assigned.
+ //
+ // Note: registered in the target family (ft) rather than the name family
+ // (fn) so that it is callable as target.path(), the name used in the
+ // diagnostics below.
+ //
+ ft["path"] = [](const scope* s, names ns)
+ {
+ if (s == nullptr)
+ fail << "target.path() called out of scope" << endf;
+
+ // Most of the time we will have a single target so optimize for that.
+ //
+ small_vector<path, 1> r;
+
+ for (auto i (ns.begin ()); i != ns.end (); ++i)
+ {
+ // An out-qualified target is represented as a pair of names, in which
+ // case the second name (consumed here) supplies the out directory.
+ //
+ name& n (*i), o;
+ const target& t (to_target (*s, move (n), move (n.pair ? *++i : o)));
+
+ if (const auto* pt = t.is_a<path_target> ())
+ {
+ const path& p (pt->path ());
+
+ if (&p != &empty_path)
+ r.push_back (p);
+ else
+ fail << "target " << t << " path is not assigned";
+ }
+ else
+ fail << "target " << t << " is not path-based";
+ }
+
+ // We want the result to be path if we were given a single target and
+ // paths if multiple (or zero). The problem is, we cannot distinguish it
+ // based on the argument type (e.g., name vs names) since passing an
+ // out-qualified single target requires two names.
+ //
+ if (r.size () == 1)
+ return value (move (r[0]));
+
+ return value (paths (make_move_iterator (r.begin ()),
+ make_move_iterator (r.end ())));
+ };
// Name-specific overloads from builtins.
//
- function_family b (m, "builtin");
+ function_family fb (m, "builtin");
- b[".concat"] = [](dir_path d, name n)
+ fb[".concat"] = [](dir_path d, name n)
{
d /= n.dir;
n.dir = move (d);
diff --git a/libbuild2/lexer+foreign.test.testscript b/libbuild2/lexer+foreign.test.testscript
new file mode 100644
index 0000000..94c83c1
--- /dev/null
+++ b/libbuild2/lexer+foreign.test.testscript
@@ -0,0 +1,96 @@
+# file : libbuild2/lexer+foreign.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = foreign=2
+
+: basics
+:
+$* <<EOI >>EOO
+echo foo
+}}
+EOI
+'echo foo
+'
+}}
+<newline>
+EOO
+
+: empty
+:
+$* <<EOI >>EOO
+}}
+EOI
+''
+}}
+<newline>
+EOO
+
+: braces
+:
+$* <<EOI >>EOO
+}
+}}}
+{{}}
+}} }
+}}
+EOI
+'}
+}}}
+{{}}
+}} }
+'
+}}
+<newline>
+EOO
+
+: whitespaces
+:
+$* <' }} ' >>EOO # Note: there are TABs.
+''
+}}
+<newline>
+EOO
+
+: comment
+:
+$* <'}} # comment' >>EOO
+''
+}}
+<newline>
+EOO
+
+: eos
+:
+$* <:'}}' >>EOO
+''
+}}
+EOO
+
+: missing
+: Note that we get eos right away (i.e., there is no word token).
+:
+$* <<EOI
+}
+}}}
+{{}}
+}} }
+}
+EOI
+
+: three
+:
+{
+ test.arguments = foreign=3
+
+ : basic
+ :
+ $* <<EOI >>EOO
+ echo foo
+ }}}
+ EOI
+ ' echo foo
+ '
+ }}}
+ <newline>
+ EOO
+}
diff --git a/libbuild2/lexer+normal.test.testscript b/libbuild2/lexer+normal.test.testscript
index c9448c3..e2780a2 100644
--- a/libbuild2/lexer+normal.test.testscript
+++ b/libbuild2/lexer+normal.test.testscript
@@ -34,3 +34,57 @@ $* <:'x?=y' >>EOO
?=
'y'
EOO
+
+: percent
+: Leading percent sign recognition.
+:
+{
+ : first
+ :
+ $* <:'%%' >>EOO
+ %
+ '%'
+ EOO
+
+ : space
+ :
+ $* <:' %%' >>EOO
+ %
+ '%'
+ EOO
+
+ : newline
+ :
+ $* <<EOI >>EOO
+
+ %%
+ EOI
+ %
+ '%'
+ <newline>
+ EOO
+
+ : non-token
+ :
+ $* <:'x%' >>EOO
+ 'x%'
+ EOO
+}
+
+: multi-lcbrace
+: Leading multi-curly-brace recognition.
+:
+{
+ : two
+ :
+ $* <:'{{' >>EOO
+ {{
+ EOO
+
+ : three
+ :
+ $* <:'{{{c++' >>EOO
+ {{{
+ 'c++'
+ EOO
+}
diff --git a/libbuild2/lexer+quoting.test.testscript b/libbuild2/lexer+quoting.test.testscript
index debefc1..0143c90 100644
--- a/libbuild2/lexer+quoting.test.testscript
+++ b/libbuild2/lexer+quoting.test.testscript
@@ -47,8 +47,10 @@ EOO
}
: part
+:
{
: quoted
+ :
{
: start
: Token start already quoted
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index c0cadd3..6d3504c 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -14,7 +14,10 @@ namespace build2
pair<pair<char, char>, bool> lexer::
peek_chars ()
{
- sep_ = skip_spaces ();
+ auto p (skip_spaces ());
+ assert (!p.second);
+ sep_ = p.first;
+
char r[2] = {'\0', '\0'};
xchar c0 (peek ());
@@ -34,7 +37,7 @@ namespace build2
}
void lexer::
- mode (lexer_mode m, char ps, optional<const char*> esc)
+ mode (lexer_mode m, char ps, optional<const char*> esc, uintptr_t data)
{
bool a (false); // attributes
@@ -54,7 +57,11 @@ namespace build2
switch (m)
{
case lexer_mode::normal:
+ case lexer_mode::cmdvar:
{
+ // Note: `%` is only recognized at the beginning of the line so it
+ // should not be included here.
+ //
a = true;
s1 = ":<>=+? $(){}#\t\n";
s2 = " == ";
@@ -121,10 +128,16 @@ namespace build2
n = false;
break;
}
+ case lexer_mode::foreign:
+ assert (data > 1);
+ // Fall through.
case lexer_mode::single_quoted:
case lexer_mode::double_quoted:
- s = false;
- // Fall through.
+ {
+ assert (ps == '\0');
+ s = false;
+ break;
+ }
case lexer_mode::variable:
{
// These are handled in an ad hoc way in word().
@@ -134,7 +147,7 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (state {m, a, ps, s, n, q, *esc, s1, s2});
+ state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
}
token lexer::
@@ -148,6 +161,7 @@ namespace build2
switch (m)
{
case lexer_mode::normal:
+ case lexer_mode::cmdvar:
case lexer_mode::value:
case lexer_mode::values:
case lexer_mode::switch_expressions:
@@ -158,10 +172,13 @@ namespace build2
case lexer_mode::buildspec: break;
case lexer_mode::eval: return next_eval ();
case lexer_mode::double_quoted: return next_quoted ();
+ case lexer_mode::foreign: return next_foreign ();
default: assert (false); // Unhandled custom mode.
}
- bool sep (skip_spaces ());
+ pair<bool, bool> skip (skip_spaces ());
+ bool sep (skip.first); // Separated from a previous character.
+ bool first (skip.second); // First non-whitespace character of a line.
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
@@ -209,7 +226,8 @@ namespace build2
m == lexer_mode::case_patterns)
state_.pop ();
- // Re-enable attributes in the normal mode.
+ // Re-enable attributes in the normal mode (should never be needed in
+ // cmdvar).
//
if (state_.top ().mode == lexer_mode::normal)
state_.top ().attributes = true;
@@ -230,6 +248,32 @@ namespace build2
}
}
+ // Line-leading tokens in the normal mode.
+ //
+ // Note: must come before any other (e.g., `{`) tests below.
+ //
+ if (m == lexer_mode::normal && first)
+ {
+ switch (c)
+ {
+ case '%': return make_token (type::percent);
+ case '{':
+ {
+ string v;
+ while (peek () == '{')
+ v += get ();
+
+ if (!v.empty ())
+ {
+ v += '{';
+ return make_token (type::multi_lcbrace, move (v));
+ }
+
+ break;
+ }
+ }
+ }
+
// The following characters are special in all modes except attributes.
//
if (m != lexer_mode::attributes && m != lexer_mode::attribute_value)
@@ -267,6 +311,7 @@ namespace build2
// switch_expressions modes.
//
if (m == lexer_mode::normal ||
+ m == lexer_mode::cmdvar ||
m == lexer_mode::switch_expressions ||
m == lexer_mode::case_patterns)
{
@@ -278,7 +323,8 @@ namespace build2
// The following characters are special in the normal mode.
//
- if (m == lexer_mode::normal)
+ if (m == lexer_mode::normal ||
+ m == lexer_mode::cmdvar)
{
switch (c)
{
@@ -315,7 +361,8 @@ namespace build2
// The following characters are special in the normal mode.
//
- if (m == lexer_mode::normal)
+ if (m == lexer_mode::normal ||
+ m == lexer_mode::cmdvar)
{
switch (c)
{
@@ -361,7 +408,7 @@ namespace build2
// This mode is quite a bit like the value mode when it comes to special
// characters, except that we have some of our own.
- bool sep (skip_spaces ());
+ bool sep (skip_spaces ().first);
xchar c (get ());
if (eos (c))
@@ -485,6 +532,99 @@ namespace build2
}
token lexer::
+ next_foreign ()
+ {
+ state& st (state_.top ());
+
+ if (st.hold)
+ {
+ token r (move (*st.hold));
+ state_.pop (); // Expire foreign mode.
+ return r;
+ }
+
+ auto count (state_.top ().data); // Number of closing braces to expect.
+
+ xchar c (get ()); // First character of first line after `{{...`.
+ uint64_t ln (c.line), cn (c.column);
+
+ string lexeme;
+ for (bool first (true); !eos (c); c = get ())
+ {
+ // If this is the first character of a line, recognize closing braces.
+ //
+ if (first)
+ {
+ first = false;
+
+ // If this turns out not to be the closing braces, we need to add any
+ // characters we have extracted to lexeme. Instead of saving these
+ // characters in a temporary we speculatively add them to the lexeme
+ // but then chop them off if this turned out to be the closing braces.
+ //
+ size_t chop (lexeme.size ());
+
+ // Skip leading whitespaces, if any.
+ //
+ for (; c == ' ' || c == '\t'; c = get ())
+ lexeme += c;
+
+ uint64_t bln (c.line), bcn (c.column); // Position of first `}`.
+
+ // Count braces.
+ //
+ auto i (count);
+ for (; c == '}'; c = get ())
+ {
+ lexeme += c;
+
+ if (--i == 0)
+ break;
+ }
+
+ if (i == 0) // Got enough braces.
+ {
+ // Make sure there are only whitespaces/comments after. Note that
+ // now we must start peeking since newline is not "ours".
+ //
+ for (c = peek (); c == ' ' || c == '\t'; c = peek ())
+ lexeme += get ();
+
+ if (c == '\n' || c == '#' || eos (c))
+ {
+ st.hold = token (type::multi_rcbrace,
+ string (count, '}'),
+ false, quote_type::unquoted, false,
+ bln, bcn,
+ token_printer);
+
+ lexeme.resize (chop);
+ return token (move (lexeme),
+ false, quote_type::unquoted, false,
+ ln, cn);
+ }
+
+ get (); // And fall through (not eos).
+ }
+ else
+ {
+ if (eos (c))
+ break;
+
+ // Fall through.
+ }
+ }
+
+ if (c == '\n')
+ first = true;
+
+ lexeme += c;
+ }
+
+ return token (type::eos, false, c.line, c.column, token_printer);
+ }
+
+ token lexer::
word (state st, bool sep)
{
lexer_mode m (st.mode);
@@ -728,7 +868,7 @@ namespace build2
return token (move (lexeme), sep, qtype, qcomp, ln, cn);
}
- bool lexer::
+ pair<bool, bool> lexer::
skip_spaces ()
{
bool r (sep_);
@@ -739,7 +879,7 @@ namespace build2
// In some special modes we don't skip spaces.
//
if (!s.sep_space)
- return r;
+ return make_pair (r, false);
xchar c (peek ());
bool start (c.column == 1);
@@ -758,6 +898,8 @@ namespace build2
{
// In some modes we treat newlines as ordinary spaces.
//
+ // Note that in this case we don't adjust start.
+ //
if (!s.sep_newline)
{
r = true;
@@ -772,7 +914,7 @@ namespace build2
break;
}
- return r;
+ return make_pair (r, start);
}
case '#':
{
@@ -833,12 +975,12 @@ namespace build2
}
// Fall through.
default:
- return r; // Not a space.
+ return make_pair (r, start); // Not a space.
}
get ();
}
- return r;
+ return make_pair (r, start);
}
}
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index 02112cb..749668e 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -20,17 +20,18 @@
namespace build2
{
- // Context-dependent lexing mode. Quoted modes are internal and should not
- // be set explicitly. In the value mode we don't treat certain characters
- // (e.g., `+`, `=`) as special so that we can use them in the variable
- // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict
- // certain character (e.g., `/`) from appearing in the name. The values mode
- // is like value but recogizes `,` as special (used in contexts where we
- // need to list multiple values). The attributes/attribute_value modes are
- // like values where each value is potentially a variable assignment; they
- // don't treat `{` and `}` as special (so we cannot have name groups in
- // attributes) as well as recognizes `=` and `]`. The eval mode is used in
- // the evaluation context.
+ // Context-dependent lexing mode.
+ //
+ // Quoted modes are internal and should not be set explicitly. In the value
+ // mode we don't treat certain characters (e.g., `+`, `=`) as special so
+ // that we can use them in the variable values, e.g., `foo = g++`. In
+ // contrast, in the variable mode, we restrict certain characters (e.g., `/`)
+ // from appearing in the name. The values mode is like value but recognizes
+ // `,` as special (used in contexts where we need to list multiple
+ // values). The attributes/attribute_value modes are like values where each
+ // value is potentially a variable assignment; they don't treat `{` and `}`
+ // as special (so we cannot have name groups in attributes) as well as
+ // recognizes `=` and `]`. The eval mode is used in the evaluation context.
//
// A number of modes are "derived" from the value/values mode by recognizing
// a few extra characters:
@@ -42,10 +43,22 @@ namespace build2
// split words separated by the pair character (to disable pairs one can
// pass `\0` as a pair character).
//
+ // The normal mode recognizes `%` and `{{...` at the beginning of the line
+ // as special. The cmdvar mode is like normal but does not treat these
+ // character sequences as special.
+ //
+ // Finally, the foreign mode reads everything until encountering a line that
+ // contains nothing (besides whitespaces) other than the closing multi-
+ // curly-brace (`}}...`) (or eos) returning the contents as the word token
+ // followed by the multi_rcbrace (or eos). In a way it is similar to the
+ // single-quote mode. The number of closing braces to expect is passed as
+ // mode data.
+ //
// The alternative modes must be set manually. The value/values and derived
// modes automatically expires after the end of the line. The attribute mode
// expires after the closing `]`. The variable mode expires after the word
- // token. And the eval mode expires after the closing `)`.
+ // token. The eval mode expires after the closing `)`. And the foreign mode
+ // expires after the closing braces.
//
// Note that normally it is only safe to switch mode when the current token
// is not quoted (or, more generally, when you are not in the double-quoted
@@ -70,6 +83,7 @@ namespace build2
enum
{
normal = base_type::value_next,
+ cmdvar,
variable,
value,
values,
@@ -80,6 +94,7 @@ namespace build2
eval,
single_quoted,
double_quoted,
+ foreign,
buildspec,
value_next
@@ -91,7 +106,7 @@ namespace build2
};
class LIBBUILD2_SYMEXPORT lexer:
- public butl::char_scanner<butl::utf8_validator>
+ public butl::char_scanner<butl::utf8_validator, 2>
{
public:
// If escape is not NULL then only escape sequences with characters from
@@ -116,7 +131,8 @@ namespace build2
virtual void
mode (lexer_mode,
char pair_separator = '\0',
- optional<const char*> escapes = nullopt);
+ optional<const char*> escapes = nullopt,
+ uintptr_t data = 0);
// Enable attributes recognition for the next token.
//
@@ -157,7 +173,10 @@ namespace build2
protected:
struct state
{
- lexer_mode mode;
+ lexer_mode mode;
+ uintptr_t data;
+ optional<token> hold;
+
bool attributes;
char sep_pair;
@@ -183,17 +202,22 @@ namespace build2
token
next_quoted ();
+ token
+ next_foreign ();
+
// Lex a word assuming current is the top state (which may already have
// been "expired" from the top).
//
virtual token
word (state current, bool separated);
- // Return true if we have seen any spaces. Skipped empty lines
- // don't count. In other words, we are only interested in spaces
- // that are on the same line as the following non-space character.
+ // Return true in first if we have seen any spaces. Skipped empty lines
+ // don't count. In other words, we are only interested in spaces that are
+ // on the same line as the following non-space character. Return true in
+ // second if we have started skipping spaces from column 1 (note that
+ // if this mode does not skip spaces, then second will always be false).
//
- bool
+ pair<bool, bool>
skip_spaces ();
// Diagnostics.
@@ -232,7 +256,7 @@ namespace build2
namespace butl // ADL
{
inline build2::location
- get_location (const butl::char_scanner<butl::utf8_validator>::xchar& c,
+ get_location (const butl::char_scanner<butl::utf8_validator, 2>::xchar& c,
const void* data)
{
using namespace build2;
diff --git a/libbuild2/lexer.test.cxx b/libbuild2/lexer.test.cxx
index 5e39e43..24f0528 100644
--- a/libbuild2/lexer.test.cxx
+++ b/libbuild2/lexer.test.cxx
@@ -1,6 +1,7 @@
// file : libbuild2/lexer.test.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
+#include <cstdlib> // strtoul()
#include <cassert>
#include <iostream>
@@ -14,13 +15,15 @@ using namespace std;
namespace build2
{
- // Usage: argv[0] [-q] [<lexer-mode>]
+ // Usage: argv[0] [-q] [<lexer-mode>[=<data>]]
//
int
main (int argc, char* argv[])
{
bool quote (false);
+
lexer_mode m (lexer_mode::normal);
+ uintptr_t d (0);
for (int i (1); i != argc; ++i)
{
@@ -36,7 +39,12 @@ namespace build2
else if (a == "attributes") m = lexer_mode::attributes;
else if (a == "eval") m = lexer_mode::eval;
else if (a == "buildspec") m = lexer_mode::buildspec;
- else assert (false);
+ else if (a.compare (0, 8, "foreign=") == 0)
+ {
+ m = lexer_mode::foreign;
+ d = strtoul (a.c_str () + 8, nullptr, 10);
+ }
+ else assert (false);
break;
}
}
@@ -51,7 +59,7 @@ namespace build2
lexer l (cin, in);
if (m != lexer_mode::normal)
- l.mode (m);
+ l.mode (m, '\0', nullopt, d);
// No use printing eos since we will either get it or loop forever.
//
@@ -62,7 +70,7 @@ namespace build2
// Print each token on a separate line without quoting operators.
//
- t.printer (cout, t, false);
+ t.printer (cout, t, print_mode::normal);
if (quote)
{
diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx
index 3abb102..cfedefe 100644
--- a/libbuild2/module.cxx
+++ b/libbuild2/module.cxx
@@ -63,6 +63,162 @@ namespace build2
mod);
}
+ // Note: also used by ad hoc recipes thus not static.
+ //
+ void
+ create_module_context (context& ctx, const location& loc)
+ {
+ assert (ctx.module_context == nullptr);
+ assert (*ctx.module_context_storage == nullptr);
+
+ // Since we are using the same scheduler, it makes sense to reuse the
+ // same global mutexes. Also disable nested module context for good
+ // measure.
+ //
+ ctx.module_context_storage->reset (
+ new context (ctx.sched,
+ ctx.mutexes,
+ false, /* match_only */
+ false, /* dry_run */
+ ctx.keep_going,
+ ctx.global_var_overrides, /* cmd_vars */
+ nullopt)); /* module_context */
+
+ // We use the same context for building any nested modules that might be
+ // required while building modules.
+ //
+ ctx.module_context = ctx.module_context_storage->get ();
+ ctx.module_context->module_context = ctx.module_context;
+
+ // Setup the context to perform update. In a sense we have a long-running
+ // perform meta-operation batch (indefinite, in fact, since we never call
+ // the meta-operation's *_post() callbacks) in which we periodically
+ // execute update operations.
+ //
+ // Note that we perform each build in a separate update operation. Failing
+ // that, if the same target is updated twice (which may happen with ad hoc
+ // recipes) we will see the old state.
+ //
+ if (mo_perform.meta_operation_pre != nullptr)
+ mo_perform.meta_operation_pre ({} /* parameters */, loc);
+
+ ctx.module_context->current_meta_operation (mo_perform);
+
+ if (mo_perform.operation_pre != nullptr)
+ mo_perform.operation_pre ({} /* parameters */, update_id);
+ }
+
+ // Note: also used by ad hoc recipes thus not static.
+ //
+ const target&
+ update_in_module_context (context& ctx, const scope& rs, names tgt,
+ const location& loc, const path& bf)
+ {
+ // New update operation.
+ //
+ ctx.module_context->current_operation (op_update);
+
+ // Un-tune the scheduler.
+ //
+ // Note that we can only do this if we are running serially because
+ // otherwise we cannot guarantee the scheduler is idle (we could have
+ // waiting threads from the outer context). This is fine for now since the
+ // only two tuning levels we use are serial and full concurrency (turns out
+ // currently we don't really need this: we will always be called during
+ // load or match phases and we always do parallel match; but let's keep it
+ // in case things change).
+ //
+ auto sched_tune (ctx.sched.serial ()
+ ? scheduler::tune_guard (ctx.sched, 0)
+ : scheduler::tune_guard ());
+
+ // Remap verbosity level 0 to 1 unless we were requested to be silent.
+ // Failing that, we may have long periods of seemingly nothing happening
+ // while we quietly update the module, which may look like things have
+ // hung up.
+ //
+ // @@ CTX: modifying global verbosity level won't work if we have multiple
+ // top-level contexts running in parallel.
+ //
+ auto verbg = make_guard (
+ [z = !silent && verb == 0 ? (verb = 1, true) : false] ()
+ {
+ if (z)
+ verb = 0;
+ });
+
+ // Note that for now we suppress progress since it would clash with the
+ // progress of what we are already doing (maybe in the future we can do
+ // save/restore but then we would need some sort of diagnostics that we
+ // have switched to another task).
+ //
+ action a (perform_update_id);
+ action_targets tgs;
+
+ mo_perform.search ({}, /* parameters */
+ rs, /* root scope */
+ rs, /* base scope */
+ bf, /* buildfile */
+ rs.find_target_key (tgt, loc),
+ loc,
+ tgs);
+
+ mo_perform.match ({}, /* parameters */
+ a,
+ tgs,
+ 1, /* diag (failures only) */
+ false /* progress */);
+
+ mo_perform.execute ({}, /* parameters */
+ a,
+ tgs,
+ 1, /* diag (failures only) */
+ false /* progress */);
+
+ assert (tgs.size () == 1);
+ return tgs[0].as<target> ();
+ }
+
+ // Note: also used by ad hoc recipes thus not static.
+ //
+ pair<void* /* handle */, void* /* symbol */>
+ load_module_library (const path& lib, const string& sym, string& err)
+ {
+ // Note that we don't unload our modules since it's not clear what the
+ // benefit would be.
+ //
+ void* h (nullptr);
+ void* s (nullptr);
+
+#ifndef _WIN32
+ // Use RTLD_NOW instead of RTLD_LAZY to both speed things up (we are going
+ // to use this module now) and to detect any symbol mismatches.
+ //
+ if ((h = dlopen (lib.string ().c_str (), RTLD_NOW | RTLD_GLOBAL)))
+ {
+ s = dlsym (h, sym.c_str ());
+
+ if (s == nullptr)
+ err = dlerror ();
+ }
+ else
+ err = dlerror ();
+#else
+ if (HMODULE m = LoadLibrary (lib.string ().c_str ()))
+ {
+ h = static_cast<void*> (m);
+ s = function_cast<void*> (GetProcAddress (m, sym.c_str ()));
+
+ if (s == nullptr)
+ err = win32::last_error_msg ();
+ }
+ else
+ err = win32::last_error_msg ();
+#endif
+
+ return make_pair (h, s);
+ }
+
static module_load_function*
import_module (scope& bs,
const string& mod,
@@ -180,43 +336,9 @@ namespace build2
{
if (!ctx.module_context_storage)
fail (loc) << "unable to update build system module " << mod <<
- info << "updating of build system modules is disabled";
-
- assert (*ctx.module_context_storage == nullptr);
-
- // Since we are using the same scheduler, it makes sense to reuse the
- // same global mutexes. Also disable nested module context for good
- // measure.
- //
- ctx.module_context_storage->reset (
- new context (ctx.sched,
- ctx.mutexes,
- false, /* match_only */
- false, /* dry_run */
- ctx.keep_going,
- ctx.global_var_overrides, /* cmd_vars */
- nullopt)); /* module_context */
-
- // We use the same context for building any nested modules that
- // might be required while building modules.
- //
- ctx.module_context = ctx.module_context_storage->get ();
- ctx.module_context->module_context = ctx.module_context;
-
- // Setup the context to perform update. In a sense we have a long-
- // running perform meta-operation batch (indefinite, in fact, since we
- // never call the meta-operation's *_post() callbacks) in which we
- // periodically execute the update operation.
- //
- if (mo_perform.meta_operation_pre != nullptr)
- mo_perform.meta_operation_pre ({} /* parameters */, loc);
+ info << "building of build system modules is disabled";
- ctx.module_context->current_meta_operation (mo_perform);
-
- if (mo_perform.operation_pre != nullptr)
- mo_perform.operation_pre ({} /* parameters */, update_id);
-
- ctx.module_context->current_operation (op_update);
+ create_module_context (ctx, loc);
}
// Inherit loaded_modules lock from the outer context.
@@ -234,24 +356,20 @@ namespace build2
l5 ([&]{trace << "loaded " << lr.first;});
- // When happens next depends on whether this is a top-level or nested
+ // What happens next depends on whether this is a top-level or nested
// module update.
//
if (nested)
{
// This could be initial or exclusive load.
//
- // @@ TODO
+ // @@ TODO: see the ad hoc recipe case as a reference.
//
fail (loc) << "nested build system module updates not yet supported";
}
else
{
- const scope& rs (lr.second);
-
- action_targets tgs;
- action a (perform_id, update_id);
-
+ const target* l;
{
// Cutoff the existing diagnostics stack and push our own entry.
//
@@ -263,68 +381,15 @@ namespace build2
dr << info (loc) << "while loading build system module " << mod;
});
- // Un-tune the scheduler.
- //
- // Note that we can only do this if we are running serially because
- // otherwise we cannot guarantee the scheduler is idle (we could
- // have waiting threads from the outer context). This is fine for
- // now since the only two tuning level we use are serial and full
- // concurrency (turns out currently we don't really need this: we
- // will always be called during load or match phases and we always
- // do parallel match; but let's keep it in case things change).
- //
- auto sched_tune (ctx.sched.serial ()
- ? scheduler::tune_guard (ctx.sched, 0)
- : scheduler::tune_guard ());
-
- // Remap verbosity level 0 to 1 unless we were requested to be
- // silent. Failed that, we may have long periods of seemingly
- // nothing happening while we quietly update the module, which
- // may look like things have hung up.
- //
- // @@ CTX: modifying global verbosity level won't work if we have
- // multiple top-level contexts running in parallel.
- //
- auto verbg = make_guard (
- [z = !silent && verb == 0 ? (verb = 1, true) : false] ()
- {
- if (z)
- verb = 0;
- });
-
- // Note that for now we suppress progress since it would clash with
- // the progress of what we are already doing (maybe in the future we
- // can do save/restore but then we would need some sort of
- // diagnostics that we have switched to another task).
- //
- mo_perform.search ({}, /* parameters */
- rs, /* root scope */
- rs, /* base scope */
- path (), /* buildfile */
- rs.find_target_key (lr.first, loc),
- loc,
- tgs);
-
- mo_perform.match ({}, /* parameters */
- a,
- tgs,
- 1, /* diag (failures only) */
- false /* progress */);
-
- mo_perform.execute ({}, /* parameters */
- a,
- tgs,
- 1, /* diag (failures only) */
- false /* progress */);
+ l = &update_in_module_context (
+ ctx, lr.second, move (lr.first),
+ loc, path ());
}
- assert (tgs.size () == 1);
- const target& l (tgs[0].as<target> ());
-
- if (!l.is_a ("libs"))
+ if (!l->is_a ("libs"))
fail (loc) << "wrong export from build system module " << mod;
- lib = l.as<file> ().path ();
+ lib = l->as<file> ().path ();
l5 ([&]{trace << "updated " << lib;});
}
@@ -364,53 +429,30 @@ namespace build2
//
string sym (sanitize_identifier ("build2_" + mod + "_load"));
- // Note that we don't unload our modules since it's not clear what would
- // the benefit be.
- //
- diag_record dr;
+ string err;
+ pair<void*, void*> hs (load_module_library (lib, sym, err));
-#ifndef _WIN32
- // Use RTLD_NOW instead of RTLD_LAZY to both speed things up (we are going
- // to use this module now) and to detect any symbol mismatches.
- //
- if (void* h = dlopen (lib.string ().c_str (), RTLD_NOW | RTLD_GLOBAL))
+ if (hs.first != nullptr)
{
- r = function_cast<module_load_function*> (dlsym (h, sym.c_str ()));
-
// I don't think we should ignore this even if the module is optional.
//
- if (r == nullptr)
+ if (hs.second == nullptr)
fail (loc) << "unable to lookup " << sym << " in build system module "
- << mod << " (" << lib << "): " << dlerror ();
+ << mod << " (" << lib << "): " << err;
+
+ r = function_cast<module_load_function*> (hs.second);
}
else if (!opt)
- dr << fail (loc) << "unable to load build system module " << mod
- << " (" << lib << "): " << dlerror ();
- else
- l5 ([&]{trace << "unable to load " << lib << ": " << dlerror ();});
-#else
- if (HMODULE h = LoadLibrary (lib.string ().c_str ()))
{
- r = function_cast<module_load_function*> (
- GetProcAddress (h, sym.c_str ()));
-
- if (r == nullptr)
- fail (loc) << "unable to lookup " << sym << " in build system module "
- << mod << " (" << lib << "): " << win32::last_error_msg ();
+ // Add import suggestion similar to import phase 2.
+ //
+ fail (loc) << "unable to load build system module " << mod << " ("
+ << lib << "): " << err <<
+ info << "use config.import." << proj.variable () << " command "
+ << "line variable to specify its project out_root";
}
- else if (!opt)
- dr << fail (loc) << "unable to load build system module " << mod
- << " (" << lib << "): " << win32::last_error_msg ();
else
- l5 ([&]{trace << "unable to load " << lib << ": "
- << win32::last_error_msg ();});
-#endif
-
- // Add a suggestion similar to import phase 2.
- //
- if (!dr.empty ())
- dr << info << "use config.import." << proj.variable () << " command "
- << "line variable to specify its project out_root" << endf;
+ l5 ([&]{trace << "unable to load " << lib << ": " << err;});
#endif // BUILD2_BOOTSTRAP
diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx
index d0e8d85..39d2396 100644
--- a/libbuild2/name.hxx
+++ b/libbuild2/name.hxx
@@ -113,6 +113,18 @@ namespace build2
LIBBUILD2_SYMEXPORT string
to_string (const name&);
+ template <typename T>
+ inline void
+ to_checksum (T& cs, const name& n)
+ {
+ if (n.proj)
+ cs.append (n.proj->string ());
+ cs.append (n.dir.string ());
+ cs.append (n.type);
+ cs.append (n.value);
+ cs.append (n.pair);
+ }
+
// Store a string in a name in a reversible way. If the string ends with a
// trailing directory separator then it is stored as a directory, otherwise
// as a simple name.
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index e87ca95..94f597d 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -9,6 +9,7 @@
#include <libbutl/filesystem.mxx> // path_search
#include <libbutl/path-pattern.mxx>
+#include <libbuild2/rule.hxx>
#include <libbuild2/dump.hxx>
#include <libbuild2/scope.hxx>
#include <libbuild2/module.hxx>
@@ -19,6 +20,9 @@
#include <libbuild2/diagnostics.hxx>
#include <libbuild2/prerequisite.hxx>
+#include <libbuild2/build/script/parser.hxx>
+#include <libbuild2/build/script/script.hxx>
+
#include <libbuild2/config/utility.hxx> // lookup_config
using namespace std;
@@ -28,6 +32,21 @@ namespace build2
{
using type = token_type;
+ ostream&
+ operator<< (ostream& o, const parser::attribute& a)
+ {
+ o << a.name;
+
+ if (!a.value.null)
+ {
+ o << '=';
+ names storage;
+ to_stream (o, reverse (a.value, storage), true /* quote */, '@');
+ }
+
+ return o;
+ }
+
class parser::enter_scope
{
public:
@@ -335,6 +354,11 @@ namespace build2
while (tt != type::eos && !(one && parsed))
{
+ // Issue better diagnostics for stray `%`.
+ //
+ if (tt == type::percent)
+ fail (t) << "recipe without target";
+
// Extract attributes if any.
//
assert (attributes_.empty ());
@@ -501,7 +525,8 @@ namespace build2
// exactly that would mean is unclear. One potentially useful
// semantics would be the ability to specify attributes for ad hoc
// members though the fact that the primary target is listed first
- // would make it rather unintuitive.
+ // would make it rather unintuitive. Maybe attributes that change
+ // the group semantics itself?
//
next_with_attributes (t, tt);
@@ -593,9 +618,11 @@ namespace build2
//
// void (token& t, type& tt, const target_type* type, string pat)
//
- auto for_each = [this, &trace,
- &t, &tt,
- &ns, &nloc, &ans] (auto&& f)
+ // Note that the target and its ad hoc members are inserted implied
+ // but this flag can be cleared and default_target logic applied if
+ // appropriate.
+ //
+ auto for_each = [this, &trace, &t, &tt, &ns, &nloc, &ans] (auto&& f)
{
// Note: watch out for an out-qualified single target (two names).
//
@@ -674,30 +701,68 @@ namespace build2
if (tt == type::newline)
{
- // See if this is a target block.
+ // See if this is a target-specific variable and/or recipe block(s).
//
// Note that we cannot just let parse_dependency() handle this case
// because we can have (a mixture of) target type/patterns.
//
- if (next (t, tt) == type::lcbrace && peek () == type::newline)
+ // @@ This might change once we support ad hoc rules (where we may
+ // have prerequisites for a pattern; but perhaps this should be
+ // handled separately since the parse_dependency() is already too
+ // complex and there will be no chains in this case).
+ //
+ next (t, tt);
+ if (tt == type::percent ||
+ tt == type::multi_lcbrace ||
+ (tt == type::lcbrace && peek () == type::newline))
{
- next (t, tt); // Newline.
-
- // Parse the block for each target.
+ // Parse the block(s) for each target.
+ //
+ // Note that because we have to peek past the closing brace(s) to
+ // see whether there is a/another recipe block, we have to make
+ // that token part of the replay (we cannot peek past the replay
+ // sequence).
//
- for_each ([this] (token& t, type& tt,
- const target_type* type, string pat)
- {
- next (t, tt); // First token inside the block.
+ // Note: similar code to the version in parse_dependency().
+ //
+ auto parse = [
+ this,
+ st = token (t), // Save start token (will be gone on replay).
+ recipes = small_vector<shared_ptr<adhoc_rule>, 1> ()]
+ (token& t, type& tt,
+ const target_type* type, string pat) mutable
+ {
+ token rt; // Recipe start token.
- parse_variable_block (t, tt, type, move (pat));
+ // The variable block, if any, should be first.
+ //
+ if (st.type == type::lcbrace)
+ {
+ next (t, tt); // Newline.
+ next (t, tt); // First token inside the variable block.
+ parse_variable_block (t, tt, type, move (pat));
- if (tt != type::rcbrace)
- fail (t) << "expected '}' instead of " << t;
- });
+ if (tt != type::rcbrace)
+ fail (t) << "expected '}' instead of " << t;
- next (t, tt); // Presumably newline after '}'.
- next_after_newline (t, tt, '}'); // Should be on its own line.
+ next (t, tt); // Newline.
+ next_after_newline (t, tt, '}'); // Should be on its own line.
+
+ if (tt != type::percent && tt != type::multi_lcbrace)
+ return;
+
+ rt = t;
+ }
+ else
+ rt = st;
+
+ if (type != nullptr)
+ fail (rt) << "recipe in target type/pattern";
+
+ parse_recipe (t, tt, rt, recipes);
+ };
+
+ for_each (parse);
}
else
{
@@ -717,7 +782,7 @@ namespace build2
// Target-specific variable assignment or dependency declaration,
// including a dependency chain and/or prerequisite-specific variable
- // assignment.
+ // assignment and/or recipe block(s).
//
auto at (attributes_push (t, tt));
@@ -731,6 +796,10 @@ namespace build2
// Target-specific variable assignment.
//
+ // Note that neither here nor in parse_dependency() below we allow
+ // specifying recipes following a target-specific variable assignment
+ // (but we do allow them following a target-specific variable block).
+ //
if (tt == type::assign || tt == type::prepend || tt == type::append)
{
type akind (tt);
@@ -762,7 +831,8 @@ namespace build2
next_after_newline (t, tt);
}
// Dependency declaration potentially followed by a chain and/or a
- // prerequisite-specific variable assignment/block.
+ // target/prerequisite-specific variable assignment/block and/or
+ // recipe block(s).
//
else
{
@@ -954,6 +1024,181 @@ namespace build2
}
void parser::
+ parse_recipe (token& t, type& tt,
+ const token& start,
+ small_vector<shared_ptr<adhoc_rule>, 1>& recipes)
+ {
+ // Parse a recipe chain.
+ //
+ // % [<attrs>]
+ // {{ [<lang>]
+ // ...
+ // }}
+ //
+ // enter: start is percent or opening multi-curly-brace
+ // leave: token past newline after last closing multi-curly-brace
+
+ if (stage_ == stage::boot)
+ fail (t) << "ad hoc recipe specified during bootstrap";
+
+ // If we have a recipe, the target is not implied.
+ //
+ if (target_->implied)
+ {
+ for (target* m (target_); m != nullptr; m = m->adhoc_member)
+ m->implied = false;
+
+ if (default_target_ == nullptr)
+ default_target_ = target_;
+ }
+
+ bool first (recipes.empty ()); // First target.
+ bool clean (false); // Seen a recipe that requires cleanup.
+
+ token st (start);
+ for (size_t i (0);; st = t, ++i)
+ {
+ optional<string> diag;
+
+ if (st.type == type::percent)
+ {
+ next_with_attributes (t, tt);
+ attributes_push (t, tt, true /* standalone */);
+
+ // Get the recipe attributes, if any. Currently the only recognized
+ // attribute is diag (its value is converted to string); any other
+ // attribute is diagnosed as an unknown recipe attribute.
+ //
+ attributes& as (attributes_top ());
+ for (attribute& a: as)
+ {
+ const string& n (a.name);
+
+ // @@ TODO: diag is script-specific, pass as attributes to rule?
+ //
+ if (n == "diag")
+ {
+ try
+ {
+ diag = convert<string> (move (a.value));
+ }
+ catch (const invalid_argument& e)
+ {
+ fail (as.loc) << "invalid " << n << " attribute value: " << e;
+ }
+ }
+ else
+ fail (as.loc) << "unknown recipe attribute " << a;
+ }
+
+ attributes_pop ();
+
+ next_after_newline (t, tt, '%');
+
+ if (tt != type::multi_lcbrace)
+ fail (t) << "expected recipe block instead of " << t;
+
+ st = t; // And fall through.
+ }
+
+ optional<string> lang;
+ location lloc;
+ if (next (t, tt) == type::newline)
+ ;
+ else if (tt == type::word)
+ {
+ lang = t.value;
+ lloc = get_location (t);
+ next (t, tt); // Newline after <lang>.
+ }
+ else
+ fail (t) << "expected recipe language instead of " << t;
+
+ mode (lexer_mode::foreign, '\0', st.value.size ());
+ next_after_newline (t, tt, st); // Should be on its own line.
+
+ if (tt != type::word)
+ fail (t) << "unterminated recipe block" <<
+ info (st) << "recipe block starts here" << endf;
+
+ shared_ptr<adhoc_rule> ar;
+ if (first)
+ {
+ // Note that this is always the location of the opening multi-curly-
+ // brace, whether we have the header or not. This is relied upon by
+ // the rule implementations (e.g., to calculate the first line of the
+ // recipe code).
+ //
+ location loc (get_location (st));
+
+ if (!lang)
+ {
+ auto* asr (new adhoc_script_rule (move (diag), loc, st.value.size ()));
+ ar.reset (asr);
+
+ asr->checksum = sha256 (t.value).string ();
+
+ istringstream is (move (t.value));
+ build::script::parser p (ctx);
+ asr->script = p.pre_parse (is, asr->loc.file, loc.line + 1);
+ }
+ else if (*lang == "c++")
+ {
+ ar.reset (new adhoc_cxx_rule (move (t.value), loc, st.value.size ()));
+ clean = true;
+ }
+ else
+ fail (lloc) << "unknown recipe language '" << *lang << "'";
+
+ recipes.push_back (ar);
+ }
+ else
+ ar = recipes[i];
+
+ target_->adhoc_recipes.push_back (
+ adhoc_recipe {perform_update_id, move (ar)});
+
+ next (t, tt);
+ assert (tt == type::multi_rcbrace);
+
+ next (t, tt); // Newline.
+ next_after_newline (t, tt, token (t)); // Should be on its own line.
+
+ if (tt != type::percent && tt != type::multi_lcbrace)
+ break;
+ }
+
+ // If we have a recipe that needs cleanup, register an operation callback
+ // for this project unless it has already been done.
+ //
+ if (clean)
+ {
+ action a (perform_clean_id);
+ auto f (&adhoc_rule::clean_recipes_build);
+
+ // First check if we have already done this.
+ //
+ auto p (root_->operation_callbacks.equal_range (a));
+ for (; p.first != p.second; ++p.first)
+ {
+ auto t (
+ p.first->second.pre.target<scope::operation_callback::callback*> ());
+
+ if (t != nullptr && *t == f)
+ break;
+ }
+
+ // It feels natural to clean up recipe builds as a post operation but
+ // that prevents the (otherwise-empty) out root directory from being
+ // cleaned up (via the standard fsdir{} chain).
+ //
+ if (p.first == p.second)
+ root_->operation_callbacks.emplace (
+ a, scope::operation_callback {f, nullptr /*post*/});
+ }
+ }
+
+ void parser::
enter_adhoc_members (adhoc_names_loc&& ans, bool implied)
{
tracer trace ("parser::enter_adhoc_members", &path_);
@@ -1083,9 +1328,9 @@ namespace build2
bool chain)
{
// Parse a dependency chain and/or a target/prerequisite-specific variable
- // assignment/block. Return true if the following block (if any) has been
- // "claimed" (the block "belongs" to targets/prerequisites before the last
- // colon).
+ // assignment/block and/or recipe block(s). Return true if the following
+ // block(s) (if any) have been "claimed", meaning they "belong" to
+ // targets/prerequisites before the last colon.
//
// enter: colon (anything else is not handled)
// leave: - first token on the next line if returning true
@@ -1163,7 +1408,8 @@ namespace build2
// each target (for_each_p).
//
// We handle multiple targets and/or prerequisites by replaying the tokens
- // (see the target-specific case for details). The function signature is:
+ // (see the target-specific case comments for details). The function
+ // signature is:
//
// void (token& t, type& tt)
//
@@ -1209,9 +1455,9 @@ namespace build2
};
// Do we have a dependency chain and/or prerequisite-specific variable
- // assignment? If not, check for the target-specific variable block unless
- // this is a chained call (in which case the block, if any, "belongs" to
- // prerequisites).
+ // assignment? If not, check for the target-specific variable block and/or
+ // recipe block(s) unless this is a chained call (in which case the block,
+ // if any, "belongs" to prerequisites).
//
if (tt != type::colon)
{
@@ -1220,24 +1466,48 @@ namespace build2
next_after_newline (t, tt); // Must be a newline then.
- if (tt == type::lcbrace && peek () == type::newline)
+ if (tt == type::percent ||
+ tt == type::multi_lcbrace ||
+ (tt == type::lcbrace && peek () == type::newline))
{
- next (t, tt); // Newline.
-
- // Parse the block for each target.
+ // Parse the block(s) for each target.
//
- for_each_t ([this] (token& t, token_type& tt)
- {
- next (t, tt); // First token inside the block.
+ // Note: similar code to the version in parse_clause().
+ //
+ auto parse = [
+ this,
+ st = token (t), // Save start token (will be gone on replay).
+ recipes = small_vector<shared_ptr<adhoc_rule>, 1> ()]
+ (token& t, type& tt) mutable
+ {
+ token rt; // Recipe start token.
- parse_variable_block (t, tt);
+ // The variable block, if any, should be first.
+ //
+ if (st.type == type::lcbrace)
+ {
+ next (t, tt); // Newline.
+ next (t, tt); // First token inside the variable block.
+ parse_variable_block (t, tt);
- if (tt != type::rcbrace)
- fail (t) << "expected '}' instead of " << t;
- });
+ if (tt != type::rcbrace)
+ fail (t) << "expected '}' instead of " << t;
- next (t, tt); // Presumably newline after '}'.
- next_after_newline (t, tt, '}'); // Should be on its own line.
+ next (t, tt); // Newline.
+ next_after_newline (t, tt, '}'); // Should be on its own line.
+
+ if (tt != type::percent && tt != type::multi_lcbrace)
+ return;
+
+ rt = t;
+ }
+ else
+ rt = st;
+
+ parse_recipe (t, tt, rt, recipes);
+ };
+
+ for_each_t (parse);
}
return true; // Claimed or isn't any.
@@ -1648,7 +1918,7 @@ namespace build2
//
{
auto df = make_diag_frame (
- [&args, &l](const diag_record& dr)
+ [this, &args, &l](const diag_record& dr)
{
dr << info (l) << "while parsing " << args[0] << " output";
});
@@ -1758,7 +2028,7 @@ namespace build2
}
catch (const invalid_argument& e)
{
- fail << "invalid " << i->name << " attribute value: " << e;
+ fail (as.loc) << "invalid " << i->name << " attribute value: " << e;
}
}
else if (i->name == "config.report.variable")
@@ -1769,7 +2039,7 @@ namespace build2
}
catch (const invalid_argument& e)
{
- fail << "invalid " << i->name << " attribute value: " << e;
+ fail (as.loc) << "invalid " << i->name << " attribute value: " << e;
}
}
else
@@ -3263,16 +3533,10 @@ namespace build2
optional<variable_visibility> vis;
optional<bool> ovr;
- auto print = [storage = names ()] (diag_record& dr, const value& v) mutable
- {
- storage.clear ();
- to_stream (dr.os, reverse (v, storage), true /* quote */, '@');
- };
-
- for (auto& p: as)
+ for (auto& a: as)
{
- string& n (p.name);
- value& v (p.value);
+ string& n (a.name);
+ value& v (a.value);
if (const value_type* t = map_type (n))
{
@@ -3283,23 +3547,10 @@ namespace build2
// Fall through.
}
else
- {
- diag_record dr (fail (l));
- dr << "unknown variable attribute " << n;
-
- if (!v.null)
- {
- dr << '=';
- print (dr, v);
- }
- }
+ fail (l) << "unknown variable attribute " << a;
if (!v.null)
- {
- diag_record dr (fail (l));
- dr << "unexpected value for attribute " << n << ": ";
- print (dr, v);
- }
+ fail (l) << "unexpected value in attribute " << a;
}
if (type != nullptr && var.type != nullptr)
@@ -3336,16 +3587,10 @@ namespace build2
bool null (false);
const value_type* type (nullptr);
- auto print = [storage = names ()] (diag_record& dr, const value& v) mutable
- {
- storage.clear ();
- to_stream (dr.os, reverse (v, storage), true /* quote */, '@');
- };
-
- for (auto& p: as)
+ for (auto& a: as)
{
- string& n (p.name);
- value& v (p.value);
+ string& n (a.name);
+ value& v (a.value);
if (n == "null")
{
@@ -3364,23 +3609,10 @@ namespace build2
// Fall through.
}
else
- {
- diag_record dr (fail (l));
- dr << "unknown value attribute " << n;
-
- if (!v.null)
- {
- dr << '=';
- print (dr, v);
- }
- }
+ fail (l) << "unknown value attribute " << a;
if (!v.null)
- {
- diag_record dr (fail (l));
- dr << "unexpected value for attribute " << n << ": ";
- print (dr, v);
- }
+ fail (l) << "unexpected value in attribute " << a;
}
// When do we set the type and when do we keep the original? This gets
@@ -3502,11 +3734,8 @@ namespace build2
values parser::
parse_eval (token& t, type& tt, pattern_mode pmode)
{
- // enter: lparen
- // leave: rparen
-
- mode (lexer_mode::eval, '@'); // Auto-expires at rparen.
- next_with_attributes (t, tt);
+ // enter: token after lparen (lexed in the eval mode with attributes).
+ // leave: rparen (eval mode auto-expires at rparen).
if (tt == type::rparen)
return values ();
@@ -4707,7 +4936,7 @@ namespace build2
// Print the location information in case the function fails.
//
auto df = make_diag_frame (
- [&loc, l, r] (const diag_record& dr)
+ [this, &loc, l, r] (const diag_record& dr)
{
dr << info (loc) << "while concatenating " << l << " to " << r;
dr << info << "use quoting to force untyped concatenation";
@@ -5322,56 +5551,72 @@ namespace build2
; // Leave the name empty to fail below.
else if (tt == type::word)
{
- if (!pre_parse_)
- name = move (t.value);
+ name = move (t.value);
}
else if (tt == type::lparen)
{
expire_mode ();
- values vs (parse_eval (t, tt, pmode)); //@@ OUT will parse @-pair and do well?
+ mode (lexer_mode::eval, '@');
+ next_with_attributes (t, tt);
- if (!pre_parse_)
+ // Handle the $(x) case ad hoc. We do it this way in order to get
+ // the variable name even during pre-parse. It should also be
+ // faster.
+ //
+ if (tt == type::word && peek () == type::rparen)
+ {
+ name = move (t.value);
+ next (t, tt); // Get `)`.
+ }
+ else
{
- if (vs.size () != 1)
- fail (loc) << "expected single variable/function name";
+ //@@ OUT will parse @-pair and do well?
+ //
+ values vs (parse_eval (t, tt, pmode));
- value& v (vs[0]);
+ if (!pre_parse_)
+ {
+ if (vs.size () != 1)
+ fail (loc) << "expected single variable/function name";
- if (!v)
- fail (loc) << "null variable/function name";
+ value& v (vs[0]);
- names storage;
- vector_view<build2::name> ns (reverse (v, storage)); // Movable.
- size_t n (ns.size ());
+ if (!v)
+ fail (loc) << "null variable/function name";
- // We cannot handle scope-qualification in the eval context as
- // we do for target-qualification (see eval-qual) since then we
- // would be treating all paths as qualified variables. So we
- // have to do it here.
- //
- if (n == 2 && ns[0].pair == ':') // $(foo: x)
- {
- qual = move (ns[0]);
+ names storage;
+ vector_view<build2::name> ns (reverse (v, storage)); // Movable.
+ size_t n (ns.size ());
- if (qual.empty ())
- fail (loc) << "empty variable/function qualification";
- }
- else if (n == 2 && ns[0].directory ()) // $(foo/ x)
- {
- qual = move (ns[0]);
- qual.pair = '/';
- }
- else if (n > 1)
- fail (loc) << "expected variable/function name instead of '"
- << ns << "'";
+ // We cannot handle scope-qualification in the eval context as
+ // we do for target-qualification (see eval-qual) since then
+ // we would be treating all paths as qualified variables. So
+ // we have to do it here.
+ //
+ if (n == 2 && ns[0].pair == ':') // $(foo: x)
+ {
+ qual = move (ns[0]);
- // Note: checked for empty below.
- //
- if (!ns[n - 1].simple ())
- fail (loc) << "expected variable/function name instead of '"
- << ns[n - 1] << "'";
+ if (qual.empty ())
+ fail (loc) << "empty variable/function qualification";
+ }
+ else if (n == 2 && ns[0].directory ()) // $(foo/ x)
+ {
+ qual = move (ns[0]);
+ qual.pair = '/';
+ }
+ else if (n > 1)
+ fail (loc) << "expected variable/function name instead of '"
+ << ns << "'";
- name = move (ns[n - 1].value);
+ // Note: checked for empty below.
+ //
+ if (!ns[n - 1].simple ())
+ fail (loc) << "expected variable/function name instead of '"
+ << ns[n - 1] << "'";
+
+ name = move (ns[n - 1].value);
+ }
}
}
else
@@ -5392,8 +5637,9 @@ namespace build2
{
// Function call.
//
-
next (t, tt); // Get '('.
+ mode (lexer_mode::eval, '@');
+ next_with_attributes (t, tt);
// @@ Should we use (target/scope) qualification (of name) as the
// context in which to call the function? Hm, interesting...
@@ -5413,12 +5659,11 @@ namespace build2
{
// Variable expansion.
//
+ lookup l (lookup_variable (move (qual), move (name), loc));
if (pre_parse_)
continue; // As if empty value.
- lookup l (lookup_variable (move (qual), move (name), loc));
-
if (l.defined ())
result = l.value; // Otherwise leave as NULL result_data.
@@ -5429,8 +5674,10 @@ namespace build2
{
// Context evaluation.
//
-
loc = get_location (t);
+ mode (lexer_mode::eval, '@');
+ next_with_attributes (t, tt);
+
values vs (parse_eval (t, tt, pmode));
tt = peek ();
@@ -5507,7 +5754,7 @@ namespace build2
// Print the location information in case the function fails.
//
auto df = make_diag_frame (
- [&loc, t] (const diag_record& dr)
+ [this, &loc, t] (const diag_record& dr)
{
dr << info (loc) << "while converting " << t << " to string";
});
@@ -6066,6 +6313,9 @@ namespace build2
lookup parser::
lookup_variable (name&& qual, string&& name, const location& loc)
{
+ if (pre_parse_)
+ return lookup ();
+
tracer trace ("parser::lookup_variable", &path_);
const scope* s (nullptr);
@@ -6321,6 +6571,20 @@ namespace build2
return tt;
}
+ inline type parser::
+ next_after_newline (token& t, type& tt, const token& a)
+ {
+ if (tt == type::newline)
+ next (t, tt);
+ else if (tt != type::eos)
+ {
+ diag_record dr (fail (t));
+ dr << "expected newline instead of " << t << " after " << a;
+ }
+
+ return tt;
+ }
+
type parser::
peek ()
{
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index c55e14f..bc01e08 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -26,7 +26,9 @@ namespace build2
explicit
parser (context& c, stage s = stage::rest)
- : fail ("error", &path_), ctx (c), stage_ (s) {}
+ : fail ("error", &path_), info ("info", &path_),
+ ctx (c),
+ stage_ (s) {}
// Issue diagnostics and throw failed in case of an error.
//
@@ -109,6 +111,11 @@ namespace build2
const target_type* = nullptr,
string = string ());
+ void
+ parse_recipe (token&, token_type&,
+ const token&,
+ small_vector<shared_ptr<adhoc_rule>, 1>&);
+
// Ad hoc target names inside < ... >.
//
struct adhoc_names_loc
@@ -246,6 +253,9 @@ namespace build2
build2::value value;
};
+ friend ostream&
+ operator<< (ostream&, const attribute&);
+
struct attributes: small_vector<attribute, 1>
{
location loc; // Start location.
@@ -479,6 +489,12 @@ namespace build2
// If qual is not empty, then its pair member should indicate the kind
// of qualification: ':' -- target, '/' -- scope.
//
+ // Note that this function is called even during pre-parse with the result
+ // unused. In this case a valid name will only be provided for variables
+ // with literal names (for example, $x, $(x)). For computed variables (for
+ // example, $($x ? X : Y)) it will be empty (along with qual, which can
+ // only be non-empty for a computed variable).
+ //
virtual lookup
lookup_variable (name&& qual, string&& name, const location&);
@@ -525,8 +541,14 @@ namespace build2
// If the current token is newline, then get the next token. Otherwise,
// fail unless the current token is eos (i.e., optional newline at the end
- // of stream). If the after argument is not \0, use it in diagnostics as
- // the token after which the newline was expectd.
+ // of stream). Use the after token in diagnostics as the token after which
+ // the newline was expected.
+ //
+ token_type
+ next_after_newline (token&, token_type&, const token& after);
+
+ // As above but the after argument is a single-character token. If it is
+ // \0, then it is ignored.
//
token_type
next_after_newline (token&, token_type&, char after = '\0');
@@ -568,10 +590,10 @@ namespace build2
}
void
- mode (lexer_mode m, char ps = '\0')
+ mode (lexer_mode m, char ps = '\0', uintptr_t d = 0)
{
if (replay_ != replay::play)
- lexer_->mode (m, ps);
+ lexer_->mode (m, ps, nullopt, d);
else
// As a sanity check, make sure the mode matches the next token. Note
// that we don't check the attributes flags or the pair separator
@@ -612,8 +634,10 @@ namespace build2
// with the lexer directly (e.g., the keyword() test). Replays also cannot
// nest. For now we don't enforce any of this.
//
- // Note also that the peeked token is not part of the replay, until it
- // is "got".
+ // Note also that the peeked token is not part of the replay until it is
+ // "got". In particular, this means that we cannot peek past the replay
+ // sequence (since we will get the peeked token as the first token of
+ // the replay).
//
void
replay_save ()
@@ -628,6 +652,8 @@ namespace build2
assert ((replay_ == replay::save && !replay_data_.empty ()) ||
(replay_ == replay::play && replay_i_ == replay_data_.size ()));
+ assert (!peeked_);
+
if (replay_ == replay::save)
replay_path_ = path_; // Save old path.
@@ -638,6 +664,8 @@ namespace build2
void
replay_stop ()
{
+ assert (!peeked_);
+
if (replay_ == replay::play)
path_ = replay_path_; // Restore old path.
@@ -726,6 +754,7 @@ namespace build2
//
protected:
const fail_mark fail;
+ const basic_mark info;
// Parser state.
//
diff --git a/libbuild2/recipe.hxx b/libbuild2/recipe.hxx
index 508c059..efd184a 100644
--- a/libbuild2/recipe.hxx
+++ b/libbuild2/recipe.hxx
@@ -48,6 +48,21 @@ namespace build2
LIBBUILD2_SYMEXPORT extern const recipe noop_recipe;
LIBBUILD2_SYMEXPORT extern const recipe default_recipe;
LIBBUILD2_SYMEXPORT extern const recipe group_recipe;
+
+ // Ad hoc recipe.
+ //
+ // A recipe is a fragment of a rule so we handle ad hoc recipes by
+ // "completing" them to rules.
+ //
+ class adhoc_rule;
+
+ struct adhoc_recipe
+ {
+ // @@ TODO: maybe we should have a small vector of actions (for dump).
+ //
+ build2::action action;
+ shared_ptr<adhoc_rule> rule;
+ };
}
#endif // LIBBUILD2_RECIPE_HXX
diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx
index 3a32eed..773d42e 100644
--- a/libbuild2/rule.cxx
+++ b/libbuild2/rule.cxx
@@ -3,6 +3,8 @@
#include <libbuild2/rule.hxx>
+#include <libbuild2/file.hxx>
+#include <libbuild2/depdb.hxx>
#include <libbuild2/scope.hxx>
#include <libbuild2/target.hxx>
#include <libbuild2/context.hxx>
@@ -10,11 +12,21 @@
#include <libbuild2/filesystem.hxx>
#include <libbuild2/diagnostics.hxx>
+#include <libbuild2/build/script/parser.hxx>
+#include <libbuild2/build/script/runner.hxx>
+
using namespace std;
using namespace butl;
namespace build2
{
+ // rule (vtable)
+ //
+ rule::
+ ~rule ()
+ {
+ }
+
// file_rule
//
// Note that this rule is special. It is the last, fallback rule. If
@@ -97,11 +109,6 @@ namespace build2
recipe file_rule::
apply (action a, target& t) const
{
- /*
- @@ outer
- return noop_recipe;
- */
-
// Update triggers the update of this target's prerequisites so it would
// seem natural that we should also trigger their cleanup. However, this
// possibility is rather theoretical so until we see a real use-case for
@@ -305,4 +312,879 @@ namespace build2
}
const noop_rule noop_rule::instance;
+
+ // adhoc_rule
+ //
+ const dir_path adhoc_rule::recipes_build_dir ("recipes.out");
+
+ bool adhoc_rule::
+ match (action a, target& t, const string& h, optional<action> fallback) const
+ {
+ return !fallback && match (a, t, h);
+ }
+
+ bool adhoc_rule::
+ match (action, target&, const string&) const
+ {
+ return true;
+ }
+
+ // Scope operation callback that cleans up recipe builds.
+ //
+ target_state adhoc_rule::
+ clean_recipes_build (action, const scope& rs, const dir&)
+ {
+ context& ctx (rs.ctx);
+
+ const dir_path& out_root (rs.out_path ());
+
+ dir_path d (out_root / rs.root_extra->build_dir / recipes_build_dir);
+
+ if (exists (d))
+ {
+ if (rmdir_r (ctx, d))
+ {
+ // Clean up build/ if it also became empty (e.g., in case of a build
+ // with a transient configuration).
+ //
+ d = out_root / rs.root_extra->build_dir;
+ if (empty (d))
+ rmdir (ctx, d);
+
+ return target_state::changed;
+ }
+ }
+
+ return target_state::unchanged;
+ }
+
+ // adhoc_script_rule
+ //
+ void adhoc_script_rule::
+ dump (ostream& os, string& ind) const
+ {
+ // Do we need the header?
+ //
+ if (diag)
+ {
+ os << ind << '%';
+
+ if (diag)
+ {
+ os << " [";
+ os << "diag="; to_stream (os, name (*diag), true /* quote */, '@');
+ os << ']';
+ }
+
+ os << endl;
+ }
+
+ os << ind << string (braces, '{') << endl;
+ ind += " ";
+ script::dump (os, ind, script.lines);
+ ind.resize (ind.size () - 2);
+ os << ind << string (braces, '}');
+ }
+
+ bool adhoc_script_rule::
+ match (action a, target& t, const string&, optional<action> fb) const
+ {
+ if (!fb)
+ ;
+ // If this is clean for a file target and we are supplying the update,
+ // then we will also supply the standard clean.
+ //
+ else if (a == perform_clean_id &&
+ *fb == perform_update_id &&
+ t.is_a<file> ())
+ ;
+ else
+ return false;
+
+ // It's unfortunate we have to resort to this but we need to remember this
+ // in apply().
+ //
+ t.data (fb.has_value ());
+
+ return true;
+ }
+
+ recipe adhoc_script_rule::
+ apply (action a, target& t) const
+ {
+ // Derive file names for the target and its ad hoc group members, if any.
+ //
+ for (target* m (&t); m != nullptr; m = m->adhoc_member)
+ {
+ if (auto* p = m->is_a<path_target> ())
+ p->derive_path ();
+ }
+
+ // Inject dependency on the output directory.
+ //
+ // We do it always instead of only if one of the targets is path-based in
+ // case the recipe creates temporary files or some such.
+ //
+ inject_fsdir (a, t);
+
+ // Match prerequisites.
+ //
+ match_prerequisite_members (a, t);
+
+ // See if we are providing the standard clean as a fallback.
+ //
+ if (t.data<bool> ())
+ return &perform_clean_depdb;
+
+ // For update inject dependency on the tool target(s).
+ //
+ // @@ We could see that it's a target and do it but not sure if we should
+ // bother. We dropped this idea of implicit targets in tests. Maybe we
+ // should verify path assigned, like we do there? I think we will have
+ // to.
+ //
+ // if (a == perform_update_id)
+ // inject (a, t, tgt);
+
+ if (a == perform_update_id && t.is_a<file> ())
+ {
+ return [this] (action a, const target& t)
+ {
+ return perform_update_file (a, t);
+ };
+ }
+ else
+ {
+ return [this] (action a, const target& t)
+ {
+ return default_action (a, t);
+ };
+ }
+ }
+
+ target_state adhoc_script_rule::
+ perform_update_file (action a, const target& xt) const
+ {
+ tracer trace ("adhoc_rule::perform_update_file");
+
+ context& ctx (xt.ctx);
+
+ const file& t (xt.as<file> ());
+ const path& tp (t.path ());
+
+ // Update prerequisites and determine if any of them render this target
+ // out-of-date.
+ //
+ timestamp mt (t.load_mtime ());
+ optional<target_state> ps (execute_prerequisites (a, t, mt));
+
+ bool update (!ps);
+
+ // We use depdb to track changes to the script itself, input/output file
+ // names, tools, etc.
+ //
+ depdb dd (tp + ".d");
+ {
+ // First should come the rule name/version.
+ //
+ if (dd.expect ("adhoc 1") != nullptr)
+ l4 ([&]{trace << "rule mismatch forcing update of " << t;});
+
+ // Then the script checksum.
+ //
+ // Ideally, to detect changes to the script semantics, we would hash the
+ // text with all the variables expanded but without executing any
+ // commands. In practice, this is easier said than done (think the set
+ // builtin that receives output of a command that modifies the
+ // filesystem).
+ //
+ // So as the next best thing we are going to hash the unexpanded text as
+ // well as values of all the variables expanded in it (which we get as a
+ // side effect of pre-parsing the script). This approach has a number of
+ // drawbacks:
+ //
+ // - We can't handle computed variable names (e.g., $($x ? X : Y)).
+ //
+ // - We may "overhash" by including variables that are actually
+ // script-local.
+ //
+ // - There are functions like $install.resolve() with result based on
+ // external (to the script) information.
+ //
+ if (dd.expect (checksum) != nullptr)
+ l4 ([&]{trace << "recipe text change forcing update of " << t;});
+
+ // For each variable hash its name, undefined/null/non-null indicator,
+ // and the value if non-null.
+ //
+ // Note that this excludes the special $< and $> variables which we
+ // handle below.
+ //
+ {
+ sha256 cs;
+ names storage;
+
+ for (const string& n: script.vars)
+ {
+ cs.append (n);
+
+ lookup l;
+
+ if (const variable* var = ctx.var_pool.find (n))
+ l = t[var];
+
+ cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3');
+
+ if (l)
+ {
+ storage.clear ();
+ names_view ns (reverse (*l, storage));
+
+ for (const name& n: ns)
+ to_checksum (cs, n);
+ }
+ }
+
+ if (dd.expect (cs.string ()) != nullptr)
+ l4 ([&]{trace << "recipe variable change forcing update of " << t;});
+ }
+
+ // Target and prerequisite sets ($> and $<).
+ //
+ // How should we hash them? We could hash them as target names (i.e.,
+ // the same as the $>/< content) or as paths (only for path-based
+ // targets). While names feel more general, they are also more expensive
+ // to compute. And for path-based targets, path is generally a good
+ // proxy for the target name. Since the bulk of the ad hoc recipes will
+ // presumably be operating exclusively on path-based targets, let's do
+ // it both ways.
+ //
+ {
+ auto hash = [ns = names ()] (sha256& cs, const target& t) mutable
+ {
+ if (const path_target* pt = t.is_a<path_target> ())
+ cs.append (pt->path ().string ());
+ else
+ {
+ ns.clear ();
+ t.as_name (ns);
+ for (const name& n: ns)
+ to_checksum (cs, n);
+ }
+ };
+
+ sha256 tcs;
+ for (const target* m (&t); m != nullptr; m = m->adhoc_member)
+ hash (tcs, *m);
+
+ if (dd.expect (tcs.string ()) != nullptr)
+ l4 ([&]{trace << "target set change forcing update of " << t;});
+
+ sha256 pcs;
+ for (const target* pt: t.prerequisite_targets[a])
+ if (pt != nullptr)
+ hash (pcs, *pt);
+
+ if (dd.expect (pcs.string ()) != nullptr)
+ l4 ([&]{trace << "prerequisite set change forcing update of " << t;});
+ }
+
+ // Then the tools checksums.
+ //
+ // @@ TODO: obtain checksums of all the targets used as commands in
+ // the script.
+ //
+ //if (dd.expect (csum) != nullptr)
+ // l4 ([&]{trace << "compiler mismatch forcing update of " << t;});
+ }
+
+ // Update if depdb mismatch.
+ //
+ if (dd.writing () || dd.mtime > mt)
+ update = true;
+
+ dd.close ();
+
+ // If nothing changed, then we are done.
+ //
+ if (!update)
+ return *ps;
+
+ if (verb == 1)
+ {
+ // @@ TODO:
+ //
+ // - derive diag if absent (should probably do in match?)
+ //
+ // - we are printing target, not source (like in most other places)
+ //
+ // - printing of ad hoc target group (the {hxx cxx}{foo} idea)
+ //
+ // - if we are printing prerequisites, should we print all of them
+ // (including tools)?
+ //
+
+ text << (diag ? diag->c_str () : "adhoc") << ' ' << t;
+ }
+
+ if (!ctx.dry_run || verb >= 2)
+ {
+ const scope& bs (t.base_scope ());
+
+ build::script::environment e (a, t, script.temp_dir);
+ build::script::parser p (ctx);
+ build::script::default_runner r;
+ p.execute (*bs.root_scope (), bs, e, script, r);
+
+ if (!ctx.dry_run)
+ dd.check_mtime (tp);
+ }
+
+ t.mtime (system_clock::now ());
+ return target_state::changed;
+ }
+
+ target_state adhoc_script_rule::
+ default_action (action a, const target& t) const
+ {
+ tracer trace ("adhoc_rule::default_action");
+
+ context& ctx (t.ctx);
+
+ execute_prerequisites (a, t);
+
+ if (verb == 1)
+ {
+ // @@ TODO: as above
+
+ text << (diag ? diag->c_str () : "adhoc") << ' ' << t;
+ }
+
+ if (!ctx.dry_run || verb >= 2)
+ {
+ const scope& bs (t.base_scope ());
+
+ build::script::environment e (a, t, script.temp_dir);
+ build::script::parser p (ctx);
+ build::script::default_runner r;
+ p.execute (*bs.root_scope (), bs, e, script, r);
+ }
+
+ return target_state::changed;
+ }
+
+ // cxx_rule
+ //
+ bool cxx_rule::
+ match (action, target&, const string&) const
+ {
+ return true;
+ }
+
+ // adhoc_cxx_rule
+ //
+ adhoc_cxx_rule::
+ ~adhoc_cxx_rule ()
+ {
+ delete impl.load (memory_order_relaxed); // Serial execution.
+ }
+
+ void adhoc_cxx_rule::
+ dump (ostream& os, string& ind) const
+ {
+ // @@ TODO: indentation in multi-line recipes is off (would need to insert
+ // indentation after every newline).
+ //
+ os << ind << string (braces, '{') << " c++" << endl
+ << ind << code
+ << ind << string (braces, '}');
+ }
+
+ // From module.cxx.
+ //
+ void
+ create_module_context (context&, const location&);
+
+ const target&
+ update_in_module_context (context&, const scope&, names tgt,
+ const location&, const path& bf);
+
+ pair<void*, void*>
+ load_module_library (const path& lib, const string& sym, string& err);
+
+ bool adhoc_cxx_rule::
+ match (action a, target& t, const string& hint) const
+ {
+ tracer trace ("adhoc_cxx_rule::match");
+
+ context& ctx (t.ctx);
+ const scope& rs (t.root_scope ());
+
+ // The plan is to reduce this to the build system module case as much as
+ // possible. Specifically, we switch to the load phase, create a module-
+ // like library with the recipe text as a rule implementation, then build
+ // and load it.
+ //
+ // Since the recipe can be shared among multiple targets, several threads
+ // can all be trying to do this in parallel.
+ //
+ // We use the relaxed memory order here because any change must go through
+ // the serial load phase. In other words, all we need here is atomicity
+ // with ordering/visibility provided by the phase mutex.
+ //
+ cxx_rule* impl (this->impl.load (memory_order_relaxed));
+
+ while (impl == nullptr) // Breakout loop.
+ {
+ // Switch the phase to (serial) load and re-check.
+ //
+ phase_switch ps (ctx, run_phase::load);
+
+ if ((impl = this->impl.load (memory_order_relaxed)) != nullptr)
+ break;
+
+ using create_function = cxx_rule* (const location&, target_state);
+ using load_function = create_function* ();
+
+ // The only way to guarantee that the name of our module matches its
+ // implementation is to base the name on the implementation hash (plus
+ // the language, in case we support other compiled implementations in
+ // the future).
+ //
+ // Unfortunately, this means we will be creating a new project (and
+ // leaving behind the old one as garbage) for every change to the
+ // recipe. On the other hand, if the recipe is moved around unchanged,
+ // we will reuse the same project. In fact, two different recipes (e.g.,
+ // in different buildfiles) with the same text will share the project.
+ //
+ // The fact that we don't incorporate the recipe location into the hash
+ // but include it in the source (in the form of the #line directive; see
+ // below) has its own problems. If we do nothing extra here, then if a
+ // "moved" but otherwise unchanged recipe is updated (for example,
+ // because of changes in the build system core), then we may end up with
+ // bogus location in the diagnostics.
+ //
+ // The straightforward solution would be to just update the location in
+ // the source code if it has changed. This, however, will lead to
+ // unnecessary and probably surprising recompilations since any line
+ // count change before the recipe will trigger this update. One key
+ // observation here is that we need accurate location information only
+ // if we are going to recompile the recipe but the change to location
+ // itself does not render the recipe out of date. So what we are going to do
+ // is factor the location information into its own small header and then
+ // keep it up-to-date without changing its modification time.
+ //
+ // This works well if the project is not shared by multiple recipes.
+ // However, if we have recipes in several buildfiles with identical
+ // text, then the location information may end up yo-yo'ing depending on
+ // which recipe got here first.
+ //
+ // There doesn't seem to be much we can do about it without incurring
+ // other drawbacks/overheads. So the answer is for the user to use an ad
+ // hoc rule with the common implementation instead of a bunch of
+ // duplicate recipes.
+ //
+ string id;
+ {
+ sha256 cs;
+ cs.append ("c++");
+ cs.append (code);
+ id = cs.abbreviated_string (12);
+ }
+
+ dir_path pd (rs.out_path () /
+ rs.root_extra->build_dir /
+ recipes_build_dir /= id);
+
+ path bf (pd / std_buildfile_file);
+
+ string sym ("load_" + id);
+
+ // Check whether the file exists and its last line matches the specified
+ // signature.
+ //
+ // Note: we use the last instead of the first line for extra protection
+ // against incomplete writes.
+ //
+ auto check_sig = [] (const path& f, const string& s) -> bool
+ {
+ try
+ {
+ if (!file_exists (f))
+ return false;
+
+ ifdstream ifs (f);
+
+ string l;
+ while (ifs.peek () != ifdstream::traits_type::eof ())
+ getline (ifs, l);
+
+ return l == s;
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to read " << f << ": " << e << endf;
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to access " << f << ": " << e << endf;
+ }
+ };
+
+ bool nested (ctx.module_context == &ctx);
+
+ // Create the build context if necessary.
+ //
+ if (ctx.module_context == nullptr)
+ {
+ if (!ctx.module_context_storage)
+ fail (loc) << "unable to update ad hoc recipe for target " << t <<
+ info << "building of ad hoc recipes is disabled";
+
+ create_module_context (ctx, loc);
+ }
+
+ // "Switch" to the module context.
+ //
+ context& ctx (*t.ctx.module_context);
+
+ const uint16_t verbosity (3); // Project creation command verbosity.
+
+ // Project and location signatures.
+ //
+ // Specifically, we update the project version when changing anything
+ // which would make the already existing projects unusable.
+ //
+ const string& lf (!loc.file.path.empty ()
+ ? loc.file.path.string ()
+ : loc.file.name ? *loc.file.name : string ());
+
+ const string psig ("# c++ 1");
+ const string lsig ("// " + lf + ':' + to_string (loc.line));
+
+ // Check whether we need to (re)create the project.
+ //
+ optional<bool> altn (false); // Standard naming scheme.
+ bool create (!is_src_root (pd, altn));
+
+ if (!create && (create = !check_sig (bf, psig)))
+ rmdir_r (ctx, pd, false, verbosity); // Never dry-run.
+
+ path of;
+ ofdstream ofs;
+
+ if (create)
+ try
+ {
+ // Write ad hoc config.build that loads the ~build2 configuration.
+ // This way the configuration will be always in sync with ~build2
+ // and we can update the recipe manually (e.g., for debugging).
+ //
+ create_project (
+ pd,
+ dir_path (), /* amalgamation */
+ {}, /* boot_modules */
+ "cxx.std = latest", /* root_pre */
+ {"cxx."}, /* root_modules */
+ "", /* root_post */
+ string ("config"), /* config_module */
+ string ("config.config.load = ~build2"), /* config_file */
+ false, /* buildfile */
+ "build2 core", /* who */
+ verbosity); /* verbosity */
+
+
+ // Write the rule source file.
+ //
+ of = path (pd / "rule.cxx");
+
+ if (verb >= verbosity)
+ text << (verb >= 2 ? "cat >" : "save ") << of;
+
+ ofs.open (of);
+
+ ofs << "#include \"location.hxx\"" << '\n'
+ << '\n';
+
+ // Include every header that can plausibly be needed by a rule.
+ //
+ ofs << "#include <libbuild2/types.hxx>" << '\n'
+ << "#include <libbuild2/forward.hxx>" << '\n'
+ << "#include <libbuild2/utility.hxx>" << '\n'
+ << '\n'
+ << "#include <libbuild2/file.hxx>" << '\n'
+ << "#include <libbuild2/rule.hxx>" << '\n'
+ << "#include <libbuild2/depdb.hxx>" << '\n'
+ << "#include <libbuild2/scope.hxx>" << '\n'
+ << "#include <libbuild2/target.hxx>" << '\n'
+ << "#include <libbuild2/context.hxx>" << '\n'
+ << "#include <libbuild2/variable.hxx>" << '\n'
+ << "#include <libbuild2/algorithm.hxx>" << '\n'
+ << "#include <libbuild2/filesystem.hxx>" << '\n'
+ << "#include <libbuild2/diagnostics.hxx>" << '\n'
+ << '\n';
+
+ // Normally the recipe code will have one level of indentation so
+ // let's not indent the namespace level to match.
+ //
+ ofs << "namespace build2" << '\n'
+ << "{" << '\n'
+ << '\n';
+
+ // If we want the user to be able to supply a custom constructor, then
+ // we have to give the class a predictable name (i.e., we cannot use
+ // id as part of its name) and put it into an unnamed namespace. One
+ // clever idea is to call the class `constructor` but the name could
+ // also be used for a custom destructor (still could work) or for name
+ // qualification (would definitely look bizarre).
+ //
+ // In this light the most natural name is probably `rule`. The issue
+ // is we already have this name in the build2 namespace (and it's our
+ // indirect base). In fact, any name that we choose could in the
+ // future conflict with something in that namespace so maybe it makes
+ // sense to bite the bullet and pick a name that is least likely to be
+ // used by the user directly (can always use cxx_rule instead).
+ //
+ ofs << "namespace" << '\n'
+ << "{" << '\n'
+ << "class rule: public cxx_rule" << '\n'
+ << "{" << '\n'
+ << "public:" << '\n'
+ << '\n';
+
+ // Inherit base constructor. This way the user may provide their own
+ // but don't have to.
+ //
+ ofs << " using cxx_rule::cxx_rule;" << '\n'
+ << '\n';
+
+ // An extern "C" function cannot throw which can happen in case of a
+ // user-defined constructor. So we need an extra level of indirection.
+ // We incorporate id to make sure it doesn't conflict with anything
+ // user-defined.
+ //
+ ofs << " static cxx_rule*" << '\n'
+ << " create_" << id << " (const location& l, target_state s)" << '\n'
+ << " {" << '\n'
+ << " return new rule (l, s);" << '\n'
+ << " }" << '\n'
+ << '\n';
+
+ // Use the #line directive to point diagnostics to the code in the
+ // buildfile. Note that there is no easy way to restore things to
+ // point back to the source file (other than another #line with a line
+ // and a file). Seeing that we don't have much after, let's not bother
+ // for now.
+ //
+ ofs << "#line RECIPE_LINE RECIPE_FILE" << '\n';
+
+ // Note that the code always includes trailing newline.
+ //
+ ofs << code
+ << "};" << '\n'
+ << '\n';
+
+ // Add an alias that we can use unambiguously in the load function.
+ //
+ ofs << "using rule_" << id << " = rule;" << '\n'
+ << "}" << '\n'
+ << '\n';
+
+ // Entry point.
+ //
+ ofs << "extern \"C\"" << '\n'
+ << "#ifdef _WIN32" << '\n'
+ << "__declspec(dllexport)" << '\n'
+ << "#endif" << '\n'
+ << "cxx_rule* (*" << sym << " ()) (const location&, target_state)" << '\n'
+ << "{" << '\n'
+ << " return &rule_" << id << "::create_" << id << ";" << '\n'
+ << "}" << '\n'
+ << '\n';
+
+ ofs << "}" << '\n';
+
+ ofs.close ();
+
+
+ // Write buildfile.
+ //
+ of = bf;
+
+ if (verb >= verbosity)
+ text << (verb >= 2 ? "cat >" : "save ") << of;
+
+ ofs.open (of);
+
+ ofs << "import imp_libs += build2%lib{build2}" << '\n'
+ << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs" << '\n'
+ << '\n'
+ << psig << '\n';
+
+ ofs.close ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to write to " << of << ": " << e;
+ }
+
+ // Update the library target in the module context.
+ //
+ const target* l (nullptr);
+ do // Breakout loop.
+ {
+ // Load the project in the module context.
+ //
+ // Note that it's possible it has already been loaded (see above about
+ // the id calculation).
+ //
+ scope& rs (load_project (ctx, pd, pd, false /* forwarded */));
+
+ auto find_target = [&ctx, &rs, &pd, &id] ()
+ {
+ const target_type* tt (rs.find_target_type ("libs"));
+ assert (tt != nullptr);
+
+ const target* t (
+ ctx.targets.find (*tt, pd, dir_path () /* out */, id));
+ assert (t != nullptr);
+
+ return t;
+ };
+
+ // If the project has already been loaded then, as an optimization,
+ // check if the target has already been updated (this will make a
+ // difference if we have identical recipes in several buildfiles,
+ // especially to the location update that comes next).
+ //
+ if (!source_once (rs, rs, bf))
+ {
+ l = find_target ();
+
+ if (l->executed_state (perform_update_id) != target_state::unknown)
+ break;
+ }
+
+ // Create/update the recipe location header.
+ //
+ // For update, preserve the file timestamp in order not to render the
+ // recipe out of date.
+ //
+ of = path (pd / "location.hxx");
+ if (!check_sig (of, lsig))
+ try
+ {
+ entry_time et (file_time (of));
+
+ if (verb >= verbosity)
+ text << (verb >= 2 ? "cat >" : "save ") << of;
+
+ ofs.open (of);
+
+ // Recipe file and line for the #line directive above. Note that the
+ // code starts from the next line thus +1. We also need to escape
+ // backslashes (Windows paths).
+ //
+ ofs << "#define RECIPE_FILE \"" << sanitize_strlit (lf) << '"'<< '\n'
+ << "#define RECIPE_LINE " << loc.line + 1 << '\n'
+ << '\n'
+ << lsig << '\n';
+
+ ofs.close ();
+
+ if (et.modification != timestamp_nonexistent)
+ file_time (of, et);
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to write to " << of << ": " << e;
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to get/set timestamp for " << of << ": " << e;
+ }
+
+ if (nested)
+ {
+ // This means there is a perform update action already in progress
+ // in this context. So we are going to switch the phase and
+ // perform direct match and update (similar to how we do this for
+ // generated headers).
+ //
+ // Note that since neither match nor execute are serial phases, it
+ // means other targets in this context can be matched and executed
+ // in parallel with us.
+ //
+ if (l == nullptr)
+ l = find_target ();
+
+ phase_switch mp (ctx, run_phase::match);
+ if (build2::match (perform_update_id, *l) != target_state::unchanged)
+ {
+ phase_switch ep (ctx, run_phase::execute);
+ execute (a, *l);
+ }
+ }
+ else
+ {
+ // Cutoff the existing diagnostics stack and push our own entry.
+ //
+ diag_frame::stack_guard diag_cutoff (nullptr);
+
+ auto df = make_diag_frame (
+ [this, &t] (const diag_record& dr)
+ {
+ dr << info (loc) << "while updating ad hoc recipe for target "
+ << t;
+ });
+
+ l = &update_in_module_context (
+ ctx, rs, names {name (pd, "libs", id)},
+ loc, bf);
+ }
+ } while (false);
+
+ // Load the library.
+ //
+ const path& lib (l->as<file> ().path ());
+
+ // Note again that it's possible the library has already been loaded
+ // (see above about the id calculation).
+ //
+ string err;
+ pair<void*, void*> hs (load_module_library (lib, sym, err));
+
+ // These normally shouldn't happen unless something is seriously broken.
+ //
+ if (hs.first == nullptr)
+ fail (loc) << "unable to load recipe library " << lib << ": " << err;
+
+ if (hs.second == nullptr)
+ fail (loc) << "unable to lookup " << sym << " in recipe library "
+ << lib << ": " << err;
+
+ {
+ auto df = make_diag_frame (
+ [this](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info (loc) << "while initializing ad hoc recipe";
+ });
+
+ load_function* lf (function_cast<load_function*> (hs.second));
+ create_function* cf (lf ());
+
+ impl = cf (loc, l->executed_state (perform_update_id));
+ this->impl.store (impl, memory_order_relaxed); // Still in load phase.
+ }
+ }
+
+ return impl->match (a, t, hint);
+ }
+
+ recipe adhoc_cxx_rule::
+ apply (action a, target& t) const
+ {
+ return impl.load (memory_order_relaxed)->apply (a, t);
+ }
}
diff --git a/libbuild2/rule.hxx b/libbuild2/rule.hxx
index 9eab1f6..efa4ec3 100644
--- a/libbuild2/rule.hxx
+++ b/libbuild2/rule.hxx
@@ -12,6 +12,8 @@
#include <libbuild2/target.hxx>
#include <libbuild2/recipe.hxx>
+#include <libbuild2/build/script/script.hxx>
+
#include <libbuild2/export.hxx>
namespace build2
@@ -22,7 +24,7 @@ namespace build2
//
// Note: match() is only called once but may not be followed by apply().
//
- class rule
+ class LIBBUILD2_SYMEXPORT rule
{
public:
virtual bool
@@ -33,6 +35,9 @@ namespace build2
rule () = default;
+ virtual
+ ~rule ();
+
rule (const rule&) = delete;
rule& operator= (const rule&) = delete;
};
@@ -108,6 +113,141 @@ namespace build2
noop_rule () {}
static const noop_rule instance;
};
+
+ // Ad hoc rule.
+ //
+ // Note: not exported.
+ // Abstract (dump() is pure virtual); see the derived script and C++ rules.
+ class adhoc_rule: public rule
+ {
+ public:
+ location_value loc; // Buildfile location of the recipe.
+ size_t braces; // Number of braces in multi-brace tokens.
+
+ adhoc_rule (const location& l, size_t b)
+ : loc (l),
+ braces (b),
+ rule_match ("adhoc", static_cast<const rule&> (*this)) {} // Refers to *this.
+
+ public:
+ // Some of the operations come in compensating pairs, such as update and
+ // clean, install and uninstall. An ad hoc rule implementation may choose
+ // to provide a fallback implementation of a compensating operation if it
+ // is providing the other half (passed in the fallback argument).
+ //
+ // The default implementation calls rule::match() if fallback is absent
+ // and returns false if fallback is present. So an implementation that
+ // doesn't care about this semantics can implement the straight rule
+ // interface.
+ //
+ virtual bool
+ match (action, target&, const string&, optional<action> fallback) const;
+
+ virtual bool
+ match (action, target&, const string&) const override; // rule interface.
+
+ virtual void
+ dump (ostream&, string& indentation) const = 0;
+
+ // Implementation details.
+ //
+ public:
+ build2::rule_match rule_match; // Initialized in the constructor as "adhoc".
+
+ static const dir_path recipes_build_dir; // NOTE(review): presumably where recipes are built -- confirm.
+
+ // Scope operation callback that cleans up ad hoc recipe builds.
+ //
+ static target_state
+ clean_recipes_build (action, const scope&, const dir&);
+ };
+
+ // Ad hoc script rule.
+ //
+ // Note: not exported and should not be used directly (i.e., registered).
+ // Overrides the fallback-aware (4-argument) match() of adhoc_rule.
+ class adhoc_script_rule: public adhoc_rule
+ {
+ public:
+ virtual bool
+ match (action, target&, const string&, optional<action>) const override;
+
+ virtual recipe
+ apply (action, target&) const override;
+
+ target_state
+ perform_update_file (action, const target&) const;
+
+ target_state
+ default_action (action, const target&) const;
+
+ virtual void
+ dump (ostream&, string&) const override;
+
+ using script_type = build::script::script;
+
+ adhoc_script_rule (optional<string> d, const location& l, size_t b)
+ : adhoc_rule (l, b), diag (move (d)) {} // checksum and script are not set here.
+
+ public:
+ const optional<string> diag; // Command name for low-verbosity diag.
+ string checksum; // Script text hashsum.
+ script_type script; // Of type build::script::script (see alias above).
+
+ };
+
+ // Ad hoc C++ rule.
+ //
+ // Note: exported but should not be used directly (i.e., registered).
+ // This is the type that adhoc_cxx_rule points to with its impl member.
+ class LIBBUILD2_SYMEXPORT cxx_rule: public rule
+ {
+ public:
+
+ // A robust recipe may want to incorporate the recipe_state into its
+ // up-to-date decision as if the recipe library was a prerequisite (it
+ // cannot be injected as a real prerequisite since it's from a different
+ // build context).
+ //
+ const location recipe_loc; // Buildfile location of the recipe.
+ const target_state recipe_state; // State of recipe library target.
+
+ cxx_rule (const location& l, target_state s)
+ : recipe_loc (l), recipe_state (s) {}
+
+ // Return true by default.
+ // (Note that apply() is not overridden in this class.)
+ virtual bool
+ match (action, target&, const string&) const override;
+ };
+
+ // Note: not exported.
+ // Ad hoc rule that keeps the recipe code and a cxx_rule pointer (impl).
+ class adhoc_cxx_rule: public adhoc_rule
+ {
+ public:
+ virtual bool
+ match (action, target&, const string&) const override;
+
+ virtual recipe
+ apply (action, target&) const override;
+
+ virtual void
+ dump (ostream&, string&) const override;
+
+ adhoc_cxx_rule (string c, const location& l, size_t b)
+ : adhoc_rule (l, b), code (move (c)), impl (nullptr) {} // impl starts null.
+
+ virtual
+ ~adhoc_cxx_rule () override;
+
+ public:
+ // Note that this recipe (rule instance) can be shared between multiple
+ // targets which could all be matched in parallel.
+ // Hence impl is atomic (and mutable since match() and apply() are const).
+ const string code; // Set once from the constructor argument.
+ mutable atomic<cxx_rule*> impl;
+ };
}
#endif // LIBBUILD2_RULE_HXX
diff --git a/libbuild2/script/builtin-options.cxx b/libbuild2/script/builtin-options.cxx
new file mode 100644
index 0000000..2002764
--- /dev/null
+++ b/libbuild2/script/builtin-options.cxx
@@ -0,0 +1,661 @@
+// -*- C++ -*-
+//
+// This file was generated by CLI, a command line interface
+// compiler for C++.
+//
+
+// Begin prologue.
+//
+//
+// End prologue.
+
+#include <libbuild2/script/builtin-options.hxx>
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <ostream>
+#include <sstream>
+
+namespace build2
+{
+ namespace script
+ {
+ namespace cli
+ {
+ // unknown_option
+ //
+ unknown_option::
+ ~unknown_option () throw ()
+ {
+ }
+
+ void unknown_option::
+ print (::std::ostream& os) const
+ {
+ os << "unknown option '" << option ().c_str () << "'";
+ }
+
+ const char* unknown_option::
+ what () const throw ()
+ {
+ return "unknown option";
+ }
+
+ // unknown_argument
+ //
+ unknown_argument::
+ ~unknown_argument () throw ()
+ {
+ }
+
+ void unknown_argument::
+ print (::std::ostream& os) const
+ {
+ os << "unknown argument '" << argument ().c_str () << "'";
+ }
+
+ const char* unknown_argument::
+ what () const throw ()
+ {
+ return "unknown argument";
+ }
+
+ // missing_value
+ //
+ missing_value::
+ ~missing_value () throw ()
+ {
+ }
+
+ void missing_value::
+ print (::std::ostream& os) const
+ {
+ os << "missing value for option '" << option ().c_str () << "'";
+ }
+
+ const char* missing_value::
+ what () const throw ()
+ {
+ return "missing option value";
+ }
+
+ // invalid_value
+ //
+ invalid_value::
+ ~invalid_value () throw ()
+ {
+ }
+
+ void invalid_value::
+ print (::std::ostream& os) const
+ {
+ os << "invalid value '" << value ().c_str () << "' for option '"
+ << option ().c_str () << "'";
+
+ if (!message ().empty ())
+ os << ": " << message ().c_str ();
+ }
+
+ const char* invalid_value::
+ what () const throw ()
+ {
+ return "invalid option value";
+ }
+
+ // eos_reached
+ //
+ void eos_reached::
+ print (::std::ostream& os) const
+ {
+ os << what ();
+ }
+
+ const char* eos_reached::
+ what () const throw ()
+ {
+ return "end of argument stream reached";
+ }
+
+ // scanner
+ //
+ scanner::
+ ~scanner ()
+ {
+ }
+
+ // argv_scanner
+ //
+ bool argv_scanner::
+ more ()
+ {
+ return i_ < argc_;
+ }
+
+ const char* argv_scanner::
+ peek ()
+ {
+ if (i_ < argc_)
+ return argv_[i_];
+ else
+ throw eos_reached ();
+ }
+
+ const char* argv_scanner::
+ next ()
+ {
+ if (i_ < argc_)
+ {
+ const char* r (argv_[i_]);
+
+ if (erase_)
+ {
+ for (int i (i_ + 1); i < argc_; ++i)
+ argv_[i - 1] = argv_[i];
+
+ --argc_;
+ argv_[argc_] = 0;
+ }
+ else
+ ++i_;
+
+ return r;
+ }
+ else
+ throw eos_reached ();
+ }
+
+ void argv_scanner::
+ skip ()
+ {
+ if (i_ < argc_)
+ ++i_;
+ else
+ throw eos_reached ();
+ }
+
+ // vector_scanner
+ //
+ bool vector_scanner::
+ more ()
+ {
+ return i_ < v_.size ();
+ }
+
+ const char* vector_scanner::
+ peek ()
+ {
+ if (i_ < v_.size ())
+ return v_[i_].c_str ();
+ else
+ throw eos_reached ();
+ }
+
+ const char* vector_scanner::
+ next ()
+ {
+ if (i_ < v_.size ())
+ return v_[i_++].c_str ();
+ else
+ throw eos_reached ();
+ }
+
+ void vector_scanner::
+ skip ()
+ {
+ if (i_ < v_.size ())
+ ++i_;
+ else
+ throw eos_reached ();
+ }
+
+ template <typename X>
+ struct parser
+ {
+ static void
+ parse (X& x, bool& xs, scanner& s)
+ {
+ using namespace std;
+
+ const char* o (s.next ());
+ if (s.more ())
+ {
+ string v (s.next ());
+ istringstream is (v);
+ if (!(is >> x && is.peek () == istringstream::traits_type::eof ()))
+ throw invalid_value (o, v);
+ }
+ else
+ throw missing_value (o);
+
+ xs = true;
+ }
+ };
+
+ template <>
+ struct parser<bool>
+ {
+ static void
+ parse (bool& x, scanner& s)
+ {
+ s.next ();
+ x = true;
+ }
+ };
+
+ template <>
+ struct parser<std::string>
+ {
+ static void
+ parse (std::string& x, bool& xs, scanner& s)
+ {
+ const char* o (s.next ());
+
+ if (s.more ())
+ x = s.next ();
+ else
+ throw missing_value (o);
+
+ xs = true;
+ }
+ };
+
+ template <typename X>
+ struct parser<std::vector<X> >
+ {
+ static void
+ parse (std::vector<X>& c, bool& xs, scanner& s)
+ {
+ X x;
+ bool dummy;
+ parser<X>::parse (x, dummy, s);
+ c.push_back (x);
+ xs = true;
+ }
+ };
+
+ template <typename X>
+ struct parser<std::set<X> >
+ {
+ static void
+ parse (std::set<X>& c, bool& xs, scanner& s)
+ {
+ X x;
+ bool dummy;
+ parser<X>::parse (x, dummy, s);
+ c.insert (x);
+ xs = true;
+ }
+ };
+
+ template <typename K, typename V>
+ struct parser<std::map<K, V> >
+ {
+ static void
+ parse (std::map<K, V>& m, bool& xs, scanner& s)
+ {
+ const char* o (s.next ());
+
+ if (s.more ())
+ {
+ std::string ov (s.next ());
+ std::string::size_type p = ov.find ('=');
+
+ K k = K ();
+ V v = V ();
+ std::string kstr (ov, 0, p);
+ std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ()));
+
+ int ac (2);
+ char* av[] =
+ {
+ const_cast<char*> (o), 0
+ };
+
+ bool dummy;
+ if (!kstr.empty ())
+ {
+ av[1] = const_cast<char*> (kstr.c_str ());
+ argv_scanner s (0, ac, av);
+ parser<K>::parse (k, dummy, s);
+ }
+
+ if (!vstr.empty ())
+ {
+ av[1] = const_cast<char*> (vstr.c_str ());
+ argv_scanner s (0, ac, av);
+ parser<V>::parse (v, dummy, s);
+ }
+
+ m[k] = v;
+ }
+ else
+ throw missing_value (o);
+
+ xs = true;
+ }
+ };
+
+ template <typename X, typename T, T X::*M>
+ void
+ thunk (X& x, scanner& s)
+ {
+ parser<T>::parse (x.*M, s);
+ }
+
+ template <typename X, typename T, T X::*M, bool X::*S>
+ void
+ thunk (X& x, scanner& s)
+ {
+ parser<T>::parse (x.*M, x.*S, s);
+ }
+ }
+ }
+}
+
+#include <map>
+#include <cstring>
+
+namespace build2
+{
+ namespace script
+ {
+ // set_options
+ //
+
+ set_options::
+ set_options ()
+ : exact_ (),
+ newline_ (),
+ whitespace_ ()
+ {
+ }
+
+ set_options::
+ set_options (int& argc,
+ char** argv,
+ bool erase,
+ ::build2::script::cli::unknown_mode opt,
+ ::build2::script::cli::unknown_mode arg)
+ : exact_ (),
+ newline_ (),
+ whitespace_ ()
+ {
+ ::build2::script::cli::argv_scanner s (argc, argv, erase);
+ _parse (s, opt, arg);
+ }
+
+ set_options::
+ set_options (int start,
+ int& argc,
+ char** argv,
+ bool erase,
+ ::build2::script::cli::unknown_mode opt,
+ ::build2::script::cli::unknown_mode arg)
+ : exact_ (),
+ newline_ (),
+ whitespace_ ()
+ {
+ ::build2::script::cli::argv_scanner s (start, argc, argv, erase);
+ _parse (s, opt, arg);
+ }
+
+ set_options::
+ set_options (int& argc,
+ char** argv,
+ int& end,
+ bool erase,
+ ::build2::script::cli::unknown_mode opt,
+ ::build2::script::cli::unknown_mode arg)
+ : exact_ (),
+ newline_ (),
+ whitespace_ ()
+ {
+ ::build2::script::cli::argv_scanner s (argc, argv, erase);
+ _parse (s, opt, arg);
+ end = s.end ();
+ }
+
+ set_options::
+ set_options (int start,
+ int& argc,
+ char** argv,
+ int& end,
+ bool erase,
+ ::build2::script::cli::unknown_mode opt,
+ ::build2::script::cli::unknown_mode arg)
+ : exact_ (),
+ newline_ (),
+ whitespace_ ()
+ {
+ ::build2::script::cli::argv_scanner s (start, argc, argv, erase);
+ _parse (s, opt, arg);
+ end = s.end ();
+ }
+
+ set_options::
+ set_options (::build2::script::cli::scanner& s,
+ ::build2::script::cli::unknown_mode opt,
+ ::build2::script::cli::unknown_mode arg)
+ : exact_ (),
+ newline_ (),
+ whitespace_ ()
+ {
+ _parse (s, opt, arg);
+ }
+
+ typedef
+ std::map<std::string, void (*) (set_options&, ::build2::script::cli::scanner&)>
+ _cli_set_options_map;
+
+ static _cli_set_options_map _cli_set_options_map_;
+
+ struct _cli_set_options_map_init
+ {
+ _cli_set_options_map_init ()
+ {
+ _cli_set_options_map_["--exact"] =
+ &::build2::script::cli::thunk< set_options, bool, &set_options::exact_ >;
+ _cli_set_options_map_["-e"] =
+ &::build2::script::cli::thunk< set_options, bool, &set_options::exact_ >;
+ _cli_set_options_map_["--newline"] =
+ &::build2::script::cli::thunk< set_options, bool, &set_options::newline_ >;
+ _cli_set_options_map_["-n"] =
+ &::build2::script::cli::thunk< set_options, bool, &set_options::newline_ >;
+ _cli_set_options_map_["--whitespace"] =
+ &::build2::script::cli::thunk< set_options, bool, &set_options::whitespace_ >;
+ _cli_set_options_map_["-w"] =
+ &::build2::script::cli::thunk< set_options, bool, &set_options::whitespace_ >;
+ }
+ };
+
+ static _cli_set_options_map_init _cli_set_options_map_init_;
+
+ bool set_options::
+ _parse (const char* o, ::build2::script::cli::scanner& s)
+ {
+ _cli_set_options_map::const_iterator i (_cli_set_options_map_.find (o));
+
+ if (i != _cli_set_options_map_.end ())
+ {
+ (*(i->second)) (*this, s);
+ return true;
+ }
+
+ return false;
+ }
+
+ bool set_options::
+ _parse (::build2::script::cli::scanner& s,
+ ::build2::script::cli::unknown_mode opt_mode,
+ ::build2::script::cli::unknown_mode arg_mode)
+ {
+ // Can't skip combined flags (--no-combined-flags).
+ //
+ assert (opt_mode != ::build2::script::cli::unknown_mode::skip);
+
+ bool r = false;
+ bool opt = true;
+
+ while (s.more ())
+ {
+ const char* o = s.peek ();
+
+ if (std::strcmp (o, "--") == 0)
+ {
+ opt = false;
+ s.skip ();
+ r = true;
+ continue;
+ }
+
+ if (opt)
+ {
+ if (_parse (o, s))
+ {
+ r = true;
+ continue;
+ }
+
+ if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0')
+ {
+ // Handle combined option values.
+ //
+ std::string co;
+ if (const char* v = std::strchr (o, '='))
+ {
+ co.assign (o, 0, v - o);
+ ++v;
+
+ int ac (2);
+ char* av[] =
+ {
+ const_cast<char*> (co.c_str ()),
+ const_cast<char*> (v)
+ };
+
+ ::build2::script::cli::argv_scanner ns (0, ac, av);
+
+ if (_parse (co.c_str (), ns))
+ {
+ // Parsed the option but not its value?
+ //
+ if (ns.end () != 2)
+ throw ::build2::script::cli::invalid_value (co, v);
+
+ s.next ();
+ r = true;
+ continue;
+ }
+ else
+ {
+ // Set the unknown option and fall through.
+ //
+ o = co.c_str ();
+ }
+ }
+
+ // Handle combined flags.
+ //
+ char cf[3];
+ {
+ const char* p = o + 1;
+ for (; *p != '\0'; ++p)
+ {
+ if (!((*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z') ||
+ (*p >= '0' && *p <= '9')))
+ break;
+ }
+
+ if (*p == '\0')
+ {
+ for (p = o + 1; *p != '\0'; ++p)
+ {
+ std::strcpy (cf, "-");
+ cf[1] = *p;
+ cf[2] = '\0';
+
+ int ac (1);
+ char* av[] =
+ {
+ cf
+ };
+
+ ::build2::script::cli::argv_scanner ns (0, ac, av);
+
+ if (!_parse (cf, ns))
+ break;
+ }
+
+ if (*p == '\0')
+ {
+ // All handled.
+ //
+ s.next ();
+ r = true;
+ continue;
+ }
+ else
+ {
+ // Set the unknown option and fall through.
+ //
+ o = cf;
+ }
+ }
+ }
+
+ switch (opt_mode)
+ {
+ case ::build2::script::cli::unknown_mode::skip:
+ {
+ s.skip ();
+ r = true;
+ continue;
+ }
+ case ::build2::script::cli::unknown_mode::stop:
+ {
+ break;
+ }
+ case ::build2::script::cli::unknown_mode::fail:
+ {
+ throw ::build2::script::cli::unknown_option (o);
+ }
+ }
+
+ break;
+ }
+ }
+
+ switch (arg_mode)
+ {
+ case ::build2::script::cli::unknown_mode::skip:
+ {
+ s.skip ();
+ r = true;
+ continue;
+ }
+ case ::build2::script::cli::unknown_mode::stop:
+ {
+ break;
+ }
+ case ::build2::script::cli::unknown_mode::fail:
+ {
+ throw ::build2::script::cli::unknown_argument (o);
+ }
+ }
+
+ break;
+ }
+
+ return r;
+ }
+ }
+}
+
+// Begin epilogue.
+//
+//
+// End epilogue.
+
diff --git a/libbuild2/script/builtin-options.hxx b/libbuild2/script/builtin-options.hxx
new file mode 100644
index 0000000..5a3f153
--- /dev/null
+++ b/libbuild2/script/builtin-options.hxx
@@ -0,0 +1,339 @@
+// -*- C++ -*-
+//
+// This file was generated by CLI, a command line interface
+// compiler for C++.
+//
+
+#ifndef LIBBUILD2_SCRIPT_BUILTIN_OPTIONS_HXX
+#define LIBBUILD2_SCRIPT_BUILTIN_OPTIONS_HXX
+
+// Begin prologue.
+//
+//
+// End prologue.
+
+#include <vector>
+#include <iosfwd>
+#include <string>
+#include <cstddef>
+#include <exception>
+
+#ifndef CLI_POTENTIALLY_UNUSED
+# if defined(_MSC_VER) || defined(__xlC__)
+# define CLI_POTENTIALLY_UNUSED(x) (void*)&x
+# else
+# define CLI_POTENTIALLY_UNUSED(x) (void)x
+# endif
+#endif
+
+namespace build2
+{
+ namespace script
+ {
+ namespace cli
+ {
+ class unknown_mode
+ {
+ public:
+ enum value
+ {
+ skip,
+ stop,
+ fail
+ };
+
+ unknown_mode (value);
+
+ operator value () const
+ {
+ return v_;
+ }
+
+ private:
+ value v_;
+ };
+
+ // Exceptions.
+ //
+
+ class exception: public std::exception
+ {
+ public:
+ virtual void
+ print (::std::ostream&) const = 0;
+ };
+
+ ::std::ostream&
+ operator<< (::std::ostream&, const exception&);
+
+ class unknown_option: public exception
+ {
+ public:
+ virtual
+ ~unknown_option () throw ();
+
+ unknown_option (const std::string& option);
+
+ const std::string&
+ option () const;
+
+ virtual void
+ print (::std::ostream&) const;
+
+ virtual const char*
+ what () const throw ();
+
+ private:
+ std::string option_;
+ };
+
+ class unknown_argument: public exception
+ {
+ public:
+ virtual
+ ~unknown_argument () throw ();
+
+ unknown_argument (const std::string& argument);
+
+ const std::string&
+ argument () const;
+
+ virtual void
+ print (::std::ostream&) const;
+
+ virtual const char*
+ what () const throw ();
+
+ private:
+ std::string argument_;
+ };
+
+ class missing_value: public exception
+ {
+ public:
+ virtual
+ ~missing_value () throw ();
+
+ missing_value (const std::string& option);
+
+ const std::string&
+ option () const;
+
+ virtual void
+ print (::std::ostream&) const;
+
+ virtual const char*
+ what () const throw ();
+
+ private:
+ std::string option_;
+ };
+
+ class invalid_value: public exception
+ {
+ public:
+ virtual
+ ~invalid_value () throw ();
+
+ invalid_value (const std::string& option,
+ const std::string& value,
+ const std::string& message = std::string ());
+
+ const std::string&
+ option () const;
+
+ const std::string&
+ value () const;
+
+ const std::string&
+ message () const;
+
+ virtual void
+ print (::std::ostream&) const;
+
+ virtual const char*
+ what () const throw ();
+
+ private:
+ std::string option_;
+ std::string value_;
+ std::string message_;
+ };
+
+ class eos_reached: public exception
+ {
+ public:
+ virtual void
+ print (::std::ostream&) const;
+
+ virtual const char*
+ what () const throw ();
+ };
+
+ // Command line argument scanner interface.
+ //
+ // The values returned by next() are guaranteed to be valid
+ // for the two previous arguments up until a call to a third
+ // peek() or next().
+ //
+ class scanner
+ {
+ public:
+ virtual
+ ~scanner ();
+
+ virtual bool
+ more () = 0;
+
+ virtual const char*
+ peek () = 0;
+
+ virtual const char*
+ next () = 0;
+
+ virtual void
+ skip () = 0;
+ };
+
+ class argv_scanner: public scanner
+ {
+ public:
+ argv_scanner (int& argc, char** argv, bool erase = false);
+ argv_scanner (int start, int& argc, char** argv, bool erase = false);
+
+ int
+ end () const;
+
+ virtual bool
+ more ();
+
+ virtual const char*
+ peek ();
+
+ virtual const char*
+ next ();
+
+ virtual void
+ skip ();
+
+ private:
+ int i_;
+ int& argc_;
+ char** argv_;
+ bool erase_;
+ };
+
+ class vector_scanner: public scanner
+ {
+ public:
+ vector_scanner (const std::vector<std::string>&, std::size_t start = 0);
+
+ std::size_t
+ end () const;
+
+ void
+ reset (std::size_t start = 0);
+
+ virtual bool
+ more ();
+
+ virtual const char*
+ peek ();
+
+ virtual const char*
+ next ();
+
+ virtual void
+ skip ();
+
+ private:
+ const std::vector<std::string>& v_;
+ std::size_t i_;
+ };
+
+ template <typename X>
+ struct parser;
+ }
+ }
+}
+
+namespace build2
+{
+ namespace script
+ {
+ class set_options
+ {
+ public:
+ set_options ();
+
+ set_options (int& argc,
+ char** argv,
+ bool erase = false,
+ ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail,
+ ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop);
+
+ set_options (int start,
+ int& argc,
+ char** argv,
+ bool erase = false,
+ ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail,
+ ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop);
+
+ set_options (int& argc,
+ char** argv,
+ int& end,
+ bool erase = false,
+ ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail,
+ ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop);
+
+ set_options (int start,
+ int& argc,
+ char** argv,
+ int& end,
+ bool erase = false,
+ ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail,
+ ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop);
+
+ set_options (::build2::script::cli::scanner&,
+ ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail,
+ ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop);
+
+ // Option accessors.
+ //
+ const bool&
+ exact () const;
+
+ const bool&
+ newline () const;
+
+ const bool&
+ whitespace () const;
+
+ // Implementation details.
+ //
+ protected:
+ bool
+ _parse (const char*, ::build2::script::cli::scanner&);
+
+ private:
+ bool
+ _parse (::build2::script::cli::scanner&,
+ ::build2::script::cli::unknown_mode option,
+ ::build2::script::cli::unknown_mode argument);
+
+ public:
+ bool exact_;
+ bool newline_;
+ bool whitespace_;
+ };
+ }
+}
+
+#include <libbuild2/script/builtin-options.ixx>
+
+// Begin epilogue.
+//
+//
+// End epilogue.
+
+#endif // LIBBUILD2_SCRIPT_BUILTIN_OPTIONS_HXX
diff --git a/libbuild2/script/builtin-options.ixx b/libbuild2/script/builtin-options.ixx
new file mode 100644
index 0000000..dc59f98
--- /dev/null
+++ b/libbuild2/script/builtin-options.ixx
@@ -0,0 +1,182 @@
+// -*- C++ -*-
+//
+// This file was generated by CLI, a command line interface
+// compiler for C++.
+//
+
+// Begin prologue.
+//
+//
+// End prologue.
+
+#include <cassert>
+
+namespace build2
+{
+ namespace script
+ {
+ namespace cli
+ {
+ // unknown_mode
+ //
+ inline unknown_mode::
+ unknown_mode (value v)
+ : v_ (v)
+ {
+ }
+
+ // exception
+ //
+ inline ::std::ostream&
+ operator<< (::std::ostream& os, const exception& e)
+ {
+ e.print (os);
+ return os;
+ }
+
+ // unknown_option
+ //
+ inline unknown_option::
+ unknown_option (const std::string& option)
+ : option_ (option)
+ {
+ }
+
+ inline const std::string& unknown_option::
+ option () const
+ {
+ return option_;
+ }
+
+ // unknown_argument
+ //
+ inline unknown_argument::
+ unknown_argument (const std::string& argument)
+ : argument_ (argument)
+ {
+ }
+
+ inline const std::string& unknown_argument::
+ argument () const
+ {
+ return argument_;
+ }
+
+ // missing_value
+ //
+ inline missing_value::
+ missing_value (const std::string& option)
+ : option_ (option)
+ {
+ }
+
+ inline const std::string& missing_value::
+ option () const
+ {
+ return option_;
+ }
+
+ // invalid_value
+ //
+ inline invalid_value::
+ invalid_value (const std::string& option,
+ const std::string& value,
+ const std::string& message)
+ : option_ (option),
+ value_ (value),
+ message_ (message)
+ {
+ }
+
+ inline const std::string& invalid_value::
+ option () const
+ {
+ return option_;
+ }
+
+ inline const std::string& invalid_value::
+ value () const
+ {
+ return value_;
+ }
+
+ inline const std::string& invalid_value::
+ message () const
+ {
+ return message_;
+ }
+
+ // argv_scanner
+ //
+ inline argv_scanner::
+ argv_scanner (int& argc, char** argv, bool erase)
+ : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase)
+ {
+ }
+
+ inline argv_scanner::
+ argv_scanner (int start, int& argc, char** argv, bool erase)
+ : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase)
+ {
+ }
+
+ inline int argv_scanner::
+ end () const
+ {
+ return i_;
+ }
+
+ // vector_scanner
+ //
+ inline vector_scanner::
+ vector_scanner (const std::vector<std::string>& v, std::size_t i)
+ : v_ (v), i_ (i)
+ {
+ }
+
+ inline std::size_t vector_scanner::
+ end () const
+ {
+ return i_;
+ }
+
+ inline void vector_scanner::
+ reset (std::size_t i)
+ {
+ i_ = i;
+ }
+ }
+ }
+}
+
+namespace build2
+{
+ namespace script
+ {
+ // set_options
+ //
+
+ inline const bool& set_options::
+ exact () const
+ {
+ return this->exact_;
+ }
+
+ inline const bool& set_options::
+ newline () const
+ {
+ return this->newline_;
+ }
+
+ inline const bool& set_options::
+ whitespace () const
+ {
+ return this->whitespace_;
+ }
+ }
+}
+
+// Begin epilogue.
+//
+//
+// End epilogue.
diff --git a/libbuild2/test/script/builtin.cli b/libbuild2/script/builtin.cli
index 42b26d2..68db23e 100644
--- a/libbuild2/test/script/builtin.cli
+++ b/libbuild2/script/builtin.cli
@@ -1,4 +1,4 @@
-// file : libbuild2/test/script/builtin.cli
+// file : libbuild2/script/builtin.cli
// license : MIT; see accompanying LICENSE file
// Note that options in this file are undocumented because we generate neither
@@ -7,19 +7,15 @@
//
namespace build2
{
- namespace test
+ namespace script
{
- namespace script
+ // Pseudo-builtin options.
+ //
+ class set_options
{
- // Pseudo-builtin options.
- //
-
- class set_options
- {
- bool --exact|-e;
- bool --newline|-n;
- bool --whitespace|-w;
- };
- }
+ bool --exact|-e;
+ bool --newline|-n;
+ bool --whitespace|-w;
+ };
}
}
diff --git a/libbuild2/test/script/lexer+command-expansion.test.testscript b/libbuild2/script/lexer+command-expansion.test.testscript
index 2cb6587..f4d69d2 100644
--- a/libbuild2/test/script/lexer+command-expansion.test.testscript
+++ b/libbuild2/script/lexer+command-expansion.test.testscript
@@ -1,4 +1,4 @@
-# file : libbuild2/test/script/lexer+command-expansion.test.testscript
+# file : libbuild2/script/lexer+command-expansion.test.testscript
# license : MIT; see accompanying LICENSE file
test.arguments = command-expansion
@@ -113,17 +113,37 @@ test.arguments = command-expansion
{
: newline
:
- $* <:"0<a b" >>EOO
+ $* <:"0<<<=a b" >>EOO
'0'
- <
+ <<<=
'a b'
EOO
: no-newline
:
- $* <:"0<:a b" >>EOO
+ $* <:"0<<<=:a b" >>EOO
'0'
- <:
+ <<<=:
+ 'a b'
+ EOO
+ }
+
+ : in-alias
+ :
+ {
+ : newline
+ :
+ $* <:"0<<<a b" >>EOO
+ '0'
+ <<<
+ 'a b'
+ EOO
+
+ : no-newline
+ :
+ $* <:"0<<<:a b" >>EOO
+ '0'
+ <<<:
'a b'
EOO
}
@@ -133,17 +153,17 @@ test.arguments = command-expansion
{
: newline
:
- $* <:"1>a b" >>EOO
+ $* <:"1>>>?a b" >>EOO
'1'
- >
+ >>>?
'a b'
EOO
: no-newline
:
- $* <:"1>:a b" >>EOO
+ $* <:"1>>>?:a b" >>EOO
'1'
- >:
+ >>>?:
'a b'
EOO
}
@@ -157,6 +177,26 @@ test.arguments = command-expansion
{
: newline
:
+ $* <:"0<<=E O I" >>EOO
+ '0'
+ <<=
+ 'E O I'
+ EOO
+
+ : no-newline
+ :
+ $* <:"0<<=:E O I" >>EOO
+ '0'
+ <<=:
+ 'E O I'
+ EOO
+ }
+
+ : in-alias
+ :
+ {
+ : newline
+ :
$* <:"0<<E O I" >>EOO
'0'
<<
@@ -177,17 +217,17 @@ test.arguments = command-expansion
{
: newline
:
- $* <:"1>>E O O" >>EOO
+ $* <:"1>>?E O O" >>EOO
'1'
- >>
+ >>?
'E O O'
EOO
: no-newline
:
- $* <:"1>>:E O O" >>EOO
+ $* <:"1>>?:E O O" >>EOO
'1'
- >>:
+ >>?:
'E O O'
EOO
}
@@ -198,9 +238,17 @@ test.arguments = command-expansion
{
: in
:
- $* <:"0<<<a b" >>EOO
+ $* <:"0<=a b" >>EOO
'0'
- <<<
+ <=
+ 'a b'
+ EOO
+
+ : in-alias
+ :
+ $* <:"0<a b" >>EOO
+ '0'
+ <
'a b'
EOO
@@ -212,6 +260,14 @@ test.arguments = command-expansion
'a b'
EOO
+ : out-alias
+ :
+ $* <:"1>a b" >>EOO
+ '1'
+ >
+ 'a b'
+ EOO
+
: out-app
:
$* <:"1>+a b" >>EOO
@@ -219,8 +275,26 @@ test.arguments = command-expansion
>+
'a b'
EOO
+
+ : out-app-alias
+ :
+ $* <:"1>>a b" >>EOO
+ '1'
+ >>
+ 'a b'
+ EOO
}
+: no-out-alias
+:
+$* <:"1>>>a b" >>EOO
+'1'
+>>
+>
+'a b'
+EOO
+
+
: cleanup
:
{
diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx
new file mode 100644
index 0000000..d78e999
--- /dev/null
+++ b/libbuild2/script/lexer.cxx
@@ -0,0 +1,431 @@
+// file : libbuild2/script/lexer.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/script/lexer.hxx>
+
+#include <cstring> // strchr()
+
+using namespace std;
+
+namespace build2
+{
+ namespace script
+ {
+ using type = token_type;
+
+ void lexer::
+ mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data)
+ { // Push state for a script-specific mode or defer to base_lexer::mode().
+ bool a (false); // attributes
+
+ const char* s1 (nullptr); // Special characters (see the NOTE in next_line()).
+ const char* s2 (nullptr); // NOTE(review): companion to s1 in state -- confirm meaning.
+
+ bool s (true); // space
+ bool n (true); // newline
+ bool q (true); // quotes
+
+ if (!esc)
+ {
+ assert (!state_.empty ());
+ esc = state_.top ().escapes; // Inherit escapes from the current mode.
+ }
+
+ switch (m)
+ {
+ case lexer_mode::command_expansion:
+ {
+ // Note that whitespaces are not word separators in this mode.
+ //
+ s1 = "|&<>";
+ s2 = " ";
+ s = false;
+ break;
+ }
+ case lexer_mode::here_line_single:
+ {
+ // This one is like a single-quoted string except it treats
+ // newlines as a separator. We also treat quotes as literals.
+ //
+ // Note that it might be tempting to enable line continuation
+ // escapes. However, we will then have to also enable escaping of
+ // the backslash, which makes it a lot less tempting.
+ //
+ s1 = "\n";
+ s2 = " ";
+ esc = ""; // Disable escape sequences.
+ s = false;
+ q = false;
+ break;
+ }
+ case lexer_mode::here_line_double:
+ {
+ // This one is like a double-quoted string except it treats
+ // newlines as a separator. We also treat quotes as literals.
+ //
+ s1 = "$(\n";
+ s2 = " ";
+ s = false;
+ q = false;
+ break;
+ }
+ default:
+ {
+ // Make sure pair separators are only enabled where we expect
+ // them.
+ //
+ // @@ Should we disable pair separators in the eval mode?
+ //
+ assert (ps == '\0' ||
+ m == lexer_mode::eval ||
+ m == lexer_mode::attribute_value);
+
+ base_lexer::mode (m, ps, esc);
+ return; // Not one of our modes: nothing is pushed here.
+ }
+ }
+
+ assert (ps == '\0'); // No pair separator in any of the three modes above.
+ state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
+ }
+
+ token lexer::
+ next ()
+ { // Lex one token via next_line() or the base lexer, per the current mode.
+ token r;
+
+ switch (state_.top ().mode)
+ {
+ case lexer_mode::command_expansion:
+ case lexer_mode::here_line_single:
+ case lexer_mode::here_line_double:
+ r = next_line ();
+ break;
+ default:
+ r = base_lexer::next ();
+ break;
+ }
+
+ if (r.qtype != quote_type::unquoted)
+ ++quoted_; // Count every token that is not unquoted.
+
+ return r;
+ }
+
+ token lexer::
+ next_line ()
+ { // Lex one token in the command_expansion or here_line_* mode (see next()).
+ bool sep (skip_spaces ().first); // The token's "separated" flag.
+
+ xchar c (get ());
+ uint64_t ln (c.line), cn (c.column); // Token position: its first character.
+
+ const state& st (state_.top ());
+ lexer_mode m (st.mode);
+
+ auto make_token = [&sep, &m, ln, cn] (type t)
+ {
+ bool q (m == lexer_mode::here_line_double); // Double-quoted here-line?
+
+ return token (t, string (), sep,
+ (q ? quote_type::double_ : quote_type::unquoted), q,
+ ln, cn,
+ token_printer);
+ };
+
+ if (eos (c))
+ return make_token (type::eos);
+
+ // NOTE: remember to update mode() if adding new special characters.
+
+ if (m != lexer_mode::command_expansion) // I.e., one of here_line_*.
+ {
+ switch (c)
+ {
+ case '\n':
+ {
+ sep = true; // Treat newline as always separated.
+ return make_token (type::newline);
+ }
+ }
+ }
+
+ if (m != lexer_mode::here_line_single) // No expansions in single-quoted.
+ {
+ switch (c)
+ {
+ // Variable expansion, function call, and evaluation context.
+ //
+ case '$': return make_token (type::dollar);
+ case '(': return make_token (type::lparen);
+ }
+ }
+
+ // Command operators.
+ //
+ if (m == lexer_mode::command_expansion)
+ {
+ if (optional<token> t = next_cmd_op (c, sep))
+ return move (*t);
+ }
+
+ // Otherwise it is a word.
+ //
+ unget (c);
+ return word (st, sep);
+ }
+
+ optional<token> lexer::
+ next_cmd_op (const xchar& c, bool sep)
+ { // Lex a command operator that starts with c; nullopt if c begins none.
+ auto make_token = [&sep, &c] (type t, string v = string ())
+ {
+ return token (t, move (v), sep,
+ quote_type::unquoted, false,
+ c.line, c.column, // Position of the parameter c (first character).
+ token_printer);
+ };
+
+ auto make_token_with_modifiers =
+ [&make_token, this] (type t,
+ const char* mods, // To recognize.
+ const char* stop = nullptr) // To stop after.
+ {
+ string v;
+ if (mods != nullptr)
+ {
+ for (xchar p (peek ());
+ (strchr (mods, p) != nullptr && // Modifier.
+ strchr (v.c_str (), p) == nullptr); // Not already seen.
+ p = peek ())
+ {
+ get ();
+ v += p;
+
+ if (stop != nullptr && strchr (stop, p) != nullptr)
+ break;
+ }
+ }
+
+ return make_token (t, move (v)); // Modifiers become the token value.
+ };
+
+ switch (c)
+ {
+ // |, ||
+ //
+ case '|':
+ {
+ if (peek () == '|')
+ {
+ get ();
+ return make_token (type::log_or);
+ }
+ else
+ return make_token (type::pipe);
+ }
+ // &, &&
+ //
+ case '&':
+ {
+ xchar p (peek ());
+
+ if (p == '&')
+ {
+ get ();
+ return make_token (type::log_and);
+ }
+
+ // These modifiers are mutually exclusive so stop after seeing
+ // either one.
+ //
+ return make_token_with_modifiers (type::clean, "!?", "!?");
+ }
+ // <
+ //
+ case '<':
+ {
+ optional<type> r;
+ xchar p (peek ());
+
+ if (p == '|' || p == '-' || p == '=' || p == '<') // <| <- <= <<
+ {
+ xchar c (get ()); // Note: shadows the parameter c (second character).
+
+ switch (p)
+ {
+ case '|': return make_token (type::in_pass); // <|
+ case '-': return make_token (type::in_null); // <-
+ case '=': return make_token (type::in_file); // <=
+ case '<': // <<
+ {
+ p = peek ();
+
+ if (p == '=' || p == '<') // <<= <<<
+ {
+ xchar c (get ()); // Note: shadows again (third character).
+
+ switch (p)
+ {
+ case '=':
+ {
+ r = type::in_doc; // <<=
+ break;
+ }
+ case '<':
+ {
+ p = peek ();
+
+ if (p == '=')
+ {
+ get ();
+ r = type::in_str; // <<<=
+ }
+
+ if (!r && redirect_aliases.lll)
+ r = type::in_lll; // <<<
+
+ // We can still end up with the << or < redirect alias,
+ // if any of them is present.
+ //
+ if (!r)
+ unget (c);
+ }
+
+ break;
+ }
+ }
+
+ if (!r && redirect_aliases.ll)
+ r = type::in_ll; // <<
+
+ // We can still end up with the < redirect alias, if it is
+ // present.
+ //
+ if (!r)
+ unget (c); // NOTE(review): may be a 2nd consecutive unget() -- confirm OK.
+
+ break;
+ }
+ }
+ }
+
+ if (!r && redirect_aliases.l)
+ r = type::in_l; // <
+
+ if (!r)
+ return nullopt;
+
+ // Handle modifiers.
+ //
+ const char* mods (nullptr);
+
+ switch (redirect_aliases.resolve (*r)) // Other redirects: no modifiers.
+ {
+ case type::in_str:
+ case type::in_doc: mods = ":/"; break;
+ }
+
+ token t (make_token_with_modifiers (*r, mods));
+
+ return t;
+ }
+ // >
+ //
+ case '>':
+ {
+ optional<type> r;
+ xchar p (peek ());
+
+ if (p == '|' || p == '-' || p == '!' || p == '&' || // >| >- >! >&
+ p == '=' || p == '+' || p == '?' || p == '>') // >= >+ >? >>
+ {
+ xchar c (get ()); // Note: shadows the parameter c (second character).
+
+ switch (p)
+ {
+ case '|': return make_token (type::out_pass); // >|
+ case '-': return make_token (type::out_null); // >-
+ case '!': return make_token (type::out_trace); // >!
+ case '&': return make_token (type::out_merge); // >&
+ case '=': return make_token (type::out_file_ovr); // >=
+ case '+': return make_token (type::out_file_app); // >+
+ case '?': return make_token (type::out_file_cmp); // >?
+ case '>': // >>
+ {
+ p = peek ();
+
+ if (p == '?' || p == '>') // >>? >>>
+ {
+ xchar c (get ()); // Note: shadows again (third character).
+
+ switch (p)
+ {
+ case '?':
+ {
+ r = type::out_doc; // >>?
+ break;
+ }
+ case '>':
+ {
+ p = peek ();
+
+ if (p == '?')
+ {
+ get ();
+ r = type::out_str; // >>>?
+ }
+
+ if (!r && redirect_aliases.ggg)
+ r = type::out_ggg; // >>>
+
+ // We can still end up with the >> or > redirect alias,
+ // if any of them is present.
+ //
+ if (!r)
+ unget (c);
+ }
+
+ break;
+ }
+ }
+
+ if (!r && redirect_aliases.gg)
+ r = type::out_gg; // >>
+
+ // We can still end up with the > redirect alias, if it is
+ // present.
+ //
+ if (!r)
+ unget (c); // NOTE(review): may be a 2nd consecutive unget() -- confirm OK.
+
+ break;
+ }
+ }
+ }
+
+ if (!r && redirect_aliases.g)
+ r = type::out_g; // >
+
+ if (!r)
+ return nullopt;
+
+ // Handle modifiers.
+ //
+ const char* mods (nullptr);
+ const char* stop (nullptr);
+
+ switch (redirect_aliases.resolve (*r)) // Other redirects: no modifiers.
+ {
+ case type::out_str:
+ case type::out_doc: mods = ":/~"; stop = "~"; break;
+ }
+
+ return make_token_with_modifiers (*r, mods, stop);
+ }
+ }
+
+ return nullopt; // c does not start a command operator.
+ }
+ }
+}
diff --git a/libbuild2/script/lexer.hxx b/libbuild2/script/lexer.hxx
new file mode 100644
index 0000000..dbfdfcc
--- /dev/null
+++ b/libbuild2/script/lexer.hxx
@@ -0,0 +1,139 @@
+// file : libbuild2/script/lexer.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_SCRIPT_LEXER_HXX
+#define LIBBUILD2_SCRIPT_LEXER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/lexer.hxx>
+
+#include <libbuild2/script/token.hxx>
+
+namespace build2
+{
+ namespace script
+ {
+ // Script lexer modes: the base lexer modes plus the command expansion and
+ // here-line (single- and double-quoted) modes.
+ //
+ struct lexer_mode: build2::lexer_mode
+ {
+   using base_type = build2::lexer_mode;
+
+   // The script-specific values start at the base's value_next. The
+   // trailing value_next is presumably the first value available to
+   // further derived mode sets (same pattern as in the base).
+   //
+   enum
+   {
+     command_expansion = base_type::value_next,
+     here_line_single,
+     here_line_double,
+
+     value_next
+   };
+
+   lexer_mode () = default;
+   lexer_mode (base_type m): base_type (m) {}
+   lexer_mode (value_type m): base_type (m) {}
+ };
+
+ // Actual redirects (as tokens) for the <, <<, <<<, and >, >>, >>>
+ // aliases. An absent value means the corresponding alias is not defined.
+ //
+ struct redirect_aliases
+ {
+   optional<token_type> l;   // <
+   optional<token_type> ll;  // <<
+   optional<token_type> lll; // <<<
+   optional<token_type> g;   // >
+   optional<token_type> gg;  // >>
+   optional<token_type> ggg; // >>>
+
+   // Map a redirect alias token type to the token type it stands for and
+   // return any other token type unchanged. The alias being resolved must
+   // be present (this is asserted); it is the caller's job to ensure that,
+   // normally by not recognizing absent aliases as tokens in the first
+   // place.
+   //
+   token_type
+   resolve (token_type t) const noexcept
+   {
+     // Pick the alias slot that corresponds to the token type, if any.
+     //
+     const optional<token_type>* a (nullptr);
+
+     switch (t)
+     {
+     case token_type::in_l:    a = &l;   break;
+     case token_type::in_ll:   a = &ll;  break;
+     case token_type::in_lll:  a = &lll; break;
+     case token_type::out_g:   a = &g;   break;
+     case token_type::out_gg:  a = &gg;  break;
+     case token_type::out_ggg: a = &ggg; break;
+     }
+
+     if (a == nullptr)
+       return t; // Not an alias.
+
+     assert (*a);
+     return **a;
+   }
+ };
+
+ class lexer: public build2::lexer
+ {
+ public:
+ using base_lexer = build2::lexer;
+ using base_mode = build2::lexer_mode;
+
+ using redirect_aliases_type = script::redirect_aliases;
+
+ // Note that none of the name, redirect aliases, and escape arguments
+ // are copied (so they must outlive the lexer).
+ //
+ lexer (istream& is,
+ const path_name& name,
+ lexer_mode m,
+ const redirect_aliases_type& ra,
+ const char* escapes = nullptr)
+ : base_lexer (is, name, 1 /* line */,
+ nullptr /* escapes */,
+ false /* set_mode */),
+ redirect_aliases (ra)
+ {
+ mode (m, '\0', escapes); // The base was told not to set the mode.
+ }
+
+ virtual void
+ mode (base_mode,
+ char = '\0',
+ optional<const char*> = nullopt,
+ uintptr_t = 0) override;
+
+ // Number of quoted (double or single) tokens since the last reset.
+ //
+ size_t
+ quoted () const {return quoted_;}
+
+ void
+ reset_quoted (size_t q) {quoted_ = q;}
+
+ virtual token
+ next () override;
+
+ public:
+ const redirect_aliases_type& redirect_aliases; // Reference to caller's object.
+
+ protected:
+ lexer (istream& is, const path_name& name, uint64_t line,
+ const char* escapes,
+ bool set_mode,
+ const redirect_aliases_type& ra)
+ : base_lexer (is, name, line, escapes, set_mode),
+ redirect_aliases (ra) {} // Unlike the public one, does not call mode().
+
+ // Return the next token if it is a command operator (|, ||, &&,
+ // redirect, or cleanup) and nullopt otherwise.
+ //
+ optional<token>
+ next_cmd_op (const xchar&, // The token first character (last got char).
+ bool sep); // The token is separated.
+
+ private:
+ token
+ next_line ();
+
+ protected:
+ size_t quoted_; // See quoted() and reset_quoted().
+ };
+ }
+}
+
+#endif // LIBBUILD2_SCRIPT_LEXER_HXX
diff --git a/libbuild2/script/lexer.test.cxx b/libbuild2/script/lexer.test.cxx
new file mode 100644
index 0000000..b8de241
--- /dev/null
+++ b/libbuild2/script/lexer.test.cxx
@@ -0,0 +1,76 @@
+// file : libbuild2/script/lexer.test.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/script/token.hxx>
+#include <libbuild2/script/lexer.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace script
+ {
+ // Usage: argv[0] <lexer-mode>
+ //
+ // Lex stdin in the specified mode (command-expansion, here-line-single, or
+ // here-line-double) and print each token on a separate line. Return 1 if
+ // lexing fails and 0 otherwise.
+ //
+ int
+ main (int argc, char* argv[])
+ {
+   assert (argc == 2);
+
+   const string arg (argv[1]);
+   lexer_mode m;
+
+   if      (arg == "command-expansion") m = lexer_mode::command_expansion;
+   else if (arg == "here-line-single")  m = lexer_mode::here_line_single;
+   else if (arg == "here-line-double")  m = lexer_mode::here_line_double;
+   else                                 assert (false);
+
+   try
+   {
+     cin.exceptions (istream::failbit | istream::badbit);
+
+     path_name in ("<stdin>");
+
+     // Define all the aliases except >>>.
+     //
+     redirect_aliases ra {token_type (token_type::in_file),
+                          token_type (token_type::in_doc),
+                          token_type (token_type::in_str),
+                          token_type (token_type::out_file_ovr),
+                          token_type (token_type::out_file_app),
+                          nullopt};
+
+     lexer lex (cin, in, m, ra);
+
+     // No use printing eos since we will either get it or loop forever.
+     //
+     for (;;)
+     {
+       token t (lex.next ());
+
+       if (t.type == token_type::eos)
+         break;
+
+       // Print each token on a separate line without quoting operators.
+       //
+       t.printer (cout, t, print_mode::normal);
+       cout << endl;
+     }
+   }
+   catch (const failed&)
+   {
+     return 1;
+   }
+
+   return 0;
+ }
+ }
+}
+
+ // Program entry point: forward to the test driver in build2::script.
+ //
+ int
+ main (int argc, char* argv[])
+ {
+   const int r (build2::script::main (argc, argv));
+   return r;
+ }
diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx
new file mode 100644
index 0000000..aa60111
--- /dev/null
+++ b/libbuild2/script/parser.cxx
@@ -0,0 +1,2015 @@
+// file : libbuild2/script/parser.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/script/parser.hxx>
+
+#include <libbuild2/variable.hxx>
+#include <libbuild2/script/run.hxx> // exit
+#include <libbuild2/script/lexer.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace script
+ {
+ using type = token_type;
+
+ // Parse the value part of a variable assignment line and return it. If
+ // nothing (other than attributes) follows the assignment, return a value
+ // holding empty names.
+ //
+ value parser::
+ parse_variable_line (token& t, type& tt)
+ {
+   // enter: assignment
+   // leave: newline or unknown token
+
+   next_with_attributes (t, tt);
+
+   // Parse value attributes if any. Note that it's ok not to have
+   // anything after the attributes (e.g., foo=[null]).
+   //
+   attributes_push (t, tt, true);
+
+   // Nothing that can start a value: the result is empty names.
+   //
+   if (tt == type::newline || !start_names (tt))
+     return value (names ());
+
+   // @@ PAT: Should we expand patterns? Note that it will only be
+   // simple ones since we have disabled {}. Also, what would be the
+   // pattern base directory?
+   //
+   return parse_value (t, tt,
+                       pattern_mode::ignore,
+                       "variable value",
+                       nullptr);
+ }
+
+ // Parse the regular expression representation (non-empty string value
+ // framed with introducer characters and optionally followed by flag
+ // characters from the {di} set, for example '/foo/id') into
+ // components. Also return end-of-parsing position if requested,
+ // otherwise treat any unparsed characters left as an error.
+ //
+ // Components of a parsed regular expression representation: the
+ // expression itself, its introducer character, and its flags.
+ //
+ struct regex_parts
+ {
+   string value;
+   char intro = '\0';
+   string flags; // Combination of characters from {di} set.
+
+   // Create a special empty object (the introducer is '\0').
+   //
+   regex_parts () = default;
+
+   regex_parts (string v, char i, string f)
+       : value (move (v)), intro (i), flags (move (f)) {}
+ };
+
+ // Split the regex representation s (introducer, non-empty expression,
+ // closing introducer, then optional flags from the {di} set) into its
+ // components. Issue diagnostics at location l, describing the regex as
+ // what, if the representation is malformed.
+ //
+ // If end is not NULL, store the position past the flags there and allow
+ // anything to follow. Otherwise anything after the flags is an error.
+ //
+ static regex_parts
+ parse_regex (const string& s,
+              const location& l,
+              const char* what,
+              size_t* end = nullptr)
+ {
+   if (s.empty ())
+     fail (l) << "no introducer character in " << what;
+
+   const char intro (s[0]);
+
+   size_t ci (s.find (intro, 1)); // Closing introducer position.
+
+   if (ci == string::npos)
+     fail (l) << "no closing introducer character in " << what;
+
+   if (ci == 1) // Nothing between the introducers.
+     fail (l) << what << " is empty";
+
+   // Skip the flags. Note that s[s.size ()] is '\0' so the loop stops at
+   // the end of the string.
+   //
+   size_t fb (ci + 1); // Flags begin.
+   size_t fe (fb);     // Flags end.
+
+   while (s[fe] == 'd' || s[fe] == 'i')
+     ++fe;
+
+   // If the end-of-parsing position is requested, then the regex is just a
+   // prefix. Otherwise we must have reached the end of the string.
+   //
+   if (end != nullptr)
+     *end = fe;
+   else if (s[fe] != '\0')
+     fail (l) << "junk at the end of " << what;
+
+   return regex_parts (string (s, 1, ci - 1), intro, string (s, fb, fe - fb));
+ }
+
+ // Parse a command expression: one or more command pipelines joined with
+ // the || and && operators, where each command can have redirects,
+ // cleanups, and an exit status check. Return the parsed expression
+ // together with the here-documents it refers to. The ra argument is used
+ // to resolve the redirect aliases.
+ //
+ // In the pre-parse mode the here-document end markers are validated and
+ // collected but the commands themselves are not assembled.
+ //
+ pair<command_expr, parser::here_docs> parser::
+ parse_command_expr (token& t, type& tt,
+ const redirect_aliases& ra)
+ {
+ // enter: first token of the command line
+ // leave: <newline> or unknown token
+
+ command_expr expr;
+
+ // OR-ed to an implied false for the first term.
+ //
+ expr.push_back ({expr_operator::log_or, command_pipe ()});
+
+ command c; // Command being assembled.
+
+ // Make sure the command makes sense.
+ //
+ auto check_command = [&c, this] (const location& l, bool last)
+ {
+ if (c.out && c.out->type == redirect_type::merge &&
+ c.err && c.err->type == redirect_type::merge)
+ fail (l) << "stdout and stderr redirected to each other";
+
+ if (!last && c.out)
+ fail (l) << "stdout is both redirected and piped";
+ };
+
+ // Check that the introducer character differs from '/' if the
+ // portable path modifier is specified. Must be called before
+ // parse_regex() (see below) to make sure its diagnostics is
+ // meaningful.
+ //
+ // Note that the portable path modifier assumes '/' to be a valid
+ // regex character and so makes it indistinguishable from the
+ // terminating introducer.
+ //
+ auto check_regex_mod = [this] (const string& mod,
+ const string& re,
+ const location& l,
+ const char* what)
+ {
+ // Handles empty regex properly.
+ //
+ if (mod.find ('/') != string::npos && re[0] == '/')
+ fail (l) << "portable path modifier and '/' introducer in "
+ << what;
+ };
+
+ // Pending positions where the next word should go.
+ //
+ enum class pending
+ {
+ none,
+ program,
+ in_string,
+ in_document,
+ in_file,
+ out_merge,
+ out_string,
+ out_str_regex,
+ out_document,
+ out_doc_regex,
+ out_file,
+ err_merge,
+ err_string,
+ err_str_regex,
+ err_document,
+ err_doc_regex,
+ err_file,
+ clean
+ };
+ pending p (pending::program);
+ string mod; // Modifiers for pending in_* and out_* positions.
+ here_docs hd; // Expected here-documents.
+
+ // Add the next word to either one of the pending positions or to
+ // program arguments by default.
+ //
+ // Note that not every position consumes (moves from) the passed string:
+ // for the merge and here-string regex positions it is only inspected.
+ //
+ auto add_word = [&c, &p, &mod, &check_regex_mod, this] (
+ string&& w, const location& l)
+ {
+ auto add_merge = [&l, this] (optional<redirect>& r,
+ const string& w,
+ int fd)
+ {
+ assert (r); // Must already be present.
+
+ try
+ {
+ size_t n;
+ if (stoi (w, &n) == fd && n == w.size ())
+ {
+ r->fd = fd;
+ return;
+ }
+ }
+ catch (const exception&) {} // Fall through.
+
+ fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect "
+ << "file descriptor must be " << fd;
+ };
+
+ auto add_here_str = [] (optional<redirect>& r, string&& w)
+ {
+ assert (r); // Must already be present.
+
+ if (r->modifiers ().find (':') == string::npos)
+ w += '\n';
+ r->str = move (w);
+ };
+
+ auto add_here_str_regex = [&l, &check_regex_mod] (
+ optional<redirect>& r, int fd, string&& w)
+ {
+ assert (r); // Must already be present.
+
+ const char* what (nullptr);
+ switch (fd)
+ {
+ case 1: what = "stdout regex redirect"; break;
+ case 2: what = "stderr regex redirect"; break;
+ }
+
+ check_regex_mod (r->modifiers (), w, l, what);
+
+ regex_parts rp (parse_regex (w, l, what));
+
+ regex_lines& re (r->regex);
+ re.intro = rp.intro;
+
+ re.lines.emplace_back (
+ l.line, l.column, move (rp.value), move (rp.flags));
+
+ // Add final blank line unless suppressed.
+ //
+ // Note that the position is synthetic, but that's ok as we don't
+ // expect any diagnostics to refer this line.
+ //
+ if (r->modifiers ().find (':') == string::npos)
+ re.lines.emplace_back (l.line, l.column, string (), false);
+ };
+
+ auto parse_path = [&l, this] (string&& w, const char* what) -> path
+ {
+ try
+ {
+ path p (move (w));
+
+ if (!p.empty ())
+ {
+ p.normalize ();
+ return p;
+ }
+
+ fail (l) << "empty " << what << endf;
+ }
+ catch (const invalid_path& e)
+ {
+ fail (l) << "invalid " << what << " '" << e.path << "'" << endf;
+ }
+ };
+
+ auto add_file = [&parse_path] (optional<redirect>& r,
+ int fd,
+ string&& w)
+ {
+ assert (r); // Must already be present.
+
+ const char* what (nullptr);
+ switch (fd)
+ {
+ case 0: what = "stdin redirect path"; break;
+ case 1: what = "stdout redirect path"; break;
+ case 2: what = "stderr redirect path"; break;
+ }
+
+ r->file.path = parse_path (move (w), what);
+ };
+
+ switch (p)
+ {
+ case pending::none: c.arguments.push_back (move (w)); break;
+ case pending::program:
+ c.program = parse_path (move (w), "program path");
+ break;
+
+ case pending::out_merge: add_merge (c.out, w, 2); break;
+ case pending::err_merge: add_merge (c.err, w, 1); break;
+
+ case pending::in_string: add_here_str (c.in, move (w)); break;
+ case pending::out_string: add_here_str (c.out, move (w)); break;
+ case pending::err_string: add_here_str (c.err, move (w)); break;
+
+ case pending::out_str_regex:
+ {
+ add_here_str_regex (c.out, 1, move (w));
+ break;
+ }
+ case pending::err_str_regex:
+ {
+ add_here_str_regex (c.err, 2, move (w));
+ break;
+ }
+
+ // These are handled specially below.
+ //
+ case pending::in_document:
+ case pending::out_document:
+ case pending::err_document:
+ case pending::out_doc_regex:
+ case pending::err_doc_regex: assert (false); break;
+
+ case pending::in_file: add_file (c.in, 0, move (w)); break;
+ case pending::out_file: add_file (c.out, 1, move (w)); break;
+ case pending::err_file: add_file (c.err, 2, move (w)); break;
+
+ case pending::clean:
+ {
+ cleanup_type t;
+ switch (mod[0]) // Ok, if empty
+ {
+ case '!': t = cleanup_type::never; break;
+ case '?': t = cleanup_type::maybe; break;
+ default: t = cleanup_type::always; break;
+ }
+
+ c.cleanups.push_back (
+ {t, parse_path (move (w), "cleanup path")});
+ break;
+ }
+ }
+
+ p = pending::none;
+ mod.clear ();
+ };
+
+ // Make sure we don't have any pending positions to fill.
+ //
+ auto check_pending = [&p, this] (const location& l)
+ {
+ const char* what (nullptr);
+
+ switch (p)
+ {
+ case pending::none: break;
+ case pending::program: what = "program"; break;
+ case pending::in_string: what = "stdin here-string"; break;
+ case pending::in_document: what = "stdin here-document end"; break;
+ case pending::in_file: what = "stdin file"; break;
+ case pending::out_merge: what = "stdout file descriptor"; break;
+ case pending::out_string: what = "stdout here-string"; break;
+ case pending::out_document: what = "stdout here-document end"; break;
+ case pending::out_file: what = "stdout file"; break;
+ case pending::err_merge: what = "stderr file descriptor"; break;
+ case pending::err_string: what = "stderr here-string"; break;
+ case pending::err_document: what = "stderr here-document end"; break;
+ case pending::err_file: what = "stderr file"; break;
+ case pending::clean: what = "cleanup path"; break;
+
+ case pending::out_str_regex:
+ {
+ what = "stdout here-string regex";
+ break;
+ }
+ case pending::err_str_regex:
+ {
+ what = "stderr here-string regex";
+ break;
+ }
+ case pending::out_doc_regex:
+ {
+ what = "stdout here-document regex end";
+ break;
+ }
+ case pending::err_doc_regex:
+ {
+ what = "stderr here-document regex end";
+ break;
+ }
+ }
+
+ if (what != nullptr)
+ fail (l) << "missing " << what;
+ };
+
+ // Parse the redirect operator.
+ //
+ // If the token type is the redirect alias then tt must contain the type
+ // the alias resolves to and the token type otherwise. Note that this
+ // argument defines the redirect semantics. Also note that the token is
+ // saved into the redirect to keep the modifiers and the original
+ // representation.
+ //
+ auto parse_redirect = [&c, &expr, &p, &mod, &hd, this]
+ (token&& t, type tt, const location& l)
+ {
+ // The redirect alias token type must be resolved.
+ //
+ assert (tt != type::in_l &&
+ tt != type::in_ll &&
+ tt != type::in_lll &&
+ tt != type::out_g &&
+ tt != type::out_gg &&
+ tt != type::out_ggg);
+
+ // Our semantics is the last redirect seen takes effect.
+ //
+ assert (p == pending::none && mod.empty ());
+
+ // See if we have the file descriptor.
+ //
+ unsigned long fd (3);
+ if (!t.separated)
+ {
+ if (c.arguments.empty ())
+ fail (l) << "missing redirect file descriptor";
+
+ const string& s (c.arguments.back ());
+
+ try
+ {
+ size_t n;
+ fd = stoul (s, &n);
+
+ if (n != s.size () || fd > 2)
+ throw invalid_argument (string ());
+ }
+ catch (const exception&)
+ {
+ fail (l) << "invalid redirect file descriptor '" << s << "'";
+ }
+
+ c.arguments.pop_back ();
+ }
+
+ // Validate/set default file descriptor.
+ //
+ switch (tt)
+ {
+ case type::in_pass:
+ case type::in_null:
+ case type::in_str:
+ case type::in_doc:
+ case type::in_file:
+ {
+ if ((fd = fd == 3 ? 0 : fd) != 0)
+ fail (l) << "invalid in redirect file descriptor " << fd;
+
+ if (!expr.back ().pipe.empty ())
+ fail (l) << "stdin is both piped and redirected";
+
+ break;
+ }
+ case type::out_pass:
+ case type::out_null:
+ case type::out_trace:
+ case type::out_merge:
+ case type::out_str:
+ case type::out_doc:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+ {
+ if ((fd = fd == 3 ? 1 : fd) == 0)
+ fail (l) << "invalid out redirect file descriptor " << fd;
+
+ break;
+ }
+ }
+
+ // Don't move as we will save the token into the redirect object.
+ //
+ mod = t.value;
+
+ // Handle the none redirect (no data allowed) in the switch construct
+ // if/when the respective syntax is invented.
+ //
+ redirect_type rt (redirect_type::none);
+ switch (tt)
+ {
+ case type::in_pass:
+ case type::out_pass: rt = redirect_type::pass; break;
+
+ case type::in_null:
+ case type::out_null: rt = redirect_type::null; break;
+
+ case type::out_trace: rt = redirect_type::trace; break;
+
+ case type::out_merge: rt = redirect_type::merge; break;
+
+ case type::in_str:
+ case type::out_str:
+ {
+ bool re (mod.find ('~') != string::npos);
+ assert (tt == type::out_str || !re);
+
+ rt = re
+ ? redirect_type::here_str_regex
+ : redirect_type::here_str_literal;
+
+ break;
+ }
+
+ case type::in_doc:
+ case type::out_doc:
+ {
+ bool re (mod.find ('~') != string::npos);
+ assert (tt == type::out_doc || !re);
+
+ rt = re
+ ? redirect_type::here_doc_regex
+ : redirect_type::here_doc_literal;
+
+ break;
+ }
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app: rt = redirect_type::file; break;
+ }
+
+ optional<redirect>& r (fd == 0 ? c.in :
+ fd == 1 ? c.out :
+ c.err);
+
+ optional<redirect_type> overriden;
+
+ if (r)
+ overriden = r->type;
+
+ r = redirect (rt);
+
+ // Save the token into the redirect (its modifiers have already been
+ // copied to mod above).
+ //
+ r->token = move (t);
+
+ switch (rt)
+ {
+ case redirect_type::none:
+ // Remove the assertion if/when the none redirect syntax is
+ // invented.
+ //
+ assert (false);
+ // Fall through.
+ case redirect_type::pass:
+ case redirect_type::null:
+ case redirect_type::trace:
+ break;
+ case redirect_type::merge:
+ switch (fd)
+ {
+ case 0: assert (false); break;
+ case 1: p = pending::out_merge; break;
+ case 2: p = pending::err_merge; break;
+ }
+ break;
+ case redirect_type::here_str_literal:
+ switch (fd)
+ {
+ case 0: p = pending::in_string; break;
+ case 1: p = pending::out_string; break;
+ case 2: p = pending::err_string; break;
+ }
+ break;
+ case redirect_type::here_str_regex:
+ switch (fd)
+ {
+ case 0: assert (false); break;
+ case 1: p = pending::out_str_regex; break;
+ case 2: p = pending::err_str_regex; break;
+ }
+ break;
+ case redirect_type::here_doc_literal:
+ switch (fd)
+ {
+ case 0: p = pending::in_document; break;
+ case 1: p = pending::out_document; break;
+ case 2: p = pending::err_document; break;
+ }
+ break;
+ case redirect_type::here_doc_regex:
+ switch (fd)
+ {
+ case 0: assert (false); break;
+ case 1: p = pending::out_doc_regex; break;
+ case 2: p = pending::err_doc_regex; break;
+ }
+ break;
+ case redirect_type::file:
+ switch (fd)
+ {
+ case 0: p = pending::in_file; break;
+ case 1: p = pending::out_file; break;
+ case 2: p = pending::err_file; break;
+ }
+
+ // Also sets for stdin, but this is harmless.
+ //
+ r->file.mode = tt == type::out_file_ovr ? redirect_fmode::overwrite :
+ tt == type::out_file_app ? redirect_fmode::append :
+ redirect_fmode::compare;
+
+ break;
+
+ case redirect_type::here_doc_ref: assert (false); break;
+ }
+
+ // If we are overriding a here-document, then remove the reference
+ // to this command redirect from the corresponding here_doc object.
+ //
+ if (!pre_parse_ &&
+ overriden &&
+ (*overriden == redirect_type::here_doc_literal ||
+ *overriden == redirect_type::here_doc_regex))
+ {
+ size_t e (expr.size () - 1);
+ size_t p (expr.back ().pipe.size ());
+ int f (static_cast<int> (fd));
+
+ for (here_doc& d: hd)
+ {
+ small_vector<here_redirect, 2>& rs (d.redirects);
+
+ auto i (find_if (rs.begin (), rs.end (),
+ [e, p, f] (const here_redirect& r)
+ {
+ return r.expr == e &&
+ r.pipe == p &&
+ r.fd == f;
+ }));
+
+ if (i != rs.end ())
+ {
+ rs.erase (i);
+ break;
+ }
+ }
+ }
+ };
+
+ // Set pending cleanup type.
+ //
+ auto parse_clean = [&p, &mod] (token& t)
+ {
+ p = pending::clean;
+ mod = move (t.value);
+ };
+
+ const location ll (get_location (t)); // Line location.
+
+ // Keep parsing chunks of the command line until we see one of the
+ // "terminators" (newline, exit status comparison, etc).
+ //
+ location l (ll);
+ names ns; // Reuse to reduce allocations.
+
+ for (bool done (false); !done; l = get_location (t))
+ {
+ tt = ra.resolve (tt);
+
+ switch (tt)
+ {
+ case type::newline:
+ {
+ done = true;
+ break;
+ }
+
+ case type::equal:
+ case type::not_equal:
+ {
+ if (!pre_parse_)
+ check_pending (l);
+
+ c.exit = parse_command_exit (t, tt);
+
+ // Only a limited set of things can appear after the exit status
+ // so we check this here.
+ //
+ switch (tt)
+ {
+ case type::newline:
+
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+ break;
+
+ default:
+ {
+ // Bail out if this is one of the unknown/unexpected tokens.
+ //
+ done = true;
+ break;
+ }
+ }
+
+ break;
+ }
+
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+
+ case type::in_pass:
+ case type::out_pass:
+
+ case type::in_null:
+ case type::out_null:
+
+ case type::out_trace:
+
+ case type::out_merge:
+
+ case type::in_str:
+ case type::in_doc:
+ case type::out_str:
+ case type::out_doc:
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+
+ case type::clean:
+ {
+ if (pre_parse_)
+ {
+ // The only things we need to handle here are the here-document
+ // and here-document regex end markers since we need to know
+ // how many of them to pre-parse after the command.
+ //
+ switch (tt)
+ {
+ case type::in_doc:
+ case type::out_doc:
+ mod = move (t.value);
+
+ bool re (mod.find ('~') != string::npos);
+ const char* what (re
+ ? "here-document regex end marker"
+ : "here-document end marker");
+
+ // The token that follows the redirect must be the end marker
+ // (verified below).
+ //
+ next (t, tt);
+
+ // We require the end marker to be an unquoted or completely
+ // quoted word. The complete quoting becomes important for
+ // cases like foo"$bar" (where we will see word 'foo').
+ //
+ // For good measure we could have also required it to be
+ // separated from the following token, but our grammar
+ // allows one to write >>EOO;. The problematic sequence
+ // would be >>FOO$bar -- on reparse it will be expanded
+ // as a single word.
+ //
+ if (tt != type::word || t.value.empty ())
+ fail (t) << "expected " << what;
+
+ peek ();
+ const token& p (peeked ());
+ if (!p.separated)
+ {
+ switch (p.type)
+ {
+ case type::dollar:
+ case type::lparen:
+ fail (p) << what << " must be literal";
+ }
+ }
+
+ quote_type qt (t.qtype);
+ switch (qt)
+ {
+ case quote_type::unquoted:
+ qt = quote_type::single; // Treat as single-quoted.
+ break;
+ case quote_type::single:
+ case quote_type::double_:
+ if (t.qcomp)
+ break;
+ // Fall through.
+ case quote_type::mixed:
+ fail (t) << "partially-quoted " << what;
+ }
+
+ regex_parts r;
+ string end (move (t.value));
+
+ if (re)
+ {
+ check_regex_mod (mod, end, l, what);
+
+ r = parse_regex (end, l, what);
+ end = move (r.value); // The "cleared" end marker.
+ }
+
+ bool literal (qt == quote_type::single);
+ bool shared (false);
+
+ for (const auto& d: hd)
+ {
+ if (d.end == end)
+ {
+ auto check = [&t, &end, &re, this] (bool c,
+ const char* what)
+ {
+ if (!c)
+ fail (t) << "different " << what
+ << " for shared here-document "
+ << (re ? "regex '" : "'") << end << "'";
+ };
+
+ check (d.modifiers == mod, "modifiers");
+ check (d.literal == literal, "quoting");
+
+ if (re)
+ {
+ check (d.regex == r.intro, "introducers");
+ check (d.regex_flags == r.flags, "global flags");
+ }
+
+ shared = true;
+ break;
+ }
+ }
+
+ if (!shared)
+ hd.push_back (
+ here_doc {
+ {},
+ move (end),
+ literal,
+ move (mod),
+ r.intro, move (r.flags)});
+
+ break;
+ }
+
+ next (t, tt);
+ break;
+ }
+
+ // If this is one of the operators/separators, check that we
+ // don't have any pending locations to be filled.
+ //
+ check_pending (l);
+
+ // Note: there is another one in the inner loop below.
+ //
+ switch (tt)
+ {
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+ {
+ // Check that the previous command makes sense.
+ //
+ check_command (l, tt != type::pipe);
+ expr.back ().pipe.push_back (move (c));
+
+ c = command ();
+ p = pending::program;
+
+ if (tt != type::pipe)
+ {
+ expr_operator o (tt == type::log_or
+ ? expr_operator::log_or
+ : expr_operator::log_and);
+ expr.push_back ({o, command_pipe ()});
+ }
+
+ break;
+ }
+
+ case type::in_pass:
+ case type::out_pass:
+
+ case type::in_null:
+ case type::out_null:
+
+ case type::out_trace:
+
+ case type::out_merge:
+
+ case type::in_str:
+ case type::in_doc:
+ case type::out_str:
+ case type::out_doc:
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+ {
+ parse_redirect (move (t), tt, l);
+ break;
+ }
+
+ case type::clean:
+ {
+ parse_clean (t);
+ break;
+ }
+
+ default: assert (false); break;
+ }
+
+ next (t, tt);
+ break;
+ }
+ default:
+ {
+ // Bail out if this is one of the unknown tokens.
+ //
+ if (!start_names (tt))
+ {
+ done = true;
+ break;
+ }
+
+ // Here-document end markers are literal (we verified that above
+ // during pre-parsing) and we need to know whether they were
+ // quoted. So handle this case specially.
+ //
+ {
+ int fd;
+ switch (p)
+ {
+ case pending::in_document: fd = 0; break;
+ case pending::out_document:
+ case pending::out_doc_regex: fd = 1; break;
+ case pending::err_document:
+ case pending::err_doc_regex: fd = 2; break;
+ default: fd = -1; break;
+ }
+
+ if (fd != -1)
+ {
+ if (tt != type::word || t.value.empty ())
+ fail (t) << "expected here-document end marker";
+
+ here_redirect rd {
+ expr.size () - 1, expr.back ().pipe.size (), fd};
+
+ string end (move (t.value));
+
+ regex_parts r;
+
+ if (p == pending::out_doc_regex ||
+ p == pending::err_doc_regex)
+ {
+ // We can't fail here as we already parsed all the end
+ // markers during pre-parsing stage, and so no need in the
+ // description.
+ //
+ r = parse_regex (end, l, "");
+ end = move (r.value); // The "cleared" end marker.
+ }
+
+ bool shared (false);
+ for (auto& d: hd)
+ {
+ // No need to check that redirects that share here-document
+ // have the same modifiers, etc. That has been done during
+ // pre-parsing.
+ //
+ if (d.end == end)
+ {
+ d.redirects.emplace_back (rd);
+ shared = true;
+ break;
+ }
+ }
+
+ if (!shared)
+ hd.push_back (
+ here_doc {
+ {rd},
+ move (end),
+ (t.qtype == quote_type::unquoted ||
+ t.qtype == quote_type::single),
+ move (mod),
+ r.intro, move (r.flags)});
+
+ p = pending::none;
+ mod.clear ();
+
+ next (t, tt);
+ break;
+ }
+ }
+
+ // Parse the next chunk as simple names to get expansion, etc.
+ // Note that we do it in the chunking mode to detect whether
+ // anything in each chunk is quoted.
+ //
+ // @@ PAT: should we support pattern expansion? This is even
+ // fuzzier than the variable case above. Though this is the
+ // shell semantics. Think what happens when we do rm *.txt?
+ //
+ reset_quoted (t);
+ parse_names (t, tt,
+ ns,
+ pattern_mode::ignore,
+ true,
+ "command line",
+ nullptr);
+
+ if (pre_parse_) // Nothing else to do if we are pre-parsing.
+ break;
+
+ // Process what we got. Determine whether anything inside was
+ // quoted (note that the current token is "next" and is not part
+ // of this).
+ //
+ bool q ((quoted () -
+ (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
+
+ for (name& n: ns)
+ {
+ string s;
+
+ try
+ {
+ s = value_traits<string>::convert (move (n), nullptr);
+ }
+ catch (const invalid_argument&)
+ {
+ diag_record dr (fail (l));
+ dr << "invalid string value ";
+ to_stream (dr.os, n, true); // Quote.
+ }
+
+ // If it is a quoted chunk, then we add the word as is.
+ // Otherwise we re-lex it. But if the word doesn't contain any
+ // interesting characters (operators plus quotes/escapes),
+ // then no need to re-lex.
+ //
+ // NOTE: update quoting (script.cxx:to_stream_q()) if adding
+ // any new characters.
+ //
+ if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+ add_word (move (s), l);
+ else
+ {
+ // If the chunk re-parsing results in error, our diagnostics
+ // will look like this:
+ //
+ // <string>:1:4: error: stdout merge redirect file descriptor must be 2
+ // script:2:5: info: while parsing string '1>&a'
+ //
+ auto df = make_diag_frame (
+ [this, s, &l](const diag_record& dr)
+ {
+ dr << info (l) << "while parsing string '" << s << "'";
+ });
+
+ // When re-lexing we do "effective escaping" and only for
+ // ['"\] (quotes plus the backslash itself). In particular,
+ // there is no way to escape redirects, operators, etc. The
+ // idea is to prefer quoting except for passing literal
+ // quotes, for example:
+ //
+ // args = \"&foo\"
+ // cmd $args # cmd &foo
+ //
+ // args = 'x=\"foo bar\"'
+ // cmd $args # cmd x="foo bar"
+ //
+ istringstream is (s);
+ path_name in ("<string>");
+ lexer lex (is, in,
+ lexer_mode::command_expansion,
+ ra,
+ "\'\"\\");
+
+ // Treat the first "sub-token" as always separated from what
+ // we saw earlier.
+ //
+ // Note that this is not "our" token so we cannot do
+ // fail(t). Rather we should do fail(l).
+ //
+ token t (lex.next ());
+ location l (build2::get_location (t, in));
+ t.separated = true;
+
+ string w;
+ bool f (t.type == type::eos); // If the whole thing is empty.
+
+ for (; t.type != type::eos; t = lex.next ())
+ {
+ type tt (ra.resolve (t.type));
+ l = build2::get_location (t, in);
+
+ // Re-lexing double-quotes will recognize $, ( inside as
+ // tokens so we have to reverse them back. Since we don't
+ // treat spaces as separators we can be sure we will get
+ // it right.
+ //
+ switch (tt)
+ {
+ case type::dollar: w += '$'; continue;
+ case type::lparen: w += '('; continue;
+ }
+
+ // Retire the current word. We need to distinguish between
+ // empty and non-existent (e.g., > vs >"").
+ //
+ if (!w.empty () || f)
+ {
+ add_word (move (w), l);
+
+ // Not every pending position consumes the string (for
+ // example, the merge file descriptor is only inspected),
+ // so reset it explicitly rather than rely on it having
+ // been moved from. Otherwise the same word could be
+ // retired again on the next token.
+ //
+ w.clear ();
+ f = false;
+ }
+
+ if (tt == type::word)
+ {
+ w = move (t.value);
+ f = true;
+ continue;
+ }
+
+ // If this is one of the operators/separators, check that
+ // we don't have any pending locations to be filled.
+ //
+ check_pending (l);
+
+ // Note: there is another one in the outer loop above.
+ //
+ switch (tt)
+ {
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+ {
+ // Check that the previous command makes sense.
+ //
+ check_command (l, tt != type::pipe);
+ expr.back ().pipe.push_back (move (c));
+
+ c = command ();
+ p = pending::program;
+
+ if (tt != type::pipe)
+ {
+ expr_operator o (tt == type::log_or
+ ? expr_operator::log_or
+ : expr_operator::log_and);
+ expr.push_back ({o, command_pipe ()});
+ }
+
+ break;
+ }
+
+ case type::in_pass:
+ case type::out_pass:
+
+ case type::in_null:
+ case type::out_null:
+
+ case type::out_trace:
+
+ case type::out_merge:
+
+ case type::in_str:
+ case type::out_str:
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+ {
+ parse_redirect (move (t), tt, l);
+ break;
+ }
+
+ case type::clean:
+ {
+ parse_clean (t);
+ break;
+ }
+
+ case type::in_doc:
+ case type::out_doc:
+ {
+ fail (l) << "here-document redirect in expansion";
+ break;
+ }
+ }
+ }
+
+ // Don't forget the last word.
+ //
+ if (!w.empty () || f)
+ add_word (move (w), l);
+ }
+ }
+
+ ns.clear ();
+ break;
+ }
+ }
+ }
+
+ if (!pre_parse_)
+ {
+ // Verify we don't have anything pending to be filled and the
+ // command makes sense.
+ //
+ check_pending (l);
+ check_command (l, true);
+
+ expr.back ().pipe.push_back (move (c));
+ }
+
+ return make_pair (move (expr), move (hd));
+ }
+
+ // Parse the exit status check that follows a command (the equal or
+ // not_equal token followed by the status) and return it as command_exit.
+ //
+ // Outside of the pre-parse stage the status must be a single, simple,
+ // non-empty name that stoul() consumes in its entirety and that is less
+ // than 256. Anything else is diagnosed and failed.
+ //
+ command_exit parser::
+ parse_command_exit (token& t, type& tt)
+ {
+   // enter: equal/not_equal
+   // leave: token after exit status (one parse_names() chunk)
+
+   exit_comparison comp (tt == type::equal
+                         ? exit_comparison::eq
+                         : exit_comparison::ne);
+
+   // The next chunk should be the exit status.
+   //
+   next (t, tt);
+   location l (get_location (t));
+   names ns (parse_names (t, tt,
+                          pattern_mode::ignore,
+                          true,
+                          "exit status",
+                          nullptr));
+
+   // Start with an out-of-range value so that every failure path below
+   // ends up in the same diagnostics.
+   //
+   unsigned long es (256);
+
+   if (!pre_parse_)
+   {
+     try
+     {
+       if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ())
+       {
+         const string& v (ns[0].value);
+
+         // Note that stoul() stops at the first character it cannot
+         // convert, so without checking the number of characters consumed
+         // we would accept values like `12abc` (as 12) or `0x10` (as 0).
+         //
+         size_t n (0);
+         unsigned long r (stoul (v, &n));
+
+         if (n == v.size ())
+           es = r;
+       }
+     }
+     catch (const exception&) {} // Fall through.
+
+     if (es > 255)
+     {
+       diag_record dr;
+
+       dr << fail (l) << "expected exit status instead of ";
+       to_stream (dr.os, ns, true); // Quote.
+
+       dr << info << "exit status is an unsigned integer less than 256";
+     }
+   }
+
+   return command_exit {comp, static_cast<uint8_t> (es)};
+ }
+
+ // Parse the here-document fragments that follow the command line and
+ // store the results in the redirects of the already parsed command
+ // expression (p.first) that refer to them (p.second).
+ //
+ // For each here-document the first referring redirect receives the parsed
+ // content while the remaining ones are replaced with here_doc_ref
+ // redirects that refer to the first. The fragment is still parsed (and
+ // then dropped) if no redirect refers to it.
+ //
+ void parser::
+ parse_here_documents (token& t, type& tt,
+ pair<command_expr, here_docs>& p)
+ {
+ // enter: newline
+ // leave: newline
+
+ // Parse here-document fragments in the order they were mentioned on
+ // the command line.
+ //
+ for (here_doc& h: p.second)
+ {
+ // Switch to the here-line mode which is like single/double-quoted
+ // string but recognizes the newline as a separator.
+ //
+ mode (h.literal
+ ? lexer_mode::here_line_single
+ : lexer_mode::here_line_double);
+ next (t, tt);
+
+ parsed_doc v (
+ parse_here_document (t, tt, h.end, h.modifiers, h.regex));
+
+ // If all the here-document redirects are overridden, then we just
+ // drop the fragment.
+ //
+ if (!pre_parse_ && !h.redirects.empty ())
+ {
+ auto i (h.redirects.cbegin ());
+
+ command& c (p.first[i->expr].pipe[i->pipe]);
+
+ // Select the redirect by the file descriptor (0 - in, 1 - out,
+ // anything else - err).
+ //
+ optional<redirect>& r (i->fd == 0 ? c.in :
+ i->fd == 1 ? c.out :
+ c.err);
+
+ assert (r); // Must be present since it is referred.
+
+ if (v.re)
+ {
+ assert (r->type == redirect_type::here_doc_regex);
+
+ r->regex = move (v.regex);
+ r->regex.flags = move (h.regex_flags);
+ }
+ else
+ {
+ assert (r->type == redirect_type::here_doc_literal);
+
+ r->str = move (v.str);
+ }
+
+ r->end = move (h.end);
+ r->end_line = v.end_line;
+ r->end_column = v.end_column;
+
+ // Note that our references cannot be invalidated because the
+ // command_expr/command-pipe vectors already contain all their
+ // elements.
+ //
+ for (++i; i != h.redirects.cend (); ++i)
+ {
+ command& c (p.first[i->expr].pipe[i->pipe]);
+
+ optional<redirect>& ir (i->fd == 0 ? c.in :
+ i->fd == 1 ? c.out :
+ c.err);
+
+ // Must be present since it is referenced by here-doc.
+ //
+ assert (ir);
+
+ // Note: preserve the original representation.
+ //
+ ir = redirect (redirect_type::here_doc_ref, *r, move (ir->token));
+ }
+ }
+
+ expire_mode ();
+ }
+ }
+
+ // Parse a single here-document: the lines up to and including the end
+ // marker line.
+ //
+ // The em argument is the end marker, mod is the modifiers string (only
+ // the presence of ':', which suppresses the final newline, is examined
+ // here), and re is the regex introducer character ('\0' if the document
+ // is not a regex).
+ //
+ // Return the parsed_doc that holds either the literal string or the regex
+ // lines (if re is not '\0') together with the line/column of the end
+ // marker. During the pre-parse stage no content is accumulated; instead
+ // the saved replay tokens are fixed up by stripping the indentation
+ // prefix (see below).
+ //
+ parser::parsed_doc parser::
+ parse_here_document (token& t, type& tt,
+ const string& em,
+ const string& mod,
+ char re)
+ {
+ // enter: first token on first line
+ // leave: newline (after end marker)
+
+ // String literal. Note that when deciding whether to terminate the
+ // previously added line with a newline, we need to distinguish a yet
+ // empty result and the one that has a single blank line added.
+ //
+ optional<string> rs;
+
+ regex_lines rre;
+
+ // Here-documents can be indented. The leading whitespaces of the end
+ // marker line (called strip prefix) determine the indentation. Every
+ // other line in the here-document should start with this prefix which
+ // is automatically stripped. The only exception is a blank line.
+ //
+ // The fact that the strip prefix is only known at the end, after
+ // seeing all the lines, is rather inconvenient. As a result, the way
+ // we implement this is a bit hackish (though there is also something
+ // elegant about it): at the end of the pre-parse stage we are going
+ // to re-examine the sequence of tokens that comprise this here-document
+ // and "fix up" the first token of each line by stripping the prefix.
+ //
+ string sp;
+
+ // Remember the position of the first token in this here-document.
+ //
+ size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
+
+ // We will use the location of the first token on the line for the
+ // regex diagnostics. At the end of the loop it will point to the
+ // beginning of the end marker.
+ //
+ location l;
+
+ while (tt != type::eos)
+ {
+ l = get_location (t);
+
+ // Check if this is the end marker. For starters, it should be a
+ // single, unquoted word followed by a newline.
+ //
+ if (tt == type::word &&
+ t.qtype == quote_type::unquoted &&
+ peek () == type::newline)
+ {
+ const string& v (t.value);
+
+ size_t vn (v.size ());
+ size_t en (em.size ());
+
+ // Then check that it ends with the end marker.
+ //
+ if (vn >= en && v.compare (vn - en, en, em) == 0)
+ {
+ // Now check that the prefix only contains whitespaces.
+ //
+ size_t n (vn - en);
+
+ if (v.find_first_not_of (" \t") >= n)
+ {
+ assert (pre_parse_ || n == 0); // Should have been stripped.
+
+ if (n != 0)
+ sp.assign (v, 0, n); // Save the strip prefix.
+
+ next (t, tt); // Get the newline.
+ break;
+ }
+ }
+ }
+
+ // Expand the line (can be blank).
+ //
+ // @@ PAT: one could argue that if we do it in variables, then we
+ // should do it here as well. Though feels bizarre.
+ //
+ names ns (tt != type::newline
+ ? parse_names (t, tt,
+ pattern_mode::ignore,
+ false,
+ "here-document line",
+ nullptr)
+ : names ());
+
+ if (!pre_parse_)
+ {
+ // What shall we do if the expansion results in multiple names?
+ // For example, if the line contains just the variable expansion
+ // and it is of type strings. Adding all the elements space-
+ // separated seems like the natural thing to do.
+ //
+ string s;
+ for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
+ {
+ string n;
+
+ try
+ {
+ n = value_traits<string>::convert (move (*i), nullptr);
+ }
+ catch (const invalid_argument&)
+ {
+ fail (l) << "invalid string value '" << *i << "'";
+ }
+
+ if (i == b)
+ s = move (n);
+ else
+ {
+ s += ' ';
+ s += n;
+ }
+ }
+
+ if (!re)
+ {
+ // Add newline after previous line.
+ //
+ if (rs)
+ {
+ *rs += '\n';
+ *rs += s;
+ }
+ else
+ rs = move (s);
+ }
+ else
+ {
+ // Due to expansion we can end up with multiple lines. If empty
+ // then will add a blank textual literal.
+ //
+ for (size_t p (0); p != string::npos; )
+ {
+ string ln;
+ size_t np (s.find ('\n', p));
+
+ if (np != string::npos)
+ {
+ ln = string (s, p, np - p);
+ p = np + 1;
+ }
+ else
+ {
+ ln = string (s, p);
+ p = np;
+ }
+
+ if (ln[0] != re) // Line doesn't start with regex introducer.
+ {
+ // This is a line-char literal (covers blank lines as well).
+ //
+ // Append textual literal.
+ //
+ rre.lines.emplace_back (l.line, l.column, move (ln), false);
+ }
+ else // Line starts with the regex introducer.
+ {
+ // This is a char-regex, or a sequence of line-regex syntax
+ // characters or both (in this specific order). So we will
+ // add regex (with optional special characters) or special
+ // literal.
+ //
+ // Note: this p (position of the closing introducer) hides the
+ // outer loop's p within this block.
+ //
+ size_t p (ln.find (re, 1));
+ if (p == string::npos)
+ {
+ // No regex, just a sequence of syntax characters.
+ //
+ string spec (ln, 1);
+ if (spec.empty ())
+ fail (l) << "no syntax line characters";
+
+ // Append special literal.
+ //
+ rre.lines.emplace_back (
+ l.line, l.column, move (spec), true);
+ }
+ else
+ {
+ // Regex (probably with syntax characters).
+ //
+ // Note: this re hides the introducer parameter within this
+ // block.
+ //
+ regex_parts re;
+
+ // Empty regex is a special case representing a blank line.
+ //
+ if (p == 1)
+ // Position to optional special characters of an empty
+ // regex.
+ //
+ ++p;
+ else
+ // Can't fail as all the pre-conditions verified
+ // (non-empty with both introducers in place), so no
+ // description required.
+ //
+ re = parse_regex (ln, l, "", &p);
+
+ // Append regex with optional special characters.
+ //
+ rre.lines.emplace_back (l.line, l.column,
+ move (re.value), move (re.flags),
+ string (ln, p));
+ }
+ }
+ }
+ }
+ }
+
+ // We should expand the whole line at once so this would normally be
+ // a newline but can also be an end-of-stream.
+ //
+ if (tt == type::newline)
+ next (t, tt);
+ else
+ assert (tt == type::eos);
+ }
+
+ if (tt == type::eos)
+ fail (t) << "missing here-document end marker '" << em << "'";
+
+ if (pre_parse_)
+ {
+ // Strip the indentation prefix if there is one.
+ //
+ assert (replay_ == replay::save);
+
+ if (!sp.empty ())
+ {
+ size_t sn (sp.size ());
+
+ // Each iteration handles one line: ri is at the line's first token
+ // on entry and at its newline at the end of the body (the loop's
+ // increment then moves past the newline).
+ //
+ for (; ri != replay_data_.size (); ++ri)
+ {
+ token& rt (replay_data_[ri].token);
+
+ if (rt.type == type::newline) // Blank
+ continue;
+
+ if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0)
+ fail (rt) << "unindented here-document line";
+
+ // If the word is equal to the strip prefix then we have to drop
+ // the token. Note that simply making it an empty word won't
+ // have the same semantics. For instance, it would trigger
+ // concatenated expansion.
+ //
+ if (rt.value.size () == sn)
+ replay_data_.erase (replay_data_.begin () + ri);
+ else
+ {
+ rt.value.erase (0, sn);
+ rt.column += sn;
+ ++ri;
+ }
+
+ // Skip until next newline.
+ //
+ for (; replay_data_[ri].token.type != type::newline; ++ri) ;
+ }
+ }
+ }
+ else
+ {
+ // Add final newline unless suppressed.
+ //
+ if (mod.find (':') == string::npos)
+ {
+ if (re)
+ // Note that the position is synthetic, but that's ok as we don't
+ // expect any diagnostics to refer to this line.
+ //
+ rre.lines.emplace_back (l.line, l.column, string (), false);
+ else if (rs)
+ *rs += '\n';
+ else
+ rs = "\n";
+ }
+
+ // Finalize regex lines.
+ //
+ if (re)
+ {
+ // Empty regex matches nothing, so not of much use.
+ //
+ if (rre.lines.empty ())
+ fail (l) << "empty here-document regex";
+
+ rre.intro = re;
+ }
+ }
+
+ return re
+ ? parsed_doc (move (rre), l.line, l.column)
+ : parsed_doc (rs ? move (*rs) : string (), l.line, l.column);
+ }
+
+ // Return the number of quoted tokens seen since the last reset_quoted()
+ // call.
+ //
+ size_t parser::
+ quoted () const
+ {
+   // Unless we are replaying, the lexer keeps the count for us.
+   //
+   if (replay_ != replay::play)
+     return lexer_->quoted ();
+
+   // Otherwise count the quoted tokens among those that we have replayed
+   // since the last reset.
+   //
+   size_t n (0);
+
+   for (size_t i (replay_quoted_); i != replay_i_; ++i)
+   {
+     const token& rt (replay_data_[i].token);
+
+     if (rt.qtype != quote_type::unquoted)
+       ++n;
+   }
+
+   return n;
+ }
+
+ // Restart counting the quoted tokens from the current token (which is
+ // included into the count if it is quoted).
+ //
+ void parser::
+ reset_quoted (token& cur)
+ {
+   if (replay_ == replay::play)
+   {
+     // The current token is the one we have replayed last.
+     //
+     replay_quoted_ = replay_i_ - 1;
+
+     // Must be the same token.
+     //
+     assert (replay_data_[replay_quoted_].token.qtype == cur.qtype);
+     return;
+   }
+
+   // Not replaying: delegate to the lexer, seeding the count with the
+   // current token.
+   //
+   bool q (cur.qtype != quote_type::unquoted);
+   lexer_->reset_quoted (q ? 1 : 0);
+ }
+
+ // Make both this class and the base parser use the specified lexer.
+ //
+ void parser::
+ set_lexer (lexer* l)
+ {
+   // Keep the two pointers in sync: the base class has its own lexer_
+   // member which is distinct from ours.
+   //
+   build2::parser::lexer_ = lexer_ = l;
+ }
+
+ static redirect_aliases no_redirect_aliases;
+
+ // Parse the attributes string (expected to be either empty or of the
+ // `[...]` form) and then delegate to the base parser's
+ // apply_value_attributes() to perform the attribute-guided assignment of
+ // rhs to lhs. The name argument is passed to the lexer and is also set as
+ // the current path.
+ //
+ // NOTE(review): on return path_ still points to name and the lexer
+ // pointers (see set_lexer()) still point to the local lexer, which no
+ // longer exists -- presumably they are always re-set before the next use;
+ // confirm against callers.
+ //
+ void parser::
+ apply_value_attributes (const variable* var,
+ value& lhs,
+ value&& rhs,
+ const string& attributes,
+ token_type kind,
+ const path_name& name)
+ {
+ path_ = &name;
+
+ istringstream is (attributes);
+
+ // Note that the redirect alias information is not used in the
+ // attributes lexer mode.
+ //
+ lexer l (is, name, lexer_mode::attributes, no_redirect_aliases);
+
+ set_lexer (&l);
+
+ token t;
+ type tt;
+
+ next_with_attributes (t, tt); // Enable `[` recognition.
+
+ // An empty attributes string (immediate eos) is accepted.
+ //
+ if (tt != type::lsbrace && tt != type::eos)
+ fail (t) << "expected '[' instead of " << t;
+
+ attributes_push (t, tt, true);
+
+ if (tt != type::eos)
+ fail (t) << "trailing junk after ']'";
+
+ build2::parser::apply_value_attributes (var, lhs, move (rhs), kind);
+ }
+
+ // Start saving the replay tokens, get the first token of the line, and
+ // classify the line based on it and, if necessary, on the second token
+ // (peeked in the stm lexer mode).
+ //
+ line_type parser::
+ pre_parse_line_start (token& t, token_type& tt, lexer_mode stm)
+ {
+   replay_save (); // Start saving tokens from the current one.
+   next (t, tt);
+
+   // Anything that doesn't start with an unquoted word is a command.
+   //
+   if (tt != type::word || t.qtype != quote_type::unquoted)
+     return line_type::cmd;
+
+   const string& w (t.value);
+
+   // Special commands take precedence over variable assignments.
+   //
+   if (w == "if")    return line_type::cmd_if;
+   if (w == "if!")   return line_type::cmd_ifn;
+   if (w == "elif")  return line_type::cmd_elif;
+   if (w == "elif!") return line_type::cmd_elifn;
+   if (w == "else")  return line_type::cmd_else;
+   if (w == "end")   return line_type::cmd_end;
+
+   // It is a variable assignment if the unquoted name is followed by an
+   // assign/append/prepend operator (assignment to a computed variable
+   // name must use the set builtin).
+   //
+   // Switching the recognition of leading variable assignments for the
+   // next token is safe because we know we cannot be in the quoted mode
+   // (the current token is not quoted).
+   //
+   type nt (peek (stm));
+
+   if (nt != type::assign && nt != type::prepend && nt != type::append)
+     return line_type::cmd;
+
+   // Note that the missing command program is detected later, by
+   // parse_command_expr().
+   //
+   if (w.empty ())
+     fail (t) << "missing variable name";
+
+   return line_type::var;
+ }
+
+ // Execute the pre-parsed lines in the [i, e) range by replaying their
+ // tokens and calling exec_set for variable lines, exec_cmd for command
+ // lines, and exec_if for the if/elif conditions. The li argument is the
+ // command line index which is incremented for executed as well as skipped
+ // commands. The var_pool argument must not be NULL if there are variable
+ // lines that don't have their variables pre-entered (line::var is NULL).
+ //
+ // Return false if the exit exception with the success status was thrown
+ // during the execution and true otherwise. If the exit exception has the
+ // failure status, then throw failed.
+ //
+ bool parser::
+ exec_lines (lines::const_iterator i, lines::const_iterator e,
+ const function<exec_set_function>& exec_set,
+ const function<exec_cmd_function>& exec_cmd,
+ const function<exec_if_function>& exec_if,
+ size_t& li,
+ variable_pool* var_pool)
+ {
+ try
+ {
+ token t;
+ type tt;
+ for (; i != e; ++i)
+ {
+ const line& ln (*i);
+ line_type lt (ln.type);
+
+ assert (path_ == nullptr);
+
+ // Copy the tokens and start playing.
+ //
+ replay_data (replay_tokens (ln.tokens));
+
+ // We don't really need to change the mode since we already know
+ // the line type.
+ //
+ next (t, tt);
+ const location ll (get_location (t));
+
+ switch (lt)
+ {
+ case line_type::var:
+ {
+ // Enter the variable into the pool if this is not done during
+ // the script parsing. Note that in this case the pool is
+ // expected to be provided.
+ //
+ const variable* var (ln.var);
+
+ if (var == nullptr)
+ {
+ assert (var_pool != nullptr);
+
+ var = &var_pool->insert (t.value);
+ }
+
+ exec_set (*var, t, tt, ll);
+
+ replay_stop ();
+ break;
+ }
+ case line_type::cmd:
+ {
+ // The command is single if its index is 1 and it is only followed
+ // by variable lines (up to e).
+ //
+ bool single (false);
+
+ if (li == 1)
+ {
+ lines::const_iterator j (i);
+ for (++j; j != e && j->type == line_type::var; ++j) ;
+
+ if (j == e) // There is no other command.
+ single = true;
+ }
+
+ exec_cmd (t, tt, li++, single, ll);
+
+ replay_stop ();
+ break;
+ }
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ {
+ next (t, tt); // Skip to start of command.
+
+ bool take;
+ if (lt != line_type::cmd_else)
+ {
+ take = exec_if (t, tt, li++, ll);
+
+ if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn)
+ take = !take;
+ }
+ else
+ {
+ assert (tt == type::newline);
+ take = true;
+ }
+
+ replay_stop ();
+
+ // If end is true, then find the 'end' line. Otherwise, find
+ // the next if-else line. If skip is true then increment the
+ // command line index.
+ //
+ // Note that this lambda hides the next() member function for the
+ // rest of this block.
+ //
+ auto next = [e, &li] (lines::const_iterator j,
+ bool end,
+ bool skip) -> lines::const_iterator
+ {
+ // We need to be aware of nested if-else chains.
+ //
+ size_t n (0);
+
+ for (++j; j != e; ++j)
+ {
+ line_type lt (j->type);
+
+ if (lt == line_type::cmd_if || lt == line_type::cmd_ifn)
+ ++n;
+
+ // If we are nested then we just wait until we get back
+ // to the surface.
+ //
+ if (n == 0)
+ {
+ switch (lt)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ if (end) break;
+ // Fall through.
+ case line_type::cmd_end: return j;
+ default: break;
+ }
+ }
+
+ if (lt == line_type::cmd_end)
+ --n;
+
+ if (skip)
+ {
+ // Note that we don't count else and end as commands.
+ //
+ switch (lt)
+ {
+ case line_type::cmd:
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn: ++li; break;
+ default: break;
+ }
+ }
+ }
+
+ assert (false); // Missing end.
+ return e;
+ };
+
+ // If we are taking this branch then we need to parse all the
+ // lines until the next if-else line and then skip all the
+ // lines until the end (unless next is already end).
+ //
+ // Otherwise, we need to skip all the lines until the next
+ // if-else line and then continue parsing.
+ //
+ if (take)
+ {
+ // Next if-else.
+ //
+ lines::const_iterator j (next (i, false, false));
+ if (!exec_lines (i + 1, j,
+ exec_set, exec_cmd, exec_if,
+ li,
+ var_pool))
+ return false;
+
+ i = j->type == line_type::cmd_end ? j : next (j, true, true);
+ }
+ else
+ {
+ i = next (i, false, true);
+ if (i->type != line_type::cmd_end)
+ --i; // Continue with this line (e.g., elif or else).
+ }
+
+ break;
+ }
+ case line_type::cmd_end:
+ {
+ // The end lines are consumed while handling the if-else chains
+ // above so we should never get here.
+ //
+ assert (false);
+ }
+ }
+ }
+
+ return true;
+ }
+ catch (const exit& e)
+ {
+ // Bail out if the script is exited with the failure status. Otherwise
+ // exit the lines execution normally.
+ //
+ if (!e.status)
+ throw failed ();
+
+ replay_stop ();
+ return false;
+ }
+ }
+
+ // parser::parsed_doc
+ //
+ // Construct the here-document literal (activates the str union member).
+ //
+ parser::parsed_doc::
+ parsed_doc (string s, uint64_t l, uint64_t c)
+   : str (move (s)), re (false), end_line (l), end_column (c)
+ {
+ }
+
+ // Construct the here-document regex (activates the regex union member).
+ //
+ parser::parsed_doc::
+ parsed_doc (regex_lines&& r, uint64_t l, uint64_t c)
+   : regex (move (r)), re (true), end_line (l), end_column (c)
+ {
+ }
+
+ // Move-construct whichever union member is active in the source. Note
+ // that the source's member is moved from but stays active (it is
+ // destroyed by the source's destructor).
+ //
+ parser::parsed_doc::
+ parsed_doc (parsed_doc&& d)
+   : re (d.re), end_line (d.end_line), end_column (d.end_column)
+ {
+   if (!re)
+     new (&str) string (move (d.str));
+   else
+     new (&regex) regex_lines (move (d.regex));
+ }
+
+ // Destroy whichever union member is active.
+ //
+ parser::parsed_doc::
+ ~parsed_doc ()
+ {
+   if (!re)
+     str.~string ();
+   else
+     regex.~regex_lines ();
+ }
+ }
+}
diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx
new file mode 100644
index 0000000..a63ecde
--- /dev/null
+++ b/libbuild2/script/parser.hxx
@@ -0,0 +1,189 @@
+// file : libbuild2/script/parser.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_SCRIPT_PARSER_HXX
+#define LIBBUILD2_SCRIPT_PARSER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/forward.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/parser.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/script/token.hxx>
+#include <libbuild2/script/lexer.hxx> // redirect_aliases
+#include <libbuild2/script/script.hxx>
+
+namespace build2
+{
+ namespace script
+ {
+ class lexer;
+ struct lexer_mode;
+
+ // Base parser for the script languages. It provides the parsing and
+ // execution functionality declared below to the derived parsers.
+ //
+ class parser: protected build2::parser
+ {
+ public:
+ parser (context& c): build2::parser (c) {}
+
+ // Helpers.
+ //
+ // Parse attribute string and perform attribute-guided assignment.
+ // Issue diagnostics and throw failed in case of an error.
+ //
+ void
+ apply_value_attributes (const variable*, // Optional.
+ value& lhs,
+ value&& rhs,
+ const string& attributes,
+ token_type assign_kind,
+ const path_name&); // For diagnostics.
+
+ using build2::parser::apply_value_attributes;
+
+ // Commonly used parsing functions. Issue diagnostics and throw failed
+ // in case of an error.
+ //
+ // Usually (but not always) parse functions receive the token/type
+ // from which it should start consuming and in return the token/type
+ // should contain the first token that has not been consumed.
+ //
+ // Functions that are called parse_*() rather than pre_parse_*() can be
+ // used for both stages.
+ //
+ protected:
+ value
+ parse_variable_line (token&, token_type&);
+
+ // Ordered sequence of here-document redirects that we can expect to
+ // see after the command line.
+ //
+ struct here_redirect
+ {
+ size_t expr; // Index in command_expr.
+ size_t pipe; // Index in command_pipe.
+ int fd; // Redirect fd (0 - in, 1 - out, 2 - err).
+ };
+
+ struct here_doc
+ {
+ // Redirects that share here_doc. Most of the time we will have no
+ // more than 2 (2 - for the roundtrip cases). Doesn't refer to
+ // overridden redirects and thus can be empty.
+ //
+ small_vector<here_redirect, 2> redirects;
+
+ string end;
+ bool literal; // Literal (single-quote).
+ string modifiers;
+
+ // Regex introducer ('\0' if not a regex, so can be used as bool).
+ //
+ char regex;
+
+ // Regex global flags. Meaningful if regex != '\0'.
+ //
+ string regex_flags;
+ };
+ using here_docs = vector<here_doc>;
+
+ pair<command_expr, here_docs>
+ parse_command_expr (token&, token_type&, const redirect_aliases&);
+
+ command_exit
+ parse_command_exit (token&, token_type&);
+
+ void
+ parse_here_documents (token&, token_type&,
+ pair<command_expr, here_docs>&);
+
+ // The result of parsing a here-document: either the literal string or
+ // the regex lines (discriminated by the re member) plus the end marker
+ // location.
+ //
+ struct parsed_doc
+ {
+ union
+ {
+ string str; // Here-document literal.
+ regex_lines regex; // Here-document regex.
+ };
+
+ bool re; // True if regex.
+ uint64_t end_line; // Here-document end marker location.
+ uint64_t end_column;
+
+ parsed_doc (string, uint64_t line, uint64_t column);
+ parsed_doc (regex_lines&&, uint64_t line, uint64_t column);
+ parsed_doc (parsed_doc&&); // Note: move constructible-only type.
+ ~parsed_doc ();
+ };
+
+ parsed_doc
+ parse_here_document (token&, token_type&,
+ const string&, // End marker.
+ const string& mode, // Modifiers.
+ char re_intro); // '\0' if not a regex.
+
+ // Start pre-parsing a script line returning its type, detected based on
+ // the first two tokens. Use the specified lexer mode to peek the second
+ // token.
+ //
+ line_type
+ pre_parse_line_start (token&, token_type&, lexer_mode);
+
+ // Execute.
+ //
+ protected:
+ // Callbacks called by exec_lines() for the variable lines, command
+ // lines, and if/elif conditions, respectively.
+ //
+ using exec_set_function = void (const variable&,
+ token&, token_type&,
+ const location&);
+
+ using exec_cmd_function = void (token&, token_type&,
+ size_t li,
+ bool single,
+ const location&);
+
+ using exec_if_function = bool (token&, token_type&,
+ size_t li,
+ const location&);
+
+ // Return false if the execution of the script should be terminated with
+ // the success status (e.g., as a result of encountering the exit
+ // builtin). For unsuccessful termination the failed exception is thrown.
+ //
+ // If a parser implementation doesn't pre-enter variables into a pool
+ // during the pre-parsing phase, then they are entered during the
+ // execution phase and so the variable pool must be provided. Note that
+ // in this case the variable pool insertions are not MT-safe.
+ //
+ bool
+ exec_lines (lines::const_iterator b, lines::const_iterator e,
+ const function<exec_set_function>&,
+ const function<exec_cmd_function>&,
+ const function<exec_if_function>&,
+ size_t& li,
+ variable_pool* = nullptr);
+
+ // Set lexer pointers for both the current and the base classes.
+ //
+ protected:
+ void
+ set_lexer (lexer*);
+
+ // Number of quoted tokens since last reset. Note that this includes
+ // the peeked token, if any.
+ //
+ protected:
+ size_t
+ quoted () const;
+
+ void
+ reset_quoted (token& current);
+
+ // Index in the replay data from which quoted() starts counting when
+ // replaying. Initialized so that quoted() called before the first
+ // reset_quoted() doesn't read an indeterminate value.
+ //
+ size_t replay_quoted_ = 0;
+
+ protected:
+ lexer* lexer_ = nullptr;
+ };
+ }
+}
+
+#endif // LIBBUILD2_SCRIPT_PARSER_HXX
diff --git a/libbuild2/script/regex.cxx b/libbuild2/script/regex.cxx
new file mode 100644
index 0000000..3f796b6
--- /dev/null
+++ b/libbuild2/script/regex.cxx
@@ -0,0 +1,436 @@
+// file : libbuild2/script/regex.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <locale>
+
+#include <libbuild2/script/regex.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace script
+ {
+ namespace regex
+ {
+ static_assert (alignof (char_string) % 4 == 0,
+ "unexpected char_string alignment");
+
+ static_assert (alignof (char_regex) % 4 == 0,
+ "unexpected char_regex alignment");
+
+ static_assert (sizeof (uintptr_t) > sizeof (int16_t),
+ "unexpected uintptr_t size");
+
+ const line_char line_char::nul (0);
+ const line_char line_char::eof (-1);
+
+ // line_char
+ //
+ // We package the special character into uintptr_t with the following
+ // steps:
+ //
+ // - narrow down int value to int16_t (preserves all the valid values)
+ //
+ // - convert to uint16_t (bitwise representation stays the same, but no
+ // need to bother with signed value widening, leftmost bits loss on
+ // left shift, etc)
+ //
+ // - convert to uintptr_t (storage type)
+ //
+ // - shift left by two bits (the operation is fully reversible as
+ // uintptr_t is wider than uint16_t)
+ //
+ // Create the special character. The value is stored in the bits above
+ // the two lowest ones, which hold the line_type::special type.
+ //
+ line_char::
+ line_char (int c)
+ : data_ (
+ (static_cast <uintptr_t> (
+ static_cast<uint16_t> (
+ static_cast<int16_t> (c))) << 2) |
+ static_cast <uintptr_t> (line_type::special))
+ {
+ // @@ How can we allow anything for basic_regex but only subset
+ // for our own code?
+ //
+ // Extra characters that are accepted in addition to the regex syntax
+ // ones (see the last condition below).
+ //
+ const char ex[] = "pn\n\r";
+
+ assert (c == 0 || // Null character.
+
+ // EOF. Note that it is also passed by msvcrt as _Meta_eos
+ // enum value.
+ //
+ c == -1 ||
+
+ // libstdc++ line/paragraph separators.
+ //
+ c == u'\u2028' || c == u'\u2029' ||
+
+ (c > 0 && c <= 255 && (
+ // Supported regex special characters.
+ //
+ syntax (c) ||
+
+ // libstdc++ look-ahead tokens, newline chars.
+ //
+ string::traits_type::find (ex, 4, c) != nullptr)));
+ }
+
+ // Create the literal character from the string that is inserted into the
+ // pool's string set (or from the equal string that is already there).
+ //
+ line_char::
+ line_char (const char_string& s, line_pool& p)
+ : line_char (&(*p.strings.emplace (s).first))
+ {
+ }
+
+ // As above but move the string into the pool.
+ //
+ line_char::
+ line_char (char_string&& s, line_pool& p)
+ : line_char (&(*p.strings.emplace (move (s)).first))
+ {
+ }
+
+ // Create the regex character from the regex that is inserted at the front
+ // of the pool's regex list.
+ //
+ line_char::
+ line_char (char_regex r, line_pool& p)
+ // Note: in C++17 can write as p.regexes.emplace_front(move (r))
+ //
+ : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r))))
+ {
+ }
+
+ // Return true if the character is one of the supported line regex syntax
+ // characters.
+ //
+ bool
+ line_char::syntax (char c)
+ {
+   static const char cs[] = "()|.*+?{}\\0123456789,=!";
+
+   // Note: don't search the terminating NUL.
+   //
+   return string::traits_type::find (cs, sizeof (cs) - 1, c) != nullptr;
+ }
+
+ // Compare two line characters. Characters of the same type are compared
+ // directly (comparing two regexes is not supported). A literal and a
+ // regex are equal if the literal matches the regex. Any other combination
+ // of different types is unequal.
+ //
+ bool
+ operator== (const line_char& l, const line_char& r)
+ {
+   const line_type lt (l.type ());
+   const line_type rt (r.type ());
+
+   if (lt != rt)
+   {
+     // Match literal with regex.
+     //
+     if (lt == line_type::literal && rt == line_type::regex)
+       return regex_match (*l.literal (), *r.regex ());
+
+     if (rt == line_type::literal && lt == line_type::regex)
+       return regex_match (*r.literal (), *l.regex ());
+
+     return false;
+   }
+
+   switch (lt)
+   {
+   case line_type::special: return l.special () == r.special ();
+
+     // Note that we use pointers (rather than values) comparison
+     // assuming that the strings must belong to the same pool.
+     //
+   case line_type::literal: return l.literal () == r.literal ();
+   case line_type::regex:   assert (false); break;
+   }
+
+   return true;
+ }
+
+ // Order two line characters: equal characters (see operator==) are not
+ // less, characters of different types are ordered by the type, and the
+ // characters of the same type by the value (ordering two regexes is not
+ // supported).
+ //
+ bool
+ operator< (const line_char& l, const line_char& r)
+ {
+   if (l == r)
+     return false;
+
+   const line_type lt (l.type ());
+   const line_type rt (r.type ());
+
+   if (lt != rt)
+     return lt < rt;
+
+   switch (lt)
+   {
+   case line_type::special: return l.special () < r.special ();
+   case line_type::literal: return *l.literal () < *r.literal ();
+   case line_type::regex:   assert (false); break;
+   }
+
+   return false;
+ }
+
+ // line_char_locale
+ //
+
+ // An exemplar locale with the std::ctype<line_char> facet. It is used
+ // for the subsequent line char locale objects creation (see below)
+ // which normally ends up with a shallow copy of a reference-counted
+ // object.
+ //
+ // Note that creating the line char locales from the exemplar is not
+ // merely an optimization: there is a data race in the libstdc++ (at
+ // least as of GCC 9.1) implementation of the locale(const locale&,
+ // Facet*) constructor (bug #91057).
+ //
+ // Also note that we install the facet in init() rather than during
+ // the object creation to avoid a race with the std::locale-related
+ // global variables initialization.
+ //
+ static locale line_char_locale_exemplar;
+
+ // Initialize the exemplar locale as a copy of the current global locale
+ // with the std::ctype<line_char> facet installed.
+ //
+ void
+ init ()
+ {
+ line_char_locale_exemplar =
+ locale (locale (),
+ new std::ctype<line_char> ()); // Hidden by ctype bitmask.
+ }
+
+ // Create the locale as a copy of the exemplar (which is expected to have
+ // been set up by init()).
+ //
+ line_char_locale::
+ line_char_locale ()
+ : locale (line_char_locale_exemplar)
+ {
+ // Make sure init() has been called.
+ //
+ // Note: has_facet() is hidden by a private function in libc++.
+ //
+ assert (std::has_facet<std::ctype<line_char>> (*this));
+ }
+
+ // char_regex
+ //
+ // Transform regex according to the extended flags {idot}. If regex is
+ // malformed then keep transforming, so the resulting string is
+ // malformed the same way. We expect the error to be reported by the
+ // char_regex ctor.
+ //
+ // Return the copy of the regex with the escaping of dots that are outside
+ // of the character class brackets inverted: `.` becomes `\.` and `\.`
+ // becomes `.`. Everything else is copied as is. Must only be called if
+ // the idot flag is set.
+ //
+ static string
+ transform (const string& s, char_flags f)
+ {
+ assert ((f & char_flags::idot) != char_flags::none);
+
+ string r;
+ bool escape (false); // The previous character is an unescaped backslash.
+ bool cclass (false); // Inside the character class brackets.
+
+ for (char c: s)
+ {
+ // Inverse escaping for a dot which is out of the char class
+ // brackets.
+ //
+ bool inverse (c == '.' && !cclass);
+
+ // Handle the escape case. Note that we delay adding the backslash
+ // since we may have to inverse things.
+ //
+ if (escape)
+ {
+ if (!inverse)
+ r += '\\';
+
+ r += c;
+ escape = false;
+
+ continue;
+ }
+ else if (c == '\\')
+ {
+ escape = true;
+ continue;
+ }
+
+ // Keep track of being inside the char class brackets, escape if
+ // inversion. Note that we never inverse square brackets.
+ //
+ if (c == '[' && !cclass)
+ cclass = true;
+ else if (c == ']' && cclass)
+ cclass = false;
+ else if (inverse)
+ r += '\\';
+
+ r += c;
+ }
+
+ if (escape) // Regex is malformed but that's not our problem.
+ r += '\\';
+
+ return r;
+ }
+
+ // Translate our flags to the standard regex flags. Only icase has the
+ // standard counterpart.
+ //
+ static char_regex::flag_type
+ to_std_flags (char_flags f)
+ {
+   // Note that the ECMAScript flag is implied in the absence of a grammar
+   // flag.
+   //
+   if ((f & char_flags::icase) != char_flags::none)
+     return char_regex::icase;
+
+   return char_regex::flag_type ();
+ }
+
+ // Create the regex from the string, first transforming it if the idot
+ // flag is specified (see transform()). Only the icase flag is passed on
+ // to the base class (see to_std_flags()).
+ //
+ char_regex::
+ char_regex (const char_string& s, char_flags f)
+ : base_type ((f & char_flags::idot) != char_flags::none
+ ? transform (s, f)
+ : s,
+ to_std_flags (f))
+ {
+ }
+ }
+ }
+}
+
+namespace std
+{
+ using namespace build2::script::regex;
+
+ // char_traits<line_char>
+ //
+ // Assign the character to each of the first n elements of s.
+ //
+ line_char* char_traits<line_char>::
+ assign (char_type* s, size_t n, char_type c)
+ {
+   for (char_type* p (s), *e (s + n); p != e; ++p)
+     *p = c;
+
+   return s;
+ }
+
+ // Copy n characters from s to d. The ranges may overlap.
+ //
+ line_char* char_traits<line_char>::
+ move (char_type* d, const char_type* s, size_t n)
+ {
+   if (n == 0 || d == s)
+     return d;
+
+   // If d < s then it can't be in [s, s + n) range and so using copy() is
+   // safe. Otherwise d + n is out of (s, s + n] range and so using
+   // copy_backward() is safe.
+   //
+   if (d < s)
+     std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
+   else
+     copy_backward (s, s + n, d + n);
+
+   return d;
+ }
+
+ // Copy n characters from s to d (forward, element by element).
+ //
+ line_char* char_traits<line_char>::
+ copy (char_type* d, const char_type* s, size_t n)
+ {
+   const char_type* e (s + n);
+   std::copy (s, e, d); // Hidden by char_traits<line_char>::copy().
+   return d;
+ }
+
+ // Compare the first n characters of the two sequences lexicographically
+ // using operator<. Return -1, 1, or 0.
+ //
+ int char_traits<line_char>::
+ compare (const char_type* s1, const char_type* s2, size_t n)
+ {
+   for (size_t i (0); i != n; ++i)
+   {
+     const char_type& a (s1[i]);
+     const char_type& b (s2[i]);
+
+     if (a < b)
+       return -1;
+
+     if (b < a)
+       return 1;
+   }
+
+   return 0;
+ }
+
+ // Return the number of characters that precede the first nul character.
+ //
+ size_t char_traits<line_char>::
+ length (const char_type* s)
+ {
+   size_t n (0);
+   for (; s[n] != char_type::nul; ++n) ;
+   return n;
+ }
+
+ // Return the pointer to the first of the first n characters of s that is
+ // equal to c or NULL if there is none.
+ //
+ const line_char* char_traits<line_char>::
+ find (const char_type* s, size_t n, const char_type& c)
+ {
+   for (const char_type* e (s + n); s != e; ++s)
+   {
+     if (*s == c)
+       return s;
+   }
+
+   return nullptr;
+ }
+
+ // ctype<line_char>
+ //
+ locale::id ctype<line_char>::id;
+
+ // Classify each character in [b, e) storing the masks starting from m.
+ // The only classification recognized is digit, for the special characters
+ // with non-negative values that are decimal digits.
+ //
+ const line_char* ctype<line_char>::
+ is (const char_type* b, const char_type* e, mask* m) const
+ {
+   for (; b != e; ++b, ++m)
+   {
+     const char_type& c (*b);
+
+     *m = c.type () == line_type::special && c.special () >= 0 &&
+          build2::digit (static_cast<char> (c.special ()))
+          ? digit
+          : 0;
+   }
+
+   return e;
+ }
+
+ // Return the pointer to the first character in [b, e) that matches the
+ // mask or e if there is none.
+ //
+ const line_char* ctype<line_char>::
+ scan_is (mask m, const char_type* b, const char_type* e) const
+ {
+   while (b != e && !is (m, *b))
+     ++b;
+
+   return b;
+ }
+
+ // Return the pointer to the first character in [b, e) that doesn't match
+ // the mask or e if there is none.
+ //
+ const line_char* ctype<line_char>::
+ scan_not (mask m, const char_type* b, const char_type* e) const
+ {
+   while (b != e && is (m, *b))
+     ++b;
+
+   return b;
+ }
+
+ // Widen each character in [b, e) storing the results starting from c.
+ //
+ const char* ctype<line_char>::
+ widen (const char* b, const char* e, char_type* c) const
+ {
+   for (; b != e; ++b, ++c)
+     *c = widen (*b);
+
+   return e;
+ }
+
+ // Narrow each character in [b, e) storing the results starting from c.
+ //
+ const line_char* ctype<line_char>::
+ narrow (const char_type* b, const char_type* e, char def, char* c) const
+ {
+   for (; b != e; ++b, ++c)
+     *c = narrow (*b, def);
+
+   return e;
+ }
+
+ // regex_traits<line_char>
+ //
+ // Return the numeric value of the digit character in the specified radix
+ // (8, 10, or 16) or -1 if the character is not special or is not one of
+ // the first radix characters of "0123456789ABCDEF".
+ //
+ int regex_traits<line_char>::
+ value (char_type c, int radix) const
+ {
+   assert (radix == 8 || radix == 10 || radix == 16);
+
+   if (c.type () != line_type::special)
+     return -1;
+
+   const char digits[] = "0123456789ABCDEF";
+
+   // Only search the digits that are valid for this radix.
+   //
+   const char* p (string::traits_type::find (digits, radix, c.special ()));
+
+   if (p == nullptr)
+     return -1;
+
+   return static_cast<int> (p - digits);
+ }
+}
diff --git a/libbuild2/script/regex.hxx b/libbuild2/script/regex.hxx
new file mode 100644
index 0000000..30d3363
--- /dev/null
+++ b/libbuild2/script/regex.hxx
@@ -0,0 +1,678 @@
+// file : libbuild2/script/regex.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_SCRIPT_REGEX_HXX
+#define LIBBUILD2_SCRIPT_REGEX_HXX
+
+#include <list>
+#include <regex>
+#include <locale>
+#include <string> // basic_string
+#include <type_traits> // make_unsigned, enable_if, is_*
+#include <unordered_set>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+namespace build2
+{
+ namespace script
+ {
+ namespace regex
+ {
+ using char_string = std::basic_string<char>;
+
+ enum class char_flags: uint16_t // Bitmask (see the operators in regex.ixx).
+ {
+ icase = 0x1, // Case-insensitive match.
+ idot = 0x2, // Invert '.' escaping.
+
+ none = 0 // No flags set.
+ };
+
+ // Restricts valid standard flags to just {icase}, extends with custom
+ // flags {idot}.
+ //
+ class char_regex: public std::basic_regex<char>
+ {
+ public:
+ using base_type = std::basic_regex<char>;
+
+ char_regex (const char_string&, char_flags = char_flags::none); // Regex text, flags.
+ };
+
+ // Newlines are line separators and are not part of the line:
+ //
+ // line<newline>line<newline>
+ //
+ // Specifically, this means that a customary trailing newline creates a
+ // trailing blank line.
+ //
+ // All characters can inter-compare (though there cannot be regex
+ // characters in the output, only in line_regex).
+ //
+ // Note that we assume that line_regex and the input to regex_match()
+ // use the same pool.
+ //
+ struct line_pool
+ {
+ // Note that we assume the pool can be moved without invalidating
+ // pointers to any already pooled entities.
+ //
+ std::unordered_set<char_string> strings; // Pooled literal strings.
+ std::list<char_regex> regexes; // Pooled regexes.
+ };
+
+ enum class line_type // Note: values must fit into 2 bits (see line_char).
+ {
+ special, // Number stored in line_char itself (syntax character, nul, EOF).
+ literal, // Pointer to a pooled char_string.
+ regex // Pointer to a pooled char_regex.
+ };
+
+ struct line_char
+ {
+ // Steal last two bits from the pointer to store the type.
+ //
+ private:
+ std::uintptr_t data_; // Type in the 2 rightmost bits, value/pointer in the rest.
+
+ public:
+ line_type
+ type () const {return static_cast<line_type> (data_ & 0x3);}
+
+ int
+ special () const
+ {
+ // Stored as (shifted) int16_t. Perform steps reversed to those
+ // that are described in the comment for the corresponding ctor.
+ // Note that the intermediate cast to uint16_t is required to
+ // portably preserve the -1 special character.
+ //
+ return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2));
+ }
+
+ const char_string*
+ literal () const
+ {
+ // Note that 2 rightmost bits are used for packaging line_char
+ // type. Read the comment for the corresponding ctor for details.
+ //
+ return reinterpret_cast<const char_string*> (
+ data_ & ~std::uintptr_t (0x3));
+ }
+
+ const char_regex*
+ regex () const
+ {
+ // Note that 2 rightmost bits are used for packaging line_char
+ // type. Read the comment for the corresponding ctor for details.
+ //
+ return reinterpret_cast<const char_regex*> (
+ data_ & ~std::uintptr_t (0x3));
+ }
+
+ static const line_char nul;
+ static const line_char eof;
+
+ // Note: creates an uninitialized value.
+ //
+ line_char () = default;
+
+ // Create a special character. The argument value must be one of the
+ // following ones:
+ //
+ // 0 (nul character)
+ // -1 (EOF)
+ // [()|.*+?{}\0123456789,=!] (excluding [])
+ //
+ // Note that the constructor is implicit to allow basic_regex to
+ // implicitly construct line_chars from special char literals (in
+ // particular libstdc++ appends them to an internal line_string).
+ //
+ // Also note that we extend the valid characters set (see above) with
+ // 'p', 'n' (used by libstdc++ for positive/negative look-ahead
+ // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used
+ // by libstdc++ for newline/newparagraph matching).
+ //
+ line_char (int);
+
+ // Create a literal character.
+ //
+ // Don't copy string if already pooled.
+ //
+ explicit
+ line_char (const char_string&, line_pool&);
+
+ explicit
+ line_char (char_string&&, line_pool&);
+
+ explicit
+ line_char (const char_string* s) // Assume already pooled.
+ //
+ // Steal two bits from the pointer to package line_char type.
+ // Assume (and statically assert) that char_string address is a
+ // multiple of four.
+ //
+ : data_ (reinterpret_cast <std::uintptr_t> (s) |
+ static_cast <std::uintptr_t> (line_type::literal)) {}
+
+ // Create a regex character.
+ //
+ explicit
+ line_char (char_regex, line_pool&);
+
+ explicit
+ line_char (const char_regex* r) // Assume already pooled.
+ //
+ // Steal two bits from the pointer to package line_char type.
+ // Assume (and statically assert) that char_regex address is a
+ // multiple of four.
+ //
+ : data_ (reinterpret_cast <std::uintptr_t> (r) |
+ static_cast <std::uintptr_t> (line_type::regex)) {}
+
+ // Provide basic_regex with the ability to use line_char in a context
+ // where a char value is expected (e.g., as a function argument).
+ //
+ // libstdc++ seems to cast special line_chars only (and such a
+ // conversion is meaningful).
+ //
+ // msvcrt casts line_chars of arbitrary types instead. The only
+ // reasonable strategy is to return a value that differs from any
+ // other that can be encountered in a regex expression and so will
+ // unlikely be misinterpreted.
+ //
+ operator char () const
+ {
+ return type () == line_type::special ? special () : '\a'; // BELL.
+ }
+
+ // Return true if the character is a syntax (special) one.
+ //
+ static bool
+ syntax (char);
+
+ // Provide basic_regex (such as from msvcrt) with the ability to
+ // explicitly cast line_chars to implementation-specific numeric
+ // types (enums, msvcrt's _Uelem, etc).
+ //
+ template <typename T>
+ explicit
+ operator T () const
+ {
+ assert (type () == line_type::special); // Only special characters are numeric.
+ return static_cast<T> (special ());
+ }
+ };
+
+ // Perform "deep" characters comparison (for example match literal
+ // character with a regex character), rather than just compare them
+ // literally. At least one argument must be of a type other than regex
+ // as there is no operator==() defined to compare regexes. Characters
+ // of the literal type must share the same pool (strings are compared
+ // by pointers not by values).
+ //
+ bool
+ operator== (const line_char&, const line_char&);
+
+ // Return false if arguments are equal (operator==() returns true).
+ // Otherwise if types are different return the value implying that
+ // special < literal < regex. If types are special or literal return
+ // the result of the respective characters or strings comparison. At
+ // least one argument must be of a type other than regex as there is no
+ // operator<() defined to compare regexes.
+ //
+ // While not a very natural operation for the class we have, we have to
+ // provide some meaningful semantics for such a comparison as it is
+ // required by the char_traits<line_char> specialization. While we
+ // could provide it right in that specialization, let's keep it here
+ // for basic_regex implementations that potentially can compare
+ // line_chars as they compare them with expressions of other types (see
+ // below).
+ //
+ bool
+ operator< (const line_char&, const line_char&);
+
+ inline bool
+ operator!= (const line_char& l, const line_char& r)
+ {
+ return !(l == r);
+ }
+
+ inline bool
+ operator<= (const line_char& l, const line_char& r)
+ {
+ return l < r || l == r;
+ }
+
+ // Provide basic_regex (such as from msvcrt) with the ability to
+ // compare line_char to a value of an integral or
+ // implementation-specific enum type. In the absence of the following
+ // template operators, such comparisons would be ambiguous for
+ // integral types (given that there are implicit conversions
+ // int->line_char and line_char->char) and impossible for enums.
+ //
+ // Note that these == and < operators can succeed only for a line_char
+ // of the special type. For other types they always return false. That
+ // in particular leads to the following case:
+ //
+ // (lc != c) != (lc < c || c < lc).
+ //
+ // Note that we can not assert line_char is of the special type as
+ // basic_regex (such as from libc++) may need the ability to check if
+ // arbitrary line_char belongs to some special characters range (like
+ // ['0', '9']).
+ //
+ template <typename T>
+ struct line_char_cmp // Has the type member only for integral and enum T.
+ : public std::enable_if<std::is_integral<T>::value ||
+ (std::is_enum<T>::value &&
+ !std::is_same<T, char_flags>::value)> {}; // Except for char_flags.
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator== (const line_char& l, const T& r)
+ {
+ return l.type () == line_type::special && // Always false for non-special.
+ static_cast<T> (l.special ()) == r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator== (const T& l, const line_char& r)
+ {
+ return r.type () == line_type::special && // Always false for non-special.
+ static_cast<T> (r.special ()) == l;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator!= (const line_char& l, const T& r)
+ {
+ return !(l == r); // Thus always true for non-special.
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator!= (const T& l, const line_char& r)
+ {
+ return !(l == r); // Thus always true for non-special.
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator< (const line_char& l, const T& r)
+ {
+ return l.type () == line_type::special && // Always false for non-special.
+ static_cast<T> (l.special ()) < r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator< (const T& l, const line_char& r)
+ {
+ return r.type () == line_type::special && // Always false for non-special.
+ l < static_cast<T> (r.special ());
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ inline bool
+ operator<= (const line_char& l, const T& r)
+ {
+ return l < r || l == r; // Expressed via the above operators.
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ inline bool
+ operator<= (const T& l, const line_char& r)
+ {
+ return l < r || l == r; // Expressed via the above operators.
+ }
+
+ using line_string = std::basic_string<line_char>;
+
+ // Locale that has ctype<line_char> facet installed. Used in the
+ // regex_traits<line_char> specialization (see below).
+ //
+ class line_char_locale: public std::locale
+ {
+ public:
+ // Create a copy of the global C++ locale.
+ //
+ line_char_locale ();
+ };
+
+ // Initialize the script regex global state. Should be called once
+ // prior to creating objects of types from this namespace. Note: not
+ // thread-safe.
+ //
+ void
+ init ();
+ }
+ }
+}
+
+// Standard template specializations for line_char that are required for the
+// basic_regex<line_char> instantiation.
+//
+namespace std
+{
+ template <>
+ class char_traits<build2::script::regex::line_char>
+ {
+ public:
+ using char_type = build2::script::regex::line_char;
+ using int_type = char_type; // EOF is also a line_char (see eof ()).
+ using off_type = char_traits<char>::off_type;
+ using pos_type = char_traits<char>::pos_type;
+ using state_type = char_traits<char>::state_type;
+
+ static void
+ assign (char_type& c1, const char_type& c2) {c1 = c2;}
+
+ static char_type*
+ assign (char_type*, size_t, char_type);
+
+ // Note that eq() and lt() are not constexpr (as required by C++11)
+ // because == and < operators for char_type are not constexpr.
+ //
+ static bool
+ eq (const char_type& l, const char_type& r) {return l == r;}
+
+ static bool
+ lt (const char_type& l, const char_type& r) {return l < r;}
+
+ static char_type*
+ move (char_type*, const char_type*, size_t);
+
+ static char_type*
+ copy (char_type*, const char_type*, size_t);
+
+ static int
+ compare (const char_type*, const char_type*, size_t);
+
+ static size_t
+ length (const char_type*);
+
+ static const char_type*
+ find (const char_type*, size_t, const char_type&);
+
+ static constexpr char_type
+ to_char_type (const int_type& c) {return c;} // Same type, so just a copy.
+
+ static constexpr int_type
+ to_int_type (const char_type& c) {return int_type (c);}
+
+ // Note that the following functions are not constexpr (as required by
+ // C++11) because their return expressions are not constexpr.
+ //
+ static bool
+ eq_int_type (const int_type& l, const int_type& r) {return l == r;}
+
+ static int_type eof () {return char_type::eof;}
+
+ static int_type
+ not_eof (const int_type& c)
+ {
+ return c != char_type::eof ? c : char_type::nul; // Map EOF to nul.
+ }
+ };
+
+ // ctype<> must be derived from both ctype_base and locale::facet (the latter
+ // supports ref-counting used by the std::locale implementation internally).
+ //
+ // msvcrt for some reason also derives ctype_base from locale::facet which
+ // produces "already a base-class" warning and effectively breaks the
+ // reference counting. So we derive from ctype_base only in this case.
+ //
+ template <>
+ class ctype<build2::script::regex::line_char>: public ctype_base
+#if !defined(_MSC_VER) || _MSC_VER >= 2000
+ , public locale::facet
+#endif
+ {
+ // Used by the implementation only.
+ //
+ using line_type = build2::script::regex::line_type;
+
+ public:
+ using char_type = build2::script::regex::line_char;
+
+ static locale::id id;
+
+#if !defined(_MSC_VER) || _MSC_VER >= 2000
+ explicit
+ ctype (size_t refs = 0): locale::facet (refs) {}
+#else
+ explicit
+ ctype (size_t refs = 0): ctype_base (refs) {}
+#endif
+
+ // While unnecessary, let's keep for completeness.
+ //
+ virtual
+ ~ctype () override = default;
+
+ // The C++ standard requires the following functions to call their virtual
+ // (protected) do_*() counterparts that provide the real implementations.
+ // The only purpose for this indirection is to provide a user with the
+ // ability to customize existing (standard) ctype facets. As we do not
+ // provide such an ability, for simplicity we will omit the do_*()
+ // functions and provide the implementations directly. This should be safe
+ // as nobody except us could call those protected functions.
+ //
+ bool
+ is (mask m, char_type c) const
+ {
+ return m == // Note: compares the masks for equality rather than testing bits.
+ (c.type () == line_type::special && c.special () >= 0 &&
+ build2::digit (static_cast<char> (c.special ()))
+ ? digit // The only class that is ever reported.
+ : 0);
+ }
+
+ const char_type*
+ is (const char_type*, const char_type*, mask*) const;
+
+ const char_type*
+ scan_is (mask, const char_type*, const char_type*) const;
+
+ const char_type*
+ scan_not (mask, const char_type*, const char_type*) const;
+
+ char_type
+ toupper (char_type c) const {return c;} // No case conversion.
+
+ const char_type*
+ toupper (char_type*, const char_type* e) const {return e;} // Leaves the range intact.
+
+ char_type
+ tolower (char_type c) const {return c;} // No case conversion.
+
+ const char_type*
+ tolower (char_type*, const char_type* e) const {return e;} // Leaves the range intact.
+
+ char_type
+ widen (char c) const {return char_type (c);} // Create a special line_char.
+
+ const char*
+ widen (const char*, const char*, char_type*) const;
+
+ char
+ narrow (char_type c, char def) const
+ {
+ return c.type () == line_type::special ? c.special () : def; // def for non-special.
+ }
+
+ const char_type*
+ narrow (const char_type*, const char_type*, char, char*) const;
+ };
+
+ // Note: the current application locale must be POSIX. Otherwise the
+ // behavior is undefined.
+ //
+ template <>
+ class regex_traits<build2::script::regex::line_char>
+ {
+ public:
+ using char_type = build2::script::regex::line_char;
+ using string_type = build2::script::regex::line_string;
+ using locale_type = build2::script::regex::line_char_locale;
+ using char_class_type = regex_traits<char>::char_class_type;
+
+ // Workaround for msvcrt bugs. For some reason it assumes such members
+ // to be present in a regex_traits specialization.
+ //
+#if defined(_MSC_VER) && _MSC_VER < 2000
+ static const ctype_base::mask _Ch_upper = ctype_base::upper;
+ static const ctype_base::mask _Ch_alpha = ctype_base::alpha;
+
+ // Unsigned numeric type. msvcrt normally casts characters to this type
+ // for comparing with some numeric values or for calculating an index in
+ // some bit array. Luckily that all relates to the character class
+ // handling that we don't support.
+ //
+ using _Uelem = unsigned int;
+#endif
+
+ regex_traits () = default; // Unnecessary but let's keep for completeness.
+
+ static size_t
+ length (const char_type* p) {return string_type::traits_type::length (p);}
+
+ char_type
+ translate (char_type c) const {return c;} // Identity.
+
+ // Case-insensitive matching is not supported by line_regex. So there is no
+ // reason for the function to be called.
+ //
+ char_type
+ translate_nocase (char_type c) const {assert (false); return c;}
+
+ // Return a sort-key - the exact copy of [b, e).
+ //
+ template <typename I>
+ string_type
+ transform (I b, I e) const {return string_type (b, e);}
+
+ // Return a case-insensitive sort-key. Case-insensitive matching is not
+ // supported by line_regex. So there is no reason for the function to be
+ // called.
+ //
+ template <typename I>
+ string_type
+ transform_primary (I b, I e) const
+ {
+ assert (false);
+ return string_type (b, e);
+ }
+
+ // POSIX regex grammar and collating elements (e.g., [.tilde.]) in
+ // particular are not supported. So there is no reason for the function to
+ // be called.
+ //
+ template <typename I>
+ string_type
+ lookup_collatename (I, I) const {assert (false); return string_type ();}
+
+ // Character classes (e.g., [:lower:]) are not supported. So there is no
+ // reason for the function to be called.
+ //
+ template <typename I>
+ char_class_type
+ lookup_classname (I, I, bool = false) const
+ {
+ assert (false);
+ return char_class_type ();
+ }
+
+ // Return false as we don't support character classes (e.g., [:lower:]).
+ //
+ bool
+ isctype (char_type, char_class_type) const {return false;}
+
+ int
+ value (char_type, int) const; // Digit value in the specified radix or -1.
+
+ // Return the locale passed as an argument as we do not expect anything
+ // other than POSIX locale, that we also assume to be imbued by default.
+ //
+ locale_type
+ imbue (locale_type l) {return l;} // Note: the locale is not stored.
+
+ locale_type
+ getloc () const {return locale_type ();} // Always default-constructed.
+ };
+
+ // We assume line_char to be an unsigned type and express that with the
+ // following specialization used by basic_regex implementations.
+ //
+ // libstdc++ defines unsigned CharT type (regex_traits template parameter)
+ // to use as an index in some internal cache regardless of whether the cache
+ // is used for this specialization (and it is used only if CharT is char).
+ //
+ template <>
+ struct make_unsigned<build2::script::regex::line_char>
+ {
+ using type = build2::script::regex::line_char; // Maps line_char to itself.
+ };
+
+ // When used with libc++ the linker complains that it can't find
+ // __match_any_but_newline<line_char>::__exec() function. The problem is
+ // that the function is only specialized for char and wchar_t
+ // (LLVM bug #31409). As line_char has no notion of the newline character we
+ // specialize the class template to behave as the __match_any<line_char>
+ // instantiation does (that luckily has all the functions in place).
+ //
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000
+ template <>
+ class __match_any_but_newline<build2::script::regex::line_char>
+ : public __match_any<build2::script::regex::line_char> // Reuse its functions.
+ {
+ public:
+ using base = __match_any<build2::script::regex::line_char>;
+ using base::base; // Inherit the constructors.
+ };
+#endif
+}
+
+namespace build2
+{
+ namespace script
+ {
+ namespace regex
+ {
+ class line_regex: public std::basic_regex<line_char>
+ {
+ public:
+ using base_type = std::basic_regex<line_char>;
+
+ using base_type::base_type; // Inherit the constructors.
+
+ line_regex () = default;
+
+ // Move string regex together with the pool used to create it.
+ //
+ line_regex (line_string&& s, line_pool&& p)
+ // No move-string ctor for base_type, so emulate it.
+ //
+ : base_type (s), pool (move (p)) {s.clear ();}
+
+ // Move constructible/assignable-only type.
+ //
+ line_regex (line_regex&&) = default;
+ line_regex (const line_regex&) = delete;
+ line_regex& operator= (line_regex&&) = default;
+ line_regex& operator= (const line_regex&) = delete;
+
+ public:
+ line_pool pool; // The pool passed to the moving constructor above (if used).
+ };
+ }
+ }
+}
+
+#include <libbuild2/script/regex.ixx>
+
+#endif // LIBBUILD2_SCRIPT_REGEX_HXX
diff --git a/libbuild2/script/regex.ixx b/libbuild2/script/regex.ixx
new file mode 100644
index 0000000..e72b578
--- /dev/null
+++ b/libbuild2/script/regex.ixx
@@ -0,0 +1,31 @@
+// file : libbuild2/script/regex.ixx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+ namespace script
+ {
+ namespace regex
+ {
+ inline char_flags
+ operator&= (char_flags& x, char_flags y) // Note: returns the result by value.
+ {
+ return x = static_cast<char_flags> (
+ static_cast<uint16_t> (x) & static_cast<uint16_t> (y));
+ }
+
+ inline char_flags
+ operator|= (char_flags& x, char_flags y) // Note: returns the result by value.
+ {
+ return x = static_cast<char_flags> (
+ static_cast<uint16_t> (x) | static_cast<uint16_t> (y));
+ }
+
+ inline char_flags
+ operator& (char_flags x, char_flags y) {return x &= y;} // Modifies the local copy.
+
+ inline char_flags
+ operator| (char_flags x, char_flags y) {return x |= y;} // Modifies the local copy.
+ }
+ }
+}
diff --git a/libbuild2/test/script/regex.test.cxx b/libbuild2/script/regex.test.cxx
index 5a93c53..36d47e1 100644
--- a/libbuild2/test/script/regex.test.cxx
+++ b/libbuild2/script/regex.test.cxx
@@ -1,13 +1,13 @@
-// file : libbuild2/test/script/regex.test.cxx -*- C++ -*-
+// file : libbuild2/script/regex.test.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
#include <regex>
#include <type_traits> // is_*
-#include <libbuild2/test/script/regex.hxx>
+#include <libbuild2/script/regex.hxx>
using namespace std;
-using namespace build2::test::script::regex;
+using namespace build2::script::regex;
int
main ()
@@ -18,7 +18,7 @@ main ()
using cf = char_flags;
using cr = char_regex;
- init (); // Initializes the testscript regex global state.
+ init (); // Initializes the script regex global state.
// Test line_char.
//
diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx
new file mode 100644
index 0000000..38436b9
--- /dev/null
+++ b/libbuild2/script/run.cxx
@@ -0,0 +1,2020 @@
+// file : libbuild2/script/run.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/script/run.hxx>
+
+#include <ios> // streamsize
+
+#include <libbutl/regex.mxx>
+#include <libbutl/builtin.mxx>
+#include <libbutl/fdstream.mxx> // fdopen_mode, fddup()
+#include <libbutl/filesystem.mxx> // path_search()
+#include <libbutl/path-pattern.mxx>
+
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/script/regex.hxx>
+#include <libbuild2/script/builtin-options.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace script
+ {
+ string
+ diag_path (const path& d) // Return the single-quoted path representation.
+ {
+ string r ("'"); // Opening quote.
+
+ r += stream_verb_map ().path < 1 // Choose the form based on the path verbosity.
+ ? diag_relative (d)
+ : d.representation ();
+
+ r += '\''; // Closing quote.
+ return r;
+ }
+
+ string
+ diag_path (const dir_name_view& dn) // Return "[<name> ]'<path>'".
+ {
+ string r;
+ if (dn.name != nullptr && *dn.name) // The name is optional.
+ {
+ r += **dn.name;
+ r += ' ';
+ }
+
+ assert (dn.path != nullptr); // The path is required.
+
+ r += diag_path (*dn.path); // Quoted by the path overload.
+ return r;
+ }
+
+ // Return the environment temporary directory, creating it if it doesn't
+ // exist.
+ //
+ static inline const dir_path&
+ temp_dir (environment& env)
+ {
+ if (env.temp_dir.empty ()) // Empty path means not created yet.
+ env.create_temp_dir ();
+
+ return env.temp_dir;
+ }
+
+ // Normalize a path. Also make the relative path absolute using the
+ // specified directory unless it is already absolute. Issue diagnostics
+ // at the specified location if the path is invalid.
+ //
+ static path
+ normalize (path p, const dir_path& d, const location& l)
+ {
+ path r (p.absolute () ? move (p) : d / move (p)); // Complete if relative.
+
+ try
+ {
+ r.normalize ();
+ }
+ catch (const invalid_path& e)
+ {
+ fail (l) << "invalid file path " << e.path;
+ }
+
+ return r;
+ }
+
+ // Check if a path is not empty, the referenced file exists and is not
+ // empty.
+ //
+ static bool
+ non_empty (const path& p, const location& ll)
+ {
+ if (p.empty () || !exists (p))
+ return false;
+
+ try
+ {
+ ifdstream is (p);
+ return is.peek () != ifdstream::traits_type::eof (); // Has at least one byte.
+ }
+ catch (const io_error& e)
+ {
+ // While there can be no fault of the script command being currently
+ // executed let's add the location anyway to ease the
+ // troubleshooting. And let's stick to that principle down the road.
+ //
+ fail (ll) << "unable to read " << p << ": " << e << endf;
+ }
+ }
+
+ // If the file exists, is not empty and not larger than 4KB print it to the
+ // diag record. The file content goes from the new line and is not
+ // indented.
+ //
+ static void
+ print_file (diag_record& d, const path& p, const location& ll)
+ {
+ if (exists (p))
+ {
+ try
+ {
+ ifdstream is (p, ifdstream::badbit);
+
+ if (is.peek () != ifdstream::traits_type::eof ()) // Not empty.
+ {
+ char buf[4096 + 1]; // Extra byte is for terminating '\0'.
+
+ // Note that the string is always '\0'-terminated with a maximum
+ // sizeof (buf) - 1 bytes read.
+ //
+ is.getline (buf, sizeof (buf), '\0');
+
+ // Print if the file fits 4KB-size buffer. Note that if it
+ // doesn't the failbit is set.
+ //
+ if (is.eof ())
+ {
+ // Suppress the trailing newline character as the diag record
+ // adds its own one when flushed.
+ //
+ streamsize n (is.gcount ());
+ assert (n > 0);
+
+ // Note that if the file contains '\0' it will also be counted
+ // by gcount(). But even in the worst case we will stay in the
+ // buffer boundaries (and so not crash).
+ //
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = '\0';
+
+ d << '\n' << buf;
+ }
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to read " << p << ": " << e;
+ }
+ }
+ }
+
+ // Save a string to the file. Fail if an exception is thrown by the
+ // underlying operations.
+ //
+ static void
+ save (const path& p, const string& s, const location& ll)
+ {
+ try
+ {
+ ofdstream os (p);
+ os << s;
+ os.close (); // Inside try so that any io_error it throws is handled below.
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write to " << p << ": " << e;
+ }
+ }
+
+ // Transform string according to here-* redirect modifiers from the {/}
+ // set.
+ //
+ static string
+ transform (const string& s,
+ bool regex,
+ const string& modifiers,
+ environment& env)
+ {
+ if (modifiers.find ('/') == string::npos) // No '/' modifier, nothing to do.
+ return s;
+
+ // For hosts other than Windows leave the string intact.
+ //
+ if (env.host.class_ != "windows")
+ return s;
+
+ // Convert forward slashes to Windows path separators (escape for
+ // regex).
+ //
+ string r;
+ for (size_t p (0);;)
+ {
+ size_t sp (s.find ('/', p)); // Next slash position.
+
+ if (sp != string::npos)
+ {
+ r.append (s, p, sp - p); // Text preceding the slash.
+ r.append (regex ? "\\\\" : "\\"); // Two backslash characters for regex, one otherwise.
+ p = sp + 1;
+ }
+ else
+ {
+ r.append (s, p, sp); // Here sp is npos, so this appends the rest.
+ break;
+ }
+ }
+
+ return r;
+ }
+
+ // Return true if the script temporary directory is not created yet (and
+ // so cannot contain any path), a path is not under the temporary
+ // directory or this directory will not be removed on failure.
+ //
+ static inline bool
+ avail_on_failure (const path& p, const environment& env)
+ {
+ return env.temp_dir.empty () || // Not created yet.
+ env.temp_dir_keep || // Will not be removed.
+ !p.sub (env.temp_dir); // Not under the temporary directory.
+ }
+
+ // Check if the script command output matches the expected result
+ // (redirect value). Noop for redirect types other than none, here_*.
+ //
+ static bool
+ check_output (const path& pr,
+ const path& op,
+ const path& ip,
+ const redirect& rd,
+ const location& ll,
+ environment& env,
+ bool diag,
+ const char* what)
+ {
+ auto input_info = [&ip, &ll, &env] (diag_record& d)
+ {
+ if (non_empty (ip, ll) && avail_on_failure (ip, env))
+ d << info << "stdin: " << ip;
+ };
+
+ auto output_info = [&what, &ll, &env] (diag_record& d,
+ const path& p,
+ const char* prefix = "",
+ const char* suffix = "")
+ {
+ if (non_empty (p, ll))
+ {
+ if (avail_on_failure (p, env))
+ d << info << prefix << what << suffix << ": " << p;
+ }
+ else
+ d << info << prefix << what << suffix << " is empty";
+ };
+
+ if (rd.type == redirect_type::none)
+ {
+ // Check that there is no output produced.
+ //
+ assert (!op.empty ());
+
+ if (!non_empty (op, ll))
+ return true;
+
+ if (diag)
+ {
+ diag_record d (error (ll));
+ d << pr << " unexpectedly writes to " << what;
+
+ if (avail_on_failure (op, env))
+ d << info << what << ": " << op;
+
+ input_info (d);
+
+ // Print cached output.
+ //
+ print_file (d, op, ll);
+ }
+
+ // Fall through (to return false).
+ //
+ }
+ else if (rd.type == redirect_type::here_str_literal ||
+ rd.type == redirect_type::here_doc_literal ||
+ (rd.type == redirect_type::file &&
+ rd.file.mode == redirect_fmode::compare))
+ {
+ // The expected output is provided as a file or as a string. Save the
+ // string to a file in the later case.
+ //
+ assert (!op.empty ());
+
+ path eop;
+
+ if (rd.type == redirect_type::file)
+ eop = normalize (rd.file.path, *env.work_dir.path, ll);
+ else
+ {
+ eop = path (op + ".orig");
+
+ save (eop,
+ transform (rd.str, false /* regex */, rd.modifiers (), env),
+ ll);
+
+ env.clean_special (eop);
+ }
+
+ // Use the diff utility for comparison.
+ //
+ path dp ("diff");
+ process_path pp (run_search (dp, true));
+
+ cstrings args {pp.recall_string ()};
+
+ // If both files being compared won't be available on failure, then
+ // instruct diff not to print the file paths. It seems that the only
+ // way to achieve this is to abandon the output unified format in the
+ // favor of the minimal output, which normally is still informative
+ // enough for the troubleshooting (contains the difference line
+ // numbers, etc).
+ //
+ if (avail_on_failure (eop, env) || avail_on_failure (op, env))
+ args.push_back ("-u");
+
+ // Ignore Windows newline fluff if that's what we are running on.
+ //
+ if (env.host.class_ == "windows")
+ args.push_back ("--strip-trailing-cr");
+
+ args.push_back (eop.string ().c_str ());
+ args.push_back (op.string ().c_str ());
+ args.push_back (nullptr);
+
+ if (verb >= 2)
+ print_process (args);
+
+ try
+ {
+ // Save diff's stdout to a file for troubleshooting and for the
+ // optional (if not too large) printing (at the end of
+ // diagnostics).
+ //
+ path ep (op + ".diff");
+ auto_fd efd;
+
+ try
+ {
+ efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create);
+ env.clean_special (ep);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write to " << ep << ": " << e;
+ }
+
+ // Diff utility prints the differences to stdout. But for the
+ // user it is a part of the script failure diagnostics so let's
+ // redirect stdout to stderr.
+ //
+ process p (pp, args.data (), 0, 2, efd.get ());
+ efd.reset ();
+
+ if (p.wait ())
+ return true;
+
+ assert (p.exit);
+ const process_exit& pe (*p.exit);
+
+ // Note that both POSIX and GNU diff report error by exiting with
+ // the code > 1.
+ //
+ if (!pe.normal () || pe.code () > 1)
+ {
+ diag_record d (fail (ll));
+ print_process (d, args);
+ d << " " << pe;
+
+ print_file (d, ep, ll);
+ }
+
+ // Output doesn't match the expected result.
+ //
+ if (diag)
+ {
+ diag_record d (error (ll));
+ d << pr << " " << what << " doesn't match expected";
+
+ output_info (d, op);
+ output_info (d, eop, "expected ");
+ output_info (d, ep, "", " diff");
+ input_info (d);
+
+ print_file (d, ep, ll);
+ }
+
+ // Fall through (to return false).
+ //
+ }
+ catch (const process_error& e)
+ {
+ error (ll) << "unable to execute " << pp << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+ }
+ else if (rd.type == redirect_type::here_str_regex ||
+ rd.type == redirect_type::here_doc_regex)
+ {
+ // The overall plan is:
+ //
+ // 1. Create regex line string. While creating it's line characters
+ // transform regex lines according to the redirect modifiers.
+ //
+ // 2. Create line regex using the line string. If creation fails
+ // then save the (transformed) regex redirect to a file for
+ // troubleshooting.
+ //
+ // 3. Parse the output into the literal line string.
+ //
+ // 4. Match the output line string with the line regex.
+ //
+ // 5. If match fails save the (transformed) regex redirect to a file
+ // for troubleshooting.
+ //
+ using namespace regex;
+
+ assert (!op.empty ());
+
+ // Create regex line string.
+ //
+ line_pool pool;
+ line_string rls;
+ const regex_lines rl (rd.regex);
+
+ // Parse regex flags.
+ //
+ // When add support for new flags don't forget to update
+ // parse_regex().
+ //
+ auto parse_flags = [] (const string& f) -> char_flags
+ {
+ char_flags r (char_flags::none);
+
+ for (char c: f)
+ {
+ switch (c)
+ {
+ case 'd': r |= char_flags::idot; break;
+ case 'i': r |= char_flags::icase; break;
+ default: assert (false); // Error so should have been checked.
+ }
+ }
+
+ return r;
+ };
+
+ // Return original regex line with the transformation applied.
+ //
+ auto line = [&rl, &rd, &env] (const regex_line& l) -> string
+ {
+ string r;
+ if (l.regex) // Regex (possibly empty),
+ {
+ r += rl.intro;
+ r += transform (l.value, true /* regex */, rd.modifiers (), env);
+ r += rl.intro;
+ r += l.flags;
+ }
+ else if (!l.special.empty ()) // Special literal.
+ r += rl.intro;
+ else // Textual literal.
+ r += transform (l.value, false /* regex */, rd.modifiers (), env);
+
+ r += l.special;
+ return r;
+ };
+
+ // Return regex line location.
+ //
+ // Note that we rely on the fact that the command and regex lines
+ // always belong to the same file.
+ //
+ auto loc = [&ll] (uint64_t line, uint64_t column) -> location
+ {
+ location r (ll);
+ r.line = line;
+ r.column = column;
+ return r;
+ };
+
+ // Save the regex to file for troubleshooting, return the file path
+ // it has been saved to.
+ //
+ // Note that we save the regex on line regex creation failure or if
+ // the program output doesn't match.
+ //
+ auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path
+ {
+ path rp (op + ".regex");
+
+ // Encode here-document regex global flags if present as a file
+ // name suffix. For example if icase and idot flags are specified
+ // the name will look like:
+ //
+ // stdout.regex-di
+ //
+ if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ())
+ rp += '-' + rl.flags;
+
+ // Note that it would be more efficient to directly write chunks
+ // to file rather than to compose a string first. However, we don't
+ // bother (about performance) for the sake of code simplicity as we
+ // have already failed.
+ //
+ string s;
+ for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
+ i != e; ++i)
+ {
+ if (i != b) s += '\n';
+ s += line (*i);
+ }
+
+ save (rp, s, ll);
+ return rp;
+ };
+
+ // Finally create regex line string.
+ //
+ // Note that diagnostics doesn't refer to the program path as it is
+ // irrelevant to failures at this stage.
+ //
+ char_flags gf (parse_flags (rl.flags)); // Regex global flags.
+
+ for (const auto& l: rl.lines)
+ {
+ if (l.regex) // Regex (with optional special characters).
+ {
+ line_char c;
+
+ // Empty regex is a special case representing the blank line.
+ //
+ if (l.value.empty ())
+ c = line_char ("", pool);
+ else
+ {
+ try
+ {
+ string s (transform (l.value,
+ true /* regex */,
+ rd.modifiers (),
+ env));
+
+ c = line_char (
+ char_regex (s, gf | parse_flags (l.flags)), pool);
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful.
+ //
+ diag_record d (fail (loc (l.line, l.column)));
+
+ if (rd.type == redirect_type::here_str_regex)
+ d << "invalid " << what << " regex redirect" << e <<
+ info << "regex: '" << line (l) << "'";
+ else
+ d << "invalid char-regex in " << what << " regex redirect"
+ << e <<
+ info << "regex line: '" << line (l) << "'";
+
+ d << endf;
+ }
+ }
+
+ rls += c; // Append blank literal or regex line char.
+ }
+ else if (!l.special.empty ()) // Special literal.
+ {
+ // Literal can not be followed by special characters in the same
+ // line.
+ //
+ assert (l.value.empty ());
+ }
+ else // Textual literal.
+ {
+ // Append literal line char.
+ //
+ rls += line_char (transform (l.value,
+ false /* regex */,
+ rd.modifiers (),
+ env),
+ pool);
+ }
+
+ for (char c: l.special)
+ {
+ if (line_char::syntax (c))
+ rls += line_char (c); // Append special line char.
+ else
+ fail (loc (l.line, l.column))
+ << "invalid syntax character '" << c << "' in " << what
+ << " regex redirect" <<
+ info << "regex line: '" << line (l) << "'";
+ }
+ }
+
+ // Create line regex.
+ //
+ line_regex regex;
+
+ try
+ {
+ regex = line_regex (move (rls), move (pool));
+ }
+ catch (const regex_error& e)
+ {
+ // Note that line regex creation can not fail for here-string
+ // redirect as it doesn't have syntax line chars. That in
+ // particular means that end_line and end_column are meaningful.
+ //
+ assert (rd.type == redirect_type::here_doc_regex);
+
+ diag_record d (fail (loc (rd.end_line, rd.end_column)));
+
+ // Print regex_error description if meaningful.
+ //
+ d << "invalid " << what << " regex redirect" << e;
+
+ // It would be a waste to save the regex into the file just to
+ // remove it.
+ //
+ if (env.temp_dir_keep)
+ output_info (d, save_regex (), "", " regex");
+ }
+
+ // Parse the output into the literal line string.
+ //
+ line_string ls;
+
+ try
+ {
+ // Do not throw when eofbit is set (end of stream reached), and
+ // when failbit is set (getline() failed to extract any character).
+ //
+ // Note that newlines are treated as line-chars separators. That
+ // in particular means that the trailing newline produces a blank
+ // line-char (empty literal). Empty output produces the zero-length
+ // line-string.
+ //
+ // Also note that we strip the trailing CR characters (otherwise
+ // can mismatch when, for example, cross-testing).
+ //
+ ifdstream is (op, ifdstream::badbit);
+ is.peek (); // Sets eofbit for an empty stream.
+
+ while (!is.eof ())
+ {
+ string s;
+ getline (is, s);
+
+ // It is safer to strip CRs in cycle, as msvcrt unexplainably
+ // adds too much trailing junk to the system_error descriptions,
+ // and so it can appear in programs output. For example:
+ //
+ // ...: Invalid data.\r\r\n
+ //
+ // Note that our custom operator<<(ostream&, const exception&)
+ // removes this junk.
+ //
+ while (!s.empty () && s.back () == '\r')
+ s.pop_back ();
+
+ ls += line_char (move (s), regex.pool);
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to read " << op << ": " << e;
+ }
+
+ // Match the output with the regex.
+ //
+ if (regex_match (ls, regex)) // Doesn't throw.
+ return true;
+
+ // Output doesn't match the regex.
+ //
+ // Unless the temporary directory is removed on failure, we save the
+ // regex to file for troubleshooting regardless of whether we print
+ // the diagnostics or not. We, however, register it for cleanup in the
+ // latter case (the expression may still succeed, we can be evaluating
+ // the if condition, etc).
+ //
+ optional<path> rp;
+ if (env.temp_dir_keep)
+ rp = save_regex ();
+
+ if (diag)
+ {
+ diag_record d (error (ll));
+ d << pr << " " << what << " doesn't match regex";
+
+ output_info (d, op);
+
+ if (rp)
+ output_info (d, *rp, "", " regex");
+
+ input_info (d);
+
+ // Print cached output.
+ //
+ print_file (d, op, ll);
+ }
+ else if (rp)
+ env.clean_special (*rp);
+
+ // Fall through (to return false).
+ //
+ }
+ else // Noop.
+ return true;
+
+ return false;
+ }
+
+ // The exit pseudo-builtin.
+ //
+ // exit [<diagnostics>]
+ //
+ // Without arguments terminate the script successfully. With a single
+ // argument issue it as the error diagnostics and terminate the script
+ // unsuccessfully. Either way the termination is signalled by throwing the
+ // exit exception. More than one argument is a failure.
+ //
+ [[noreturn]] static void
+ exit_builtin (const strings& args, const location& ll)
+ {
+   // No diagnostics specified: successful exit.
+   //
+   if (args.empty ())
+     throw exit (true);
+
+   // Only the diagnostics argument is expected.
+   //
+   if (args.size () > 1)
+     fail (ll) << "unexpected argument '" << args[1] << "'";
+
+   // Print the diagnostics and exit unsuccessfully.
+   //
+   error (ll) << args.front ();
+   throw exit (false);
+ }
+
+ // The set pseudo-builtin: set variable from the stdin input (read to eof).
+ //
+ // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var>
+ //
+ static void
+ set_builtin (environment& env, // Receives the value via set_variable().
+ const strings& args, // Options, optional attributes, variable name.
+ auto_fd in, // Descriptor the value is read from (consumed).
+ const location& ll) // Location used for diagnostics.
+ {
+ try
+ {
+ // Do not throw when eofbit is set (end of stream reached), and
+ // when failbit is set (read operation failed to extract any
+ // character).
+ //
+ ifdstream cin (move (in), ifdstream::badbit);
+
+ // Parse arguments.
+ //
+ cli::vector_scanner scan (args);
+ set_options ops (scan);
+
+ if (ops.whitespace () && ops.newline ())
+ fail (ll) << "both -n|--newline and -w|--whitespace specified";
+
+ if (!scan.more ())
+ fail (ll) << "missing variable name";
+
+ string a (scan.next ()); // Either attributes or variable name.
+ const string* ats (!scan.more () ? nullptr : &a); // Null if no attributes.
+ string vname (!scan.more () ? move (a) : scan.next ());
+
+ if (scan.more ())
+ fail (ll) << "unexpected argument '" << scan.next () << "'";
+
+ if (ats != nullptr && ats->empty ())
+ fail (ll) << "empty variable attributes";
+
+ if (vname.empty ())
+ fail (ll) << "empty variable name";
+
+ // Read the input.
+ //
+ cin.peek (); // Sets eofbit for an empty stream.
+
+ names ns; // Value: one name per element or a single multi-line name.
+ while (!cin.eof ())
+ {
+ // Read the next element, which depends on the whitespace mode being
+ // enabled or not. For the latter case it also makes sense to strip
+ // the trailing CRs that can appear while, for example,
+ // cross-testing Windows target or as a part of msvcrt junk
+ // production (see above).
+ //
+ string s;
+ if (ops.whitespace ())
+ cin >> s;
+ else
+ {
+ getline (cin, s);
+
+ while (!s.empty () && s.back () == '\r')
+ s.pop_back ();
+ }
+
+ // If failbit is set then we read nothing into the string as eof is
+ // reached. That in particular means that the stream has trailing
+ // whitespaces (possibly including newlines) if the whitespace mode
+ // is enabled, or the trailing newline otherwise. If so then
+ // we append the "blank" to the variable value in the exact mode
+ // prior to bailing out.
+ //
+ if (cin.fail ())
+ {
+ if (ops.exact ())
+ {
+ if (ops.whitespace () || ops.newline ())
+ ns.emplace_back (move (s)); // Reuse empty string.
+ else if (ns.empty ())
+ ns.emplace_back ("\n");
+ else
+ ns[0].value += '\n';
+ }
+
+ break;
+ }
+
+ if (ops.whitespace () || ops.newline () || ns.empty ())
+ ns.emplace_back (move (s)); // Separate element (or the first line).
+ else
+ {
+ ns[0].value += '\n'; // Otherwise join the lines into a single name.
+ ns[0].value += s;
+ }
+ }
+
+ cin.close ();
+
+ env.set_variable (move (vname),
+ move (ns),
+ ats != nullptr ? *ats : empty_string,
+ ll);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "set: " << e;
+ }
+ catch (const cli::exception& e)
+ {
+ fail (ll) << "set: " << e;
+ }
+ }
+
+ // Names of the builtins that support filesystem entries cleanup. The array
+ // is looked up with binary search and so must be kept sorted.
+ //
+ static const char* cleanup_builtins[] = {
+   "cp", "ln", "mkdir", "mv", "touch"};
+
+ // Return true if the named builtin supports filesystem entries cleanup.
+ //
+ static inline bool
+ cleanup_builtin (const string& name)
+ {
+   const size_t n (sizeof (cleanup_builtins) / sizeof (cleanup_builtins[0]));
+   const char* const* first (cleanup_builtins);
+
+   return binary_search (first, first + n, name);
+ }
+
+ static bool // True if the pipeline [bc, ec) succeeded, false otherwise.
+ run_pipe (environment& env,
+ command_pipe::const_iterator bc, // Command to run now.
+ command_pipe::const_iterator ec, // End of the pipeline.
+ auto_fd ifd, // stdin for *bc; not open (-1) for the first command.
+ size_t ci, size_t li, const location& ll, // Command/line numbers, location.
+ bool diag) // Whether to print the diagnostics on failure.
+ {
+ if (bc == ec) // End of the pipeline.
+ return true;
+
+ // The overall plan is to run the first command in the pipe, reading
+ // its input from the file descriptor passed (or, for the first
+ // command, according to stdin redirect specification) and redirecting
+ // its output to the right-hand part of the pipe recursively. Fail if
+ // the right-hand part fails. Otherwise check the process exit code,
+ // match stderr (and stdout for the last command in the pipe) according
+ // to redirect specification(s) and fail if any of the above fails.
+ //
+ const command& c (*bc);
+
+ // Register the command explicit cleanups. Verify that the path being
+ // cleaned up is a sub-path of the sandbox directory (if there is one).
+ // Fail if this is not the case.
+ //
+ for (const auto& cl: c.cleanups)
+ {
+ const path& p (cl.path);
+ path np (normalize (p, *env.work_dir.path, ll));
+
+ const string& ls (np.leaf ().string ());
+ bool wc (ls == "*" || ls == "**" || ls == "***"); // Wildcard cleanup.
+ const path& cp (wc ? np.directory () : np);
+ const dir_path* sd (env.sandbox_dir.path);
+
+ if (sd != nullptr && !cp.sub (*sd))
+ fail (ll) << (wc ? "wildcard" :
+ p.to_directory () ? "directory" :
+ "file")
+ << " cleanup " << p << " is out of "
+ << diag_path (env.sandbox_dir);
+
+ env.clean ({cl.type, move (np)}, false);
+ }
+
+ bool eq (c.exit.comparison == exit_comparison::eq);
+
+ // If stdin file descriptor is not open then this is the first pipeline
+ // command.
+ //
+ bool first (ifd.get () == -1);
+
+ command_pipe::const_iterator nc (bc + 1);
+ bool last (nc == ec);
+
+ const string& program (c.program.string ());
+
+ const redirect& in ((c.in ? *c.in : env.in).effective ());
+
+ const redirect* out (!last
+ ? nullptr // stdout is piped.
+ : &(c.out ? *c.out : env.out).effective ());
+
+ const redirect& err ((c.err ? *c.err : env.err).effective ());
+
+ auto process_args = [&c] () -> cstrings
+ {
+ cstrings args {c.program.string ().c_str ()};
+
+ for (const auto& a: c.arguments)
+ args.push_back (a.c_str ());
+
+ args.push_back (nullptr);
+ return args;
+ };
+
+ // Prior to opening file descriptors for command input/output
+ // redirects let's check if the command is the exit builtin. Being a
+ // builtin syntactically it differs from the regular ones in a number
+ // of ways. It doesn't communicate with standard streams, so
+ // redirecting them is meaningless. It may appear only as a single
+ // command in a pipeline. It doesn't return any value and stops the
+ // script execution, so checking its exit status is meaningless as
+ // well. That all means we can short-circuit here calling the builtin
+ // and bailing out right after that. Checking that the user didn't
+ // specify any redirects or exit code check sounds like a right thing
+ // to do.
+ //
+ if (program == "exit")
+ {
+ // In case the builtin is erroneously pipelined from the other
+ // command, we will close stdin gracefully (reading out the stream
+ // content), to make sure that the command doesn't print any
+ // unwanted diagnostics about IO operation failure.
+ //
+ // Note that dtor will ignore any errors (which is what we want).
+ //
+ ifdstream is (move (ifd), fdstream_mode::skip);
+
+ if (!first || !last)
+ fail (ll) << "exit builtin must be the only pipe command";
+
+ if (c.in)
+ fail (ll) << "exit builtin stdin cannot be redirected";
+
+ if (c.out)
+ fail (ll) << "exit builtin stdout cannot be redirected";
+
+ if (c.err)
+ fail (ll) << "exit builtin stderr cannot be redirected";
+
+ // We can't make sure that there is no exit code check. Let's, at
+ // least, check that non-zero code is not expected.
+ //
+ if (eq != (c.exit.code == 0))
+ fail (ll) << "exit builtin exit code cannot be non-zero";
+
+ if (verb >= 2)
+ print_process (process_args ());
+
+ exit_builtin (c.arguments, ll); // Throws exit exception.
+ }
+
+ // Create a unique path for a command standard stream cache file.
+ //
+ auto std_path = [&env, &ci, &li, &ll] (const char* n) -> path
+ {
+ using std::to_string;
+
+ path p (n);
+
+ // 0 if belongs to a single-line script, otherwise is the command line
+ // number (start from one) in the script.
+ //
+ if (li > 0)
+ p += "-" + to_string (li);
+
+ // 0 if belongs to a single-command expression, otherwise is the
+ // command number (start from one) in the expression.
+ //
+ // Note that the name like stdin-N can relate to N-th command of a
+ // single-line script or to N-th single-command line of multi-line
+ // script. These cases are mutually exclusive and so are unambiguous.
+ //
+ if (ci > 0)
+ p += "-" + to_string (ci);
+
+ return normalize (move (p), temp_dir (env), ll);
+ };
+
+ // If this is the first pipeline command, then open stdin descriptor
+ // according to the redirect specified.
+ //
+ path isp; // stdin cache/source file path, if any (empty otherwise).
+
+ if (!first)
+ assert (!c.in); // No redirect expected.
+ else
+ {
+ // Open a file for passing to the command stdin.
+ //
+ auto open_stdin = [&isp, &ifd, &ll] ()
+ {
+ assert (!isp.empty ());
+
+ try
+ {
+ ifd = fdopen (isp, fdopen_mode::in);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to read " << isp << ": " << e;
+ }
+ };
+
+ switch (in.type)
+ {
+ case redirect_type::pass:
+ {
+ try
+ {
+ ifd = fddup (0);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to duplicate stdin: " << e;
+ }
+
+ break;
+ }
+ case redirect_type::none:
+ // Somehow need to make sure that the child process doesn't read
+ // from stdin. That is tricky to do in a portable way. Here we
+ // suppose that the program which (erroneously) tries to read some
+ // data from stdin being redirected to /dev/null fails not being
+ // able to read the expected data, and so the command doesn't pass
+ // through.
+ //
+ // @@ Obviously doesn't cover the case when the process reads
+ // whatever available.
+ // @@ Another approach could be not to redirect stdin and let the
+ // process to hang which can be interpreted as a command failure.
+ // @@ Both ways are quite ugly. Is there some better way to do
+ // this?
+ //
+ // Fall through.
+ //
+ case redirect_type::null:
+ {
+ ifd = open_null ();
+ break;
+ }
+ case redirect_type::file:
+ {
+ isp = normalize (in.file.path, *env.work_dir.path, ll);
+
+ open_stdin ();
+ break;
+ }
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
+ {
+ // We could write to the command stdin directly but instead will
+ // cache the data for potential troubleshooting.
+ //
+ isp = std_path ("stdin");
+
+ save (isp,
+ transform (in.str, false /* regex */, in.modifiers (), env),
+ ll);
+
+ env.clean_special (isp);
+
+ open_stdin ();
+ break;
+ }
+ case redirect_type::trace:
+ case redirect_type::merge:
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex:
+ case redirect_type::here_doc_ref: assert (false); break;
+ }
+ }
+
+ assert (ifd.get () != -1);
+
+ // Prior to opening file descriptors for command outputs redirects
+ // let's check if the command is the set builtin. Being a builtin
+ // syntactically it differs from the regular ones in a number of ways.
+ // It either succeeds or terminates abnormally, so redirecting stderr
+ // is meaningless. It also never produces any output and may appear
+ // only as a terminal command in a pipeline. That means we can
+ // short-circuit here calling the builtin and returning right after
+ // that. Checking that the user didn't specify any meaningless
+ // redirects or exit code check sounds as a right thing to do.
+ //
+ if (program == "set")
+ {
+ if (!last)
+ fail (ll) << "set builtin must be the last pipe command";
+
+ if (c.out)
+ fail (ll) << "set builtin stdout cannot be redirected";
+
+ if (c.err)
+ fail (ll) << "set builtin stderr cannot be redirected";
+
+ if (eq != (c.exit.code == 0))
+ fail (ll) << "set builtin exit code cannot be non-zero";
+
+ if (verb >= 2)
+ print_process (process_args ());
+
+ set_builtin (env, c.arguments, move (ifd), ll);
+ return true;
+ }
+
+ // Open a file for command output redirect if requested explicitly
+ // (file overwrite/append redirects) or for the purpose of the output
+ // validation (none, here_*, file comparison redirects), register the
+ // file for cleanup, return the file descriptor. Interpret trace
+ // redirect according to the verbosity level (as null if below 2, as
+ // pass otherwise). Return nullfd, standard stream descriptor duplicate
+ // or null-device descriptor for merge, pass or null redirects
+ // respectively (not opening any file).
+ //
+ auto open = [&env, &ll, &std_path] (const redirect& r,
+ int dfd,
+ path& p) -> auto_fd
+ {
+ assert (dfd == 1 || dfd == 2);
+ const char* what (dfd == 1 ? "stdout" : "stderr");
+
+ fdopen_mode m (fdopen_mode::out | fdopen_mode::create);
+
+ redirect_type rt (r.type != redirect_type::trace
+ ? r.type
+ : verb < 2
+ ? redirect_type::null
+ : redirect_type::pass);
+ switch (rt)
+ {
+ case redirect_type::pass:
+ {
+ try
+ {
+ return fddup (dfd);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to duplicate " << what << ": " << e;
+ }
+ }
+
+ case redirect_type::null: return open_null ();
+
+ // Duplicate the paired file descriptor later.
+ //
+ case redirect_type::merge: return nullfd;
+
+ case redirect_type::file:
+ {
+ // For the cmp mode the user-provided path refers a content to
+ // match against, rather than a content to be produced (as for
+ // overwrite and append modes). And so for cmp mode we redirect
+ // the process output to a temporary file.
+ //
+ p = r.file.mode == redirect_fmode::compare
+ ? std_path (what)
+ : normalize (r.file.path, *env.work_dir.path, ll);
+
+ m |= r.file.mode == redirect_fmode::append
+ ? fdopen_mode::at_end
+ : fdopen_mode::truncate;
+
+ break;
+ }
+
+ case redirect_type::none:
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex:
+ {
+ p = std_path (what);
+ m |= fdopen_mode::truncate;
+ break;
+ }
+
+ case redirect_type::trace:
+ case redirect_type::here_doc_ref: assert (false); break;
+ }
+
+ auto_fd fd;
+
+ try
+ {
+ fd = fdopen (p, m);
+
+ if ((m & fdopen_mode::at_end) != fdopen_mode::at_end) // Not appending.
+ {
+ if (rt == redirect_type::file)
+ env.clean ({cleanup_type::always, p}, true);
+ else
+ env.clean_special (p);
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write to " << p << ": " << e;
+ }
+
+ return fd;
+ };
+
+ path osp; // stdout file path, if redirected to a file.
+ fdpipe ofd;
+
+ // If this is the last command in the pipeline then redirect the
+ // command process stdout to a file. Otherwise create a pipe and
+ // redirect the stdout to the write-end of the pipe. The read-end will
+ // be passed as stdin for the next command in the pipeline.
+ //
+ // @@ Shouldn't we allow the here-* and file output redirects for a
+ // command with pipelined output? Say if such redirect is present
+ // then the process output is redirected to a file first (as it is
+ // when no output pipelined), and only after the process exit code
+ // and the output are validated the next command in the pipeline is
+ // executed taking the file as an input. This could be useful for
+ // script failures investigation and, for example, for validation
+ // "tightening".
+ //
+ if (last)
+ ofd.out = open (*out, 1, osp);
+ else
+ {
+ assert (!c.out); // No redirect expected.
+ ofd = open_pipe ();
+ }
+
+ path esp; // stderr file path, if redirected to a file.
+ auto_fd efd (open (err, 2, esp));
+
+ // Merge standard streams.
+ //
+ bool mo (out != nullptr && out->type == redirect_type::merge);
+ if (mo || err.type == redirect_type::merge)
+ {
+ auto_fd& self (mo ? ofd.out : efd);
+ auto_fd& other (mo ? efd : ofd.out);
+
+ try
+ {
+ assert (self.get () == -1 && other.get () != -1);
+ self = fddup (other.get ());
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout")
+ << ": " << e;
+ }
+ }
+
+ // All descriptors should be open by now.
+ //
+ assert (ofd.out.get () != -1 && efd.get () != -1);
+
+ optional<process_exit> exit;
+ builtin_function* bf (builtins.find (program));
+
+ bool success; // Result of the right-hand part, then of this command.
+
+ if (bf != nullptr)
+ {
+ // Execute the builtin.
+ //
+ if (verb >= 2)
+ print_process (process_args ());
+
+ // Some of the script builtins (cp, mkdir, etc) extend libbutl
+ // builtins (via callbacks) registering/moving cleanups for the
+ // filesystem entries they create/move, unless explicitly requested
+ // not to do so via the --no-cleanup option.
+ //
+ // Let's "wrap up" the cleanup-related flags into the single object
+ // to rely on "small function object" optimization.
+ //
+ struct cleanup
+ {
+ // Whether the cleanups are enabled for the builtin. Can be set to
+ // false by the parse_option callback if --no-cleanup is
+ // encountered.
+ //
+ bool enabled = true;
+
+ // Whether to register cleanup for a filesystem entry being
+ // created/updated depending on its existence. Calculated by the
+ // create pre-hook and used by the subsequent post-hook.
+ //
+ bool add;
+
+ // Whether to move existing cleanups for the filesystem entry
+ // being moved, rather than to erase them. Calculated by the move
+ // pre-hook and used by the subsequent post-hook.
+ //
+ bool move;
+ };
+
+ // nullopt if the builtin doesn't support cleanups.
+ //
+ optional<cleanup> cln;
+
+ if (cleanup_builtin (program))
+ cln = cleanup ();
+
+ builtin_callbacks bcs {
+
+ // create
+ //
+ // Unless cleanups are suppressed, test that the filesystem entry
+ // doesn't exist (pre-hook) and, if that's the case, register the
+ // cleanup for the newly created filesystem entry (post-hook).
+ //
+ [&env, &cln] (const path& p, bool pre)
+ {
+ // Cleanups must be supported by a filesystem entry-creating
+ // builtin.
+ //
+ assert (cln);
+
+ if (cln->enabled)
+ {
+ if (pre)
+ cln->add = !butl::entry_exists (p);
+ else if (cln->add)
+ env.clean ({cleanup_type::always, p}, true /* implicit */);
+ }
+ },
+
+ // move
+ //
+ // Validate the source and destination paths (pre-hook) and,
+ // unless suppressed, adjust the cleanups that are sub-paths of
+ // the source path (post-hook).
+ //
+ [&env, &cln] (const path& from, const path& to, bool force, bool pre)
+ {
+ // Cleanups must be supported by a filesystem entry-moving
+ // builtin.
+ //
+ assert (cln);
+
+ if (pre)
+ {
+ const dir_path& wd (*env.work_dir.path);
+ const dir_path* sd (env.sandbox_dir.path);
+
+ auto fail = [] (const string& d) {throw runtime_error (d);};
+
+ if (sd != nullptr && !from.sub (*sd) && !force)
+ fail (diag_path (from) + " is out of " +
+ diag_path (env.sandbox_dir));
+
+ auto check_wd = [&wd, &env, fail] (const path& p)
+ {
+ if (wd.sub (path_cast<dir_path> (p)))
+ fail (diag_path (p) + " contains " +
+ diag_path (env.work_dir));
+ };
+
+ check_wd (from);
+ check_wd (to);
+
+ // Unless cleanups are disabled, "move" the matching cleanups
+ // if the destination path doesn't exist and it is a sub-path
+ // of the sandbox directory (if any) and just remove them otherwise.
+ //
+ if (cln->enabled)
+ cln->move = !butl::entry_exists (to) &&
+ (sd == nullptr || to.sub (*sd));
+ }
+ else if (cln->enabled)
+ {
+ // Move or remove the matching cleanups (see above).
+ //
+ // Note that it's not enough to just change the cleanup paths.
+ // We also need to make sure that these cleanups happen before
+ // the destination directory (or any of its parents) cleanup,
+ // that is potentially registered. To achieve that we can just
+ // relocate these cleanup entries to the end of the list,
+ // preserving their mutual order. Remember that cleanups in
+ // the list are executed in the reversed order.
+ //
+ cleanups cs;
+
+ // Remove the source path sub-path cleanups from the list,
+ // adjusting/caching them if required (see above).
+ //
+ for (auto i (env.cleanups.begin ()); i != env.cleanups.end (); )
+ {
+ script::cleanup& c (*i);
+ path& p (c.path);
+
+ if (p.sub (from))
+ {
+ if (cln->move)
+ {
+ // Note that we need to preserve the cleanup path
+ // trailing separator which indicates the removal
+ // method. Also note that leaf(), in particular, does
+ // that.
+ //
+ p = p != from
+ ? to / p.leaf (path_cast<dir_path> (from))
+ : p.to_directory ()
+ ? path_cast<dir_path> (to)
+ : to;
+
+ cs.push_back (move (c));
+ }
+
+ i = env.cleanups.erase (i);
+ }
+ else
+ ++i;
+ }
+
+ // Re-insert the adjusted cleanups at the end of the list.
+ //
+ env.cleanups.insert (env.cleanups.end (),
+ make_move_iterator (cs.begin ()),
+ make_move_iterator (cs.end ()));
+
+ }
+ },
+
+ // remove
+ //
+ // Validate the filesystem entry path (pre-hook).
+ //
+ [&env] (const path& p, bool force, bool pre)
+ {
+ if (pre)
+ {
+ const dir_path& wd (*env.work_dir.path);
+ const dir_path* sd (env.sandbox_dir.path);
+
+ auto fail = [] (const string& d) {throw runtime_error (d);};
+
+ if (sd != nullptr && !p.sub (*sd) && !force)
+ fail (diag_path (p) + " is out of " +
+ diag_path (env.sandbox_dir));
+
+ if (wd.sub (path_cast<dir_path> (p)))
+ fail (diag_path (p) + " contains " +
+ diag_path (env.work_dir));
+ }
+ },
+
+ // parse_option
+ //
+ [&cln] (const strings& args, size_t i)
+ {
+ // Parse --no-cleanup, if it is supported by the builtin.
+ //
+ if (cln && args[i] == "--no-cleanup")
+ {
+ cln->enabled = false;
+ return 1;
+ }
+
+ return 0;
+ },
+
+ // sleep
+ //
+ // Deactivate the thread before going to sleep.
+ //
+ [&env] (const duration& d)
+ {
+ // If/when required we could probably support the precise sleep
+ // mode (e.g., via an option).
+ //
+ env.context.sched.sleep (d);
+ }
+ };
+
+ try
+ {
+ uint8_t r; // Storage.
+ builtin b (bf (r,
+ c.arguments,
+ move (ifd), move (ofd.out), move (efd),
+ *env.work_dir.path,
+ bcs));
+
+ success = run_pipe (env,
+ nc,
+ ec,
+ move (ofd.in),
+ ci + 1, li, ll, diag);
+
+ exit = process_exit (b.wait ());
+ }
+ catch (const system_error& e)
+ {
+ fail (ll) << "unable to execute " << c.program << " builtin: "
+ << e << endf;
+ }
+ }
+ else
+ {
+ // Execute the process.
+ //
+ cstrings args (process_args ());
+
+ // Resolve the relative non-simple program path against the script's
+ // working directory. The simple one will be left for the process
+ // path search machinery. Also strip the potential leading `^`,
+ // indicating that this is an external program rather than a
+ // builtin.
+ //
+ path p;
+
+ try
+ {
+ p = path (args[0]);
+
+ if (p.relative ())
+ {
+ auto program = [&p, &args] (path pp)
+ {
+ p = move (pp);
+ args[0] = p.string ().c_str ();
+ };
+
+ if (p.simple ())
+ {
+ const string& s (p.string ());
+
+ // Don't end up with an empty path.
+ //
+ if (s.size () > 1 && s[0] == '^')
+ program (path (s, 1, s.size () - 1));
+ }
+ else
+ program (*env.work_dir.path / p);
+ }
+ }
+ catch (const invalid_path& e)
+ {
+ fail (ll) << "invalid program path " << e.path;
+ }
+
+ try
+ {
+ process_path pp (process::path_search (args[0]));
+
+ // Note: the builtin-escaping character '^' is not printed.
+ //
+ if (verb >= 2)
+ print_process (args);
+
+ process pr (
+ pp,
+ args.data (),
+ {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()},
+ env.work_dir.path->string ().c_str ());
+
+ ifd.reset ();
+ ofd.out.reset ();
+ efd.reset ();
+
+ success = run_pipe (env,
+ nc,
+ ec,
+ move (ofd.in),
+ ci + 1, li, ll, diag);
+
+ pr.wait ();
+
+ exit = move (pr.exit);
+ }
+ catch (const process_error& e)
+ {
+ error (ll) << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ std::exit (1);
+
+ throw failed ();
+ }
+ }
+
+ assert (exit);
+
+ // If the right-hand side pipeline failed then the whole pipeline fails,
+ // and no further checks are required.
+ //
+ if (!success)
+ return false;
+
+ const path& pr (c.program);
+
+ // If there is no valid exit code available by whatever reason then we
+ // print the proper diagnostics, dump stderr (if cached and not too
+ // large) and fail the whole script. Otherwise if the exit code is not
+ // correct then we print diagnostics if requested and fail the
+ // pipeline.
+ //
+ bool valid (exit->normal ());
+
+ // On Windows the exit code can be out of the valid codes range being
+ // defined as uint16_t.
+ //
+#ifdef _WIN32
+ if (valid)
+ valid = exit->code () < 256;
+#endif
+
+ success = valid && eq == (exit->code () == c.exit.code);
+
+ if (!valid || (!success && diag))
+ {
+ // In the presence of a valid exit code we print the diagnostics and
+ // return false rather than throw.
+ //
+ diag_record d (valid ? error (ll) : fail (ll));
+
+ if (!exit->normal ())
+ d << pr << " " << *exit;
+ else
+ {
+ uint16_t ec (exit->code ()); // Make sure is printed as integer. Note: shadows the ec parameter.
+
+ if (!valid)
+ d << pr << " exit code " << ec << " out of 0-255 range";
+ else if (!success)
+ {
+ if (diag)
+ d << pr << " exit code " << ec << (eq ? " != " : " == ")
+ << static_cast<uint16_t> (c.exit.code);
+ }
+ else
+ assert (false);
+ }
+
+ if (non_empty (esp, ll) && avail_on_failure (esp, env))
+ d << info << "stderr: " << esp;
+
+ if (non_empty (osp, ll) && avail_on_failure (osp, env))
+ d << info << "stdout: " << osp;
+
+ if (non_empty (isp, ll) && avail_on_failure (isp, env))
+ d << info << "stdin: " << isp;
+
+ // Print cached stderr.
+ //
+ print_file (d, esp, ll);
+ }
+
+ // If exit code is correct then check if the standard outputs match the
+ // expectations. Note that stdout is only redirected to file for the
+ // last command in the pipeline.
+ //
+ // The thinking behind matching stderr first is that if it mismatches,
+ // then the program probably misbehaves (executes wrong functionality,
+ // etc) in which case its stdout doesn't really matter.
+ //
+ if (success)
+ success =
+ check_output (pr, esp, isp, err, ll, env, diag, "stderr") &&
+ (!last ||
+ check_output (pr, osp, isp, *out, ll, env, diag, "stdout"));
+
+ return success;
+ }
+
+ // Run the pipes of the expression from left to right, short-circuiting on
+ // the logical operators, and return the overall expression result. If diag
+ // is false, then no pipe is asked to print the failure diagnostics.
+ //
+ static bool
+ run_expr (environment& env,
+           const command_expr& expr,
+           size_t li, const location& ll,
+           bool diag)
+ {
+   // Commands are numbered sequentially throughout the expression starting
+   // with 1. Number 0 is used if the expression consists of a single
+   // command.
+   //
+   size_t ci (1);
+
+   if (expr.size () == 1 && expr.back ().pipe.size () == 1)
+     ci = 0;
+
+   // A pipe failure is fatal for the whole expression if there are no ORs
+   // to the right of it. Such a pipe must print the diagnostics on failure
+   // (if generally allowed). So find the position of the trailing AND-ed
+   // pipes: the pipe right before it is the one that "switches on" the
+   // diagnostics printing.
+   //
+   command_expr::const_iterator trailing_ands; // Undefined unless diag.
+
+   if (diag)
+   {
+     auto ri (expr.crbegin ());
+
+     while (ri != expr.crend () && ri->op == expr_operator::log_and)
+       ++ri;
+
+     trailing_ands = ri.base ();
+   }
+
+   bool result (false);
+   bool print (false);
+
+   for (auto i (expr.cbegin ()), e (expr.cend ()); i != e; ++i)
+   {
+     if (diag && i + 1 == trailing_ands)
+       print = true;
+
+     const command_pipe& pl (i->pipe);
+     bool or_op (i->op == expr_operator::log_or);
+
+     // Skip the pipe if its result would be OR-ed with true or AND-ed with
+     // false. In other words, only run it if the operator "disagrees" with
+     // the result accumulated so far.
+     //
+     if (or_op != result)
+       result = run_pipe (
+         env, pl.begin (), pl.end (), auto_fd (), ci, li, ll, print);
+
+     // Skipped commands still take up their numbers.
+     //
+     ci += pl.size ();
+   }
+
+   return result;
+ }
+
+    void
+    run (environment& env,
+         const command_expr& expr,
+         size_t li, const location& ll)
+    {
+      // The expression itself is not printed here at any verbosity level:
+      // this is left to the caller which may want to add some extra
+      // information (command type, etc).
+      //
+      bool ok (run_expr (env, expr, li, ll, true /* diag */));
+
+      if (!ok)
+        throw failed (); // Assume diagnostics is already printed.
+    }
+
+    bool
+    run_if (environment& env,
+            const command_expr& expr,
+            size_t li, const location& ll)
+    {
+      // As in run(), the expression is not printed here. Note also that the
+      // expression is run with the diag argument being false.
+      //
+      bool r (run_expr (env, expr, li, ll, false /* diag */));
+      return r;
+    }
+
+ void
+ clean (environment& env, const location& ll)
+ {
+ context& ctx (env.context);
+ const dir_path& wdir (*env.work_dir.path);
+
+ // Note that we operate with normalized paths here.
+ //
+ // Remove special files. The order is not important as we don't
+ // expect directories here.
+ //
+ for (const path& p: env.special_cleanups)
+ {
+ // Remove the file if exists. Fail otherwise.
+ //
+ if (rmfile (ctx, p, 3) == rmfile_status::not_exist)
+ fail (ll) << "registered for cleanup special file " << p
+ << " does not exist";
+ }
+
+ // Remove files and directories in the order opposite to the order of
+ // cleanup registration.
+ //
+ for (const auto& c: reverse_iterate (env.cleanups))
+ {
+ cleanup_type t (c.type);
+
+ // Skip whether the path exists or not.
+ //
+ if (t == cleanup_type::never)
+ continue;
+
+ const path& cp (c.path);
+
+ // Wildcard with the last component being '***' (without trailing
+ // separator) matches all files and sub-directories recursively as
+ // well as the start directory itself. So we will recursively
+ // remove the directories that match the parent (for the original
+ // path) directory wildcard.
+ //
+ bool recursive (cp.leaf ().representation () == "***");
+ const path& p (!recursive ? cp : cp.directory ());
+
+ // Remove files or directories using wildcard.
+ //
+ if (path_pattern (p))
+ {
+ bool removed (false);
+
+ auto rm = [&cp, recursive, &removed, &ll, &ctx, &wdir]
+ (path&& pe, const string&, bool interm)
+ {
+ if (!interm)
+ {
+ // While removing the entry we can get not_exist due to
+ // a race condition, but that's ok if somebody did our job.
+ // Note that we still set the removed flag to true in this
+ // case.
+ //
+ removed = true; // Will be meaningless on failure.
+
+ if (pe.to_directory ())
+ {
+ dir_path d (path_cast<dir_path> (pe));
+
+ if (!recursive)
+ {
+ rmdir_status r (rmdir (ctx, d, 3));
+
+ if (r != rmdir_status::not_empty)
+ return true;
+
+ diag_record dr (fail (ll));
+ dr << "registered for cleanup directory " << d
+ << " is not empty";
+
+ print_dir (dr, d, ll);
+ dr << info << "wildcard: '" << cp << "'";
+ }
+ else
+ {
+ // Don't remove the working directory (it will be removed
+ // by the dedicated cleanup).
+ //
+ // NOTE(review): the non-wildcard call below casts the verbosity to
+ // uint16_t to avoid ambiguity with libbutl::rmdir_r(); not done here.
+ //
+ rmdir_status r (rmdir_r (ctx, d, d != wdir, 3));
+
+ if (r != rmdir_status::not_empty)
+ return true;
+
+ // The directory is unlikely to be current but let's keep
+ // for completeness.
+ //
+ fail (ll) << "registered for cleanup wildcard " << cp
+ << " matches the current directory";
+ }
+ }
+ else
+ rmfile (ctx, pe, 3);
+ }
+
+ return true;
+ };
+
+ // Note that here we rely on the fact that recursive iterating
+ // goes depth-first (which makes sense for the cleanup).
+ //
+ try
+ {
+ // Doesn't follow symlinks.
+ //
+ path_search (p,
+ rm,
+ dir_path () /* start */,
+ path_match_flags::none);
+ }
+ catch (const system_error& e)
+ {
+ fail (ll) << "unable to cleanup wildcard " << cp << ": " << e;
+ }
+
+ // Removal of no filesystem entries is not an error for 'maybe'
+ // cleanup type.
+ //
+ if (removed || t == cleanup_type::maybe)
+ continue;
+
+ fail (ll) << "registered for cleanup wildcard " << cp
+ << " doesn't match any "
+ << (recursive
+ ? "path"
+ : p.to_directory ()
+ ? "directory"
+ : "file");
+ }
+
+ // Remove the directory if exists and empty. Fail otherwise.
+ // Removal of non-existing directory is not an error for 'maybe'
+ // cleanup type.
+ //
+ if (p.to_directory ())
+ {
+ dir_path d (path_cast<dir_path> (p));
+ bool wd (d == wdir);
+
+ // Don't remove the working directory for the recursive cleanup
+ // (it will be removed by the dedicated one).
+ //
+ // Note that the root working directory contains the
+ // .buildignore file (see above).
+ //
+ // @@ If 'd' is a file then will fail with a diagnostics having
+ // no location info. Probably need to add an optional location
+ // parameter to rmdir() function. The same problem exists for
+ // a file cleanup when try to rmfile() directory instead of
+ // file.
+ //
+ rmdir_status r (recursive
+ ? rmdir_r (ctx, d, !wd, static_cast <uint16_t> (3))
+ : rmdir (ctx, d, 3));
+
+ if (r == rmdir_status::success ||
+ (r == rmdir_status::not_exist && t == cleanup_type::maybe))
+ continue;
+
+ diag_record dr (fail (ll));
+ dr << "registered for cleanup directory " << d
+ << (r == rmdir_status::not_exist ? " does not exist" :
+ !recursive ? " is not empty"
+ : " is current");
+
+ if (r == rmdir_status::not_empty)
+ print_dir (dr, d, ll);
+ }
+
+ // Remove the file if exists. Fail otherwise. Removal of
+ // non-existing file is not an error for 'maybe' cleanup type.
+ //
+ if (rmfile (ctx, p, 3) == rmfile_status::not_exist &&
+ t == cleanup_type::always)
+ fail (ll) << "registered for cleanup file " << p
+ << " does not exist";
+ }
+ }
+
+    void
+    print_dir (diag_record& dr, const dir_path& p, const location& ll)
+    {
+      // Only this many first entries are listed, the rest are just counted.
+      //
+      const size_t limit (10);
+
+      try
+      {
+        size_t count (0);
+
+        for (const dir_entry& e: dir_iterator (p,
+                                               false /* ignore_dangling */))
+        {
+          if (count < limit)
+            dr << '\n' << (e.ltype () == entry_type::directory
+                           ? path_cast<dir_path> (e.path ())
+                           : e.path ());
+
+          ++count;
+        }
+
+        if (count > limit)
+          dr << "\nand " << count - limit << " more file(s)";
+      }
+      catch (const system_error& ex)
+      {
+        fail (ll) << "unable to iterate over " << p << ": " << ex;
+      }
+    }
+ }
+}
diff --git a/libbuild2/script/run.hxx b/libbuild2/script/run.hxx
new file mode 100644
index 0000000..477dd88
--- /dev/null
+++ b/libbuild2/script/run.hxx
@@ -0,0 +1,75 @@
+// file : libbuild2/script/run.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_SCRIPT_RUN_HXX
+#define LIBBUILD2_SCRIPT_RUN_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/script/script.hxx>
+
+namespace build2
+{
+ namespace script
+ {
+ // An exception that can be thrown by an expression running function to
+ // exit the script (for example, as a result of executing the exit builtin
+ // by the below run*() functions). The status indicates whether the
+ // execution should be considered to have succeeded or failed.
+ //
+    struct exit
+    {
+      explicit
+      exit (bool s): status (s) {}
+
+      // True if the script execution is to be considered successful.
+      //
+      bool status;
+    };
+
+ // Helpers.
+ //
+
+ // Command expression running functions.
+ //
+ // Index is the 1-base index of this command line in the command list.
+ // If it is 0 then it means there is only one command. This information
+ // can be used, for example, to derive file names.
+ //
+ // Location is the start position of this command line in the script. It
+ // can be used in diagnostics.
+ //
+ void
+ run (environment&, const command_expr&, size_t index, const location&);
+
+ bool
+ run_if (environment&, const command_expr&, size_t, const location&);
+
+ // Perform the registered special file cleanups in the direct order and
+ // then the regular cleanups in the reverse order.
+ //
+ void
+ clean (environment&, const location&);
+
+ // Print first 10 directory sub-entries to the diag record. The directory
+ // must exist. Is normally used while issuing diagnostics on non-empty
+ // directory removal failure.
+ //
+ void
+ print_dir (diag_record&, const dir_path&, const location&);
+
+ // Return the quoted path representation with the preserved trailing
+ // directory separator. The path is relative if the verbosity level is
+ // less than 3.
+ //
+ string
+ diag_path (const path&);
+
+ // Same as above, but prepends the path with a name, if present. The path
+ // must be not NULL.
+ //
+ string
+ diag_path (const dir_name_view&);
+ }
+}
+
+#endif // LIBBUILD2_SCRIPT_RUN_HXX
diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx
new file mode 100644
index 0000000..c85bfd3
--- /dev/null
+++ b/libbuild2/script/script.cxx
@@ -0,0 +1,659 @@
+// file : libbuild2/script/script.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/script/script.hxx>
+
+#include <sstream>
+#include <cstring> // strchr()
+
+using namespace std;
+
+namespace build2
+{
+ namespace script
+ {
+    ostream&
+    operator<< (ostream& o, line_type lt)
+    {
+      // Map the line type to its diagnostics name (NULL if unrecognized).
+      //
+      auto name = [] (line_type t) -> const char*
+      {
+        switch (t)
+        {
+        case line_type::var:       return "variable";
+        case line_type::cmd:       return "command";
+        case line_type::cmd_if:    return "'if'";
+        case line_type::cmd_ifn:   return "'if!'";
+        case line_type::cmd_elif:  return "'elif'";
+        case line_type::cmd_elifn: return "'elif!'";
+        case line_type::cmd_else:  return "'else'";
+        case line_type::cmd_end:   return "'end'";
+        }
+
+        return nullptr;
+      };
+
+      return o << name (lt);
+    }
+
+ void
+ dump (ostream& os, const string& ind, const lines& ls)
+ {
+ // For each line print its tokens' literal representation trying to
+ // reproduce the quoting. Consider mixed quoting as double quoting
+ // since the information is lost.
+ //
+ // Also additionally indent the if-branch lines.
+ //
+ string if_ind;
+
+ for (const line& l: ls)
+ {
+ // Before printing indentation, decrease it if the else or end line is
+ // reached.
+ //
+ switch (l.type)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ case line_type::cmd_end:
+ {
+ size_t n (if_ind.size ());
+ assert (n >= 2);
+ if_ind.resize (n - 2);
+ break;
+ }
+ default: break;
+ }
+
+ // Print indentations.
+ //
+ os << ind << if_ind;
+
+ // After printing indentation, increase it for if/else branch.
+ //
+ switch (l.type)
+ {
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else: if_ind += " "; break;
+ default: break;
+ }
+
+ // '"' or '\'' if we are inside the quoted token sequence and '\0'
+ // otherwise. Thus, can be used as bool.
+ //
+ char qseq ('\0');
+
+ for (const replay_token& rt: l.tokens)
+ {
+ const token& t (rt.token);
+
+ // '"' or '\'' if the token is quoted and '\0' otherwise. Thus,
+ // can be used as bool.
+ //
+ char qtok ('\0');
+
+ switch (t.qtype)
+ {
+ case quote_type::unquoted: qtok = '\0'; break;
+ case quote_type::single: qtok = '\''; break;
+ case quote_type::mixed:
+ case quote_type::double_: qtok = '"'; break;
+ }
+
+ // If being inside a quoted token sequence we have reached a token
+ // quoted differently or the newline, then we probably made a
+ // mistake misinterpreting some previous partially quoted token, for
+ // example f"oo" as "foo. If that's the case, all we can do is to
+ // end the sequence adding the trailing quote.
+ //
+ // Note that a token inside the quoted sequence may well be
+ // unquoted, so for example "$foo" is lexed as:
+ //
+ // token quoting complete notes
+ // '' " no
+ // $ " yes
+ // 'foo' Unquoted since lexed in variable mode.
+ // '' " no
+ // \n
+ //
+ if (qseq &&
+ ((qtok && qtok != qseq) || t.type == token_type::newline))
+ {
+ os << qseq;
+ qseq = '\0';
+ }
+
+ // Left and right token quotes (can be used as bool).
+ //
+ char lq ('\0');
+ char rq ('\0');
+
+ // If the token is quoted, then determine if/which quotes should be
+ // present on its sides and track the quoted token sequence.
+ //
+ if (qtok)
+ {
+ if (t.qcomp) // Complete token quoting.
+ {
+ // If we are inside a quoted token sequence then do nothing.
+ // Otherwise just quote the current token not starting a
+ // sequence.
+ //
+ if (!qseq)
+ {
+ lq = qtok;
+ rq = qtok;
+ }
+ }
+ else // Partial token quoting.
+ {
+ // Note that we can not always reproduce the original tokens
+ // representation for partial quoting. For example, the two
+ // following tokens are lexed into the identical token objects:
+ //
+ // "foo
+ // f"oo"
+ //
+ // We will always assume that the partially quoted token either
+ // starts or ends the quoted token sequence. Sometimes this ends
+ // up unexpectedly, but seems there is not much we can do:
+ //
+ // f"oo" "ba"r -> "foo bar"
+ //
+ if (!qseq) // Start quoted sequence.
+ {
+ lq = qtok;
+ qseq = qtok;
+ }
+ else // End quoted sequence.
+ {
+ rq = qtok;
+ qseq = '\0';
+ }
+ }
+ }
+
+ // Print the space character prior to the separated token, unless
+ // it is the first token on the line or the newline.
+ //
+ if (t.separated &&
+ t.type != token_type::newline &&
+ &rt != &l.tokens[0])
+ os << ' ';
+
+ if (lq) os << lq; // Print the left quote, if required.
+
+ // Escape the special characters, unless the token is not a word or
+ // is single-quoted. Note that the special character set depends on
+ // whether the word is double-quoted or unquoted.
+ //
+ if (t.type == token_type::word && qtok != '\'')
+ {
+ for (char c: t.value)
+ {
+ if (strchr (qtok ? "\\\"" : "|&<>=\\\"", c) != nullptr)
+ os << '\\';
+
+ os << c;
+ }
+ }
+ else
+ t.printer (os, t, print_mode::raw);
+
+ if (rq) os << rq; // Print the right quote, if required.
+ }
+ }
+ }
+
+    // Quote if empty or contains spaces or any of the special characters
+    // (including quotes). Normally we use single quotes since double quotes
+    // still allow expansion. However, nothing can be escaped inside single
+    // quotes, so a string that itself contains a single quote is
+    // double-quoted instead, with the backslash and double quote characters
+    // escaped. Note that in this case any other characters (including those
+    // that could trigger expansion) are printed as is.
+    //
+    static void
+    to_stream_q (ostream& o, const string& s)
+    {
+      // Nothing special: print as is.
+      //
+      if (!s.empty () && s.find_first_of (" |&<>=\\\"'") == string::npos)
+      {
+        o << s;
+        return;
+      }
+
+      // No single quotes inside: single-quote verbatim.
+      //
+      if (s.find ('\'') == string::npos)
+      {
+        o << '\'' << s << '\'';
+        return;
+      }
+
+      // Double-quote escaping the characters special inside double quotes.
+      //
+      o << '"';
+
+      for (char c: s)
+      {
+        if (c == '\\' || c == '"')
+          o << '\\';
+
+        o << c;
+      }
+
+      o << '"';
+    }
+
+ void
+ to_stream (ostream& o, const command& c, command_to_stream m)
+ {
+ auto print_path = [&o] (const path& p)
+ {
+ using build2::operator<<;
+
+ ostringstream s;
+ stream_verb (s, stream_verb (o));
+ s << p;
+
+ to_stream_q (o, s.str ());
+ };
+
+ auto print_redirect = [&o, print_path] (const redirect& r, int fd)
+ {
+ const redirect& er (r.effective ());
+
+ // Print the none redirect (no data allowed) if/when the respective
+ // syntax is invented.
+ //
+ if (er.type == redirect_type::none)
+ return;
+
+ o << ' ';
+
+ // Print the redirect file descriptor (only for stderr).
+ //
+ if (fd == 2)
+ o << fd;
+
+ // Print the redirect original representation and the modifiers, if
+ // present.
+ //
+ r.token.printer (o, r.token, print_mode::raw);
+
+ // Print the rest of the redirect (file path, etc).
+ //
+ switch (er.type)
+ {
+ case redirect_type::none: assert (false); break;
+ case redirect_type::here_doc_ref: assert (false); break;
+
+ case redirect_type::pass:
+ case redirect_type::null:
+ case redirect_type::trace: break;
+ case redirect_type::merge: o << er.fd; break;
+
+ case redirect_type::file:
+ {
+ print_path (er.file.path);
+ break;
+ }
+
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
+ {
+ if (er.type == redirect_type::here_doc_literal)
+ o << er.end;
+ else
+ {
+ const string& v (er.str);
+ to_stream_q (o,
+ er.modifiers ().find (':') == string::npos
+ ? string (v, 0, v.size () - 1) // Strip newline.
+ : v);
+ }
+
+ break;
+ }
+
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex:
+ {
+ const regex_lines& re (er.regex);
+
+ if (er.type == redirect_type::here_doc_regex)
+ o << re.intro + er.end + re.intro + re.flags;
+ else
+ {
+ assert (!re.lines.empty ()); // Regex can't be empty.
+
+ regex_line l (re.lines[0]);
+ to_stream_q (o, re.intro + l.value + re.intro + l.flags);
+ }
+
+ break;
+ }
+ }
+ };
+
+ auto print_doc = [&o] (const redirect& r)
+ {
+ o << endl;
+
+ if (r.type == redirect_type::here_doc_literal)
+ o << r.str;
+ else
+ {
+ assert (r.type == redirect_type::here_doc_regex);
+
+ const regex_lines& rl (r.regex);
+
+ for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
+ i != e; ++i)
+ {
+ if (i != b)
+ o << endl;
+
+ const regex_line& l (*i);
+
+ if (l.regex) // Regex (possibly empty).
+ o << rl.intro << l.value << rl.intro << l.flags;
+ else if (!l.special.empty ()) // Special literal.
+ o << rl.intro;
+ else // Textual literal.
+ o << l.value;
+
+ o << l.special;
+ }
+ }
+
+ o << (r.modifiers ().find (':') == string::npos ? "" : "\n") << r.end;
+ };
+
+ if ((m & command_to_stream::header) == command_to_stream::header)
+ {
+ // Program.
+ //
+ to_stream_q (o, c.program.string ());
+
+ // Arguments.
+ //
+ for (const string& a: c.arguments)
+ {
+ o << ' ';
+ to_stream_q (o, a);
+ }
+
+ // Redirects.
+ //
+ if (c.in)
+ print_redirect (*c.in, 0);
+
+ if (c.out)
+ print_redirect (*c.out, 1);
+
+ if (c.err)
+ print_redirect (*c.err, 2);
+
+ for (const auto& p: c.cleanups)
+ {
+ o << " &";
+
+ if (p.type != cleanup_type::always)
+ o << (p.type == cleanup_type::maybe ? '?' : '!');
+
+ print_path (p.path);
+ }
+
+ if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0)
+ {
+ switch (c.exit.comparison)
+ {
+ case exit_comparison::eq: o << " == "; break;
+ case exit_comparison::ne: o << " != "; break;
+ }
+
+ o << static_cast<uint16_t> (c.exit.code);
+ }
+ }
+
+ if ((m & command_to_stream::here_doc) == command_to_stream::here_doc)
+ {
+ // Here-documents.
+ //
+ if (c.in &&
+ (c.in->type == redirect_type::here_doc_literal ||
+ c.in->type == redirect_type::here_doc_regex))
+ print_doc (*c.in);
+
+ if (c.out &&
+ (c.out->type == redirect_type::here_doc_literal ||
+ c.out->type == redirect_type::here_doc_regex))
+ print_doc (*c.out);
+
+ if (c.err &&
+ (c.err->type == redirect_type::here_doc_literal ||
+ c.err->type == redirect_type::here_doc_regex))
+ print_doc (*c.err);
+ }
+ }
+
+    void
+    to_stream (ostream& o, const command_pipe& p, command_to_stream m)
+    {
+      const bool header (
+        (m & command_to_stream::header) == command_to_stream::header);
+
+      const bool here_doc (
+        (m & command_to_stream::here_doc) == command_to_stream::here_doc);
+
+      // First the command headers separated with the pipe operator.
+      //
+      if (header)
+      {
+        bool first (true);
+
+        for (const command& c: p)
+        {
+          if (!first)
+            o << " | ";
+
+          first = false;
+          to_stream (o, c, command_to_stream::header);
+        }
+      }
+
+      // Then the here-documents of all the commands, in the same order.
+      //
+      if (here_doc)
+      {
+        for (auto i (p.begin ()); i != p.end (); ++i)
+          to_stream (o, *i, command_to_stream::here_doc);
+      }
+    }
+
+    void
+    to_stream (ostream& o, const command_expr& e, command_to_stream m)
+    {
+      const bool header (
+        (m & command_to_stream::header) == command_to_stream::header);
+
+      const bool here_doc (
+        (m & command_to_stream::here_doc) == command_to_stream::here_doc);
+
+      // First the pipe headers separated with the logical operators. Note
+      // that the operator of the first term is implied and is not printed.
+      //
+      if (header)
+      {
+        bool first (true);
+
+        for (const expr_term& t: e)
+        {
+          if (!first)
+          {
+            if (t.op == expr_operator::log_or)
+              o << " || ";
+            else if (t.op == expr_operator::log_and)
+              o << " && ";
+          }
+
+          first = false;
+          to_stream (o, t.pipe, command_to_stream::header);
+        }
+      }
+
+      // Then the here-documents of all the pipes, in the same order.
+      //
+      if (here_doc)
+      {
+        for (auto i (e.begin ()); i != e.end (); ++i)
+          to_stream (o, i->pipe, command_to_stream::here_doc);
+      }
+    }
+
+ // redirect
+ //
+    redirect::
+    redirect (redirect_type t)
+        : type (t)
+    {
+      // Default-construct the union member that corresponds to the type, if
+      // any.
+      //
+      switch (type)
+      {
+      case redirect_type::file:
+        new (&file) file_type ();
+        break;
+
+      case redirect_type::here_str_literal:
+      case redirect_type::here_doc_literal:
+        new (&str) string ();
+        break;
+
+      case redirect_type::here_str_regex:
+      case redirect_type::here_doc_regex:
+        new (&regex) regex_lines ();
+        break;
+
+      // A reference redirect cannot be created with this constructor.
+      //
+      case redirect_type::here_doc_ref:
+        assert (false);
+        break;
+
+      // No member is constructed for the remaining types.
+      //
+      case redirect_type::none:
+      case redirect_type::pass:
+      case redirect_type::null:
+      case redirect_type::trace:
+      case redirect_type::merge:
+        break;
+      }
+    }
+
+    redirect::
+    redirect (redirect&& r) noexcept
+        : type (r.type),
+          token (move (r.token)),
+          end (move (r.end)),
+          end_line (r.end_line),
+          end_column (r.end_column)
+    {
+      // Move-construct whichever union member is active for this type.
+      //
+      switch (type)
+      {
+      case redirect_type::merge:
+        fd = r.fd;
+        break;
+
+      case redirect_type::file:
+        new (&file) file_type (move (r.file));
+        break;
+
+      case redirect_type::here_doc_ref:
+        new (&ref) reference_wrapper<const redirect> (r.ref);
+        break;
+
+      case redirect_type::here_str_literal:
+      case redirect_type::here_doc_literal:
+        new (&str) string (move (r.str));
+        break;
+
+      case redirect_type::here_str_regex:
+      case redirect_type::here_doc_regex:
+        new (&regex) regex_lines (move (r.regex));
+        break;
+
+      // No union member is used by the remaining types.
+      //
+      case redirect_type::none:
+      case redirect_type::pass:
+      case redirect_type::null:
+      case redirect_type::trace:
+        break;
+      }
+    }
+
+    redirect& redirect::
+    operator= (redirect&& r) noexcept
+    {
+      // Nothing to do for self-assignment.
+      //
+      if (this == &r)
+        return *this;
+
+      // Destroy and re-create in place (assumes the move constructor is
+      // noexcept).
+      //
+      this->~redirect ();
+      new (this) redirect (move (r));
+
+      return *this;
+    }
+
+    redirect::
+    ~redirect ()
+    {
+      // Destroy whichever union member is active for this type.
+      //
+      switch (type)
+      {
+      case redirect_type::file:
+        file.~file_type ();
+        break;
+
+      case redirect_type::here_doc_ref:
+        ref.~reference_wrapper<const redirect> ();
+        break;
+
+      case redirect_type::here_str_literal:
+      case redirect_type::here_doc_literal:
+        str.~string ();
+        break;
+
+      case redirect_type::here_str_regex:
+      case redirect_type::here_doc_regex:
+        regex.~regex_lines ();
+        break;
+
+      // Nothing to destroy for the remaining types.
+      //
+      case redirect_type::none:
+      case redirect_type::pass:
+      case redirect_type::null:
+      case redirect_type::trace:
+      case redirect_type::merge:
+        break;
+      }
+    }
+
+    redirect::
+    redirect (const redirect& r)
+        : type (r.type),
+          token (r.token),
+          end (r.end),
+          end_line (r.end_line),
+          end_column (r.end_column)
+    {
+      // Copy-construct whichever union member is active for this type.
+      //
+      switch (type)
+      {
+      case redirect_type::merge:
+        fd = r.fd;
+        break;
+
+      case redirect_type::file:
+        new (&file) file_type (r.file);
+        break;
+
+      case redirect_type::here_doc_ref:
+        new (&ref) reference_wrapper<const redirect> (r.ref);
+        break;
+
+      case redirect_type::here_str_literal:
+      case redirect_type::here_doc_literal:
+        new (&str) string (r.str);
+        break;
+
+      case redirect_type::here_str_regex:
+      case redirect_type::here_doc_regex:
+        new (&regex) regex_lines (r.regex);
+        break;
+
+      // No union member is used by the remaining types.
+      //
+      case redirect_type::none:
+      case redirect_type::pass:
+      case redirect_type::null:
+      case redirect_type::trace:
+        break;
+      }
+    }
+
+    redirect& redirect::
+    operator= (const redirect& r)
+    {
+      if (this == &r)
+        return *this;
+
+      // Make a copy and then reduce to move-assignment.
+      //
+      redirect tmp (r);
+      *this = move (tmp);
+
+      return *this;
+    }
+
+ // environment
+ //
+    void environment::
+    clean (script::cleanup c, bool implicit)
+    {
+      using script::cleanup;
+
+      // An implicit cleanup is expected to be of the 'always' type.
+      //
+      assert (!implicit || c.type == cleanup_type::always);
+
+      const path& p (c.path);
+
+      // Ignore an implicit cleanup of a path outside the sandbox directory,
+      // if the latter is specified. For an explicit cleanup this is an error
+      // that should have been checked for by now.
+      //
+      if (sandbox_dir.path != nullptr && !p.sub (*sandbox_dir.path))
+      {
+        if (implicit)
+          return;
+
+        assert (false);
+      }
+
+      // Look for an already registered cleanup for this path.
+      //
+      auto i (cleanups.begin ());
+      for (; i != cleanups.end () && !(i->path == p); ++i) ;
+
+      // Register a new cleanup or, if explicit, override the type of the
+      // existing one.
+      //
+      if (i != cleanups.end ())
+      {
+        if (!implicit)
+          i->type = c.type;
+      }
+      else
+        cleanups.emplace_back (move (c));
+    }
+
+ void environment::
+ clean_special (path p)
+ {
+ special_cleanups.emplace_back (move (p)); // Appended in registration order.
+ }
+ }
+}
diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx
new file mode 100644
index 0000000..f4998b7
--- /dev/null
+++ b/libbuild2/script/script.hxx
@@ -0,0 +1,471 @@
+// file : libbuild2/script/script.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_SCRIPT_SCRIPT_HXX
+#define LIBBUILD2_SCRIPT_SCRIPT_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/forward.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/token.hxx>
+#include <libbuild2/variable.hxx>
+
+namespace build2
+{
+ namespace script
+ {
+ // Pre-parsed representation.
+ //
+
+ enum class line_type
+ {
+ var, // Printed as "variable" in diagnostics.
+ cmd, // Printed as "command" in diagnostics.
+ cmd_if, // 'if'
+ cmd_ifn, // 'if!'
+ cmd_elif, // 'elif'
+ cmd_elifn, // 'elif!'
+ cmd_else, // 'else'
+ cmd_end // 'end'
+ };
+
+ ostream&
+ operator<< (ostream&, line_type);
+
+ struct line
+ {
+ line_type type;
+ replay_tokens tokens; // Line tokens (these are what dump() prints).
+
+ union
+ {
+ const variable* var; // Pre-entered for line_type::var.
+ };
+ };
+
+ // Most of the time we will have just one line (a command).
+ //
+ using lines = small_vector<line, 1>;
+
+ // Print the script lines, trying to reproduce their original (non-
+ // expanded) representation.
+ //
+ // Note that the exact spacing and partial quoting may not be restored due
+ // to the information loss.
+ //
+ void
+ dump (ostream&, const string& ind, const lines&);
+
+ // Parse object model.
+ //
+
+ // redirect
+ //
+ enum class redirect_type
+ {
+ // No data is allowed to be read or written.
+ //
+ // Note that redirect of this type cannot be currently specified on the
+ // script command line and can only be set via the environment object
+ // as a default redirect (see below).
+ //
+ none,
+
+ pass,
+ null,
+ trace,
+ merge, // Merge to another descriptor (see redirect::fd).
+ here_str_literal, // Data is in redirect::str.
+ here_str_regex, // Data is in redirect::regex.
+ here_doc_literal, // Data is in redirect::str.
+ here_doc_regex, // Data is in redirect::regex.
+ here_doc_ref, // Reference to here_doc literal or regex.
+ file, // Data is in redirect::file.
+ };
+
+ // Pre-parsed (but not instantiated) regex lines. The idea here is that
+ // we should be able to re-create their (more or less) exact text
+ // representation for diagnostics but also instantiate without any
+ // re-parsing.
+ //
+ struct regex_line
+ {
+ // If regex is true, then value is the regex expression. Otherwise, it
+ // is a literal. Note that special characters can be present in both
+ // cases. For example, //+ is a regex, while /+ is a literal, both
+ // with '+' as a special character. Flags are only valid for regex.
+ // Literals fall into textual (has no special characters) and
+ // special (has just special characters instead) ones. For example
+ // foo is a textual literal, while /.+ is a special one. Note that
+ // literal must not have value and special both non-empty.
+ //
+ bool regex;
+
+ string value;
+ string flags;
+ string special;
+
+ uint64_t line;
+ uint64_t column;
+
+ // Create regex with optional special characters.
+ //
+ regex_line (uint64_t l, uint64_t c,
+ string v, string f, string s = string ())
+ : regex (true),
+ value (move (v)),
+ flags (move (f)),
+ special (move (s)),
+ line (l),
+ column (c) {}
+
+ // Create a literal, either text or special.
+ //
+ regex_line (uint64_t l, uint64_t c, string v, bool s)
+ : regex (false),
+ value (s ? string () : move (v)),
+ special (s ? move (v) : string ()),
+ line (l),
+ column (c) {}
+ };
+
+ struct regex_lines
+ {
+ char intro; // Introducer character.
+ string flags; // Global flags (here-document).
+
+ small_vector<regex_line, 8> lines;
+ };
+
+ // Output file redirect mode.
+ //
+ enum class redirect_fmode
+ {
+ compare,
+ overwrite,
+ append
+ };
+
+    struct redirect
+    {
+      redirect_type type;
+
+      struct file_type
+      {
+        using path_type = build2::path;
+        path_type path;
+        redirect_fmode mode; // Meaningless for input redirect.
+      };
+
+      // The active member, if any, is determined by the redirect type.
+      //
+      union
+      {
+        int         fd;    // Merge-to descriptor.
+        string      str;   // Note: with trailing newline, if requested.
+        regex_lines regex; // Note: with trailing blank, if requested.
+        file_type   file;
+        reference_wrapper<const redirect> ref; // Note: no chains.
+      };
+
+      // Modifiers and the original representation (potentially an alias).
+      //
+      build2::token token;
+
+      string end; // Here-document end marker (no regex intro/flags).
+
+      // Here-document end marker location. Note that none of the
+      // constructors below (other than copy/move) sets these, so initialize
+      // them here in order not to copy/move indeterminate values.
+      //
+      uint64_t end_line = 0;
+      uint64_t end_column = 0;
+
+      // Create redirect of a type other than reference.
+      //
+      explicit
+      redirect (redirect_type);
+
+      // Create redirect of the reference type.
+      //
+      redirect (redirect_type t, const redirect& r, build2::token tk)
+          : type (redirect_type::here_doc_ref),
+            ref (r),
+            token (move (tk))
+      {
+        // There is no support (and need) for reference chains.
+        //
+        assert (t == redirect_type::here_doc_ref &&
+                r.type != redirect_type::here_doc_ref);
+      }
+
+      // Create redirect of the merge type.
+      //
+      // Note that it's the caller's responsibility to make sure that the file
+      // descriptor is valid for this redirect (2 for stdout, etc).
+      //
+      redirect (redirect_type t, int f)
+          : type (redirect_type::merge), fd (f)
+      {
+        assert (t == redirect_type::merge && (f == 1 || f == 2));
+      }
+
+      redirect (redirect&&) noexcept;
+      redirect& operator= (redirect&&) noexcept;
+
+      // @@ Defining optional movable-only redirects in the command class makes
+      //    the older C++ compilers (GCC 4.9, Clang 4, VC 15) fail to compile
+      //    the command vector manipulating code. Thus, we make the redirect
+      //    class copyable to workaround the issue.
+      //
+      redirect (const redirect&);
+      redirect& operator= (const redirect&);
+
+      ~redirect ();
+
+      // Return the referenced redirect for the reference type and this
+      // redirect otherwise.
+      //
+      const redirect&
+      effective () const noexcept
+      {
+        return type == redirect_type::here_doc_ref ? ref.get () : *this;
+      }
+
+      // Return the redirect modifiers (stored as the token value).
+      //
+      const string&
+      modifiers () const noexcept
+      {
+        return token.value;
+      }
+    };
+
+ // cleanup
+ //
+ enum class cleanup_type
+ {
+ always, // &foo - cleanup, fail if does not exist.
+ maybe, // &?foo - cleanup, ignore if does not exist.
+ never // &!foo - don't cleanup, ignore if doesn't exist.
+ };
+
+ // File or directory to be automatically cleaned up at the end of the
+ // script execution. If the path ends with a trailing slash, then it is
+ // assumed to be a directory, otherwise -- a file. A directory that is
+ // about to be cleaned up must be empty.
+ //
+ // The last component in the path may contain a wildcard that has the
+ // following semantics:
+ //
+ // dir/* - remove all immediate files
+ // dir/*/ - remove all immediate sub-directories (must be empty)
+ // dir/** - remove all files recursively
+ // dir/**/ - remove all sub-directories recursively (must be empty)
+ // dir/*** - remove directory dir with all files and sub-directories
+ // recursively
+ //
+ struct cleanup
+ {
+ cleanup_type type;
+ build2::path path;
+ };
+ using cleanups = vector<cleanup>;
+
+ // command_exit
+ //
+ enum class exit_comparison {eq, ne};
+
+ struct command_exit
+ {
+ // C/C++ don't apply constraints on program exit code other than it
+ // being of type int.
+ //
+ // POSIX specifies that only the least significant 8 bits shall be
+ // available from wait() and waitpid(); the full value shall be
+ // available from waitid() (read more at _Exit, _exit Open Group
+ // spec).
+ //
+ // While the Linux man page for waitid() doesn't mention any
+ // deviations from the standard, the FreeBSD implementation (as of
+ // version 11.0) only returns 8 bits like the other wait*() calls.
+ //
+ // Windows supports 32-bit exit codes.
+ //
+ // Note that in shells some exit values can have special meaning so
+ // using them can be a source of confusion. For bash values in the
+ // [126, 255] range are such special ones (see Appendix E, "Exit
+ // Codes With Special Meanings" in the Advanced Bash-Scripting Guide).
+ //
+ exit_comparison comparison;
+ uint8_t code;
+ };
+
+ // command
+ //
+ struct command
+ {
+ path program;
+ strings arguments;
+
+ optional<redirect> in;
+ optional<redirect> out;
+ optional<redirect> err;
+
+ script::cleanups cleanups;
+
+ command_exit exit {exit_comparison::eq, 0};
+ };
+
+ enum class command_to_stream: uint16_t
+ {
+ header = 0x01,
+ here_doc = 0x02, // Note: printed on a new line.
+ all = header | here_doc
+ };
+
+ void
+ to_stream (ostream&, const command&, command_to_stream);
+
+ ostream&
+ operator<< (ostream&, const command&);
+
+ // command_pipe
+ //
+ using command_pipe = vector<command>;
+
+ void
+ to_stream (ostream&, const command_pipe&, command_to_stream);
+
+ ostream&
+ operator<< (ostream&, const command_pipe&);
+
+ // command_expr
+ //
+ enum class expr_operator {log_or, log_and};
+
+ struct expr_term
+ {
+ expr_operator op; // OR-ed to an implied false for the first term.
+ command_pipe pipe;
+ };
+
+ using command_expr = vector<expr_term>;
+
+ void
+ to_stream (ostream&, const command_expr&, command_to_stream);
+
+ ostream&
+ operator<< (ostream&, const command_expr&);
+
+ // Script execution environment.
+ //
+ class environment
+ {
+ public:
+ build2::context& context;
+
+ // The platform script programs run on.
+ //
+ const target_triplet& host;
+
+ // The work directory is used as the builtin/process CWD and to complete
+ // relative paths. Any attempt to remove or move this directory (or its
+ // parent directory) using the rm or mv builtins will fail. Must be an
+ // absolute path.
+ //
+ const dir_name_view work_dir;
+
+ // If the sandbox directory is not NULL, then any attempt to remove or
+ // move a filesystem entry outside this directory using an explicit
+ // cleanup or the rm/mv builtins will fail, unless the --force option is
+ // specified for the builtin. Must be an absolute path.
+ //
+ const dir_name_view sandbox_dir;
+
+ // The temporary directory is used by the script running machinery to
+ // create special files. Must be an absolute path, unless empty. Can be
+ // empty until the create_temp_dir() function call, which can be used
+ // for creating this directory on demand.
+ //
+ const dir_path& temp_dir;
+
+ // If true, the temporary directory will not be removed on the script
+ // failure. In particular, this allows the script running machinery to
+ // refer to the special files in diagnostics.
+ //
+ const bool temp_dir_keep;
+
+ // Default process streams redirects.
+ //
+ // If a stream redirect is not specified on the script command line,
+ // then the respective redirect data member will be used as the default.
+ //
+ const redirect in;
+ const redirect out;
+ const redirect err;
+
+ environment (build2::context& ctx,
+ const target_triplet& h,
+ const dir_name_view& wd,
+ const dir_name_view& sd,
+ const dir_path& td, bool tk,
+ redirect&& i = redirect (redirect_type::pass),
+ redirect&& o = redirect (redirect_type::pass),
+ redirect&& e = redirect (redirect_type::pass))
+ : context (ctx), host (h),
+ work_dir (wd), sandbox_dir (sd), temp_dir (td), temp_dir_keep (tk),
+ in (move (i)), out (move (o)), err (move (e))
+ {
+ }
+
+ // Create environment without the sandbox.
+ //
+ environment (build2::context& ctx,
+ const target_triplet& h,
+ const dir_name_view& wd,
+ const dir_path& td, bool tk,
+ redirect&& i = redirect (redirect_type::pass),
+ redirect&& o = redirect (redirect_type::pass),
+ redirect&& e = redirect (redirect_type::pass))
+ : environment (ctx, h,
+ wd, dir_name_view (), td, tk,
+ move (i), move (o), move (e))
+ {
+ }
+
+ // Cleanup.
+ //
+ public:
+ script::cleanups cleanups;
+ paths special_cleanups;
+
+ // Register a cleanup. If the cleanup is explicit, then override the
+ // cleanup type if this path is already registered. Ignore implicit
+ // registration of a path outside root directory (see below).
+ //
+ void
+ clean (cleanup, bool implicit);
+
+ // Register cleanup of a special file. Such files are created to
+ // maintain the script running machinery and must be removed first, not
+ // to interfere with the user-defined wildcard cleanups if the working
+ // and temporary directories are the same.
+ //
+ void
+ clean_special (path);
+
+ public:
+ // Set variable value with optional (non-empty) attributes.
+ //
+ virtual void
+ set_variable (string&& name,
+ names&&,
+ const string& attrs,
+ const location&) = 0;
+
+ // Create the temporary directory and set the temp_dir reference target
+ // to its path. Must only be called if temp_dir is empty.
+ //
+ virtual void
+ create_temp_dir () = 0;
+
+ public:
+ virtual
+ ~environment () = default;
+ };
+ }
+}
+
+#include <libbuild2/script/script.ixx>
+
+#endif // LIBBUILD2_SCRIPT_SCRIPT_HXX
diff --git a/libbuild2/script/script.ixx b/libbuild2/script/script.ixx
new file mode 100644
index 0000000..56043b2
--- /dev/null
+++ b/libbuild2/script/script.ixx
@@ -0,0 +1,56 @@
+// file : libbuild2/script/script.ixx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+ namespace script
+ {
+ inline command_to_stream
+ operator&= (command_to_stream& x, command_to_stream y)
+ {
+ return x = static_cast<command_to_stream> (
+ static_cast<uint16_t> (x) & static_cast<uint16_t> (y));
+ }
+
+ inline command_to_stream
+ operator|= (command_to_stream& x, command_to_stream y)
+ {
+ return x = static_cast<command_to_stream> (
+ static_cast<uint16_t> (x) | static_cast<uint16_t> (y));
+ }
+
+ inline command_to_stream
+ operator& (command_to_stream x, command_to_stream y) {return x &= y;}
+
+ inline command_to_stream
+ operator| (command_to_stream x, command_to_stream y) {return x |= y;}
+
+
+ // command
+ //
+ inline ostream&
+ operator<< (ostream& o, const command& c)
+ {
+ to_stream (o, c, command_to_stream::all);
+ return o;
+ }
+
+ // command_pipe
+ //
+ inline ostream&
+ operator<< (ostream& o, const command_pipe& p)
+ {
+ to_stream (o, p, command_to_stream::all);
+ return o;
+ }
+
+ // command_expr
+ //
+ inline ostream&
+ operator<< (ostream& o, const command_expr& e)
+ {
+ to_stream (o, e, command_to_stream::all);
+ return o;
+ }
+ }
+}
diff --git a/libbuild2/script/token.cxx b/libbuild2/script/token.cxx
new file mode 100644
index 0000000..1c612a5
--- /dev/null
+++ b/libbuild2/script/token.cxx
@@ -0,0 +1,53 @@
+// file : libbuild2/script/token.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/script/token.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace script
+ {
+ void
+ token_printer (ostream& os, const token& t, print_mode m)
+ {
+ const string& v (t.value);
+
+ // Only quote non-name tokens for diagnostics.
+ //
+ const char* q (m == print_mode::diagnostics ? "'" : "");
+
+ switch (t.type)
+ {
+ case token_type::clean: os << q << '&' << v << q; break;
+ case token_type::pipe: os << q << '|' << q; break;
+
+ case token_type::in_pass: os << q << "<|" << q; break;
+ case token_type::in_null: os << q << "<-" << q; break;
+ case token_type::in_file: os << q << "<=" << q; break;
+ case token_type::in_doc: os << q << "<<=" << v << q; break;
+ case token_type::in_str: os << q << "<<<=" << v << q; break;
+
+ case token_type::out_pass: os << q << ">|" << q; break;
+ case token_type::out_null: os << q << ">-" << q; break;
+ case token_type::out_trace: os << q << ">!" << q; break;
+ case token_type::out_merge: os << q << ">&" << q; break;
+ case token_type::out_file_ovr: os << q << ">=" << q; break;
+ case token_type::out_file_app: os << q << ">+" << q; break;
+ case token_type::out_file_cmp: os << q << ">?" << q; break;
+ case token_type::out_doc: os << q << ">>?" << v << q; break;
+ case token_type::out_str: os << q << ">>>?" << v << q; break;
+
+ case token_type::in_l: os << q << '<' << v << q; break;
+ case token_type::in_ll: os << q << "<<" << v << q; break;
+ case token_type::in_lll: os << q << "<<<" << v << q; break;
+ case token_type::out_g: os << q << '>' << v << q; break;
+ case token_type::out_gg: os << q << ">>" << v << q; break;
+ case token_type::out_ggg: os << q << ">>>" << v << q; break;
+
+ default: build2::token_printer (os, t, m);
+ }
+ }
+ }
+}
diff --git a/libbuild2/script/token.hxx b/libbuild2/script/token.hxx
new file mode 100644
index 0000000..0186bd9
--- /dev/null
+++ b/libbuild2/script/token.hxx
@@ -0,0 +1,66 @@
+// file : libbuild2/script/token.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_SCRIPT_TOKEN_HXX
+#define LIBBUILD2_SCRIPT_TOKEN_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/token.hxx>
+
+namespace build2
+{
+ namespace script
+ {
+ struct token_type: build2::token_type
+ {
+ using base_type = build2::token_type;
+
+ enum
+ {
+ // NOTE: remember to update token_printer()!
+
+ pipe = base_type::value_next, // |
+ clean, // &{?!} (modifiers in value)
+
+ in_pass, // <|
+ in_null, // <-
+ in_file, // <=
+ in_doc, // <<={:/} (modifiers in value)
+ in_str, // <<<={:/} (modifiers in value)
+
+ out_pass, // >|
+ out_null, // >-
+ out_trace, // >!
+ out_merge, // >&
+ out_file_ovr, // >=
+ out_file_app, // >+
+ out_file_cmp, // >?
+ out_doc, // >>?{:/~} (modifiers in value)
+ out_str, // >>>?{:/~} (modifiers in value)
+
+ // The modifiers are in the token value, if the redirect the alias
+ // resolves to supports the modifiers.
+ //
+ in_l, // <
+ in_ll, // <<
+ in_lll, // <<<
+ out_g, // >
+ out_gg, // >>
+ out_ggg, // >>>
+
+ value_next
+ };
+
+ token_type () = default;
+ token_type (value_type v): base_type (v) {}
+ token_type (base_type v): base_type (v) {}
+ };
+
+ void
+ token_printer (ostream&, const token&, print_mode);
+ }
+}
+
+#endif // LIBBUILD2_SCRIPT_TOKEN_HXX
diff --git a/libbuild2/target-key.hxx b/libbuild2/target-key.hxx
index 0096d46..62bcc25 100644
--- a/libbuild2/target-key.hxx
+++ b/libbuild2/target-key.hxx
@@ -32,10 +32,18 @@ namespace build2
bool is_a () const {return type->is_a<T> ();}
bool is_a (const target_type& tt) const {return type->is_a (tt);}
- // Return the target name or a pair of names if out-qualified.
+ // Append/return the target name or a pair of names if out-qualified.
//
+ void
+ as_name (names&) const;
+
names
- as_name () const;
+ as_name () const
+ {
+ names r;
+ as_name (r);
+ return r;
+ }
};
inline bool
diff --git a/libbuild2/target.cxx b/libbuild2/target.cxx
index 83ed4a5..b9cfea7 100644
--- a/libbuild2/target.cxx
+++ b/libbuild2/target.cxx
@@ -44,11 +44,9 @@ namespace build2
// target_key
//
- names target_key::
- as_name () const
+ void target_key::
+ as_name (names& r) const
{
- names r;
-
string v (*name);
target::combine_name (v, ext, false /* @@ TODO: what to do? */);
@@ -56,11 +54,9 @@ namespace build2
if (!out->empty ())
{
- r.front ().pair = '@';
+ r.back ().pair = '@';
r.push_back (build2::name (*out));
}
-
- return r;
}
// target_state
@@ -964,8 +960,8 @@ namespace build2
phase_switch ps (t.ctx, run_phase::load);
// This is subtle: while we were fussing around another thread may
- // have loaded the buildfile. So re-test now that we are in exclusive
- // phase.
+ // have loaded the buildfile. So re-test now that we are in an
+ // exclusive phase.
//
if (e == nullptr)
e = search_existing_target (t.ctx, pk);
diff --git a/libbuild2/target.hxx b/libbuild2/target.hxx
index 9975f33..72b7acc 100644
--- a/libbuild2/target.hxx
+++ b/libbuild2/target.hxx
@@ -264,6 +264,9 @@ namespace build2
names
as_name () const;
+ void
+ as_name (names&) const;
+
// Scoping.
//
public:
@@ -410,6 +413,11 @@ namespace build2
value&
append (const variable&);
+ // Ad hoc recipes.
+ //
+ public:
+ vector<adhoc_recipe> adhoc_recipes;
+
// Target operation state.
//
public:
@@ -578,6 +586,9 @@ namespace build2
// This function can only be called during execution if we have observed
// (synchronization-wise) that this target has been executed.
//
+ // It can also be called during the serial load phase (but make sure you
+ // understand what you are doing).
+ //
target_state
executed_state (action, bool fail = true) const;
diff --git a/libbuild2/target.ixx b/libbuild2/target.ixx
index bb30c9c..611e562 100644
--- a/libbuild2/target.ixx
+++ b/libbuild2/target.ixx
@@ -36,6 +36,12 @@ namespace build2
return key ().as_name ();
}
+ inline void target::
+ as_name (names& r) const
+ {
+ return key ().as_name (r);
+ }
+
inline auto target::
prerequisites () const -> const prerequisites_type&
{
@@ -114,8 +120,6 @@ namespace build2
inline pair<bool, target_state> target::
matched_state_impl (action a) const
{
- assert (ctx.phase == run_phase::match);
-
// Note that the "tried" state is "final".
//
const opstate& s (state[a]);
@@ -138,13 +142,14 @@ namespace build2
inline target_state target::
executed_state_impl (action a) const
{
- assert (ctx.phase == run_phase::execute);
return (group_state (a) ? group->state : state)[a].state;
}
inline target_state target::
matched_state (action a, bool fail) const
{
+ assert (ctx.phase == run_phase::match);
+
// Note that the target could be being asynchronously re-matched.
//
pair<bool, target_state> r (matched_state_impl (a));
@@ -158,6 +163,8 @@ namespace build2
inline pair<bool, target_state> target::
try_matched_state (action a, bool fail) const
{
+ assert (ctx.phase == run_phase::match);
+
pair<bool, target_state> r (matched_state_impl (a));
if (fail && r.first && r.second == target_state::failed)
@@ -169,6 +176,8 @@ namespace build2
inline target_state target::
executed_state (action a, bool fail) const
{
+ assert (ctx.phase == run_phase::execute || ctx.phase == run_phase::load);
+
target_state r (executed_state_impl (a));
if (fail && r == target_state::failed)
@@ -193,6 +202,8 @@ namespace build2
inline bool target::
unchanged (action a) const
{
+ assert (ctx.phase == run_phase::match);
+
return matched_state_impl (a).second == target_state::unchanged;
}
diff --git a/libbuild2/test/init.cxx b/libbuild2/test/init.cxx
index 16891c6..7a07e76 100644
--- a/libbuild2/test/init.cxx
+++ b/libbuild2/test/init.cxx
@@ -14,8 +14,6 @@
#include <libbuild2/test/target.hxx>
#include <libbuild2/test/operation.hxx>
-#include <libbuild2/test/script/regex.hxx> // script::regex::init()
-
using namespace std;
using namespace butl;
@@ -226,8 +224,6 @@ namespace build2
const module_functions*
build2_test_load ()
{
- script::regex::init ();
-
return mod_functions;
}
}
diff --git a/libbuild2/test/script/builtin-options.cxx b/libbuild2/test/script/builtin-options.cxx
deleted file mode 100644
index 6b6afe0..0000000
--- a/libbuild2/test/script/builtin-options.cxx
+++ /dev/null
@@ -1,667 +0,0 @@
-// -*- C++ -*-
-//
-// This file was generated by CLI, a command line interface
-// compiler for C++.
-//
-
-// Begin prologue.
-//
-//
-// End prologue.
-
-#include <libbuild2/test/script/builtin-options.hxx>
-
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <ostream>
-#include <sstream>
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace cli
- {
- // unknown_option
- //
- unknown_option::
- ~unknown_option () throw ()
- {
- }
-
- void unknown_option::
- print (::std::ostream& os) const
- {
- os << "unknown option '" << option ().c_str () << "'";
- }
-
- const char* unknown_option::
- what () const throw ()
- {
- return "unknown option";
- }
-
- // unknown_argument
- //
- unknown_argument::
- ~unknown_argument () throw ()
- {
- }
-
- void unknown_argument::
- print (::std::ostream& os) const
- {
- os << "unknown argument '" << argument ().c_str () << "'";
- }
-
- const char* unknown_argument::
- what () const throw ()
- {
- return "unknown argument";
- }
-
- // missing_value
- //
- missing_value::
- ~missing_value () throw ()
- {
- }
-
- void missing_value::
- print (::std::ostream& os) const
- {
- os << "missing value for option '" << option ().c_str () << "'";
- }
-
- const char* missing_value::
- what () const throw ()
- {
- return "missing option value";
- }
-
- // invalid_value
- //
- invalid_value::
- ~invalid_value () throw ()
- {
- }
-
- void invalid_value::
- print (::std::ostream& os) const
- {
- os << "invalid value '" << value ().c_str () << "' for option '"
- << option ().c_str () << "'";
-
- if (!message ().empty ())
- os << ": " << message ().c_str ();
- }
-
- const char* invalid_value::
- what () const throw ()
- {
- return "invalid option value";
- }
-
- // eos_reached
- //
- void eos_reached::
- print (::std::ostream& os) const
- {
- os << what ();
- }
-
- const char* eos_reached::
- what () const throw ()
- {
- return "end of argument stream reached";
- }
-
- // scanner
- //
- scanner::
- ~scanner ()
- {
- }
-
- // argv_scanner
- //
- bool argv_scanner::
- more ()
- {
- return i_ < argc_;
- }
-
- const char* argv_scanner::
- peek ()
- {
- if (i_ < argc_)
- return argv_[i_];
- else
- throw eos_reached ();
- }
-
- const char* argv_scanner::
- next ()
- {
- if (i_ < argc_)
- {
- const char* r (argv_[i_]);
-
- if (erase_)
- {
- for (int i (i_ + 1); i < argc_; ++i)
- argv_[i - 1] = argv_[i];
-
- --argc_;
- argv_[argc_] = 0;
- }
- else
- ++i_;
-
- return r;
- }
- else
- throw eos_reached ();
- }
-
- void argv_scanner::
- skip ()
- {
- if (i_ < argc_)
- ++i_;
- else
- throw eos_reached ();
- }
-
- // vector_scanner
- //
- bool vector_scanner::
- more ()
- {
- return i_ < v_.size ();
- }
-
- const char* vector_scanner::
- peek ()
- {
- if (i_ < v_.size ())
- return v_[i_].c_str ();
- else
- throw eos_reached ();
- }
-
- const char* vector_scanner::
- next ()
- {
- if (i_ < v_.size ())
- return v_[i_++].c_str ();
- else
- throw eos_reached ();
- }
-
- void vector_scanner::
- skip ()
- {
- if (i_ < v_.size ())
- ++i_;
- else
- throw eos_reached ();
- }
-
- template <typename X>
- struct parser
- {
- static void
- parse (X& x, bool& xs, scanner& s)
- {
- using namespace std;
-
- const char* o (s.next ());
- if (s.more ())
- {
- string v (s.next ());
- istringstream is (v);
- if (!(is >> x && is.peek () == istringstream::traits_type::eof ()))
- throw invalid_value (o, v);
- }
- else
- throw missing_value (o);
-
- xs = true;
- }
- };
-
- template <>
- struct parser<bool>
- {
- static void
- parse (bool& x, scanner& s)
- {
- s.next ();
- x = true;
- }
- };
-
- template <>
- struct parser<std::string>
- {
- static void
- parse (std::string& x, bool& xs, scanner& s)
- {
- const char* o (s.next ());
-
- if (s.more ())
- x = s.next ();
- else
- throw missing_value (o);
-
- xs = true;
- }
- };
-
- template <typename X>
- struct parser<std::vector<X> >
- {
- static void
- parse (std::vector<X>& c, bool& xs, scanner& s)
- {
- X x;
- bool dummy;
- parser<X>::parse (x, dummy, s);
- c.push_back (x);
- xs = true;
- }
- };
-
- template <typename X>
- struct parser<std::set<X> >
- {
- static void
- parse (std::set<X>& c, bool& xs, scanner& s)
- {
- X x;
- bool dummy;
- parser<X>::parse (x, dummy, s);
- c.insert (x);
- xs = true;
- }
- };
-
- template <typename K, typename V>
- struct parser<std::map<K, V> >
- {
- static void
- parse (std::map<K, V>& m, bool& xs, scanner& s)
- {
- const char* o (s.next ());
-
- if (s.more ())
- {
- std::string ov (s.next ());
- std::string::size_type p = ov.find ('=');
-
- K k = K ();
- V v = V ();
- std::string kstr (ov, 0, p);
- std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ()));
-
- int ac (2);
- char* av[] =
- {
- const_cast<char*> (o), 0
- };
-
- bool dummy;
- if (!kstr.empty ())
- {
- av[1] = const_cast<char*> (kstr.c_str ());
- argv_scanner s (0, ac, av);
- parser<K>::parse (k, dummy, s);
- }
-
- if (!vstr.empty ())
- {
- av[1] = const_cast<char*> (vstr.c_str ());
- argv_scanner s (0, ac, av);
- parser<V>::parse (v, dummy, s);
- }
-
- m[k] = v;
- }
- else
- throw missing_value (o);
-
- xs = true;
- }
- };
-
- template <typename X, typename T, T X::*M>
- void
- thunk (X& x, scanner& s)
- {
- parser<T>::parse (x.*M, s);
- }
-
- template <typename X, typename T, T X::*M, bool X::*S>
- void
- thunk (X& x, scanner& s)
- {
- parser<T>::parse (x.*M, x.*S, s);
- }
- }
- }
- }
-}
-
-#include <map>
-#include <cstring>
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- // set_options
- //
-
- set_options::
- set_options ()
- : exact_ (),
- newline_ (),
- whitespace_ ()
- {
- }
-
- set_options::
- set_options (int& argc,
- char** argv,
- bool erase,
- ::build2::test::script::cli::unknown_mode opt,
- ::build2::test::script::cli::unknown_mode arg)
- : exact_ (),
- newline_ (),
- whitespace_ ()
- {
- ::build2::test::script::cli::argv_scanner s (argc, argv, erase);
- _parse (s, opt, arg);
- }
-
- set_options::
- set_options (int start,
- int& argc,
- char** argv,
- bool erase,
- ::build2::test::script::cli::unknown_mode opt,
- ::build2::test::script::cli::unknown_mode arg)
- : exact_ (),
- newline_ (),
- whitespace_ ()
- {
- ::build2::test::script::cli::argv_scanner s (start, argc, argv, erase);
- _parse (s, opt, arg);
- }
-
- set_options::
- set_options (int& argc,
- char** argv,
- int& end,
- bool erase,
- ::build2::test::script::cli::unknown_mode opt,
- ::build2::test::script::cli::unknown_mode arg)
- : exact_ (),
- newline_ (),
- whitespace_ ()
- {
- ::build2::test::script::cli::argv_scanner s (argc, argv, erase);
- _parse (s, opt, arg);
- end = s.end ();
- }
-
- set_options::
- set_options (int start,
- int& argc,
- char** argv,
- int& end,
- bool erase,
- ::build2::test::script::cli::unknown_mode opt,
- ::build2::test::script::cli::unknown_mode arg)
- : exact_ (),
- newline_ (),
- whitespace_ ()
- {
- ::build2::test::script::cli::argv_scanner s (start, argc, argv, erase);
- _parse (s, opt, arg);
- end = s.end ();
- }
-
- set_options::
- set_options (::build2::test::script::cli::scanner& s,
- ::build2::test::script::cli::unknown_mode opt,
- ::build2::test::script::cli::unknown_mode arg)
- : exact_ (),
- newline_ (),
- whitespace_ ()
- {
- _parse (s, opt, arg);
- }
-
- typedef
- std::map<std::string, void (*) (set_options&, ::build2::test::script::cli::scanner&)>
- _cli_set_options_map;
-
- static _cli_set_options_map _cli_set_options_map_;
-
- struct _cli_set_options_map_init
- {
- _cli_set_options_map_init ()
- {
- _cli_set_options_map_["--exact"] =
- &::build2::test::script::cli::thunk< set_options, bool, &set_options::exact_ >;
- _cli_set_options_map_["-e"] =
- &::build2::test::script::cli::thunk< set_options, bool, &set_options::exact_ >;
- _cli_set_options_map_["--newline"] =
- &::build2::test::script::cli::thunk< set_options, bool, &set_options::newline_ >;
- _cli_set_options_map_["-n"] =
- &::build2::test::script::cli::thunk< set_options, bool, &set_options::newline_ >;
- _cli_set_options_map_["--whitespace"] =
- &::build2::test::script::cli::thunk< set_options, bool, &set_options::whitespace_ >;
- _cli_set_options_map_["-w"] =
- &::build2::test::script::cli::thunk< set_options, bool, &set_options::whitespace_ >;
- }
- };
-
- static _cli_set_options_map_init _cli_set_options_map_init_;
-
- bool set_options::
- _parse (const char* o, ::build2::test::script::cli::scanner& s)
- {
- _cli_set_options_map::const_iterator i (_cli_set_options_map_.find (o));
-
- if (i != _cli_set_options_map_.end ())
- {
- (*(i->second)) (*this, s);
- return true;
- }
-
- return false;
- }
-
- bool set_options::
- _parse (::build2::test::script::cli::scanner& s,
- ::build2::test::script::cli::unknown_mode opt_mode,
- ::build2::test::script::cli::unknown_mode arg_mode)
- {
- // Can't skip combined flags (--no-combined-flags).
- //
- assert (opt_mode != ::build2::test::script::cli::unknown_mode::skip);
-
- bool r = false;
- bool opt = true;
-
- while (s.more ())
- {
- const char* o = s.peek ();
-
- if (std::strcmp (o, "--") == 0)
- {
- opt = false;
- s.skip ();
- r = true;
- continue;
- }
-
- if (opt)
- {
- if (_parse (o, s))
- {
- r = true;
- continue;
- }
-
- if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0')
- {
- // Handle combined option values.
- //
- std::string co;
- if (const char* v = std::strchr (o, '='))
- {
- co.assign (o, 0, v - o);
- ++v;
-
- int ac (2);
- char* av[] =
- {
- const_cast<char*> (co.c_str ()),
- const_cast<char*> (v)
- };
-
- ::build2::test::script::cli::argv_scanner ns (0, ac, av);
-
- if (_parse (co.c_str (), ns))
- {
- // Parsed the option but not its value?
- //
- if (ns.end () != 2)
- throw ::build2::test::script::cli::invalid_value (co, v);
-
- s.next ();
- r = true;
- continue;
- }
- else
- {
- // Set the unknown option and fall through.
- //
- o = co.c_str ();
- }
- }
-
- // Handle combined flags.
- //
- char cf[3];
- {
- const char* p = o + 1;
- for (; *p != '\0'; ++p)
- {
- if (!((*p >= 'a' && *p <= 'z') ||
- (*p >= 'A' && *p <= 'Z') ||
- (*p >= '0' && *p <= '9')))
- break;
- }
-
- if (*p == '\0')
- {
- for (p = o + 1; *p != '\0'; ++p)
- {
- std::strcpy (cf, "-");
- cf[1] = *p;
- cf[2] = '\0';
-
- int ac (1);
- char* av[] =
- {
- cf
- };
-
- ::build2::test::script::cli::argv_scanner ns (0, ac, av);
-
- if (!_parse (cf, ns))
- break;
- }
-
- if (*p == '\0')
- {
- // All handled.
- //
- s.next ();
- r = true;
- continue;
- }
- else
- {
- // Set the unknown option and fall through.
- //
- o = cf;
- }
- }
- }
-
- switch (opt_mode)
- {
- case ::build2::test::script::cli::unknown_mode::skip:
- {
- s.skip ();
- r = true;
- continue;
- }
- case ::build2::test::script::cli::unknown_mode::stop:
- {
- break;
- }
- case ::build2::test::script::cli::unknown_mode::fail:
- {
- throw ::build2::test::script::cli::unknown_option (o);
- }
- }
-
- break;
- }
- }
-
- switch (arg_mode)
- {
- case ::build2::test::script::cli::unknown_mode::skip:
- {
- s.skip ();
- r = true;
- continue;
- }
- case ::build2::test::script::cli::unknown_mode::stop:
- {
- break;
- }
- case ::build2::test::script::cli::unknown_mode::fail:
- {
- throw ::build2::test::script::cli::unknown_argument (o);
- }
- }
-
- break;
- }
-
- return r;
- }
- }
- }
-}
-
-// Begin epilogue.
-//
-//
-// End epilogue.
-
diff --git a/libbuild2/test/script/builtin-options.hxx b/libbuild2/test/script/builtin-options.hxx
deleted file mode 100644
index 44e129a..0000000
--- a/libbuild2/test/script/builtin-options.hxx
+++ /dev/null
@@ -1,345 +0,0 @@
-// -*- C++ -*-
-//
-// This file was generated by CLI, a command line interface
-// compiler for C++.
-//
-
-#ifndef LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX
-#define LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX
-
-// Begin prologue.
-//
-//
-// End prologue.
-
-#include <vector>
-#include <iosfwd>
-#include <string>
-#include <cstddef>
-#include <exception>
-
-#ifndef CLI_POTENTIALLY_UNUSED
-# if defined(_MSC_VER) || defined(__xlC__)
-# define CLI_POTENTIALLY_UNUSED(x) (void*)&x
-# else
-# define CLI_POTENTIALLY_UNUSED(x) (void)x
-# endif
-#endif
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace cli
- {
- class unknown_mode
- {
- public:
- enum value
- {
- skip,
- stop,
- fail
- };
-
- unknown_mode (value);
-
- operator value () const
- {
- return v_;
- }
-
- private:
- value v_;
- };
-
- // Exceptions.
- //
-
- class exception: public std::exception
- {
- public:
- virtual void
- print (::std::ostream&) const = 0;
- };
-
- ::std::ostream&
- operator<< (::std::ostream&, const exception&);
-
- class unknown_option: public exception
- {
- public:
- virtual
- ~unknown_option () throw ();
-
- unknown_option (const std::string& option);
-
- const std::string&
- option () const;
-
- virtual void
- print (::std::ostream&) const;
-
- virtual const char*
- what () const throw ();
-
- private:
- std::string option_;
- };
-
- class unknown_argument: public exception
- {
- public:
- virtual
- ~unknown_argument () throw ();
-
- unknown_argument (const std::string& argument);
-
- const std::string&
- argument () const;
-
- virtual void
- print (::std::ostream&) const;
-
- virtual const char*
- what () const throw ();
-
- private:
- std::string argument_;
- };
-
- class missing_value: public exception
- {
- public:
- virtual
- ~missing_value () throw ();
-
- missing_value (const std::string& option);
-
- const std::string&
- option () const;
-
- virtual void
- print (::std::ostream&) const;
-
- virtual const char*
- what () const throw ();
-
- private:
- std::string option_;
- };
-
- class invalid_value: public exception
- {
- public:
- virtual
- ~invalid_value () throw ();
-
- invalid_value (const std::string& option,
- const std::string& value,
- const std::string& message = std::string ());
-
- const std::string&
- option () const;
-
- const std::string&
- value () const;
-
- const std::string&
- message () const;
-
- virtual void
- print (::std::ostream&) const;
-
- virtual const char*
- what () const throw ();
-
- private:
- std::string option_;
- std::string value_;
- std::string message_;
- };
-
- class eos_reached: public exception
- {
- public:
- virtual void
- print (::std::ostream&) const;
-
- virtual const char*
- what () const throw ();
- };
-
- // Command line argument scanner interface.
- //
- // The values returned by next() are guaranteed to be valid
- // for the two previous arguments up until a call to a third
- // peek() or next().
- //
- class scanner
- {
- public:
- virtual
- ~scanner ();
-
- virtual bool
- more () = 0;
-
- virtual const char*
- peek () = 0;
-
- virtual const char*
- next () = 0;
-
- virtual void
- skip () = 0;
- };
-
- class argv_scanner: public scanner
- {
- public:
- argv_scanner (int& argc, char** argv, bool erase = false);
- argv_scanner (int start, int& argc, char** argv, bool erase = false);
-
- int
- end () const;
-
- virtual bool
- more ();
-
- virtual const char*
- peek ();
-
- virtual const char*
- next ();
-
- virtual void
- skip ();
-
- private:
- int i_;
- int& argc_;
- char** argv_;
- bool erase_;
- };
-
- class vector_scanner: public scanner
- {
- public:
- vector_scanner (const std::vector<std::string>&, std::size_t start = 0);
-
- std::size_t
- end () const;
-
- void
- reset (std::size_t start = 0);
-
- virtual bool
- more ();
-
- virtual const char*
- peek ();
-
- virtual const char*
- next ();
-
- virtual void
- skip ();
-
- private:
- const std::vector<std::string>& v_;
- std::size_t i_;
- };
-
- template <typename X>
- struct parser;
- }
- }
- }
-}
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- class set_options
- {
- public:
- set_options ();
-
- set_options (int& argc,
- char** argv,
- bool erase = false,
- ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail,
- ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop);
-
- set_options (int start,
- int& argc,
- char** argv,
- bool erase = false,
- ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail,
- ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop);
-
- set_options (int& argc,
- char** argv,
- int& end,
- bool erase = false,
- ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail,
- ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop);
-
- set_options (int start,
- int& argc,
- char** argv,
- int& end,
- bool erase = false,
- ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail,
- ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop);
-
- set_options (::build2::test::script::cli::scanner&,
- ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail,
- ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop);
-
- // Option accessors.
- //
- const bool&
- exact () const;
-
- const bool&
- newline () const;
-
- const bool&
- whitespace () const;
-
- // Implementation details.
- //
- protected:
- bool
- _parse (const char*, ::build2::test::script::cli::scanner&);
-
- private:
- bool
- _parse (::build2::test::script::cli::scanner&,
- ::build2::test::script::cli::unknown_mode option,
- ::build2::test::script::cli::unknown_mode argument);
-
- public:
- bool exact_;
- bool newline_;
- bool whitespace_;
- };
- }
- }
-}
-
-#include <libbuild2/test/script/builtin-options.ixx>
-
-// Begin epilogue.
-//
-//
-// End epilogue.
-
-#endif // LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX
diff --git a/libbuild2/test/script/builtin-options.ixx b/libbuild2/test/script/builtin-options.ixx
deleted file mode 100644
index bdb95b4..0000000
--- a/libbuild2/test/script/builtin-options.ixx
+++ /dev/null
@@ -1,188 +0,0 @@
-// -*- C++ -*-
-//
-// This file was generated by CLI, a command line interface
-// compiler for C++.
-//
-
-// Begin prologue.
-//
-//
-// End prologue.
-
-#include <cassert>
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace cli
- {
- // unknown_mode
- //
- inline unknown_mode::
- unknown_mode (value v)
- : v_ (v)
- {
- }
-
- // exception
- //
- inline ::std::ostream&
- operator<< (::std::ostream& os, const exception& e)
- {
- e.print (os);
- return os;
- }
-
- // unknown_option
- //
- inline unknown_option::
- unknown_option (const std::string& option)
- : option_ (option)
- {
- }
-
- inline const std::string& unknown_option::
- option () const
- {
- return option_;
- }
-
- // unknown_argument
- //
- inline unknown_argument::
- unknown_argument (const std::string& argument)
- : argument_ (argument)
- {
- }
-
- inline const std::string& unknown_argument::
- argument () const
- {
- return argument_;
- }
-
- // missing_value
- //
- inline missing_value::
- missing_value (const std::string& option)
- : option_ (option)
- {
- }
-
- inline const std::string& missing_value::
- option () const
- {
- return option_;
- }
-
- // invalid_value
- //
- inline invalid_value::
- invalid_value (const std::string& option,
- const std::string& value,
- const std::string& message)
- : option_ (option),
- value_ (value),
- message_ (message)
- {
- }
-
- inline const std::string& invalid_value::
- option () const
- {
- return option_;
- }
-
- inline const std::string& invalid_value::
- value () const
- {
- return value_;
- }
-
- inline const std::string& invalid_value::
- message () const
- {
- return message_;
- }
-
- // argv_scanner
- //
- inline argv_scanner::
- argv_scanner (int& argc, char** argv, bool erase)
- : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase)
- {
- }
-
- inline argv_scanner::
- argv_scanner (int start, int& argc, char** argv, bool erase)
- : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase)
- {
- }
-
- inline int argv_scanner::
- end () const
- {
- return i_;
- }
-
- // vector_scanner
- //
- inline vector_scanner::
- vector_scanner (const std::vector<std::string>& v, std::size_t i)
- : v_ (v), i_ (i)
- {
- }
-
- inline std::size_t vector_scanner::
- end () const
- {
- return i_;
- }
-
- inline void vector_scanner::
- reset (std::size_t i)
- {
- i_ = i;
- }
- }
- }
- }
-}
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- // set_options
- //
-
- inline const bool& set_options::
- exact () const
- {
- return this->exact_;
- }
-
- inline const bool& set_options::
- newline () const
- {
- return this->newline_;
- }
-
- inline const bool& set_options::
- whitespace () const
- {
- return this->whitespace_;
- }
- }
- }
-}
-
-// Begin epilogue.
-//
-//
-// End epilogue.
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
index 26d77b5..a94109b 100644
--- a/libbuild2/test/script/lexer.cxx
+++ b/libbuild2/test/script/lexer.cxx
@@ -15,8 +15,16 @@ namespace build2
{
using type = token_type;
+ build2::script::redirect_aliases lexer::redirect_aliases {
+ type (type::in_str),
+ type (type::in_doc),
+ type (type::in_file),
+ type (type::out_str),
+ type (type::out_doc),
+ type (type::out_file_cmp)};
+
void lexer::
- mode (base_mode m, char ps, optional<const char*> esc)
+ mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data)
{
bool a (false); // attributes
@@ -77,43 +85,6 @@ namespace build2
s2 = " ";
break;
}
-
- case lexer_mode::command_expansion:
- {
- // Note that whitespaces are not word separators in this mode.
- //
- s1 = "|&<>";
- s2 = " ";
- s = false;
- break;
- }
- case lexer_mode::here_line_single:
- {
- // This one is like a single-quoted string except it treats
- // newlines as a separator. We also treat quotes as literals.
- //
- // Note that it might be tempting to enable line continuation
- // escapes. However, we will then have to also enable escaping of
- // the backslash, which makes it a lot less tempting.
- //
- s1 = "\n";
- s2 = " ";
- esc = ""; // Disable escape sequences.
- s = false;
- q = false;
- break;
- }
- case lexer_mode::here_line_double:
- {
- // This one is like a double-quoted string except it treats
- // newlines as a separator. We also treat quotes as literals.
- //
- s1 = "$(\n";
- s2 = " ";
- s = false;
- q = false;
- break;
- }
case lexer_mode::description_line:
{
// This one is like a single-quoted string and has an ad hoc
@@ -138,7 +109,7 @@ namespace build2
}
assert (ps == '\0');
- state_.push (state {m, a, ps, s, n, q, *esc, s1, s2});
+ state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
}
token lexer::
@@ -152,17 +123,12 @@ namespace build2
case lexer_mode::first_token:
case lexer_mode::second_token:
case lexer_mode::variable_line:
- case lexer_mode::command_expansion:
- case lexer_mode::here_line_single:
- case lexer_mode::here_line_double:
r = next_line ();
break;
case lexer_mode::description_line:
r = next_description ();
break;
- default:
- r = base_lexer::next ();
- break;
+ default: return base_lexer::next ();
}
if (r.qtype != quote_type::unquoted)
@@ -174,7 +140,7 @@ namespace build2
token lexer::
next_line ()
{
- bool sep (skip_spaces ());
+ bool sep (skip_spaces ().first);
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
@@ -182,38 +148,9 @@ namespace build2
state st (state_.top ()); // Make copy (see first/second_token).
lexer_mode m (st.mode);
- auto make_token = [&sep, &m, ln, cn] (type t, string v = string ())
+ auto make_token = [&sep, ln, cn] (type t)
{
- bool q (m == lexer_mode::here_line_double);
-
- return token (t, move (v), sep,
- (q ? quote_type::double_ : quote_type::unquoted), q,
- ln, cn,
- token_printer);
- };
-
- auto make_token_with_modifiers =
- [&make_token, this] (type t,
- const char* mods, // To recorgnize.
- const char* stop = nullptr) // To stop after.
- {
- string v;
- if (mods != nullptr)
- {
- for (xchar p (peek ());
- (strchr (mods, p) != nullptr && // Modifier.
- strchr (v.c_str (), p) == nullptr); // Not already seen.
- p = peek ())
- {
- get ();
- v += p;
-
- if (stop != nullptr && strchr (stop, p) != nullptr)
- break;
- }
- }
-
- return make_token (t, move (v));
+ return token (t, sep, ln, cn, token_printer);
};
// Handle attributes (do it first to make sure the flag is cleared
@@ -240,32 +177,23 @@ namespace build2
// NOTE: remember to update mode() if adding new special characters.
- if (m != lexer_mode::command_expansion)
+ switch (c)
{
- switch (c)
+ case '\n':
{
- case '\n':
- {
- // Expire variable value mode at the end of the line.
- //
- if (m == lexer_mode::variable_line)
- state_.pop ();
+ // Expire variable value mode at the end of the line.
+ //
+ if (m == lexer_mode::variable_line)
+ state_.pop ();
- sep = true; // Treat newline as always separated.
- return make_token (type::newline);
- }
+ sep = true; // Treat newline as always separated.
+ return make_token (type::newline);
}
- }
- if (m != lexer_mode::here_line_single)
- {
- switch (c)
- {
- // Variable expansion, function call, and evaluation context.
- //
- case '$': return make_token (type::dollar);
- case '(': return make_token (type::lparen);
- }
+ // Variable expansion, function call, and evaluation context.
+ //
+ case '$': return make_token (type::dollar);
+ case '(': return make_token (type::lparen);
}
// Line separators.
@@ -313,133 +241,14 @@ namespace build2
}
}
- // Command operators/separators.
+ // Command operators.
//
if (m == lexer_mode::command_line ||
m == lexer_mode::first_token ||
- m == lexer_mode::second_token ||
- m == lexer_mode::command_expansion)
+ m == lexer_mode::second_token)
{
- switch (c)
- {
- // |, ||
- //
- case '|':
- {
- if (peek () == '|')
- {
- get ();
- return make_token (type::log_or);
- }
- else
- return make_token (type::pipe);
- }
- // &, &&
- //
- case '&':
- {
- xchar p (peek ());
-
- if (p == '&')
- {
- get ();
- return make_token (type::log_and);
- }
-
- // These modifiers are mutually exclusive so stop after seeing
- // either one.
- //
- return make_token_with_modifiers (type::clean, "!?", "!?");
- }
- // <
- //
- case '<':
- {
- type r (type::in_str);
- xchar p (peek ());
-
- if (p == '|' || p == '-' || p == '<')
- {
- get ();
-
- switch (p)
- {
- case '|': return make_token (type::in_pass);
- case '-': return make_token (type::in_null);
- case '<':
- {
- r = type::in_doc;
- p = peek ();
-
- if (p == '<')
- {
- get ();
- r = type::in_file;
- }
- break;
- }
- }
- }
-
- // Handle modifiers.
- //
- const char* mods (nullptr);
- switch (r)
- {
- case type::in_str:
- case type::in_doc: mods = ":/"; break;
- }
-
- return make_token_with_modifiers (r, mods);
- }
- // >
- //
- case '>':
- {
- type r (type::out_str);
- xchar p (peek ());
-
- if (p == '|' || p == '-' || p == '!' || p == '&' ||
- p == '=' || p == '+' || p == '>')
- {
- get ();
-
- switch (p)
- {
- case '|': return make_token (type::out_pass);
- case '-': return make_token (type::out_null);
- case '!': return make_token (type::out_trace);
- case '&': return make_token (type::out_merge);
- case '=': return make_token (type::out_file_ovr);
- case '+': return make_token (type::out_file_app);
- case '>':
- {
- r = type::out_doc;
- p = peek ();
-
- if (p == '>')
- {
- get ();
- r = type::out_file_cmp;
- }
- break;
- }
- }
- }
-
- // Handle modifiers.
- //
- const char* mods (nullptr);
- const char* stop (nullptr);
- switch (r)
- {
- case type::out_str:
- case type::out_doc: mods = ":/~"; stop = "~"; break;
- }
-
- return make_token_with_modifiers (r, mods, stop);
- }
- }
+ if (optional<token> t = next_cmd_op (c, sep))
+ return move (*t);
}
// Dot, plus/minus, and left/right curly braces.
diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx
index 5763e3b..452e794 100644
--- a/libbuild2/test/script/lexer.hxx
+++ b/libbuild2/test/script/lexer.hxx
@@ -7,7 +7,7 @@
#include <libbuild2/types.hxx>
#include <libbuild2/utility.hxx>
-#include <libbuild2/lexer.hxx>
+#include <libbuild2/script/lexer.hxx>
#include <libbuild2/test/script/token.hxx>
@@ -17,9 +17,9 @@ namespace build2
{
namespace script
{
- struct lexer_mode: build2::lexer_mode
+ struct lexer_mode: build2::script::lexer_mode
{
- using base_type = build2::lexer_mode;
+ using base_type = build2::script::lexer_mode;
enum
{
@@ -27,22 +27,18 @@ namespace build2
first_token, // Expires at the end of the token.
second_token, // Expires at the end of the token.
variable_line, // Expires at the end of the line.
- command_expansion,
- here_line_single,
- here_line_double,
description_line // Expires at the end of the line.
};
lexer_mode () = default;
lexer_mode (value_type v): base_type (v) {}
- lexer_mode (base_type v): base_type (v) {}
+ lexer_mode (build2::lexer_mode v): base_type (v) {}
};
- class lexer: public build2::lexer
+ class lexer: public build2::script::lexer
{
public:
- using base_lexer = build2::lexer;
- using base_mode = build2::lexer_mode;
+ using base_lexer = build2::script::lexer;
// Note that neither the name nor escape arguments are copied.
//
@@ -52,28 +48,25 @@ namespace build2
const char* escapes = nullptr)
: base_lexer (is, name, 1 /* line */,
nullptr /* escapes */,
- false /* set_mode */)
+ false /* set_mode */,
+ redirect_aliases)
{
mode (m, '\0', escapes);
}
virtual void
- mode (base_mode,
+ mode (build2::lexer_mode,
char = '\0',
- optional<const char*> = nullopt) override;
-
- // Number of quoted (double or single) tokens since last reset.
- //
- size_t
- quoted () const {return quoted_;}
-
- void
- reset_quoted (size_t q) {quoted_ = q;}
+ optional<const char*> = nullopt,
+ uintptr_t = 0) override;
virtual token
next () override;
- protected:
+ public:
+ static redirect_aliases_type redirect_aliases;
+
+ private:
token
next_line ();
@@ -82,9 +75,6 @@ namespace build2
virtual token
word (state, bool) override;
-
- protected:
- size_t quoted_;
};
}
}
diff --git a/libbuild2/test/script/lexer.test.cxx b/libbuild2/test/script/lexer.test.cxx
index 1512e58..9c64616 100644
--- a/libbuild2/test/script/lexer.test.cxx
+++ b/libbuild2/test/script/lexer.test.cxx
@@ -32,9 +32,6 @@ namespace build2
else if (s == "first-token") m = lexer_mode::first_token;
else if (s == "second-token") m = lexer_mode::second_token;
else if (s == "variable-line") m = lexer_mode::variable_line;
- else if (s == "command-expansion") m = lexer_mode::command_expansion;
- else if (s == "here-line-single") m = lexer_mode::here_line_single;
- else if (s == "here-line-double") m = lexer_mode::here_line_double;
else if (s == "description-line") m = lexer_mode::description_line;
else if (s == "variable") m = lexer_mode::variable;
else assert (false);
@@ -46,14 +43,10 @@ namespace build2
// Some modes auto-expire so we need something underneath.
//
- bool u (m == lexer_mode::first_token ||
- m == lexer_mode::second_token ||
- m == lexer_mode::variable_line ||
- m == lexer_mode::description_line ||
- m == lexer_mode::variable);
+ bool u (m != lexer_mode::command_line);
path_name in ("<stdin>");
- lexer l (cin, in, u ? lexer_mode::command_line : m);
+ lexer l (cin, in, lexer_mode::command_line);
if (u)
l.mode (m);
@@ -63,7 +56,7 @@ namespace build2
{
// Print each token on a separate line without quoting operators.
//
- t.printer (cout, t, false);
+ t.printer (cout, t, print_mode::normal);
cout << endl;
}
}
diff --git a/libbuild2/test/script/parser+exit.test.testscript b/libbuild2/test/script/parser+exit.test.testscript
index c6327df..44728a5 100644
--- a/libbuild2/test/script/parser+exit.test.testscript
+++ b/libbuild2/test/script/parser+exit.test.testscript
@@ -22,5 +22,5 @@ EOO
$* <<EOI 2>>EOE != 0
cmd != 1 <"foo"
EOI
-testscript:1:10: error: unexpected '<' after command exit status
+testscript:1:10: error: expected newline instead of '<'
EOE
diff --git a/libbuild2/test/script/parser+redirect.test.testscript b/libbuild2/test/script/parser+redirect.test.testscript
index 3858808..79530e0 100644
--- a/libbuild2/test/script/parser+redirect.test.testscript
+++ b/libbuild2/test/script/parser+redirect.test.testscript
@@ -49,7 +49,7 @@
: portable-path
:
$* <<EOI >>EOO
- cmd <</EOI_ >/EOO_ 2>/EOE_
+ cmd <</EOI_ >>/EOO_ 2>>/EOE_
foo
EOI_
bar
@@ -57,7 +57,7 @@
baz
EOE_
EOI
- cmd <</EOI_ >/EOO_ 2>/EOE_
+ cmd <</EOI_ >>/EOO_ 2>>/EOE_
foo
EOI_
bar
@@ -113,13 +113,13 @@
: portable-path
:
$* <<EOI >>EOO
- cmd >/~%EOF% 2>/~%EOE%
+ cmd >>/~%EOF% 2>>/~%EOE%
foo
EOF
bar
EOE
EOI
- cmd >/~%EOF% 2>/~%EOE%
+ cmd >>/~%EOF% 2>>/~%EOE%
foo
EOF
bar
diff --git a/libbuild2/test/script/parser+regex.test.testscript b/libbuild2/test/script/parser+regex.test.testscript
index 8627304..db418b3 100644
--- a/libbuild2/test/script/parser+regex.test.testscript
+++ b/libbuild2/test/script/parser+regex.test.testscript
@@ -162,9 +162,12 @@
EOE
EOO
- : no-newline
+ : no-newline-str
:
$* <'cmd >:~/fo*/' >'cmd >:~/fo*/'
+
+ : no-newline-doc
+ :
$* <<EOI >>EOO
cmd 2>>:~/EOE/
foo
diff --git a/libbuild2/test/script/parser+variable.test.testscript b/libbuild2/test/script/parser+variable.test.testscript
new file mode 100644
index 0000000..3751a5f
--- /dev/null
+++ b/libbuild2/test/script/parser+variable.test.testscript
@@ -0,0 +1,19 @@
+# file : libbuild2/test/script/parser+variable.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+: assignment
+:
+$* <<EOI >>EOO
+a = b
+echo $a
+EOI
+echo b
+EOO
+
+: empty-name
+:
+$* <<EOI 2>>EOE != 0
+= b
+EOI
+testscript:1:1: error: missing variable name
+EOE
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
index 06cefc7..f663c11 100644
--- a/libbuild2/test/script/parser.cxx
+++ b/libbuild2/test/script/parser.cxx
@@ -3,8 +3,6 @@
#include <libbuild2/test/script/parser.hxx>
-#include <sstream>
-
#include <libbuild2/context.hxx> // sched, keep_going
#include <libbuild2/test/script/lexer.hxx>
@@ -316,7 +314,7 @@ namespace build2
// Determine the line type/start token.
//
line_type lt;
- type st (type::eos);
+ type st (type::eos); // Later, can only be set to plus or minus.
switch (tt)
{
@@ -372,51 +370,7 @@ namespace build2
}
default:
{
- // Either variable assignment or test command.
- //
- replay_save (); // Start saving tokens from the current one.
- next (t, tt);
-
- // Decide whether this is a variable assignment or a command.
- //
- // It is an assignment if the first token is an unquoted name and
- // the next token is an assign/append/prepend operator. Assignment
- // to a computed variable name must use the set builtin.
- //
- // Note also thatspecial commands take precedence over variable
- // assignments.
- //
- lt = line_type::cmd; // Default.
-
- if (tt == type::word && t.qtype == quote_type::unquoted)
- {
- const string& n (t.value);
-
- if (n == "if") lt = line_type::cmd_if;
- else if (n == "if!") lt = line_type::cmd_ifn;
- else if (n == "elif") lt = line_type::cmd_elif;
- else if (n == "elif!") lt = line_type::cmd_elifn;
- else if (n == "else") lt = line_type::cmd_else;
- else if (n == "end") lt = line_type::cmd_end;
- else
- {
- // Switch the recognition of leading variable assignments for
- // the next token. This is safe to do because we know we
- // cannot be in the quoted mode (since the current token is
- // not quoted).
- //
- type p (peek (lexer_mode::second_token));
-
- if (p == type::assign ||
- p == type::prepend ||
- p == type::append)
- {
- lt = line_type::var;
- st = p;
- }
- }
- }
-
+ lt = pre_parse_line_start (t, tt, lexer_mode::second_token);
break;
}
}
@@ -435,7 +389,7 @@ namespace build2
//
string& n (t.value);
- if (n == "*" || n == "~" || n == "@" || digit (n))
+ if (special_variable (n))
fail (t) << "attempt to set '" << n << "' variable directly";
// Pre-enter the variables now while we are executing serially.
@@ -444,6 +398,11 @@ namespace build2
ln.var = &script_->var_pool.insert (move (n));
next (t, tt); // Assignment kind.
+
+ // We cannot reuse the value mode since it will recognize `{`
+ // which we want to treat as a literal.
+ //
+ mode (lexer_mode::variable_line);
parse_variable_line (t, tt);
semi = (tt == type::semi);
@@ -469,7 +428,7 @@ namespace build2
pair<command_expr, here_docs> p;
if (lt != line_type::cmd_else && lt != line_type::cmd_end)
- p = parse_command_expr (t, tt);
+ p = parse_command_expr (t, tt, lexer::redirect_aliases);
// Colon and semicolon are only valid in test command lines and
// after 'end' in if-else. Note that we still recognize them
@@ -1039,7 +998,7 @@ namespace build2
const path_name* op (path_);
path_ = &pn;
- lexer* ol (lexer_);
+ build2::script::lexer* ol (lexer_);
set_lexer (&l);
string oip (id_prefix_);
@@ -1281,35 +1240,6 @@ namespace build2
return r;
}
- value parser::
- parse_variable_line (token& t, type& tt)
- {
- // enter: assignment
- // leave: newline or semi
-
- // We cannot reuse the value mode since it will recognize `{` which we
- // want to treat as a literal.
- //
- mode (lexer_mode::variable_line);
- next_with_attributes (t, tt);
-
- // Parse value attributes if any. Note that it's ok not to have
- // anything after the attributes (e.g., foo=[null]).
- //
- attributes_push (t, tt, true);
-
- // @@ PAT: Should we expand patterns? Note that it will only be
- // simple ones since we have disabled {}. Also, what would be the
- // pattern base directory?
- //
- return tt != type::newline && tt != type::semi
- ? parse_value (t, tt,
- pattern_mode::ignore,
- "variable value",
- nullptr)
- : value (names ());
- }
-
command_expr parser::
parse_command_line (token& t, type& tt)
{
@@ -1318,7 +1248,8 @@ namespace build2
// Note: this one is only used during execution.
- pair<command_expr, here_docs> p (parse_command_expr (t, tt));
+ pair<command_expr, here_docs> p (
+ parse_command_expr (t, tt, lexer::redirect_aliases));
switch (tt)
{
@@ -1334,1542 +1265,6 @@ namespace build2
return move (p.first);
}
- // Parse the regular expression representation (non-empty string value
- // framed with introducer characters and optionally followed by flag
- // characters from the {di} set, for example '/foo/id') into
- // components. Also return end-of-parsing position if requested,
- // otherwise treat any unparsed characters left as an error.
- //
- struct regex_parts
- {
- string value;
- char intro;
- string flags; // Combination of characters from {di} set.
-
- // Create a special empty object.
- //
- regex_parts (): intro ('\0') {}
-
- regex_parts (string v, char i, string f)
- : value (move (v)), intro (i), flags (move (f)) {}
- };
-
- static regex_parts
- parse_regex (const string& s,
- const location& l,
- const char* what,
- size_t* end = nullptr)
- {
- if (s.empty ())
- fail (l) << "no introducer character in " << what;
-
- size_t p (s.find (s[0], 1)); // Find terminating introducer.
-
- if (p == string::npos)
- fail (l) << "no closing introducer character in " << what;
-
- size_t rn (p - 1); // Regex length.
- if (rn == 0)
- fail (l) << what << " is empty";
-
- // Find end-of-flags position.
- //
- size_t fp (++p); // Save flags starting position.
- for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ;
-
- // If string end is not reached then report invalid flags, unless
- // end-of-parsing position is requested (which means regex is just a
- // prefix).
- //
- if (s[p] != '\0' && end == nullptr)
- fail (l) << "junk at the end of " << what;
-
- if (end != nullptr)
- *end = p;
-
- return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp));
- }
-
- pair<command_expr, parser::here_docs> parser::
- parse_command_expr (token& t, type& tt)
- {
- // enter: first token of the command line
- // leave: <newline>
-
- command_expr expr;
-
- // OR-ed to an implied false for the first term.
- //
- expr.push_back ({expr_operator::log_or, command_pipe ()});
-
- command c; // Command being assembled.
-
- // Make sure the command makes sense.
- //
- auto check_command = [&c, this] (const location& l, bool last)
- {
- if (c.out.type == redirect_type::merge &&
- c.err.type == redirect_type::merge)
- fail (l) << "stdout and stderr redirected to each other";
-
- if (!last && c.out.type != redirect_type::none)
- fail (l) << "stdout is both redirected and piped";
- };
-
- // Check that the introducer character differs from '/' if the
- // portable path modifier is specified. Must be called before
- // parse_regex() (see below) to make sure its diagnostics is
- // meaningful.
- //
- // Note that the portable path modifier assumes '/' to be a valid
- // regex character and so makes it indistinguishable from the
- // terminating introducer.
- //
- auto check_regex_mod = [this] (const string& mod,
- const string& re,
- const location& l,
- const char* what)
- {
- // Handles empty regex properly.
- //
- if (mod.find ('/') != string::npos && re[0] == '/')
- fail (l) << "portable path modifier and '/' introducer in "
- << what;
- };
-
- // Pending positions where the next word should go.
- //
- enum class pending
- {
- none,
- program,
- in_string,
- in_document,
- in_file,
- out_merge,
- out_string,
- out_str_regex,
- out_document,
- out_doc_regex,
- out_file,
- err_merge,
- err_string,
- err_str_regex,
- err_document,
- err_doc_regex,
- err_file,
- clean
- };
- pending p (pending::program);
- string mod; // Modifiers for pending in_* and out_* positions.
- here_docs hd; // Expected here-documents.
-
- // Add the next word to either one of the pending positions or to
- // program arguments by default.
- //
- auto add_word = [&c, &p, &mod, &check_regex_mod, this] (
- string&& w, const location& l)
- {
- auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
- {
- try
- {
- size_t n;
- if (stoi (w, &n) == fd && n == w.size ())
- {
- r.fd = fd;
- return;
- }
- }
- catch (const exception&) {} // Fall through.
-
- fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect "
- << "file descriptor must be " << fd;
- };
-
- auto add_here_str = [] (redirect& r, string&& w)
- {
- if (r.modifiers.find (':') == string::npos)
- w += '\n';
- r.str = move (w);
- };
-
- auto add_here_str_regex = [&l, &check_regex_mod] (
- redirect& r, int fd, string&& w)
- {
- const char* what (nullptr);
- switch (fd)
- {
- case 1: what = "stdout regex redirect"; break;
- case 2: what = "stderr regex redirect"; break;
- }
-
- check_regex_mod (r.modifiers, w, l, what);
-
- regex_parts rp (parse_regex (w, l, what));
-
- regex_lines& re (r.regex);
- re.intro = rp.intro;
-
- re.lines.emplace_back (
- l.line, l.column, move (rp.value), move (rp.flags));
-
- // Add final blank line unless suppressed.
- //
- // Note that the position is synthetic, but that's ok as we don't
- // expect any diagnostics to refer this line.
- //
- if (r.modifiers.find (':') == string::npos)
- re.lines.emplace_back (l.line, l.column, string (), false);
- };
-
- auto parse_path = [&l, this] (string&& w, const char* what) -> path
- {
- try
- {
- path p (move (w));
-
- if (!p.empty ())
- {
- p.normalize ();
- return p;
- }
-
- fail (l) << "empty " << what << endf;
- }
- catch (const invalid_path& e)
- {
- fail (l) << "invalid " << what << " '" << e.path << "'" << endf;
- }
- };
-
- auto add_file = [&parse_path] (redirect& r, int fd, string&& w)
- {
- const char* what (nullptr);
- switch (fd)
- {
- case 0: what = "stdin redirect path"; break;
- case 1: what = "stdout redirect path"; break;
- case 2: what = "stderr redirect path"; break;
- }
-
- r.file.path = parse_path (move (w), what);
- };
-
- switch (p)
- {
- case pending::none: c.arguments.push_back (move (w)); break;
- case pending::program:
- c.program = parse_path (move (w), "program path");
- break;
-
- case pending::out_merge: add_merge (c.out, w, 2); break;
- case pending::err_merge: add_merge (c.err, w, 1); break;
-
- case pending::in_string: add_here_str (c.in, move (w)); break;
- case pending::out_string: add_here_str (c.out, move (w)); break;
- case pending::err_string: add_here_str (c.err, move (w)); break;
-
- case pending::out_str_regex:
- {
- add_here_str_regex (c.out, 1, move (w));
- break;
- }
- case pending::err_str_regex:
- {
- add_here_str_regex (c.err, 2, move (w));
- break;
- }
-
- // These are handled specially below.
- //
- case pending::in_document:
- case pending::out_document:
- case pending::err_document:
- case pending::out_doc_regex:
- case pending::err_doc_regex: assert (false); break;
-
- case pending::in_file: add_file (c.in, 0, move (w)); break;
- case pending::out_file: add_file (c.out, 1, move (w)); break;
- case pending::err_file: add_file (c.err, 2, move (w)); break;
-
- case pending::clean:
- {
- cleanup_type t;
- switch (mod[0]) // Ok, if empty
- {
- case '!': t = cleanup_type::never; break;
- case '?': t = cleanup_type::maybe; break;
- default: t = cleanup_type::always; break;
- }
-
- c.cleanups.push_back (
- {t, parse_path (move (w), "cleanup path")});
- break;
- }
- }
-
- p = pending::none;
- mod.clear ();
- };
-
- // Make sure we don't have any pending positions to fill.
- //
- auto check_pending = [&p, this] (const location& l)
- {
- const char* what (nullptr);
-
- switch (p)
- {
- case pending::none: break;
- case pending::program: what = "program"; break;
- case pending::in_string: what = "stdin here-string"; break;
- case pending::in_document: what = "stdin here-document end"; break;
- case pending::in_file: what = "stdin file"; break;
- case pending::out_merge: what = "stdout file descriptor"; break;
- case pending::out_string: what = "stdout here-string"; break;
- case pending::out_document: what = "stdout here-document end"; break;
- case pending::out_file: what = "stdout file"; break;
- case pending::err_merge: what = "stderr file descriptor"; break;
- case pending::err_string: what = "stderr here-string"; break;
- case pending::err_document: what = "stderr here-document end"; break;
- case pending::err_file: what = "stderr file"; break;
- case pending::clean: what = "cleanup path"; break;
-
- case pending::out_str_regex:
- {
- what = "stdout here-string regex";
- break;
- }
- case pending::err_str_regex:
- {
- what = "stderr here-string regex";
- break;
- }
- case pending::out_doc_regex:
- {
- what = "stdout here-document regex end";
- break;
- }
- case pending::err_doc_regex:
- {
- what = "stderr here-document regex end";
- break;
- }
- }
-
- if (what != nullptr)
- fail (l) << "missing " << what;
- };
-
- // Parse the redirect operator.
- //
- auto parse_redirect =
- [&c, &expr, &p, &mod, &hd, this] (token& t, const location& l)
- {
- // Our semantics is the last redirect seen takes effect.
- //
- assert (p == pending::none && mod.empty ());
-
- // See if we have the file descriptor.
- //
- unsigned long fd (3);
- if (!t.separated)
- {
- if (c.arguments.empty ())
- fail (l) << "missing redirect file descriptor";
-
- const string& s (c.arguments.back ());
-
- try
- {
- size_t n;
- fd = stoul (s, &n);
-
- if (n != s.size () || fd > 2)
- throw invalid_argument (string ());
- }
- catch (const exception&)
- {
- fail (l) << "invalid redirect file descriptor '" << s << "'";
- }
-
- c.arguments.pop_back ();
- }
-
- type tt (t.type);
-
- // Validate/set default file descriptor.
- //
- switch (tt)
- {
- case type::in_pass:
- case type::in_null:
- case type::in_str:
- case type::in_doc:
- case type::in_file:
- {
- if ((fd = fd == 3 ? 0 : fd) != 0)
- fail (l) << "invalid in redirect file descriptor " << fd;
-
- if (!expr.back ().pipe.empty ())
- fail (l) << "stdin is both piped and redirected";
-
- break;
- }
- case type::out_pass:
- case type::out_null:
- case type::out_trace:
- case type::out_merge:
- case type::out_str:
- case type::out_doc:
- case type::out_file_cmp:
- case type::out_file_ovr:
- case type::out_file_app:
- {
- if ((fd = fd == 3 ? 1 : fd) == 0)
- fail (l) << "invalid out redirect file descriptor " << fd;
-
- break;
- }
- }
-
- mod = move (t.value);
-
- redirect_type rt (redirect_type::none);
- switch (tt)
- {
- case type::in_pass:
- case type::out_pass: rt = redirect_type::pass; break;
-
- case type::in_null:
- case type::out_null: rt = redirect_type::null; break;
-
- case type::out_trace: rt = redirect_type::trace; break;
-
- case type::out_merge: rt = redirect_type::merge; break;
-
- case type::in_str:
- case type::out_str:
- {
- bool re (mod.find ('~') != string::npos);
- assert (tt == type::out_str || !re);
-
- rt = re
- ? redirect_type::here_str_regex
- : redirect_type::here_str_literal;
-
- break;
- }
-
- case type::in_doc:
- case type::out_doc:
- {
- bool re (mod.find ('~') != string::npos);
- assert (tt == type::out_doc || !re);
-
- rt = re
- ? redirect_type::here_doc_regex
- : redirect_type::here_doc_literal;
-
- break;
- }
-
- case type::in_file:
- case type::out_file_cmp:
- case type::out_file_ovr:
- case type::out_file_app: rt = redirect_type::file; break;
- }
-
- redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err);
- redirect_type overriden (r.type);
-
- r = redirect (rt);
-
- // Don't move as still may be used for pending here-document end
- // marker processing.
- //
- r.modifiers = mod;
-
- switch (rt)
- {
- case redirect_type::none:
- case redirect_type::pass:
- case redirect_type::null:
- case redirect_type::trace:
- break;
- case redirect_type::merge:
- switch (fd)
- {
- case 0: assert (false); break;
- case 1: p = pending::out_merge; break;
- case 2: p = pending::err_merge; break;
- }
- break;
- case redirect_type::here_str_literal:
- switch (fd)
- {
- case 0: p = pending::in_string; break;
- case 1: p = pending::out_string; break;
- case 2: p = pending::err_string; break;
- }
- break;
- case redirect_type::here_str_regex:
- switch (fd)
- {
- case 0: assert (false); break;
- case 1: p = pending::out_str_regex; break;
- case 2: p = pending::err_str_regex; break;
- }
- break;
- case redirect_type::here_doc_literal:
- switch (fd)
- {
- case 0: p = pending::in_document; break;
- case 1: p = pending::out_document; break;
- case 2: p = pending::err_document; break;
- }
- break;
- case redirect_type::here_doc_regex:
- switch (fd)
- {
- case 0: assert (false); break;
- case 1: p = pending::out_doc_regex; break;
- case 2: p = pending::err_doc_regex; break;
- }
- break;
- case redirect_type::file:
- switch (fd)
- {
- case 0: p = pending::in_file; break;
- case 1: p = pending::out_file; break;
- case 2: p = pending::err_file; break;
- }
-
- // Also sets for stdin, but this is harmless.
- //
- r.file.mode = tt == type::out_file_ovr
- ? redirect_fmode::overwrite
- : (tt == type::out_file_app
- ? redirect_fmode::append
- : redirect_fmode::compare);
-
- break;
-
- case redirect_type::here_doc_ref: assert (false); break;
- }
-
- // If we are overriding a here-document, then remove the reference
- // to this command redirect from the corresponding here_doc object.
- //
- if (!pre_parse_ &&
- (overriden == redirect_type::here_doc_literal ||
- overriden == redirect_type::here_doc_regex))
- {
- size_t e (expr.size () - 1);
- size_t p (expr.back ().pipe.size ());
- int f (static_cast<int> (fd));
-
- for (here_doc& d: hd)
- {
- small_vector<here_redirect, 2>& rs (d.redirects);
-
- auto i (find_if (rs.begin (), rs.end (),
- [e, p, f] (const here_redirect& r)
- {
- return r.expr == e &&
- r.pipe == p &&
- r.fd == f;
- }));
-
- if (i != rs.end ())
- {
- rs.erase (i);
- break;
- }
- }
- }
- };
-
- // Set pending cleanup type.
- //
- auto parse_clean = [&p, &mod] (token& t)
- {
- p = pending::clean;
- mod = move (t.value);
- };
-
- const location ll (get_location (t)); // Line location.
-
- // Keep parsing chunks of the command line until we see one of the
- // "terminators" (newline, semicolon, exit status comparison, etc).
- //
- location l (ll);
- names ns; // Reuse to reduce allocations.
-
- for (bool done (false); !done; l = get_location (t))
- {
- switch (tt)
- {
- case type::semi:
- case type::colon:
- case type::newline:
- {
- done = true;
- break;
- }
-
- case type::equal:
- case type::not_equal:
- {
- if (!pre_parse_)
- check_pending (l);
-
- c.exit = parse_command_exit (t, tt);
-
- // Only a limited set of things can appear after the exit status
- // so we check this here.
- //
- switch (tt)
- {
- case type::semi:
- case type::colon:
- case type::newline:
-
- case type::pipe:
- case type::log_or:
- case type::log_and:
- break;
- default:
- fail (t) << "unexpected " << t << " after command exit status";
- }
-
- break;
- }
-
- case type::pipe:
- case type::log_or:
- case type::log_and:
-
- case type::in_pass:
- case type::out_pass:
-
- case type::in_null:
- case type::out_null:
-
- case type::out_trace:
-
- case type::out_merge:
-
- case type::in_str:
- case type::in_doc:
- case type::out_str:
- case type::out_doc:
-
- case type::in_file:
- case type::out_file_cmp:
- case type::out_file_ovr:
- case type::out_file_app:
-
- case type::clean:
- {
- if (pre_parse_)
- {
- // The only things we need to handle here are the here-document
- // and here-document regex end markers since we need to know
- // how many of them to pre-parse after the command.
- //
- switch (tt)
- {
- case type::in_doc:
- case type::out_doc:
- mod = move (t.value);
-
- bool re (mod.find ('~') != string::npos);
- const char* what (re
- ? "here-document regex end marker"
- : "here-document end marker");
-
- // We require the end marker to be a literal, unquoted word.
- // In particularm, we don't allow quoted because of cases
- // like foo"$bar" (where we will see word 'foo').
- //
- next (t, tt);
-
- // We require the end marker to be an unquoted or completely
- // quoted word. The complete quoting becomes important for
- // cases like foo"$bar" (where we will see word 'foo').
- //
- // For good measure we could have also required it to be
- // separated from the following token, but out grammar
- // allows one to write >>EOO;. The problematic sequence
- // would be >>FOO$bar -- on reparse it will be expanded
- // as a single word.
- //
- if (tt != type::word || t.value.empty ())
- fail (t) << "expected " << what;
-
- peek ();
- const token& p (peeked ());
- if (!p.separated)
- {
- switch (p.type)
- {
- case type::dollar:
- case type::lparen:
- fail (p) << what << " must be literal";
- }
- }
-
- quote_type qt (t.qtype);
- switch (qt)
- {
- case quote_type::unquoted:
- qt = quote_type::single; // Treat as single-quoted.
- break;
- case quote_type::single:
- case quote_type::double_:
- if (t.qcomp)
- break;
- // Fall through.
- case quote_type::mixed:
- fail (t) << "partially-quoted " << what;
- }
-
- regex_parts r;
- string end (move (t.value));
-
- if (re)
- {
- check_regex_mod (mod, end, l, what);
-
- r = parse_regex (end, l, what);
- end = move (r.value); // The "cleared" end marker.
- }
-
- bool literal (qt == quote_type::single);
- bool shared (false);
-
- for (const auto& d: hd)
- {
- if (d.end == end)
- {
- auto check = [&t, &end, &re, this] (bool c,
- const char* what)
- {
- if (!c)
- fail (t) << "different " << what
- << " for shared here-document "
- << (re ? "regex '" : "'") << end << "'";
- };
-
- check (d.modifiers == mod, "modifiers");
- check (d.literal == literal, "quoting");
-
- if (re)
- {
- check (d.regex == r.intro, "introducers");
- check (d.regex_flags == r.flags, "global flags");
- }
-
- shared = true;
- break;
- }
- }
-
- if (!shared)
- hd.push_back (
- here_doc {
- {},
- move (end),
- literal,
- move (mod),
- r.intro, move (r.flags)});
-
- break;
- }
-
- next (t, tt);
- break;
- }
-
- // If this is one of the operators/separators, check that we
- // don't have any pending locations to be filled.
- //
- check_pending (l);
-
- // Note: there is another one in the inner loop below.
- //
- switch (tt)
- {
- case type::pipe:
- case type::log_or:
- case type::log_and:
- {
- // Check that the previous command makes sense.
- //
- check_command (l, tt != type::pipe);
- expr.back ().pipe.push_back (move (c));
-
- c = command ();
- p = pending::program;
-
- if (tt != type::pipe)
- {
- expr_operator o (tt == type::log_or
- ? expr_operator::log_or
- : expr_operator::log_and);
- expr.push_back ({o, command_pipe ()});
- }
-
- break;
- }
-
- case type::in_pass:
- case type::out_pass:
-
- case type::in_null:
- case type::out_null:
-
- case type::out_trace:
-
- case type::out_merge:
-
- case type::in_str:
- case type::in_doc:
- case type::out_str:
- case type::out_doc:
-
- case type::in_file:
- case type::out_file_cmp:
- case type::out_file_ovr:
- case type::out_file_app:
- {
- parse_redirect (t, l);
- break;
- }
-
- case type::clean:
- {
- parse_clean (t);
- break;
- }
-
- default: assert (false); break;
- }
-
- next (t, tt);
- break;
- }
- default:
- {
- // Here-document end markers are literal (we verified that above
- // during pre-parsing) and we need to know whether they were
- // quoted. So handle this case specially.
- //
- {
- int fd;
- switch (p)
- {
- case pending::in_document: fd = 0; break;
- case pending::out_document:
- case pending::out_doc_regex: fd = 1; break;
- case pending::err_document:
- case pending::err_doc_regex: fd = 2; break;
- default: fd = -1; break;
- }
-
- if (fd != -1)
- {
- here_redirect rd {
- expr.size () - 1, expr.back ().pipe.size (), fd};
-
- string end (move (t.value));
-
- regex_parts r;
-
- if (p == pending::out_doc_regex ||
- p == pending::err_doc_regex)
- {
- // We can't fail here as we already parsed all the end
- // markers during pre-parsing stage, and so no need in the
- // description.
- //
- r = parse_regex (end, l, "");
- end = move (r.value); // The "cleared" end marker.
- }
-
- bool shared (false);
- for (auto& d: hd)
- {
- // No need to check that redirects that share here-document
- // have the same modifiers, etc. That have been done during
- // pre-parsing.
- //
- if (d.end == end)
- {
- d.redirects.emplace_back (rd);
- shared = true;
- break;
- }
- }
-
- if (!shared)
- hd.push_back (
- here_doc {
- {rd},
- move (end),
- (t.qtype == quote_type::unquoted ||
- t.qtype == quote_type::single),
- move (mod),
- r.intro, move (r.flags)});
-
- p = pending::none;
- mod.clear ();
-
- next (t, tt);
- break;
- }
- }
-
- // Parse the next chunk as simple names to get expansion, etc.
- // Note that we do it in the chunking mode to detect whether
- // anything in each chunk is quoted.
- //
- // @@ PAT: should we support pattern expansion? This is even
- // fuzzier than the variable case above. Though this is the
- // shell semantics. Think what happens when we do rm *.txt?
- //
- reset_quoted (t);
- parse_names (t, tt,
- ns,
- pattern_mode::ignore,
- true,
- "command line",
- nullptr);
-
- if (pre_parse_) // Nothing else to do if we are pre-parsing.
- break;
-
- // Process what we got. Determine whether anything inside was
- // quoted (note that the current token is "next" and is not part
- // of this).
- //
- bool q ((quoted () -
- (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
-
- for (name& n: ns)
- {
- string s;
-
- try
- {
- s = value_traits<string>::convert (move (n), nullptr);
- }
- catch (const invalid_argument&)
- {
- diag_record dr (fail (l));
- dr << "invalid string value ";
- to_stream (dr.os, n, true); // Quote.
- }
-
- // If it is a quoted chunk, then we add the word as is.
- // Otherwise we re-lex it. But if the word doesn't contain any
- // interesting characters (operators plus quotes/escapes),
- // then no need to re-lex.
- //
- // NOTE: update quoting (script.cxx:to_stream_q()) if adding
- // any new characters.
- //
- if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
- add_word (move (s), l);
- else
- {
- // If the chunk re-parsing results in error, our diagnostics
- // will look like this:
- //
- // <string>:1:4: error: stdout merge redirect file descriptor must be 2
- // testscript:2:5: info: while parsing string '1>&a'
- //
- auto df = make_diag_frame (
- [s, &l](const diag_record& dr)
- {
- dr << info (l) << "while parsing string '" << s << "'";
- });
-
- // When re-lexing we do "effective escaping" and only for
- // ['"\] (quotes plus the backslash itself). In particular,
- // there is no way to escape redirects, operators, etc. The
- // idea is to prefer quoting except for passing literal
- // quotes, for example:
- //
- // args = \"&foo\"
- // cmd $args # cmd &foo
- //
- // args = 'x=\"foo bar\"'
- // cmd $args # cmd x="foo bar"
- //
-
- istringstream is (s);
- path_name in ("<string>");
- lexer lex (is, in,
- lexer_mode::command_expansion,
- "\'\"\\");
-
- // Treat the first "sub-token" as always separated from what
- // we saw earlier.
- //
- // Note that this is not "our" token so we cannot do
- // fail(t). Rather we should do fail(l).
- //
- token t (lex.next ());
- location l (build2::get_location (t, in));
- t.separated = true;
-
- string w;
- bool f (t.type == type::eos); // If the whole thing is empty.
-
- for (; t.type != type::eos; t = lex.next ())
- {
- type tt (t.type);
- l = build2::get_location (t, in);
-
- // Re-lexing double-quotes will recognize $, ( inside as
- // tokens so we have to reverse them back. Since we don't
- // treat spaces as separators we can be sure we will get
- // it right.
- //
- switch (tt)
- {
- case type::dollar: w += '$'; continue;
- case type::lparen: w += '('; continue;
- }
-
- // Retire the current word. We need to distinguish between
- // empty and non-existent (e.g., > vs >"").
- //
- if (!w.empty () || f)
- {
- add_word (move (w), l);
- f = false;
- }
-
- if (tt == type::word)
- {
- w = move (t.value);
- f = true;
- continue;
- }
-
- // If this is one of the operators/separators, check that
- // we don't have any pending locations to be filled.
- //
- check_pending (l);
-
- // Note: there is another one in the outer loop above.
- //
- switch (tt)
- {
- case type::pipe:
- case type::log_or:
- case type::log_and:
- {
- // Check that the previous command makes sense.
- //
- check_command (l, tt != type::pipe);
- expr.back ().pipe.push_back (move (c));
-
- c = command ();
- p = pending::program;
-
- if (tt != type::pipe)
- {
- expr_operator o (tt == type::log_or
- ? expr_operator::log_or
- : expr_operator::log_and);
- expr.push_back ({o, command_pipe ()});
- }
-
- break;
- }
-
- case type::in_pass:
- case type::out_pass:
-
- case type::in_null:
- case type::out_null:
-
- case type::out_trace:
-
- case type::out_merge:
-
- case type::in_str:
- case type::out_str:
-
- case type::in_file:
- case type::out_file_cmp:
- case type::out_file_ovr:
- case type::out_file_app:
- {
- parse_redirect (t, l);
- break;
- }
-
- case type::clean:
- {
- parse_clean (t);
- break;
- }
-
- case type::in_doc:
- case type::out_doc:
- {
- fail (l) << "here-document redirect in expansion";
- break;
- }
- }
- }
-
- // Don't forget the last word.
- //
- if (!w.empty () || f)
- add_word (move (w), l);
- }
- }
-
- ns.clear ();
- break;
- }
- }
- }
-
- if (!pre_parse_)
- {
- // Verify we don't have anything pending to be filled and the
- // command makes sense.
- //
- check_pending (l);
- check_command (l, true);
-
- expr.back ().pipe.push_back (move (c));
- }
-
- return make_pair (move (expr), move (hd));
- }
-
- command_exit parser::
- parse_command_exit (token& t, type& tt)
- {
- // enter: equal/not_equal
- // leave: token after exit status (one parse_names() chunk)
-
- exit_comparison comp (tt == type::equal
- ? exit_comparison::eq
- : exit_comparison::ne);
-
- // The next chunk should be the exit status.
- //
- next (t, tt);
- location l (get_location (t));
- names ns (parse_names (t, tt,
- pattern_mode::ignore,
- true,
- "exit status",
- nullptr));
- unsigned long es (256);
-
- if (!pre_parse_)
- {
- try
- {
- if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ())
- es = stoul (ns[0].value);
- }
- catch (const exception&) {} // Fall through.
-
- if (es > 255)
- {
- diag_record dr;
-
- dr << fail (l) << "expected exit status instead of ";
- to_stream (dr.os, ns, true); // Quote.
-
- dr << info << "exit status is an unsigned integer less than 256";
- }
- }
-
- return command_exit {comp, static_cast<uint8_t> (es)};
- }
-
- void parser::
- parse_here_documents (token& t, type& tt,
- pair<command_expr, here_docs>& p)
- {
- // enter: newline
- // leave: newline
-
- // Parse here-document fragments in the order they were mentioned on
- // the command line.
- //
- for (here_doc& h: p.second)
- {
- // Switch to the here-line mode which is like single/double-quoted
- // string but recognized the newline as a separator.
- //
- mode (h.literal
- ? lexer_mode::here_line_single
- : lexer_mode::here_line_double);
- next (t, tt);
-
- parsed_doc v (
- parse_here_document (t, tt, h.end, h.modifiers, h.regex));
-
- // If all the here-document redirects are overridden, then we just
- // drop the fragment.
- //
- if (!pre_parse_ && !h.redirects.empty ())
- {
- auto i (h.redirects.cbegin ());
-
- command& c (p.first[i->expr].pipe[i->pipe]);
- redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err);
-
- if (v.re)
- {
- assert (r.type == redirect_type::here_doc_regex);
-
- r.regex = move (v.regex);
- r.regex.flags = move (h.regex_flags);
- }
- else
- {
- assert (r.type == redirect_type::here_doc_literal);
-
- r.str = move (v.str);
- }
-
- r.end = move (h.end);
- r.end_line = v.end_line;
- r.end_column = v.end_column;
-
- // Note that our references cannot be invalidated because the
- // command_expr/command-pipe vectors already contain all their
- // elements.
- //
- for (++i; i != h.redirects.cend (); ++i)
- {
- command& c (p.first[i->expr].pipe[i->pipe]);
-
- (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) =
- redirect (redirect_type::here_doc_ref, r);
- }
- }
-
- expire_mode ();
- }
- }
-
- parser::parsed_doc parser::
- parse_here_document (token& t, type& tt,
- const string& em,
- const string& mod,
- char re)
- {
- // enter: first token on first line
- // leave: newline (after end marker)
-
- // String literal. Note that when decide if to terminate the previously
- // added line with a newline, we need to distinguish a yet empty result
- // and the one that has a single blank line added.
- //
- optional<string> rs;
-
- regex_lines rre;
-
- // Here-documents can be indented. The leading whitespaces of the end
- // marker line (called strip prefix) determine the indentation. Every
- // other line in the here-document should start with this prefix which
- // is automatically stripped. The only exception is a blank line.
- //
- // The fact that the strip prefix is only known at the end, after
- // seeing all the lines, is rather inconvenient. As a result, the way
- // we implement this is a bit hackish (though there is also something
- // elegant about it): at the end of the pre-parse stage we are going
- // re-examine the sequence of tokens that comprise this here-document
- // and "fix up" the first token of each line by stripping the prefix.
- //
- string sp;
-
- // Remember the position of the first token in this here-document.
- //
- size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
-
- // We will use the location of the first token on the line for the
- // regex diagnostics. At the end of the loop it will point to the
- // beginning of the end marker.
- //
- location l;
-
- while (tt != type::eos)
- {
- l = get_location (t);
-
- // Check if this is the end marker. For starters, it should be a
- // single, unquoted word followed by a newline.
- //
- if (tt == type::word &&
- t.qtype == quote_type::unquoted &&
- peek () == type::newline)
- {
- const string& v (t.value);
-
- size_t vn (v.size ());
- size_t en (em.size ());
-
- // Then check that it ends with the end marker.
- //
- if (vn >= en && v.compare (vn - en, en, em) == 0)
- {
- // Now check that the prefix only contains whitespaces.
- //
- size_t n (vn - en);
-
- if (v.find_first_not_of (" \t") >= n)
- {
- assert (pre_parse_ || n == 0); // Should have been stripped.
-
- if (n != 0)
- sp.assign (v, 0, n); // Save the strip prefix.
-
- next (t, tt); // Get the newline.
- break;
- }
- }
- }
-
- // Expand the line (can be blank).
- //
- // @@ PAT: one could argue that if we do it in variables, then we
- // should do it here as well. Though feels bizarre.
- //
- names ns (tt != type::newline
- ? parse_names (t, tt,
- pattern_mode::ignore,
- false,
- "here-document line",
- nullptr)
- : names ());
-
- if (!pre_parse_)
- {
- // What shall we do if the expansion results in multiple names?
- // For, example if the line contains just the variable expansion
- // and it is of type strings. Adding all the elements space-
- // separated seems like the natural thing to do.
- //
- string s;
- for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
- {
- string n;
-
- try
- {
- n = value_traits<string>::convert (move (*i), nullptr);
- }
- catch (const invalid_argument&)
- {
- fail (l) << "invalid string value '" << *i << "'";
- }
-
- if (i == b)
- s = move (n);
- else
- {
- s += ' ';
- s += n;
- }
- }
-
- if (!re)
- {
- // Add newline after previous line.
- //
- if (rs)
- {
- *rs += '\n';
- *rs += s;
- }
- else
- rs = move (s);
- }
- else
- {
- // Due to expansion we can end up with multiple lines. If empty
- // then will add a blank textual literal.
- //
- for (size_t p (0); p != string::npos; )
- {
- string ln;
- size_t np (s.find ('\n', p));
-
- if (np != string::npos)
- {
- ln = string (s, p, np - p);
- p = np + 1;
- }
- else
- {
- ln = string (s, p);
- p = np;
- }
-
- if (ln[0] != re) // Line doesn't start with regex introducer.
- {
- // This is a line-char literal (covers blank lines as well).
- //
- // Append textual literal.
- //
- rre.lines.emplace_back (l.line, l.column, move (ln), false);
- }
- else // Line starts with the regex introducer.
- {
- // This is a char-regex, or a sequence of line-regex syntax
- // characters or both (in this specific order). So we will
- // add regex (with optional special characters) or special
- // literal.
- //
- size_t p (ln.find (re, 1));
- if (p == string::npos)
- {
- // No regex, just a sequence of syntax characters.
- //
- string spec (ln, 1);
- if (spec.empty ())
- fail (l) << "no syntax line characters";
-
- // Append special literal.
- //
- rre.lines.emplace_back (
- l.line, l.column, move (spec), true);
- }
- else
- {
- // Regex (probably with syntax characters).
- //
- regex_parts re;
-
- // Empty regex is a special case repesenting a blank line.
- //
- if (p == 1)
- // Position to optional specal characters of an empty
- // regex.
- //
- ++p;
- else
- // Can't fail as all the pre-conditions verified
- // (non-empty with both introducers in place), so no
- // description required.
- //
- re = parse_regex (ln, l, "", &p);
-
- // Append regex with optional special characters.
- //
- rre.lines.emplace_back (l.line, l.column,
- move (re.value), move (re.flags),
- string (ln, p));
- }
- }
- }
- }
- }
-
- // We should expand the whole line at once so this would normally be
- // a newline but can also be an end-of-stream.
- //
- if (tt == type::newline)
- next (t, tt);
- else
- assert (tt == type::eos);
- }
-
- if (tt == type::eos)
- fail (t) << "missing here-document end marker '" << em << "'";
-
- if (pre_parse_)
- {
- // Strip the indentation prefix if there is one.
- //
- assert (replay_ == replay::save);
-
- if (!sp.empty ())
- {
- size_t sn (sp.size ());
-
- for (; ri != replay_data_.size (); ++ri)
- {
- token& rt (replay_data_[ri].token);
-
- if (rt.type == type::newline) // Blank
- continue;
-
- if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0)
- fail (rt) << "unindented here-document line";
-
- // If the word is equal to the strip prefix then we have to drop
- // the token. Note that simply making it an empty word won't
- // have the same semantics. For instance, it would trigger
- // concatenated expansion.
- //
- if (rt.value.size () == sn)
- replay_data_.erase (replay_data_.begin () + ri);
- else
- {
- rt.value.erase (0, sn);
- rt.column += sn;
- ++ri;
- }
-
- // Skip until next newline.
- //
- for (; replay_data_[ri].token.type != type::newline; ++ri) ;
- }
- }
- }
- else
- {
- // Add final newline unless suppressed.
- //
- if (mod.find (':') == string::npos)
- {
- if (re)
- // Note that the position is synthetic, but that's ok as we don't
- // expect any diagnostics to refer this line.
- //
- rre.lines.emplace_back (l.line, l.column, string (), false);
- else if (rs)
- *rs += '\n';
- else
- rs = "\n";
- }
-
- // Finalize regex lines.
- //
- if (re)
- {
- // Empty regex matches nothing, so not of much use.
- //
- if (rre.lines.empty ())
- fail (l) << "empty here-document regex";
-
- rre.intro = re;
- }
- }
-
- return re
- ? parsed_doc (move (rre), l.line, l.column)
- : parsed_doc (rs ? move (*rs) : string (), l.line, l.column);
- }
-
//
// Execute.
//
@@ -2927,20 +1322,99 @@ namespace build2
void parser::
exec_scope_body ()
{
- size_t li (0);
-
runner_->enter (*scope_, scope_->start_loc_);
+ // Note that we rely on "small function object" optimization for the
+ // exec_*() lambdas.
+ //
+ auto exec_set = [this] (const variable& var,
+ token& t, build2::script::token_type& tt,
+ const location&)
+ {
+ next (t, tt);
+ type kind (tt); // Assignment kind.
+
+ // We cannot reuse the value mode (see above for details).
+ //
+ mode (lexer_mode::variable_line);
+ value rhs (parse_variable_line (t, tt));
+
+ if (tt == type::semi)
+ next (t, tt);
+
+ assert (tt == type::newline);
+
+ // Assign.
+ //
+ value& lhs (kind == type::assign
+ ? scope_->assign (var)
+ : scope_->append (var));
+
+ apply_value_attributes (&var, lhs, move (rhs), kind);
+
+ // If we change any of the test.* values, then reset the $*, $N
+ // special aliases.
+ //
+ if (var.name == script_->test_var.name ||
+ var.name == script_->options_var.name ||
+ var.name == script_->arguments_var.name ||
+ var.name == script_->redirects_var.name ||
+ var.name == script_->cleanups_var.name)
+ {
+ scope_->reset_special ();
+ }
+ };
+
+ // Is set later, right before the exec_lines() call.
+ //
+ command_type ct;
+
+ auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt,
+ size_t li,
+ bool single,
+ const location& ll)
+ {
+ // We use the 0 index to signal that this is the only command.
+ // Note that we only do this for test commands.
+ //
+ if (ct == command_type::test && single)
+ li = 0;
+
+ command_expr ce (
+ parse_command_line (t, static_cast<token_type&> (tt)));
+
+ runner_->run (*scope_, ce, ct, li, ll);
+ };
+
+ auto exec_if = [this] (token& t, build2::script::token_type& tt,
+ size_t li,
+ const location& ll)
+ {
+ command_expr ce (
+ parse_command_line (t, static_cast<token_type&> (tt)));
+
+ // Assume if-else always involves multiple commands.
+ //
+ return runner_->run_if (*scope_, ce, li, ll);
+ };
+
+ size_t li (1);
+
if (test* t = dynamic_cast<test*> (scope_))
{
- exec_lines (
- t->tests_.begin (), t->tests_.end (), li, command_type::test);
+ ct = command_type::test;
+
+ exec_lines (t->tests_.begin (), t->tests_.end (),
+ exec_set, exec_cmd, exec_if,
+ li);
}
else if (group* g = dynamic_cast<group*> (scope_))
{
- bool exec_scope (
- exec_lines (
- g->setup_.begin (), g->setup_.end (), li, command_type::setup));
+ ct = command_type::setup;
+
+ bool exec_scope (exec_lines (g->setup_.begin (), g->setup_.end (),
+ exec_set, exec_cmd, exec_if,
+ li));
if (exec_scope)
{
@@ -2998,7 +1472,7 @@ namespace build2
try
{
- take = runner_->run_if (*scope_, ce, ++li, ll);
+ take = runner_->run_if (*scope_, ce, li++, ll);
}
catch (const exit_scope& e)
{
@@ -3106,8 +1580,11 @@ namespace build2
}
}
- exec_lines (
- g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown);
+ ct = command_type::teardown;
+
+ exec_lines (g->tdown_.begin (), g->tdown_.end (),
+ exec_set, exec_cmd, exec_if,
+ li);
}
else
assert (false);
@@ -3117,239 +1594,23 @@ namespace build2
scope_->state = scope_state::passed;
}
- bool parser::
- exec_lines (lines::iterator i, lines::iterator e,
- size_t& li,
- command_type ct)
- {
- try
- {
- token t;
- type tt;
-
- for (; i != e; ++i)
- {
- line& ln (*i);
- line_type lt (ln.type);
-
- assert (path_ == nullptr);
-
- // Set the tokens and start playing.
- //
- replay_data (move (ln.tokens));
-
- // We don't really need to change the mode since we already know
- // the line type.
- //
- next (t, tt);
- const location ll (get_location (t));
-
- switch (lt)
- {
- case line_type::var:
- {
- // Parse.
- //
- string name (move (t.value));
-
- next (t, tt);
- type kind (tt); // Assignment kind.
-
- value rhs (parse_variable_line (t, tt));
-
- if (tt == type::semi)
- next (t, tt);
-
- assert (tt == type::newline);
-
- // Assign.
- //
- const variable& var (*ln.var);
-
- value& lhs (kind == type::assign
- ? scope_->assign (var)
- : scope_->append (var));
-
- build2::parser::apply_value_attributes (
- &var, lhs, move (rhs), kind);
-
- // If we changes any of the test.* values, then reset the $*,
- // $N special aliases.
- //
- if (var.name == script_->test_var.name ||
- var.name == script_->options_var.name ||
- var.name == script_->arguments_var.name ||
- var.name == script_->redirects_var.name ||
- var.name == script_->cleanups_var.name)
- {
- scope_->reset_special ();
- }
-
- replay_stop ();
- break;
- }
- case line_type::cmd:
- {
- // We use the 0 index to signal that this is the only command.
- // Note that we only do this for test commands.
- //
- if (ct == command_type::test && li == 0)
- {
- lines::iterator j (i);
- for (++j; j != e && j->type == line_type::var; ++j) ;
-
- if (j != e) // We have another command.
- ++li;
- }
- else
- ++li;
-
- command_expr ce (parse_command_line (t, tt));
- runner_->run (*scope_, ce, ct, li, ll);
-
- replay_stop ();
- break;
- }
- case line_type::cmd_if:
- case line_type::cmd_ifn:
- case line_type::cmd_elif:
- case line_type::cmd_elifn:
- case line_type::cmd_else:
- {
- next (t, tt); // Skip to start of command.
-
- bool take;
- if (lt != line_type::cmd_else)
- {
- // Assume if-else always involves multiple commands.
- //
- command_expr ce (parse_command_line (t, tt));
- take = runner_->run_if (*scope_, ce, ++li, ll);
-
- if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn)
- take = !take;
- }
- else
- {
- assert (tt == type::newline);
- take = true;
- }
-
- replay_stop ();
-
- // If end is true, then find the 'end' line. Otherwise, find
- // the next if-else line. If skip is true then increment the
- // command line index.
- //
- auto next = [e, &li]
- (lines::iterator j, bool end, bool skip) -> lines::iterator
- {
- // We need to be aware of nested if-else chains.
- //
- size_t n (0);
-
- for (++j; j != e; ++j)
- {
- line_type lt (j->type);
-
- if (lt == line_type::cmd_if ||
- lt == line_type::cmd_ifn)
- ++n;
-
- // If we are nested then we just wait until we get back
- // to the surface.
- //
- if (n == 0)
- {
- switch (lt)
- {
- case line_type::cmd_elif:
- case line_type::cmd_elifn:
- case line_type::cmd_else:
- if (end) break;
- // Fall through.
- case line_type::cmd_end: return j;
- default: break;
- }
- }
-
- if (lt == line_type::cmd_end)
- --n;
-
- if (skip)
- {
- // Note that we don't count else and end as commands.
- //
- switch (lt)
- {
- case line_type::cmd:
- case line_type::cmd_if:
- case line_type::cmd_ifn:
- case line_type::cmd_elif:
- case line_type::cmd_elifn: ++li; break;
- default: break;
- }
- }
- }
-
- assert (false); // Missing end.
- return e;
- };
-
- // If we are taking this branch then we need to parse all the
- // lines until the next if-else line and then skip all the
- // lines until the end (unless next is already end).
- //
- // Otherwise, we need to skip all the lines until the next
- // if-else line and then continue parsing.
- //
- if (take)
- {
- lines::iterator j (next (i, false, false)); // Next if-else.
- if (!exec_lines (i + 1, j, li, ct))
- return false;
-
- i = j->type == line_type::cmd_end ? j : next (j, true, true);
- }
- else
- {
- i = next (i, false, true);
- if (i->type != line_type::cmd_end)
- --i; // Continue with this line (e.g., elif or else).
- }
-
- break;
- }
- case line_type::cmd_end:
- {
- assert (false);
- }
- }
- }
-
- return true;
- }
- catch (const exit_scope& e)
- {
- // Bail out if the scope is exited with the failure status. Otherwise
- // leave the scope normally.
- //
- if (!e.status)
- throw failed ();
-
- replay_stop ();
- return false;
- }
- }
-
//
// The rest.
//
+ // When adding a special variable, don't forget to update lexer::word().
+ //
+ bool parser::
+ special_variable (const string& n) noexcept
+ {
+ return n == "*" || n == "~" || n == "@" || digit (n);
+ }
+
lookup parser::
lookup_variable (name&& qual, string&& name, const location& loc)
{
- assert (!pre_parse_);
+ if (pre_parse_)
+ return lookup ();
if (!qual.empty ())
fail (loc) << "qualified variable name";
@@ -3381,40 +1642,6 @@ namespace build2
: script_->lookup_in_buildfile (name);
}
- size_t parser::
- quoted () const
- {
- size_t r (0);
-
- if (replay_ != replay::play)
- r = lexer_->quoted ();
- else
- {
- // Examine tokens we have replayed since last reset.
- //
- for (size_t i (replay_quoted_); i != replay_i_; ++i)
- if (replay_data_[i].token.qtype != quote_type::unquoted)
- ++r;
- }
-
- return r;
- }
-
- void parser::
- reset_quoted (token& cur)
- {
- if (replay_ != replay::play)
- lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0);
- else
- {
- replay_quoted_ = replay_i_ - 1;
-
- // Must be the same token.
- //
- assert (replay_data_[replay_quoted_].token.qtype == cur.qtype);
- }
- }
-
const string& parser::
insert_id (string id, location l)
{
@@ -3426,76 +1653,6 @@ namespace build2
return p.first->first;
}
-
- void parser::
- set_lexer (lexer* l)
- {
- lexer_ = l;
- base_parser::lexer_ = l;
- }
-
- void parser::
- apply_value_attributes (const variable* var,
- value& lhs,
- value&& rhs,
- const string& attributes,
- token_type kind,
- const path_name& name)
- {
- path_ = &name;
-
- istringstream is (attributes);
- lexer l (is, name, lexer_mode::attributes);
- set_lexer (&l);
-
- token t;
- type tt;
-
- next_with_attributes (t, tt); // Enable `[` recognition.
-
- if (tt != type::lsbrace && tt != type::eos)
- fail (t) << "expected '[' instead of " << t;
-
- attributes_push (t, tt, true);
-
- if (tt != type::eos)
- fail (t) << "trailing junk after ']'";
-
- build2::parser::apply_value_attributes (var, lhs, move (rhs), kind);
- }
-
- // parser::parsed_doc
- //
- parser::parsed_doc::
- parsed_doc (string s, uint64_t l, uint64_t c)
- : str (move (s)), re (false), end_line (l), end_column (c)
- {
- }
-
- parser::parsed_doc::
- parsed_doc (regex_lines&& r, uint64_t l, uint64_t c)
- : regex (move (r)), re (true), end_line (l), end_column (c)
- {
- }
-
- parser::parsed_doc::
- parsed_doc (parsed_doc&& d)
- : re (d.re), end_line (d.end_line), end_column (d.end_column)
- {
- if (re)
- new (&regex) regex_lines (move (d.regex));
- else
- new (&str) string (move (d.str));
- }
-
- parser::parsed_doc::
- ~parsed_doc ()
- {
- if (re)
- regex.~regex_lines ();
- else
- str.~string ();
- }
}
}
}
diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx
index ed3c926..aa64943 100644
--- a/libbuild2/test/script/parser.hxx
+++ b/libbuild2/test/script/parser.hxx
@@ -8,9 +8,10 @@
#include <libbuild2/forward.hxx>
#include <libbuild2/utility.hxx>
-#include <libbuild2/parser.hxx>
#include <libbuild2/diagnostics.hxx>
+#include <libbuild2/script/parser.hxx>
+
#include <libbuild2/test/script/token.hxx>
#include <libbuild2/test/script/script.hxx>
@@ -20,15 +21,14 @@ namespace build2
{
namespace script
{
- class lexer;
class runner;
- class parser: protected build2::parser
+ class parser: public build2::script::parser
{
// Pre-parse. Issue diagnostics and throw failed in case of an error.
//
public:
- parser (context& c): build2::parser (c) {}
+ parser (context& c): build2::script::parser (c) {}
void
pre_parse (script&);
@@ -36,19 +36,6 @@ namespace build2
void
pre_parse (istream&, script&);
- // Helpers.
- //
- // Parse attribute string and perform attribute-guided assignment.
- // Issue diagnostics and throw failed in case of an error.
- //
- void
- apply_value_attributes (const variable*, // Optional.
- value& lhs,
- value&& rhs,
- const string& attributes,
- token_type assign_kind,
- const path_name&); // For diagnostics.
-
// Recursive descent parser.
//
// Usually (but not always) parse functions receive the token/type
@@ -101,83 +88,14 @@ namespace build2
description
parse_trailing_description (token&, token_type&);
- value
- parse_variable_line (token&, token_type&);
-
command_expr
parse_command_line (token&, token_type&);
- // Ordered sequence of here-document redirects that we can expect to
- // see after the command line.
- //
- struct here_redirect
- {
- size_t expr; // Index in command_expr.
- size_t pipe; // Index in command_pipe.
- int fd; // Redirect fd (0 - in, 1 - out, 2 - err).
- };
-
- struct here_doc
- {
- // Redirects that share here_doc. Most of the time we will have no
- // more than 2 (2 - for the roundtrip test cases). Doesn't refer
- // overridden redirects and thus can be empty.
- //
- small_vector<here_redirect, 2> redirects;
-
- string end;
- bool literal; // Literal (single-quote).
- string modifiers;
-
- // Regex introducer ('\0' if not a regex, so can be used as bool).
- //
- char regex;
-
- // Regex global flags. Meaningful if regex != '\0'.
- //
- string regex_flags;
- };
- using here_docs = vector<here_doc>;
-
- pair<command_expr, here_docs>
- parse_command_expr (token&, token_type&);
-
- command_exit
- parse_command_exit (token&, token_type&);
-
- void
- parse_here_documents (token&, token_type&,
- pair<command_expr, here_docs>&);
-
- struct parsed_doc
- {
- union
- {
- string str; // Here-document literal.
- regex_lines regex; // Here-document regex.
- };
-
- bool re; // True if regex.
- uint64_t end_line; // Here-document end marker location.
- uint64_t end_column;
-
- parsed_doc (string, uint64_t line, uint64_t column);
- parsed_doc (regex_lines&&, uint64_t line, uint64_t column);
- parsed_doc (parsed_doc&&); // Note: move constuctible-only type.
- ~parsed_doc ();
- };
-
- parsed_doc
- parse_here_document (token&, token_type&,
- const string&,
- const string& mode,
- char re_intro); // '\0' if not a regex.
-
// Execute. Issue diagnostics and throw failed in case of an error.
//
public:
void
- execute (script& s, runner& r);
+ execute (script&, runner&);
void
execute (scope&, script&, runner&);
@@ -186,13 +104,11 @@ namespace build2
void
exec_scope_body ();
- // Return false if the execution of the scope should be terminated
- // with the success status (e.g., as a result of encountering the exit
- // builtin). For unsuccessful termination the failed exception should
- // be thrown.
+ // Helpers.
//
- bool
- exec_lines (lines::iterator, lines::iterator, size_t&, command_type);
+ public:
+ static bool
+ special_variable (const string&) noexcept;
// Customization hooks.
//
@@ -200,33 +116,13 @@ namespace build2
virtual lookup
lookup_variable (name&&, string&&, const location&) override;
- // Number of quoted tokens since last reset. Note that this includes
- // the peeked token, if any.
- //
- protected:
- size_t
- quoted () const;
-
- void
- reset_quoted (token& current);
-
- size_t replay_quoted_;
-
// Insert id into the id map checking for duplicates.
//
protected:
const string&
insert_id (string, location);
- // Set lexer pointers for both the current and the base classes.
- //
- protected:
- void
- set_lexer (lexer* l);
-
protected:
- using base_parser = build2::parser;
-
script* script_;
// Pre-parse state.
@@ -238,7 +134,7 @@ namespace build2
id_map* id_map_;
include_set* include_set_; // Testscripts already included in this
// scope. Must be absolute and normalized.
- lexer* lexer_;
+
string id_prefix_; // Auto-derived id prefix.
// Execute state.
diff --git a/libbuild2/test/script/regex.cxx b/libbuild2/test/script/regex.cxx
deleted file mode 100644
index 92dd8f1..0000000
--- a/libbuild2/test/script/regex.cxx
+++ /dev/null
@@ -1,439 +0,0 @@
-// file : libbuild2/test/script/regex.cxx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-#include <locale>
-
-#include <libbuild2/test/script/regex.hxx>
-
-using namespace std;
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace regex
- {
- static_assert (alignof (char_string) % 4 == 0,
- "unexpected char_string alignment");
-
- static_assert (alignof (char_regex) % 4 == 0,
- "unexpected char_regex alignment");
-
- static_assert (sizeof (uintptr_t) > sizeof (int16_t),
- "unexpected uintptr_t size");
-
- const line_char line_char::nul (0);
- const line_char line_char::eof (-1);
-
- // line_char
- //
- // We package the special character into uintptr_t with the following
- // steps:
- //
- // - narrow down int value to int16_t (preserves all the valid values)
- //
- // - convert to uint16_t (bitwise representation stays the same, but no
- // need to bother with signed value widening, leftmost bits loss on
- // left shift, etc)
- //
- // - convert to uintptr_t (storage type)
- //
- // - shift left by two bits (the operation is fully reversible as
- // uintptr_t is wider then uint16_t)
- //
- line_char::
- line_char (int c)
- : data_ (
- (static_cast <uintptr_t> (
- static_cast<uint16_t> (
- static_cast<int16_t> (c))) << 2) |
- static_cast <uintptr_t> (line_type::special))
- {
- // @@ How can we allow anything for basic_regex but only subset
- // for our own code?
- //
- const char ex[] = "pn\n\r";
-
- assert (c == 0 || // Null character.
-
- // EOF. Note that is also passed by msvcrt as _Meta_eos
- // enum value.
- //
- c == -1 ||
-
- // libstdc++ line/paragraph separators.
- //
- c == u'\u2028' || c == u'\u2029' ||
-
- (c > 0 && c <= 255 && (
- // Supported regex special characters.
- //
- syntax (c) ||
-
- // libstdc++ look-ahead tokens, newline chars.
- //
- string::traits_type::find (ex, 4, c) != nullptr)));
- }
-
- line_char::
- line_char (const char_string& s, line_pool& p)
- : line_char (&(*p.strings.emplace (s).first))
- {
- }
-
- line_char::
- line_char (char_string&& s, line_pool& p)
- : line_char (&(*p.strings.emplace (move (s)).first))
- {
- }
-
- line_char::
- line_char (char_regex r, line_pool& p)
- // Note: in C++17 can write as p.regexes.emplace_front(move (r))
- //
- : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r))))
- {
- }
-
- bool
- line_char::syntax (char c)
- {
- return string::traits_type::find (
- "()|.*+?{}\\0123456789,=!", 23, c) != nullptr;
- }
-
- bool
- operator== (const line_char& l, const line_char& r)
- {
- line_type lt (l.type ());
- line_type rt (r.type ());
-
- if (lt == rt)
- {
- bool res (true);
-
- switch (lt)
- {
- case line_type::special: res = l.special () == r.special (); break;
- case line_type::regex: assert (false); break;
-
- // Note that we use pointers (rather than vales) comparison
- // assuming that the strings must belong to the same pool.
- //
- case line_type::literal: res = l.literal () == r.literal (); break;
- }
-
- return res;
- }
-
- // Match literal with regex.
- //
- if (lt == line_type::literal && rt == line_type::regex)
- return regex_match (*l.literal (), *r.regex ());
- else if (rt == line_type::literal && lt == line_type::regex)
- return regex_match (*r.literal (), *l.regex ());
-
- return false;
- }
-
- bool
- operator< (const line_char& l, const line_char& r)
- {
- if (l == r)
- return false;
-
- line_type lt (l.type ());
- line_type rt (r.type ());
-
- if (lt != rt)
- return lt < rt;
-
- bool res (false);
-
- switch (lt)
- {
- case line_type::special: res = l.special () < r.special (); break;
- case line_type::literal: res = *l.literal () < *r.literal (); break;
- case line_type::regex: assert (false); break;
- }
-
- return res;
- }
-
- // line_char_locale
- //
-
- // An exemplar locale with the std::ctype<line_char> facet. It is used
- // for the subsequent line char locale objects creation (see below)
- // which normally ends up with a shallow copy of a reference-counted
- // object.
- //
- // Note that creating the line char locales from the exemplar is not
- // merely an optimization: there is a data race in the libstdc++ (at
- // least as of GCC 9.1) implementation of the locale(const locale&,
- // Facet*) constructor (bug #91057).
- //
- // Also note that we install the facet in init() rather than during
- // the object creation to avoid a race with the std::locale-related
- // global variables initialization.
- //
- static locale line_char_locale_exemplar;
-
- void
- init ()
- {
- line_char_locale_exemplar =
- locale (locale (),
- new std::ctype<line_char> ()); // Hidden by ctype bitmask.
- }
-
- line_char_locale::
- line_char_locale ()
- : locale (line_char_locale_exemplar)
- {
- // Make sure init() has been called.
- //
- // Note: has_facet() is hidden by a private function in libc++.
- //
- assert (std::has_facet<std::ctype<line_char>> (*this));
- }
-
- // char_regex
- //
- // Transform regex according to the extended flags {idot}. If regex is
- // malformed then keep transforming, so the resulting string is
- // malformed the same way. We expect the error to be reported by the
- // char_regex ctor.
- //
- static string
- transform (const string& s, char_flags f)
- {
- assert ((f & char_flags::idot) != char_flags::none);
-
- string r;
- bool escape (false);
- bool cclass (false);
-
- for (char c: s)
- {
- // Inverse escaping for a dot which is out of the char class
- // brackets.
- //
- bool inverse (c == '.' && !cclass);
-
- // Handle the escape case. Note that we delay adding the backslash
- // since we may have to inverse things.
- //
- if (escape)
- {
- if (!inverse)
- r += '\\';
-
- r += c;
- escape = false;
-
- continue;
- }
- else if (c == '\\')
- {
- escape = true;
- continue;
- }
-
- // Keep track of being inside the char class brackets, escape if
- // inversion. Note that we never inverse square brackets.
- //
- if (c == '[' && !cclass)
- cclass = true;
- else if (c == ']' && cclass)
- cclass = false;
- else if (inverse)
- r += '\\';
-
- r += c;
- }
-
- if (escape) // Regex is malformed but that's not our problem.
- r += '\\';
-
- return r;
- }
-
- static char_regex::flag_type
- to_std_flags (char_flags f)
- {
- // Note that ECMAScript flag is implied in the absense of a grammar
- // flag.
- //
- return (f & char_flags::icase) != char_flags::none
- ? char_regex::icase
- : char_regex::flag_type ();
- }
-
- char_regex::
- char_regex (const char_string& s, char_flags f)
- : base_type ((f & char_flags::idot) != char_flags::none
- ? transform (s, f)
- : s,
- to_std_flags (f))
- {
- }
- }
- }
- }
-}
-
-namespace std
-{
- using namespace build2::test::script::regex;
-
- // char_traits<line_char>
- //
- line_char* char_traits<line_char>::
- assign (char_type* s, size_t n, char_type c)
- {
- for (size_t i (0); i != n; ++i)
- s[i] = c;
- return s;
- }
-
- line_char* char_traits<line_char>::
- move (char_type* d, const char_type* s, size_t n)
- {
- if (n > 0 && d != s)
- {
- // If d < s then it can't be in [s, s + n) range and so using copy() is
- // safe. Otherwise d + n is out of (s, s + n] range and so using
- // copy_backward() is safe.
- //
- if (d < s)
- std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
- else
- copy_backward (s, s + n, d + n);
- }
-
- return d;
- }
-
- line_char* char_traits<line_char>::
- copy (char_type* d, const char_type* s, size_t n)
- {
- std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
- return d;
- }
-
- int char_traits<line_char>::
- compare (const char_type* s1, const char_type* s2, size_t n)
- {
- for (size_t i (0); i != n; ++i)
- {
- if (s1[i] < s2[i])
- return -1;
- else if (s2[i] < s1[i])
- return 1;
- }
-
- return 0;
- }
-
- size_t char_traits<line_char>::
- length (const char_type* s)
- {
- size_t i (0);
- while (s[i] != char_type::nul)
- ++i;
-
- return i;
- }
-
- const line_char* char_traits<line_char>::
- find (const char_type* s, size_t n, const char_type& c)
- {
- for (size_t i (0); i != n; ++i)
- {
- if (s[i] == c)
- return s + i;
- }
-
- return nullptr;
- }
-
- // ctype<line_char>
- //
- locale::id ctype<line_char>::id;
-
- const line_char* ctype<line_char>::
- is (const char_type* b, const char_type* e, mask* m) const
- {
- while (b != e)
- {
- const char_type& c (*b++);
-
- *m++ = c.type () == line_type::special && c.special () >= 0 &&
- build2::digit (static_cast<char> (c.special ()))
- ? digit
- : 0;
- }
-
- return e;
- }
-
- const line_char* ctype<line_char>::
- scan_is (mask m, const char_type* b, const char_type* e) const
- {
- for (; b != e; ++b)
- {
- if (is (m, *b))
- return b;
- }
-
- return e;
- }
-
- const line_char* ctype<line_char>::
- scan_not (mask m, const char_type* b, const char_type* e) const
- {
- for (; b != e; ++b)
- {
- if (!is (m, *b))
- return b;
- }
-
- return e;
- }
-
- const char* ctype<line_char>::
- widen (const char* b, const char* e, char_type* c) const
- {
- while (b != e)
- *c++ = widen (*b++);
-
- return e;
- }
-
- const line_char* ctype<line_char>::
- narrow (const char_type* b, const char_type* e, char def, char* c) const
- {
- while (b != e)
- *c++ = narrow (*b++, def);
-
- return e;
- }
-
- // regex_traits<line_char>
- //
- int regex_traits<line_char>::
- value (char_type c, int radix) const
- {
- assert (radix == 8 || radix == 10 || radix == 16);
-
- if (c.type () != line_type::special)
- return -1;
-
- const char digits[] = "0123456789ABCDEF";
- const char* d (string::traits_type::find (digits, radix, c.special ()));
- return d != nullptr ? static_cast<int> (d - digits) : -1;
- }
-}
diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx
deleted file mode 100644
index 4114ea4..0000000
--- a/libbuild2/test/script/regex.hxx
+++ /dev/null
@@ -1,684 +0,0 @@
-// file : libbuild2/test/script/regex.hxx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX
-#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX
-
-#include <list>
-#include <regex>
-#include <locale>
-#include <string> // basic_string
-#include <type_traits> // make_unsigned, enable_if, is_*
-#include <unordered_set>
-
-#include <libbuild2/types.hxx>
-#include <libbuild2/utility.hxx>
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace regex
- {
- using char_string = std::basic_string<char>;
-
- enum class char_flags: uint16_t
- {
- icase = 0x1, // Case-insensitive match.
- idot = 0x2, // Invert '.' escaping.
-
- none = 0
- };
-
- // Restricts valid standard flags to just {icase}, extends with custom
- // flags {idot}.
- //
- class char_regex: public std::basic_regex<char>
- {
- public:
- using base_type = std::basic_regex<char>;
-
- char_regex (const char_string&, char_flags = char_flags::none);
- };
-
- // Newlines are line separators and are not part of the line:
- //
- // line<newline>line<newline>
- //
- // Specifically, this means that a customary trailing newline creates a
- // trailing blank line.
- //
- // All characters can inter-compare (though there cannot be regex
- // characters in the output, only in line_regex).
- //
- // Note that we assume that line_regex and the input to regex_match()
- // use the same pool.
- //
- struct line_pool
- {
- // Note that we assume the pool can be moved without invalidating
- // pointers to any already pooled entities.
- //
- std::unordered_set<char_string> strings;
- std::list<char_regex> regexes;
- };
-
- enum class line_type
- {
- special,
- literal,
- regex
- };
-
- struct line_char
- {
- // Steal last two bits from the pointer to store the type.
- //
- private:
- std::uintptr_t data_;
-
- public:
- line_type
- type () const {return static_cast<line_type> (data_ & 0x3);}
-
- int
- special () const
- {
- // Stored as (shifted) int16_t. Perform steps reversed to those
- // that are described in the comment for the corresponding ctor.
- // Note that the intermediate cast to uint16_t is required to
- // portably preserve the -1 special character.
- //
- return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2));
- }
-
- const char_string*
- literal () const
- {
- // Note that 2 rightmost bits are used for packaging line_char
- // type. Read the comment for the corresponding ctor for details.
- //
- return reinterpret_cast<const char_string*> (
- data_ & ~std::uintptr_t (0x3));
- }
-
- const char_regex*
- regex () const
- {
- // Note that 2 rightmost bits are used for packaging line_char
- // type. Read the comment for the corresponding ctor for details.
- //
- return reinterpret_cast<const char_regex*> (
- data_ & ~std::uintptr_t (0x3));
- }
-
- static const line_char nul;
- static const line_char eof;
-
- // Note: creates an uninitialized value.
- //
- line_char () = default;
-
- // Create a special character. The argument value must be one of the
- // following ones:
- //
- // 0 (nul character)
- // -1 (EOF)
- // [()|.*+?{}\0123456789,=!] (excluding [])
- //
- // Note that the constructor is implicit to allow basic_regex to
- // implicitly construct line_chars from special char literals (in
- // particular libstdc++ appends them to an internal line_string).
- //
- // Also note that we extend the valid characters set (see above) with
- // 'p', 'n' (used by libstdc++ for positive/negative look-ahead
- // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used
- // by libstdc++ for newline/newparagraph matching).
- //
- line_char (int);
-
- // Create a literal character.
- //
- // Don't copy string if already pooled.
- //
- explicit
- line_char (const char_string&, line_pool&);
-
- explicit
- line_char (char_string&&, line_pool&);
-
- explicit
- line_char (const char_string* s) // Assume already pooled.
- //
- // Steal two bits from the pointer to package line_char type.
- // Assume (and statically assert) that char_string address is a
- // multiple of four.
- //
- : data_ (reinterpret_cast <std::uintptr_t> (s) |
- static_cast <std::uintptr_t> (line_type::literal)) {}
-
- // Create a regex character.
- //
- explicit
- line_char (char_regex, line_pool&);
-
- explicit
- line_char (const char_regex* r) // Assume already pooled.
- //
- // Steal two bits from the pointer to package line_char type.
- // Assume (and statically assert) that char_regex address is a
- // multiple of four.
- //
- : data_ (reinterpret_cast <std::uintptr_t> (r) |
- static_cast <std::uintptr_t> (line_type::regex)) {}
-
- // Provide basic_regex with the ability to use line_char in a context
- // where a char value is expected (e.g., as a function argument).
- //
- // libstdc++ seems to cast special line_chars only (and such a
- // conversion is meanigfull).
- //
- // msvcrt casts line_chars of arbitrary types instead. The only
- // reasonable strategy is to return a value that differs from any
- // other that can be encountered in a regex expression and so will
- // unlikelly be misinterpreted.
- //
- operator char () const
- {
- return type () == line_type::special ? special () : '\a'; // BELL.
- }
-
- // Return true if the character is a syntax (special) one.
- //
- static bool
- syntax (char);
-
- // Provide basic_regex (such as from msvcrt) with the ability to
- // explicitly cast line_chars to implementation-specific numeric
- // types (enums, msvcrt's _Uelem, etc).
- //
- template <typename T>
- explicit
- operator T () const
- {
- assert (type () == line_type::special);
- return static_cast<T> (special ());
- }
- };
-
- // Perform "deep" characters comparison (for example match literal
- // character with a regex character), rather than just compare them
- // literally. At least one argument must be of a type other than regex
- // as there is no operator==() defined to compare regexes. Characters
- // of the literal type must share the same pool (strings are compared
- // by pointers not by values).
- //
- bool
- operator== (const line_char&, const line_char&);
-
- // Return false if arguments are equal (operator==() returns true).
- // Otherwise if types are different return the value implying that
- // special < literal < regex. If types are special or literal return
- // the result of the respective characters or strings comparison. At
- // least one argument must be of a type other than regex as there is no
- // operator<() defined to compare regexes.
- //
- // While not very natural operation for the class we have, we have to
- // provide some meaningfull semantics for such a comparison as it is
- // required by the char_traits<line_char> specialization. While we
- // could provide it right in that specialization, let's keep it here
- // for basic_regex implementations that potentially can compare
- // line_chars as they compare them with expressions of other types (see
- // below).
- //
- bool
- operator< (const line_char&, const line_char&);
-
- inline bool
- operator!= (const line_char& l, const line_char& r)
- {
- return !(l == r);
- }
-
- inline bool
- operator<= (const line_char& l, const line_char& r)
- {
- return l < r || l == r;
- }
-
- // Provide basic_regex (such as from msvcrt) with the ability to
- // compare line_char to a value of an integral or
- // implementation-specific enum type. In the absense of the following
- // template operators, such a comparisons would be ambigious for
- // integral types (given that there are implicit conversions
- // int->line_char and line_char->char) and impossible for enums.
- //
- // Note that these == and < operators can succeed only for a line_char
- // of the special type. For other types they always return false. That
- // in particular leads to the following case:
- //
- // (lc != c) != (lc < c || c < lc).
- //
- // Note that we can not assert line_char is of the special type as
- // basic_regex (such as from libc++) may need the ability to check if
- // arbitrary line_char belongs to some special characters range (like
- // ['0', '9']).
- //
- template <typename T>
- struct line_char_cmp
- : public std::enable_if<std::is_integral<T>::value ||
- (std::is_enum<T>::value &&
- !std::is_same<T, char_flags>::value)> {};
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator== (const line_char& l, const T& r)
- {
- return l.type () == line_type::special &&
- static_cast<T> (l.special ()) == r;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator== (const T& l, const line_char& r)
- {
- return r.type () == line_type::special &&
- static_cast<T> (r.special ()) == l;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator!= (const line_char& l, const T& r)
- {
- return !(l == r);
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator!= (const T& l, const line_char& r)
- {
- return !(l == r);
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator< (const line_char& l, const T& r)
- {
- return l.type () == line_type::special &&
- static_cast<T> (l.special ()) < r;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- bool
- operator< (const T& l, const line_char& r)
- {
- return r.type () == line_type::special &&
- l < static_cast<T> (r.special ());
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- inline bool
- operator<= (const line_char& l, const T& r)
- {
- return l < r || l == r;
- }
-
- template <typename T, typename = typename line_char_cmp<T>::type>
- inline bool
- operator<= (const T& l, const line_char& r)
- {
- return l < r || l == r;
- }
-
- using line_string = std::basic_string<line_char>;
-
- // Locale that has ctype<line_char> facet installed. Used in the
- // regex_traits<line_char> specialization (see below).
- //
- class line_char_locale: public std::locale
- {
- public:
- // Create a copy of the global C++ locale.
- //
- line_char_locale ();
- };
-
- // Initialize the testscript regex global state. Should be called once
- // prior to creating objects of types from this namespace. Note: not
- // thread-safe.
- //
- void
- init ();
- }
- }
- }
-}
-
-// Standard template specializations for line_char that are required for the
-// basic_regex<line_char> instantiation.
-//
-namespace std
-{
- template <>
- class char_traits<build2::test::script::regex::line_char>
- {
- public:
- using char_type = build2::test::script::regex::line_char;
- using int_type = char_type;
- using off_type = char_traits<char>::off_type;
- using pos_type = char_traits<char>::pos_type;
- using state_type = char_traits<char>::state_type;
-
- static void
- assign (char_type& c1, const char_type& c2) {c1 = c2;}
-
- static char_type*
- assign (char_type*, size_t, char_type);
-
- // Note that eq() and lt() are not constexpr (as required by C++11)
- // because == and < operators for char_type are not constexpr.
- //
- static bool
- eq (const char_type& l, const char_type& r) {return l == r;}
-
- static bool
- lt (const char_type& l, const char_type& r) {return l < r;}
-
- static char_type*
- move (char_type*, const char_type*, size_t);
-
- static char_type*
- copy (char_type*, const char_type*, size_t);
-
- static int
- compare (const char_type*, const char_type*, size_t);
-
- static size_t
- length (const char_type*);
-
- static const char_type*
- find (const char_type*, size_t, const char_type&);
-
- static constexpr char_type
- to_char_type (const int_type& c) {return c;}
-
- static constexpr int_type
- to_int_type (const char_type& c) {return int_type (c);}
-
- // Note that the following functions are not constexpr (as required by
- // C++11) because their return expressions are not constexpr.
- //
- static bool
- eq_int_type (const int_type& l, const int_type& r) {return l == r;}
-
- static int_type eof () {return char_type::eof;}
-
- static int_type
- not_eof (const int_type& c)
- {
- return c != char_type::eof ? c : char_type::nul;
- }
- };
-
- // ctype<> must be derived from both ctype_base and locale::facet (the later
- // supports ref-counting used by the std::locale implementation internally).
- //
- // msvcrt for some reason also derives ctype_base from locale::facet which
- // produces "already a base-class" warning and effectivelly breaks the
- // reference counting. So we derive from ctype_base only in this case.
- //
- template <>
- class ctype<build2::test::script::regex::line_char>: public ctype_base
-#if !defined(_MSC_VER) || _MSC_VER >= 2000
- , public locale::facet
-#endif
- {
- // Used by the implementation only.
- //
- using line_type = build2::test::script::regex::line_type;
-
- public:
- using char_type = build2::test::script::regex::line_char;
-
- static locale::id id;
-
-#if !defined(_MSC_VER) || _MSC_VER >= 2000
- explicit
- ctype (size_t refs = 0): locale::facet (refs) {}
-#else
- explicit
- ctype (size_t refs = 0): ctype_base (refs) {}
-#endif
-
- // While unnecessary, let's keep for completeness.
- //
- virtual
- ~ctype () override = default;
-
- // The C++ standard requires the following functions to call their virtual
- // (protected) do_*() counterparts that provide the real implementations.
- // The only purpose for this indirection is to provide a user with the
- // ability to customize existing (standard) ctype facets. As we do not
- // provide such an ability, for simplicity we will omit the do_*()
- // functions and provide the implementations directly. This should be safe
- // as nobody except us could call those protected functions.
- //
- bool
- is (mask m, char_type c) const
- {
- return m ==
- (c.type () == line_type::special && c.special () >= 0 &&
- build2::digit (static_cast<char> (c.special ()))
- ? digit
- : 0);
- }
-
- const char_type*
- is (const char_type*, const char_type*, mask*) const;
-
- const char_type*
- scan_is (mask, const char_type*, const char_type*) const;
-
- const char_type*
- scan_not (mask, const char_type*, const char_type*) const;
-
- char_type
- toupper (char_type c) const {return c;}
-
- const char_type*
- toupper (char_type*, const char_type* e) const {return e;}
-
- char_type
- tolower (char_type c) const {return c;}
-
- const char_type*
- tolower (char_type*, const char_type* e) const {return e;}
-
- char_type
- widen (char c) const {return char_type (c);}
-
- const char*
- widen (const char*, const char*, char_type*) const;
-
- char
- narrow (char_type c, char def) const
- {
- return c.type () == line_type::special ? c.special () : def;
- }
-
- const char_type*
- narrow (const char_type*, const char_type*, char, char*) const;
- };
-
- // Note: the current application locale must be POSIX. Otherwise the
- // behavior is undefined.
- //
- template <>
- class regex_traits<build2::test::script::regex::line_char>
- {
- public:
- using char_type = build2::test::script::regex::line_char;
- using string_type = build2::test::script::regex::line_string;
- using locale_type = build2::test::script::regex::line_char_locale;
- using char_class_type = regex_traits<char>::char_class_type;
-
- // Workaround for msvcrt bugs. For some reason it assumes such a members
- // to be present in a regex_traits specialization.
- //
-#if defined(_MSC_VER) && _MSC_VER < 2000
- static const ctype_base::mask _Ch_upper = ctype_base::upper;
- static const ctype_base::mask _Ch_alpha = ctype_base::alpha;
-
- // Unsigned numeric type. msvcrt normally casts characters to this type
- // for comparing with some numeric values or for calculating an index in
- // some bit array. Luckily that all relates to the character class
- // handling that we don't support.
- //
- using _Uelem = unsigned int;
-#endif
-
- regex_traits () = default; // Unnecessary but let's keep for completeness.
-
- static size_t
- length (const char_type* p) {return string_type::traits_type::length (p);}
-
- char_type
- translate (char_type c) const {return c;}
-
- // Case-insensitive matching is not supported by line_regex. So there is no
- // reason for the function to be called.
- //
- char_type
- translate_nocase (char_type c) const {assert (false); return c;}
-
- // Return a sort-key - the exact copy of [b, e).
- //
- template <typename I>
- string_type
- transform (I b, I e) const {return string_type (b, e);}
-
- // Return a case-insensitive sort-key. Case-insensitive matching is not
- // supported by line_regex. So there is no reason for the function to be
- // called.
- //
- template <typename I>
- string_type
- transform_primary (I b, I e) const
- {
- assert (false);
- return string_type (b, e);
- }
-
- // POSIX regex grammar and collating elements (e.g., [.tilde.]) in
- // particular are not supported. So there is no reason for the function to
- // be called.
- //
- template <typename I>
- string_type
- lookup_collatename (I, I) const {assert (false); return string_type ();}
-
- // Character classes (e.g., [:lower:]) are not supported. So there is no
- // reason for the function to be called.
- //
- template <typename I>
- char_class_type
- lookup_classname (I, I, bool = false) const
- {
- assert (false);
- return char_class_type ();
- }
-
- // Return false as we don't support character classes (e.g., [:lower:]).
- //
- bool
- isctype (char_type, char_class_type) const {return false;}
-
- int
- value (char_type, int) const;
-
- // Return the locale passed as an argument as we do not expect anything
- // other than POSIX locale, that we also assume to be imbued by default.
- //
- locale_type
- imbue (locale_type l) {return l;}
-
- locale_type
- getloc () const {return locale_type ();}
- };
-
- // We assume line_char to be an unsigned type and express that with the
- // following specialization used by basic_regex implementations.
- //
- // libstdc++ defines unsigned CharT type (regex_traits template parameter)
- // to use as an index in some internal cache regardless if the cache is used
- // for this specialization (and the cache is used only if CharT is char).
- //
- template <>
- struct make_unsigned<build2::test::script::regex::line_char>
- {
- using type = build2::test::script::regex::line_char;
- };
-
- // When used with libc++ the linker complains that it can't find
- // __match_any_but_newline<line_char>::__exec() function. The problem is
- // that the function is only specialized for char and wchar_t
- // (LLVM bug #31409). As line_char has no notion of the newline character we
- // specialize the class template to behave as the __match_any<line_char>
- // instantiation does (that luckily has all the functions in place).
- //
-#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000
- template <>
- class __match_any_but_newline<build2::test::script::regex::line_char>
- : public __match_any<build2::test::script::regex::line_char>
- {
- public:
- using base = __match_any<build2::test::script::regex::line_char>;
- using base::base;
- };
-#endif
-}
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace regex
- {
- class line_regex: public std::basic_regex<line_char>
- {
- public:
- using base_type = std::basic_regex<line_char>;
-
- using base_type::base_type;
-
- line_regex () = default;
-
- // Move string regex together with the pool used to create it.
- //
- line_regex (line_string&& s, line_pool&& p)
- // No move-string ctor for base_type, so emulate it.
- //
- : base_type (s), pool (move (p)) {s.clear ();}
-
- // Move constuctible/assignable-only type.
- //
- line_regex (line_regex&&) = default;
- line_regex (const line_regex&) = delete;
- line_regex& operator= (line_regex&&) = default;
- line_regex& operator= (const line_regex&) = delete;
-
- public:
- line_pool pool;
- };
- }
- }
- }
-}
-
-#include <libbuild2/test/script/regex.ixx>
-
-#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX
diff --git a/libbuild2/test/script/regex.ixx b/libbuild2/test/script/regex.ixx
deleted file mode 100644
index 46db9db..0000000
--- a/libbuild2/test/script/regex.ixx
+++ /dev/null
@@ -1,34 +0,0 @@
-// file : libbuild2/test/script/regex.ixx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- namespace regex
- {
- inline char_flags
- operator&= (char_flags& x, char_flags y)
- {
- return x = static_cast<char_flags> (
- static_cast<uint16_t> (x) & static_cast<uint16_t> (y));
- }
-
- inline char_flags
- operator|= (char_flags& x, char_flags y)
- {
- return x = static_cast<char_flags> (
- static_cast<uint16_t> (x) | static_cast<uint16_t> (y));
- }
-
- inline char_flags
- operator& (char_flags x, char_flags y) {return x &= y;}
-
- inline char_flags
- operator| (char_flags x, char_flags y) {return x |= y;}
- }
- }
- }
-}
diff --git a/libbuild2/test/script/runner.cxx b/libbuild2/test/script/runner.cxx
index b40dea8..03a1f0e 100644
--- a/libbuild2/test/script/runner.cxx
+++ b/libbuild2/test/script/runner.cxx
@@ -3,696 +3,17 @@
#include <libbuild2/test/script/runner.hxx>
-#include <ios> // streamsize
-
-#include <libbutl/regex.mxx>
-#include <libbutl/builtin.mxx>
-#include <libbutl/fdstream.mxx> // fdopen_mode, fddup()
-#include <libbutl/filesystem.mxx> // path_search()
-#include <libbutl/path-pattern.mxx>
-
-#include <libbuild2/variable.hxx>
-#include <libbuild2/filesystem.hxx>
-#include <libbuild2/diagnostics.hxx>
+#include <libbuild2/script/run.hxx>
#include <libbuild2/test/common.hxx>
-#include <libbuild2/test/script/regex.hxx>
-#include <libbuild2/test/script/parser.hxx>
-#include <libbuild2/test/script/builtin-options.hxx>
-
-using namespace std;
-using namespace butl;
-
namespace build2
{
namespace test
{
namespace script
{
- // Normalize a path. Also make the relative path absolute using the
- // scope's working directory unless it is already absolute.
- //
- static path
- normalize (path p, const scope& sp, const location& l)
- {
- path r (p.absolute () ? move (p) : sp.wd_path / move (p));
-
- try
- {
- r.normalize ();
- }
- catch (const invalid_path& e)
- {
- fail (l) << "invalid file path " << e.path;
- }
-
- return r;
- }
-
- // Check if a path is not empty, the referenced file exists and is not
- // empty.
- //
- static bool
- non_empty (const path& p, const location& ll)
- {
- if (p.empty () || !exists (p))
- return false;
-
- try
- {
- ifdstream is (p);
- return is.peek () != ifdstream::traits_type::eof ();
- }
- catch (const io_error& e)
- {
- // While there can be no fault of the test command being currently
- // executed let's add the location anyway to ease the
- // troubleshooting. And let's stick to that principle down the road.
- //
- fail (ll) << "unable to read " << p << ": " << e << endf;
- }
- }
-
- // If the file exists, not empty and not larger than 4KB print it to the
- // diag record. The file content goes from the new line and is not
- // indented.
- //
- static void
- print_file (diag_record& d, const path& p, const location& ll)
- {
- if (exists (p))
- {
- try
- {
- ifdstream is (p, ifdstream::badbit);
-
- if (is.peek () != ifdstream::traits_type::eof ())
- {
- char buf[4096 + 1]; // Extra byte is for terminating '\0'.
-
- // Note that the string is always '\0'-terminated with a maximum
- // sizeof (buf) - 1 bytes read.
- //
- is.getline (buf, sizeof (buf), '\0');
-
- // Print if the file fits 4KB-size buffer. Note that if it
- // doesn't the failbit is set.
- //
- if (is.eof ())
- {
- // Suppress the trailing newline character as the diag record
- // adds its own one when flushed.
- //
- streamsize n (is.gcount ());
- assert (n > 0);
-
- // Note that if the file contains '\0' it will also be counted
- // by gcount(). But even in the worst case we will stay in the
- // buffer boundaries (and so not crash).
- //
- if (buf[n - 1] == '\n')
- buf[n - 1] = '\0';
-
- d << '\n' << buf;
- }
- }
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to read " << p << ": " << e;
- }
- }
- }
-
- // Print first 10 directory sub-entries to the diag record. The directory
- // must exist.
- //
- static void
- print_dir (diag_record& d, const dir_path& p, const location& ll)
- {
- try
- {
- size_t n (0);
- for (const dir_entry& de: dir_iterator (p,
- false /* ignore_dangling */))
- {
- if (n++ < 10)
- d << '\n' << (de.ltype () == entry_type::directory
- ? path_cast<dir_path> (de.path ())
- : de.path ());
- }
-
- if (n > 10)
- d << "\nand " << n - 10 << " more file(s)";
- }
- catch (const system_error& e)
- {
- fail (ll) << "unable to iterate over " << p << ": " << e;
- }
- }
-
- // Save a string to the file. Fail if exception is thrown by underlying
- // operations.
- //
- static void
- save (const path& p, const string& s, const location& ll)
- {
- try
- {
- ofdstream os (p);
- os << s;
- os.close ();
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to write to " << p << ": " << e;
- }
- }
-
- // Return the value of the test.target variable.
- //
- static inline const target_triplet&
- test_target (const script& s)
- {
- // @@ Would be nice to use cached value from test::common_data.
- //
- if (auto r = cast_null<target_triplet> (s.test_target["test.target"]))
- return *r;
-
- // We set it to default value in init() so it can only be NULL if the
- // user resets it.
- //
- fail << "invalid test.target value" << endf;
- }
-
- // Transform string according to here-* redirect modifiers from the {/}
- // set.
- //
- static string
- transform (const string& s,
- bool regex,
- const string& modifiers,
- const script& scr)
- {
- if (modifiers.find ('/') == string::npos)
- return s;
-
- // For targets other than Windows leave the string intact.
- //
- if (test_target (scr).class_ != "windows")
- return s;
-
- // Convert forward slashes to Windows path separators (escape for
- // regex).
- //
- string r;
- for (size_t p (0);;)
- {
- size_t sp (s.find ('/', p));
-
- if (sp != string::npos)
- {
- r.append (s, p, sp - p);
- r.append (regex ? "\\\\" : "\\");
- p = sp + 1;
- }
- else
- {
- r.append (s, p, sp);
- break;
- }
- }
-
- return r;
- }
-
- // Check if the test command output matches the expected result (redirect
- // value). Noop for redirect types other than none, here_*.
- //
- static bool
- check_output (const path& pr,
- const path& op,
- const path& ip,
- const redirect& rd,
- const location& ll,
- scope& sp,
- bool diag,
- const char* what)
- {
- auto input_info = [&ip, &ll] (diag_record& d)
- {
- if (non_empty (ip, ll))
- d << info << "stdin: " << ip;
- };
-
- auto output_info = [&what, &ll] (diag_record& d,
- const path& p,
- const char* prefix = "",
- const char* suffix = "")
- {
- if (non_empty (p, ll))
- d << info << prefix << what << suffix << ": " << p;
- else
- d << info << prefix << what << suffix << " is empty";
- };
-
- if (rd.type == redirect_type::none)
- {
- // Check that there is no output produced.
- //
- assert (!op.empty ());
-
- if (!non_empty (op, ll))
- return true;
-
- if (diag)
- {
- diag_record d (error (ll));
- d << pr << " unexpectedly writes to " << what <<
- info << what << ": " << op;
-
- input_info (d);
-
- // Print cached output.
- //
- print_file (d, op, ll);
- }
-
- // Fall through (to return false).
- //
- }
- else if (rd.type == redirect_type::here_str_literal ||
- rd.type == redirect_type::here_doc_literal ||
- (rd.type == redirect_type::file &&
- rd.file.mode == redirect_fmode::compare))
- {
- // The expected output is provided as a file or as a string. Save the
- // string to a file in the latter case.
- //
- assert (!op.empty ());
-
- path eop;
-
- if (rd.type == redirect_type::file)
- eop = normalize (rd.file.path, sp, ll);
- else
- {
- eop = path (op + ".orig");
- save (eop, transform (rd.str, false, rd.modifiers, sp.root), ll);
- sp.clean_special (eop);
- }
-
- // Use the diff utility for comparison.
- //
- path dp ("diff");
- process_path pp (run_search (dp, true));
-
- cstrings args {pp.recall_string (), "-u"};
-
- // Ignore Windows newline fluff if that's what we are running on.
- //
- if (test_target (sp.root).class_ == "windows")
- args.push_back ("--strip-trailing-cr");
-
- args.push_back (eop.string ().c_str ());
- args.push_back (op.string ().c_str ());
- args.push_back (nullptr);
-
- if (verb >= 2)
- print_process (args);
-
- try
- {
- // Save diff's stdout to a file for troubleshooting and for the
- // optional (if not too large) printing (at the end of
- // diagnostics).
- //
- path ep (op + ".diff");
- auto_fd efd;
-
- try
- {
- efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create);
- sp.clean_special (ep);
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to write to " << ep << ": " << e;
- }
-
- // Diff utility prints the differences to stdout. But for the
- // user it is a part of the test failure diagnostics so let's
- // redirect stdout to stderr.
- //
- process p (pp, args.data (), 0, 2, efd.get ());
- efd.reset ();
-
- if (p.wait ())
- return true;
-
- assert (p.exit);
- const process_exit& pe (*p.exit);
-
- // Note that both POSIX and GNU diff report error by exiting with
- // the code > 1.
- //
- if (!pe.normal () || pe.code () > 1)
- {
- diag_record d (fail (ll));
- print_process (d, args);
- d << " " << pe;
- }
-
- // Output doesn't match the expected result.
- //
- if (diag)
- {
- diag_record d (error (ll));
- d << pr << " " << what << " doesn't match expected";
-
- output_info (d, op);
- output_info (d, eop, "expected ");
- output_info (d, ep, "", " diff");
- input_info (d);
-
- print_file (d, ep, ll);
- }
-
- // Fall through (to return false).
- //
- }
- catch (const process_error& e)
- {
- error (ll) << "unable to execute " << pp << ": " << e;
-
- if (e.child)
- exit (1);
-
- throw failed ();
- }
- }
- else if (rd.type == redirect_type::here_str_regex ||
- rd.type == redirect_type::here_doc_regex)
- {
- // The overall plan is:
- //
- // 1. Create regex line string. While creating its line characters
- // transform regex lines according to the redirect modifiers.
- //
- // 2. Create line regex using the line string. If creation fails
- // then save the (transformed) regex redirect to a file for
- // troubleshooting.
- //
- // 3. Parse the output into the literal line string.
- //
- // 4. Match the output line string with the line regex.
- //
- // 5. If match fails save the (transformed) regex redirect to a file
- // for troubleshooting.
- //
- using namespace regex;
-
- assert (!op.empty ());
-
- // Create regex line string.
- //
- line_pool pool;
- line_string rls;
- const regex_lines rl (rd.regex);
-
- // Parse regex flags.
- //
- // When adding support for new flags don't forget to update
- // parse_regex().
- //
- auto parse_flags = [] (const string& f) -> char_flags
- {
- char_flags r (char_flags::none);
-
- for (char c: f)
- {
- switch (c)
- {
- case 'd': r |= char_flags::idot; break;
- case 'i': r |= char_flags::icase; break;
- default: assert (false); // Error so should have been checked.
- }
- }
-
- return r;
- };
-
- // Return original regex line with the transformation applied.
- //
- auto line = [&rl, &rd, &sp] (const regex_line& l) -> string
- {
- string r;
- if (l.regex) // Regex (possibly empty),
- {
- r += rl.intro;
- r += transform (l.value, true, rd.modifiers, sp.root);
- r += rl.intro;
- r += l.flags;
- }
- else if (!l.special.empty ()) // Special literal.
- r += rl.intro;
- else // Textual literal.
- r += transform (l.value, false, rd.modifiers, sp.root);
-
- r += l.special;
- return r;
- };
-
- // Return regex line location.
- //
- // Note that we rely on the fact that the command and regex lines
- // always belong to the same testscript file.
- //
- auto loc = [&ll] (uint64_t line, uint64_t column) -> location
- {
- location r (ll);
- r.line = line;
- r.column = column;
- return r;
- };
-
- // Save the regex to file for troubleshooting, return the file path
- // it has been saved to.
- //
- // Note that we save the regex on line regex creation failure or if
- // the program output doesn't match.
- //
- auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path
- {
- path rp (op + ".regex");
-
- // Encode here-document regex global flags if present as a file
- // name suffix. For example if icase and idot flags are specified
- // the name will look like:
- //
- // test/1/stdout.regex-di
- //
- if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ())
- rp += '-' + rl.flags;
-
- // Note that it would be more efficient to directly write chunks
- // to file rather than to compose a string first. However we don't
- // bother (about performance) for the sake of the code as we
- // already failed.
- //
- string s;
- for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
- i != e; ++i)
- {
- if (i != b) s += '\n';
- s += line (*i);
- }
-
- save (rp, s, ll);
- return rp;
- };
-
- // Finally create regex line string.
- //
- // Note that diagnostics doesn't refer to the program path as it is
- // irrelevant to failures at this stage.
- //
- char_flags gf (parse_flags (rl.flags)); // Regex global flags.
-
- for (const auto& l: rl.lines)
- {
- if (l.regex) // Regex (with optional special characters).
- {
- line_char c;
-
- // Empty regex is a special case representing the blank line.
- //
- if (l.value.empty ())
- c = line_char ("", pool);
- else
- {
- try
- {
- string s (transform (l.value, true, rd.modifiers, sp.root));
-
- c = line_char (
- char_regex (s, gf | parse_flags (l.flags)), pool);
- }
- catch (const regex_error& e)
- {
- // Print regex_error description if meaningful.
- //
- diag_record d (fail (loc (l.line, l.column)));
-
- if (rd.type == redirect_type::here_str_regex)
- d << "invalid " << what << " regex redirect" << e <<
- info << "regex: '" << line (l) << "'";
- else
- d << "invalid char-regex in " << what << " regex redirect"
- << e <<
- info << "regex line: '" << line (l) << "'";
-
- d << endf;
- }
- }
-
- rls += c; // Append blank literal or regex line char.
- }
- else if (!l.special.empty ()) // Special literal.
- {
- // Literal can not be followed by special characters in the same
- // line.
- //
- assert (l.value.empty ());
- }
- else // Textual literal.
- {
- // Append literal line char.
- //
- rls += line_char (
- transform (l.value, false, rd.modifiers, sp.root), pool);
- }
-
- for (char c: l.special)
- {
- if (line_char::syntax (c))
- rls += line_char (c); // Append special line char.
- else
- fail (loc (l.line, l.column))
- << "invalid syntax character '" << c << "' in " << what
- << " regex redirect" <<
- info << "regex line: '" << line (l) << "'";
- }
- }
-
- // Create line regex.
- //
- line_regex regex;
-
- try
- {
- regex = line_regex (move (rls), move (pool));
- }
- catch (const regex_error& e)
- {
- // Note that line regex creation can not fail for here-string
- // redirect as it doesn't have syntax line chars. That in
- // particular means that end_line and end_column are meaningful.
- //
- assert (rd.type == redirect_type::here_doc_regex);
-
- diag_record d (fail (loc (rd.end_line, rd.end_column)));
-
- // Print regex_error description if meaningful.
- //
- d << "invalid " << what << " regex redirect" << e;
-
- output_info (d, save_regex (), "", " regex");
- }
-
- // Parse the output into the literal line string.
- //
- line_string ls;
-
- try
- {
- // Do not throw when eofbit is set (end of stream reached), and
- // when failbit is set (getline() failed to extract any character).
- //
- // Note that newlines are treated as line-chars separators. That
- // in particular means that the trailing newline produces a blank
- // line-char (empty literal). Empty output produces the zero-length
- // line-string.
- //
- // Also note that we strip the trailing CR characters (otherwise
- // can mismatch when cross-test).
- //
- ifdstream is (op, ifdstream::badbit);
- is.peek (); // Sets eofbit for an empty stream.
-
- while (!is.eof ())
- {
- string s;
- getline (is, s);
-
- // It is safer to strip CRs in cycle, as msvcrt unexplainably
- // adds too much trailing junk to the system_error descriptions,
- // and so it can appear in programs output. For example:
- //
- // ...: Invalid data.\r\r\n
- //
- // Note that our custom operator<<(ostream&, const exception&)
- // removes this junk.
- //
- while (!s.empty () && s.back () == '\r')
- s.pop_back ();
-
- ls += line_char (move (s), regex.pool);
- }
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to read " << op << ": " << e;
- }
-
- // Match the output with the regex.
- //
- if (regex_match (ls, regex)) // Doesn't throw.
- return true;
-
- // Output doesn't match the regex. We save the regex to file for
- // troubleshooting regardless of whether we print the diagnostics or
- // not. We, however, register it for cleanup in the latter case (the
- // expression may still succeed, we can be evaluating the if
- // condition, etc).
- //
- path rp (save_regex ());
-
- if (diag)
- {
- diag_record d (error (ll));
- d << pr << " " << what << " doesn't match regex";
-
- output_info (d, op);
- output_info (d, rp, "", " regex");
- input_info (d);
-
- // Print cached output.
- //
- print_file (d, op, ll);
- }
- else
- sp.clean_special (rp);
-
- // Fall through (to return false).
- //
- }
- else // Noop.
- return true;
-
- return false;
- }
+ using namespace build2::script;
bool default_runner::
test (scope& s) const
@@ -703,7 +24,7 @@ namespace build2
void default_runner::
enter (scope& sp, const location&)
{
- context& ctx (sp.root.target_scope.ctx);
+ context& ctx (sp.context);
auto df = make_diag_frame (
[&sp](const diag_record& dr)
@@ -730,29 +51,25 @@ namespace build2
sp.parent == nullptr
? mkdir_buildignore (
ctx,
- sp.wd_path,
+ *sp.work_dir.path,
sp.root.target_scope.root_scope ()->root_extra->buildignore_file,
2)
- : mkdir (sp.wd_path, 2));
+ : mkdir (*sp.work_dir.path, 2));
if (r == mkdir_status::already_exists)
- fail << "working directory " << sp.wd_path << " already exists" <<
+ fail << diag_path (sp.work_dir) << " already exists" <<
info << "are tests stomping on each other's feet?";
// We don't change the current directory here but indicate that the
// scope test commands will be executed in that directory.
//
if (verb >= 2)
- text << "cd " << sp.wd_path;
-
- sp.clean ({cleanup_type::always, sp.wd_path}, true);
+ text << "cd " << *sp.work_dir.path;
}
void default_runner::
leave (scope& sp, const location& ll)
{
- context& ctx (sp.root.target_scope.ctx);
-
auto df = make_diag_frame (
[&sp](const diag_record& dr)
{
@@ -766,200 +83,30 @@ namespace build2
//
if (common_.after == output_after::clean)
{
- // Note that we operate with normalized paths here.
- //
- // Remove special files. The order is not important as we don't
- // expect directories here.
- //
- for (const auto& p: sp.special_cleanups)
- {
- // Remove the file if exists. Fail otherwise.
- //
- if (rmfile (ctx, p, 3) == rmfile_status::not_exist)
- fail (ll) << "registered for cleanup special file " << p
- << " does not exist";
- }
-
- // Remove files and directories in the order opposite to the order of
- // cleanup registration.
- //
- for (const auto& c: reverse_iterate (sp.cleanups))
- {
- cleanup_type t (c.type);
-
- // Skip whether the path exists or not.
- //
- if (t == cleanup_type::never)
- continue;
-
- const path& cp (c.path);
+ clean (sp, ll);
- // Wildcard with the last component being '***' (without trailing
- // separator) matches all files and sub-directories recursively as
- // well as the start directories itself. So we will recursively
- // remove the directories that match the parent (for the original
- // path) directory wildcard.
- //
- bool recursive (cp.leaf ().representation () == "***");
- const path& p (!recursive ? cp : cp.directory ());
+ context& ctx (sp.context);
- // Remove files or directories using wildcard.
- //
- if (path_pattern (p))
- {
- bool removed (false);
+ rmdir_status r (
+ sp.parent == nullptr
+ ? rmdir_buildignore (ctx,
+ *sp.work_dir.path,
+ sp.root.target_scope.root_scope ()->
+ root_extra->buildignore_file,
+ 2)
+ : rmdir (ctx, *sp.work_dir.path, 2));
- auto rm = [&cp, recursive, &removed, &sp, &ll, &ctx]
- (path&& pe, const string&, bool interm)
- {
- if (!interm)
- {
- // While removing the entry we can get not_exist due to
- // race conditions, but that's ok if somebody did our job.
- // Note that we still set the removed flag to true in this
- // case.
- //
- removed = true; // Will be meaningless on failure.
-
- if (pe.to_directory ())
- {
- dir_path d (path_cast<dir_path> (pe));
-
- if (!recursive)
- {
- rmdir_status r (rmdir (ctx, d, 3));
-
- if (r != rmdir_status::not_empty)
- return true;
-
- diag_record dr (fail (ll));
- dr << "registered for cleanup directory " << d
- << " is not empty";
-
- print_dir (dr, d, ll);
- dr << info << "wildcard: '" << cp << "'";
- }
- else
- {
- // Don't remove the working directory (it will be removed
- // by the dedicated cleanup).
- //
- // Cast to uint16_t to avoid ambiguity with
- // libbutl::rmdir_r().
- //
- rmdir_status r (rmdir_r (ctx, d, d != sp.wd_path, 3));
-
- if (r != rmdir_status::not_empty)
- return true;
-
- // The directory is unlikely to be current but let's keep
- // for completeness.
- //
- fail (ll) << "registered for cleanup wildcard " << cp
- << " matches the current directory";
- }
- }
- else
- rmfile (ctx, pe, 3);
- }
-
- return true;
- };
-
- // Note that here we rely on the fact that recursive iterating
- // goes depth-first (which makes sense for the cleanup).
- //
- try
- {
- // Doesn't follow symlinks.
- //
- path_search (p,
- rm,
- dir_path () /* start */,
- path_match_flags::none);
- }
- catch (const system_error& e)
- {
- fail (ll) << "unable to cleanup wildcard " << cp << ": " << e;
- }
-
- // Removal of no filesystem entries is not an error for 'maybe'
- // cleanup type.
- //
- if (removed || t == cleanup_type::maybe)
- continue;
-
- fail (ll) << "registered for cleanup wildcard " << cp
- << " doesn't match any "
- << (recursive
- ? "path"
- : p.to_directory ()
- ? "directory"
- : "file");
- }
-
- // Remove the directory if exists and empty. Fail otherwise.
- // Removal of non-existing directory is not an error for 'maybe'
- // cleanup type.
- //
- if (p.to_directory ())
- {
- dir_path d (path_cast<dir_path> (p));
- bool wd (d == sp.wd_path);
-
- // Trace the scope working directory removal with the verbosity
- // level 2 (that was used for its creation). For other
- // directories use level 3 (as for other cleanups).
- //
- int v (wd ? 2 : 3);
-
- // Don't remove the working directory for the recursive cleanup
- // (it will be removed by the dedicated one).
- //
- // Note that the root working directory contains the
- // .buildignore file (see above).
- //
- // @@ If 'd' is a file then will fail with a diagnostics having
- // no location info. Probably need to add an optional location
- // parameter to rmdir() function. The same problem exists for
- // a file cleanup when try to rmfile() directory instead of
- // file.
- //
- rmdir_status r (
- recursive
- ? rmdir_r (ctx, d, !wd, static_cast <uint16_t> (v))
- : (wd && sp.parent == nullptr
- ? rmdir_buildignore (
- ctx,
- d,
- sp.root.target_scope.root_scope ()->root_extra->
- buildignore_file,
- v)
- : rmdir (ctx, d, v)));
-
- if (r == rmdir_status::success ||
- (r == rmdir_status::not_exist && t == cleanup_type::maybe))
- continue;
-
- diag_record dr (fail (ll));
- dr << "registered for cleanup directory " << d
- << (r == rmdir_status::not_exist
- ? " does not exist"
- : !recursive
- ? " is not empty"
- : " is current");
+ if (r != rmdir_status::success)
+ {
+ diag_record dr (fail (ll));
- if (r == rmdir_status::not_empty)
- print_dir (dr, d, ll);
- }
+ dr << diag_path (sp.work_dir)
+ << (r == rmdir_status::not_exist
+ ? " does not exist"
+ : " is not empty");
- // Remove the file if exists. Fail otherwise. Removal of
- // non-existing file is not an error for 'maybe' cleanup type.
- //
- if (rmfile (ctx, p, 3) == rmfile_status::not_exist &&
- t == cleanup_type::always)
- fail (ll) << "registered for cleanup file " << p
- << " does not exist";
+ if (r == rmdir_status::not_empty)
+ print_dir (dr, *sp.work_dir.path, ll);
}
}
@@ -968,1102 +115,14 @@ namespace build2
//
if (verb >= 2)
text << "cd " << (sp.parent != nullptr
- ? sp.parent->wd_path
- : sp.wd_path.directory ());
- }
-
- // The exit pseudo-builtin: exit the current scope successfully, or
- // print the diagnostics and exit the current scope and all the outer
- // scopes unsuccessfully. Always throw exit_scope exception.
- //
- // exit [<diagnostics>]
- //
- [[noreturn]] static void
- exit_builtin (const strings& args, const location& ll)
- {
- auto i (args.begin ());
- auto e (args.end ());
-
- // Process arguments.
- //
- // If no argument is specified, then exit successfully. Otherwise,
- // print the diagnostics and exit unsuccessfully.
- //
- if (i == e)
- throw exit_scope (true);
-
- const string& s (*i++);
-
- if (i != e)
- fail (ll) << "unexpected argument '" << *i << "'";
-
- error (ll) << s;
- throw exit_scope (false);
- }
-
- // The set pseudo-builtin: set variable from the stdin input.
- //
- // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var>
- //
- static void
- set_builtin (scope& sp,
- const strings& args,
- auto_fd in,
- const location& ll)
- {
- try
- {
- // Do not throw when eofbit is set (end of stream reached), and
- // when failbit is set (read operation failed to extract any
- // character).
- //
- ifdstream cin (move (in), ifdstream::badbit);
-
- // Parse arguments.
- //
- cli::vector_scanner scan (args);
- set_options ops (scan);
-
- if (ops.whitespace () && ops.newline ())
- fail (ll) << "both -n|--newline and -w|--whitespace specified";
-
- if (!scan.more ())
- fail (ll) << "missing variable name";
-
- string a (scan.next ()); // Either attributes or variable name.
- const string* ats (!scan.more () ? nullptr : &a);
- const string& vname (!scan.more () ? a : scan.next ());
-
- if (scan.more ())
- fail (ll) << "unexpected argument '" << scan.next () << "'";
-
- if (ats != nullptr && ats->empty ())
- fail (ll) << "empty variable attributes";
-
- if (vname.empty ())
- fail (ll) << "empty variable name";
-
- // Read the input.
- //
- cin.peek (); // Sets eofbit for an empty stream.
-
- names ns;
- while (!cin.eof ())
- {
- // Read next element that depends on the whitespace mode being
- // enabled or not. For the latter case it also makes sense to strip
- // the trailing CRs that can appear while cross-testing Windows
- // target or as a part of msvcrt junk production (see above).
- //
- string s;
- if (ops.whitespace ())
- cin >> s;
- else
- {
- getline (cin, s);
-
- while (!s.empty () && s.back () == '\r')
- s.pop_back ();
- }
-
- // If failbit is set then we read nothing into the string as eof is
- // reached. That in particular means that the stream has trailing
- // whitespaces (possibly including newlines) if the whitespace mode
- // is enabled, or the trailing newline otherwise. If so then
- // we append the "blank" to the variable value in the exact mode
- // prior to bailing out.
- //
- if (cin.fail ())
- {
- if (ops.exact ())
- {
- if (ops.whitespace () || ops.newline ())
- ns.emplace_back (move (s)); // Reuse empty string.
- else if (ns.empty ())
- ns.emplace_back ("\n");
- else
- ns[0].value += '\n';
- }
-
- break;
- }
-
- if (ops.whitespace () || ops.newline () || ns.empty ())
- ns.emplace_back (move (s));
- else
- {
- ns[0].value += '\n';
- ns[0].value += s;
- }
- }
-
- cin.close ();
-
- // Set the variable value and attributes. Note that we need to acquire
- // unique lock before potentially changing the script's variable
- // pool. The obtained variable reference can safely be used with no
- // locking as the variable pool is an associative container
- // (underneath) and we are only adding new variables into it.
- //
- ulock ul (sp.root.var_pool_mutex);
- const variable& var (sp.root.var_pool.insert (move (vname)));
- ul.unlock ();
-
- value& lhs (sp.assign (var));
-
- // If there are no attributes specified then the variable assignment
- // is straightforward. Otherwise we will use the build2 parser helper
- // function.
- //
- if (ats == nullptr)
- lhs.assign (move (ns), &var);
- else
- {
- // If there is an error in the attributes string, our diagnostics
- // will look like this:
- //
- // <attributes>:1:1 error: unknown value attribute x
- // testscript:10:1 info: while parsing attributes '[x]'
- //
- auto df = make_diag_frame (
- [ats, &ll](const diag_record& dr)
- {
- dr << info (ll) << "while parsing attributes '" << *ats << "'";
- });
-
- parser p (sp.root.test_target.ctx);
- p.apply_value_attributes (&var,
- lhs,
- value (move (ns)),
- *ats,
- token_type::assign,
- path_name ("<attributes>"));
- }
- }
- catch (const io_error& e)
- {
- fail (ll) << "set: " << e;
- }
- catch (const cli::exception& e)
- {
- fail (ll) << "set: " << e;
- }
- }
-
- // Sorted array of builtins that support filesystem entries cleanup.
- //
- static const char* cleanup_builtins[] = {
- "cp", "ln", "mkdir", "mv", "touch"};
-
- static inline bool
- cleanup_builtin (const string& name)
- {
- return binary_search (
- cleanup_builtins,
- cleanup_builtins +
- sizeof (cleanup_builtins) / sizeof (*cleanup_builtins),
- name);
- }
-
- static bool
- run_pipe (scope& sp,
- command_pipe::const_iterator bc,
- command_pipe::const_iterator ec,
- auto_fd ifd,
- size_t ci, size_t li, const location& ll,
- bool diag)
- {
- if (bc == ec) // End of the pipeline.
- return true;
-
- // The overall plan is to run the first command in the pipe, reading
- // its input from the file descriptor passed (or, for the first
- // command, according to stdin redirect specification) and redirecting
- // its output to the right-hand part of the pipe recursively. Fail if
- // the right-hand part fails. Otherwise check the process exit code,
- // match stderr (and stdout for the last command in the pipe) according
- // to redirect specification(s) and fail if any of the above fails.
- //
- const command& c (*bc);
-
- // Register the command explicit cleanups. Verify that the path being
- // cleaned up is a sub-path of the testscript working directory. Fail
- // if this is not the case.
- //
- for (const auto& cl: c.cleanups)
- {
- const path& p (cl.path);
- path np (normalize (p, sp, ll));
-
- const string& ls (np.leaf ().string ());
- bool wc (ls == "*" || ls == "**" || ls == "***");
- const path& cp (wc ? np.directory () : np);
- const dir_path& wd (sp.root.wd_path);
-
- if (!cp.sub (wd))
- fail (ll) << (wc
- ? "wildcard"
- : p.to_directory ()
- ? "directory"
- : "file")
- << " cleanup " << p << " is out of working directory "
- << wd;
-
- sp.clean ({cl.type, move (np)}, false);
- }
-
- const redirect& in (c.in.effective ());
- const redirect& out (c.out.effective ());
- const redirect& err (c.err.effective ());
- bool eq (c.exit.comparison == exit_comparison::eq);
-
- // If stdin file descriptor is not open then this is the first pipeline
- // command.
- //
- bool first (ifd.get () == -1);
-
- command_pipe::const_iterator nc (bc + 1);
- bool last (nc == ec);
-
- const string& program (c.program.string ());
-
- // Prior to opening file descriptors for command input/output
- // redirects let's check if the command is the exit builtin. Being a
- // builtin syntactically it differs from the regular ones in a number
- // of ways. It doesn't communicate with standard streams, so
- // redirecting them is meaningless. It may appear only as a single
- // command in a pipeline. It doesn't return any value and stops the
- // scope execution, so checking its exit status is meaningless as
- // well. That all means we can short-circuit here calling the builtin
- // and bailing out right after that. Checking that the user didn't
- // specify any redirects or exit code check sounds like a right thing
- // to do.
- //
- if (program == "exit")
- {
- // In case the builtin is erroneously pipelined from the other
- // command, we will close stdin gracefully (reading out the stream
- // content), to make sure that the command doesn't print any
- // unwanted diagnostics about IO operation failure.
- //
- // Note that dtor will ignore any errors (which is what we want).
- //
- ifdstream is (move (ifd), fdstream_mode::skip);
-
- if (!first || !last)
- fail (ll) << "exit builtin must be the only pipe command";
-
- if (in.type != redirect_type::none)
- fail (ll) << "exit builtin stdin cannot be redirected";
-
- if (out.type != redirect_type::none)
- fail (ll) << "exit builtin stdout cannot be redirected";
-
- if (err.type != redirect_type::none)
- fail (ll) << "exit builtin stderr cannot be redirected";
-
- // We can't make sure that there is no exit code check. Let's, at
- // least, check that non-zero code is not expected.
- //
- if (eq != (c.exit.code == 0))
- fail (ll) << "exit builtin exit code cannot be non-zero";
-
- exit_builtin (c.arguments, ll); // Throws exit_scope exception.
- }
-
- // Create a unique path for a command standard stream cache file.
- //
- auto std_path = [&sp, &ci, &li, &ll] (const char* n) -> path
- {
- path p (n);
-
- // 0 if belongs to a single-line test scope, otherwise is the
- // command line number (start from one) in the test scope.
- //
- if (li > 0)
- p += "-" + to_string (li);
-
- // 0 if belongs to a single-command expression, otherwise is the
- // command number (start from one) in the expression.
- //
- // Note that the name like stdin-N can relate to N-th command of a
- // single-line test or to N-th single-command line of multi-line
- // test. These cases are mutually exclusive and so are unambiguous.
- //
- if (ci > 0)
- p += "-" + to_string (ci);
-
- return normalize (move (p), sp, ll);
- };
-
- // If this is the first pipeline command, then open stdin descriptor
- // according to the redirect specified.
- //
- path isp;
-
- if (!first)
- assert (in.type == redirect_type::none); // No redirect expected.
- else
- {
- // Open a file for passing to the command stdin.
- //
- auto open_stdin = [&isp, &ifd, &ll] ()
- {
- assert (!isp.empty ());
-
- try
- {
- ifd = fdopen (isp, fdopen_mode::in);
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to read " << isp << ": " << e;
- }
- };
-
- switch (in.type)
- {
- case redirect_type::pass:
- {
- try
- {
- ifd = fddup (0);
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to duplicate stdin: " << e;
- }
-
- break;
- }
-
- case redirect_type::none:
- // Somehow need to make sure that the child process doesn't read
- // from stdin. That is tricky to do in a portable way. Here we
- // suppose that the program which (erroneously) tries to read some
- // data from stdin being redirected to /dev/null fails not being
- // able to read the expected data, and so the test doesn't pass
- // through.
- //
- // @@ Obviously doesn't cover the case when the process reads
- // whatever available.
- // @@ Another approach could be not to redirect stdin and let the
- // process to hang which can be interpreted as a test failure.
- // @@ Both ways are quite ugly. Is there some better way to do
- // this?
- //
- // Fall through.
- //
- case redirect_type::null:
- {
- ifd = open_null ();
- break;
- }
-
- case redirect_type::file:
- {
- isp = normalize (in.file.path, sp, ll);
-
- open_stdin ();
- break;
- }
-
- case redirect_type::here_str_literal:
- case redirect_type::here_doc_literal:
- {
- // We could write to the command stdin directly but instead will
- // cache the data for potential troubleshooting.
- //
- isp = std_path ("stdin");
-
- save (
- isp, transform (in.str, false, in.modifiers, sp.root), ll);
-
- sp.clean_special (isp);
-
- open_stdin ();
- break;
- }
- case redirect_type::trace:
- case redirect_type::merge:
- case redirect_type::here_str_regex:
- case redirect_type::here_doc_regex:
- case redirect_type::here_doc_ref: assert (false); break;
- }
- }
-
- assert (ifd.get () != -1);
-
- // Prior to opening file descriptors for command outputs redirects
- // let's check if the command is the set builtin. Being a builtin
- // syntactically it differs from the regular ones in a number of ways.
- // It either succeeds or terminates abnormally, so redirecting stderr
- // is meaningless. It also never produces any output and may appear
- // only as a terminal command in a pipeline. That means we can
- // short-circuit here calling the builtin and returning right after
- // that. Checking that the user didn't specify any meaningless
- // redirects or exit code check sounds as a right thing to do.
- //
- if (program == "set")
- {
- if (!last)
- fail (ll) << "set builtin must be the last pipe command";
-
- if (out.type != redirect_type::none)
- fail (ll) << "set builtin stdout cannot be redirected";
-
- if (err.type != redirect_type::none)
- fail (ll) << "set builtin stderr cannot be redirected";
-
- if (eq != (c.exit.code == 0))
- fail (ll) << "set builtin exit code cannot be non-zero";
-
- set_builtin (sp, c.arguments, move (ifd), ll);
- return true;
- }
-
- // Open a file for command output redirect if requested explicitly
- // (file overwrite/append redirects) or for the purpose of the output
- // validation (none, here_*, file comparison redirects), register the
- // file for cleanup, return the file descriptor. Interpret trace
- // redirect according to the verbosity level (as null if below 2, as
- // pass otherwise). Return nullfd, standard stream descriptor duplicate
- // or null-device descriptor for merge, pass or null redirects
- // respectively (not opening any file).
- //
- auto open = [&sp, &ll, &std_path] (const redirect& r,
- int dfd,
- path& p) -> auto_fd
- {
- assert (dfd == 1 || dfd == 2);
- const char* what (dfd == 1 ? "stdout" : "stderr");
-
- fdopen_mode m (fdopen_mode::out | fdopen_mode::create);
-
- redirect_type rt (r.type != redirect_type::trace
- ? r.type
- : verb < 2
- ? redirect_type::null
- : redirect_type::pass);
- switch (rt)
- {
- case redirect_type::pass:
- {
- try
- {
- return fddup (dfd);
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to duplicate " << what << ": " << e;
- }
- }
-
- case redirect_type::null: return open_null ();
-
- // Duplicate the paired file descriptor later.
- //
- case redirect_type::merge: return nullfd;
-
- case redirect_type::file:
- {
- // For the cmp mode the user-provided path refers a content to
- // match against, rather than a content to be produced (as for
- // overwrite and append modes). And so for cmp mode we redirect
- // the process output to a temporary file.
- //
- p = r.file.mode == redirect_fmode::compare
- ? std_path (what)
- : normalize (r.file.path, sp, ll);
-
- m |= r.file.mode == redirect_fmode::append
- ? fdopen_mode::at_end
- : fdopen_mode::truncate;
-
- break;
- }
-
- case redirect_type::none:
- case redirect_type::here_str_literal:
- case redirect_type::here_doc_literal:
- case redirect_type::here_str_regex:
- case redirect_type::here_doc_regex:
- {
- p = std_path (what);
- m |= fdopen_mode::truncate;
- break;
- }
-
- case redirect_type::trace:
- case redirect_type::here_doc_ref: assert (false); break;
- }
-
- auto_fd fd;
-
- try
- {
- fd = fdopen (p, m);
-
- if ((m & fdopen_mode::at_end) != fdopen_mode::at_end)
- {
- if (rt == redirect_type::file)
- sp.clean ({cleanup_type::always, p}, true);
- else
- sp.clean_special (p);
- }
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to write to " << p << ": " << e;
- }
-
- return fd;
- };
-
- path osp;
- fdpipe ofd;
-
- // If this is the last command in the pipeline than redirect the
- // command process stdout to a file. Otherwise create a pipe and
- // redirect the stdout to the write-end of the pipe. The read-end will
- // be passed as stdin for the next command in the pipeline.
- //
- // @@ Shouldn't we allow the here-* and file output redirects for a
- // command with pipelined output? Say if such redirect is present
- // then the process output is redirected to a file first (as it is
- // when no output pipelined), and only after the process exit code
- // and the output are validated the next command in the pipeline is
- // executed taking the file as an input. This could be usefull for
- // test failures investigation and for tests "tightening".
- //
- if (last)
- ofd.out = open (out, 1, osp);
- else
- {
- assert (out.type == redirect_type::none); // No redirect expected.
- ofd = open_pipe ();
- }
-
- path esp;
- auto_fd efd (open (err, 2, esp));
-
- // Merge standard streams.
- //
- bool mo (out.type == redirect_type::merge);
- if (mo || err.type == redirect_type::merge)
- {
- auto_fd& self (mo ? ofd.out : efd);
- auto_fd& other (mo ? efd : ofd.out);
-
- try
- {
- assert (self.get () == -1 && other.get () != -1);
- self = fddup (other.get ());
- }
- catch (const io_error& e)
- {
- fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout")
- << ": " << e;
- }
- }
-
- // All descriptors should be open to the date.
- //
- assert (ofd.out.get () != -1 && efd.get () != -1);
-
- optional<process_exit> exit;
- builtin_function* bf (builtins.find (program));
-
- bool success;
-
- auto process_args = [&c] () -> cstrings
- {
- cstrings args {c.program.string ().c_str ()};
-
- for (const auto& a: c.arguments)
- args.push_back (a.c_str ());
-
- args.push_back (nullptr);
- return args;
- };
-
- if (bf != nullptr)
- {
- // Execute the builtin.
- //
- if (verb >= 2)
- print_process (process_args ());
-
- // Some of the testscript builtins (cp, mkdir, etc) extend libbutl
- // builtins (via callbacks) registering/moving cleanups for the
- // filesystem entries they create/move, unless explicitly requested
- // not to do so via the --no-cleanup option.
- //
- // Let's "wrap up" the cleanup-related flags into the single object
- // to rely on "small function object" optimization.
- //
- struct cleanup
- {
- // Whether the cleanups are enabled for the builtin. Can be set to
- // false by the parse_option callback if --no-cleanup is
- // encountered.
- //
- bool enabled = true;
-
- // Whether to register cleanup for a filesystem entry being
- // created/updated depending on its existence. Calculated by the
- // create pre-hook and used by the subsequent post-hook.
- //
- bool add;
-
- // Whether to move existing cleanups for the filesystem entry
- // being moved, rather than to erase them. Calculated by the move
- // pre-hook and used by the subsequent post-hook.
- //
- bool move;
- };
-
- // nullopt if the builtin doesn't support cleanups.
- //
- optional<cleanup> cln;
-
- if (cleanup_builtin (program))
- cln = cleanup ();
-
- builtin_callbacks bcs {
-
- // create
- //
- // Unless cleanups are suppressed, test that the filesystem entry
- // doesn't exist (pre-hook) and, if that's the case, register the
- // cleanup for the newly created filesystem entry (post-hook).
- //
- [&sp, &cln] (const path& p, bool pre)
- {
- // Cleanups must be supported by a filesystem entry-creating
- // builtin.
- //
- assert (cln);
-
- if (cln->enabled)
- {
- if (pre)
- cln->add = !butl::entry_exists (p);
- else if (cln->add)
- sp.clean ({cleanup_type::always, p}, true /* implicit */);
- }
- },
-
- // move
- //
- // Validate the source and destination paths (pre-hook) and,
- // unless suppressed, adjust the cleanups that are sub-paths of
- // the source path (post-hook).
- //
- [&sp, &cln]
- (const path& from, const path& to, bool force, bool pre)
- {
- // Cleanups must be supported by a filesystem entry-moving
- // builtin.
- //
- assert (cln);
-
- if (pre)
- {
- const dir_path& wd (sp.wd_path);
- const dir_path& rwd (sp.root.wd_path);
-
- auto fail = [] (const string& d) {throw runtime_error (d);};
-
- if (!from.sub (rwd) && !force)
- fail ("'" + from.representation () +
- "' is out of working directory '" + rwd.string () +
- "'");
-
- auto check_wd = [&wd, fail] (const path& p)
- {
- if (wd.sub (path_cast<dir_path> (p)))
- fail ("'" + p.string () +
- "' contains test working directory '" +
- wd.string () + "'");
- };
-
- check_wd (from);
- check_wd (to);
-
- // Unless cleanups are disabled, "move" the matching cleanups
- // if the destination path doesn't exist and it is a sub-path
- // of the working directory and just remove them otherwise.
- //
- if (cln->enabled)
- cln->move = !butl::entry_exists (to) && to.sub (rwd);
- }
- else if (cln->enabled)
- {
- // Move or remove the matching cleanups (see above).
- //
- // Note that it's not enough to just change the cleanup paths.
- // We also need to make sure that these cleanups happen before
- // the destination directory (or any of its parents) cleanup,
- // that is potentially registered. To achieve that we can just
- // relocate these cleanup entries to the end of the list,
- // preserving their mutual order. Remember that cleanups in
- // the list are executed in the reversed order.
- //
- cleanups cs;
-
- // Remove the source path sub-path cleanups from the list,
- // adjusting/caching them if required (see above).
- //
- for (auto i (sp.cleanups.begin ()); i != sp.cleanups.end (); )
- {
- build2::test::script::cleanup& c (*i);
- path& p (c.path);
-
- if (p.sub (from))
- {
- if (cln->move)
- {
- // Note that we need to preserve the cleanup path
- // trailing separator which indicates the removal
- // method. Also note that leaf(), in particular, does
- // that.
- //
- p = p != from
- ? to / p.leaf (path_cast<dir_path> (from))
- : p.to_directory ()
- ? path_cast<dir_path> (to)
- : to;
-
- cs.push_back (move (c));
- }
-
- i = sp.cleanups.erase (i);
- }
- else
- ++i;
- }
-
- // Re-insert the adjusted cleanups at the end of the list.
- //
- sp.cleanups.insert (sp.cleanups.end (),
- make_move_iterator (cs.begin ()),
- make_move_iterator (cs.end ()));
-
- }
- },
-
- // remove
- //
- // Validate the filesystem entry path (pre-hook).
- //
- [&sp] (const path& p, bool force, bool pre)
- {
- if (pre)
- {
- const dir_path& wd (sp.wd_path);
- const dir_path& rwd (sp.root.wd_path);
-
- auto fail = [] (const string& d) {throw runtime_error (d);};
-
- if (!p.sub (rwd) && !force)
- fail ("'" + p.representation () +
- "' is out of working directory '" + rwd.string () +
- "'");
-
- if (wd.sub (path_cast<dir_path> (p)))
- fail ("'" + p.string () +
- "' contains test working directory '" + wd.string () +
- "'");
- }
- },
-
- // parse_option
- //
- [&cln] (const strings& args, size_t i)
- {
- // Parse --no-cleanup, if it is supported by the builtin.
- //
- if (cln && args[i] == "--no-cleanup")
- {
- cln->enabled = false;
- return 1;
- }
-
- return 0;
- },
-
- // sleep
- //
- // Deactivate the thread before going to sleep.
- //
- [&sp] (const duration& d)
- {
- // If/when required we could probably support the precise sleep
- // mode (e.g., via an option).
- //
- sp.root.test_target.ctx.sched.sleep (d);
- }
- };
-
- try
- {
- uint8_t r; // Storage.
- builtin b (bf (r,
- c.arguments,
- move (ifd), move (ofd.out), move (efd),
- sp.wd_path,
- bcs));
-
- success = run_pipe (sp,
- nc,
- ec,
- move (ofd.in),
- ci + 1, li, ll, diag);
-
- exit = process_exit (b.wait ());
- }
- catch (const system_error& e)
- {
- fail (ll) << "unable to execute " << c.program << " builtin: "
- << e << endf;
- }
- }
- else
- {
- // Execute the process.
- //
- cstrings args (process_args ());
-
- // Resolve the relative not simple program path against the scope's
- // working directory. The simple one will be left for the process
- // path search machinery. Also strip the potential leading `^`,
- // indicating that this is an external program rather than a
- // builtin.
- //
- path p;
-
- try
- {
- p = path (args[0]);
-
- if (p.relative ())
- {
- auto program = [&p, &args] (path pp)
- {
- p = move (pp);
- args[0] = p.string ().c_str ();
- };
-
- if (p.simple ())
- {
- const string& s (p.string ());
-
- // Don't end up with an empty path.
- //
- if (s.size () > 1 && s[0] == '^')
- program (path (s, 1, s.size () - 1));
- }
- else
- program (sp.wd_path / p);
- }
- }
- catch (const invalid_path& e)
- {
- fail (ll) << "invalid program path " << e.path;
- }
-
- try
- {
- process_path pp (process::path_search (args[0]));
-
- // Note: the builtin-escaping character '^' is not printed.
- //
- if (verb >= 2)
- print_process (args);
-
- process pr (
- pp,
- args.data (),
- {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()},
- sp.wd_path.string ().c_str ());
-
- ifd.reset ();
- ofd.out.reset ();
- efd.reset ();
-
- success = run_pipe (sp,
- nc,
- ec,
- move (ofd.in),
- ci + 1, li, ll, diag);
-
- pr.wait ();
-
- exit = move (pr.exit);
- }
- catch (const process_error& e)
- {
- error (ll) << "unable to execute " << args[0] << ": " << e;
-
- if (e.child)
- std::exit (1);
-
- throw failed ();
- }
- }
-
- assert (exit);
-
- // If the righ-hand side pipeline failed than the whole pipeline fails,
- // and no further checks are required.
- //
- if (!success)
- return false;
-
- const path& pr (c.program);
-
- // If there is no valid exit code available by whatever reason then we
- // print the proper diagnostics, dump stderr (if cached and not too
- // large) and fail the whole test. Otherwise if the exit code is not
- // correct then we print diagnostics if requested and fail the
- // pipeline.
- //
- bool valid (exit->normal ());
-
- // On Windows the exit code can be out of the valid codes range being
- // defined as uint16_t.
- //
-#ifdef _WIN32
- if (valid)
- valid = exit->code () < 256;
-#endif
-
- success = valid && eq == (exit->code () == c.exit.code);
-
- if (!valid || (!success && diag))
- {
- // In the presense of a valid exit code we print the diagnostics and
- // return false rather than throw.
- //
- diag_record d (valid ? error (ll) : fail (ll));
-
- if (!exit->normal ())
- d << pr << " " << *exit;
- else
- {
- uint16_t ec (exit->code ()); // Make sure is printed as integer.
-
- if (!valid)
- d << pr << " exit code " << ec << " out of 0-255 range";
- else if (!success)
- {
- if (diag)
- d << pr << " exit code " << ec << (eq ? " != " : " == ")
- << static_cast<uint16_t> (c.exit.code);
- }
- else
- assert (false);
- }
-
- if (non_empty (esp, ll))
- d << info << "stderr: " << esp;
-
- if (non_empty (osp, ll))
- d << info << "stdout: " << osp;
-
- if (non_empty (isp, ll))
- d << info << "stdin: " << isp;
-
- // Print cached stderr.
- //
- print_file (d, esp, ll);
- }
-
- // If exit code is correct then check if the standard outputs match the
- // expectations. Note that stdout is only redirected to file for the
- // last command in the pipeline.
- //
- // The thinking behind matching stderr first is that if it mismatches,
- // then the program probably misbehaves (executes wrong functionality,
- // etc) in which case its stdout doesn't really matter.
- //
- if (success)
- success =
- check_output (pr, esp, isp, err, ll, sp, diag, "stderr") &&
- (!last ||
- check_output (pr, osp, isp, out, ll, sp, diag, "stdout"));
-
- return success;
- }
-
- static bool
- run_expr (scope& sp,
- const command_expr& expr,
- size_t li, const location& ll,
- bool diag)
- {
- // Print test id once per test expression.
- //
- auto df = make_diag_frame (
- [&sp](const diag_record& dr)
- {
- // Let's not depend on how the path representation can be improved
- // for readability on printing.
- //
- dr << info << "test id: " << sp.id_path.posix_string ();
- });
-
- // Commands are numbered sequentially throughout the expression
- // starting with 1. Number 0 means the command is a single one.
- //
- size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1
- ? 0
- : 1);
-
- // If there is no ORs to the right of a pipe then the pipe failure is
- // fatal for the whole expression. In particular, the pipe must print
- // the diagnostics on failure (if generally allowed). So we find the
- // pipe that "switches on" the diagnostics potential printing.
- //
- command_expr::const_iterator trailing_ands; // Undefined if diag is
- // disallowed.
- if (diag)
- {
- auto i (expr.crbegin ());
- for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ;
- trailing_ands = i.base ();
- }
-
- bool r (false);
- bool print (false);
-
- for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i)
- {
- if (diag && i + 1 == trailing_ands)
- print = true;
-
- const command_pipe& p (i->pipe);
- bool or_op (i->op == expr_operator::log_or);
-
- // Short-circuit if the pipe result must be OR-ed with true or AND-ed
- // with false.
- //
- if (!((or_op && r) || (!or_op && !r)))
- r = run_pipe (
- sp, p.begin (), p.end (), auto_fd (), ci, li, ll, print);
-
- ci += p.size ();
- }
-
- return r;
+ ? *sp.parent->work_dir.path
+ : sp.work_dir.path->directory ());
}
void default_runner::
run (scope& sp,
const command_expr& expr, command_type ct,
- size_t li,
- const location& ll)
+ size_t li, const location& ll)
{
// Noop for teardown commands if keeping tests output is requested.
//
@@ -2085,8 +144,18 @@ namespace build2
text << ": " << c << expr;
}
- if (!run_expr (sp, expr, li, ll, true))
- throw failed (); // Assume diagnostics is already printed.
+ // Print test id once per test expression.
+ //
+ auto df = make_diag_frame (
+ [&sp](const diag_record& dr)
+ {
+ // Let's not depend on how the path representation can be improved
+ // for readability on printing.
+ //
+ dr << info << "test id: " << sp.id_path.posix_string ();
+ });
+
+ build2::script::run (sp, expr, li, ll);
}
bool default_runner::
@@ -2097,7 +166,18 @@ namespace build2
if (verb >= 3)
text << ": ?" << expr;
- return run_expr (sp, expr, li, ll, false);
+ // Print test id once per test expression.
+ //
+ auto df = make_diag_frame (
+ [&sp](const diag_record& dr)
+ {
+ // Let's not depend on how the path representation can be improved
+ // for readability on printing.
+ //
+ dr << info << "test id: " << sp.id_path.posix_string ();
+ });
+
+ return build2::script::run_if (sp, expr, li, ll);
}
}
}
diff --git a/libbuild2/test/script/runner.hxx b/libbuild2/test/script/runner.hxx
index af37f56..22cae4e 100644
--- a/libbuild2/test/script/runner.hxx
+++ b/libbuild2/test/script/runner.hxx
@@ -7,6 +7,8 @@
#include <libbuild2/types.hxx>
#include <libbuild2/utility.hxx>
+#include <libbuild2/script/run.hxx> // exit
+
#include <libbuild2/test/script/script.hxx>
namespace build2
@@ -17,18 +19,7 @@ namespace build2
namespace script
{
- // An exception that can be thrown by a runner to exit the scope (for
- // example, as a result of executing the exit builtin). The status
- // indicates whether the scope should be considered to have succeeded
- // or failed.
- //
- struct exit_scope
- {
- bool status;
-
- explicit
- exit_scope (bool s): status (s) {}
- };
+ using exit_scope = build2::script::exit;
class runner
{
diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx
index 79b8bca..34d4723 100644
--- a/libbuild2/test/script/script.cxx
+++ b/libbuild2/test/script/script.cxx
@@ -8,6 +8,8 @@
#include <libbuild2/target.hxx>
#include <libbuild2/algorithm.hxx>
+#include <libbuild2/test/script/parser.hxx>
+
using namespace std;
namespace build2
@@ -16,414 +18,60 @@ namespace build2
{
namespace script
{
- ostream&
- operator<< (ostream& o, line_type lt)
- {
- const char* s (nullptr);
-
- switch (lt)
- {
- case line_type::var: s = "variable"; break;
- case line_type::cmd: s = "command"; break;
- case line_type::cmd_if: s = "'if'"; break;
- case line_type::cmd_ifn: s = "'if!'"; break;
- case line_type::cmd_elif: s = "'elif'"; break;
- case line_type::cmd_elifn: s = "'elif!'"; break;
- case line_type::cmd_else: s = "'else'"; break;
- case line_type::cmd_end: s = "'end'"; break;
- }
-
- return o << s;
- }
-
- // Quote if empty or contains spaces or any of the special characters.
- // Note that we use single quotes since double quotes still allow
- // expansion.
- //
- // @@ What if it contains single quotes?
- //
- static void
- to_stream_q (ostream& o, const string& s)
- {
- if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos)
- o << '\'' << s << '\'';
- else
- o << s;
- };
-
- void
- to_stream (ostream& o, const command& c, command_to_stream m)
- {
- auto print_path = [&o] (const path& p)
- {
- using build2::operator<<;
-
- ostringstream s;
- stream_verb (s, stream_verb (o));
- s << p;
-
- to_stream_q (o, s.str ());
- };
-
- auto print_redirect =
- [&o, print_path] (const redirect& r, const char* prefix)
- {
- o << ' ' << prefix;
-
- size_t n (string::traits_type::length (prefix));
- assert (n > 0);
-
- char d (prefix[n - 1]); // Redirect direction.
-
- switch (r.type)
- {
- case redirect_type::none: assert (false); break;
- case redirect_type::pass: o << '|'; break;
- case redirect_type::null: o << '-'; break;
- case redirect_type::trace: o << '!'; break;
- case redirect_type::merge: o << '&' << r.fd; break;
-
- case redirect_type::here_str_literal:
- case redirect_type::here_doc_literal:
- {
- bool doc (r.type == redirect_type::here_doc_literal);
-
- // For here-document add another '>' or '<'. Note that here end
- // marker never needs to be quoted.
- //
- if (doc)
- o << d;
-
- o << r.modifiers;
-
- if (doc)
- o << r.end;
- else
- {
- const string& v (r.str);
- to_stream_q (o,
- r.modifiers.find (':') == string::npos
- ? string (v, 0, v.size () - 1) // Strip newline.
- : v);
- }
-
- break;
- }
-
- case redirect_type::here_str_regex:
- case redirect_type::here_doc_regex:
- {
- bool doc (r.type == redirect_type::here_doc_regex);
-
- // For here-document add another '>' or '<'. Note that here end
- // marker never needs to be quoted.
- //
- if (doc)
- o << d;
-
- o << r.modifiers;
-
- const regex_lines& re (r.regex);
-
- if (doc)
- o << re.intro + r.end + re.intro + re.flags;
- else
- {
- assert (!re.lines.empty ()); // Regex can't be empty.
-
- regex_line l (re.lines[0]);
- to_stream_q (o, re.intro + l.value + re.intro + l.flags);
- }
-
- break;
- }
-
- case redirect_type::file:
- {
- // For stdin or stdout-comparison redirect add '>>' or '<<' (and
- // so make it '<<<' or '>>>'). Otherwise add '+' or '=' (and so
- // make it '>+' or '>=').
- //
- if (d == '<' || r.file.mode == redirect_fmode::compare)
- o << d << d;
- else
- o << (r.file.mode == redirect_fmode::append ? '+' : '=');
-
- print_path (r.file.path);
- break;
- }
-
- case redirect_type::here_doc_ref: assert (false); break;
- }
- };
-
- auto print_doc = [&o] (const redirect& r)
- {
- o << endl;
-
- if (r.type == redirect_type::here_doc_literal)
- o << r.str;
- else
- {
- assert (r.type == redirect_type::here_doc_regex);
-
- const regex_lines& rl (r.regex);
-
- for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
- i != e; ++i)
- {
- if (i != b)
- o << endl;
-
- const regex_line& l (*i);
-
- if (l.regex) // Regex (possibly empty),
- o << rl.intro << l.value << rl.intro << l.flags;
- else if (!l.special.empty ()) // Special literal.
- o << rl.intro;
- else // Textual literal.
- o << l.value;
-
- o << l.special;
- }
- }
-
- o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end;
- };
-
- if ((m & command_to_stream::header) == command_to_stream::header)
- {
- // Program.
- //
- to_stream_q (o, c.program.string ());
-
- // Arguments.
- //
- for (const string& a: c.arguments)
- {
- o << ' ';
- to_stream_q (o, a);
- }
-
- // Redirects.
- //
- if (c.in.effective ().type != redirect_type::none)
- print_redirect (c.in.effective (), "<");
-
- if (c.out.effective ().type != redirect_type::none)
- print_redirect (c.out.effective (), ">");
-
- if (c.err.effective ().type != redirect_type::none)
- print_redirect (c.err.effective (), "2>");
-
- for (const auto& p: c.cleanups)
- {
- o << " &";
-
- if (p.type != cleanup_type::always)
- o << (p.type == cleanup_type::maybe ? '?' : '!');
-
- print_path (p.path);
- }
-
- if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0)
- {
- switch (c.exit.comparison)
- {
- case exit_comparison::eq: o << " == "; break;
- case exit_comparison::ne: o << " != "; break;
- }
-
- o << static_cast<uint16_t> (c.exit.code);
- }
- }
-
- if ((m & command_to_stream::here_doc) == command_to_stream::here_doc)
- {
- // Here-documents.
- //
- if (c.in.type == redirect_type::here_doc_literal ||
- c.in.type == redirect_type::here_doc_regex)
- print_doc (c.in);
-
- if (c.out.type == redirect_type::here_doc_literal ||
- c.out.type == redirect_type::here_doc_regex)
- print_doc (c.out);
-
- if (c.err.type == redirect_type::here_doc_literal ||
- c.err.type == redirect_type::here_doc_regex)
- print_doc (c.err);
- }
- }
-
- void
- to_stream (ostream& o, const command_pipe& p, command_to_stream m)
- {
- if ((m & command_to_stream::header) == command_to_stream::header)
- {
- for (auto b (p.begin ()), i (b); i != p.end (); ++i)
- {
- if (i != b)
- o << " | ";
-
- to_stream (o, *i, command_to_stream::header);
- }
- }
-
- if ((m & command_to_stream::here_doc) == command_to_stream::here_doc)
- {
- for (const command& c: p)
- to_stream (o, c, command_to_stream::here_doc);
- }
- }
-
- void
- to_stream (ostream& o, const command_expr& e, command_to_stream m)
- {
- if ((m & command_to_stream::header) == command_to_stream::header)
- {
- for (auto b (e.begin ()), i (b); i != e.end (); ++i)
- {
- if (i != b)
- {
- switch (i->op)
- {
- case expr_operator::log_or: o << " || "; break;
- case expr_operator::log_and: o << " && "; break;
- }
- }
-
- to_stream (o, i->pipe, command_to_stream::header);
- }
- }
-
- if ((m & command_to_stream::here_doc) == command_to_stream::here_doc)
- {
- for (const expr_term& t: e)
- to_stream (o, t.pipe, command_to_stream::here_doc);
- }
- }
-
- // redirect
+ // scope_base
//
- redirect::
- redirect (redirect_type t)
- : type (t)
+ scope_base::
+ scope_base (script& s)
+ : root (s),
+ vars (s.test_target.ctx, false /* global */)
{
- switch (type)
- {
- case redirect_type::none:
- case redirect_type::pass:
- case redirect_type::null:
- case redirect_type::trace:
- case redirect_type::merge: break;
-
- case redirect_type::here_str_literal:
- case redirect_type::here_doc_literal: new (&str) string (); break;
-
- case redirect_type::here_str_regex:
- case redirect_type::here_doc_regex:
- {
- new (&regex) regex_lines ();
- break;
- }
-
- case redirect_type::file: new (&file) file_type (); break;
-
- case redirect_type::here_doc_ref: assert (false); break;
- }
+ vars.assign (root.wd_var) = dir_path ();
}
- redirect::
- redirect (redirect&& r)
- : type (r.type),
- modifiers (move (r.modifiers)),
- end (move (r.end)),
- end_line (r.end_line),
- end_column (r.end_column)
+ const dir_path* scope_base::
+ wd_path () const
{
- switch (type)
- {
- case redirect_type::none:
- case redirect_type::pass:
- case redirect_type::null:
- case redirect_type::trace: break;
-
- case redirect_type::merge: fd = r.fd; break;
-
- case redirect_type::here_str_literal:
- case redirect_type::here_doc_literal:
- {
- new (&str) string (move (r.str));
- break;
- }
- case redirect_type::here_str_regex:
- case redirect_type::here_doc_regex:
- {
- new (&regex) regex_lines (move (r.regex));
- break;
- }
- case redirect_type::file:
- {
- new (&file) file_type (move (r.file));
- break;
- }
- case redirect_type::here_doc_ref:
- {
- new (&ref) reference_wrapper<const redirect> (r.ref);
- break;
- }
- }
+ return &cast<dir_path> (vars[root.wd_var]);
}
- redirect::
- ~redirect ()
+ const target_triplet& scope_base::
+ test_tt () const
{
- switch (type)
- {
- case redirect_type::none:
- case redirect_type::pass:
- case redirect_type::null:
- case redirect_type::trace:
- case redirect_type::merge: break;
+ if (auto r =
+ cast_null<target_triplet> (root.test_target["test.target"]))
+ return *r;
- case redirect_type::here_str_literal:
- case redirect_type::here_doc_literal: str.~string (); break;
-
- case redirect_type::here_str_regex:
- case redirect_type::here_doc_regex: regex.~regex_lines (); break;
-
- case redirect_type::file: file.~file_type (); break;
-
- case redirect_type::here_doc_ref:
- {
- ref.~reference_wrapper<const redirect> ();
- break;
- }
- }
- }
-
- redirect& redirect::
- operator= (redirect&& r)
- {
- if (this != &r)
- {
- this->~redirect ();
- new (this) redirect (move (r)); // Assume noexcept move-constructor.
- }
- return *this;
+ // We set it to default value in init() so it can only be NULL if the
+ // user resets it.
+ //
+ fail << "invalid test.target value" << endf;
}
// scope
//
+ static const optional<string> wd_name ("test working directory");
+ static const optional<string> sd_name ("working directory");
+
scope::
scope (const string& id, scope* p, script& r)
- : parent (p),
- root (r),
- vars (r.test_target.ctx, false /* global */),
- id_path (cast<path> (assign (root.id_var) = path ())),
- wd_path (cast<dir_path> (assign (root.wd_var) = dir_path ()))
-
+ : scope_base (r),
+ //
+ // Note that root.work_dir is not yet constructed if we are
+ // creating the root scope (p is NULL). Also note that
+ // root.test_target is always constructed by this point.
+ //
+ environment (root.test_target.ctx,
+ test_tt (),
+ dir_name_view (wd_path (), &wd_name),
+ dir_name_view (
+ p != nullptr ? root.work_dir.path : wd_path (),
+ &sd_name),
+ *wd_path (), true /* temp_dir_keep */,
+ redirect (redirect_type::none),
+ redirect (redirect_type::none),
+ redirect (redirect_type::none)),
+ parent (p),
+ id_path (cast<path> (assign (root.id_var) = path ()))
{
// Construct the id_path as a string to ensure POSIX form. In fact,
// the only reason we keep it as a path is to be able to easily get id
@@ -443,38 +91,64 @@ namespace build2
// (handled in an ad hoc way).
//
if (p != nullptr)
- const_cast<dir_path&> (wd_path) = dir_path (p->wd_path) /= id;
+ const_cast<dir_path&> (*work_dir.path) =
+ dir_path (*p->work_dir.path) /= id;
}
void scope::
- clean (cleanup c, bool implicit)
+ set_variable (string&& nm,
+ names&& val,
+ const string& attrs,
+ const location& ll)
{
- using std::find; // Hidden by scope::find().
+ // Check if we are trying to modify any of the special variables.
+ //
+ if (parser::special_variable (nm))
+ fail (ll) << "attempt to set '" << nm << "' variable directly";
+
+ // Set the variable value and attributes. Note that we need to acquire
+ // unique lock before potentially changing the script's variable
+ // pool. The obtained variable reference can safely be used with no
+ // locking as the variable pool is an associative container
+ // (underneath) and we are only adding new variables into it.
+ //
+ ulock ul (root.var_pool_mutex);
+ const variable& var (root.var_pool.insert (move (nm)));
+ ul.unlock ();
- assert (!implicit || c.type == cleanup_type::always);
+ value& lhs (assign (var));
- const path& p (c.path);
- if (!p.sub (root.wd_path))
+ // If there are no attributes specified then the variable assignment
+ // is straightforward. Otherwise we will use the build2 parser helper
+ // function.
+ //
+ if (attrs.empty ())
+ lhs.assign (move (val), &var);
+ else
{
- if (implicit)
- return;
- else
- assert (false); // Error so should have been checked.
+ // If there is an error in the attributes string, our diagnostics
+ // will look like this:
+ //
+ // <attributes>:1:1 error: unknown value attribute x
+ // testscript:10:1 info: while parsing attributes '[x]'
+ //
+ // Note that the attributes parsing error is the only reason for a
+ // failure.
+ //
+ auto df = make_diag_frame (
+ [attrs, &ll](const diag_record& dr)
+ {
+ dr << info (ll) << "while parsing attributes '" << attrs << "'";
+ });
+
+ parser p (context);
+ p.apply_value_attributes (&var,
+ lhs,
+ value (move (val)),
+ attrs,
+ token_type::assign,
+ path_name ("<attributes>"));
}
-
- auto pr = [&p] (const cleanup& v) -> bool {return v.path == p;};
- auto i (find_if (cleanups.begin (), cleanups.end (), pr));
-
- if (i == cleanups.end ())
- cleanups.emplace_back (move (c));
- else if (!implicit)
- i->type = c.type;
- }
-
- void scope::
- clean_special (path p)
- {
- special_cleanups.emplace_back (move (p));
}
// script_base
@@ -523,7 +197,8 @@ namespace build2
// Set the script working dir ($~) to $out_base/test/<id> (id_path
// for root is just the id which is empty if st is 'testscript').
//
- const_cast<dir_path&> (wd_path) = dir_path (rwd) /= id_path.string ();
+ const_cast<dir_path&> (*work_dir.path) =
+ dir_path (rwd) /= id_path.string ();
// Set the test variable at the script level. We do it even if it's
// set in the buildfile since they use different types.
@@ -625,7 +300,6 @@ namespace build2
return lookup_in_buildfile (var.name);
}
-
lookup scope::
lookup_in_buildfile (const string& n, bool target_only) const
{
@@ -634,7 +308,7 @@ namespace build2
// in parallel). Plus, if there is no such variable, then we cannot
// possibly find any value.
//
- const variable* pvar (root.test_target.ctx.var_pool.find (n));
+ const variable* pvar (context.var_pool.find (n));
if (pvar == nullptr)
return lookup_type ();
diff --git a/libbuild2/test/script/script.hxx b/libbuild2/test/script/script.hxx
index a28ef25..6356501 100644
--- a/libbuild2/test/script/script.hxx
+++ b/libbuild2/test/script/script.hxx
@@ -12,9 +12,9 @@
#include <libbuild2/variable.hxx>
-#include <libbuild2/test/target.hxx>
+#include <libbuild2/script/script.hxx>
-#include <libbuild2/test/script/token.hxx> // replay_tokens
+#include <libbuild2/test/target.hxx>
namespace build2
{
@@ -22,295 +22,14 @@ namespace build2
{
namespace script
{
- class parser; // Required by VC for 'friend class parser' declaration.
-
- // Pre-parse representation.
- //
-
- enum class line_type
- {
- var,
- cmd,
- cmd_if,
- cmd_ifn,
- cmd_elif,
- cmd_elifn,
- cmd_else,
- cmd_end
- };
-
- ostream&
- operator<< (ostream&, line_type);
-
- struct line
- {
- line_type type;
- replay_tokens tokens;
-
- union
- {
- const variable* var; // Pre-entered for line_type::var.
- };
- };
-
- // Most of the time we will have just one line (test command).
- //
- using lines = small_vector<line, 1>;
-
- // Parse object model.
- //
-
- // redirect
- //
- enum class redirect_type
- {
- none,
- pass,
- null,
- trace,
- merge,
- here_str_literal,
- here_str_regex,
- here_doc_literal,
- here_doc_regex,
- here_doc_ref, // Reference to here_doc literal or regex.
- file,
- };
-
- // Pre-parsed (but not instantiated) regex lines. The idea here is that
- // we should be able to re-create their (more or less) exact text
- // representation for diagnostics but also instantiate without any
- // re-parsing.
- //
- struct regex_line
- {
- // If regex is true, then value is the regex expression. Otherwise, it
- // is a literal. Note that special characters can be present in both
- // cases. For example, //+ is a regex, while /+ is a literal, both
- // with '+' as a special character. Flags are only valid for regex.
- // Literals falls apart into textual (has no special characters) and
- // special (has just special characters instead) ones. For example
- // foo is a textual literal, while /.+ is a special one. Note that
- // literal must not have value and special both non-empty.
- //
- bool regex;
-
- string value;
- string flags;
- string special;
-
- uint64_t line;
- uint64_t column;
-
- // Create regex with optional special characters.
- //
- regex_line (uint64_t l, uint64_t c,
- string v, string f, string s = string ())
- : regex (true),
- value (move (v)),
- flags (move (f)),
- special (move (s)),
- line (l),
- column (c) {}
-
- // Create a literal, either text or special.
- //
- regex_line (uint64_t l, uint64_t c, string v, bool s)
- : regex (false),
- value (s ? string () : move (v)),
- special (s ? move (v) : string ()),
- line (l),
- column (c) {}
- };
-
- struct regex_lines
- {
- char intro; // Introducer character.
- string flags; // Global flags (here-document).
-
- small_vector<regex_line, 8> lines;
- };
-
- // Output file redirect mode.
- //
- enum class redirect_fmode
- {
- compare,
- overwrite,
- append
- };
-
- struct redirect
- {
- redirect_type type;
-
- struct file_type
- {
- using path_type = build2::path;
- path_type path;
- redirect_fmode mode; // Meaningless for input redirect.
- };
-
- union
- {
- int fd; // Merge-to descriptor.
- string str; // Note: with trailing newline, if requested.
- regex_lines regex; // Note: with trailing blank, if requested.
- file_type file;
- reference_wrapper<const redirect> ref; // Note: no chains.
- };
-
- string modifiers; // Redirect modifiers.
- string end; // Here-document end marker (no regex intro/flags).
- uint64_t end_line; // Here-document end marker location.
- uint64_t end_column;
-
- // Create redirect of a type other than reference.
- //
- explicit
- redirect (redirect_type = redirect_type::none);
-
- // Create redirect of the reference type.
- //
- redirect (redirect_type t, const redirect& r)
- : type (redirect_type::here_doc_ref), ref (r)
- {
- // There is no support (and need) for reference chains.
- //
- assert (t == redirect_type::here_doc_ref &&
- r.type != redirect_type::here_doc_ref);
- }
-
- // Move constuctible/assignable-only type.
- //
- redirect (redirect&&);
- redirect& operator= (redirect&&);
-
- ~redirect ();
-
- const redirect&
- effective () const noexcept
- {
- return type == redirect_type::here_doc_ref ? ref.get () : *this;
- }
- };
-
- // cleanup
- //
- enum class cleanup_type
- {
- always, // &foo - cleanup, fail if does not exist.
- maybe, // &?foo - cleanup, ignore if does not exist.
- never // &!foo - don’t cleanup, ignore if doesn’t exist.
- };
-
- // File or directory to be automatically cleaned up at the end of the
- // scope. If the path ends with a trailing slash, then it is assumed to
- // be a directory, otherwise -- a file. A directory that is about to be
- // cleaned up must be empty.
- //
- // The last component in the path may contain a wildcard that have the
- // following semantics:
- //
- // dir/* - remove all immediate files
- // dir/*/ - remove all immediate sub-directories (must be empty)
- // dir/** - remove all files recursively
- // dir/**/ - remove all sub-directories recursively (must be empty)
- // dir/*** - remove directory dir with all files and sub-directories
- // recursively
- //
- struct cleanup
- {
- cleanup_type type;
- build2::path path;
- };
- using cleanups = vector<cleanup>;
-
- // command_exit
- //
- enum class exit_comparison {eq, ne};
-
- struct command_exit
- {
- // C/C++ don't apply constraints on program exit code other than it
- // being of type int.
- //
- // POSIX specifies that only the least significant 8 bits shall be
- // available from wait() and waitpid(); the full value shall be
- // available from waitid() (read more at _Exit, _exit Open Group
- // spec).
- //
- // While the Linux man page for waitid() doesn't mention any
- // deviations from the standard, the FreeBSD implementation (as of
- // version 11.0) only returns 8 bits like the other wait*() calls.
- //
- // Windows supports 32-bit exit codes.
- //
- // Note that in shells some exit values can have special meaning so
- // using them can be a source of confusion. For bash values in the
- // [126, 255] range are such a special ones (see Appendix E, "Exit
- // Codes With Special Meanings" in the Advanced Bash-Scripting Guide).
- //
- exit_comparison comparison;
- uint8_t code;
- };
-
- // command
- //
- struct command
- {
- path program;
- strings arguments;
-
- redirect in;
- redirect out;
- redirect err;
-
- script::cleanups cleanups;
-
- command_exit exit {exit_comparison::eq, 0};
- };
-
- enum class command_to_stream: uint16_t
- {
- header = 0x01,
- here_doc = 0x02, // Note: printed on a new line.
- all = header | here_doc
- };
-
- void
- to_stream (ostream&, const command&, command_to_stream);
-
- ostream&
- operator<< (ostream&, const command&);
-
- // command_pipe
- //
- using command_pipe = vector<command>;
-
- void
- to_stream (ostream&, const command_pipe&, command_to_stream);
-
- ostream&
- operator<< (ostream&, const command_pipe&);
-
- // command_expr
- //
- enum class expr_operator {log_or, log_and};
-
- struct expr_term
- {
- expr_operator op; // OR-ed to an implied false for the first term.
- command_pipe pipe;
- };
-
- using command_expr = vector<expr_term>;
+ using build2::script::line;
+ using build2::script::lines;
+ using build2::script::redirect;
+ using build2::script::redirect_type;
+ using build2::script::line_type;
+ using build2::script::command_expr;
- void
- to_stream (ostream&, const command_expr&, command_to_stream);
-
- ostream&
- operator<< (ostream&, const command_expr&);
+ class parser; // Required by VC for 'friend class parser' declaration.
// command_type
//
@@ -335,31 +54,54 @@ namespace build2
//
class script;
+ class scope_base // Make sure certain things are initialized early.
+ {
+ public:
+ script& root; // Self for the root (script) scope.
+
+ // Note that if we pass the variable name as a string, then it will
+ // be looked up in the wrong pool.
+ //
+ variable_map vars;
+
+ protected:
+ scope_base (script&);
+
+ const dir_path*
+ wd_path () const;
+
+ const target_triplet&
+ test_tt () const;
+ };
+
enum class scope_state {unknown, passed, failed};
- class scope
+ class scope: public scope_base, public build2::script::environment
{
public:
scope* const parent; // NULL for the root (script) scope.
- script& root; // Self for the root (script) scope.
// The chain of if-else scope alternatives. See also if_cond_ below.
//
unique_ptr<scope> if_chain;
- // Note that if we pass the variable name as a string, then it will
- // be looked up in the wrong pool.
- //
- variable_map vars;
-
const path& id_path; // Id path ($@, relative in POSIX form).
- const dir_path& wd_path; // Working dir ($~, absolute and normalized).
optional<description> desc;
scope_state state = scope_state::unknown;
- test::script::cleanups cleanups;
- paths special_cleanups;
+
+ void
+ set_variable (string&& name,
+ names&&,
+ const string& attrs,
+ const location&) override;
+
+ // Must never be called: the temporary directory is the working
+ // directory and so is created before the scope's commands are executed.
+ //
+ virtual void
+ create_temp_dir () override {assert (false);};
// Variables.
//
@@ -382,17 +124,18 @@ namespace build2
lookup_in_buildfile (const string&, bool target_only = true) const;
// Return a value suitable for assignment. If the variable does not
- // exist in this scope's map, then a new one with the NULL value is
- // added and returned. Otherwise the existing value is returned.
+ // exist in this scope's variable map, then a new one with the NULL
+ // value is added and returned. Otherwise the existing value is
+ // returned.
//
value&
assign (const variable& var) {return vars.assign (var);}
// Return a value suitable for append/prepend. If the variable does
- // not exist in this scope's map, then outer scopes are searched for
- // the same variable. If found then a new variable with the found
- // value is added to this scope and returned. Otherwise this function
- // proceeds as assign() above.
+ // not exist in this scope's variable map, then outer scopes are
+ // searched for the same variable. If found then a new variable with
+ // the found value is added to this scope and returned. Otherwise this
+ // function proceeds as assign() above.
//
value&
append (const variable&);
@@ -402,27 +145,6 @@ namespace build2
void
reset_special ();
- // Cleanup.
- //
- public:
- // Register a cleanup. If the cleanup is explicit, then override the
- // cleanup type if this path is already registered. Ignore implicit
- // registration of a path outside script working directory.
- //
- void
- clean (cleanup, bool implicit);
-
- // Register cleanup of a special file. Such files are created to
- // maintain testscript machinery and must be removed first, not to
- // interfere with the user-defined wildcard cleanups.
- //
- void
- clean_special (path p);
-
- public:
- virtual
- ~scope () = default;
-
protected:
scope (const string& id, scope* parent, script& root);
@@ -567,6 +289,4 @@ namespace build2
}
}
-#include <libbuild2/test/script/script.ixx>
-
#endif // LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX
diff --git a/libbuild2/test/script/script.ixx b/libbuild2/test/script/script.ixx
deleted file mode 100644
index 38cba29..0000000
--- a/libbuild2/test/script/script.ixx
+++ /dev/null
@@ -1,59 +0,0 @@
-// file : libbuild2/test/script/script.ixx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- inline command_to_stream
- operator&= (command_to_stream& x, command_to_stream y)
- {
- return x = static_cast<command_to_stream> (
- static_cast<uint16_t> (x) & static_cast<uint16_t> (y));
- }
-
- inline command_to_stream
- operator|= (command_to_stream& x, command_to_stream y)
- {
- return x = static_cast<command_to_stream> (
- static_cast<uint16_t> (x) | static_cast<uint16_t> (y));
- }
-
- inline command_to_stream
- operator& (command_to_stream x, command_to_stream y) {return x &= y;}
-
- inline command_to_stream
- operator| (command_to_stream x, command_to_stream y) {return x |= y;}
-
-
- // command
- //
- inline ostream&
- operator<< (ostream& o, const command& c)
- {
- to_stream (o, c, command_to_stream::all);
- return o;
- }
-
- // command_pipe
- //
- inline ostream&
- operator<< (ostream& o, const command_pipe& p)
- {
- to_stream (o, p, command_to_stream::all);
- return o;
- }
-
- // command_expr
- //
- inline ostream&
- operator<< (ostream& o, const command_expr& e)
- {
- to_stream (o, e, command_to_stream::all);
- return o;
- }
- }
- }
-}
diff --git a/libbuild2/test/script/token.cxx b/libbuild2/test/script/token.cxx
index 85fbb06..efeb17b 100644
--- a/libbuild2/test/script/token.cxx
+++ b/libbuild2/test/script/token.cxx
@@ -12,43 +12,22 @@ namespace build2
namespace script
{
void
- token_printer (ostream& os, const token& t, bool d)
+ token_printer (ostream& os, const token& t, print_mode m)
{
- const string& v (t.value);
-
// Only quote non-name tokens for diagnostics.
//
- const char* q (d ? "'" : "");
+ const char* q (m == print_mode::diagnostics ? "'" : "");
switch (t.type)
{
- case token_type::semi: os << q << ';' << q; break;
-
- case token_type::dot: os << q << '.' << q; break;
-
- case token_type::plus: os << q << '+' << q; break;
- case token_type::minus: os << q << '-' << q; break;
-
- case token_type::clean: os << q << '&' << v << q; break;
- case token_type::pipe: os << q << '|' << q; break;
+ case token_type::semi: os << q << ';' << q; break;
- case token_type::in_pass: os << q << "<|" << q; break;
- case token_type::in_null: os << q << "<-" << q; break;
- case token_type::in_str: os << q << '<' << v << q; break;
- case token_type::in_doc: os << q << "<<" << v << q; break;
- case token_type::in_file: os << q << "<<<" << q; break;
+ case token_type::dot: os << q << '.' << q; break;
- case token_type::out_pass: os << q << ">|" << q; break;
- case token_type::out_null: os << q << ">-" << q; break;
- case token_type::out_trace: os << q << ">!" << q; break;
- case token_type::out_merge: os << q << ">&" << q; break;
- case token_type::out_str: os << q << '>' << v << q; break;
- case token_type::out_doc: os << q << ">>" << v << q; break;
- case token_type::out_file_cmp: os << q << ">>>" << v << q; break;
- case token_type::out_file_ovr: os << q << ">=" << v << q; break;
- case token_type::out_file_app: os << q << ">+" << v << q; break;
+ case token_type::plus: os << q << '+' << q; break;
+ case token_type::minus: os << q << '-' << q; break;
- default: build2::token_printer (os, t, d);
+ default: build2::script::token_printer (os, t, m);
}
}
}
diff --git a/libbuild2/test/script/token.hxx b/libbuild2/test/script/token.hxx
index 14be0a2..dead796 100644
--- a/libbuild2/test/script/token.hxx
+++ b/libbuild2/test/script/token.hxx
@@ -7,7 +7,7 @@
#include <libbuild2/types.hxx>
#include <libbuild2/utility.hxx>
-#include <libbuild2/token.hxx>
+#include <libbuild2/script/token.hxx>
namespace build2
{
@@ -15,9 +15,9 @@ namespace build2
{
namespace script
{
- struct token_type: build2::token_type
+ struct token_type: build2::script::token_type
{
- using base_type = build2::token_type;
+ using base_type = build2::script::token_type;
enum
{
@@ -28,35 +28,16 @@ namespace build2
dot, // .
plus, // +
- minus, // -
-
- pipe, // |
- clean, // &{?!} (modifiers in value)
-
- in_pass, // <|
- in_null, // <-
- in_str, // <{:} (modifiers in value)
- in_doc, // <<{:} (modifiers in value)
- in_file, // <<<
-
- out_pass, // >|
- out_null, // >-
- out_trace, // >!
- out_merge, // >&
- out_str, // >{:~} (modifiers in value)
- out_doc, // >>{:~} (modifiers in value)
- out_file_cmp, // >>>
- out_file_ovr, // >=
- out_file_app // >+
+ minus // -
};
token_type () = default;
token_type (value_type v): base_type (v) {}
- token_type (base_type v): base_type (v) {}
+ token_type (build2::token_type v): base_type (v) {}
};
void
- token_printer (ostream&, const token&, bool);
+ token_printer (ostream&, const token&, print_mode);
}
}
}
diff --git a/libbuild2/token.cxx b/libbuild2/token.cxx
index 4975a02..7ce85be 100644
--- a/libbuild2/token.cxx
+++ b/libbuild2/token.cxx
@@ -8,51 +8,82 @@ using namespace std;
namespace build2
{
void
- token_printer (ostream& os, const token& t, bool d)
+ token_printer (ostream& os, const token& t, print_mode m)
{
// Only quote non-name tokens for diagnostics.
//
- const char* q (d ? "'" : "");
+ const char* q (m == print_mode::diagnostics ? "'" : "");
+ bool r (m == print_mode::raw);
switch (t.type)
{
- case token_type::eos: os << "<end of file>"; break;
- case token_type::newline: os << "<newline>"; break;
- case token_type::pair_separator: os << "<pair separator " << t.value[0] << ">"; break;
- case token_type::word: os << '\'' << t.value << '\''; break;
-
- case token_type::colon: os << q << ':' << q; break;
- case token_type::dollar: os << q << '$' << q; break;
- case token_type::question: os << q << '?' << q; break;
- case token_type::comma: os << q << ',' << q; break;
-
- case token_type::lparen: os << q << '(' << q; break;
- case token_type::rparen: os << q << ')' << q; break;
-
- case token_type::lcbrace: os << q << '{' << q; break;
- case token_type::rcbrace: os << q << '}' << q; break;
-
- case token_type::lsbrace: os << q << '[' << q; break;
- case token_type::rsbrace: os << q << ']' << q; break;
-
- case token_type::labrace: os << q << '<' << q; break;
- case token_type::rabrace: os << q << '>' << q; break;
-
- case token_type::assign: os << q << '=' << q; break;
- case token_type::prepend: os << q << "=+" << q; break;
- case token_type::append: os << q << "+=" << q; break;
- case token_type::default_assign: os << q << "?=" << q; break;
-
- case token_type::equal: os << q << "==" << q; break;
- case token_type::not_equal: os << q << "!=" << q; break;
- case token_type::less: os << q << '<' << q; break;
- case token_type::greater: os << q << '>' << q; break;
- case token_type::less_equal: os << q << "<=" << q; break;
- case token_type::greater_equal: os << q << ">=" << q; break;
-
- case token_type::log_or: os << q << "||" << q; break;
- case token_type::log_and: os << q << "&&" << q; break;
- case token_type::log_not: os << q << '!' << q; break;
+ case token_type::eos:
+ {
+ if (!r)
+ os <<"<end of file>";
+
+ break;
+ }
+ case token_type::newline:
+ {
+ os << (r ? "\n" : "<newline>");
+ break;
+ }
+ case token_type::pair_separator:
+ {
+ if (r)
+ os << t.value[0];
+ else
+ os << "<pair separator " << t.value[0] << ">";
+
+ break;
+ }
+ case token_type::word:
+ {
+ if (r)
+ os << t.value;
+ else
+ os << '\'' << t.value << '\'';
+
+ break;
+ }
+
+ case token_type::colon: os << q << ':' << q; break;
+ case token_type::dollar: os << q << '$' << q; break;
+ case token_type::question: os << q << '?' << q; break;
+ case token_type::percent: os << q << '%' << q; break;
+ case token_type::comma: os << q << ',' << q; break;
+
+ case token_type::lparen: os << q << '(' << q; break;
+ case token_type::rparen: os << q << ')' << q; break;
+
+ case token_type::lcbrace: os << q << '{' << q; break;
+ case token_type::rcbrace: os << q << '}' << q; break;
+
+ case token_type::multi_lcbrace: os << q << t.value << q; break;
+ case token_type::multi_rcbrace: os << q << t.value << q; break;
+
+ case token_type::lsbrace: os << q << '[' << q; break;
+ case token_type::rsbrace: os << q << ']' << q; break;
+
+ case token_type::labrace: os << q << '<' << q; break;
+ case token_type::rabrace: os << q << '>' << q; break;
+
+ case token_type::assign: os << q << '=' << q; break;
+ case token_type::prepend: os << q << "=+" << q; break;
+ case token_type::append: os << q << "+=" << q; break;
+ case token_type::default_assign: os << q << "?=" << q; break;
+
+ case token_type::equal: os << q << "==" << q; break;
+ case token_type::not_equal: os << q << "!=" << q; break;
+ case token_type::less: os << q << '<' << q; break;
+ case token_type::greater: os << q << '>' << q; break;
+ case token_type::less_equal: os << q << "<=" << q; break;
+ case token_type::greater_equal: os << q << ">=" << q; break;
+
+ case token_type::log_or: os << q << "||" << q; break;
+ case token_type::log_and: os << q << "&&" << q; break;
+ case token_type::log_not: os << q << '!' << q; break;
default: assert (false); // Unhandled extended token.
}
diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx
index e48c088..156e428 100644
--- a/libbuild2/token.hxx
+++ b/libbuild2/token.hxx
@@ -13,7 +13,6 @@
namespace build2
{
-
// Token type.
//
// A line consists of a sequence of words separated by separators and
@@ -36,6 +35,7 @@ namespace build2
colon, // :
dollar, // $
question, // ?
+ percent, // %
comma, // ,
lparen, // (
@@ -44,6 +44,9 @@ namespace build2
lcbrace, // {
rcbrace, // }
+ multi_lcbrace, // {{... (value contains the braces)
+ multi_rcbrace, // }}... (value contains the braces)
+
lsbrace, // [
rsbrace, // ]
@@ -85,20 +88,37 @@ namespace build2
class token;
+ enum class print_mode
+ {
+ // Print eos, newline, and pair separator in the <name> form and other
+ // tokens as literals, single-quoting the word token.
+ //
+ normal,
+
+ // Same as normal but all literals are quoted.
+ //
+ diagnostics,
+
+ // Print all tokens as literals with newline represented as '\n' and eos
+ // as an empty string.
+ //
+ raw
+ };
+
LIBBUILD2_SYMEXPORT void
- token_printer (ostream&, const token&, bool);
+ token_printer (ostream&, const token&, print_mode);
class token
{
public:
- using printer_type = void (ostream&, const token&, bool diag);
+ using printer_type = void (ostream&, const token&, print_mode);
token_type type;
bool separated; // Whitespace-separated from the previous token.
// Quoting can be complete, where the token starts and ends with the quote
// characters and quoting is contiguous or partial where only some part(s)
- // of the token are quoted or quoting continus to the next token.
+ // of the token are quoted or quoting continues to the next token.
//
quote_type qtype;
bool qcomp;
@@ -146,7 +166,25 @@ namespace build2
// Output the token value in a format suitable for diagnostics.
//
inline ostream&
- operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;}
+ operator<< (ostream& o, const token& t)
+ {
+ t.printer (o, t, print_mode::diagnostics);
+ return o;
+ }
+
+ // Note: these are currently only used for sanity checks.
+ //
+ inline bool
+ operator== (const token& x, const token& y)
+ {
+ return x.type == y.type && x.value == y.value;
+ }
+
+ inline bool
+ operator!= (const token& x, const token& y)
+ {
+ return !(x == y);
+ }
// Context-dependent lexing (see lexer_mode for details).
//
diff --git a/libbuild2/types.hxx b/libbuild2/types.hxx
index d20fa22..6582c3a 100644
--- a/libbuild2/types.hxx
+++ b/libbuild2/types.hxx
@@ -230,6 +230,7 @@ namespace build2
using butl::path_name_view;
using butl::path_name_value;
using butl::dir_path;
+ using butl::dir_name_view;
using butl::path_cast;
using butl::basic_path;
using butl::invalid_path;
@@ -347,6 +348,12 @@ namespace build2
location (uint64_t l, uint64_t c): line (l), column (c) {}
};
+ // Print in the <file>:<line>:<column> form with 0 lines/columns not
+ // printed. Nothing is printed for an empty location.
+ //
+ ostream&
+ operator<< (ostream&, const location&);
+
// Similar (and implicit-convertible) to the above but stores a copy of the
// path.
//
@@ -371,7 +378,6 @@ namespace build2
LIBBUILD2_SYMEXPORT ostream&
operator<< (ostream&, run_phase); // utility.cxx
-
}
// In order to be found (via ADL) these have to be either in std:: or in
diff --git a/libbuild2/types.ixx b/libbuild2/types.ixx
index c770842..750c8c7 100644
--- a/libbuild2/types.ixx
+++ b/libbuild2/types.ixx
@@ -3,6 +3,27 @@
namespace build2
{
+ // location
+ //
+ inline ostream&
+ operator<< (ostream& o, const location& l)
+ {
+ if (!l.empty ())
+ {
+ o << l.file;
+
+ if (l.line != 0)
+ {
+ o << ':' << l.line;
+
+ if (l.column != 0)
+ o << ':' << l.column;
+ }
+ }
+
+ return o;
+ }
+
// Note that in the constructors we cannot pass the file data member to the
// base class constructor as it is not initialized yet (and so its base
// path/name pointers are not initialized). Thus, we initialize the path
diff --git a/libbuild2/utility.cxx b/libbuild2/utility.cxx
index a45e901..81f6809 100644
--- a/libbuild2/utility.cxx
+++ b/libbuild2/utility.cxx
@@ -14,6 +14,8 @@
#include <libbuild2/variable.hxx>
#include <libbuild2/diagnostics.hxx>
+#include <libbuild2/script/regex.hxx> // script::regex::init()
+
using namespace std;
using namespace butl;
@@ -588,5 +590,7 @@ namespace build2
{
fail << "unable to obtain home directory: " << e;
}
+
+ script::regex::init ();
}
}
diff --git a/libbuild2/utility.hxx b/libbuild2/utility.hxx
index 9800d6c..7a6ada2 100644
--- a/libbuild2/utility.hxx
+++ b/libbuild2/utility.hxx
@@ -73,6 +73,7 @@ namespace build2
using butl::trim;
using butl::next_word;
using butl::sanitize_identifier;
+ using butl::sanitize_strlit;
using butl::make_guard;
using butl::make_exception_guard;