From b808c255b6a9ddba085bf5646e7d20ec344f2e2d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 28 Apr 2020 08:48:53 +0200 Subject: Initial support for ad hoc recipes (still work in progress) --- bootstrap-clang.bat | 2 + bootstrap-mingw.bat | 2 + bootstrap-msvc.bat | 2 + bootstrap.gmake | 2 + bootstrap.sh | 2 + build2/b.cxx | 2 +- build2/cli/rule.cxx | 3 +- doc/testscript.cli | 28 +- libbuild2/action.hxx | 21 +- libbuild2/algorithm.cxx | 58 +- libbuild2/algorithm.ixx | 4 +- .../script/lexer+command-line.test.testscript | 164 ++ .../build/script/lexer+first-token.test.testscript | 30 + .../script/lexer+second-token.test.testscript | 53 + .../script/lexer+variable-line.test.testscript | 12 + .../build/script/lexer+variable.test.testscript | 25 + libbuild2/build/script/lexer.cxx | 270 +++ libbuild2/build/script/lexer.hxx | 80 + libbuild2/build/script/lexer.test.cxx | 77 + .../build/script/parser+cleanup.test.testscript | 57 + .../build/script/parser+command-if.test.testscript | 395 ++++ .../script/parser+command-re-parse.test.testscript | 11 + libbuild2/build/script/parser+exit.test.testscript | 26 + .../build/script/parser+expansion.test.testscript | 35 + .../script/parser+here-document.test.testscript | 272 +++ .../script/parser+here-string.test.testscript | 34 + libbuild2/build/script/parser+line.test.testscript | 72 + .../build/script/parser+pipe-expr.test.testscript | 132 ++ .../build/script/parser+pre-parse.test.testscript | 22 + .../build/script/parser+redirect.test.testscript | 525 +++++ .../build/script/parser+regex.test.testscript | 225 ++ .../build/script/parser+variable.test.testscript | 41 + libbuild2/build/script/parser.cxx | 391 ++++ libbuild2/build/script/parser.hxx | 96 + libbuild2/build/script/parser.test.cxx | 224 ++ libbuild2/build/script/runner.cxx | 133 ++ libbuild2/build/script/runner.hxx | 84 + libbuild2/build/script/script.cxx | 236 +++ libbuild2/build/script/script.hxx | 156 ++ libbuild2/build/script/token.cxx | 23 + libbuild2/build/script/token.hxx | 36 + libbuild2/buildfile | 16 +- libbuild2/cc/compile-rule.cxx | 14 +- libbuild2/cc/init.cxx | 6 +- libbuild2/cc/lexer.cxx | 13 +- libbuild2/cc/link-rule.cxx | 24 +- libbuild2/config/operation.cxx | 4 +- libbuild2/context.cxx | 12 +- libbuild2/context.hxx | 19 +- libbuild2/dist/operation.cxx | 2 +- libbuild2/dump.cxx | 199 +- libbuild2/file.cxx | 65 +- libbuild2/file.hxx | 10 +- libbuild2/functions-name.cxx | 108 +- libbuild2/lexer+foreign.test.testscript | 96 + libbuild2/lexer+normal.test.testscript | 54 + libbuild2/lexer+quoting.test.testscript | 2 + libbuild2/lexer.cxx | 172 +- libbuild2/lexer.hxx | 64 +- libbuild2/lexer.test.cxx | 16 +- libbuild2/module.cxx | 318 +-- libbuild2/name.hxx | 12 + libbuild2/parser.cxx | 544 +++-- libbuild2/parser.hxx | 43 +- libbuild2/recipe.hxx | 15 + libbuild2/rule.cxx | 892 +++++++- libbuild2/rule.hxx | 142 +- libbuild2/script/builtin-options.cxx | 661 ++++++ libbuild2/script/builtin-options.hxx | 339 +++ libbuild2/script/builtin-options.ixx | 182 ++ libbuild2/script/builtin.cli | 21 + .../script/lexer+command-expansion.test.testscript | 321 +++ libbuild2/script/lexer.cxx | 431 ++++ libbuild2/script/lexer.hxx | 139 ++ libbuild2/script/lexer.test.cxx | 76 + libbuild2/script/parser.cxx | 2015 ++++++++++++++++++ libbuild2/script/parser.hxx | 189 ++ libbuild2/script/regex.cxx | 436 ++++ libbuild2/script/regex.hxx | 678 ++++++ libbuild2/script/regex.ixx | 31 + libbuild2/script/regex.test.cxx | 303 +++ libbuild2/script/run.cxx | 2020 ++++++++++++++++++ libbuild2/script/run.hxx | 75 
+ libbuild2/script/script.cxx | 659 ++++++ libbuild2/script/script.hxx | 471 +++++ libbuild2/script/script.ixx | 56 + libbuild2/script/token.cxx | 53 + libbuild2/script/token.hxx | 66 + libbuild2/target-key.hxx | 12 +- libbuild2/target.cxx | 14 +- libbuild2/target.hxx | 11 + libbuild2/target.ixx | 17 +- libbuild2/test/init.cxx | 4 - libbuild2/test/script/builtin-options.cxx | 667 ------ libbuild2/test/script/builtin-options.hxx | 345 --- libbuild2/test/script/builtin-options.ixx | 188 -- libbuild2/test/script/builtin.cli | 25 - .../script/lexer+command-expansion.test.testscript | 247 --- libbuild2/test/script/lexer.cxx | 251 +-- libbuild2/test/script/lexer.hxx | 40 +- libbuild2/test/script/lexer.test.cxx | 13 +- libbuild2/test/script/parser+exit.test.testscript | 2 +- .../test/script/parser+redirect.test.testscript | 8 +- libbuild2/test/script/parser+regex.test.testscript | 5 +- .../test/script/parser+variable.test.testscript | 19 + libbuild2/test/script/parser.cxx | 2199 ++------------------ libbuild2/test/script/parser.hxx | 124 +- libbuild2/test/script/regex.cxx | 439 ---- libbuild2/test/script/regex.hxx | 684 ------ libbuild2/test/script/regex.ixx | 34 - libbuild2/test/script/regex.test.cxx | 303 --- libbuild2/test/script/runner.cxx | 2046 +----------------- libbuild2/test/script/runner.hxx | 15 +- libbuild2/test/script/script.cxx | 514 +---- libbuild2/test/script/script.hxx | 378 +--- libbuild2/test/script/script.ixx | 59 - libbuild2/test/script/token.cxx | 35 +- libbuild2/test/script/token.hxx | 31 +- libbuild2/token.cxx | 109 +- libbuild2/token.hxx | 48 +- libbuild2/types.hxx | 8 +- libbuild2/types.ixx | 21 + libbuild2/utility.cxx | 4 + libbuild2/utility.hxx | 1 + tests/dependency/chain/testscript | 2 +- tests/dependency/recipe/buildfile | 4 + tests/dependency/recipe/testscript | 344 +++ tests/test/script/builtin/mv.testscript | 8 +- tests/test/script/builtin/rm.testscript | 6 +- tests/test/script/builtin/rmdir.testscript | 6 +- tests/test/script/runner/cleanup.testscript | 12 +- tests/test/script/runner/redirect.testscript | 195 +- 132 files changed, 16835 insertions(+), 8809 deletions(-) create mode 100644 libbuild2/build/script/lexer+command-line.test.testscript create mode 100644 libbuild2/build/script/lexer+first-token.test.testscript create mode 100644 libbuild2/build/script/lexer+second-token.test.testscript create mode 100644 libbuild2/build/script/lexer+variable-line.test.testscript create mode 100644 libbuild2/build/script/lexer+variable.test.testscript create mode 100644 libbuild2/build/script/lexer.cxx create mode 100644 libbuild2/build/script/lexer.hxx create mode 100644 libbuild2/build/script/lexer.test.cxx create mode 100644 libbuild2/build/script/parser+cleanup.test.testscript create mode 100644 libbuild2/build/script/parser+command-if.test.testscript create mode 100644 libbuild2/build/script/parser+command-re-parse.test.testscript create mode 100644 libbuild2/build/script/parser+exit.test.testscript create mode 100644 libbuild2/build/script/parser+expansion.test.testscript create mode 100644 libbuild2/build/script/parser+here-document.test.testscript create mode 100644 libbuild2/build/script/parser+here-string.test.testscript create mode 100644 libbuild2/build/script/parser+line.test.testscript create mode 100644 libbuild2/build/script/parser+pipe-expr.test.testscript create mode 100644 libbuild2/build/script/parser+pre-parse.test.testscript create mode 100644 libbuild2/build/script/parser+redirect.test.testscript create mode 100644 
libbuild2/build/script/parser+regex.test.testscript create mode 100644 libbuild2/build/script/parser+variable.test.testscript create mode 100644 libbuild2/build/script/parser.cxx create mode 100644 libbuild2/build/script/parser.hxx create mode 100644 libbuild2/build/script/parser.test.cxx create mode 100644 libbuild2/build/script/runner.cxx create mode 100644 libbuild2/build/script/runner.hxx create mode 100644 libbuild2/build/script/script.cxx create mode 100644 libbuild2/build/script/script.hxx create mode 100644 libbuild2/build/script/token.cxx create mode 100644 libbuild2/build/script/token.hxx create mode 100644 libbuild2/lexer+foreign.test.testscript create mode 100644 libbuild2/script/builtin-options.cxx create mode 100644 libbuild2/script/builtin-options.hxx create mode 100644 libbuild2/script/builtin-options.ixx create mode 100644 libbuild2/script/builtin.cli create mode 100644 libbuild2/script/lexer+command-expansion.test.testscript create mode 100644 libbuild2/script/lexer.cxx create mode 100644 libbuild2/script/lexer.hxx create mode 100644 libbuild2/script/lexer.test.cxx create mode 100644 libbuild2/script/parser.cxx create mode 100644 libbuild2/script/parser.hxx create mode 100644 libbuild2/script/regex.cxx create mode 100644 libbuild2/script/regex.hxx create mode 100644 libbuild2/script/regex.ixx create mode 100644 libbuild2/script/regex.test.cxx create mode 100644 libbuild2/script/run.cxx create mode 100644 libbuild2/script/run.hxx create mode 100644 libbuild2/script/script.cxx create mode 100644 libbuild2/script/script.hxx create mode 100644 libbuild2/script/script.ixx create mode 100644 libbuild2/script/token.cxx create mode 100644 libbuild2/script/token.hxx delete mode 100644 libbuild2/test/script/builtin-options.cxx delete mode 100644 libbuild2/test/script/builtin-options.hxx delete mode 100644 libbuild2/test/script/builtin-options.ixx delete mode 100644 libbuild2/test/script/builtin.cli delete mode 100644 libbuild2/test/script/lexer+command-expansion.test.testscript create mode 100644 libbuild2/test/script/parser+variable.test.testscript delete mode 100644 libbuild2/test/script/regex.cxx delete mode 100644 libbuild2/test/script/regex.hxx delete mode 100644 libbuild2/test/script/regex.ixx delete mode 100644 libbuild2/test/script/regex.test.cxx delete mode 100644 libbuild2/test/script/script.ixx create mode 100644 tests/dependency/recipe/buildfile create mode 100644 tests/dependency/recipe/testscript diff --git a/bootstrap-clang.bat b/bootstrap-clang.bat index 0e99f53..00302e9 100644 --- a/bootstrap-clang.bat +++ b/bootstrap-clang.bat @@ -62,6 +62,8 @@ rem set "src=build2" set "src=%src% libbuild2" +set "src=%src% libbuild2\script" +set "src=%src% libbuild2\build\script" set "src=%src% libbuild2\config" set "src=%src% libbuild2\dist" set "src=%src% libbuild2\test" diff --git a/bootstrap-mingw.bat b/bootstrap-mingw.bat index f8fb441..df7e677 100644 --- a/bootstrap-mingw.bat +++ b/bootstrap-mingw.bat @@ -62,6 +62,8 @@ rem set "src=build2" set "src=%src% libbuild2" +set "src=%src% libbuild2\script" +set "src=%src% libbuild2\build\script" set "src=%src% libbuild2\config" set "src=%src% libbuild2\dist" set "src=%src% libbuild2\test" diff --git a/bootstrap-msvc.bat b/bootstrap-msvc.bat index 4ac93ad..3d74427 100644 --- a/bootstrap-msvc.bat +++ b/bootstrap-msvc.bat @@ -93,6 +93,8 @@ rem set "src=build2" set "src=%src% libbuild2" +set "src=%src% libbuild2\script" +set "src=%src% libbuild2\build\script" set "src=%src% libbuild2\config" set "src=%src% libbuild2\dist" set "src=%src% 
libbuild2\test" diff --git a/bootstrap.gmake b/bootstrap.gmake index 404c5fb..1e0e8e2 100644 --- a/bootstrap.gmake +++ b/bootstrap.gmake @@ -152,6 +152,8 @@ endif # Note: list nested subdirectories first (used in clean). # libbuild2_sub := \ +script \ +build/script \ config \ dist \ test/script \ diff --git a/bootstrap.sh b/bootstrap.sh index a6b98fa..14e52cf 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -118,6 +118,8 @@ fi src="build2/*.cxx" src="$src libbuild2/*.cxx" +src="$src libbuild2/script/*.cxx" +src="$src libbuild2/build/script/*.cxx" src="$src libbuild2/config/*.cxx" src="$src libbuild2/dist/*.cxx" src="$src libbuild2/test/*.cxx" diff --git a/build2/b.cxx b/build2/b.cxx index cdcfd59..fdd1b1c 100644 --- a/build2/b.cxx +++ b/build2/b.cxx @@ -1015,7 +1015,7 @@ main (int argc, char* argv[]) // use to the bootstrap files (other than src-root.build, which, // BTW, doesn't need to exist if src_root == out_root). // - scope& rs (create_root (gs, out_root, src_root)->second); + scope& rs (create_root (*ctx, out_root, src_root)->second); bool bstrapped (bootstrapped (rs)); diff --git a/build2/cli/rule.cxx b/build2/cli/rule.cxx index 3109689..99b6bee 100644 --- a/build2/cli/rule.cxx +++ b/build2/cli/rule.cxx @@ -222,7 +222,8 @@ namespace build2 // Update prerequisites and determine if any relevant ones render us // out-of-date. Note that currently we treat all the prerequisites as - // potentially affecting the result (think prologues/epilogues, etc). + // potentially affecting the result (think prologues/epilogues, CLI + // compiler target itself, etc). // timestamp mt (t.load_mtime (tp)); auto pr (execute_prerequisites (a, t, mt)); diff --git a/doc/testscript.cli b/doc/testscript.cli index 6b47dad..4c78e18 100644 --- a/doc/testscript.cli +++ b/doc/testscript.cli @@ -545,7 +545,7 @@ complete picture: \ $* 'World' >'Hello, World!' : command-name -$* 'John' 'Jane' >EOO : command-names +$* 'John' 'Jane' >>EOO : command-names Hello, Jane! Hello, John! EOO @@ -943,7 +943,7 @@ Alternatively, we can use an absolute path: \ Inside the scope working directory filesystem names that start with \c{stdin}, -\c{stdout}, \c{stderr}, as well as, \c{cmd-} are reserved. +\c{stdout}, and \c{stderr} are reserved. To execute a test scope its commands (including variable assignments) are executed sequentially and in the order specified. If any of the commands @@ -1536,9 +1536,9 @@ stderr: '2'(out-redirect) in-redirect: '<-'|\ '<|'|\ - '<'{':'?'/'?} |\ - '<<'{':'?'/'?} |\ - '<<<' + ('<='|'<<<') |\ + ('<<='|'<<'){':'?'/'?} |\ + ('<<<='|'<'){':'?'/'?} out-redirect: '>-'|\ '>|'|\ @@ -1546,9 +1546,9 @@ out-redirect: '>-'|\ '>=' |\ '>+' |\ '>&' ('1'|'2')|\ - '>'{':'?'/'?}'~'? |\ - '>>'{':'?'/'?}'~'? |\ - '>>>' + ('>?'|'>>>') |\ + ('>>?'|'>>'){':'?'/'?}'~'? |\ + ('>>>?'|'>'){':'?'/'?}'~'? here-document: * @@ -1866,9 +1866,9 @@ $* a1>- \ in-redirect: '<-'|\ '<|'|\ - '<'{':'?'/'?} |\ - '<<'{':'?'/'?} |\ - '<<<' + ('<='|'<<<') |\ + ('<<='|'<<'){':'?'/'?} |\ + ('<<<='|'<'){':'?'/'?} \ The \c{stdin} data can come from a pipe, here-string (\c{<}), here-document @@ -1905,9 +1905,9 @@ out-redirect: '>-'|\ '>=' |\ '>+' |\ '>&' ('1'|'2')|\ - '>'{':'?'/'?}'~'? |\ - '>>'{':'?'/'?}'~'? |\ - '>>>' + ('>?'|'>>>') |\ + ('>>?'|'>>'){':'?'/'?}'~'? |\ + ('>>>?'|'>'){':'?'/'?}'~'? 
\ The \c{stdout} and \c{stderr} data can go to a pipe (\c{stdout} only), file diff --git a/libbuild2/action.hxx b/libbuild2/action.hxx index c1e4697..906d7eb 100644 --- a/libbuild2/action.hxx +++ b/libbuild2/action.hxx @@ -11,11 +11,11 @@ namespace build2 { - // While we are using uint8_t for the meta/operation ids, we assume - // that each is limited to 4 bits (max 128 entries) so that we can - // store the combined action id in uint8_t as well. This makes our - // life easier when it comes to defining switch labels for action - // ids (no need to mess with endian-ness). + // While we are using uint8_t for the meta/operation ids, we assume that + // each is limited to 4 bits (max 15 entries @@ this is probably too low) so + // that we can store the combined action id in uint8_t as well. This makes + // our life easier when it comes to defining switch labels for action ids + // (no need to mess with endian-ness). // // Note that 0 is not a valid meta/operation/action id. // @@ -61,6 +61,8 @@ namespace build2 { action (): inner_id (0), outer_id (0) {} // Invalid action. + action (action_id a): action (a >> 4, a & 0xF) {} + // If this is not a nested operation, then outer should be 0. // action (meta_operation_id m, operation_id inner, operation_id outer = 0) @@ -103,6 +105,11 @@ namespace build2 inline bool operator!= (action x, action y) {return !(x == y);} + inline bool operator== (action x, action_id y) {return x == action (y);} + inline bool operator!= (action x, action_id y) {return x != action (y);} + inline bool operator== (action_id x, action y) {return action (x) == y;} + inline bool operator!= (action_id x, action y) {return action (x) != y;} + bool operator> (action, action) = delete; bool operator< (action, action) = delete; bool operator>= (action, action) = delete; @@ -140,6 +147,8 @@ namespace build2 // Id constants for build-in and pre-defined meta/operations. // + // Note: currently max 15 (see above). + // const meta_operation_id noop_id = 1; // nomop? const meta_operation_id perform_id = 2; const meta_operation_id configure_id = 3; @@ -152,6 +161,8 @@ namespace build2 // that no operation was explicitly specified by the user. If adding // something here remember to update the man page. // + // Note: currently max 15 (see above). + // const operation_id default_id = 1; // Shall be first. const operation_id update_id = 2; // Shall be second. const operation_id clean_id = 3; diff --git a/libbuild2/algorithm.cxx b/libbuild2/algorithm.cxx index ef1a78d..11f2a56 100644 --- a/libbuild2/algorithm.cxx +++ b/libbuild2/algorithm.cxx @@ -318,12 +318,55 @@ namespace build2 // Return the matching rule or NULL if no match and try_match is true. // const rule_match* - match_impl (action a, target& t, const rule* skip, bool try_match) + match_rule (action a, target& t, const rule* skip, bool try_match) { + // First check for an ad hoc recipe. + // + if (!t.adhoc_recipes.empty ()) + { + auto df = make_diag_frame ( + [a, &t](const diag_record& dr) + { + if (verb != 0) + dr << info << "while matching ad hoc recipe to " << diag_do (a, t); + }); + + // @@ TODO: + // + // If action is Y-for-X, how would we distinguish between X and Y-for-X? + // See match_rule() for the hairy details. We could start with + // supporting just the inner case. Or we could try to just match an + // inner rule by default? I think we need a clear use-case to see what's + // the correct semantics.
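As an illustration (not part of the original patch): the new action(action_id) constructor in the action.hxx hunk above implies that a combined action id carries the meta-operation id in the high nibble and the (inner) operation id in the low nibble, which is where the 4-bit/max-15 limit comes from. Below is a minimal standalone sketch of that packing, reusing the perform_id and update_id values shown above; the variable names and the assert-based demo are illustrative only.

#include <cassert>
#include <cstdint>

int main ()
{
  // Values from action.hxx above: perform_id = 2 (meta-operation),
  // update_id = 2 (operation).
  std::uint8_t mo (2);
  std::uint8_t op (2);

  // Pack the same way action (action_id a): action (a >> 4, a & 0xF)
  // unpacks: meta-operation in the high nibble, operation in the low one.
  auto id (static_cast<std::uint8_t> ((mo << 4) | op));

  assert ((id >> 4) == mo);  // Meta-operation id.
  assert ((id & 0xF) == op); // (Inner) operation id.
  assert (id == 0x22);       // Hence each id must stay below 16.

  return 0;
}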
+ + auto b (t.adhoc_recipes.begin ()), e (t.adhoc_recipes.end ()); + auto i (find_if (b, e, + [a, &t] (const adhoc_recipe& r) + { + return r.action == a && + r.rule->match (a, t, string () /* hint */, nullopt); + })); + + if (i == e) + i = find_if (b, e, + [a, &t] (const adhoc_recipe& r) + { + return r.action != a && + r.rule->match (a, t, string () /* hint */, r.action); + }); + if (i != e) + return &i->rule->rule_match; + } + // If this is an outer operation (Y-for-X), then we look for rules - // registered for the outer id (X). Note that we still pass the original - // action to the rule's match() function so that it can distinguish - // between a pre/post operation (Y-for-X) and the actual operation (X). + // registered for the outer id (X; yes, it's really outer). Note that we + // still pass the original action to the rule's match() function so that + // it can distinguish between a pre/post operation (Y-for-X) and the + // actual operation (X). + // + // If you are then wondering how would a rule for Y ever match in case of + // Y-for-X, the answer is via a rule that matches for X and then, in case + // of Y-for-X, matches an inner rule for just Y (see match_inner()). // meta_operation_id mo (a.meta_operation ()); operation_id o (a.inner () ? a.operation () : a.outer_operation ()); @@ -561,7 +604,7 @@ namespace build2 t.prerequisite_targets[a].clear (); if (a.inner ()) t.clear_data (); - const rule_match* r (match_impl (a, t, nullptr, try_match)); + const rule_match* r (match_rule (a, t, nullptr, try_match)); assert (l.offset != target::offset_tried); // Should have failed. @@ -972,8 +1015,11 @@ namespace build2 if (r != nullptr) { + // Make it ad hoc so that it doesn't end up in prerequisite_targets + // after execution. + // match (a, *r); - t.prerequisite_targets[a].emplace_back (r); + t.prerequisite_targets[a].emplace_back (r, include_type::adhoc); } return r; diff --git a/libbuild2/algorithm.ixx b/libbuild2/algorithm.ixx index 7231fec..5f9143a 100644 --- a/libbuild2/algorithm.ixx +++ b/libbuild2/algorithm.ixx @@ -263,7 +263,7 @@ namespace build2 } LIBBUILD2_SYMEXPORT const rule_match* - match_impl (action, target&, const rule* skip, bool try_match = false); + match_rule (action, target&, const rule* skip, bool try_match = false); LIBBUILD2_SYMEXPORT recipe apply_impl (action, target&, const rule_match&); @@ -424,7 +424,7 @@ namespace build2 // Note: we don't touch any of the t[a] state since that was/will be set // for the delegating rule. // - const rule_match* r (match_impl (a, t, &dr, try_match)); + const rule_match* r (match_rule (a, t, &dr, try_match)); return r != nullptr ? apply_impl (a, t, *r) : empty_recipe; } diff --git a/libbuild2/build/script/lexer+command-line.test.testscript b/libbuild2/build/script/lexer+command-line.test.testscript new file mode 100644 index 0000000..3eceae8 --- /dev/null +++ b/libbuild2/build/script/lexer+command-line.test.testscript @@ -0,0 +1,164 @@ +# file : libbuild2/build/script/lexer+command-line.test.testscript +# license : MIT; see accompanying LICENSE file + +test.arguments = command-line + +: redirect +: +{ + : pass + : + $* <"cmd <| 1>|" >>EOO + 'cmd' + <| + '1' + >| + + EOO + + : null + : + $* <"cmd <- 1>-" >>EOO + 'cmd' + <- + '1' + >- + + EOO + + : trace + : + $* <"cmd 1>!" >>EOO + 'cmd' + '1' + >! + + EOO + + : merge + : + $* <"cmd 1>&2" >>EOO + 'cmd' + '1' + >& + '2' + + EOO + + : str + : + $* <"cmd <<<=a 1>>>?b" >>EOO + 'cmd' + <<<= + 'a' + '1' + >>>? 
+ 'b' + + EOO + + : str-nn + : + $* <"cmd <<<=:a 1>>>?:b" >>EOO + 'cmd' + <<<=: + 'a' + '1' + >>>?: + 'b' + + EOO + + : str-nn-alias + : + $* <"cmd <<<:a 1>>>?:b" >>EOO + 'cmd' + <<<: + 'a' + '1' + >>>?: + 'b' + + EOO + + : doc + : + $* <"cmd <>EOO" >>EOO + 'cmd' + << + 'EOI' + '1' + >> + 'EOO' + + EOO + + : doc-nn + : + $* <"cmd <<:EOI 1>>?:EOO" >>EOO + 'cmd' + <<: + 'EOI' + '1' + >>?: + 'EOO' + + EOO + + : file-cmp + : + $* <"cmd <=in >?out 2>?err" >>EOO + 'cmd' + <= + 'in' + >? + 'out' + '2' + >? + 'err' + + EOO + + : file-write + : + $* <"cmd >=out 2>+err" >>EOO + 'cmd' + >= + 'out' + '2' + >+ + 'err' + + EOO +} + +: cleanup +: +{ + : always + : + $* <"cmd &file" >>EOO + 'cmd' + & + 'file' + + EOO + + : maybe + : + $* <"cmd &?file" >>EOO + 'cmd' + &? + 'file' + + EOO + + : never + : + $* <"cmd &!file" >>EOO + 'cmd' + &! + 'file' + + EOO +} diff --git a/libbuild2/build/script/lexer+first-token.test.testscript b/libbuild2/build/script/lexer+first-token.test.testscript new file mode 100644 index 0000000..6709e60 --- /dev/null +++ b/libbuild2/build/script/lexer+first-token.test.testscript @@ -0,0 +1,30 @@ +# file : libbuild2/build/script/lexer+first-token.test.testscript +# license : MIT; see accompanying LICENSE file + +# Note: this mode auto-expires after each token. +# +test.arguments = first-token + +: assign +: +$* <"foo=" >>EOO +'foo' +'=' + +EOO + +: append +: +$* <"foo+=" >>EOO +'foo' +'+=' + +EOO + +: prepend +: +$* <"foo=+" >>EOO +'foo' +'=+' + +EOO diff --git a/libbuild2/build/script/lexer+second-token.test.testscript b/libbuild2/build/script/lexer+second-token.test.testscript new file mode 100644 index 0000000..d5f3329 --- /dev/null +++ b/libbuild2/build/script/lexer+second-token.test.testscript @@ -0,0 +1,53 @@ +# file : libbuild2/build/script/lexer+second-token.test.testscript +# license : MIT; see accompanying LICENSE file + +# Note: this mode auto-expires after each token. +# +test.arguments = second-token + +: assign +: +$* <"=foo" >>EOO += +'foo' + +EOO + +: append +: +$* <"+= foo" >>EOO ++= +'foo' + +EOO + +: prepend +: +$* <" =+ foo" >>EOO +=+ +'foo' + +EOO + +: assign-leading +: +$* <"foo=bar" >>EOO +'foo=bar' + +EOO + +: append-leading +: +$* <"foo+= bar" >>EOO +'foo+=' +'bar' + +EOO + +: prepend-leading +: +$* <"foo =+bar" >>EOO +'foo' +'=+bar' + +EOO diff --git a/libbuild2/build/script/lexer+variable-line.test.testscript b/libbuild2/build/script/lexer+variable-line.test.testscript new file mode 100644 index 0000000..e4b5adb --- /dev/null +++ b/libbuild2/build/script/lexer+variable-line.test.testscript @@ -0,0 +1,12 @@ +# file : libbuild2/build/script/lexer+variable-line.test.testscript +# license : MIT; see accompanying LICENSE file + +test.arguments = variable-line + +: basic +: +$* <"a 'b c'" >>EOO +'a' +'b c' + +EOO diff --git a/libbuild2/build/script/lexer+variable.test.testscript b/libbuild2/build/script/lexer+variable.test.testscript new file mode 100644 index 0000000..54b0a30 --- /dev/null +++ b/libbuild2/build/script/lexer+variable.test.testscript @@ -0,0 +1,25 @@ +# file : libbuild2/build/script/lexer+variable.test.testscript +# license : MIT; see accompanying LICENSE file + +# Test handling custom variable names ($*, $~, $NN). 
+# +test.arguments = variable + +: primary-target +: +{ + : only + : + $* <">" >>EOO + '>' + + EOO + + : followed + : + $* <">abc" >>EOO + '>' + 'abc' + + EOO +} diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx new file mode 100644 index 0000000..7b8bdd4 --- /dev/null +++ b/libbuild2/build/script/lexer.cxx @@ -0,0 +1,270 @@ +// file : libbuild2/build/script/lexer.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + using type = token_type; + + build2::script::redirect_aliases lexer::redirect_aliases { + type (type::in_file), + type (type::in_doc), + type (type::in_str), + type (type::out_file_ovr), + type (type::out_file_app), + nullopt}; + + void lexer:: + mode (build2::lexer_mode m, + char ps, + optional esc, + uintptr_t data) + { + bool a (false); // attributes + + const char* s1 (nullptr); + const char* s2 (nullptr); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::command_line: + { + s1 = "=!|&<> $(#\t\n"; + s2 = "== "; + break; + } + case lexer_mode::first_token: + { + // First token on the script line. Like command_line but + // recognizes variable assignments as separators. + // + s1 = "=+!|&<> $(#\t\n"; + s2 = " == "; + break; + } + case lexer_mode::second_token: + { + // Second token on the script line. Like command_line but + // recognizes leading variable assignments. + // + // Note that to recognize only leading assignments we shouldn't + // add them to the separator strings (so this is identical to + // command_line). + // + s1 = "=!|&<> $(#\t\n"; + s2 = "== "; + break; + } + case lexer_mode::variable_line: + { + // Like value except we don't recognize '{'. + // + s1 = " $(#\t\n"; + s2 = " "; + break; + } + default: + { + base_lexer::mode (m, ps, esc); + return; + } + } + + assert (ps == '\0'); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); + } + + token lexer:: + next () + { + token r; + + switch (state_.top ().mode) + { + case lexer_mode::command_line: + case lexer_mode::first_token: + case lexer_mode::second_token: + case lexer_mode::variable_line: + r = next_line (); + break; + default: return base_lexer::next (); + } + + if (r.qtype != quote_type::unquoted) + ++quoted_; + + return r; + } + + token lexer:: + next_line () + { + bool sep (skip_spaces ().first); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + state st (state_.top ()); // Make copy (see first/second_token). + lexer_mode m (st.mode); + + auto make_token = [&sep, ln, cn] (type t) + { + return token (t, sep, ln, cn, token_printer); + }; + + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). + // + if (st.attributes) + { + assert (m == lexer_mode::variable_line); + + state_.top ().attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } + + if (eos (c)) + return make_token (type::eos); + + // Expire certain modes at the end of the token. Do it early in case + // we push any new mode (e.g., double quote). + // + if (m == lexer_mode::first_token || m == lexer_mode::second_token) + state_.pop (); + + // NOTE: remember to update mode() if adding new special characters. + + switch (c) + { + case '\n': + { + // Expire variable value mode at the end of the line. 
+ // + if (m == lexer_mode::variable_line) + state_.pop (); + + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + + // Command line operator/separators. + // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token) + { + switch (c) + { + // Comparison (==, !=). + // + case '=': + case '!': + { + if (peek () == '=') + { + get (); + return make_token (c == '=' ? type::equal : type::not_equal); + } + } + } + } + + // Command operators. + // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token) + { + if (optional t = next_cmd_op (c, sep)) + return move (*t); + } + + // Variable assignment (=, +=, =+). + // + if (m == lexer_mode::second_token) + { + switch (c) + { + case '=': + { + if (peek () == '+') + { + get (); + return make_token (type::prepend); + } + else + return make_token (type::assign); + } + case '+': + { + if (peek () == '=') + { + get (); + return make_token (type::append); + } + } + } + } + + // Otherwise it is a word. + // + unget (c); + return word (st, sep); + } + + token lexer:: + word (state st, bool sep) + { + lexer_mode m (st.mode); + + // Customized implementation that handles special variable names ($>, + // $<, $~). + // + // @@ TODO: $(<), $(>): feels like this will have to somehow be + // handled at the top-level lexer level. Maybe provide a + // string of one-char special variable names as state::data? + // + if (m != lexer_mode::variable) + return base_lexer::word (st, sep); + + xchar c (peek ()); + + if (c != '>' && c != '<' && c != '~') + return base_lexer::word (st, sep); + + get (); + + state_.pop (); // Expire the variable mode. + return token (string (1, c), + sep, + quote_type::unquoted, false, + c.line, c.column); + } + } + } +} diff --git a/libbuild2/build/script/lexer.hxx b/libbuild2/build/script/lexer.hxx new file mode 100644 index 0000000..7d919e5 --- /dev/null +++ b/libbuild2/build/script/lexer.hxx @@ -0,0 +1,80 @@ +// file : libbuild2/build/script/lexer.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_BUILD_SCRIPT_LEXER_HXX +#define LIBBUILD2_BUILD_SCRIPT_LEXER_HXX + +#include +#include + +#include + +#include + +namespace build2 +{ + namespace build + { + namespace script + { + struct lexer_mode: build2::script::lexer_mode + { + using base_type = build2::script::lexer_mode; + + enum + { + command_line = base_type::value_next, + first_token, // Expires at the end of the token. + second_token, // Expires at the end of the token. + variable_line // Expires at the end of the line. + }; + + lexer_mode () = default; + lexer_mode (value_type v): base_type (v) {} + lexer_mode (build2::lexer_mode v): base_type (v) {} + }; + + class lexer: public build2::script::lexer + { + public: + using base_lexer = build2::script::lexer; + + // Note that neither the name nor escape arguments are copied. + // + lexer (istream& is, + const path_name& name, + uint64_t line, // Start line in the stream. 
+ lexer_mode m, + const char* escapes = nullptr) + : base_lexer (is, name, line, + nullptr /* escapes */, + false /* set_mode */, + redirect_aliases) + { + mode (m, '\0', escapes); + } + + virtual void + mode (build2::lexer_mode, + char = '\0', + optional = nullopt, + uintptr_t = 0) override; + + virtual token + next () override; + + public: + static redirect_aliases_type redirect_aliases; + + private: + token + next_line (); + + virtual token + word (state, bool) override; + }; + } + } +} + +#endif // LIBBUILD2_BUILD_SCRIPT_LEXER_HXX diff --git a/libbuild2/build/script/lexer.test.cxx b/libbuild2/build/script/lexer.test.cxx new file mode 100644 index 0000000..1c47442 --- /dev/null +++ b/libbuild2/build/script/lexer.test.cxx @@ -0,0 +1,77 @@ +// file : libbuild2/build/script/lexer.test.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include + +#include +#include + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + // Usage: argv[0] + // + int + main (int argc, char* argv[]) + { + lexer_mode m; + { + assert (argc == 2); + string s (argv[1]); + + if (s == "command-line") m = lexer_mode::command_line; + else if (s == "first-token") m = lexer_mode::first_token; + else if (s == "second-token") m = lexer_mode::second_token; + else if (s == "variable-line") m = lexer_mode::variable_line; + else if (s == "variable") m = lexer_mode::variable; + else assert (false); + } + + try + { + cin.exceptions (istream::failbit | istream::badbit); + + // Some modes auto-expire so we need something underneath. + // + bool u (m != lexer_mode::command_line); + + path_name in (""); + lexer l (cin, in, 1 /* line */, lexer_mode::command_line); + if (u) + l.mode (m); + + // No use printing eos since we will either get it or loop forever. + // + for (token t (l.next ()); t.type != token_type::eos; t = l.next ()) + { + // Print each token on a separate line without quoting operators. 
+ // + t.printer (cout, t, print_mode::normal); + cout << endl; + } + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::build::script::main (argc, argv); +} diff --git a/libbuild2/build/script/parser+cleanup.test.testscript b/libbuild2/build/script/parser+cleanup.test.testscript new file mode 100644 index 0000000..9a5af3d --- /dev/null +++ b/libbuild2/build/script/parser+cleanup.test.testscript @@ -0,0 +1,57 @@ +# file : libbuild2/build/script/parser+cleanup.test.testscript +# license : MIT; see accompanying LICENSE file + +: always +: +$* <>EOO +cmd &file +EOI +cmd &file +EOO + +: maybe +: +$* <>EOO +cmd &?file +EOI +cmd &?file +EOO + +: never +: +$* <>EOO +cmd &!file +EOI +cmd &!file +EOO + +: empty +: +$* <>EOE != 0 +cmd &"" +EOI +buildfile:11:6: error: empty cleanup path +EOE + +: missed-before +: +{ + : token + : + : Path missed before command next token + : + $* <>EOE != 0 + cmd & >file + EOI + buildfile:11:7: error: missing cleanup path + EOE + + : end + : Test path missed before end of command + : + $* <>EOE != 0 + cmd & + EOI + buildfile:11:6: error: missing cleanup path + EOE +} diff --git a/libbuild2/build/script/parser+command-if.test.testscript b/libbuild2/build/script/parser+command-if.test.testscript new file mode 100644 index 0000000..a18a885 --- /dev/null +++ b/libbuild2/build/script/parser+command-if.test.testscript @@ -0,0 +1,395 @@ +# file : libbuild2/build/script/parser+command-if.test.testscript +# license : MIT; see accompanying LICENSE file + +: if +: +{ + : true + : + $* <>EOO + if true foo + cmd1 + cmd2 + end + EOI + ? true foo + cmd1 + cmd2 + EOO + + : false + : + $* <>EOO + if false foo + cmd1 + cmd2 + end + EOI + ? false foo + EOO + + : not-true + : + $* <>EOO + if! true foo + cmd1 + cmd2 + end + EOI + ? true foo + EOO + + : not-false + : + $* <>EOO + if! false foo + cmd1 + cmd2 + end + EOI + ? false foo + cmd1 + cmd2 + EOO + + : without-command + : + $* <>EOE != 0 + if + cmd + end + EOI + buildfile:11:3: error: missing program + EOE +} + +: elif +: +{ + : true + : + $* <>EOO + if false + cmd1 + cmd2 + elif true + cmd3 + cmd4 + end + EOI + ? false + ? true + cmd3 + cmd4 + EOO + + : false + : + $* <>EOO + if false + cmd1 + cmd2 + elif false + cmd3 + cmd4 + end + EOI + ? false + ? false + EOO + + : not-true + : + $* <>EOO + if false + cmd1 + cmd2 + elif! true + cmd3 + cmd4 + end + EOI + ? false + ? true + EOO + + : not-false + : + $* <>EOO + if false + cmd1 + cmd2 + elif! false + cmd3 + cmd4 + end + EOI + ? false + ? false + cmd3 + cmd4 + EOO + + : without-if + : + $* <>EOE != 0 + cmd + elif true + cmd + end + EOI + buildfile:12:1: error: 'elif' without preceding 'if' + EOE + + : not-without-if + : + $* <>EOE != 0 + cmd + elif! true + cmd + end + EOI + buildfile:12:1: error: 'elif!' without preceding 'if' + EOE + + : after-else + : + $* <>EOE != 0 + if false + cmd + else + cmd + elif true + cmd + end + EOI + buildfile:15:1: error: 'elif' after 'else' + EOE +} + +: else +: +{ + : true + : + $* <>EOO + if false + cmd1 + cmd2 + else + cmd3 + cmd4 + end + EOI + ? false + cmd3 + cmd4 + EOO + + : false + : + $* <>EOO + if true + cmd1 + cmd2 + else + cmd3 + cmd4 + end + EOI + ? true + cmd1 + cmd2 + EOO + + : chain + : + $* <>EOO + if false + cmd + cmd + elif false + cmd + cmd + elif false + cmd + cmd + elif true + cmd1 + cmd2 + elif false + cmd + cmd + else + cmd + cmd + end + EOI + ? false + ? false + ? false + ? 
true + cmd1 + cmd2 + EOO + + : command-after + : + $* <>EOE != 0 + if true + cmd + else cmd + cmd + end + EOI + buildfile:13:6: error: expected newline instead of 'cmd' + EOE + + : without-if + : + $* <>EOE != 0 + cmd + else + cmd + end + EOI + buildfile:12:1: error: 'else' without preceding 'if' + EOE + + : after-else + : + $* <>EOE != 0 + if false + cmd + else + cmd + else + cmd + end + EOI + buildfile:15:1: error: 'else' after 'else' + EOE +} + +: end +{ + : without-if + : + $* <>EOE != 0 + cmd + end + EOI + buildfile:12:1: error: 'end' without preceding 'if' + EOE + + : before + { + : command + : + $* <>EOE != 0 + if true + cmd + end cmd + EOI + buildfile:13:5: error: expected newline instead of 'cmd' + EOE + } +} + +: nested +: +{ + : take + : + $* <>EOO + if true + cmd1 + if false + cmd + elif false + if true + cmd + end + else + cmd2 + end + cmd3 + end + EOI + ? true + cmd1 + ? false + ? false + cmd2 + cmd3 + EOO + + : skip + : + $* <>EOO + if false + cmd1 + if false + cmd + elif false + if true + cmd + end + else + cmd2 + end + cmd3 + else + cmd + end + EOI + ? false + cmd + EOO +} + +: contained +: +{ + : eos + : + $* <>EOE != 0 + if + EOI + buildfile:12:1: error: expected closing 'end' + EOE +} + +: line-index +: +$* -l <>EOO +if false + cmd + if true + cmd + end + cmd +elif false + cmd +else + cmd +end +EOI +? false # 1 +? false # 6 +cmd # 8 +EOO + +: var +: +$* <>EOO +if true + x = foo +else + x = bar +end +cmd $x +EOI +? true +cmd foo +EOO diff --git a/libbuild2/build/script/parser+command-re-parse.test.testscript b/libbuild2/build/script/parser+command-re-parse.test.testscript new file mode 100644 index 0000000..a59b49c --- /dev/null +++ b/libbuild2/build/script/parser+command-re-parse.test.testscript @@ -0,0 +1,11 @@ +# file : libbuild2/build/script/parser+command-re-parse.test.testscript +# license : MIT; see accompanying LICENSE file + +: double-quote +: +$* <>EOO +x = cmd \">-\" "'<-'" +$x +EOI +cmd '>-' '<-' +EOO diff --git a/libbuild2/build/script/parser+exit.test.testscript b/libbuild2/build/script/parser+exit.test.testscript new file mode 100644 index 0000000..53ee1b9 --- /dev/null +++ b/libbuild2/build/script/parser+exit.test.testscript @@ -0,0 +1,26 @@ +# file : libbuild2/build/script/parser+exit.test.testscript +# license : MIT; see accompanying LICENSE file + +: eq +: +$* <>EOO +cmd == 1 +EOI +cmd == 1 +EOO + +: ne +: +$* <>EOO +cmd!=1 +EOI +cmd != 1 +EOO + +: end +: +$* <>EOE != 0 +cmd != 1 <"foo" +EOI +buildfile:11:10: error: expected newline instead of '<' +EOE diff --git a/libbuild2/build/script/parser+expansion.test.testscript b/libbuild2/build/script/parser+expansion.test.testscript new file mode 100644 index 0000000..9f1e774 --- /dev/null +++ b/libbuild2/build/script/parser+expansion.test.testscript @@ -0,0 +1,35 @@ +# file : libbuild2/build/script/parser+expansion.test.testscript +# license : MIT; see accompanying LICENSE file + +: quote +: +: Make sure everything expanded as strings. 
+: +$* <>EOO +x = dir/ proj% proj%name proj%proj%dir/type{name name {name}} +cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}} +cmd $x +EOI +cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}} +cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}} +EOO + +: unterm-quoted-seq +: +$* <>EOE != 0 +x = "'a bc" +cmd xy$x +EOI +:1:8: error: unterminated single-quoted sequence + buildfile:12:5: info: while parsing string 'xy'a bc' +EOE + +: invalid-redirect +: +$* <>EOE != 0 +x = "1>&a" +cmd $x +EOI +:1:4: error: stdout merge redirect file descriptor must be 2 + buildfile:12:5: info: while parsing string '1>&a' +EOE diff --git a/libbuild2/build/script/parser+here-document.test.testscript b/libbuild2/build/script/parser+here-document.test.testscript new file mode 100644 index 0000000..f56a5e1 --- /dev/null +++ b/libbuild2/build/script/parser+here-document.test.testscript @@ -0,0 +1,272 @@ +# file : libbuild2/build/script/parser+here-document.test.testscript +# license : MIT; see accompanying LICENSE file + +: end-marker +: +{ + : missing-newline + : + $* <'cmd <<=' 2>>EOE != 0 + buildfile:11:8: error: expected here-document end marker + EOE + + : missing-newline-alias + : + $* <'cmd <<' 2>>EOE != 0 + buildfile:11:7: error: expected here-document end marker + EOE + + : missing-exit + : + $* <'cmd <<= != 0' 2>>EOE != 0 + buildfile:11:9: error: expected here-document end marker + EOE + + : missing-exit-alias + : + $* <'cmd << != 0' 2>>EOE != 0 + buildfile:11:8: error: expected here-document end marker + EOE + + : missing-empty + : + $* <'cmd <<=""' 2>>EOE != 0 + buildfile:11:8: error: expected here-document end marker + EOE + + : missing-empty-alias + : + $* <'cmd <<""' 2>>EOE != 0 + buildfile:11:7: error: expected here-document end marker + EOE + + : unseparated-expansion + : + $* <'cmd <<=FOO$foo' 2>>EOE != 0 + buildfile:11:11: error: here-document end marker must be literal + EOE + + : unseparated-expansion-alias + : + $* <'cmd <>EOE != 0 + buildfile:11:10: error: here-document end marker must be literal + EOE + + : quoted-single-partial + : + $* <"cmd <<=F'O'O" 2>>EOE != 0 + buildfile:11:8: error: partially-quoted here-document end marker + EOE + + : quoted-double-partial + : + $* <'cmd <<="FO"O' 2>>EOE != 0 + buildfile:11:8: error: partially-quoted here-document end marker + EOE + + : quoted-mixed + : + $* <"cmd <<=\"FO\"'O'" 2>>EOE != 0 + buildfile:11:8: error: partially-quoted here-document end marker + EOE + + : unseparated + : + $* <>EOO + cmd <<=EOF!=0 + foo + EOF + EOI + cmd <<=EOF != 0 + foo + EOF + EOO + + : unseparated-alias + : + $* <>EOO + cmd <>EOO + cmd <<='EOF' + foo + EOF + EOI + cmd <<=EOF + foo + EOF + EOO + + : quoted-single-alias + : + $* <>EOO + cmd <<'EOF' + foo + EOF + EOI + cmd <>EOO + cmd <<="EOF" + foo + EOF + EOI + cmd <<=EOF + foo + EOF + EOO + + : quoted-double-alias + : + $* <>EOO + cmd <<"EOF" + foo + EOF + EOI + cmd <>EOO + cmd <<=EOF + foo + bar + baz + EOF + EOI + cmd <<=EOF + foo + bar + baz + EOF + EOO + + : blank + : + $* <>EOO + cmd <<=EOF + foo + + + bar + EOF + EOI + cmd <<=EOF + foo + + + bar + EOF + EOO + + : non-ws-prefix + : + $* <>EOO + cmd <<=EOF + x EOF + EOF + EOI + cmd <<=EOF + x EOF + EOF + EOO + + : whole-token + : Test the case where the indentation is a whole token + : + $* <>EOO + x = foo bar + cmd <<="EOF" + $x + EOF + EOI + cmd <<=EOF + foo bar + EOF + EOO + + : long-line + : Test the case where the line contains multiple tokens + : + $* <>EOO + x = foo + cmd <<="EOF" + $x bar $x + EOF + EOI + cmd <<=EOF + foo 
bar foo + EOF + EOO + + : unindented + : + $* <>EOE != 0 + cmd <<=EOF + bar + EOF + EOI + buildfile:12:1: error: unindented here-document line + EOE +} + +: blank +: +$* <>EOO +cmd <<=EOF + +foo + +bar + +EOF +EOI +cmd <<=EOF + +foo + +bar + +EOF +EOO + +: quote +: +: Note: they are still recognized in eval contexts. +: +$* <>EOO +cmd <<="EOF" +'single' +"double" +b'o't"h" +('single' "double") +EOF +EOI +cmd <<=EOF +'single' +"double" +b'o't"h" +single double +EOF +EOO diff --git a/libbuild2/build/script/parser+here-string.test.testscript b/libbuild2/build/script/parser+here-string.test.testscript new file mode 100644 index 0000000..f857c57 --- /dev/null +++ b/libbuild2/build/script/parser+here-string.test.testscript @@ -0,0 +1,34 @@ +# file : libbuild2/build/script/parser+here-string.test.testscript +# license : MIT; see accompanying LICENSE file + +: empty +: +$* <>EOO +cmd <<<="" +EOI +cmd <<<='' +EOO + +: empty-nn +: +$* <>EOO +cmd <<<=:"" +EOI +cmd <<<=:'' +EOO + +: empty-alias +: +$* <>EOO +cmd <<<"" +EOI +cmd <<<'' +EOO + +: empty-nn-alias +: +$* <>EOO +cmd <<<:"" +EOI +cmd <<<:'' +EOO diff --git a/libbuild2/build/script/parser+line.test.testscript b/libbuild2/build/script/parser+line.test.testscript new file mode 100644 index 0000000..6401d91 --- /dev/null +++ b/libbuild2/build/script/parser+line.test.testscript @@ -0,0 +1,72 @@ +# file : libbuild2/build/script/parser+line.test.testscript +# license : MIT; see accompanying LICENSE file + +test.options += -d + +: command +: +$* <>EOF + foo >| 2>- &a &?b + foo >=c 2>~/error:.*/ &!c + foo >>:/~%EOS% + %.* + abc + %xyz.*% + EOS + EOF + +: if-else +: +$* <>EOF + if foo + bar + elif fox + if fix + baz + end + biz + end + if! foo + bar + elif! fox + baz + end + EOF + +: quoting +: +$* <>EOO + foo 'bar' "baz" '' "" + "$foo" + "foo$" + "fo"o + "foo"\" + "foo\\" + "foo\"<" + fo\"o + fo\\o + fo\>EOO +cmd1 | cmd2|cmd3 +EOI +cmd1 | cmd2 | cmd3 +EOO + +: log +: +$* <>EOO +cmd1 || cmd2&&cmd3 +EOI +cmd1 || cmd2 && cmd3 +EOO + +: pipe-log +: +$* <>EOO +cmd1 | cmd2 && cmd3 | cmd4 +EOI +cmd1 | cmd2 && cmd3 | cmd4 +EOO + +: exit +: +$* <>EOO +cmd1|cmd2==1&&cmd3!=0|cmd4 +EOI +cmd1 | cmd2 == 1 && cmd3 != 0 | cmd4 +EOO + +: here-doc +: +$* <>EOO +cmd1 <<=EOI1 | cmd2 >>?EOO2 && cmd3 <<=EOI3 2>&1 | cmd4 2>>?EOE4 >>?EOO4 +input +one +EOI1 +ouput +two +EOO2 +input +three +EOI3 +error +four +EOE4 +output +four +EOO4 +EOI +cmd1 <<=EOI1 | cmd2 >>?EOO2 && cmd3 <<=EOI3 2>&1 | cmd4 >>?EOO4 2>>?EOE4 +input +one +EOI1 +ouput +two +EOO2 +input +three +EOI3 +output +four +EOO4 +error +four +EOE4 +EOO + +: leading +: +$* <>EOE != 0 +| cmd +EOI +buildfile:11:1: error: missing program +EOE + +: trailing +: +$* <>EOE != 0 +cmd && +EOI +buildfile:11:7: error: missing program +EOE + +: redirected +: +{ + : input + : + { + : first + : + $* <>EOO + cmd1 >EOE != 0 + cmd1 | cmd2 >EOO + cmd1 | cmd2 >foo + EOI + cmd1 | cmd2 >foo + EOO + + : non-last + : + $* <>EOE != 0 + cmd1 >foo | cmd2 + EOI + buildfile:11:11: error: stdout is both redirected and piped + EOE + } +} diff --git a/libbuild2/build/script/parser+pre-parse.test.testscript b/libbuild2/build/script/parser+pre-parse.test.testscript new file mode 100644 index 0000000..4aff3e8 --- /dev/null +++ b/libbuild2/build/script/parser+pre-parse.test.testscript @@ -0,0 +1,22 @@ +# file : libbuild2/build/script/parser+pre-parse.test.testscript +# license : MIT; see accompanying LICENSE file + +: attribute +: +{ + : name + : + $* <>EOE != 0 + x = [foo] + EOI + buildfile:11:5: error: unknown value attribute foo + EOE + + : name-value 
+ : + $* <>EOE != 0 + x = [foo=bar] + EOI + buildfile:11:5: error: unknown value attribute foo=bar + EOE +} diff --git a/libbuild2/build/script/parser+redirect.test.testscript b/libbuild2/build/script/parser+redirect.test.testscript new file mode 100644 index 0000000..82c04ea --- /dev/null +++ b/libbuild2/build/script/parser+redirect.test.testscript @@ -0,0 +1,525 @@ +# file : libbuild2/build/script/parser+redirect.test.testscript +# license : MIT; see accompanying LICENSE file + +# @@ Add tests for redirects other than trace, here-*, file and merge. +# @@ Does it make sense to split into separate files - one per redirect type? +# + +: trace +: +{ + $* <'cmd >!' >'cmd >!' : out + $* <'cmd 2>!' >'cmd 2>!' : err +} + +: str +: +{ + : literal + : + { + : portable-path + : + $* <>EOO + cmd <<<=/foo >>>?/bar 2>>>?/baz + EOI + cmd <<<=/foo >>>?/bar 2>>>?/baz + EOO + } + + : regex + : + { + : portable-path + : + $* <>EOO + cmd >>>?/~%foo% 2>>>?/~%bar% + EOI + cmd >>>?/~%foo% 2>>>?/~%bar% + EOO + } +} + +: doc +: +{ + : literal + : + { + : portable-path + : + $* <>EOO + cmd <<=/EOI_ >>?/EOO_ 2>>?/EOE_ + foo + EOI_ + bar + EOO_ + baz + EOE_ + EOI + cmd <<=/EOI_ >>?/EOO_ 2>>?/EOE_ + foo + EOI_ + bar + EOO_ + baz + EOE_ + EOO + + : sharing + : + { + : in-out + : + $* <>EOO + cmd <<=:/EOF >>?:/EOF + foo + EOF + EOI + cmd <<=:/EOF >>?:/EOF + foo + EOF + EOO + + : in-alias-out + : + $* <>EOO + cmd <<:/EOF >>?:/EOF + foo + EOF + EOI + cmd <<:/EOF >>?:/EOF + foo + EOF + EOO + + : out-in-alias + : + $* <>EOO + cmd >>?:/EOF <<:/EOF + foo + EOF + EOI + cmd <<:/EOF >>?:/EOF + foo + EOF + EOO + + : different + : + { + : modifiers + : + $* <>EOE != 0 + cmd <<=:/EOF >>?:EOF + foo + EOF + EOI + buildfile:11:18: error: different modifiers for shared here-document 'EOF' + EOE + + : quoting + : + $* <>EOE != 0 + cmd <<=EOF >>?"EOF" + foo + EOF + EOI + buildfile:11:15: error: different quoting for shared here-document 'EOF' + EOE + } + } + } + + : regex + : + { + : portable-path + : + $* <>EOO + cmd >>?/~%EOF% 2>>?/~%EOE% + foo + EOF + bar + EOE + EOI + cmd >>?/~%EOF% 2>>?/~%EOE% + foo + EOF + bar + EOE + EOO + + : sharing + : + { + : in-out + : + $* <>EOO + cmd >>?~/EOF/ 2>>?~/EOF/ + foo + EOF + EOI + cmd >>?~/EOF/ 2>>?~/EOF/ + foo + EOF + EOO + + : different + : + { + : introducers + : + $* <>EOE != 0 + cmd >>?~/EOF/ 2>>?~%EOF% + foo + EOF + EOI + buildfile:11:20: error: different introducers for shared here-document regex 'EOF' + EOE + + : flags + : + $* <>EOE != 0 + cmd >>?~/EOF/ 2>>?~/EOF/i + foo + EOF + EOI + buildfile:11:20: error: different global flags for shared here-document regex 'EOF' + EOE + } + } + } + + : overriding + : + { + : literal + : + { + : with + : + { + : string + : + $* <>EOO + cmd >>?EOF >>>?bar + foo + EOF + EOI + cmd >>>?bar + EOO + + : regex + : + $* <>EOO + cmd >>?FOO >>?~/BAR/ + foo + FOO + bar + BAR + EOI + cmd >>?~/BAR/ + bar + BAR + EOO + + : self + : + $* <>EOO + cmd >>EOF >>EOF + foo + EOF + EOI + cmd >>EOF + foo + EOF + EOO + + : different-modifiers + : + $* <>EOE != 0 + cmd >>?EOF >>?/EOF + foo + EOF + EOI + buildfile:11:16: error: different modifiers for shared here-document 'EOF' + EOE + } + } + + : shared + : + { + : after-sharing + : + $* <>EOO + cmd >>EOF 2>>EOF >bar + foo + EOF + EOI + cmd >bar 2>>EOF + foo + EOF + EOO + + : before-sharing + : + $* <>EOO + cmd >>EOF >bar 2>>EOF + foo + EOF + EOI + cmd >bar 2>>EOF + foo + EOF + EOO + } + } +} + +: file +: +{ + : cmp + : + $* <>EOO + cmd 0<=a 1>?b 2>?c + EOI + cmd <=a >?b 2>?c + EOO + + : write + : + $* <>EOO + cmd 1>=b 2>+c 
+ EOI + cmd >=b 2>+c + EOO + + : quote + : + $* <>EOO + cmd 0<="a f" 1>="b f" 2>+"c f" + EOI + cmd <='a f' >='b f' 2>+'c f' + EOO + + : in + : + { + : missed + : + $* <>EOE !=0 + cmd <= + EOI + buildfile:11:7: error: missing stdin file + EOE + + : empty + : + $* <>EOE !=0 + cmd <="" + EOI + buildfile:11:7: error: empty stdin redirect path + EOE + } + + : in-alias + : + { + : missed + : + $* <>EOE !=0 + cmd < + EOI + buildfile:11:6: error: missing stdin file + EOE + + : empty + : + $* <>EOE !=0 + cmd <"" + EOI + buildfile:11:6: error: empty stdin redirect path + EOE + } + + : out + : + { + : missed + : + $* <>EOE !=0 + cmd >= + EOI + buildfile:11:7: error: missing stdout file + EOE + + : empty + : + $* <>EOE !=0 + cmd >="" + EOI + buildfile:11:7: error: empty stdout redirect path + EOE + } + + : out-alias + : + { + : missed + : + $* <>EOE !=0 + cmd > + EOI + buildfile:11:6: error: missing stdout file + EOE + + : empty + : + $* <>EOE !=0 + cmd >"" + EOI + buildfile:11:6: error: empty stdout redirect path + EOE + } + + : err + : + { + : missed + : + $* <>EOE !=0 + cmd 2>= + EOI + buildfile:11:8: error: missing stderr file + EOE + + : empty + : + $* <>EOE !=0 + cmd 2>="" + EOI + buildfile:11:8: error: empty stderr redirect path + EOE + } + + : err-alias + : + { + : missed + : + $* <>EOE !=0 + cmd 2> + EOI + buildfile:11:7: error: missing stderr file + EOE + + : empty + : + $* <>EOE !=0 + cmd 2>"" + EOI + buildfile:11:7: error: empty stderr redirect path + EOE + } +} + +: merge +{ + : out + : + { + : err + : + $* <>EOO + cmd 1>&2 + EOI + cmd >&2 + EOO + + : no-mutual + : + $* <>EOO + cmd 1>&2 2>&1 2>a + EOI + cmd >&2 2>a + EOO + + : not-descriptor + : + $* <>EOE != 0 + cmd 1>&a + EOI + buildfile:11:8: error: stdout merge redirect file descriptor must be 2 + EOE + + : self + : + $* <>EOE != 0 + cmd 1>&1 + EOI + buildfile:11:8: error: stdout merge redirect file descriptor must be 2 + EOE + + : missed + : + $* <>EOE != 0 + cmd 1>& + EOI + buildfile:11:8: error: missing stdout file descriptor + EOE + } + + : err + { + : out + : + $* <>EOO + cmd 2>&1 + EOI + cmd 2>&1 + EOO + + : no-mutual + : + $* <>EOO + cmd 1>&2 2>&1 >a + EOI + cmd >a 2>&1 + EOO + + : not-descriptor + : + $* <>EOE != 0 + cmd 2>&a + EOI + buildfile:11:8: error: stderr merge redirect file descriptor must be 1 + EOE + + : self + : + $* <>EOE != 0 + cmd 2>&2 + EOI + buildfile:11:8: error: stderr merge redirect file descriptor must be 1 + EOE + + : missed + : + $* <>EOE != 0 + cmd 2>& + EOI + buildfile:11:8: error: missing stderr file descriptor + EOE + } + + : mutual + : + $* <>EOE != 0 + cmd 1>&2 2>&1 + EOI + buildfile:11:14: error: stdout and stderr redirected to each other + EOE +} diff --git a/libbuild2/build/script/parser+regex.test.testscript b/libbuild2/build/script/parser+regex.test.testscript new file mode 100644 index 0000000..625bfdf --- /dev/null +++ b/libbuild2/build/script/parser+regex.test.testscript @@ -0,0 +1,225 @@ +# file : libbuild2/build/script/parser+regex.test.testscript +# license : MIT; see accompanying LICENSE file + +: here-string +: +{ + : stdout + : + { + : missed + : + $* <'cmd >>>?~' 2>>EOE != 0 + buildfile:11:10: error: missing stdout here-string regex + EOE + + : no-introducer + : + $* <'cmd >>>?~""' 2>>EOE != 0 + buildfile:11:10: error: no introducer character in stdout regex redirect + EOE + + : no-term-introducer + : + $* <'cmd >>>?~/' 2>>EOE != 0 + buildfile:11:10: error: no closing introducer character in stdout regex redirect + EOE + + : portable-path-introducer + : + $* <'cmd >>>?/~/foo/' 2>>EOE 
!= 0 + buildfile:11:11: error: portable path modifier and '/' introducer in stdout regex redirect + EOE + + : empty + : + $* <'cmd >>>?~//' 2>>EOE != 0 + buildfile:11:10: error: stdout regex redirect is empty + EOE + + : no-flags + : + $* <'cmd >>>?~/fo*/' >'cmd >>>?~/fo*/' + + : idot + : + $* <'cmd >>>?~/fo*/d' >'cmd >>>?~/fo*/d' + + : icase + : + $* <'cmd >>>?~/fo*/i' >'cmd >>>?~/fo*/i' + + : invalid-flags1 + : + $* <'cmd >>>?~/foo/z' 2>>EOE != 0 + buildfile:11:10: error: junk at the end of stdout regex redirect + EOE + + : invalid-flags2 + : + $* <'cmd >>>?~/foo/iz' 2>>EOE != 0 + buildfile:11:10: error: junk at the end of stdout regex redirect + EOE + + : no-newline + : + $* <'cmd >>>?:~/fo*/' >'cmd >>>?:~/fo*/' + } + + : stderr + : + { + : missed + : + $* <'cmd 2>>>?~' 2>>EOE != 0 + buildfile:11:11: error: missing stderr here-string regex + EOE + + : no-introducer + : + : Note that there is no need to reproduce all the errors as for stdout. + : All we need is to make sure that the proper description is passed to + : the parse_regex() function. + : + $* <'cmd 2>>>?~""' 2>>EOE != 0 + buildfile:11:11: error: no introducer character in stderr regex redirect + EOE + } + + : modifier-last + : + $* <'cmd >>>?~/x' 2>>EOE != 0 + buildfile:11:10: error: no closing introducer character in stdout regex redirect + EOE +} + +: here-doc +: +{ + : stdout + : + { + : missed + : + $* <'cmd >>?~' 2>>EOE != 0 + buildfile:11:9: error: expected here-document regex end marker + EOE + + : portable-path-introducer + : + $* <>EOE != 0 + cmd >>?/~/EOO/ + foo + EOO + EOI + buildfile:11:5: error: portable path modifier and '/' introducer in here-document regex end marker + EOE + + : unterminated-line-char + : + $* <>EOE != 0 + cmd >>?~/EOO/ + / + EOO + EOI + buildfile:12:1: error: no syntax line characters + EOE + + : empty + : + $* <>EOE != 0 + cmd >>?:~/EOO/ + EOO + EOI + buildfile:12:1: error: empty here-document regex + EOE + + : no-flags + : + $* <>EOO + cmd 2>>?~/EOE/ + foo + /? + /foo/ + /foo/* + /foo/i + /foo/i* + + // + //* + EOE + EOI + cmd 2>>?~/EOE/ + foo + /? 
+ /foo/ + /foo/* + /foo/i + /foo/i* + + // + //* + EOE + EOO + + : no-newline-str + : + $* <'cmd >>>?:~/fo*/' >'cmd >>>?:~/fo*/' + + : no-newline-doc + : + $* <>EOO + cmd 2>>?:~/EOE/ + foo + EOE + EOI + cmd 2>>?:~/EOE/ + foo + EOE + EOO + + : end-marker-restore + : + { + : idot + : + $* <>EOO + cmd 2>>?~/EOE/d + foo + EOE + EOI + cmd 2>>?~/EOE/d + foo + EOE + EOO + + : icase + : + $* <>EOO + cmd 2>>?~/EOE/i + foo + EOE + EOI + cmd 2>>?~/EOE/i + foo + EOE + EOO + } + } + + : stderr + : + { + : missed + : + $* <'cmd 2>>?~' 2>>EOE != 0 + buildfile:11:10: error: expected here-document regex end marker + EOE + } + + : modifier-last + : + $* <'cmd >>?~:/FOO/' 2>>EOE != 0 + buildfile:11:5: error: no closing introducer character in here-document regex end marker + EOE +} diff --git a/libbuild2/build/script/parser+variable.test.testscript b/libbuild2/build/script/parser+variable.test.testscript new file mode 100644 index 0000000..5040e66 --- /dev/null +++ b/libbuild2/build/script/parser+variable.test.testscript @@ -0,0 +1,41 @@ +# file : libbuild2/build/script/parser+variable.test.testscript +# license : MIT; see accompanying LICENSE file + +: assignment +: +$* <>EOO +a = b +echo $a +EOI +echo b +EOO + +: primary-target +: +$* <>EOO +echo $name($>) +EOI +echo driver +EOO + +: no-newline +: +$* <:'echo a' 2>>EOE != 0 +buildfile:11:7: error: expected newline instead of +EOE + +: set-primary-target +: +$* <>EOE != 0 +> = a +EOI +buildfile:11:1: error: missing program +EOE + +: empty-name +: +$* <>EOE != 0 += b +EOI +buildfile:11:1: error: missing variable name +EOE diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx new file mode 100644 index 0000000..e64db91 --- /dev/null +++ b/libbuild2/build/script/parser.cxx @@ -0,0 +1,391 @@ +// file : libbuild2/build/script/parser.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + using type = token_type; + + // + // Pre-parse. + // + + script parser:: + pre_parse (istream& is, const path_name& pn, uint64_t line) + { + path_ = &pn; + + pre_parse_ = true; + + lexer l (is, *path_, line, lexer_mode::command_line); + set_lexer (&l); + + script s; + script_ = &s; + runner_ = nullptr; + environment_ = nullptr; + + s.start_loc = location (*path_, line, 1); + + token t (pre_parse_script ()); + + assert (t.type == type::eos); + + s.end_loc = get_location (t); + + return s; + } + + token parser:: + pre_parse_script () + { + // enter: next token is first token of the script + // leave: eos (returned) + + token t; + type tt; + + // Parse lines until we see eos. + // + for (;;) + { + // Start lexing each line. + // + tt = peek (lexer_mode::first_token); + + // Determine the line type by peeking at the first token. + // + switch (tt) + { + case type::eos: + { + next (t, tt); + return t; + } + default: + { + pre_parse_line (t, tt); + assert (tt == type::newline); + break; + } + } + } + } + + void parser:: + pre_parse_line (token& t, type& tt, bool if_line) + { + // Determine the line type/start token. + // + line_type lt ( + pre_parse_line_start (t, tt, lexer_mode::second_token)); + + line ln; + switch (lt) + { + case line_type::var: + { + // Check if we are trying to modify any of the special variables. + // + if (special_variable (t.value)) + fail (t) << "attempt to set '" << t.value << "' special " + << "variable"; + + // We don't pre-enter variables. 
+ // + ln.var = nullptr; + + next (t, tt); // Assignment kind. + + mode (lexer_mode::variable_line); + parse_variable_line (t, tt); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + + break; + } + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + case line_type::cmd_end: + { + if (!if_line) + { + fail (t) << lt << " without preceding 'if'"; + } + } + // Fall through. + case line_type::cmd_if: + case line_type::cmd_ifn: + next (t, tt); // Skip to start of command. + // Fall through. + case line_type::cmd: + { + pair p; + + if (lt != line_type::cmd_else && lt != line_type::cmd_end) + p = parse_command_expr (t, tt, lexer::redirect_aliases); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + + parse_here_documents (t, tt, p); + break; + } + } + + assert (tt == type::newline); + + ln.type = lt; + ln.tokens = replay_data (); + script_->lines.push_back (move (ln)); + + if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) + { + tt = peek (lexer_mode::first_token); + + pre_parse_if_else (t, tt); + } + } + + void parser:: + pre_parse_if_else (token& t, type& tt) + { + // enter: peeked first token of next line (type in tt) + // leave: newline + + // Parse lines until we see closing 'end'. Nested if-else blocks are + // handled recursively. + // + for (line_type bt (line_type::cmd_if); // Current block. + ; + tt = peek (lexer_mode::first_token)) + { + const location ll (get_location (peeked ())); + + if (tt == type::eos) + fail (ll) << "expected closing 'end'"; + + // Parse one line. Note that this one line can still be multiple + // lines in case of if-else. In this case we want to view it as + // cmd_if, not cmd_end. Thus remember the start position of the + // next logical line. + // + size_t i (script_->lines.size ()); + + pre_parse_line (t, tt, true /* if_line */); + assert (tt == type::newline); + + line_type lt (script_->lines[i].type); + + // First take care of 'end'. + // + if (lt == line_type::cmd_end) + return; + + // Check if-else block sequencing. + // + if (bt == line_type::cmd_else) + { + if (lt == line_type::cmd_else || + lt == line_type::cmd_elif || + lt == line_type::cmd_elifn) + fail (ll) << lt << " after " << bt; + } + + // Update current if-else block. + // + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: bt = line_type::cmd_elif; break; + case line_type::cmd_else: bt = line_type::cmd_else; break; + default: break; + } + } + } + + command_expr parser:: + parse_command_line (token& t, type& tt) + { + // enter: first token of the command line + // leave: + + // Note: this one is only used during execution. + // + assert (!pre_parse_); + + pair p ( + parse_command_expr (t, tt, lexer::redirect_aliases)); + + assert (tt == type::newline); + + parse_here_documents (t, tt, p); + assert (tt == type::newline); + + return move (p.first); + } + + // + // Execute. + // + + void parser:: + execute (const scope& rs, const scope& bs, + environment& e, const script& s, runner& r) + { + path_ = nullptr; // Set by replays. + + pre_parse_ = false; + + set_lexer (nullptr); + + // The script shouldn't be able to modify the scopes. 
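To make the stored representation concrete, here is a rough sketch of the line types that pre_parse_line()/pre_parse_if_else() record for a small if-else fragment. The script text is illustrative; line_type is the enumeration used by script::lines (assuming the using-declarations from build/script/script.hxx are in scope):

  // For a recipe body along the lines of
  //
  //   if test -f $>
  //     echo 'old'
  //   else
  //     echo 'new'
  //   end
  //
  // script::lines gets one entry per logical line, in order:
  //
  const line_type expected[] = {
    line_type::cmd_if,   // if test -f $>
    line_type::cmd,      //   echo 'old'
    line_type::cmd_else, // else
    line_type::cmd,      //   echo 'new'
    line_type::cmd_end   // end
  };

Note that the else and end lines get their own entries even though no command expression is parsed for them.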
+ // + root_ = const_cast (&rs); + scope_ = const_cast (&bs); + pbase_ = scope_->src_path_; + + script_ = const_cast (&s); + runner_ = &r; + environment_ = &e; + + exec_script (); + } + + void parser:: + exec_script () + { + const script& s (*script_); + + runner_->enter (*environment_, s.start_loc); + + // Note that we rely on "small function object" optimization for the + // exec_*() lambdas. + // + auto exec_set = [this] (const variable& var, + token& t, build2::script::token_type& tt, + const location&) + { + next (t, tt); + type kind (tt); // Assignment kind. + + mode (lexer_mode::variable_line); + value rhs (parse_variable_line (t, tt)); + + assert (tt == type::newline); + + // Assign. + // + value& lhs (kind == type::assign + ? environment_->assign (var) + : environment_->append (var)); + + apply_value_attributes (&var, lhs, move (rhs), kind); + }; + + auto exec_cmd = [this] (token& t, build2::script::token_type& tt, + size_t li, + bool single, + const location& ll) + { + // We use the 0 index to signal that this is the only command. + // + if (single) + li = 0; + + command_expr ce ( + parse_command_line (t, static_cast (tt))); + + runner_->run (*environment_, ce, li, ll); + }; + + auto exec_if = [this] (token& t, build2::script::token_type& tt, + size_t li, + const location& ll) + { + command_expr ce ( + parse_command_line (t, static_cast (tt))); + + // Assume if-else always involves multiple commands. + // + return runner_->run_if (*environment_, ce, li, ll); + }; + + size_t li (1); + + exec_lines (s.lines.begin (), s.lines.end (), + exec_set, exec_cmd, exec_if, + li, + &environment_->var_pool); + + runner_->leave (*environment_, s.end_loc); + } + + // When add a special variable don't forget to update lexer::word(). + // + bool parser:: + special_variable (const string& n) noexcept + { + return n == ">" || n == "<" || n == "~"; + } + + lookup parser:: + lookup_variable (name&& qual, string&& name, const location& loc) + { + // In the pre-parse mode collect the referenced variable names for the + // script semantics change tracking. + // + if (pre_parse_) + { + // Add the variable name skipping special variables and suppressing + // duplicates. While at it, check if the script temporary directory + // is referenced and set the flag, if that's the case. + // + if (special_variable (name)) + { + if (name == "~") + script_->temp_dir = true; + } + else if (!name.empty ()) + { + auto& vars (script_->vars); + + if (find (vars.begin (), vars.end (), name) == vars.end ()) + vars.push_back (move (name)); + } + + return lookup (); + } + + if (!qual.empty ()) + fail (loc) << "qualified variable name"; + + lookup r (environment_->lookup (name)); + + // Fail if non-script-local variable with an untracked name. 
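The net effect of this tracking is perhaps easiest to see on an example. In rough terms (the variable name is illustrative and s stands for a pre-parsed script as in the pre_parse() sketch above):

  // Pre-parsing a recipe line such as
  //
  //   echo $cxx.std
  //
  // records the referenced buildfile variable name:
  //
  assert (find (s.vars.begin (), s.vars.end (), "cxx.std") != s.vars.end ());

  // During execution, looking up a buildfile variable whose name is not
  // in s.vars (for example, one computed at runtime) fails, since a
  // change to its value could not have been factored into the recipe's
  // change tracking.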
+ // + if (r.defined () && !r.belongs (*environment_)) + { + const auto& vars (script_->vars); + + if (find (vars.begin (), vars.end (), name) == vars.end ()) + fail (loc) << "use of untracked variable '" << name << "'"; + } + + return r; + } + } + } +} diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx new file mode 100644 index 0000000..27e7f49 --- /dev/null +++ b/libbuild2/build/script/parser.hxx @@ -0,0 +1,96 @@ +// file : libbuild2/build/script/parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_BUILD_SCRIPT_PARSER_HXX +#define LIBBUILD2_BUILD_SCRIPT_PARSER_HXX + +#include +#include +#include + +#include + +#include + +#include +#include + +namespace build2 +{ + namespace build + { + namespace script + { + class runner; + + class parser: public build2::script::parser + { + // Pre-parse. Issue diagnostics and throw failed in case of an error. + // + public: + parser (context& c): build2::script::parser (c) {} + + // Note that the returned script object references the passed path + // name. + // + script + pre_parse (istream&, const path_name&, uint64_t line); + + // Recursive descent parser. + // + // Usually (but not always) parse functions receive the token/type + // from which it should start consuming and in return the token/type + // should contain the first token that has not been consumed. + // + // Functions that are called parse_*() rather than pre_parse_*() are + // used for both stages. + // + protected: + token + pre_parse_script (); + + void + pre_parse_line (token&, token_type&, bool if_line = false); + + void + pre_parse_if_else (token&, token_type&); + + command_expr + parse_command_line (token&, token_type&); + + // Execute. Issue diagnostics and throw failed in case of an error. + // + public: + void + execute (const scope& root, const scope& base, + environment&, const script&, runner&); + + protected: + void + exec_script (); + + // Helpers. + // + public: + static bool + special_variable (const string&) noexcept; + + // Customization hooks. + // + protected: + virtual lookup + lookup_variable (name&&, string&&, const location&) override; + + protected: + script* script_; + + // Execute state. + // + runner* runner_; + environment* environment_; + }; + } + } +} + +#endif // LIBBUILD2_BUILD_SCRIPT_PARSER_HXX diff --git a/libbuild2/build/script/parser.test.cxx b/libbuild2/build/script/parser.test.cxx new file mode 100644 index 0000000..9046312 --- /dev/null +++ b/libbuild2/build/script/parser.test.cxx @@ -0,0 +1,224 @@ +// file : libbuild2/build/script/parser.test.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include + +#include +#include +#include + +#include // line +#include +#include + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + class print_runner: public runner + { + public: + print_runner (bool line): line_ (line) {} + + virtual void + enter (environment&, const location&) override {} + + virtual void + run (environment&, + const command_expr& e, + size_t i, + const location&) override + { + cout << e; + + if (line_) + cout << " # " << i; + + cout << endl; + } + + virtual bool + run_if (environment&, + const command_expr& e, + size_t i, + const location&) override + { + cout << "? 
" << e; + + if (line_) + cout << " # " << i; + + cout << endl; + + return e.back ().pipe.back ().program.string () == "true"; + } + + virtual void + leave (environment&, const location&) override {} + + private: + bool line_; + }; + + // Usages: + // + // argv[0] [-l] + // argv[0] -d + // argv[0] -p + // + // In the first form read the script from stdin and trace the script + // execution to stdout using the custom print runner. + // + // In the second form read the script from stdin, parse it and dump the + // resulting lines to stdout. + // + // In the third form read the script from stdin, parse it and print + // line tokens quoting information to stdout. + // + // -l + // Print the script line number for each executed expression. + // + // -d + // Dump the parsed script to sdout. + // + // -p + // Print the parsed script tokens quoting information to sdout. If a + // token is quoted follow its representation with its quoting + // information in the [/] form, where: + // + // := 'S' | 'D' | 'M' + // := 'C' | 'P' + // + int + main (int argc, char* argv[]) + { + tracer trace ("main"); + + enum class mode + { + run, + dump, + print + } m (mode::run); + + bool print_line (false); + + for (int i (1); i != argc; ++i) + { + string a (argv[i]); + + if (a == "-l") + print_line = true; + else if (a == "-d") + m = mode::dump; + else if (a == "-p") + m = mode::print; + else + assert (false); + } + + assert (m == mode::run || !print_line); + + // Fake build system driver, default verbosity. + // + init_diag (1); + init (nullptr, argv[0]); + + // Serial execution. + // + scheduler sched (1); + global_mutexes mutexes (1); + context ctx (sched, mutexes); + + try + { + cin.exceptions (istream::failbit | istream::badbit); + + // Enter mock target. Use fixed name and path so that we can use + // them in expected results. Strictly speaking target path should + // be absolute. However, the buildscript implementation doesn't + // really care. + // + file& tt ( + ctx.targets.insert (work, + dir_path (), + "driver", + string (), + trace)); + + tt.path (path ("driver")); + + // Parse and run. + // + parser p (ctx); + path_name nm ("buildfile"); + script s (p.pre_parse (cin, nm, 11 /* line */)); + + switch (m) + { + case mode::run: + { + environment e (perform_update_id, tt, false /* temp_dir */); + print_runner r (print_line); + p.execute (ctx.global_scope, ctx.global_scope, e, s, r); + break; + } + case mode::dump: + { + dump (cout, "", s.lines); + break; + } + case mode::print: + { + for (const line& l: s.lines) + { + for (const replay_token& rt: l.tokens) + { + if (&rt != &l.tokens[0]) + cout << ' '; + + const token& t (rt.token); + cout << t; + + char q ('\0'); + switch (t.qtype) + { + case quote_type::single: q = 'S'; break; + case quote_type::double_: q = 'D'; break; + case quote_type::mixed: q = 'M'; break; + case quote_type::unquoted: break; + } + + if (q != '\0') + cout << " [" << q << (t.qcomp ? 
"/C" : "/P") << ']'; + } + } + + cout << endl; + } + } + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::build::script::main (argc, argv); +} diff --git a/libbuild2/build/script/runner.cxx b/libbuild2/build/script/runner.cxx new file mode 100644 index 0000000..315a248 --- /dev/null +++ b/libbuild2/build/script/runner.cxx @@ -0,0 +1,133 @@ +// file : libbuild2/build/script/runner.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // try_rmdir() + +#include +#include + +using namespace butl; + +namespace build2 +{ + namespace build + { + namespace script + { + void default_runner:: + enter (environment&, const location&) + { + } + + void default_runner:: + leave (environment& env, const location& ll) + { + // Drop cleanups of target paths. + // + for (auto i (env.cleanups.begin ()); i != env.cleanups.end (); ) + { + const target* m (&env.target); + for (; m != nullptr; m = m->adhoc_member) + { + if (const path_target* pm = m->is_a ()) + if (i->path == pm->path ()) + break; + } + + if (m != nullptr) + i = env.cleanups.erase (i); + else + ++i; + } + + clean (env, ll); + + // Remove the temporary directory, if created. + // + const dir_path& td (env.temp_dir.path); + + if (!td.empty ()) + { + // Note that since the temporary directory may only contain special + // files that are created and registered for cleanup by the script + // running machinery and should all be removed by the above clean() + // function call, its removal failure may not be the script fault + // but potentially a bug or a filesystem problem. Thus, we don't + // ignore the errors and report them. + // + env.temp_dir.cancel (); + + try + { + // Note that the temporary directory must be empty to date. + // + rmdir_status r (try_rmdir (td)); + + if (r != rmdir_status::success) + { + // While there can be no fault of the script being currently + // executed let's add the location anyway to ease the + // troubleshooting. And let's stick to that principle down the + // road. + // + diag_record dr (fail (ll)); + dr << "temporary directory '" << td + << (r == rmdir_status::not_exist + ? "' does not exist" + : "' is not empty"); + + if (r == rmdir_status::not_empty) + build2::script::print_dir (dr, td, ll); + } + } + catch (const system_error& e) + { + fail (ll) << "unable to remove temporary directory '" << td + << "': " << e; + } + + if (verb >= 3) + text << "rmdir " << td; + } + } + + void default_runner:: + run (environment& env, + const command_expr& expr, + size_t li, + const location& ll) + { + if (verb >= 3) + text << ": " << expr; + + // Run the expression if we are not in the dry-run mode or if it + // executes the set or exit builtin and just print the expression + // otherwise at verbosity level 2 and up. + // + if (!env.context.dry_run || + find_if (expr.begin (), expr.end (), + [] (const expr_term& et) + { + const string& p (et.pipe.back ().program.string ()); + return p == "set" || p == "exit"; + }) != expr.end ()) + build2::script::run (env, expr, li, ll); + else if (verb >= 2) + text << expr; + } + + bool default_runner:: + run_if (environment& env, + const command_expr& expr, + size_t li, const location& ll) + { + if (verb >= 3) + text << ": ?" 
<< expr; + + return build2::script::run_if (env, expr, li, ll); + } + } + } +} diff --git a/libbuild2/build/script/runner.hxx b/libbuild2/build/script/runner.hxx new file mode 100644 index 0000000..431c446 --- /dev/null +++ b/libbuild2/build/script/runner.hxx @@ -0,0 +1,84 @@ +// file : libbuild2/build/script/runner.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_BUILD_SCRIPT_RUNNER_HXX +#define LIBBUILD2_BUILD_SCRIPT_RUNNER_HXX + +#include +#include + +#include + +namespace build2 +{ + namespace build + { + struct common; + + namespace script + { + class runner + { + public: + // Location is the script start location (for diagnostics, etc). + // + virtual void + enter (environment&, const location&) = 0; + + // Index is the 1-base index of this command line in the command list. + // If it is 0 then it means there is only one command. This + // information can be used, for example, to derive file names. + // + // Location is the start position of this command line in the script. + // It can be used in diagnostics. + // + virtual void + run (environment&, + const command_expr&, + size_t index, + const location&) = 0; + + virtual bool + run_if (environment&, + const command_expr&, + size_t, + const location&) = 0; + + // Location is the script end location (for diagnostics, etc). + // + virtual void + leave (environment&, const location&) = 0; + }; + + // Run command expressions. + // + // In dry-run mode don't run the expressions unless they are if- + // conditions or execute the set or exit builtins, but prints them at + // verbosity level 2 and up. + // + class default_runner: public runner + { + public: + virtual void + enter (environment&, const location&) override; + + virtual void + run (environment&, + const command_expr&, + size_t, + const location&) override; + + virtual bool + run_if (environment&, + const command_expr&, + size_t, + const location&) override; + + virtual void + leave (environment&, const location&) override; + }; + } + } +} + +#endif // LIBBUILD2_BUILD_SCRIPT_RUNNER_HXX diff --git a/libbuild2/build/script/script.cxx b/libbuild2/build/script/script.cxx new file mode 100644 index 0000000..3485f54 --- /dev/null +++ b/libbuild2/build/script/script.cxx @@ -0,0 +1,236 @@ +// file : libbuild2/build/script/script.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include + +#include + +#include + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + // environment + // + static const optional wd_name ("current directory"); + + environment:: + environment (action a, const target_type& t, bool temp) + : build2::script::environment ( + t.ctx, + cast (t.ctx.global_scope["build.host"]), + dir_name_view (&work, &wd_name), + temp_dir.path, false /* temp_dir_keep */, + redirect (redirect_type::none), + redirect (redirect_type::merge, 2), + redirect (redirect_type::pass)), + target (t), + vars (context, false /* global */) + { + // Set special variables. + // + { + // $> + // + names ns; + for (const target_type* m (&t); m != nullptr; m = m->adhoc_member) + m->as_name (ns); + + assign (var_pool.insert (">")) = move (ns); + } + + { + // $< + // + // Note that at this stage (after execute_prerequisites()) ad hoc + // prerequisites are no longer in prerequisite_targets which means + // they won't end up in $< either. 
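Since runner is the customization point (print_runner in parser.test.cxx and default_runner here are the two implementations in this change), a further implementation might look along these lines. This is only a sketch: the trace_runner name and the diagnostics header are assumptions, while the interface signatures follow runner.hxx above:

  #include <libbuild2/diagnostics.hxx> // text

  #include <libbuild2/build/script/runner.hxx>
  #include <libbuild2/build/script/script.hxx>

  namespace build2
  {
    namespace build
    {
      namespace script
      {
        // Trace every command expression instead of executing it.
        //
        class trace_runner: public runner
        {
        public:
          virtual void
          enter (environment&, const location&) override {}

          virtual void
          run (environment&,
               const command_expr& e,
               size_t i,
               const location&) override
          {
            // Index 0 means this is the only command in the script.
            //
            if (i != 0)
              text << i << ": " << e;
            else
              text << e;
          }

          virtual bool
          run_if (environment&,
                  const command_expr& e,
                  size_t i,
                  const location&) override
          {
            text << i << "? " << e;
            return false; // Pretend the condition did not hold.
          }

          virtual void
          leave (environment&, const location&) override {}
        };
      }
    }
  }

As the interface comment notes, the index can be used, for example, to derive per-command file names.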
While at first thought ad hoc + // prerequisites in ad hoc recipes don't seem to make much sense, + // they could be handy to exclude certain preresquisites from $< + // while still treating them as such. + // + names ns; + for (const target_type* pt: t.prerequisite_targets[a]) + if (pt != nullptr) + pt->as_name (ns); + + assign (var_pool.insert ("<")) = move (ns); + } + + // Set the $~ special variable. + // + if (temp) + { + create_temp_dir (); + assign (var_pool.insert ("~")) = temp_dir.path; + } + } + + void environment:: + create_temp_dir () + { + // Create the temporary directory for this run regardless of the + // dry-run mode, since some commands still can be executed (see run() + // for details). This is also the reason why we are not using the + // build2 filesystem API that considers the dry-run mode. + // + // Note that the directory auto-removal is active. + // + dir_path& td (temp_dir.path); + + assert (td.empty ()); // Must be called once. + + try + { + td = dir_path::temp_path ("buildscript"); + } + catch (const system_error& e) + { + fail << "unable to obtain temporary directory for buildscript " + << "execution" << e; + } + + mkdir_status r; + + try + { + r = try_mkdir (td); + } + catch (const system_error& e) + { + fail << "unable to create temporary directory '" << td << "': " + << e << endf; + } + + // Note that the temporary directory can potentially stay after some + // abnormally terminated script run. Clean it up and reuse if that's + // the case. + // + if (r == mkdir_status::already_exists) + try + { + butl::rmdir_r (td, false /* dir */); + } + catch (const system_error& e) + { + fail << "unable to cleanup temporary directory '" << td << "': " + << e; + } + + if (verb >= 3) + text << "mkdir " << td; + } + + void environment:: + set_variable (string&& nm, + names&& val, + const string& attrs, + const location& ll) + { + // Check if we are trying to modify any of the special variables. + // + if (parser::special_variable (nm)) + fail (ll) << "attempt to set '" << nm << "' special variable"; + + // Set the variable value and attributes. + // + const variable& var (var_pool.insert (move (nm))); + + value& lhs (assign (var)); + + // If there are no attributes specified then the variable assignment + // is straightforward. Otherwise we will use the build2 parser helper + // function. + // + if (attrs.empty ()) + lhs.assign (move (val), &var); + else + { + // If there is an error in the attributes string, our diagnostics + // will look like this: + // + // :1:1 error: unknown value attribute x + // buildfile:10:1 info: while parsing attributes '[x]' + // + // Note that the attributes parsing error is the only reason for a + // failure. + // + auto df = make_diag_frame ( + [attrs, &ll](const diag_record& dr) + { + dr << info (ll) << "while parsing attributes '" << attrs << "'"; + }); + + parser p (context); + p.apply_value_attributes (&var, + lhs, + value (move (val)), + attrs, + token_type::assign, + path_name ("")); + } + } + + lookup environment:: + lookup (const variable& var) const + { + auto p (vars.lookup (var)); + if (p.first != nullptr) + return lookup_type (*p.first, p.second, vars); + + return lookup_in_buildfile (var.name); + } + + lookup environment:: + lookup (const string& name) const + { + // Every variable that is ever set in a script has been added during + // variable line execution or introduced with the set builtin. Which + // means that if one is not found in the environment pool then it can + // only possibly be set in the buildfile. 
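Putting the two lookup() overloads together, the chain behaves roughly as follows. This is a sketch only: e is assumed to be an environment for some target, and the variable names and values are illustrative:

  const variable& var (e.var_pool.insert ("msg"));

  e.assign (var) = names {name ("hello")}; // Script-local value.

  auto l1 (e.lookup (var));   // Finds "hello" in e.vars.
  auto l2 (e.lookup ("cxx")); // Never set by the script: falls back to
                              // the target and outer buildfile scopes
                              // via lookup_in_buildfile().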
+ // + const variable* pvar (var_pool.find (name)); + return pvar != nullptr ? lookup (*pvar) : lookup_in_buildfile (name); + } + + lookup environment:: + lookup_in_buildfile (const string& n) const + { + // Switch to the corresponding buildfile variable. Note that we don't + // want to insert a new variable into the pool (we might be running + // in parallel). Plus, if there is no such variable, then we cannot + // possibly find any value. + // + const variable* pvar (context.var_pool.find (n)); + + if (pvar == nullptr) + return lookup_type (); + + return target[*pvar]; + } + + value& environment:: + append (const variable& var) + { + auto l (lookup (var)); + + if (l.defined () && l.belongs (*this)) // Existing var. + return vars.modify (l); + + value& r (assign (var)); // NULL. + + if (l.defined ()) + r = *l; // Copy value (and type) from the outer scope. + + return r; + } + } + } +} diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx new file mode 100644 index 0000000..2118568 --- /dev/null +++ b/libbuild2/build/script/script.hxx @@ -0,0 +1,156 @@ +// file : libbuild2/build/script/script.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_BUILD_SCRIPT_SCRIPT_HXX +#define LIBBUILD2_BUILD_SCRIPT_SCRIPT_HXX + +#include +#include +#include + +#include +#include // auto_rmdir + +#include + +namespace build2 +{ + namespace build + { + namespace script + { + using build2::script::line; + using build2::script::line_type; + using build2::script::redirect; + using build2::script::redirect_type; + using build2::script::expr_term; + using build2::script::command_expr; + + // Notes: + // + // - Once parsed, the script can be executed in multiple threads with + // the state (variable values, etc) maintained in the environment. + // + // - The default script command redirects semantics is 'none' for stdin, + // 'merge' into stderr for stdout, and 'pass' for stderr. + // + class script + { + public: + // Note that the variables are not pre-entered into a pool during the + // parsing phase, so the line variable pointers are NULL. + // + build2::script::lines lines; + + // Referenced ordinary (non-special) variables. + // + // Used for the script semantics change tracking. The variable list is + // filled during the pre-parsing phase and is checked against during + // the execution phase. If during execution some non-script-local + // variable is not found in the list (may happen for a computed name), + // then the execution fails since the script semantics may not be + // properly tracked (the variable value change will not trigger the + // target rebuild). + // + small_vector vars; // 2 for command and options. + + // True if script references the $~ special variable. + // + bool temp_dir = false; + + location start_loc; + location end_loc; + }; + + class environment: public build2::script::environment + { + public: + using target_type = build2::target; + + environment (action, const target_type&, bool temp_dir); + + environment (environment&&) = delete; + environment (const environment&) = delete; + environment& operator= (environment&&) = delete; + environment& operator= (const environment&) = delete; + + public: + // Primary target this environment is for. + // + const target_type& target; + + // Script-local variable pool and map. + // + // Note that if we lookup the variable by passing name as a string, + // then it will be looked up in the wrong pool. 
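In other words, append() is what gives '+=' in a recipe its copy-on-first-append behavior. Roughly, as a sketch with illustrative names and values:

  // Suppose the buildfile sets 'opts = -O2' on the target and the
  // recipe does 'opts += -g'.
  //
  const variable& var (e.var_pool.insert ("opts"));

  value& v (e.append (var));

  // Since 'opts' was not yet script-local, its buildfile value (and
  // type) has been copied into e.vars first; the subsequent append
  // modifies this copy, so the script sees '-O2 -g' while the target's
  // own 'opts' value is left untouched.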
+ // + variable_pool var_pool; + variable_map vars; + + // Temporary directory for the script run. + // + // Currently this directory is removed regardless of the script + // execution success or failure. Later, to help with troubleshooting, + // we may invent an option that suppresses the removal of temporary + // files in general. + // + // This directory is available to the user via the $~ special + // variable. Note, however, that the following filesystem entry + // prefixes are reserved: + // + // stdin* + // stdout* + // stderr* + // + auto_rmdir temp_dir; + + virtual void + set_variable (string&& name, + names&&, + const string& attrs, + const location&) override; + + virtual void + create_temp_dir () override; + + // Variables. + // + public: + // Lookup the variable starting from this environment, then the + // primary target, and then outer buildfile scopes. + // + using lookup_type = build2::lookup; + + lookup_type + lookup (const variable&) const; + + lookup_type + lookup (const string&) const; + + // As above but only look for buildfile variables. + // + lookup_type + lookup_in_buildfile (const string&) const; + + // Return a value suitable for assignment. If the variable does not + // exist in this environment's variable map, then a new one with the + // NULL value is added and returned. Otherwise the existing value is + // returned. + // + value& + assign (const variable& var) {return vars.assign (var);} + + // Return a value suitable for append/prepend. If the variable does + // not exist in this environment's variable map, then outer scopes are + // searched for the same variable. If found then a new variable with + // the found value is added to the environment and returned. Otherwise + // this function proceeds as assign() above. + // + value& + append (const variable&); + }; + } + } +} + +#endif // LIBBUILD2_BUILD_SCRIPT_SCRIPT_HXX diff --git a/libbuild2/build/script/token.cxx b/libbuild2/build/script/token.cxx new file mode 100644 index 0000000..8f8477b --- /dev/null +++ b/libbuild2/build/script/token.cxx @@ -0,0 +1,23 @@ +// file : libbuild2/build/script/token.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + void + token_printer (ostream& os, const token& t, print_mode m) + { + // No buildscript-specific tokens so far. + // + build2::script::token_printer (os, t, m); + } + } + } +} diff --git a/libbuild2/build/script/token.hxx b/libbuild2/build/script/token.hxx new file mode 100644 index 0000000..954b412 --- /dev/null +++ b/libbuild2/build/script/token.hxx @@ -0,0 +1,36 @@ +// file : libbuild2/build/script/token.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_BUILD_SCRIPT_TOKEN_HXX +#define LIBBUILD2_BUILD_SCRIPT_TOKEN_HXX + +#include +#include + +#include + +namespace build2 +{ + namespace build + { + namespace script + { + struct token_type: build2::script::token_type + { + using base_type = build2::script::token_type; + + // No buildscript-specific tokens so far. 
+ // + + token_type () = default; + token_type (value_type v): base_type (v) {} + token_type (build2::token_type v): base_type (v) {} + }; + + void + token_printer (ostream&, const token&, print_mode); + } + } +} + +#endif // LIBBUILD2_BUILD_SCRIPT_TOKEN_HXX diff --git a/libbuild2/buildfile b/libbuild2/buildfile index edd0570..5f7bc11 100644 --- a/libbuild2/buildfile +++ b/libbuild2/buildfile @@ -27,6 +27,11 @@ lib{build2}: libul{build2}: \ {hxx ixx txx cxx}{* -utility-*installed -config -version -*.test...} \ {hxx}{config version} +libul{build2}: script/{hxx ixx txx cxx}{** -*-options -**.test...} \ + script/{hxx ixx cxx}{builtin-options} + +libul{build2}: build/{hxx ixx txx cxx}{** -**.test...} + # Note that this won't work in libul{} since it's not installed. # lib{build2}: cxx{utility-installed}: for_install = true @@ -78,8 +83,7 @@ libul{build2}: dist/{hxx ixx txx cxx}{** -**.test...} libul{build2}: install/{hxx ixx txx cxx}{** -**.test...} -libul{build2}: test/{hxx ixx txx cxx}{** -**-options -**.test...} \ - test/script/{hxx ixx cxx}{builtin-options} +libul{build2}: test/{hxx ixx txx cxx}{** -**.test...} libul{build2}: $int_libs @@ -105,6 +109,8 @@ exe{*.test}: } for t: cxx{ *.test...} \ + script/cxx{**.test...} \ + build/cxx{**.test...} \ config/cxx{**.test...} \ dist/cxx{**.test...} \ install/cxx{**.test...} \ @@ -191,15 +197,15 @@ else # Generated options parser. # -test/script/ +script/ { if $cli.configured { cli.cxx{builtin-options}: cli{builtin} cli.options += --std c++11 -I $src_root --include-with-brackets \ ---include-prefix libbuild2/test/script --guard-prefix LIBBUILD2_TEST_SCRIPT \ ---cli-namespace build2::test::script::cli --generate-vector-scanner \ +--include-prefix libbuild2/script --guard-prefix LIBBUILD2_SCRIPT \ +--cli-namespace build2::script::cli --generate-vector-scanner \ --generate-specifier --suppress-usage cli.cxx{*}: diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index 8b082cc..6b9104f 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -5247,6 +5247,8 @@ namespace build2 dir_path compile_rule:: find_modules_sidebuild (const scope& rs) const { + context& ctx (rs.ctx); + // First figure out where we are going to build. We want to avoid // multiple sidebuilds so the outermost scope that has loaded the // cc.config module and that is within our amalgmantion seems like a @@ -5284,18 +5286,18 @@ namespace build2 modules_sidebuild_dir /= x); - const scope* ps (&rs.ctx.scopes.find (pd)); + const scope* ps (&ctx.scopes.find (pd)); if (ps->out_path () != pd) { // Switch the phase to load then create and load the subproject. // - phase_switch phs (rs.ctx, run_phase::load); + phase_switch phs (ctx, run_phase::load); // Re-test again now that we are in exclusive phase (another thread // could have already created and loaded the subproject). // - ps = &rs.ctx.scopes.find (pd); + ps = &ctx.scopes.find (pd); if (ps->out_path () != pd) { @@ -5322,15 +5324,13 @@ namespace build2 {string (x) + '.'}, /* root_modules */ "", /* root_post */ nullopt, /* config_module */ + nullopt, /* config_file */ false, /* buildfile */ "the cc module", 2); /* verbosity */ } - ps = &load_project (as->rw () /* lock */, - pd, - pd, - false /* forwarded */); + ps = &load_project (ctx, pd, pd, false /* forwarded */); } } diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx index 2a0dbd2..07051c5 100644 --- a/libbuild2/cc/init.cxx +++ b/libbuild2/cc/init.cxx @@ -142,9 +142,9 @@ namespace build2 // Register scope operation callback. 
// - // It feels natural to do clean up sidebuilds as a post operation but - // that prevents the (otherwise-empty) out root directory to be cleaned - // up (via the standard fsdir{} chain). + // It feels natural to clean up sidebuilds as a post operation but that + // prevents the (otherwise-empty) out root directory to be cleaned up + // (via the standard fsdir{} chain). // rs.operation_callbacks.emplace ( perform_clean_id, diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx index d57f5eb..d2be3d8 100644 --- a/libbuild2/cc/lexer.cxx +++ b/libbuild2/cc/lexer.cxx @@ -48,8 +48,8 @@ namespace build2 auto lexer:: peek (bool e) -> xchar { - if (unget_) - return ungetc_; + if (ungetn_ != 0) + return ungetb_[ungetn_ - 1]; if (unpeek_) return unpeekc_; @@ -98,11 +98,8 @@ namespace build2 inline auto lexer:: get (bool e) -> xchar { - if (unget_) - { - unget_ = false; - return ungetc_; - } + if (ungetn_ != 0) + return ungetb_[--ungetn_]; else { xchar c (peek (e)); @@ -117,7 +114,7 @@ namespace build2 // Increment the logical line similar to how base will increment the // physical (the column counts are the same). // - if (log_line_ && c == '\n' && !unget_) + if (log_line_ && c == '\n' && ungetn_ == 0) ++*log_line_; base::get (c); diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx index b11ee42..bc8eb8e 100644 --- a/libbuild2/cc/link-rule.cxx +++ b/libbuild2/cc/link-rule.cxx @@ -2145,22 +2145,8 @@ namespace build2 // 1 is resource ID, 24 is RT_MANIFEST. We also need to // escape Windows path backslashes. // - os << "1 24 \""; - - const string& s (mf.string ()); - for (size_t i (0), j;; i = j + 1) - { - j = s.find ('\\', i); - os.write (s.c_str () + i, - (j == string::npos ? s.size () : j) - i); - - if (j == string::npos) - break; - - os.write ("\\\\", 2); - } - - os << "\"" << endl; + os << "1 24 \"" << sanitize_strlit (mf.string ()) << '"' + << endl; os.close (); rm.cancel (); @@ -3035,14 +3021,14 @@ namespace build2 auto_rmfile trm; string targ; { - // Calculate the would-be command line length similar to how process' - // implementation does it. - // auto quote = [s = string ()] (const char* a) mutable -> const char* { return process::quote_argument (a, s); }; + // Calculate the would-be command line length similar to how process' + // implementation does it. + // size_t n (0); for (const char* a: args) { diff --git a/libbuild2/config/operation.cxx b/libbuild2/config/operation.cxx index 17eb99a..41d982b 100644 --- a/libbuild2/config/operation.cxx +++ b/libbuild2/config/operation.cxx @@ -1103,8 +1103,7 @@ namespace build2 // this information is stored). So what we are going to do is bootstrap // the newly created project, similar to the way main() does it. // - scope& gs (ctx.global_scope.rw ()); - scope& rs (load_project (gs, d, d, false /* fwd */, false /* load */)); + scope& rs (load_project (ctx, d, d, false /* fwd */, false /* load */)); // Add the default config.config.persist value unless there is a custom // one (specified as a command line override). 
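The single-character ungetc_ in the C++ lexer above becomes a small LIFO buffer (ungetb_ plus the ungetn_ count). The pattern, extracted into a self-contained sketch where only the ungetb_/ungetn_ member names mirror the diff and everything else is illustrative:

  #include <cassert>
  #include <cstddef>
  #include <istream>

  class char_source
  {
  public:
    explicit
    char_source (std::istream& is): is_ (is) {}

    int
    get ()
    {
      // Serve pushed-back characters in reverse (LIFO) order.
      //
      return ungetn_ != 0 ? ungetb_[--ungetn_] : is_.get ();
    }

    int
    peek ()
    {
      return ungetn_ != 0 ? ungetb_[ungetn_ - 1] : is_.peek ();
    }

    void
    unget (int c)
    {
      assert (ungetn_ != 2); // At most two characters can be pushed back.
      ungetb_[ungetn_++] = c;
    }

  private:
    std::istream& is_;
    int ungetb_[2];          // Pushed-back characters.
    std::size_t ungetn_ = 0; // Number of pushed-back characters.
  };

Note how get() consumes from the buffer while peek() only looks at the most recently pushed character, mirroring the peek()/get() changes in the diff.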
@@ -1223,6 +1222,7 @@ namespace build2 rmod, "", /* root_post */ string ("config"), /* config_module */ + nullopt, /* config_file */ true, /* buildfile */ "the create meta-operation"); diff --git a/libbuild2/context.cxx b/libbuild2/context.cxx index 0be0046..a3455ea 100644 --- a/libbuild2/context.cxx +++ b/libbuild2/context.cxx @@ -340,8 +340,10 @@ namespace build2 // And so the first token should be a word which can be either a // variable name (potentially with the directory qualification) or just // the directory, in which case it should be followed by another word - // (unqualified variable name). + // (unqualified variable name). To avoid treating any of the visibility + // modifiers as special we use the cmdvar mode. // + l.mode (lexer_mode::cmdvar); token t (l.next ()); optional dir; @@ -892,6 +894,14 @@ namespace build2 } phase_switch:: + phase_switch (phase_unlock&& u, phase_lock&& l) + : old_phase (u.l->phase), new_phase (l.phase) + { + phase_lock_instance = u.l; // Disarms phase_lock + u.l = nullptr; // Disarms phase_unlock + } + + phase_switch:: ~phase_switch () noexcept (false) { phase_lock* pl (phase_lock_instance); diff --git a/libbuild2/context.hxx b/libbuild2/context.hxx index 573b8d1..2a9c93e 100644 --- a/libbuild2/context.hxx +++ b/libbuild2/context.hxx @@ -428,7 +428,7 @@ namespace build2 // const loaded_modules_lock* modules_lock; - // Nested context for updating build system modules. + // Nested context for updating build system modules and ad hoc recipes. // // Note that such a context itself should normally have modules_context // setup to point to itself (see import_module() for details). @@ -438,9 +438,10 @@ namespace build2 public: // If module_context is absent, then automatic updating of build system - // modules is disabled. If it is NULL, then the context will be created - // lazily if and when necessary. Otherwise, it should be a properly setup - // context (including, normally, a self-reference in modules_context). + // modules and ad hoc recipes is disabled. If it is NULL, then the context + // will be created lazily if and when necessary. Otherwise, it should be a + // properly setup context (including, normally, a self-reference in + // modules_context). // explicit context (scheduler&, @@ -553,9 +554,17 @@ namespace build2 // Assuming we have a lock on the current phase, temporarily switch to a // new phase and switch back on destruction. 
// + // The second constructor can be used for a switch with an intermittent + // unlock: + // + // phase_unlock pu; + // phase_lock pl; + // phase_switch ps (move (pu), move (pl)); + // struct LIBBUILD2_SYMEXPORT phase_switch { - explicit phase_switch (context&, run_phase); + phase_switch (context&, run_phase); + phase_switch (phase_unlock&&, phase_lock&&); ~phase_switch () noexcept (false); run_phase old_phase, new_phase; diff --git a/libbuild2/dist/operation.cxx b/libbuild2/dist/operation.cxx index 34dc747..8dd8a6e 100644 --- a/libbuild2/dist/operation.cxx +++ b/libbuild2/dist/operation.cxx @@ -319,7 +319,7 @@ namespace build2 ctx.current_operation (op_update, nullptr, false /* diag_noise */); - action a (perform_id, update_id); + action a (perform_update_id); mo_perform.match (params, a, files, 1 /* diag (failures only) */, diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx index 8ee68b7..9f60900 100644 --- a/libbuild2/dump.cxx +++ b/libbuild2/dump.cxx @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -213,7 +214,90 @@ namespace build2 os << ind << t << ':'; - // First print target/rule-specific variables, if any. + // First check if this is the simple case where we can print everything + // as a single declaration. + // + const prerequisites& ps (t.prerequisites ()); + bool simple (true); + for (const prerequisite& p: ps) + { + if (!p.vars.empty ()) // Has prerequisite-specific vars. + { + simple = false; + break; + } + } + + // If the target has been matched to a rule, we also print resolved + // prerequisite targets. + // + // Note: running serial and task_count is 0 before any operation has + // started. + // + const prerequisite_targets* pts (nullptr); + { + action inner; // @@ Only for the inner part of the action currently. + + if (size_t c = t[inner].task_count.load (memory_order_relaxed)) + { + if (c == t.ctx.count_applied () || c == t.ctx.count_executed ()) + { + pts = &t.prerequisite_targets[inner]; + + bool f (false); + for (const target* pt: *pts) + { + if (pt != nullptr) + { + f = true; + break; + } + } + + if (!f) + pts = nullptr; + } + } + } + + auto print_pts = [&os, &ps, pts] () + { + for (const target* pt: *pts) + { + if (pt != nullptr) + os << ' ' << *pt; + } + + // Only omit '|' if we have no prerequisites nor targets. + // + if (!ps.empty ()) + { + os << " |"; + return true; + } + + return false; + }; + + if (simple) + { + if (pts != nullptr) + print_pts (); + + for (const prerequisite& p: ps) + { + // Print it as a target if one has been cached. + // + if (const target* t = p.target.load (memory_order_relaxed)) // Serial. + os << ' ' << *t; + else + os << ' ' << p; + } + } + + bool used (false); // Target header has been used. + + // Print target/rule-specific variables, if any. // { bool tv (!t.vars.empty ()); @@ -258,87 +342,78 @@ namespace build2 if (rel) stream_verb (os, nsv); - os << endl - << ind << t << ':'; + used = true; } } - bool used (false); // Target header has been used to display prerequisites. - - // If the target has been matched to a rule, first print resolved - // prerequisite targets. + // Then ad hoc recipes, if any. // - // Note: running serial and task_count is 0 before any operation has - // started. - // - action inner; // @@ Only for the inner part of the action currently. 
- - if (size_t c = t[inner].task_count.load (memory_order_relaxed)) + if (!t.adhoc_recipes.empty ()) { - if (c == t.ctx.count_applied () || c == t.ctx.count_executed ()) + for (const adhoc_recipe r: t.adhoc_recipes) { - bool f (false); - for (const target* pt: t.prerequisite_targets[inner]) - { - if (pt == nullptr) // Skipped. - continue; - - os << ' ' << *pt; - f = true; - } - - // Only omit '|' if we have no prerequisites nor targets. - // - if (f || !t.prerequisites ().empty ()) - { - os << " |"; - used = true; - } + os << endl; + r.rule->dump (os, ind); // @@ TODO: pass action(s). } + + used = true; } - // Print prerequisites. Those that have prerequisite-specific variables - // have to be printed as a separate dependency. - // - const prerequisites& ps (t.prerequisites ()); - for (auto i (ps.begin ()), e (ps.end ()); i != e; ) + if (!simple) { - const prerequisite& p (*i++); - bool ps (!p.vars.empty ()); // Has prerequisite-specific vars. - - if (ps && used) // If it has been used, get a new header. + if (used) + { os << endl << ind << t << ':'; - // Print it as a target if one has been cached. - // - if (const target* t = p.target.load (memory_order_relaxed)) // Serial. - os << ' ' << *t; - else - os << ' ' << p; + used = false; + } + + if (pts != nullptr) + used = print_pts () || used; - if (ps) + // Print prerequisites. Those that have prerequisite-specific variables + // have to be printed as a separate dependency. + // + for (auto i (ps.begin ()), e (ps.end ()); i != e; ) { - if (rel) - stream_verb (os, osv); // We want variable values in full. + const prerequisite& p (*i++); + bool ps (!p.vars.empty ()); // Has prerequisite-specific vars. - os << ':' << endl - << ind << '{'; - ind += " "; - dump_variables (os, ind, p.vars, s, variable_kind::prerequisite); - ind.resize (ind.size () - 2); - os << endl - << ind << '}'; + if (ps && used) // If it has been used, get a new header. + os << endl + << ind << t << ':'; - if (rel) - stream_verb (os, nsv); + // Print it as a target if one has been cached. + // + if (const target* t = p.target.load (memory_order_relaxed)) // Serial. + os << ' ' << *t; + else + os << ' ' << p; - if (i != e) // If we have another, get a new header. + if (ps) + { + if (rel) + stream_verb (os, osv); // We want variable values in full. + + os << ':' << endl + << ind << '{'; + ind += " "; + dump_variables (os, ind, p.vars, s, variable_kind::prerequisite); + ind.resize (ind.size () - 2); os << endl - << ind << t << ':'; - } + << ind << '}'; + + if (rel) + stream_verb (os, nsv); - used = !ps; + if (i != e) // If we have another, get a new header. + os << endl + << ind << t << ':'; + } + + used = !ps; + } } if (rel) diff --git a/libbuild2/file.cxx b/libbuild2/file.cxx index 0bcb198..571980e 100644 --- a/libbuild2/file.cxx +++ b/libbuild2/file.cxx @@ -17,7 +17,8 @@ #include #include -#include // lookup_config() +#include // config::module::version +#include // config::lookup_config() using namespace std; using namespace butl; @@ -310,13 +311,13 @@ namespace build2 } scope_map::iterator - create_root (scope& s, const dir_path& out_root, const dir_path& src_root) + create_root (context& ctx, + const dir_path& out_root, + const dir_path& src_root) { - auto i (s.ctx.scopes.rw (s).insert (out_root, true /* root */)); + auto i (ctx.scopes.rw ().insert (out_root, true /* root */)); scope& rs (i->second); - context& ctx (rs.ctx); - // Set out_path. Note that src_path is set in setup_root() below. 
// if (rs.out_path_ != &i->first) @@ -1208,7 +1209,7 @@ namespace build2 // probably be tried first since that src_root was explicitly configured // by the user. After that, #2 followed by #1 seems reasonable. // - scope& rs (create_root (root, out_root, dir_path ())->second); + scope& rs (create_root (ctx, out_root, dir_path ())->second); bool bstrapped (bootstrapped (rs)); @@ -1275,7 +1276,7 @@ namespace build2 // The same logic to src_root as in create_bootstrap_outer(). // - scope& rs (create_root (root, out_root, dir_path ())->second); + scope& rs (create_root (ctx, out_root, dir_path ())->second); optional altn; if (!bootstrapped (rs)) @@ -1466,17 +1467,16 @@ namespace build2 } scope& - load_project (scope& s, + load_project (context& ctx, const dir_path& out_root, const dir_path& src_root, bool forwarded, bool load) { + assert (ctx.phase == run_phase::load); assert (!forwarded || out_root != src_root); - context& ctx (s.ctx); - - auto i (create_root (s, out_root, src_root)); + auto i (create_root (ctx, out_root, src_root)); scope& rs (i->second); if (!bootstrapped (rs)) @@ -2065,13 +2065,11 @@ namespace build2 fwd = (src_root != out_root); } - scope& gs (ctx.global_scope.rw ()); - for (const scope* proot (nullptr); ; proot = root) { bool top (proot == nullptr); - root = &create_root (gs, out_root, src_root)->second; + root = &create_root (ctx, out_root, src_root)->second; bool bstrapped (bootstrapped (*root)); @@ -2153,6 +2151,8 @@ namespace build2 // load_root (*root); + scope& gs (ctx.global_scope.rw ()); + // Use a temporary scope so that the export stub doesn't mess anything up. // temp_scope ts (gs); @@ -2555,11 +2555,14 @@ namespace build2 const string& rpre, const strings& rmod, const string& rpos, - const optional& config, + const optional& config_mod, + const optional& config_file, bool buildfile, const char* who, uint16_t verbosity) { + assert (!config_file || (config_mod && *config_mod == "config")); + string hdr ("# Generated by " + string (who) + ". Edit if you know" " what you are doing.\n" "#"); @@ -2610,12 +2613,12 @@ namespace build2 ofs << endl; - if (config) - ofs << "using " << *config << endl; + if (config_mod) + ofs << "using " << *config_mod << endl; for (const string& m: bmod) { - if (!config || m != *config) + if (!config_mod || m != *config_mod) ofs << "using " << m << endl; } @@ -2675,6 +2678,32 @@ namespace build2 } } + // Write build/config.build. + // + if (config_file) + { + path f (d / std_build_dir / "config.build"); // std_config_file + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << f; + + try + { + ofdstream ofs (f); + + ofs << hdr << endl + << "config.version = " << config::module::version << endl + << endl + << *config_file << endl; + + ofs.close (); + } + catch (const io_error& e) + { + fail << "unable to write to " << f << ": " << e; + } + } + // Write root buildfile. // if (buildfile) diff --git a/libbuild2/file.hxx b/libbuild2/file.hxx index 0123591..78be600 100644 --- a/libbuild2/file.hxx +++ b/libbuild2/file.hxx @@ -106,11 +106,10 @@ namespace build2 source_once (scope& root, scope& base, const path&, scope& once); // Create project's root scope. Only set the src_root variable if the passed - // src_root value is not empty. The scope argument is only used for context - // and as a proof of lock. + // src_root value is not empty. 
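With these signature changes the call sites pass the context directly rather than a scope serving as proof of lock. The calling pattern, as a minimal sketch with illustrative directories (it matches the call sites updated above):

  scope_map::iterator i (create_root (ctx, out_root, src_root));

  // Note: load_project() now asserts that it is called during the load
  // phase.
  //
  scope& rs (load_project (ctx, out_root, src_root, false /* forwarded */));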
// LIBBUILD2_SYMEXPORT scope_map::iterator - create_root (scope&, const dir_path& out_root, const dir_path& src_root); + create_root (context&, const dir_path& out_root, const dir_path& src_root); // Setup root scope. Note that it assumes the src_root variable has already // been set. @@ -142,10 +141,8 @@ namespace build2 // loaded and currently we do not add the newly loaded subproject to the // outer project's subprojects map. // - // The scope argument is only used as proof of lock. - // LIBBUILD2_SYMEXPORT scope& - load_project (scope&, + load_project (context&, const dir_path& out_root, const dir_path& src_root, bool forwarded, @@ -441,6 +438,7 @@ namespace build2 const strings& root_modules, // Root modules. const string& root_post, // Extra root.build text. const optional& config_module, // Config module to load. + const optional& config_file, // Ad hoc config.build contents. bool buildfile, // Create root buildfile. const char* who, // Who is creating it. uint16_t verbosity = 1); // Diagnostic verbosity. diff --git a/libbuild2/functions-name.cxx b/libbuild2/functions-name.cxx index 283b1a6..70659ee 100644 --- a/libbuild2/functions-name.cxx +++ b/libbuild2/functions-name.cxx @@ -4,6 +4,7 @@ #include #include #include +#include using namespace std; @@ -14,7 +15,7 @@ namespace build2 // out of scope). See scope::find_target_type() for details. // static pair> - to_target (const scope* s, name&& n) + to_target_name (const scope* s, name&& n) { optional e; @@ -31,74 +32,129 @@ namespace build2 return make_pair (move (n), move (e)); } + static const target& + to_target (const scope& s, name&& n, name&& o) + { + if (const target* r = search_existing (n, s, o.dir)) + return *r; + + fail << "target " + << (n.pair ? names {move (n), move (o)} : names {move (n)}) + << " not found" << endf; + } + void name_functions (function_map& m) { - function_family f (m, "name"); - // These functions treat a name as a target/prerequisite name. // // While on one hand it feels like calling them target.name(), etc., would // have been more appropriate, on the other hand they can also be called // on prerequisite names. They also won't always return the same result as // if we were interrogating an actual target (e.g., the directory may be - // relative). + // relative). Plus we now have functions that can only be called on + // targets (see below). // - f["name"] = [](const scope* s, name n) + function_family fn (m, "name"); + + fn["name"] = [](const scope* s, name n) { - return to_target (s, move (n)).first.value; + return to_target_name (s, move (n)).first.value; }; - f["name"] = [](const scope* s, names ns) + fn["name"] = [](const scope* s, names ns) { - return to_target (s, convert (move (ns))).first.value; + return to_target_name (s, convert (move (ns))).first.value; }; // Note: returns NULL if extension is unspecified (default) and empty if // specified as no extension. 
// - f["extension"] = [](const scope* s, name n) + fn["extension"] = [](const scope* s, name n) { - return to_target (s, move (n)).second; + return to_target_name (s, move (n)).second; }; - f["extension"] = [](const scope* s, names ns) + fn["extension"] = [](const scope* s, names ns) { - return to_target (s, convert (move (ns))).second; + return to_target_name (s, convert (move (ns))).second; }; - f["directory"] = [](const scope* s, name n) + fn["directory"] = [](const scope* s, name n) { - return to_target (s, move (n)).first.dir; + return to_target_name (s, move (n)).first.dir; }; - f["directory"] = [](const scope* s, names ns) + fn["directory"] = [](const scope* s, names ns) { - return to_target (s, convert (move (ns))).first.dir; + return to_target_name (s, convert (move (ns))).first.dir; }; - f["target_type"] = [](const scope* s, name n) + fn["target_type"] = [](const scope* s, name n) { - return to_target (s, move (n)).first.type; + return to_target_name (s, move (n)).first.type; }; - f["target_type"] = [](const scope* s, names ns) + fn["target_type"] = [](const scope* s, names ns) { - return to_target (s, convert (move (ns))).first.type; + return to_target_name (s, convert (move (ns))).first.type; }; // Note: returns NULL if no project specified. // - f["project"] = [](const scope* s, name n) + fn["project"] = [](const scope* s, name n) { - return to_target (s, move (n)).first.proj; + return to_target_name (s, move (n)).first.proj; }; - f["project"] = [](const scope* s, names ns) + fn["project"] = [](const scope* s, names ns) { - return to_target (s, convert (move (ns))).first.proj; + return to_target_name (s, convert (move (ns))).first.proj; + }; + + // Functions that can be called only on real targets. + // + function_family ft (m, "target"); + + fn["path"] = [](const scope* s, names ns) + { + if (s == nullptr) + fail << "target.path() called out of scope" << endf; + + // Most of the time we will have a single target so optimize for that. + // + small_vector r; + + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + name& n (*i), o; + const target& t (to_target (*s, move (n), move (n.pair ? *++i : o))); + + if (const auto* pt = t.is_a ()) + { + const path& p (pt->path ()); + + if (&p != &empty_path) + r.push_back (p); + else + fail << "target " << t << " path is not assigned"; + } + else + fail << "target " << t << " is not path-based"; + } + + // We want the result to be path if we were given a single target and + // paths if multiple (or zero). The problem is, we cannot distinguish it + // based on the argument type (e.g., name vs names) since passing an + // out-qualified single target requires two names. + // + if (r.size () == 1) + return value (move (r[0])); + + return value (paths (make_move_iterator (r.begin ()), + make_move_iterator (r.end ()))); }; // Name-specific overloads from builtins. 
// - function_family b (m, "builtin"); + function_family fb (m, "builtin"); - b[".concat"] = [](dir_path d, name n) + fb[".concat"] = [](dir_path d, name n) { d /= n.dir; n.dir = move (d); diff --git a/libbuild2/lexer+foreign.test.testscript b/libbuild2/lexer+foreign.test.testscript new file mode 100644 index 0000000..94c83c1 --- /dev/null +++ b/libbuild2/lexer+foreign.test.testscript @@ -0,0 +1,96 @@ +# file : libbuild2/lexer+foreign.test.testscript +# license : MIT; see accompanying LICENSE file + +test.arguments = foreign=2 + +: basics +: +$* <>EOO +echo foo +}} +EOI +'echo foo +' +}} + +EOO + +: empty +: +$* <>EOO +}} +EOI +'' +}} + +EOO + +: braces +: +$* <>EOO +} +}}} +{{}} +}} } +}} +EOI +'} +}}} +{{}} +}} } +' +}} + +EOO + +: whitespaces +: +$* <' }} ' >>EOO # Note: there are TABs. +'' +}} + +EOO + +: comment +: +$* <'}} # comment' >>EOO +'' +}} + +EOO + +: eos +: +$* <:'}}' >>EOO +'' +}} +EOO + +: missing +: Note that we get eos right away (i.e., there is no word token). +: +$* <>EOO + echo foo + }}} + EOI + ' echo foo + ' + }}} + + EOO +} diff --git a/libbuild2/lexer+normal.test.testscript b/libbuild2/lexer+normal.test.testscript index c9448c3..e2780a2 100644 --- a/libbuild2/lexer+normal.test.testscript +++ b/libbuild2/lexer+normal.test.testscript @@ -34,3 +34,57 @@ $* <:'x?=y' >>EOO ?= 'y' EOO + +: percent +: Leading percent sign recognition. +: +{ + : first + : + $* <:'%%' >>EOO + % + '%' + EOO + + : space + : + $* <:' %%' >>EOO + % + '%' + EOO + + : newline + : + $* <>EOO + + %% + EOI + % + '%' + + EOO + + : non-token + : + $* <:'x%' >>EOO + 'x%' + EOO +} + +: multi-lcbrace +: Leading multi-curly-brace recognition. +: +{ + : two + : + $* <:'{{' >>EOO + {{ + EOO + + : three + : + $* <:'{{{c++' >>EOO + {{{ + 'c++' + EOO +} diff --git a/libbuild2/lexer+quoting.test.testscript b/libbuild2/lexer+quoting.test.testscript index debefc1..0143c90 100644 --- a/libbuild2/lexer+quoting.test.testscript +++ b/libbuild2/lexer+quoting.test.testscript @@ -47,8 +47,10 @@ EOO } : part +: { : quoted + : { : start : Token start already quoted diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index c0cadd3..6d3504c 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -14,7 +14,10 @@ namespace build2 pair, bool> lexer:: peek_chars () { - sep_ = skip_spaces (); + auto p (skip_spaces ()); + assert (!p.second); + sep_ = p.first; + char r[2] = {'\0', '\0'}; xchar c0 (peek ()); @@ -34,7 +37,7 @@ namespace build2 } void lexer:: - mode (lexer_mode m, char ps, optional esc) + mode (lexer_mode m, char ps, optional esc, uintptr_t data) { bool a (false); // attributes @@ -54,7 +57,11 @@ namespace build2 switch (m) { case lexer_mode::normal: + case lexer_mode::cmdvar: { + // Note: `%` is only recognized at the beginning of the line so it + // should not be included here. + // a = true; s1 = ":<>=+? $(){}#\t\n"; s2 = " == "; @@ -121,10 +128,16 @@ namespace build2 n = false; break; } + case lexer_mode::foreign: + assert (data > 1); + // Fall through. case lexer_mode::single_quoted: case lexer_mode::double_quoted: - s = false; - // Fall through. + { + assert (ps == '\0'); + s = false; + break; + } case lexer_mode::variable: { // These are handled in an ad hoc way in word(). @@ -134,7 +147,7 @@ namespace build2 default: assert (false); // Unhandled custom mode. 
} - state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: @@ -148,6 +161,7 @@ namespace build2 switch (m) { case lexer_mode::normal: + case lexer_mode::cmdvar: case lexer_mode::value: case lexer_mode::values: case lexer_mode::switch_expressions: @@ -158,10 +172,13 @@ namespace build2 case lexer_mode::buildspec: break; case lexer_mode::eval: return next_eval (); case lexer_mode::double_quoted: return next_quoted (); + case lexer_mode::foreign: return next_foreign (); default: assert (false); // Unhandled custom mode. } - bool sep (skip_spaces ()); + pair skip (skip_spaces ()); + bool sep (skip.first); // Separated from a previous character. + bool first (skip.second); // First non-whitespace character of a line. xchar c (get ()); uint64_t ln (c.line), cn (c.column); @@ -209,7 +226,8 @@ namespace build2 m == lexer_mode::case_patterns) state_.pop (); - // Re-enable attributes in the normal mode. + // Re-enable attributes in the normal mode (should never be needed in + // cmdvar). // if (state_.top ().mode == lexer_mode::normal) state_.top ().attributes = true; @@ -230,6 +248,32 @@ namespace build2 } } + // Line-leading tokens in the normal mode. + // + // Note: must come before any other (e.g., `{`) tests below. + // + if (m == lexer_mode::normal && first) + { + switch (c) + { + case '%': return make_token (type::percent); + case '{': + { + string v; + while (peek () == '{') + v += get (); + + if (!v.empty ()) + { + v += '{'; + return make_token (type::multi_lcbrace, move (v)); + } + + break; + } + } + } + // The following characters are special in all modes except attributes. // if (m != lexer_mode::attributes && m != lexer_mode::attribute_value) @@ -267,6 +311,7 @@ namespace build2 // switch_expressions modes. // if (m == lexer_mode::normal || + m == lexer_mode::cmdvar || m == lexer_mode::switch_expressions || m == lexer_mode::case_patterns) { @@ -278,7 +323,8 @@ namespace build2 // The following characters are special in the normal mode. // - if (m == lexer_mode::normal) + if (m == lexer_mode::normal || + m == lexer_mode::cmdvar) { switch (c) { @@ -315,7 +361,8 @@ namespace build2 // The following characters are special in the normal mode. // - if (m == lexer_mode::normal) + if (m == lexer_mode::normal || + m == lexer_mode::cmdvar) { switch (c) { @@ -361,7 +408,7 @@ namespace build2 // This mode is quite a bit like the value mode when it comes to special // characters, except that we have some of our own. - bool sep (skip_spaces ()); + bool sep (skip_spaces ().first); xchar c (get ()); if (eos (c)) @@ -485,6 +532,99 @@ namespace build2 } token lexer:: + next_foreign () + { + state& st (state_.top ()); + + if (st.hold) + { + token r (move (*st.hold)); + state_.pop (); // Expire foreign mode. + return r; + } + + auto count (state_.top ().data); // Number of closing braces to expect. + + xchar c (get ()); // First character of first line after `{{...`. + uint64_t ln (c.line), cn (c.column); + + string lexeme; + for (bool first (true); !eos (c); c = get ()) + { + // If this is the first character of a line, recognize closing braces. + // + if (first) + { + first = false; + + // If this turns not to be the closing braces, we need to add any + // characters we have extracted to lexeme. Instead of saving these + // characters in a temporary we speculatively add them to the lexeme + // but then chop them off if this turned out to be the closing braces. 
+ // + size_t chop (lexeme.size ()); + + // Skip leading whitespaces, if any. + // + for (; c == ' ' || c == '\t'; c = get ()) + lexeme += c; + + uint64_t bln (c.line), bcn (c.column); // Position of first `}`. + + // Count braces. + // + auto i (count); + for (; c == '}'; c = get ()) + { + lexeme += c; + + if (--i == 0) + break; + } + + if (i == 0) // Got enough braces. + { + // Make sure there are only whitespaces/comments after. Note that + // now we must start peeking since newline is not "ours". + // + for (c = peek (); c == ' ' || c == '\t'; c = peek ()) + lexeme += get (); + + if (c == '\n' || c == '#' || eos (c)) + { + st.hold = token (type::multi_rcbrace, + string (count, '}'), + false, quote_type::unquoted, false, + bln, bcn, + token_printer); + + lexeme.resize (chop); + return token (move (lexeme), + false, quote_type::unquoted, false, + ln, cn); + } + + get (); // And fall through (not eos). + } + else + { + if (eos (c)) + break; + + // Fall through. + } + } + + if (c == '\n') + first = true; + + lexeme += c; + } + + return token (type::eos, false, c.line, c.column, token_printer); + } + + token lexer:: word (state st, bool sep) { lexer_mode m (st.mode); @@ -728,7 +868,7 @@ namespace build2 return token (move (lexeme), sep, qtype, qcomp, ln, cn); } - bool lexer:: + pair lexer:: skip_spaces () { bool r (sep_); @@ -739,7 +879,7 @@ namespace build2 // In some special modes we don't skip spaces. // if (!s.sep_space) - return r; + return make_pair (r, false); xchar c (peek ()); bool start (c.column == 1); @@ -758,6 +898,8 @@ namespace build2 { // In some modes we treat newlines as ordinary spaces. // + // Note that in this case we don't adjust start. + // if (!s.sep_newline) { r = true; @@ -772,7 +914,7 @@ namespace build2 break; } - return r; + return make_pair (r, start); } case '#': { @@ -833,12 +975,12 @@ namespace build2 } // Fall through. default: - return r; // Not a space. + return make_pair (r, start); // Not a space. } get (); } - return r; + return make_pair (r, start); } } diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx index 02112cb..749668e 100644 --- a/libbuild2/lexer.hxx +++ b/libbuild2/lexer.hxx @@ -20,17 +20,18 @@ namespace build2 { - // Context-dependent lexing mode. Quoted modes are internal and should not - // be set explicitly. In the value mode we don't treat certain characters - // (e.g., `+`, `=`) as special so that we can use them in the variable - // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict - // certain character (e.g., `/`) from appearing in the name. The values mode - // is like value but recogizes `,` as special (used in contexts where we - // need to list multiple values). The attributes/attribute_value modes are - // like values where each value is potentially a variable assignment; they - // don't treat `{` and `}` as special (so we cannot have name groups in - // attributes) as well as recognizes `=` and `]`. The eval mode is used in - // the evaluation context. + // Context-dependent lexing mode. + // + // Quoted modes are internal and should not be set explicitly. In the value + // mode we don't treat certain characters (e.g., `+`, `=`) as special so + // that we can use them in the variable values, e.g., `foo = g++`. In + // contrast, in the variable mode, we restrict certain character (e.g., `/`) + // from appearing in the name. The values mode is like value but recogizes + // `,` as special (used in contexts where we need to list multiple + // values). 
The attributes/attribute_value modes are like values where each + // value is potentially a variable assignment; they don't treat `{` and `}` + // as special (so we cannot have name groups in attributes) as well as + // recognizes `=` and `]`. The eval mode is used in the evaluation context. // // A number of modes are "derived" from the value/values mode by recognizing // a few extra characters: @@ -42,10 +43,22 @@ namespace build2 // split words separated by the pair character (to disable pairs one can // pass `\0` as a pair character). // + // The normal mode recognizes `%` and `{{...` at the beginning of the line + // as special. The cmdvar mode is like normal but does not treat these + // character sequences as special. + // + // Finally, the foreign mode reads everything until encountering a line that + // contains nothing (besides whitespaces) other than the closing multi- + // curly-brace (`}}...`) (or eos) returning the contents as the word token + // followed by the multi_rcbrace (or eos). In a way it is similar to the + // single-quote mode. The number of closing braces to expect is passed as + // mode data. + // // The alternative modes must be set manually. The value/values and derived // modes automatically expires after the end of the line. The attribute mode // expires after the closing `]`. The variable mode expires after the word - // token. And the eval mode expires after the closing `)`. + // token. The eval mode expires after the closing `)`. And the foreign mode + // expires after the closing braces. // // Note that normally it is only safe to switch mode when the current token // is not quoted (or, more generally, when you are not in the double-quoted @@ -70,6 +83,7 @@ namespace build2 enum { normal = base_type::value_next, + cmdvar, variable, value, values, @@ -80,6 +94,7 @@ namespace build2 eval, single_quoted, double_quoted, + foreign, buildspec, value_next @@ -91,7 +106,7 @@ namespace build2 }; class LIBBUILD2_SYMEXPORT lexer: - public butl::char_scanner + public butl::char_scanner { public: // If escape is not NULL then only escape sequences with characters from @@ -116,7 +131,8 @@ namespace build2 virtual void mode (lexer_mode, char pair_separator = '\0', - optional escapes = nullopt); + optional escapes = nullopt, + uintptr_t data = 0); // Enable attributes recognition for the next token. // @@ -157,7 +173,10 @@ namespace build2 protected: struct state { - lexer_mode mode; + lexer_mode mode; + uintptr_t data; + optional hold; + bool attributes; char sep_pair; @@ -183,17 +202,22 @@ namespace build2 token next_quoted (); + token + next_foreign (); + // Lex a word assuming current is the top state (which may already have // been "expired" from the top). // virtual token word (state current, bool separated); - // Return true if we have seen any spaces. Skipped empty lines - // don't count. In other words, we are only interested in spaces - // that are on the same line as the following non-space character. + // Return true in first if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces that are + // on the same line as the following non-space character. Return true in + // second if we have started skipping spaces from column 1 (note that + // if this mode does not skip spaces, then second will always be false). // - bool + pair skip_spaces (); // Diagnostics. 
@@ -232,7 +256,7 @@ namespace build2 namespace butl // ADL { inline build2::location - get_location (const butl::char_scanner::xchar& c, + get_location (const butl::char_scanner::xchar& c, const void* data) { using namespace build2; diff --git a/libbuild2/lexer.test.cxx b/libbuild2/lexer.test.cxx index 5e39e43..24f0528 100644 --- a/libbuild2/lexer.test.cxx +++ b/libbuild2/lexer.test.cxx @@ -1,6 +1,7 @@ // file : libbuild2/lexer.test.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file +#include // strtoul() #include #include @@ -14,13 +15,15 @@ using namespace std; namespace build2 { - // Usage: argv[0] [-q] [] + // Usage: argv[0] [-q] [[=]] // int main (int argc, char* argv[]) { bool quote (false); + lexer_mode m (lexer_mode::normal); + uintptr_t d (0); for (int i (1); i != argc; ++i) { @@ -36,7 +39,12 @@ namespace build2 else if (a == "attributes") m = lexer_mode::attributes; else if (a == "eval") m = lexer_mode::eval; else if (a == "buildspec") m = lexer_mode::buildspec; - else assert (false); + else if (a.compare (0, 8, "foreign=") == 0) + { + m = lexer_mode::foreign; + d = strtoul (a.c_str () + 8, nullptr, 10); + } + else assert (false); break; } } @@ -51,7 +59,7 @@ namespace build2 lexer l (cin, in); if (m != lexer_mode::normal) - l.mode (m); + l.mode (m, '\0', nullopt, d); // No use printing eos since we will either get it or loop forever. // @@ -62,7 +70,7 @@ namespace build2 // Print each token on a separate line without quoting operators. // - t.printer (cout, t, false); + t.printer (cout, t, print_mode::normal); if (quote) { diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx index 3abb102..cfedefe 100644 --- a/libbuild2/module.cxx +++ b/libbuild2/module.cxx @@ -63,6 +63,162 @@ namespace build2 mod); } + // Note: also used by ad hoc recipes thus not static. + // + void + create_module_context (context& ctx, const location& loc) + { + assert (ctx.module_context == nullptr); + assert (*ctx.module_context_storage == nullptr); + + // Since we are using the same scheduler, it makes sense to reuse the + // same global mutexes. Also disable nested module context for good + // measure. + // + ctx.module_context_storage->reset ( + new context (ctx.sched, + ctx.mutexes, + false, /* match_only */ + false, /* dry_run */ + ctx.keep_going, + ctx.global_var_overrides, /* cmd_vars */ + nullopt)); /* module_context */ + + // We use the same context for building any nested modules that might be + // required while building modules. + // + ctx.module_context = ctx.module_context_storage->get (); + ctx.module_context->module_context = ctx.module_context; + + // Setup the context to perform update. In a sense we have a long-running + // perform meta-operation batch (indefinite, in fact, since we never call + // the meta-operation's *_post() callbacks) in which we periodically + // execute update operations. + // + // Note that we perform each build in a separate update operation. Failed + // that, if the same target is update twice (which may happen with ad hoc + // recipes) we will see the old state. + // + if (mo_perform.meta_operation_pre != nullptr) + mo_perform.meta_operation_pre ({} /* parameters */, loc); + + ctx.module_context->current_meta_operation (mo_perform); + + if (mo_perform.operation_pre != nullptr) + mo_perform.operation_pre ({} /* parameters */, update_id); + } + + // Note: also used by ad hoc recipes thus not static. 
+ // + const target& + update_in_module_context (context& ctx, const scope& rs, names tgt, + const location& loc, const path& bf) + { + // New update operation. + // + ctx.module_context->current_operation (op_update); + + // Un-tune the scheduler. + // + // Note that we can only do this if we are running serially because + // otherwise we cannot guarantee the scheduler is idle (we could have + // waiting threads from the outer context). This is fine for now since the + // only two tuning level we use are serial and full concurrency (turns out + // currently we don't really need this: we will always be called during + // load or match phases and we always do parallel match; but let's keep it + // in case things change). + // + auto sched_tune (ctx.sched.serial () + ? scheduler::tune_guard (ctx.sched, 0) + : scheduler::tune_guard ()); + + // Remap verbosity level 0 to 1 unless we were requested to be silent. + // Failed that, we may have long periods of seemingly nothing happening + // while we quietly update the module, which may look like things have + // hung up. + // + // @@ CTX: modifying global verbosity level won't work if we have multiple + // top-level contexts running in parallel. + // + auto verbg = make_guard ( + [z = !silent && verb == 0 ? (verb = 1, true) : false] () + { + if (z) + verb = 0; + }); + + // Note that for now we suppress progress since it would clash with the + // progress of what we are already doing (maybe in the future we can do + // save/restore but then we would need some sort of diagnostics that we + // have switched to another task). + // + action a (perform_update_id); + action_targets tgs; + + mo_perform.search ({}, /* parameters */ + rs, /* root scope */ + rs, /* base scope */ + bf, /* buildfile */ + rs.find_target_key (tgt, loc), + loc, + tgs); + + mo_perform.match ({}, /* parameters */ + a, + tgs, + 1, /* diag (failures only) */ + false /* progress */); + + mo_perform.execute ({}, /* parameters */ + a, + tgs, + 1, /* diag (failures only) */ + false /* progress */); + + assert (tgs.size () == 1); + return tgs[0].as (); + } + + // Note: also used by ad hoc recipes thus not static. + // + pair + load_module_library (const path& lib, const string& sym, string& err) + { + // Note that we don't unload our modules since it's not clear what would + // the benefit be. + // + void* h (nullptr); + void* s (nullptr); + +#ifndef _WIN32 + // Use RTLD_NOW instead of RTLD_LAZY to both speed things up (we are going + // to use this module now) and to detect any symbol mismatches. + // + if ((h = dlopen (lib.string ().c_str (), RTLD_NOW | RTLD_GLOBAL))) + { + s = dlsym (h, sym.c_str ()); + + if (s == nullptr) + err = dlerror (); + } + else + err = dlerror (); +#else + if (HMODULE m = LoadLibrary (lib.string ().c_str ())) + { + h = static_cast (m); + s = function_cast (GetProcAddress (m, sym.c_str ())); + + if (s == nullptr) + err = win32::last_error_msg (); + } + else + err = win32::last_error_msg (); +#endif + + return make_pair (h, s); + } + static module_load_function* import_module (scope& bs, const string& mod, @@ -180,43 +336,9 @@ namespace build2 { if (!ctx.module_context_storage) fail (loc) << "unable to update build system module " << mod << - info << "updating of build system modules is disabled"; - - assert (*ctx.module_context_storage == nullptr); - - // Since we are using the same scheduler, it makes sense to reuse the - // same global mutexes. Also disable nested module context for good - // measure. 
- // - ctx.module_context_storage->reset ( - new context (ctx.sched, - ctx.mutexes, - false, /* match_only */ - false, /* dry_run */ - ctx.keep_going, - ctx.global_var_overrides, /* cmd_vars */ - nullopt)); /* module_context */ - - // We use the same context for building any nested modules that - // might be required while building modules. - // - ctx.module_context = ctx.module_context_storage->get (); - ctx.module_context->module_context = ctx.module_context; - - // Setup the context to perform update. In a sense we have a long- - // running perform meta-operation batch (indefinite, in fact, since we - // never call the meta-operation's *_post() callbacks) in which we - // periodically execute the update operation. - // - if (mo_perform.meta_operation_pre != nullptr) - mo_perform.meta_operation_pre ({} /* parameters */, loc); + info << "building of build system modules is disabled"; - ctx.module_context->current_meta_operation (mo_perform); - - if (mo_perform.operation_pre != nullptr) - mo_perform.operation_pre ({} /* parameters */, update_id); - - ctx.module_context->current_operation (op_update); + create_module_context (ctx, loc); } // Inherit loaded_modules lock from the outer context. @@ -234,24 +356,20 @@ namespace build2 l5 ([&]{trace << "loaded " << lr.first;}); - // When happens next depends on whether this is a top-level or nested + // What happens next depends on whether this is a top-level or nested // module update. // if (nested) { // This could be initial or exclusive load. // - // @@ TODO + // @@ TODO: see the ad hoc recipe case as a reference. // fail (loc) << "nested build system module updates not yet supported"; } else { - const scope& rs (lr.second); - - action_targets tgs; - action a (perform_id, update_id); - + const target* l; { // Cutoff the existing diagnostics stack and push our own entry. // @@ -263,68 +381,15 @@ namespace build2 dr << info (loc) << "while loading build system module " << mod; }); - // Un-tune the scheduler. - // - // Note that we can only do this if we are running serially because - // otherwise we cannot guarantee the scheduler is idle (we could - // have waiting threads from the outer context). This is fine for - // now since the only two tuning level we use are serial and full - // concurrency (turns out currently we don't really need this: we - // will always be called during load or match phases and we always - // do parallel match; but let's keep it in case things change). - // - auto sched_tune (ctx.sched.serial () - ? scheduler::tune_guard (ctx.sched, 0) - : scheduler::tune_guard ()); - - // Remap verbosity level 0 to 1 unless we were requested to be - // silent. Failed that, we may have long periods of seemingly - // nothing happening while we quietly update the module, which - // may look like things have hung up. - // - // @@ CTX: modifying global verbosity level won't work if we have - // multiple top-level contexts running in parallel. - // - auto verbg = make_guard ( - [z = !silent && verb == 0 ? (verb = 1, true) : false] () - { - if (z) - verb = 0; - }); - - // Note that for now we suppress progress since it would clash with - // the progress of what we are already doing (maybe in the future we - // can do save/restore but then we would need some sort of - // diagnostics that we have switched to another task). 
- // - mo_perform.search ({}, /* parameters */ - rs, /* root scope */ - rs, /* base scope */ - path (), /* buildfile */ - rs.find_target_key (lr.first, loc), - loc, - tgs); - - mo_perform.match ({}, /* parameters */ - a, - tgs, - 1, /* diag (failures only) */ - false /* progress */); - - mo_perform.execute ({}, /* parameters */ - a, - tgs, - 1, /* diag (failures only) */ - false /* progress */); + l = &update_in_module_context ( + ctx, lr.second, move (lr.first), + loc, path ()); } - assert (tgs.size () == 1); - const target& l (tgs[0].as ()); - - if (!l.is_a ("libs")) + if (!l->is_a ("libs")) fail (loc) << "wrong export from build system module " << mod; - lib = l.as ().path (); + lib = l->as ().path (); l5 ([&]{trace << "updated " << lib;}); } @@ -364,53 +429,30 @@ namespace build2 // string sym (sanitize_identifier ("build2_" + mod + "_load")); - // Note that we don't unload our modules since it's not clear what would - // the benefit be. - // - diag_record dr; + string err; + pair hs (load_module_library (lib, sym, err)); -#ifndef _WIN32 - // Use RTLD_NOW instead of RTLD_LAZY to both speed things up (we are going - // to use this module now) and to detect any symbol mismatches. - // - if (void* h = dlopen (lib.string ().c_str (), RTLD_NOW | RTLD_GLOBAL)) + if (hs.first != nullptr) { - r = function_cast (dlsym (h, sym.c_str ())); - // I don't think we should ignore this even if the module is optional. // - if (r == nullptr) + if (hs.second == nullptr) fail (loc) << "unable to lookup " << sym << " in build system module " - << mod << " (" << lib << "): " << dlerror (); + << mod << " (" << lib << "): " << err; + + r = function_cast (hs.second); } else if (!opt) - dr << fail (loc) << "unable to load build system module " << mod - << " (" << lib << "): " << dlerror (); - else - l5 ([&]{trace << "unable to load " << lib << ": " << dlerror ();}); -#else - if (HMODULE h = LoadLibrary (lib.string ().c_str ())) { - r = function_cast ( - GetProcAddress (h, sym.c_str ())); - - if (r == nullptr) - fail (loc) << "unable to lookup " << sym << " in build system module " - << mod << " (" << lib << "): " << win32::last_error_msg (); + // Add import suggestion similar to import phase 2. + // + fail (loc) << "unable to load build system module " << mod << " (" + << lib << "): " << err << + info << "use config.import." << proj.variable () << " command " + << "line variable to specify its project out_root"; } - else if (!opt) - dr << fail (loc) << "unable to load build system module " << mod - << " (" << lib << "): " << win32::last_error_msg (); else - l5 ([&]{trace << "unable to load " << lib << ": " - << win32::last_error_msg ();}); -#endif - - // Add a suggestion similar to import phase 2. - // - if (!dr.empty ()) - dr << info << "use config.import." << proj.variable () << " command " - << "line variable to specify its project out_root" << endf; + l5 ([&]{trace << "unable to load " << lib << ": " << err;}); #endif // BUILD2_BOOTSTRAP diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx index d0e8d85..39d2396 100644 --- a/libbuild2/name.hxx +++ b/libbuild2/name.hxx @@ -113,6 +113,18 @@ namespace build2 LIBBUILD2_SYMEXPORT string to_string (const name&); + template + inline void + to_checksum (T& cs, const name& n) + { + if (n.proj) + cs.append (n.proj->string ()); + cs.append (n.dir.string ()); + cs.append (n.type); + cs.append (n.value); + cs.append (n.pair); + } + // Store a string in a name in a reversible way. 
If the string ends with a // trailing directory separator then it is stored as a directory, otherwise // as a simple name. diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index e87ca95..94f597d 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -9,6 +9,7 @@ #include // path_search #include +#include #include #include #include @@ -19,6 +20,9 @@ #include #include +#include +#include + #include // lookup_config using namespace std; @@ -28,6 +32,21 @@ namespace build2 { using type = token_type; + ostream& + operator<< (ostream& o, const parser::attribute& a) + { + o << a.name; + + if (!a.value.null) + { + o << '='; + names storage; + to_stream (o, reverse (a.value, storage), true /* quote */, '@'); + } + + return o; + } + class parser::enter_scope { public: @@ -335,6 +354,11 @@ namespace build2 while (tt != type::eos && !(one && parsed)) { + // Issue better diagnostics for stray `%`. + // + if (tt == type::percent) + fail (t) << "recipe without target"; + // Extract attributes if any. // assert (attributes_.empty ()); @@ -501,7 +525,8 @@ namespace build2 // exactly that would mean is unclear. One potentially useful // semantics would be the ability to specify attributes for ad hoc // members though the fact that the primary target is listed first - // would make it rather unintuitive. + // would make it rather unintuitive. Maybe attributes that change + // the group semantics itself? // next_with_attributes (t, tt); @@ -593,9 +618,11 @@ namespace build2 // // void (token& t, type& tt, const target_type* type, string pat) // - auto for_each = [this, &trace, - &t, &tt, - &ns, &nloc, &ans] (auto&& f) + // Note that the target and its ad hoc members are inserted implied + // but this flag can be cleared and default_target logic applied if + // appropriate. + // + auto for_each = [this, &trace, &t, &tt, &ns, &nloc, &ans] (auto&& f) { // Note: watch out for an out-qualified single target (two names). // @@ -674,30 +701,68 @@ namespace build2 if (tt == type::newline) { - // See if this is a target block. + // See if this is a target-specific variable and/or recipe block(s). // // Note that we cannot just let parse_dependency() handle this case // because we can have (a mixture of) target type/patterns. // - if (next (t, tt) == type::lcbrace && peek () == type::newline) + // @@ This might change once we support ad hoc rules (where we may + // have prerequisites for a pattern; but perhaps this should be + // handled separately since the parse_dependency() is already too + // complex and there will be no chains in this case). + // + next (t, tt); + if (tt == type::percent || + tt == type::multi_lcbrace || + (tt == type::lcbrace && peek () == type::newline)) { - next (t, tt); // Newline. - - // Parse the block for each target. + // Parse the block(s) for each target. + // + // Note that because we have to peek past the closing brace(s) to + // see whether there is a/another recipe block, we have to make + // that token part of the replay (we cannot peek past the replay + // sequence). // - for_each ([this] (token& t, type& tt, - const target_type* type, string pat) - { - next (t, tt); // First token inside the block. + // Note: similar code to the version in parse_dependency(). + // + auto parse = [ + this, + st = token (t), // Save start token (will be gone on replay). + recipes = small_vector, 1> ()] + (token& t, type& tt, + const target_type* type, string pat) mutable + { + token rt; // Recipe start token. 
-            parse_variable_block (t, tt, type, move (pat));
+            // The variable block, if any, should be first.
+            //
+            if (st.type == type::lcbrace)
+            {
+              next (t, tt); // Newline.
+              next (t, tt); // First token inside the variable block.
+              parse_variable_block (t, tt, type, move (pat));
 
-            if (tt != type::rcbrace)
-              fail (t) << "expected '}' instead of " << t;
-          });
+              if (tt != type::rcbrace)
+                fail (t) << "expected '}' instead of " << t;
 
-          next (t, tt);                    // Presumably newline after '}'.
-          next_after_newline (t, tt, '}'); // Should be on its own line.
+              next (t, tt);                    // Newline.
+              next_after_newline (t, tt, '}'); // Should be on its own line.
+
+              if (tt != type::percent && tt != type::multi_lcbrace)
+                return;
+
+              rt = t;
+            }
+            else
+              rt = st;
+
+            if (type != nullptr)
+              fail (rt) << "recipe in target type/pattern";
+
+            parse_recipe (t, tt, rt, recipes);
+          };
+
+          for_each (parse);
         }
         else
         {
@@ -717,7 +782,7 @@
       // Target-specific variable assignment or dependency declaration,
       // including a dependency chain and/or prerequisite-specific variable
-      // assignment.
+      // assignment and/or recipe block(s).
       //
       auto at (attributes_push (t, tt));
@@ -731,6 +796,10 @@
       // Target-specific variable assignment.
       //
+      // Note that neither here nor in parse_dependency() below do we allow
+      // specifying recipes following a target-specific variable assignment
+      // (but we do allow them following a target-specific variable block).
+      //
       if (tt == type::assign || tt == type::prepend || tt == type::append)
       {
         type akind (tt);
@@ -762,7 +831,8 @@
         next_after_newline (t, tt);
       }
       // Dependency declaration potentially followed by a chain and/or a
-      // prerequisite-specific variable assignment/block.
+      // target/prerequisite-specific variable assignment/block and/or
+      // recipe block(s).
       //
       else
       {
@@ -954,6 +1024,181 @@
   }
 
   void parser::
+  parse_recipe (token& t, type& tt,
+                const token& start,
+                small_vector<shared_ptr<adhoc_rule>, 1>& recipes)
+  {
+    // Parse a recipe chain.
+    //
+    // % [<attrs>]
+    // {{ [<lang>]
+    //   ...
+    // }}
+    //
+    // enter: start is percent or opening multi-curly-brace
+    // leave: token past newline after last closing multi-curly-brace
+
+    if (stage_ == stage::boot)
+      fail (t) << "ad hoc recipe specified during bootstrap";
+
+    // If we have a recipe, the target is not implied.
+    //
+    if (target_->implied)
+    {
+      for (target* m (target_); m != nullptr; m = m->adhoc_member)
+        m->implied = false;
+
+      if (default_target_ == nullptr)
+        default_target_ = target_;
+    }
+
+    bool first (recipes.empty ()); // First target.
+    bool clean (false);            // Seen a recipe that requires cleanup.
+
+    token st (start);
+    for (size_t i (0);; st = t, ++i)
+    {
+      optional<string> diag;
+
+      if (st.type == type::percent)
+      {
+        next_with_attributes (t, tt);
+        attributes_push (t, tt, true /* standalone */);
+
+        // Get variable (or value) attributes, if any, and deal with the
+        // special metadata attribute. Since currently it can only appear in
+        // the import directive, we handle it in an ad hoc manner.
+        //
+        attributes& as (attributes_top ());
+        for (attribute& a: as)
+        {
+          const string& n (a.name);
+
+          // @@ TODO: diag is script-specific, pass as attributes to rule?
+ // + if (n == "diag") + { + try + { + diag = convert (move (a.value)); + } + catch (const invalid_argument& e) + { + fail (as.loc) << "invalid " << n << " attribute value: " << e; + } + } + else + fail (as.loc) << "unknown recipe attribute " << a; + } + + attributes_pop (); + + next_after_newline (t, tt, '%'); + + if (tt != type::multi_lcbrace) + fail (t) << "expected recipe block instead of " << t; + + st = t; // And fall through. + } + + optional lang; + location lloc; + if (next (t, tt) == type::newline) + ; + else if (tt == type::word) + { + lang = t.value; + lloc = get_location (t); + next (t, tt); // Newline after . + } + else + fail (t) << "expected recipe language instead of " << t; + + mode (lexer_mode::foreign, '\0', st.value.size ()); + next_after_newline (t, tt, st); // Should be on its own line. + + if (tt != type::word) + fail (t) << "unterminated recipe block" << + info (st) << "recipe block starts here" << endf; + + shared_ptr ar; + if (first) + { + // Note that this is always the location of the opening multi-curly- + // brace, whether we have the header or not. This is relied upon by + // the rule implementations (e.g., to calculate the first line of the + // recipe code). + // + location loc (get_location (st)); + + if (!lang) + { + auto* asr (new adhoc_script_rule (move (diag), loc, st.value.size ())); + ar.reset (asr); + + asr->checksum = sha256 (t.value).string (); + + istringstream is (move (t.value)); + build::script::parser p (ctx); + asr->script = p.pre_parse (is, asr->loc.file, loc.line + 1); + } + else if (*lang == "c++") + { + ar.reset (new adhoc_cxx_rule (move (t.value), loc, st.value.size ())); + clean = true; + } + else + fail (lloc) << "unknown recipe language '" << *lang << "'"; + + recipes.push_back (ar); + } + else + ar = recipes[i]; + + target_->adhoc_recipes.push_back ( + adhoc_recipe {perform_update_id, move (ar)}); + + next (t, tt); + assert (tt == type::multi_rcbrace); + + next (t, tt); // Newline. + next_after_newline (t, tt, token (t)); // Should be on its own line. + + if (tt != type::percent && tt != type::multi_lcbrace) + break; + } + + // If we have a recipe that needs cleanup, register an operation callback + // for this project unless it has already been done. + // + if (clean) + { + action a (perform_clean_id); + auto f (&adhoc_rule::clean_recipes_build); + + // First check if we have already done this. + // + auto p (root_->operation_callbacks.equal_range (a)); + for (; p.first != p.second; ++p.first) + { + auto t ( + p.first->second.pre.target ()); + + if (t != nullptr && *t == f) + break; + } + + // It feels natural to clean up recipe builds as a post operation but + // that prevents the (otherwise-empty) out root directory to be cleaned + // up (via the standard fsdir{} chain). + // + if (p.first == p.second) + root_->operation_callbacks.emplace ( + a, scope::operation_callback {f, nullptr /*post*/}); + } + } + + void parser:: enter_adhoc_members (adhoc_names_loc&& ans, bool implied) { tracer trace ("parser::enter_adhoc_members", &path_); @@ -1083,9 +1328,9 @@ namespace build2 bool chain) { // Parse a dependency chain and/or a target/prerequisite-specific variable - // assignment/block. Return true if the following block (if any) has been - // "claimed" (the block "belongs" to targets/prerequisites before the last - // colon). + // assignment/block and/or recipe block(s). Return true if the following + // block(s) (if any) have been "claimed", meaning they "belong" to + // targets/prerequisites before the last colon. 
// // enter: colon (anything else is not handled) // leave: - first token on the next line if returning true @@ -1163,7 +1408,8 @@ namespace build2 // each target (for_each_p). // // We handle multiple targets and/or prerequisites by replaying the tokens - // (see the target-specific case for details). The function signature is: + // (see the target-specific case comments for details). The function + // signature is: // // void (token& t, type& tt) // @@ -1209,9 +1455,9 @@ namespace build2 }; // Do we have a dependency chain and/or prerequisite-specific variable - // assignment? If not, check for the target-specific variable block unless - // this is a chained call (in which case the block, if any, "belongs" to - // prerequisites). + // assignment? If not, check for the target-specific variable block and/or + // recipe block(s) unless this is a chained call (in which case the block, + // if any, "belongs" to prerequisites). // if (tt != type::colon) { @@ -1220,24 +1466,48 @@ namespace build2 next_after_newline (t, tt); // Must be a newline then. - if (tt == type::lcbrace && peek () == type::newline) + if (tt == type::percent || + tt == type::multi_lcbrace || + (tt == type::lcbrace && peek () == type::newline)) { - next (t, tt); // Newline. - - // Parse the block for each target. + // Parse the block(s) for each target. // - for_each_t ([this] (token& t, token_type& tt) - { - next (t, tt); // First token inside the block. + // Note: similar code to the version in parse_clause(). + // + auto parse = [ + this, + st = token (t), // Save start token (will be gone on replay). + recipes = small_vector, 1> ()] + (token& t, type& tt) mutable + { + token rt; // Recipe start token. - parse_variable_block (t, tt); + // The variable block, if any, should be first. + // + if (st.type == type::lcbrace) + { + next (t, tt); // Newline. + next (t, tt); // First token inside the variable block. + parse_variable_block (t, tt); - if (tt != type::rcbrace) - fail (t) << "expected '}' instead of " << t; - }); + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t; - next (t, tt); // Presumably newline after '}'. - next_after_newline (t, tt, '}'); // Should be on its own line. + next (t, tt); // Newline. + next_after_newline (t, tt, '}'); // Should be on its own line. + + if (tt != type::percent && tt != type::multi_lcbrace) + return; + + rt = t; + } + else + rt = st; + + parse_recipe (t, tt, rt, recipes); + }; + + for_each_t (parse); } return true; // Claimed or isn't any. 
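To make the syntax handled by parse_clause()/parse_dependency()/parse_recipe() concrete, buildfile fragments along these lines are what gets accepted; the target names, the variable, the commands, and the diag value are all illustrative only:

  # A target-specific variable block, if present, comes first, followed by
  # a recipe block optionally preceded by a % header with attributes.
  #
  file{greeting.out}: file{greeting.in}
  {
    mode = polite
  }
  % [diag=gen]
  {{
    cp $path($<) $path($>)
  }}

  # A C++ recipe names its language after the opening braces; its text is
  # compiled and loaded as a module-like library.
  #
  file{other.out}: file{other.in}
  {{ c++
    // ... rule implementation ...
  }}

Since the lexer counts the braces (multi_lcbrace/multi_rcbrace), a recipe body that itself needs to contain }} can be delimited with a longer sequence, for example {{{ ... }}}.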
@@ -1648,7 +1918,7 @@ namespace build2 // { auto df = make_diag_frame ( - [&args, &l](const diag_record& dr) + [this, &args, &l](const diag_record& dr) { dr << info (l) << "while parsing " << args[0] << " output"; }); @@ -1758,7 +2028,7 @@ namespace build2 } catch (const invalid_argument& e) { - fail << "invalid " << i->name << " attribute value: " << e; + fail (as.loc) << "invalid " << i->name << " attribute value: " << e; } } else if (i->name == "config.report.variable") @@ -1769,7 +2039,7 @@ namespace build2 } catch (const invalid_argument& e) { - fail << "invalid " << i->name << " attribute value: " << e; + fail (as.loc) << "invalid " << i->name << " attribute value: " << e; } } else @@ -3263,16 +3533,10 @@ namespace build2 optional vis; optional ovr; - auto print = [storage = names ()] (diag_record& dr, const value& v) mutable - { - storage.clear (); - to_stream (dr.os, reverse (v, storage), true /* quote */, '@'); - }; - - for (auto& p: as) + for (auto& a: as) { - string& n (p.name); - value& v (p.value); + string& n (a.name); + value& v (a.value); if (const value_type* t = map_type (n)) { @@ -3283,23 +3547,10 @@ namespace build2 // Fall through. } else - { - diag_record dr (fail (l)); - dr << "unknown variable attribute " << n; - - if (!v.null) - { - dr << '='; - print (dr, v); - } - } + fail (l) << "unknown variable attribute " << a; if (!v.null) - { - diag_record dr (fail (l)); - dr << "unexpected value for attribute " << n << ": "; - print (dr, v); - } + fail (l) << "unexpected value in attribute " << a; } if (type != nullptr && var.type != nullptr) @@ -3336,16 +3587,10 @@ namespace build2 bool null (false); const value_type* type (nullptr); - auto print = [storage = names ()] (diag_record& dr, const value& v) mutable - { - storage.clear (); - to_stream (dr.os, reverse (v, storage), true /* quote */, '@'); - }; - - for (auto& p: as) + for (auto& a: as) { - string& n (p.name); - value& v (p.value); + string& n (a.name); + value& v (a.value); if (n == "null") { @@ -3364,23 +3609,10 @@ namespace build2 // Fall through. } else - { - diag_record dr (fail (l)); - dr << "unknown value attribute " << n; - - if (!v.null) - { - dr << '='; - print (dr, v); - } - } + fail (l) << "unknown value attribute " << a; if (!v.null) - { - diag_record dr (fail (l)); - dr << "unexpected value for attribute " << n << ": "; - print (dr, v); - } + fail (l) << "unexpected value in attribute " << a; } // When do we set the type and when do we keep the original? This gets @@ -3502,11 +3734,8 @@ namespace build2 values parser:: parse_eval (token& t, type& tt, pattern_mode pmode) { - // enter: lparen - // leave: rparen - - mode (lexer_mode::eval, '@'); // Auto-expires at rparen. - next_with_attributes (t, tt); + // enter: token after lparen (lexed in the eval mode with attributes). + // leave: rparen (eval mode auto-expires at rparen). if (tt == type::rparen) return values (); @@ -4707,7 +4936,7 @@ namespace build2 // Print the location information in case the function fails. // auto df = make_diag_frame ( - [&loc, l, r] (const diag_record& dr) + [this, &loc, l, r] (const diag_record& dr) { dr << info (loc) << "while concatenating " << l << " to " << r; dr << info << "use quoting to force untyped concatenation"; @@ -5322,56 +5551,72 @@ namespace build2 ; // Leave the name empty to fail below. 
else if (tt == type::word) { - if (!pre_parse_) - name = move (t.value); + name = move (t.value); } else if (tt == type::lparen) { expire_mode (); - values vs (parse_eval (t, tt, pmode)); //@@ OUT will parse @-pair and do well? + mode (lexer_mode::eval, '@'); + next_with_attributes (t, tt); - if (!pre_parse_) + // Handle the $(x) case ad hoc. We do it this way in order to get + // the variable name even during pre-parse. It should also be + // faster. + // + if (tt == type::word && peek () == type::rparen) + { + name = move (t.value); + next (t, tt); // Get `)`. + } + else { - if (vs.size () != 1) - fail (loc) << "expected single variable/function name"; + //@@ OUT will parse @-pair and do well? + // + values vs (parse_eval (t, tt, pmode)); - value& v (vs[0]); + if (!pre_parse_) + { + if (vs.size () != 1) + fail (loc) << "expected single variable/function name"; - if (!v) - fail (loc) << "null variable/function name"; + value& v (vs[0]); - names storage; - vector_view ns (reverse (v, storage)); // Movable. - size_t n (ns.size ()); + if (!v) + fail (loc) << "null variable/function name"; - // We cannot handle scope-qualification in the eval context as - // we do for target-qualification (see eval-qual) since then we - // would be treating all paths as qualified variables. So we - // have to do it here. - // - if (n == 2 && ns[0].pair == ':') // $(foo: x) - { - qual = move (ns[0]); + names storage; + vector_view ns (reverse (v, storage)); // Movable. + size_t n (ns.size ()); - if (qual.empty ()) - fail (loc) << "empty variable/function qualification"; - } - else if (n == 2 && ns[0].directory ()) // $(foo/ x) - { - qual = move (ns[0]); - qual.pair = '/'; - } - else if (n > 1) - fail (loc) << "expected variable/function name instead of '" - << ns << "'"; + // We cannot handle scope-qualification in the eval context as + // we do for target-qualification (see eval-qual) since then + // we would be treating all paths as qualified variables. So + // we have to do it here. + // + if (n == 2 && ns[0].pair == ':') // $(foo: x) + { + qual = move (ns[0]); - // Note: checked for empty below. - // - if (!ns[n - 1].simple ()) - fail (loc) << "expected variable/function name instead of '" - << ns[n - 1] << "'"; + if (qual.empty ()) + fail (loc) << "empty variable/function qualification"; + } + else if (n == 2 && ns[0].directory ()) // $(foo/ x) + { + qual = move (ns[0]); + qual.pair = '/'; + } + else if (n > 1) + fail (loc) << "expected variable/function name instead of '" + << ns << "'"; - name = move (ns[n - 1].value); + // Note: checked for empty below. + // + if (!ns[n - 1].simple ()) + fail (loc) << "expected variable/function name instead of '" + << ns[n - 1] << "'"; + + name = move (ns[n - 1].value); + } } } else @@ -5392,8 +5637,9 @@ namespace build2 { // Function call. // - next (t, tt); // Get '('. + mode (lexer_mode::eval, '@'); + next_with_attributes (t, tt); // @@ Should we use (target/scope) qualification (of name) as the // context in which to call the function? Hm, interesting... @@ -5413,12 +5659,11 @@ namespace build2 { // Variable expansion. // + lookup l (lookup_variable (move (qual), move (name), loc)); if (pre_parse_) continue; // As if empty value. - lookup l (lookup_variable (move (qual), move (name), loc)); - if (l.defined ()) result = l.value; // Otherwise leave as NULL result_data. @@ -5429,8 +5674,10 @@ namespace build2 { // Context evaluation. 
// - loc = get_location (t); + mode (lexer_mode::eval, '@'); + next_with_attributes (t, tt); + values vs (parse_eval (t, tt, pmode)); tt = peek (); @@ -5507,7 +5754,7 @@ namespace build2 // Print the location information in case the function fails. // auto df = make_diag_frame ( - [&loc, t] (const diag_record& dr) + [this, &loc, t] (const diag_record& dr) { dr << info (loc) << "while converting " << t << " to string"; }); @@ -6066,6 +6313,9 @@ namespace build2 lookup parser:: lookup_variable (name&& qual, string&& name, const location& loc) { + if (pre_parse_) + return lookup (); + tracer trace ("parser::lookup_variable", &path_); const scope* s (nullptr); @@ -6321,6 +6571,20 @@ namespace build2 return tt; } + inline type parser:: + next_after_newline (token& t, type& tt, const token& a) + { + if (tt == type::newline) + next (t, tt); + else if (tt != type::eos) + { + diag_record dr (fail (t)); + dr << "expected newline instead of " << t << " after " << a; + } + + return tt; + } + type parser:: peek () { diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx index c55e14f..bc01e08 100644 --- a/libbuild2/parser.hxx +++ b/libbuild2/parser.hxx @@ -26,7 +26,9 @@ namespace build2 explicit parser (context& c, stage s = stage::rest) - : fail ("error", &path_), ctx (c), stage_ (s) {} + : fail ("error", &path_), info ("info", &path_), + ctx (c), + stage_ (s) {} // Issue diagnostics and throw failed in case of an error. // @@ -109,6 +111,11 @@ namespace build2 const target_type* = nullptr, string = string ()); + void + parse_recipe (token&, token_type&, + const token&, + small_vector, 1>&); + // Ad hoc target names inside < ... >. // struct adhoc_names_loc @@ -246,6 +253,9 @@ namespace build2 build2::value value; }; + friend ostream& + operator<< (ostream&, const attribute&); + struct attributes: small_vector { location loc; // Start location. @@ -479,6 +489,12 @@ namespace build2 // If qual is not empty, then its pair member should indicate the kind // of qualification: ':' -- target, '/' -- scope. // + // Note that this function is called even during pre-parse with the result + // unused. In this case a valid name will only be provided for variables + // with literal names (for example, $x, $(x)). For computed variables (for + // example, $($x ? X : Y)) it will be empty (along with qual, which can + // only be non-empty for a computed variable). + // virtual lookup lookup_variable (name&& qual, string&& name, const location&); @@ -525,8 +541,14 @@ namespace build2 // If the current token is newline, then get the next token. Otherwise, // fail unless the current token is eos (i.e., optional newline at the end - // of stream). If the after argument is not \0, use it in diagnostics as - // the token after which the newline was expectd. + // of stream). Use the after token in diagnostics as the token after which + // the newline was expected. + // + token_type + next_after_newline (token&, token_type&, const token& after); + + // As above but the after argument is a single-character token. If it is + // \0, then it is ignored. // token_type next_after_newline (token&, token_type&, char after = '\0'); @@ -568,10 +590,10 @@ namespace build2 } void - mode (lexer_mode m, char ps = '\0') + mode (lexer_mode m, char ps = '\0', uintptr_t d = 0) { if (replay_ != replay::play) - lexer_->mode (m, ps); + lexer_->mode (m, ps, nullopt, d); else // As a sanity check, make sure the mode matches the next token. 
Note // that we don't check the attributes flags or the pair separator @@ -612,8 +634,10 @@ namespace build2 // with the lexer directly (e.g., the keyword() test). Replays also cannot // nest. For now we don't enforce any of this. // - // Note also that the peeked token is not part of the replay, until it - // is "got". + // Note also that the peeked token is not part of the replay until it is + // "got". In particular, this means that we cannot peek past the replay + // sequence (since we will get the peeked token as the first token of + // the replay). // void replay_save () @@ -628,6 +652,8 @@ namespace build2 assert ((replay_ == replay::save && !replay_data_.empty ()) || (replay_ == replay::play && replay_i_ == replay_data_.size ())); + assert (!peeked_); + if (replay_ == replay::save) replay_path_ = path_; // Save old path. @@ -638,6 +664,8 @@ namespace build2 void replay_stop () { + assert (!peeked_); + if (replay_ == replay::play) path_ = replay_path_; // Restore old path. @@ -726,6 +754,7 @@ namespace build2 // protected: const fail_mark fail; + const basic_mark info; // Parser state. // diff --git a/libbuild2/recipe.hxx b/libbuild2/recipe.hxx index 508c059..efd184a 100644 --- a/libbuild2/recipe.hxx +++ b/libbuild2/recipe.hxx @@ -48,6 +48,21 @@ namespace build2 LIBBUILD2_SYMEXPORT extern const recipe noop_recipe; LIBBUILD2_SYMEXPORT extern const recipe default_recipe; LIBBUILD2_SYMEXPORT extern const recipe group_recipe; + + // Ad hoc recipe. + // + // A recipe is a fragment of a rule so we handle ad hoc recipies by + // "completing" them to rules. + // + class adhoc_rule; + + struct adhoc_recipe + { + // @@ TODO: maybe we should have a small vector of actions (for dump). + // + build2::action action; + shared_ptr rule; + }; } #endif // LIBBUILD2_RECIPE_HXX diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx index 3a32eed..773d42e 100644 --- a/libbuild2/rule.cxx +++ b/libbuild2/rule.cxx @@ -3,6 +3,8 @@ #include +#include +#include #include #include #include @@ -10,11 +12,21 @@ #include #include +#include +#include + using namespace std; using namespace butl; namespace build2 { + // rule (vtable) + // + rule:: + ~rule () + { + } + // file_rule // // Note that this rule is special. It is the last, fallback rule. If @@ -97,11 +109,6 @@ namespace build2 recipe file_rule:: apply (action a, target& t) const { - /* - @@ outer - return noop_recipe; - */ - // Update triggers the update of this target's prerequisites so it would // seem natural that we should also trigger their cleanup. However, this // possibility is rather theoretical so until we see a real use-case for @@ -305,4 +312,879 @@ namespace build2 } const noop_rule noop_rule::instance; + + // adhoc_rule + // + const dir_path adhoc_rule::recipes_build_dir ("recipes.out"); + + bool adhoc_rule:: + match (action a, target& t, const string& h, optional fallback) const + { + return !fallback && match (a, t, h); + } + + bool adhoc_rule:: + match (action, target&, const string&) const + { + return true; + } + + // Scope operation callback that cleans up recipe builds. + // + target_state adhoc_rule:: + clean_recipes_build (action, const scope& rs, const dir&) + { + context& ctx (rs.ctx); + + const dir_path& out_root (rs.out_path ()); + + dir_path d (out_root / rs.root_extra->build_dir / recipes_build_dir); + + if (exists (d)) + { + if (rmdir_r (ctx, d)) + { + // Clean up build/ if it also became empty (e.g., in case of a build + // with a transient configuration). 
+ // + d = out_root / rs.root_extra->build_dir; + if (empty (d)) + rmdir (ctx, d); + + return target_state::changed; + } + } + + return target_state::unchanged; + } + + // adhoc_script_rule + // + void adhoc_script_rule:: + dump (ostream& os, string& ind) const + { + // Do we need the header? + // + if (diag) + { + os << ind << '%'; + + if (diag) + { + os << " ["; + os << "diag="; to_stream (os, name (*diag), true /* quote */, '@'); + os << ']'; + } + + os << endl; + } + + os << ind << string (braces, '{') << endl; + ind += " "; + script::dump (os, ind, script.lines); + ind.resize (ind.size () - 2); + os << ind << string (braces, '}'); + } + + bool adhoc_script_rule:: + match (action a, target& t, const string&, optional fb) const + { + if (!fb) + ; + // If this is clean for a file target and we are supplying the update, + // then we will also supply the standard clean. + // + else if (a == perform_clean_id && + *fb == perform_update_id && + t.is_a ()) + ; + else + return false; + + // It's unfortunate we have to resort to this but we need to remember this + // in apply(). + // + t.data (fb.has_value ()); + + return true; + } + + recipe adhoc_script_rule:: + apply (action a, target& t) const + { + // Derive file names for the target and its ad hoc group members, if any. + // + for (target* m (&t); m != nullptr; m = m->adhoc_member) + { + if (auto* p = m->is_a ()) + p->derive_path (); + } + + // Inject dependency on the output directory. + // + // We do it always instead of only if one of the targets is path-based in + // case the recipe creates temporary files or some such. + // + inject_fsdir (a, t); + + // Match prerequisites. + // + match_prerequisite_members (a, t); + + // See if we are providing the standard clean as a fallback. + // + if (t.data ()) + return &perform_clean_depdb; + + // For update inject dependency on the tool target(s). + // + // @@ We could see that it's a target and do it but not sure if we should + // bother. We dropped this idea of implicit targets in tests. Maybe we + // should verify path assigned, like we do there? I think we will have + // to. + // + // if (a == perform_update_id) + // inject (a, t, tgt); + + if (a == perform_update_id && t.is_a ()) + { + return [this] (action a, const target& t) + { + return perform_update_file (a, t); + }; + } + else + { + return [this] (action a, const target& t) + { + return default_action (a, t); + }; + } + } + + target_state adhoc_script_rule:: + perform_update_file (action a, const target& xt) const + { + tracer trace ("adhoc_rule::perform_update_file"); + + context& ctx (xt.ctx); + + const file& t (xt.as ()); + const path& tp (t.path ()); + + // Update prerequisites and determine if any of them render this target + // out-of-date. + // + timestamp mt (t.load_mtime ()); + optional ps (execute_prerequisites (a, t, mt)); + + bool update (!ps); + + // We use depdb to track changes to the script itself, input/output file + // names, tools, etc. + // + depdb dd (tp + ".d"); + { + // First should come the rule name/version. + // + if (dd.expect ("adhoc 1") != nullptr) + l4 ([&]{trace << "rule mismatch forcing update of " << t;}); + + // Then the script checksum. + // + // Ideally, to detect changes to the script semantics, we would hash the + // text with all the variables expanded but without executing any + // commands. In practice, this is easier said than done (think the set + // builtin that receives output of a command that modifies the + // filesystem). 
+ // + // So as the next best thing we are going to hash the unexpanded text as + // well as values of all the variables expanded in it (which we get as a + // side effect of pre-parsing the script). This approach has a number of + // drawbacks: + // + // - We can't handle computed variable names (e.g., $($x ? X : Y)). + // + // - We may "overhash" by including variables that are actually + // script-local. + // + // - There are functions like $install.resolve() with result based on + // external (to the script) information. + // + if (dd.expect (checksum) != nullptr) + l4 ([&]{trace << "recipe text change forcing update of " << t;}); + + // For each variable hash its name, undefined/null/non-null indicator, + // and the value if non-null. + // + // Note that this excludes the special $< and $> variables which we + // handle below. + // + { + sha256 cs; + names storage; + + for (const string& n: script.vars) + { + cs.append (n); + + lookup l; + + if (const variable* var = ctx.var_pool.find (n)) + l = t[var]; + + cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + + if (l) + { + storage.clear (); + names_view ns (reverse (*l, storage)); + + for (const name& n: ns) + to_checksum (cs, n); + } + } + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "recipe variable change forcing update of " << t;}); + } + + // Target and prerequisite sets ($> and $<). + // + // How should we hash them? We could hash them as target names (i.e., + // the same as the $>/< content) or as paths (only for path-based + // targets). While names feel more general, they are also more expensive + // to compute. And for path-based targets, path is generally a good + // proxy for the target name. Since the bulk of the ad hoc recipes will + // presumably be operating exclusively on path-based targets, let's do + // it both ways. + // + { + auto hash = [ns = names ()] (sha256& cs, const target& t) mutable + { + if (const path_target* pt = t.is_a ()) + cs.append (pt->path ().string ()); + else + { + ns.clear (); + t.as_name (ns); + for (const name& n: ns) + to_checksum (cs, n); + } + }; + + sha256 tcs; + for (const target* m (&t); m != nullptr; m = m->adhoc_member) + hash (tcs, *m); + + if (dd.expect (tcs.string ()) != nullptr) + l4 ([&]{trace << "target set change forcing update of " << t;}); + + sha256 pcs; + for (const target* pt: t.prerequisite_targets[a]) + if (pt != nullptr) + hash (pcs, *pt); + + if (dd.expect (pcs.string ()) != nullptr) + l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); + } + + // Then the tools checksums. + // + // @@ TODO: obtain checksums of all the targets used as commands in + // the script. + // + //if (dd.expect (csum) != nullptr) + // l4 ([&]{trace << "compiler mismatch forcing update of " << t;}); + } + + // Update if depdb mismatch. + // + if (dd.writing () || dd.mtime > mt) + update = true; + + dd.close (); + + // If nothing changed, then we are done. + // + if (!update) + return *ps; + + if (verb == 1) + { + // @@ TODO: + // + // - derive diag if absent (should probably do in match?) + // + // - we are printing target, not source (like in most other places) + // + // - printing of ad hoc target group (the {hxx cxx}{foo} idea) + // + // - if we are printing prerequisites, should we print all of them + // (including tools)? + // + + text << (diag ? 
diag->c_str () : "adhoc") << ' ' << t; + } + + if (!ctx.dry_run || verb >= 2) + { + const scope& bs (t.base_scope ()); + + build::script::environment e (a, t, script.temp_dir); + build::script::parser p (ctx); + build::script::default_runner r; + p.execute (*bs.root_scope (), bs, e, script, r); + + if (!ctx.dry_run) + dd.check_mtime (tp); + } + + t.mtime (system_clock::now ()); + return target_state::changed; + } + + target_state adhoc_script_rule:: + default_action (action a, const target& t) const + { + tracer trace ("adhoc_rule::default_action"); + + context& ctx (t.ctx); + + execute_prerequisites (a, t); + + if (verb == 1) + { + // @@ TODO: as above + + text << (diag ? diag->c_str () : "adhoc") << ' ' << t; + } + + if (!ctx.dry_run || verb >= 2) + { + const scope& bs (t.base_scope ()); + + build::script::environment e (a, t, script.temp_dir); + build::script::parser p (ctx); + build::script::default_runner r; + p.execute (*bs.root_scope (), bs, e, script, r); + } + + return target_state::changed; + } + + // cxx_rule + // + bool cxx_rule:: + match (action, target&, const string&) const + { + return true; + } + + // adhoc_cxx_rule + // + adhoc_cxx_rule:: + ~adhoc_cxx_rule () + { + delete impl.load (memory_order_relaxed); // Serial execution. + } + + void adhoc_cxx_rule:: + dump (ostream& os, string& ind) const + { + // @@ TODO: indentation is multi-line recipes is off (would need to insert + // indentation after every newline). + // + os << ind << string (braces, '{') << " c++" << endl + << ind << code + << ind << string (braces, '}'); + } + + // From module.cxx. + // + void + create_module_context (context&, const location&); + + const target& + update_in_module_context (context&, const scope&, names tgt, + const location&, const path& bf); + + pair + load_module_library (const path& lib, const string& sym, string& err); + + bool adhoc_cxx_rule:: + match (action a, target& t, const string& hint) const + { + tracer trace ("adhoc_cxx_rule::match"); + + context& ctx (t.ctx); + const scope& rs (t.root_scope ()); + + // The plan is to reduce this to the build system module case as much as + // possible. Specifically, we switch to the load phase, create a module- + // like library with the recipe text as a rule implementation, then build + // and load it. + // + // Since the recipe can be shared among multiple targets, several threads + // can all be trying to do this in parallel. + // + // We use the relaxed memory order here because any change must go through + // the serial load phase. In other words, all we need here is atomicity + // with ordering/visibility provided by the phase mutex. + // + cxx_rule* impl (this->impl.load (memory_order_relaxed)); + + while (impl == nullptr) // Breakout loop. + { + // Switch the phase to (serial) load and re-check. + // + phase_switch ps (ctx, run_phase::load); + + if ((impl = this->impl.load (memory_order_relaxed)) != nullptr) + break; + + using create_function = cxx_rule* (const location&, target_state); + using load_function = create_function* (); + + // The only way to guarantee that the name of our module matches its + // implementation is to based the name on the implementation hash (plus + // the language, in case we support other compiled implementations in + // the future). + // + // Unfortunately, this means we will be creating a new project (and + // leaving behind the old one as garbage) for every change to the + // recipe. On the other hand, if the recipe is moved around unchanged, + // we will reuse the same project. 
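// Aside: a minimal standalone sketch of the lazy, once-only initialization
// pattern used for `impl` above (atomic pointer, re-checked under
// serialization). Illustrative only: the patch relies on the serial load
// phase and the phase switch for synchronization and can therefore use
// relaxed ordering throughout, while this self-contained version uses a
// plain mutex plus acquire/release instead.
//
#include <atomic>
#include <mutex>

struct impl_type {/* compiled recipe implementation */};

class shared_rule
{
public:
  // Return the implementation, creating it on first use. Safe to call from
  // multiple threads.
  //
  impl_type&
  implementation () const
  {
    impl_type* p (impl_.load (std::memory_order_acquire));

    if (p == nullptr)
    {
      std::lock_guard<std::mutex> l (serial_); // Stands in for the load phase.

      if ((p = impl_.load (std::memory_order_relaxed)) == nullptr)
      {
        p = new impl_type;                     // "Build and load" the recipe.
        impl_.store (p, std::memory_order_release);
      }
    }

    return *p;
  }

  ~shared_rule () {delete impl_.load (std::memory_order_relaxed);}

private:
  mutable std::atomic<impl_type*> impl_ {nullptr};
  mutable std::mutex serial_;
};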
In fact, two different recipes (e.g., + // in different buildfiles) with the same text will share the project. + // + // The fact that we don't incorporate the recipe location into the hash + // but include it in the source (in the form of the #line directive; see + // below) has its own problems. If we do nothing extra here, then if a + // "moved" but otherwise unchanged recipe is updated (for example, + // because of changes in the build system core), then we may end up with + // bogus location in the diagnostics. + // + // The straightforward solution would be to just update the location in + // the source code if it has changed. This, however, will lead to + // unnecessary and probably surprising recompilations since any line + // count change before the recipe will trigger this update. One key + // observation here is that we need accurate location information only + // if we are going to recompile the recipe but the change to location + // itself does not render the recipe out of date. So what we going to do + // is factor the location information into its own small header and then + // keep it up-to-date without changing its modification time. + // + // This works well if the project is not shared by multiple recipes. + // However, if we have recipes in several buildfiles with identical + // text, then the location information may end up yo-yo'ing depending on + // which recipe got here first. + // + // There doesn't seem to be much we can do about it without incurring + // other drawbacks/overheads. So the answer is for the user to use an ad + // hoc rule with the common implementation instead of a bunch of + // duplicate recipes. + // + string id; + { + sha256 cs; + cs.append ("c++"); + cs.append (code); + id = cs.abbreviated_string (12); + } + + dir_path pd (rs.out_path () / + rs.root_extra->build_dir / + recipes_build_dir /= id); + + path bf (pd / std_buildfile_file); + + string sym ("load_" + id); + + // Check whether the file exists and its last line matches the specified + // signature. + // + // Note: we use the last instead of the first line for extra protection + // against incomplete writes. + // + auto check_sig = [] (const path& f, const string& s) -> bool + { + try + { + if (!file_exists (f)) + return false; + + ifdstream ifs (f); + + string l; + while (ifs.peek () != ifdstream::traits_type::eof ()) + getline (ifs, l); + + return l == s; + } + catch (const io_error& e) + { + fail << "unable to read " << f << ": " << e << endf; + } + catch (const system_error& e) + { + fail << "unable to access " << f << ": " << e << endf; + } + }; + + bool nested (ctx.module_context == &ctx); + + // Create the build context if necessary. + // + if (ctx.module_context == nullptr) + { + if (!ctx.module_context_storage) + fail (loc) << "unable to update ad hoc recipe for target " << t << + info << "building of ad hoc recipes is disabled"; + + create_module_context (ctx, loc); + } + + // "Switch" to the module context. + // + context& ctx (*t.ctx.module_context); + + const uint16_t verbosity (3); // Project creation command verbosity. + + // Project and location signatures. + // + // Specifically, we update the project version when changing anything + // which would make the already existing projects unusable. + // + const string& lf (!loc.file.path.empty () + ? loc.file.path.string () + : loc.file.name ? *loc.file.name : string ()); + + const string psig ("# c++ 1"); + const string lsig ("// " + lf + ':' + to_string (loc.line)); + + // Check whether we need to (re)create the project. 
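// Aside: the "keep the location header up-to-date without changing its
// modification time" idea described above, sketched with the standard
// library. The patch itself uses libbutl's entry_time/file_time; this is
// only an illustration of the technique, not the actual implementation.
//
#include <filesystem>
#include <fstream>
#include <string>

// Rewrite a file's content but restore its original modification time so
// that mtime-based out-of-date checks are unaffected by the rewrite.
//
static void
rewrite_preserving_mtime (const std::filesystem::path& f,
                          const std::string& content)
{
  namespace fs = std::filesystem;

  std::error_code ec;
  fs::file_time_type mt (fs::last_write_time (f, ec)); // ec set if absent.

  {
    std::ofstream ofs (f, std::ios::trunc);
    ofs << content;
  }

  if (!ec)                       // Only restore if the file existed before.
    fs::last_write_time (f, mt);
}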
+ // + optional altn (false); // Standard naming scheme. + bool create (!is_src_root (pd, altn)); + + if (!create && (create = !check_sig (bf, psig))) + rmdir_r (ctx, pd, false, verbosity); // Never dry-run. + + path of; + ofdstream ofs; + + if (create) + try + { + // Write ad hoc config.build that loads the ~build2 configuration. + // This way the configuration will be always in sync with ~build2 + // and we can update the recipe manually (e.g., for debugging). + // + create_project ( + pd, + dir_path (), /* amalgamation */ + {}, /* boot_modules */ + "cxx.std = latest", /* root_pre */ + {"cxx."}, /* root_modules */ + "", /* root_post */ + string ("config"), /* config_module */ + string ("config.config.load = ~build2"), /* config_file */ + false, /* buildfile */ + "build2 core", /* who */ + verbosity); /* verbosity */ + + + // Write the rule source file. + // + of = path (pd / "rule.cxx"); + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; + + ofs.open (of); + + ofs << "#include \"location.hxx\"" << '\n' + << '\n'; + + // Include every header that can plausibly be needed by a rule. + // + ofs << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << '\n'; + + // Normally the recipe code will have one level of indentation so + // let's not indent the namespace level to match. + // + ofs << "namespace build2" << '\n' + << "{" << '\n' + << '\n'; + + // If we want the user to be able to supply a custom constuctor, then + // we have to give the class a predictable name (i.e., we cannot use + // id as part of its name) and put it into an unnamed namespace. One + // clever idea is to call the class `constructor` but the name could + // also be used for a custom destructor (still could work) or for name + // qualification (would definitely look bizarre). + // + // In this light the most natural name is probable `rule`. The issue + // is we already have this name in the build2 namespace (and its our + // indirect base). In fact, any name that we choose could in the + // future conflict with something in that namespace so maybe it makes + // sense to bite the bullet and pick a name that is least likely to be + // used by the user directly (can always use cxx_rule instead). + // + ofs << "namespace" << '\n' + << "{" << '\n' + << "class rule: public cxx_rule" << '\n' + << "{" << '\n' + << "public:" << '\n' + << '\n'; + + // Inherit base constructor. This way the user may provide their own + // but don't have to. + // + ofs << " using cxx_rule::cxx_rule;" << '\n' + << '\n'; + + // An extern "C" function cannot throw which can happen in case of a + // user-defined constructor. So we need an extra level of indirection. + // We incorporate id to make sure it doesn't conflict with anything + // user-defined. + // + ofs << " static cxx_rule*" << '\n' + << " create_" << id << " (const location& l, target_state s)" << '\n' + << " {" << '\n' + << " return new rule (l, s);" << '\n' + << " }" << '\n' + << '\n'; + + // Use the #line directive to point diagnostics to the code in the + // buildfile. Note that there is no easy way to restore things to + // point back to the source file (other than another #line with a line + // and a file). Seeing that we don't have much after, let's not bother + // for now. 
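// Aside: a tiny self-contained demonstration of what the #line directive
// emitted below achieves: subsequent diagnostics (and __FILE__/__LINE__)
// are attributed to the buildfile location rather than to the generated
// source file. The file name and line number here are made up for the
// example.
//
#include <iostream>

int
main ()
{
  std::cout << __FILE__ << ':' << __LINE__ << '\n'; // This source file.

#line 100 "buildfile"
  std::cout << __FILE__ << ':' << __LINE__ << '\n'; // Prints buildfile:100.
}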
+ // + ofs << "#line RECIPE_LINE RECIPE_FILE" << '\n'; + + // Note that the code always includes trailing newline. + // + ofs << code + << "};" << '\n' + << '\n'; + + // Add an alias that we can use unambiguously in the load function. + // + ofs << "using rule_" << id << " = rule;" << '\n' + << "}" << '\n' + << '\n'; + + // Entry point. + // + ofs << "extern \"C\"" << '\n' + << "#ifdef _WIN32" << '\n' + << "__declspec(dllexport)" << '\n' + << "#endif" << '\n' + << "cxx_rule* (*" << sym << " ()) (const location&, target_state)" << '\n' + << "{" << '\n' + << " return &rule_" << id << "::create_" << id << ";" << '\n' + << "}" << '\n' + << '\n'; + + ofs << "}" << '\n'; + + ofs.close (); + + + // Write buildfile. + // + of = bf; + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; + + ofs.open (of); + + ofs << "import imp_libs += build2%lib{build2}" << '\n' + << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs" << '\n' + << '\n' + << psig << '\n'; + + ofs.close (); + } + catch (const io_error& e) + { + fail << "unable to write to " << of << ": " << e; + } + + // Update the library target in the module context. + // + const target* l (nullptr); + do // Breakout loop. + { + // Load the project in the module context. + // + // Note that it's possible it has already been loaded (see above about + // the id calculation). + // + scope& rs (load_project (ctx, pd, pd, false /* forwarded */)); + + auto find_target = [&ctx, &rs, &pd, &id] () + { + const target_type* tt (rs.find_target_type ("libs")); + assert (tt != nullptr); + + const target* t ( + ctx.targets.find (*tt, pd, dir_path () /* out */, id)); + assert (t != nullptr); + + return t; + }; + + // If the project has already been loaded then, as an optimization, + // check if the target has already been updated (this will make a + // difference we if we have identical recipes in several buildfiles, + // especially to the location update that comes next). + // + if (!source_once (rs, rs, bf)) + { + l = find_target (); + + if (l->executed_state (perform_update_id) != target_state::unknown) + break; + } + + // Create/update the recipe location header. + // + // For update, preserve the file timestamp in order not to render the + // recipe out of date. + // + of = path (pd / "location.hxx"); + if (!check_sig (of, lsig)) + try + { + entry_time et (file_time (of)); + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; + + ofs.open (of); + + // Recipe file and line for the #line directive above. Note that the + // code starts from the next line thus +1. We also need to escape + // backslashes (Windows paths). + // + ofs << "#define RECIPE_FILE \"" << sanitize_strlit (lf) << '"'<< '\n' + << "#define RECIPE_LINE " << loc.line + 1 << '\n' + << '\n' + << lsig << '\n'; + + ofs.close (); + + if (et.modification != timestamp_nonexistent) + file_time (of, et); + } + catch (const io_error& e) + { + fail << "unable to write to " << of << ": " << e; + } + catch (const system_error& e) + { + fail << "unable to get/set timestamp for " << of << ": " << e; + } + + if (nested) + { + // This means there is a perform update action already in progress + // in this context. So we are going to switch the phase and + // perform direct match and update (similar how we do this for + // generated headers). + // + // Note that since neither match nor execute are serial phases, it + // means other targets in this context can be matched and executed + // in paralellel with us. 
+ // + if (l == nullptr) + l = find_target (); + + phase_switch mp (ctx, run_phase::match); + if (build2::match (perform_update_id, *l) != target_state::unchanged) + { + phase_switch ep (ctx, run_phase::execute); + execute (a, *l); + } + } + else + { + // Cutoff the existing diagnostics stack and push our own entry. + // + diag_frame::stack_guard diag_cutoff (nullptr); + + auto df = make_diag_frame ( + [this, &t] (const diag_record& dr) + { + dr << info (loc) << "while updating ad hoc recipe for target " + << t; + }); + + l = &update_in_module_context ( + ctx, rs, names {name (pd, "libs", id)}, + loc, bf); + } + } while (false); + + // Load the library. + // + const path& lib (l->as ().path ()); + + // Note again that it's possible the library has already been loaded + // (see above about the id calculation). + // + string err; + pair hs (load_module_library (lib, sym, err)); + + // These normally shouldn't happen unless something is seriously broken. + // + if (hs.first == nullptr) + fail (loc) << "unable to load recipe library " << lib << ": " << err; + + if (hs.second == nullptr) + fail (loc) << "unable to lookup " << sym << " in recipe library " + << lib << ": " << err; + + { + auto df = make_diag_frame ( + [this](const diag_record& dr) + { + if (verb != 0) + dr << info (loc) << "while initializing ad hoc recipe"; + }); + + load_function* lf (function_cast (hs.second)); + create_function* cf (lf ()); + + impl = cf (loc, l->executed_state (perform_update_id)); + this->impl.store (impl, memory_order_relaxed); // Still in load phase. + } + } + + return impl->match (a, t, hint); + } + + recipe adhoc_cxx_rule:: + apply (action a, target& t) const + { + return impl.load (memory_order_relaxed)->apply (a, t); + } } diff --git a/libbuild2/rule.hxx b/libbuild2/rule.hxx index 9eab1f6..efa4ec3 100644 --- a/libbuild2/rule.hxx +++ b/libbuild2/rule.hxx @@ -12,6 +12,8 @@ #include #include +#include + #include namespace build2 @@ -22,7 +24,7 @@ namespace build2 // // Note: match() is only called once but may not be followed by apply(). // - class rule + class LIBBUILD2_SYMEXPORT rule { public: virtual bool @@ -33,6 +35,9 @@ namespace build2 rule () = default; + virtual + ~rule (); + rule (const rule&) = delete; rule& operator= (const rule&) = delete; }; @@ -108,6 +113,141 @@ namespace build2 noop_rule () {} static const noop_rule instance; }; + + // Ad hoc rule. + // + // Note: not exported + // + class adhoc_rule: public rule + { + public: + location_value loc; // Buildfile location of the recipe. + size_t braces; // Number of braces in multi-brace tokens. + + adhoc_rule (const location& l, size_t b) + : loc (l), + braces (b), + rule_match ("adhoc", static_cast (*this)) {} + + public: + // Some of the operations come in compensating pairs, such as update and + // clean, install and uninstall. An ad hoc rule implementation may choose + // to provide a fallback implementation of a compensating operation if it + // is providing the other half (passed in the fallback argument). + // + // The default implementation calls rule::match() if fallback is absent + // and returns false if fallback is present. So an implementation that + // doesn't care about this semantics can implement the straight rule + // interface. + // + virtual bool + match (action, target&, const string&, optional fallback) const; + + virtual bool + match (action, target&, const string&) const override; + + virtual void + dump (ostream&, string& indentation) const = 0; + + // Implementation details. 
+ // + public: + build2::rule_match rule_match; + + static const dir_path recipes_build_dir; + + // Scope operation callback that cleans up ad hoc recipe builds. + // + static target_state + clean_recipes_build (action, const scope&, const dir&); + }; + + // Ad hoc script rule. + // + // Note: not exported and should not be used directly (i.e., registered). + // + class adhoc_script_rule: public adhoc_rule + { + public: + virtual bool + match (action, target&, const string&, optional) const override; + + virtual recipe + apply (action, target&) const override; + + target_state + perform_update_file (action, const target&) const; + + target_state + default_action (action, const target&) const; + + virtual void + dump (ostream&, string&) const override; + + using script_type = build::script::script; + + adhoc_script_rule (optional d, const location& l, size_t b) + : adhoc_rule (l, b), diag (move (d)) {} + + public: + const optional diag; // Command name for low-verbosity diag. + string checksum; // Script text hashsum. + script_type script; + + }; + + // Ad hoc C++ rule. + // + // Note: exported but should not be used directly (i.e., registered). + // + class LIBBUILD2_SYMEXPORT cxx_rule: public rule + { + public: + + // A robust recipe may want to incorporate the recipe_state into its + // up-to-date decision as if the recipe library was a prerequisite (it + // cannot be injected as a real prerequisite since it's from a different + // build context). + // + const location recipe_loc; // Buildfile location of the recipe. + const target_state recipe_state; // State of recipe library target. + + cxx_rule (const location& l, target_state s) + : recipe_loc (l), recipe_state (s) {} + + // Return true by default. + // + virtual bool + match (action, target&, const string&) const override; + }; + + // Note: not exported. + // + class adhoc_cxx_rule: public adhoc_rule + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + virtual void + dump (ostream&, string&) const override; + + adhoc_cxx_rule (string c, const location& l, size_t b) + : adhoc_rule (l, b), code (move (c)), impl (nullptr) {} + + virtual + ~adhoc_cxx_rule () override; + + public: + // Note that this recipe (rule instance) can be shared between multiple + // targets which could all be matched in parallel. + // + const string code; + mutable atomic impl; + }; } #endif // LIBBUILD2_RULE_HXX diff --git a/libbuild2/script/builtin-options.cxx b/libbuild2/script/builtin-options.cxx new file mode 100644 index 0000000..2002764 --- /dev/null +++ b/libbuild2/script/builtin-options.cxx @@ -0,0 +1,661 @@ +// -*- C++ -*- +// +// This file was generated by CLI, a command line interface +// compiler for C++. +// + +// Begin prologue. +// +// +// End prologue. 
+ +#include + +#include +#include +#include +#include +#include +#include + +namespace build2 +{ + namespace script + { + namespace cli + { + // unknown_option + // + unknown_option:: + ~unknown_option () throw () + { + } + + void unknown_option:: + print (::std::ostream& os) const + { + os << "unknown option '" << option ().c_str () << "'"; + } + + const char* unknown_option:: + what () const throw () + { + return "unknown option"; + } + + // unknown_argument + // + unknown_argument:: + ~unknown_argument () throw () + { + } + + void unknown_argument:: + print (::std::ostream& os) const + { + os << "unknown argument '" << argument ().c_str () << "'"; + } + + const char* unknown_argument:: + what () const throw () + { + return "unknown argument"; + } + + // missing_value + // + missing_value:: + ~missing_value () throw () + { + } + + void missing_value:: + print (::std::ostream& os) const + { + os << "missing value for option '" << option ().c_str () << "'"; + } + + const char* missing_value:: + what () const throw () + { + return "missing option value"; + } + + // invalid_value + // + invalid_value:: + ~invalid_value () throw () + { + } + + void invalid_value:: + print (::std::ostream& os) const + { + os << "invalid value '" << value ().c_str () << "' for option '" + << option ().c_str () << "'"; + + if (!message ().empty ()) + os << ": " << message ().c_str (); + } + + const char* invalid_value:: + what () const throw () + { + return "invalid option value"; + } + + // eos_reached + // + void eos_reached:: + print (::std::ostream& os) const + { + os << what (); + } + + const char* eos_reached:: + what () const throw () + { + return "end of argument stream reached"; + } + + // scanner + // + scanner:: + ~scanner () + { + } + + // argv_scanner + // + bool argv_scanner:: + more () + { + return i_ < argc_; + } + + const char* argv_scanner:: + peek () + { + if (i_ < argc_) + return argv_[i_]; + else + throw eos_reached (); + } + + const char* argv_scanner:: + next () + { + if (i_ < argc_) + { + const char* r (argv_[i_]); + + if (erase_) + { + for (int i (i_ + 1); i < argc_; ++i) + argv_[i - 1] = argv_[i]; + + --argc_; + argv_[argc_] = 0; + } + else + ++i_; + + return r; + } + else + throw eos_reached (); + } + + void argv_scanner:: + skip () + { + if (i_ < argc_) + ++i_; + else + throw eos_reached (); + } + + // vector_scanner + // + bool vector_scanner:: + more () + { + return i_ < v_.size (); + } + + const char* vector_scanner:: + peek () + { + if (i_ < v_.size ()) + return v_[i_].c_str (); + else + throw eos_reached (); + } + + const char* vector_scanner:: + next () + { + if (i_ < v_.size ()) + return v_[i_++].c_str (); + else + throw eos_reached (); + } + + void vector_scanner:: + skip () + { + if (i_ < v_.size ()) + ++i_; + else + throw eos_reached (); + } + + template + struct parser + { + static void + parse (X& x, bool& xs, scanner& s) + { + using namespace std; + + const char* o (s.next ()); + if (s.more ()) + { + string v (s.next ()); + istringstream is (v); + if (!(is >> x && is.peek () == istringstream::traits_type::eof ())) + throw invalid_value (o, v); + } + else + throw missing_value (o); + + xs = true; + } + }; + + template <> + struct parser + { + static void + parse (bool& x, scanner& s) + { + s.next (); + x = true; + } + }; + + template <> + struct parser + { + static void + parse (std::string& x, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (s.more ()) + x = s.next (); + else + throw missing_value (o); + + xs = true; + } + }; + + template + struct parser > + 
{ + static void + parse (std::vector& c, bool& xs, scanner& s) + { + X x; + bool dummy; + parser::parse (x, dummy, s); + c.push_back (x); + xs = true; + } + }; + + template + struct parser > + { + static void + parse (std::set& c, bool& xs, scanner& s) + { + X x; + bool dummy; + parser::parse (x, dummy, s); + c.insert (x); + xs = true; + } + }; + + template + struct parser > + { + static void + parse (std::map& m, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (s.more ()) + { + std::string ov (s.next ()); + std::string::size_type p = ov.find ('='); + + K k = K (); + V v = V (); + std::string kstr (ov, 0, p); + std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ())); + + int ac (2); + char* av[] = + { + const_cast (o), 0 + }; + + bool dummy; + if (!kstr.empty ()) + { + av[1] = const_cast (kstr.c_str ()); + argv_scanner s (0, ac, av); + parser::parse (k, dummy, s); + } + + if (!vstr.empty ()) + { + av[1] = const_cast (vstr.c_str ()); + argv_scanner s (0, ac, av); + parser::parse (v, dummy, s); + } + + m[k] = v; + } + else + throw missing_value (o); + + xs = true; + } + }; + + template + void + thunk (X& x, scanner& s) + { + parser::parse (x.*M, s); + } + + template + void + thunk (X& x, scanner& s) + { + parser::parse (x.*M, x.*S, s); + } + } + } +} + +#include +#include + +namespace build2 +{ + namespace script + { + // set_options + // + + set_options:: + set_options () + : exact_ (), + newline_ (), + whitespace_ () + { + } + + set_options:: + set_options (int& argc, + char** argv, + bool erase, + ::build2::script::cli::unknown_mode opt, + ::build2::script::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::script::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + } + + set_options:: + set_options (int start, + int& argc, + char** argv, + bool erase, + ::build2::script::cli::unknown_mode opt, + ::build2::script::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::script::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + } + + set_options:: + set_options (int& argc, + char** argv, + int& end, + bool erase, + ::build2::script::cli::unknown_mode opt, + ::build2::script::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::script::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + set_options:: + set_options (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::build2::script::cli::unknown_mode opt, + ::build2::script::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::script::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + set_options:: + set_options (::build2::script::cli::scanner& s, + ::build2::script::cli::unknown_mode opt, + ::build2::script::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + _parse (s, opt, arg); + } + + typedef + std::map + _cli_set_options_map; + + static _cli_set_options_map _cli_set_options_map_; + + struct _cli_set_options_map_init + { + _cli_set_options_map_init () + { + _cli_set_options_map_["--exact"] = + &::build2::script::cli::thunk< set_options, bool, &set_options::exact_ >; + _cli_set_options_map_["-e"] = + &::build2::script::cli::thunk< set_options, bool, &set_options::exact_ >; + _cli_set_options_map_["--newline"] = + &::build2::script::cli::thunk< set_options, bool, &set_options::newline_ >; + _cli_set_options_map_["-n"] = + 
&::build2::script::cli::thunk< set_options, bool, &set_options::newline_ >; + _cli_set_options_map_["--whitespace"] = + &::build2::script::cli::thunk< set_options, bool, &set_options::whitespace_ >; + _cli_set_options_map_["-w"] = + &::build2::script::cli::thunk< set_options, bool, &set_options::whitespace_ >; + } + }; + + static _cli_set_options_map_init _cli_set_options_map_init_; + + bool set_options:: + _parse (const char* o, ::build2::script::cli::scanner& s) + { + _cli_set_options_map::const_iterator i (_cli_set_options_map_.find (o)); + + if (i != _cli_set_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool set_options:: + _parse (::build2::script::cli::scanner& s, + ::build2::script::cli::unknown_mode opt_mode, + ::build2::script::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::build2::script::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + s.skip (); + r = true; + continue; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. + // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast (co.c_str ()), + const_cast (v) + }; + + ::build2::script::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::build2::script::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::build2::script::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. + // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::build2::script::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::script::cli::unknown_mode::stop: + { + break; + } + case ::build2::script::cli::unknown_mode::fail: + { + throw ::build2::script::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::build2::script::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::script::cli::unknown_mode::stop: + { + break; + } + case ::build2::script::cli::unknown_mode::fail: + { + throw ::build2::script::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } + } +} + +// Begin epilogue. +// +// +// End epilogue. + diff --git a/libbuild2/script/builtin-options.hxx b/libbuild2/script/builtin-options.hxx new file mode 100644 index 0000000..5a3f153 --- /dev/null +++ b/libbuild2/script/builtin-options.hxx @@ -0,0 +1,339 @@ +// -*- C++ -*- +// +// This file was generated by CLI, a command line interface +// compiler for C++. 
+// + +#ifndef LIBBUILD2_SCRIPT_BUILTIN_OPTIONS_HXX +#define LIBBUILD2_SCRIPT_BUILTIN_OPTIONS_HXX + +// Begin prologue. +// +// +// End prologue. + +#include +#include +#include +#include +#include + +#ifndef CLI_POTENTIALLY_UNUSED +# if defined(_MSC_VER) || defined(__xlC__) +# define CLI_POTENTIALLY_UNUSED(x) (void*)&x +# else +# define CLI_POTENTIALLY_UNUSED(x) (void)x +# endif +#endif + +namespace build2 +{ + namespace script + { + namespace cli + { + class unknown_mode + { + public: + enum value + { + skip, + stop, + fail + }; + + unknown_mode (value); + + operator value () const + { + return v_; + } + + private: + value v_; + }; + + // Exceptions. + // + + class exception: public std::exception + { + public: + virtual void + print (::std::ostream&) const = 0; + }; + + ::std::ostream& + operator<< (::std::ostream&, const exception&); + + class unknown_option: public exception + { + public: + virtual + ~unknown_option () throw (); + + unknown_option (const std::string& option); + + const std::string& + option () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string option_; + }; + + class unknown_argument: public exception + { + public: + virtual + ~unknown_argument () throw (); + + unknown_argument (const std::string& argument); + + const std::string& + argument () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string argument_; + }; + + class missing_value: public exception + { + public: + virtual + ~missing_value () throw (); + + missing_value (const std::string& option); + + const std::string& + option () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string option_; + }; + + class invalid_value: public exception + { + public: + virtual + ~invalid_value () throw (); + + invalid_value (const std::string& option, + const std::string& value, + const std::string& message = std::string ()); + + const std::string& + option () const; + + const std::string& + value () const; + + const std::string& + message () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string option_; + std::string value_; + std::string message_; + }; + + class eos_reached: public exception + { + public: + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + }; + + // Command line argument scanner interface. + // + // The values returned by next() are guaranteed to be valid + // for the two previous arguments up until a call to a third + // peek() or next(). 
+ // + class scanner + { + public: + virtual + ~scanner (); + + virtual bool + more () = 0; + + virtual const char* + peek () = 0; + + virtual const char* + next () = 0; + + virtual void + skip () = 0; + }; + + class argv_scanner: public scanner + { + public: + argv_scanner (int& argc, char** argv, bool erase = false); + argv_scanner (int start, int& argc, char** argv, bool erase = false); + + int + end () const; + + virtual bool + more (); + + virtual const char* + peek (); + + virtual const char* + next (); + + virtual void + skip (); + + private: + int i_; + int& argc_; + char** argv_; + bool erase_; + }; + + class vector_scanner: public scanner + { + public: + vector_scanner (const std::vector&, std::size_t start = 0); + + std::size_t + end () const; + + void + reset (std::size_t start = 0); + + virtual bool + more (); + + virtual const char* + peek (); + + virtual const char* + next (); + + virtual void + skip (); + + private: + const std::vector& v_; + std::size_t i_; + }; + + template + struct parser; + } + } +} + +namespace build2 +{ + namespace script + { + class set_options + { + public: + set_options (); + + set_options (int& argc, + char** argv, + bool erase = false, + ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail, + ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop); + + set_options (int start, + int& argc, + char** argv, + bool erase = false, + ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail, + ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop); + + set_options (int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail, + ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop); + + set_options (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail, + ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop); + + set_options (::build2::script::cli::scanner&, + ::build2::script::cli::unknown_mode option = ::build2::script::cli::unknown_mode::fail, + ::build2::script::cli::unknown_mode argument = ::build2::script::cli::unknown_mode::stop); + + // Option accessors. + // + const bool& + exact () const; + + const bool& + newline () const; + + const bool& + whitespace () const; + + // Implementation details. + // + protected: + bool + _parse (const char*, ::build2::script::cli::scanner&); + + private: + bool + _parse (::build2::script::cli::scanner&, + ::build2::script::cli::unknown_mode option, + ::build2::script::cli::unknown_mode argument); + + public: + bool exact_; + bool newline_; + bool whitespace_; + }; + } +} + +#include + +// Begin epilogue. +// +// +// End epilogue. + +#endif // LIBBUILD2_SCRIPT_BUILTIN_OPTIONS_HXX diff --git a/libbuild2/script/builtin-options.ixx b/libbuild2/script/builtin-options.ixx new file mode 100644 index 0000000..dc59f98 --- /dev/null +++ b/libbuild2/script/builtin-options.ixx @@ -0,0 +1,182 @@ +// -*- C++ -*- +// +// This file was generated by CLI, a command line interface +// compiler for C++. +// + +// Begin prologue. +// +// +// End prologue. 
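// Aside: a sketch of how the generated set_options/vector_scanner pair
// might be driven (presumably by the set pseudo-builtin). The include path
// is inferred from the file name in the diff and the argument values are
// made up for the example.
//
#include <cassert>
#include <string>
#include <vector>

#include <libbuild2/script/builtin-options.hxx> // Path assumed from the diff.

int
main ()
{
  using namespace build2::script;

  // Combined flags (-en is equivalent to -e -n) are recognized and parsing
  // stops at the first non-option argument (the default unknown argument
  // mode).
  //
  std::vector<std::string> args {"-en", "--whitespace", "value"};

  cli::vector_scanner scan (args);
  set_options ops (scan);

  assert (ops.exact () && ops.newline () && ops.whitespace ());
  assert (scan.end () == 2 && args[scan.end ()] == "value"); // Unparsed.
}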
+ +#include + +namespace build2 +{ + namespace script + { + namespace cli + { + // unknown_mode + // + inline unknown_mode:: + unknown_mode (value v) + : v_ (v) + { + } + + // exception + // + inline ::std::ostream& + operator<< (::std::ostream& os, const exception& e) + { + e.print (os); + return os; + } + + // unknown_option + // + inline unknown_option:: + unknown_option (const std::string& option) + : option_ (option) + { + } + + inline const std::string& unknown_option:: + option () const + { + return option_; + } + + // unknown_argument + // + inline unknown_argument:: + unknown_argument (const std::string& argument) + : argument_ (argument) + { + } + + inline const std::string& unknown_argument:: + argument () const + { + return argument_; + } + + // missing_value + // + inline missing_value:: + missing_value (const std::string& option) + : option_ (option) + { + } + + inline const std::string& missing_value:: + option () const + { + return option_; + } + + // invalid_value + // + inline invalid_value:: + invalid_value (const std::string& option, + const std::string& value, + const std::string& message) + : option_ (option), + value_ (value), + message_ (message) + { + } + + inline const std::string& invalid_value:: + option () const + { + return option_; + } + + inline const std::string& invalid_value:: + value () const + { + return value_; + } + + inline const std::string& invalid_value:: + message () const + { + return message_; + } + + // argv_scanner + // + inline argv_scanner:: + argv_scanner (int& argc, char** argv, bool erase) + : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase) + { + } + + inline argv_scanner:: + argv_scanner (int start, int& argc, char** argv, bool erase) + : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase) + { + } + + inline int argv_scanner:: + end () const + { + return i_; + } + + // vector_scanner + // + inline vector_scanner:: + vector_scanner (const std::vector& v, std::size_t i) + : v_ (v), i_ (i) + { + } + + inline std::size_t vector_scanner:: + end () const + { + return i_; + } + + inline void vector_scanner:: + reset (std::size_t i) + { + i_ = i; + } + } + } +} + +namespace build2 +{ + namespace script + { + // set_options + // + + inline const bool& set_options:: + exact () const + { + return this->exact_; + } + + inline const bool& set_options:: + newline () const + { + return this->newline_; + } + + inline const bool& set_options:: + whitespace () const + { + return this->whitespace_; + } + } +} + +// Begin epilogue. +// +// +// End epilogue. diff --git a/libbuild2/script/builtin.cli b/libbuild2/script/builtin.cli new file mode 100644 index 0000000..68db23e --- /dev/null +++ b/libbuild2/script/builtin.cli @@ -0,0 +1,21 @@ +// file : libbuild2/script/builtin.cli +// license : MIT; see accompanying LICENSE file + +// Note that options in this file are undocumented because we generate neither +// the usage printing code nor man pages. Instead, they are documented in the +// Testscript Language Manual's builtin descriptions. +// +namespace build2 +{ + namespace script + { + // Pseudo-builtin options. 
+ // + class set_options + { + bool --exact|-e; + bool --newline|-n; + bool --whitespace|-w; + }; + } +} diff --git a/libbuild2/script/lexer+command-expansion.test.testscript b/libbuild2/script/lexer+command-expansion.test.testscript new file mode 100644 index 0000000..f4d69d2 --- /dev/null +++ b/libbuild2/script/lexer+command-expansion.test.testscript @@ -0,0 +1,321 @@ +# file : libbuild2/script/lexer+command-expansion.test.testscript +# license : MIT; see accompanying LICENSE file + +test.arguments = command-expansion + +: pass-redirect +: +{ + : in + : + $* <:"0<|" >>EOO + '0' + <| + EOO + + : arg-in + : + $* <:"0 <|" >>EOO + '0 ' + <| + EOO + + : out + : + $* <:"1>|" >>EOO + '1' + >| + EOO + + : arg-out + : + $* <:"1 >|" >>EOO + '1 ' + >| + EOO +} + +: null-redirect +: +{ + : in + : + $* <:"0<-" >>EOO + '0' + <- + EOO + + : arg-in + : + $* <:"0 <-" >>EOO + '0 ' + <- + EOO + + : out + : + $* <:"1>-" >>EOO + '1' + >- + EOO + + : arg-out + : + $* <:"1 >-" >>EOO + '1 ' + >- + EOO +} + +: trace-redirect +: +{ + : out + : + $* <:"1>!" >>EOO + '1' + >! + EOO + + : arg-out + : + $* <:"1 >!" >>EOO + '1 ' + >! + EOO +} + +: merge-redirect +: +{ + : out + : + $* <:"1>&2" >>EOO + '1' + >& + '2' + EOO + + : arg-out + : + $* <:"1 >&2" >>EOO + '1 ' + >& + '2' + EOO +} + +: str-redirect +: +{ + : in + : + { + : newline + : + $* <:"0<<<=a b" >>EOO + '0' + <<<= + 'a b' + EOO + + : no-newline + : + $* <:"0<<<=:a b" >>EOO + '0' + <<<=: + 'a b' + EOO + } + + : in-alias + : + { + : newline + : + $* <:"0<<>EOO + '0' + <<< + 'a b' + EOO + + : no-newline + : + $* <:"0<<<:a b" >>EOO + '0' + <<<: + 'a b' + EOO + } + + : out + : + { + : newline + : + $* <:"1>>>?a b" >>EOO + '1' + >>>? + 'a b' + EOO + + : no-newline + : + $* <:"1>>>?:a b" >>EOO + '1' + >>>?: + 'a b' + EOO + } +} + +: doc-redirect +: +{ + : in + : + { + : newline + : + $* <:"0<<=E O I" >>EOO + '0' + <<= + 'E O I' + EOO + + : no-newline + : + $* <:"0<<=:E O I" >>EOO + '0' + <<=: + 'E O I' + EOO + } + + : in-alias + : + { + : newline + : + $* <:"0<>EOO + '0' + << + 'E O I' + EOO + + : no-newline + : + $* <:"0<<:E O I" >>EOO + '0' + <<: + 'E O I' + EOO + } + + : out + : + { + : newline + : + $* <:"1>>?E O O" >>EOO + '1' + >>? + 'E O O' + EOO + + : no-newline + : + $* <:"1>>?:E O O" >>EOO + '1' + >>?: + 'E O O' + EOO + } +} + +: file-redirect +: +{ + : in + : + $* <:"0<=a b" >>EOO + '0' + <= + 'a b' + EOO + + : in-alias + : + $* <:"0>EOO + '0' + < + 'a b' + EOO + + : out + : + $* <:"1>=a b" >>EOO + '1' + >= + 'a b' + EOO + + : out-alias + : + $* <:"1>a b" >>EOO + '1' + > + 'a b' + EOO + + : out-app + : + $* <:"1>+a b" >>EOO + '1' + >+ + 'a b' + EOO + + : out-app-alias + : + $* <:"1>>a b" >>EOO + '1' + >> + 'a b' + EOO +} + +: no-out-alias +: +$* <:"1>>>a b" >>EOO +'1' +>> +> +'a b' +EOO + + +: cleanup +: +{ + : always + : + $* <:"&file" >>EOO + & + 'file' + EOO + + : maybe + : + $* <:"&?file" >>EOO + &? + 'file' + EOO + + : never + : + $* <:"&!file" >>EOO + &! 
+ 'file' + EOO +} diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx new file mode 100644 index 0000000..d78e999 --- /dev/null +++ b/libbuild2/script/lexer.cxx @@ -0,0 +1,431 @@ +// file : libbuild2/script/lexer.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // strchr() + +using namespace std; + +namespace build2 +{ + namespace script + { + using type = token_type; + + void lexer:: + mode (base_mode m, char ps, optional esc, uintptr_t data) + { + bool a (false); // attributes + + const char* s1 (nullptr); + const char* s2 (nullptr); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::command_expansion: + { + // Note that whitespaces are not word separators in this mode. + // + s1 = "|&<>"; + s2 = " "; + s = false; + break; + } + case lexer_mode::here_line_single: + { + // This one is like a single-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + // Note that it might be tempting to enable line continuation + // escapes. However, we will then have to also enable escaping of + // the backslash, which makes it a lot less tempting. + // + s1 = "\n"; + s2 = " "; + esc = ""; // Disable escape sequences. + s = false; + q = false; + break; + } + case lexer_mode::here_line_double: + { + // This one is like a double-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + s1 = "$(\n"; + s2 = " "; + s = false; + q = false; + break; + } + default: + { + // Make sure pair separators are only enabled where we expect + // them. + // + // @@ Should we disable pair separators in the eval mode? + // + assert (ps == '\0' || + m == lexer_mode::eval || + m == lexer_mode::attribute_value); + + base_lexer::mode (m, ps, esc); + return; + } + } + + assert (ps == '\0'); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); + } + + token lexer:: + next () + { + token r; + + switch (state_.top ().mode) + { + case lexer_mode::command_expansion: + case lexer_mode::here_line_single: + case lexer_mode::here_line_double: + r = next_line (); + break; + default: + r = base_lexer::next (); + break; + } + + if (r.qtype != quote_type::unquoted) + ++quoted_; + + return r; + } + + token lexer:: + next_line () + { + bool sep (skip_spaces ().first); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + const state& st (state_.top ()); + lexer_mode m (st.mode); + + auto make_token = [&sep, &m, ln, cn] (type t) + { + bool q (m == lexer_mode::here_line_double); + + return token (t, string (), sep, + (q ? quote_type::double_ : quote_type::unquoted), q, + ln, cn, + token_printer); + }; + + if (eos (c)) + return make_token (type::eos); + + // NOTE: remember to update mode() if adding new special characters. + + if (m != lexer_mode::command_expansion) + { + switch (c) + { + case '\n': + { + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + } + } + + if (m != lexer_mode::here_line_single) + { + switch (c) + { + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + } + + // Command operators. + // + if (m == lexer_mode::command_expansion) + { + if (optional t = next_cmd_op (c, sep)) + return move (*t); + } + + // Otherwise it is a word. 
+ // + unget (c); + return word (st, sep); + } + + optional lexer:: + next_cmd_op (const xchar& c, bool sep) + { + auto make_token = [&sep, &c] (type t, string v = string ()) + { + return token (t, move (v), sep, + quote_type::unquoted, false, + c.line, c.column, + token_printer); + }; + + auto make_token_with_modifiers = + [&make_token, this] (type t, + const char* mods, // To recorgnize. + const char* stop = nullptr) // To stop after. + { + string v; + if (mods != nullptr) + { + for (xchar p (peek ()); + (strchr (mods, p) != nullptr && // Modifier. + strchr (v.c_str (), p) == nullptr); // Not already seen. + p = peek ()) + { + get (); + v += p; + + if (stop != nullptr && strchr (stop, p) != nullptr) + break; + } + } + + return make_token (t, move (v)); + }; + + switch (c) + { + // |, || + // + case '|': + { + if (peek () == '|') + { + get (); + return make_token (type::log_or); + } + else + return make_token (type::pipe); + } + // &, && + // + case '&': + { + xchar p (peek ()); + + if (p == '&') + { + get (); + return make_token (type::log_and); + } + + // These modifiers are mutually exclusive so stop after seeing + // either one. + // + return make_token_with_modifiers (type::clean, "!?", "!?"); + } + // < + // + case '<': + { + optional r; + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '=' || p == '<') // <| <- <= << + { + xchar c (get ()); + + switch (p) + { + case '|': return make_token (type::in_pass); // <| + case '-': return make_token (type::in_null); // <- + case '=': return make_token (type::in_file); // <= + case '<': // << + { + p = peek (); + + if (p == '=' || p == '<') // <<= <<< + { + xchar c (get ()); + + switch (p) + { + case '=': + { + r = type::in_doc; // <<= + break; + } + case '<': + { + p = peek (); + + if (p == '=') + { + get (); + r = type::in_str; // <<<= + } + + if (!r && redirect_aliases.lll) + r = type::in_lll; // <<< + + // We can still end up with the << or < redirect alias, + // if any of them is present. + // + if (!r) + unget (c); + } + + break; + } + } + + if (!r && redirect_aliases.ll) + r = type::in_ll; // << + + // We can still end up with the < redirect alias, if it is + // present. + // + if (!r) + unget (c); + + break; + } + } + } + + if (!r && redirect_aliases.l) + r = type::in_l; // < + + if (!r) + return nullopt; + + // Handle modifiers. + // + const char* mods (nullptr); + + switch (redirect_aliases.resolve (*r)) + { + case type::in_str: + case type::in_doc: mods = ":/"; break; + } + + token t (make_token_with_modifiers (*r, mods)); + + return t; + } + // > + // + case '>': + { + optional r; + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '!' || p == '&' || // >| >- >! >& + p == '=' || p == '+' || p == '?' || p == '>') // >= >+ >? >> + { + xchar c (get ()); + + switch (p) + { + case '|': return make_token (type::out_pass); // >| + case '-': return make_token (type::out_null); // >- + case '!': return make_token (type::out_trace); // >! + case '&': return make_token (type::out_merge); // >& + case '=': return make_token (type::out_file_ovr); // >= + case '+': return make_token (type::out_file_app); // >+ + case '?': return make_token (type::out_file_cmp); // >? + case '>': // >> + { + p = peek (); + + if (p == '?' || p == '>') // >>? >>> + { + xchar c (get ()); + + switch (p) + { + case '?': + { + r = type::out_doc; // >>? + break; + } + case '>': + { + p = peek (); + + if (p == '?') + { + get (); + r = type::out_str; // >>>? 
+ } + + if (!r && redirect_aliases.ggg) + r = type::out_ggg; // >>> + + // We can still end up with the >> or > redirect alias, + // if any of themis present. + // + if (!r) + unget (c); + } + + break; + } + } + + if (!r && redirect_aliases.gg) + r = type::out_gg; // >> + + // We can still end up with the > redirect alias, if it is + // present. + // + if (!r) + unget (c); + + break; + } + } + } + + if (!r && redirect_aliases.g) + r = type::out_g; // > + + if (!r) + return nullopt; + + // Handle modifiers. + // + const char* mods (nullptr); + const char* stop (nullptr); + + switch (redirect_aliases.resolve (*r)) + { + case type::out_str: + case type::out_doc: mods = ":/~"; stop = "~"; break; + } + + return make_token_with_modifiers (*r, mods, stop); + } + } + + return nullopt; + } + } +} diff --git a/libbuild2/script/lexer.hxx b/libbuild2/script/lexer.hxx new file mode 100644 index 0000000..dbfdfcc --- /dev/null +++ b/libbuild2/script/lexer.hxx @@ -0,0 +1,139 @@ +// file : libbuild2/script/lexer.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCRIPT_LEXER_HXX +#define LIBBUILD2_SCRIPT_LEXER_HXX + +#include +#include + +#include + +#include + +namespace build2 +{ + namespace script + { + struct lexer_mode: build2::lexer_mode + { + using base_type = build2::lexer_mode; + + enum + { + command_expansion = base_type::value_next, + here_line_single, + here_line_double, + + value_next + }; + + lexer_mode () = default; + lexer_mode (value_type v): base_type (v) {} + lexer_mode (base_type v): base_type (v) {} + }; + + // Actual redirects (as tokens) for the the <, <<, <<<, and >, >>, >>> + // aliases. + // + struct redirect_aliases + { + optional l; // < + optional ll; // << + optional lll; // <<< + optional g; // > + optional gg; // >> + optional ggg; // >>> + + // If the token type is a redirect alias then return the token type it + // resolves to and the passed token type otherwise. Note that it's the + // caller's responsibility to make sure that the corresponding alias is + // present (normally by not recognizing absent aliases as tokens). + // + token_type + resolve (token_type t) const noexcept + { + switch (t) + { + case token_type::in_l: assert (l); return *l; + case token_type::in_ll: assert (ll); return *ll; + case token_type::in_lll: assert (lll); return *lll; + case token_type::out_g: assert (g); return *g; + case token_type::out_gg: assert (gg); return *gg; + case token_type::out_ggg: assert (ggg); return *ggg; + } + + return t; + } + }; + + class lexer: public build2::lexer + { + public: + using base_lexer = build2::lexer; + using base_mode = build2::lexer_mode; + + using redirect_aliases_type = script::redirect_aliases; + + // Note that none of the name, redirect aliases, and escape arguments + // are copied. + // + lexer (istream& is, + const path_name& name, + lexer_mode m, + const redirect_aliases_type& ra, + const char* escapes = nullptr) + : base_lexer (is, name, 1 /* line */, + nullptr /* escapes */, + false /* set_mode */), + redirect_aliases (ra) + { + mode (m, '\0', escapes); + } + + virtual void + mode (base_mode, + char = '\0', + optional = nullopt, + uintptr_t = 0) override; + + // Number of quoted (double or single) tokens since last reset. 
+ // + size_t + quoted () const {return quoted_;} + + void + reset_quoted (size_t q) {quoted_ = q;} + + virtual token + next () override; + + public: + const redirect_aliases_type& redirect_aliases; + + protected: + lexer (istream& is, const path_name& name, uint64_t line, + const char* escapes, + bool set_mode, + const redirect_aliases_type& ra) + : base_lexer (is, name, line, escapes, set_mode), + redirect_aliases (ra) {} + + // Return the next token if it is a command operator (|, ||, &&, + // redirect, or cleanup) and nullopt otherwise. + // + optional + next_cmd_op (const xchar&, // The token first character (last got char). + bool sep); // The token is separated. + + private: + token + next_line (); + + protected: + size_t quoted_; + }; + } +} + +#endif // LIBBUILD2_SCRIPT_LEXER_HXX diff --git a/libbuild2/script/lexer.test.cxx b/libbuild2/script/lexer.test.cxx new file mode 100644 index 0000000..b8de241 --- /dev/null +++ b/libbuild2/script/lexer.test.cxx @@ -0,0 +1,76 @@ +// file : libbuild2/script/lexer.test.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include + +#include +#include + +using namespace std; + +namespace build2 +{ + namespace script + { + // Usage: argv[0] + // + int + main (int argc, char* argv[]) + { + lexer_mode m; + { + assert (argc == 2); + string s (argv[1]); + + if (s == "command-expansion") m = lexer_mode::command_expansion; + else if (s == "here-line-single") m = lexer_mode::here_line_single; + else if (s == "here-line-double") m = lexer_mode::here_line_double; + else assert (false); + } + + try + { + cin.exceptions (istream::failbit | istream::badbit); + + path_name in (""); + + using type = token_type; + + redirect_aliases ra {type (type::in_file), + type (type::in_doc), + type (type::in_str), + type (type::out_file_ovr), + type (type::out_file_app), + nullopt}; + + lexer l (cin, in, m, ra); + + // No use printing eos since we will either get it or loop forever. + // + for (token t (l.next ()); t.type != token_type::eos; t = l.next ()) + { + // Print each token on a separate line without quoting operators. + // + t.printer (cout, t, print_mode::normal); + cout << endl; + } + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::script::main (argc, argv); +} diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx new file mode 100644 index 0000000..aa60111 --- /dev/null +++ b/libbuild2/script/parser.cxx @@ -0,0 +1,2015 @@ +// file : libbuild2/script/parser.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include // exit +#include + +using namespace std; + +namespace build2 +{ + namespace script + { + using type = token_type; + + value parser:: + parse_variable_line (token& t, type& tt) + { + // enter: assignment + // leave: newline or unknown token + + next_with_attributes (t, tt); + + // Parse value attributes if any. Note that it's ok not to have + // anything after the attributes (e.g., foo=[null]). + // + attributes_push (t, tt, true); + + // @@ PAT: Should we expand patterns? Note that it will only be + // simple ones since we have disabled {}. Also, what would be the + // pattern base directory? + // + return tt != type::newline && start_names (tt) + ? 
parse_value (t, tt, + pattern_mode::ignore, + "variable value", + nullptr) + : value (names ()); + } + + // Parse the regular expression representation (non-empty string value + // framed with introducer characters and optionally followed by flag + // characters from the {di} set, for example '/foo/id') into + // components. Also return end-of-parsing position if requested, + // otherwise treat any unparsed characters left as an error. + // + struct regex_parts + { + string value; + char intro; + string flags; // Combination of characters from {di} set. + + // Create a special empty object. + // + regex_parts (): intro ('\0') {} + + regex_parts (string v, char i, string f) + : value (move (v)), intro (i), flags (move (f)) {} + }; + + static regex_parts + parse_regex (const string& s, + const location& l, + const char* what, + size_t* end = nullptr) + { + if (s.empty ()) + fail (l) << "no introducer character in " << what; + + size_t p (s.find (s[0], 1)); // Find terminating introducer. + + if (p == string::npos) + fail (l) << "no closing introducer character in " << what; + + size_t rn (p - 1); // Regex length. + if (rn == 0) + fail (l) << what << " is empty"; + + // Find end-of-flags position. + // + size_t fp (++p); // Save flags starting position. + for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ; + + // If string end is not reached then report invalid flags, unless + // end-of-parsing position is requested (which means regex is just a + // prefix). + // + if (s[p] != '\0' && end == nullptr) + fail (l) << "junk at the end of " << what; + + if (end != nullptr) + *end = p; + + return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp)); + } + + pair parser:: + parse_command_expr (token& t, type& tt, + const redirect_aliases& ra) + { + // enter: first token of the command line + // leave: or unknown token + + command_expr expr; + + // OR-ed to an implied false for the first term. + // + expr.push_back ({expr_operator::log_or, command_pipe ()}); + + command c; // Command being assembled. + + // Make sure the command makes sense. + // + auto check_command = [&c, this] (const location& l, bool last) + { + if (c.out && c.out->type == redirect_type::merge && + c.err && c.err->type == redirect_type::merge) + fail (l) << "stdout and stderr redirected to each other"; + + if (!last && c.out) + fail (l) << "stdout is both redirected and piped"; + }; + + // Check that the introducer character differs from '/' if the + // portable path modifier is specified. Must be called before + // parse_regex() (see below) to make sure its diagnostics is + // meaningful. + // + // Note that the portable path modifier assumes '/' to be a valid + // regex character and so makes it indistinguishable from the + // terminating introducer. + // + auto check_regex_mod = [this] (const string& mod, + const string& re, + const location& l, + const char* what) + { + // Handles empty regex properly. + // + if (mod.find ('/') != string::npos && re[0] == '/') + fail (l) << "portable path modifier and '/' introducer in " + << what; + }; + + // Pending positions where the next word should go. + // + enum class pending + { + none, + program, + in_string, + in_document, + in_file, + out_merge, + out_string, + out_str_regex, + out_document, + out_doc_regex, + out_file, + err_merge, + err_string, + err_str_regex, + err_document, + err_doc_regex, + err_file, + clean + }; + pending p (pending::program); + string mod; // Modifiers for pending in_* and out_* positions. + here_docs hd; // Expected here-documents. 
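// Aside: why the first term is OR-ed to an implied false. Below is a
// simplified, self-contained model of left-to-right, short-circuit
// evaluation of a command expression (the real evaluation, with pipes,
// redirects, and diagnostics, lives in the script runner): seeding the
// result with false makes the first pipe always run, since false || x is
// just x.
//
#include <functional>
#include <vector>

enum class expr_op {log_or, log_and};

// One term of a command expression: the operator connecting it to what
// precedes it plus an action that runs the pipe and reports success.
//
struct expr_term
{
  expr_op op;
  std::function<bool ()> run;
};

// Left-to-right, shell-like short-circuit evaluation. With the result
// seeded to false, the first term (always log_or) is never skipped.
//
static bool
evaluate (const std::vector<expr_term>& expr)
{
  bool r (false);

  for (const expr_term& t: expr)
  {
    bool skip (t.op == expr_op::log_and ? !r : r);

    if (!skip)
      r = t.run ();
  }

  return r;
}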
+ + // Add the next word to either one of the pending positions or to + // program arguments by default. + // + auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( + string&& w, const location& l) + { + auto add_merge = [&l, this] (optional& r, + const string& w, + int fd) + { + assert (r); // Must already be present. + + try + { + size_t n; + if (stoi (w, &n) == fd && n == w.size ()) + { + r->fd = fd; + return; + } + } + catch (const exception&) {} // Fall through. + + fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect " + << "file descriptor must be " << fd; + }; + + auto add_here_str = [] (optional& r, string&& w) + { + assert (r); // Must already be present. + + if (r->modifiers ().find (':') == string::npos) + w += '\n'; + r->str = move (w); + }; + + auto add_here_str_regex = [&l, &check_regex_mod] ( + optional& r, int fd, string&& w) + { + assert (r); // Must already be present. + + const char* what (nullptr); + switch (fd) + { + case 1: what = "stdout regex redirect"; break; + case 2: what = "stderr regex redirect"; break; + } + + check_regex_mod (r->modifiers (), w, l, what); + + regex_parts rp (parse_regex (w, l, what)); + + regex_lines& re (r->regex); + re.intro = rp.intro; + + re.lines.emplace_back ( + l.line, l.column, move (rp.value), move (rp.flags)); + + // Add final blank line unless suppressed. + // + // Note that the position is synthetic, but that's ok as we don't + // expect any diagnostics to refer this line. + // + if (r->modifiers ().find (':') == string::npos) + re.lines.emplace_back (l.line, l.column, string (), false); + }; + + auto parse_path = [&l, this] (string&& w, const char* what) -> path + { + try + { + path p (move (w)); + + if (!p.empty ()) + { + p.normalize (); + return p; + } + + fail (l) << "empty " << what << endf; + } + catch (const invalid_path& e) + { + fail (l) << "invalid " << what << " '" << e.path << "'" << endf; + } + }; + + auto add_file = [&parse_path] (optional& r, + int fd, + string&& w) + { + assert (r); // Must already be present. + + const char* what (nullptr); + switch (fd) + { + case 0: what = "stdin redirect path"; break; + case 1: what = "stdout redirect path"; break; + case 2: what = "stderr redirect path"; break; + } + + r->file.path = parse_path (move (w), what); + }; + + switch (p) + { + case pending::none: c.arguments.push_back (move (w)); break; + case pending::program: + c.program = parse_path (move (w), "program path"); + break; + + case pending::out_merge: add_merge (c.out, w, 2); break; + case pending::err_merge: add_merge (c.err, w, 1); break; + + case pending::in_string: add_here_str (c.in, move (w)); break; + case pending::out_string: add_here_str (c.out, move (w)); break; + case pending::err_string: add_here_str (c.err, move (w)); break; + + case pending::out_str_regex: + { + add_here_str_regex (c.out, 1, move (w)); + break; + } + case pending::err_str_regex: + { + add_here_str_regex (c.err, 2, move (w)); + break; + } + + // These are handled specially below. 
+ // + case pending::in_document: + case pending::out_document: + case pending::err_document: + case pending::out_doc_regex: + case pending::err_doc_regex: assert (false); break; + + case pending::in_file: add_file (c.in, 0, move (w)); break; + case pending::out_file: add_file (c.out, 1, move (w)); break; + case pending::err_file: add_file (c.err, 2, move (w)); break; + + case pending::clean: + { + cleanup_type t; + switch (mod[0]) // Ok, if empty + { + case '!': t = cleanup_type::never; break; + case '?': t = cleanup_type::maybe; break; + default: t = cleanup_type::always; break; + } + + c.cleanups.push_back ( + {t, parse_path (move (w), "cleanup path")}); + break; + } + } + + p = pending::none; + mod.clear (); + }; + + // Make sure we don't have any pending positions to fill. + // + auto check_pending = [&p, this] (const location& l) + { + const char* what (nullptr); + + switch (p) + { + case pending::none: break; + case pending::program: what = "program"; break; + case pending::in_string: what = "stdin here-string"; break; + case pending::in_document: what = "stdin here-document end"; break; + case pending::in_file: what = "stdin file"; break; + case pending::out_merge: what = "stdout file descriptor"; break; + case pending::out_string: what = "stdout here-string"; break; + case pending::out_document: what = "stdout here-document end"; break; + case pending::out_file: what = "stdout file"; break; + case pending::err_merge: what = "stderr file descriptor"; break; + case pending::err_string: what = "stderr here-string"; break; + case pending::err_document: what = "stderr here-document end"; break; + case pending::err_file: what = "stderr file"; break; + case pending::clean: what = "cleanup path"; break; + + case pending::out_str_regex: + { + what = "stdout here-string regex"; + break; + } + case pending::err_str_regex: + { + what = "stderr here-string regex"; + break; + } + case pending::out_doc_regex: + { + what = "stdout here-document regex end"; + break; + } + case pending::err_doc_regex: + { + what = "stderr here-document regex end"; + break; + } + } + + if (what != nullptr) + fail (l) << "missing " << what; + }; + + // Parse the redirect operator. + // + // If the token type is the redirect alias then tt must contain the type + // the alias resolves to and the token type otherwise. Note that this + // argument defines the redirect semantics. Also note that the token is + // saved into the redirect to keep the modifiers and the original + // representation. + // + auto parse_redirect = [&c, &expr, &p, &mod, &hd, this] + (token&& t, type tt, const location& l) + { + // The redirect alias token type must be resolved. + // + assert (tt != type::in_l && + tt != type::in_ll && + tt != type::in_lll && + tt != type::out_g && + tt != type::out_gg && + tt != type::out_ggg); + + // Our semantics is the last redirect seen takes effect. + // + assert (p == pending::none && mod.empty ()); + + // See if we have the file descriptor. + // + unsigned long fd (3); + if (!t.separated) + { + if (c.arguments.empty ()) + fail (l) << "missing redirect file descriptor"; + + const string& s (c.arguments.back ()); + + try + { + size_t n; + fd = stoul (s, &n); + + if (n != s.size () || fd > 2) + throw invalid_argument (string ()); + } + catch (const exception&) + { + fail (l) << "invalid redirect file descriptor '" << s << "'"; + } + + c.arguments.pop_back (); + } + + // Validate/set default file descriptor. 
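// For example, with a testscript-like surface syntax (the concrete
// redirect spellings are defined by the derived script language and are
// shown here only for illustration):
//
//   cmd >foo     # no descriptor: stays 3 here and is defaulted to 1 below
//   cmd 2>foo    # explicit: the unseparated '2' was popped off above
//   cmd 2 >foo   # separated: '2' remains an argument, fd defaults to 1
//   cmd 3>foo    # error: invalid redirect file descriptor '3'
//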
+ // + switch (tt) + { + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: + { + if ((fd = fd == 3 ? 0 : fd) != 0) + fail (l) << "invalid in redirect file descriptor " << fd; + + if (!expr.back ().pipe.empty ()) + fail (l) << "stdin is both piped and redirected"; + + break; + } + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_doc: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + { + if ((fd = fd == 3 ? 1 : fd) == 0) + fail (l) << "invalid out redirect file descriptor " << fd; + + break; + } + } + + // Don't move as we will save the token into the redirect object. + // + mod = t.value; + + // Handle the none redirect (no data allowed) in the switch construct + // if/when the respective syntax is invented. + // + redirect_type rt (redirect_type::none); + switch (tt) + { + case type::in_pass: + case type::out_pass: rt = redirect_type::pass; break; + + case type::in_null: + case type::out_null: rt = redirect_type::null; break; + + case type::out_trace: rt = redirect_type::trace; break; + + case type::out_merge: rt = redirect_type::merge; break; + + case type::in_str: + case type::out_str: + { + bool re (mod.find ('~') != string::npos); + assert (tt == type::out_str || !re); + + rt = re + ? redirect_type::here_str_regex + : redirect_type::here_str_literal; + + break; + } + + case type::in_doc: + case type::out_doc: + { + bool re (mod.find ('~') != string::npos); + assert (tt == type::out_doc || !re); + + rt = re + ? redirect_type::here_doc_regex + : redirect_type::here_doc_literal; + + break; + } + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: rt = redirect_type::file; break; + } + + optional& r (fd == 0 ? c.in : + fd == 1 ? c.out : + c.err); + + optional overriden; + + if (r) + overriden = r->type; + + r = redirect (rt); + + // Don't move as still may be used for pending here-document end + // marker processing. + // + r->token = move (t); + + switch (rt) + { + case redirect_type::none: + // Remove the assertion if/when the none redirect syntax is + // invented. + // + assert (false); + // Fall through. + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: + break; + case redirect_type::merge: + switch (fd) + { + case 0: assert (false); break; + case 1: p = pending::out_merge; break; + case 2: p = pending::err_merge; break; + } + break; + case redirect_type::here_str_literal: + switch (fd) + { + case 0: p = pending::in_string; break; + case 1: p = pending::out_string; break; + case 2: p = pending::err_string; break; + } + break; + case redirect_type::here_str_regex: + switch (fd) + { + case 0: assert (false); break; + case 1: p = pending::out_str_regex; break; + case 2: p = pending::err_str_regex; break; + } + break; + case redirect_type::here_doc_literal: + switch (fd) + { + case 0: p = pending::in_document; break; + case 1: p = pending::out_document; break; + case 2: p = pending::err_document; break; + } + break; + case redirect_type::here_doc_regex: + switch (fd) + { + case 0: assert (false); break; + case 1: p = pending::out_doc_regex; break; + case 2: p = pending::err_doc_regex; break; + } + break; + case redirect_type::file: + switch (fd) + { + case 0: p = pending::in_file; break; + case 1: p = pending::out_file; break; + case 2: p = pending::err_file; break; + } + + // Also sets for stdin, but this is harmless. 
+ // + r->file.mode = tt == type::out_file_ovr ? redirect_fmode::overwrite : + tt == type::out_file_app ? redirect_fmode::append : + redirect_fmode::compare; + + break; + + case redirect_type::here_doc_ref: assert (false); break; + } + + // If we are overriding a here-document, then remove the reference + // to this command redirect from the corresponding here_doc object. + // + if (!pre_parse_ && + overriden && + (*overriden == redirect_type::here_doc_literal || + *overriden == redirect_type::here_doc_regex)) + { + size_t e (expr.size () - 1); + size_t p (expr.back ().pipe.size ()); + int f (static_cast (fd)); + + for (here_doc& d: hd) + { + small_vector& rs (d.redirects); + + auto i (find_if (rs.begin (), rs.end (), + [e, p, f] (const here_redirect& r) + { + return r.expr == e && + r.pipe == p && + r.fd == f; + })); + + if (i != rs.end ()) + { + rs.erase (i); + break; + } + } + } + }; + + // Set pending cleanup type. + // + auto parse_clean = [&p, &mod] (token& t) + { + p = pending::clean; + mod = move (t.value); + }; + + const location ll (get_location (t)); // Line location. + + // Keep parsing chunks of the command line until we see one of the + // "terminators" (newline, exit status comparison, etc). + // + location l (ll); + names ns; // Reuse to reduce allocations. + + for (bool done (false); !done; l = get_location (t)) + { + tt = ra.resolve (tt); + + switch (tt) + { + case type::newline: + { + done = true; + break; + } + + case type::equal: + case type::not_equal: + { + if (!pre_parse_) + check_pending (l); + + c.exit = parse_command_exit (t, tt); + + // Only a limited set of things can appear after the exit status + // so we check this here. + // + switch (tt) + { + case type::newline: + + case type::pipe: + case type::log_or: + case type::log_and: + break; + + default: + { + // Bail out if this is one of the unknown/unexpected tokens. + // + done = true; + break; + } + } + + break; + } + + case type::pipe: + case type::log_or: + case type::log_and: + + case type::in_pass: + case type::out_pass: + + case type::in_null: + case type::out_null: + + case type::out_trace: + + case type::out_merge: + + case type::in_str: + case type::in_doc: + case type::out_str: + case type::out_doc: + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + + case type::clean: + { + if (pre_parse_) + { + // The only things we need to handle here are the here-document + // and here-document regex end markers since we need to know + // how many of them to pre-parse after the command. + // + switch (tt) + { + case type::in_doc: + case type::out_doc: + mod = move (t.value); + + bool re (mod.find ('~') != string::npos); + const char* what (re + ? "here-document regex end marker" + : "here-document end marker"); + + // We require the end marker to be a literal, unquoted word. + // In particularm, we don't allow quoted because of cases + // like foo"$bar" (where we will see word 'foo'). + // + next (t, tt); + + // We require the end marker to be an unquoted or completely + // quoted word. The complete quoting becomes important for + // cases like foo"$bar" (where we will see word 'foo'). + // + // For good measure we could have also required it to be + // separated from the following token, but out grammar + // allows one to write >>EOO;. The problematic sequence + // would be >>FOO$bar -- on reparse it will be expanded + // as a single word. 
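// Roughly, using testscript-like spellings purely for illustration:
//
//   cmd <<EOI        # ok: literal, unquoted end marker
//   ...
//   EOI
//
//   cmd <<FOO$bar    # error below: end marker must be literal
//   cmd <<F"O"O      # error below: partially-quoted end marker
//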
+ // + if (tt != type::word || t.value.empty ()) + fail (t) << "expected " << what; + + peek (); + const token& p (peeked ()); + if (!p.separated) + { + switch (p.type) + { + case type::dollar: + case type::lparen: + fail (p) << what << " must be literal"; + } + } + + quote_type qt (t.qtype); + switch (qt) + { + case quote_type::unquoted: + qt = quote_type::single; // Treat as single-quoted. + break; + case quote_type::single: + case quote_type::double_: + if (t.qcomp) + break; + // Fall through. + case quote_type::mixed: + fail (t) << "partially-quoted " << what; + } + + regex_parts r; + string end (move (t.value)); + + if (re) + { + check_regex_mod (mod, end, l, what); + + r = parse_regex (end, l, what); + end = move (r.value); // The "cleared" end marker. + } + + bool literal (qt == quote_type::single); + bool shared (false); + + for (const auto& d: hd) + { + if (d.end == end) + { + auto check = [&t, &end, &re, this] (bool c, + const char* what) + { + if (!c) + fail (t) << "different " << what + << " for shared here-document " + << (re ? "regex '" : "'") << end << "'"; + }; + + check (d.modifiers == mod, "modifiers"); + check (d.literal == literal, "quoting"); + + if (re) + { + check (d.regex == r.intro, "introducers"); + check (d.regex_flags == r.flags, "global flags"); + } + + shared = true; + break; + } + } + + if (!shared) + hd.push_back ( + here_doc { + {}, + move (end), + literal, + move (mod), + r.intro, move (r.flags)}); + + break; + } + + next (t, tt); + break; + } + + // If this is one of the operators/separators, check that we + // don't have any pending locations to be filled. + // + check_pending (l); + + // Note: there is another one in the inner loop below. + // + switch (tt) + { + case type::pipe: + case type::log_or: + case type::log_and: + { + // Check that the previous command makes sense. + // + check_command (l, tt != type::pipe); + expr.back ().pipe.push_back (move (c)); + + c = command (); + p = pending::program; + + if (tt != type::pipe) + { + expr_operator o (tt == type::log_or + ? expr_operator::log_or + : expr_operator::log_and); + expr.push_back ({o, command_pipe ()}); + } + + break; + } + + case type::in_pass: + case type::out_pass: + + case type::in_null: + case type::out_null: + + case type::out_trace: + + case type::out_merge: + + case type::in_str: + case type::in_doc: + case type::out_str: + case type::out_doc: + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + { + parse_redirect (move (t), tt, l); + break; + } + + case type::clean: + { + parse_clean (t); + break; + } + + default: assert (false); break; + } + + next (t, tt); + break; + } + default: + { + // Bail out if this is one of the unknown tokens. + // + if (!start_names (tt)) + { + done = true; + break; + } + + // Here-document end markers are literal (we verified that above + // during pre-parsing) and we need to know whether they were + // quoted. So handle this case specially. 
+ // + { + int fd; + switch (p) + { + case pending::in_document: fd = 0; break; + case pending::out_document: + case pending::out_doc_regex: fd = 1; break; + case pending::err_document: + case pending::err_doc_regex: fd = 2; break; + default: fd = -1; break; + } + + if (fd != -1) + { + if (tt != type::word || t.value.empty ()) + fail (t) << "expected here-document end marker"; + + here_redirect rd { + expr.size () - 1, expr.back ().pipe.size (), fd}; + + string end (move (t.value)); + + regex_parts r; + + if (p == pending::out_doc_regex || + p == pending::err_doc_regex) + { + // We can't fail here as we already parsed all the end + // markers during pre-parsing stage, and so no need in the + // description. + // + r = parse_regex (end, l, ""); + end = move (r.value); // The "cleared" end marker. + } + + bool shared (false); + for (auto& d: hd) + { + // No need to check that redirects that share here-document + // have the same modifiers, etc. That have been done during + // pre-parsing. + // + if (d.end == end) + { + d.redirects.emplace_back (rd); + shared = true; + break; + } + } + + if (!shared) + hd.push_back ( + here_doc { + {rd}, + move (end), + (t.qtype == quote_type::unquoted || + t.qtype == quote_type::single), + move (mod), + r.intro, move (r.flags)}); + + p = pending::none; + mod.clear (); + + next (t, tt); + break; + } + } + + // Parse the next chunk as simple names to get expansion, etc. + // Note that we do it in the chunking mode to detect whether + // anything in each chunk is quoted. + // + // @@ PAT: should we support pattern expansion? This is even + // fuzzier than the variable case above. Though this is the + // shell semantics. Think what happens when we do rm *.txt? + // + reset_quoted (t); + parse_names (t, tt, + ns, + pattern_mode::ignore, + true, + "command line", + nullptr); + + if (pre_parse_) // Nothing else to do if we are pre-parsing. + break; + + // Process what we got. Determine whether anything inside was + // quoted (note that the current token is "next" and is not part + // of this). + // + bool q ((quoted () - + (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); + + for (name& n: ns) + { + string s; + + try + { + s = value_traits::convert (move (n), nullptr); + } + catch (const invalid_argument&) + { + diag_record dr (fail (l)); + dr << "invalid string value "; + to_stream (dr.os, n, true); // Quote. + } + + // If it is a quoted chunk, then we add the word as is. + // Otherwise we re-lex it. But if the word doesn't contain any + // interesting characters (operators plus quotes/escapes), + // then no need to re-lex. + // + // NOTE: update quoting (script.cxx:to_stream_q()) if adding + // any new characters. + // + if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) + add_word (move (s), l); + else + { + // If the chunk re-parsing results in error, our diagnostics + // will look like this: + // + // :1:4: error: stdout merge redirect file descriptor must be 2 + // script:2:5: info: while parsing string '1>&a' + // + auto df = make_diag_frame ( + [this, s, &l](const diag_record& dr) + { + dr << info (l) << "while parsing string '" << s << "'"; + }); + + // When re-lexing we do "effective escaping" and only for + // ['"\] (quotes plus the backslash itself). In particular, + // there is no way to escape redirects, operators, etc. 
The + // idea is to prefer quoting except for passing literal + // quotes, for example: + // + // args = \"&foo\" + // cmd $args # cmd &foo + // + // args = 'x=\"foo bar\"' + // cmd $args # cmd x="foo bar" + // + istringstream is (s); + path_name in (""); + lexer lex (is, in, + lexer_mode::command_expansion, + ra, + "\'\"\\"); + + // Treat the first "sub-token" as always separated from what + // we saw earlier. + // + // Note that this is not "our" token so we cannot do + // fail(t). Rather we should do fail(l). + // + token t (lex.next ()); + location l (build2::get_location (t, in)); + t.separated = true; + + string w; + bool f (t.type == type::eos); // If the whole thing is empty. + + for (; t.type != type::eos; t = lex.next ()) + { + type tt (ra.resolve (t.type)); + l = build2::get_location (t, in); + + // Re-lexing double-quotes will recognize $, ( inside as + // tokens so we have to reverse them back. Since we don't + // treat spaces as separators we can be sure we will get + // it right. + // + switch (tt) + { + case type::dollar: w += '$'; continue; + case type::lparen: w += '('; continue; + } + + // Retire the current word. We need to distinguish between + // empty and non-existent (e.g., > vs >""). + // + if (!w.empty () || f) + { + add_word (move (w), l); + f = false; + } + + if (tt == type::word) + { + w = move (t.value); + f = true; + continue; + } + + // If this is one of the operators/separators, check that + // we don't have any pending locations to be filled. + // + check_pending (l); + + // Note: there is another one in the outer loop above. + // + switch (tt) + { + case type::pipe: + case type::log_or: + case type::log_and: + { + // Check that the previous command makes sense. + // + check_command (l, tt != type::pipe); + expr.back ().pipe.push_back (move (c)); + + c = command (); + p = pending::program; + + if (tt != type::pipe) + { + expr_operator o (tt == type::log_or + ? expr_operator::log_or + : expr_operator::log_and); + expr.push_back ({o, command_pipe ()}); + } + + break; + } + + case type::in_pass: + case type::out_pass: + + case type::in_null: + case type::out_null: + + case type::out_trace: + + case type::out_merge: + + case type::in_str: + case type::out_str: + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + { + parse_redirect (move (t), tt, l); + break; + } + + case type::clean: + { + parse_clean (t); + break; + } + + case type::in_doc: + case type::out_doc: + { + fail (l) << "here-document redirect in expansion"; + break; + } + } + } + + // Don't forget the last word. + // + if (!w.empty () || f) + add_word (move (w), l); + } + } + + ns.clear (); + break; + } + } + } + + if (!pre_parse_) + { + // Verify we don't have anything pending to be filled and the + // command makes sense. + // + check_pending (l); + check_command (l, true); + + expr.back ().pipe.push_back (move (c)); + } + + return make_pair (move (expr), move (hd)); + } + + command_exit parser:: + parse_command_exit (token& t, type& tt) + { + // enter: equal/not_equal + // leave: token after exit status (one parse_names() chunk) + + exit_comparison comp (tt == type::equal + ? exit_comparison::eq + : exit_comparison::ne); + + // The next chunk should be the exit status. 
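// For example (testscript-style spellings, for illustration only):
//
//   cmd == 0     # expect exit status 0
//   cmd != 0     # expect any non-zero exit status
//   cmd == 256   # error: exit status is an unsigned integer less than 256
//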
+ // + next (t, tt); + location l (get_location (t)); + names ns (parse_names (t, tt, + pattern_mode::ignore, + true, + "exit status", + nullptr)); + unsigned long es (256); + + if (!pre_parse_) + { + try + { + if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) + es = stoul (ns[0].value); + } + catch (const exception&) {} // Fall through. + + if (es > 255) + { + diag_record dr; + + dr << fail (l) << "expected exit status instead of "; + to_stream (dr.os, ns, true); // Quote. + + dr << info << "exit status is an unsigned integer less than 256"; + } + } + + return command_exit {comp, static_cast (es)}; + } + + void parser:: + parse_here_documents (token& t, type& tt, + pair& p) + { + // enter: newline + // leave: newline + + // Parse here-document fragments in the order they were mentioned on + // the command line. + // + for (here_doc& h: p.second) + { + // Switch to the here-line mode which is like single/double-quoted + // string but recognized the newline as a separator. + // + mode (h.literal + ? lexer_mode::here_line_single + : lexer_mode::here_line_double); + next (t, tt); + + parsed_doc v ( + parse_here_document (t, tt, h.end, h.modifiers, h.regex)); + + // If all the here-document redirects are overridden, then we just + // drop the fragment. + // + if (!pre_parse_ && !h.redirects.empty ()) + { + auto i (h.redirects.cbegin ()); + + command& c (p.first[i->expr].pipe[i->pipe]); + + optional& r (i->fd == 0 ? c.in : + i->fd == 1 ? c.out : + c.err); + + assert (r); // Must be present since it is referred. + + if (v.re) + { + assert (r->type == redirect_type::here_doc_regex); + + r->regex = move (v.regex); + r->regex.flags = move (h.regex_flags); + } + else + { + assert (r->type == redirect_type::here_doc_literal); + + r->str = move (v.str); + } + + r->end = move (h.end); + r->end_line = v.end_line; + r->end_column = v.end_column; + + // Note that our references cannot be invalidated because the + // command_expr/command-pipe vectors already contain all their + // elements. + // + for (++i; i != h.redirects.cend (); ++i) + { + command& c (p.first[i->expr].pipe[i->pipe]); + + optional& ir (i->fd == 0 ? c.in : + i->fd == 1 ? c.out : + c.err); + + // Must be present since it is referenced by here-doc. + // + assert (ir); + + // Note: preserve the original representation. + // + ir = redirect (redirect_type::here_doc_ref, *r, move (ir->token)); + } + } + + expire_mode (); + } + } + + parser::parsed_doc parser:: + parse_here_document (token& t, type& tt, + const string& em, + const string& mod, + char re) + { + // enter: first token on first line + // leave: newline (after end marker) + + // String literal. Note that when decide if to terminate the previously + // added line with a newline, we need to distinguish a yet empty result + // and the one that has a single blank line added. + // + optional rs; + + regex_lines rre; + + // Here-documents can be indented. The leading whitespaces of the end + // marker line (called strip prefix) determine the indentation. Every + // other line in the here-document should start with this prefix which + // is automatically stripped. The only exception is a blank line. + // + // The fact that the strip prefix is only known at the end, after + // seeing all the lines, is rather inconvenient. 
As a result, the way + // we implement this is a bit hackish (though there is also something + // elegant about it): at the end of the pre-parse stage we are going + // re-examine the sequence of tokens that comprise this here-document + // and "fix up" the first token of each line by stripping the prefix. + // + string sp; + + // Remember the position of the first token in this here-document. + // + size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0); + + // We will use the location of the first token on the line for the + // regex diagnostics. At the end of the loop it will point to the + // beginning of the end marker. + // + location l; + + while (tt != type::eos) + { + l = get_location (t); + + // Check if this is the end marker. For starters, it should be a + // single, unquoted word followed by a newline. + // + if (tt == type::word && + t.qtype == quote_type::unquoted && + peek () == type::newline) + { + const string& v (t.value); + + size_t vn (v.size ()); + size_t en (em.size ()); + + // Then check that it ends with the end marker. + // + if (vn >= en && v.compare (vn - en, en, em) == 0) + { + // Now check that the prefix only contains whitespaces. + // + size_t n (vn - en); + + if (v.find_first_not_of (" \t") >= n) + { + assert (pre_parse_ || n == 0); // Should have been stripped. + + if (n != 0) + sp.assign (v, 0, n); // Save the strip prefix. + + next (t, tt); // Get the newline. + break; + } + } + } + + // Expand the line (can be blank). + // + // @@ PAT: one could argue that if we do it in variables, then we + // should do it here as well. Though feels bizarre. + // + names ns (tt != type::newline + ? parse_names (t, tt, + pattern_mode::ignore, + false, + "here-document line", + nullptr) + : names ()); + + if (!pre_parse_) + { + // What shall we do if the expansion results in multiple names? + // For, example if the line contains just the variable expansion + // and it is of type strings. Adding all the elements space- + // separated seems like the natural thing to do. + // + string s; + for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) + { + string n; + + try + { + n = value_traits::convert (move (*i), nullptr); + } + catch (const invalid_argument&) + { + fail (l) << "invalid string value '" << *i << "'"; + } + + if (i == b) + s = move (n); + else + { + s += ' '; + s += n; + } + } + + if (!re) + { + // Add newline after previous line. + // + if (rs) + { + *rs += '\n'; + *rs += s; + } + else + rs = move (s); + } + else + { + // Due to expansion we can end up with multiple lines. If empty + // then will add a blank textual literal. + // + for (size_t p (0); p != string::npos; ) + { + string ln; + size_t np (s.find ('\n', p)); + + if (np != string::npos) + { + ln = string (s, p, np - p); + p = np + 1; + } + else + { + ln = string (s, p); + p = np; + } + + if (ln[0] != re) // Line doesn't start with regex introducer. + { + // This is a line-char literal (covers blank lines as well). + // + // Append textual literal. + // + rre.lines.emplace_back (l.line, l.column, move (ln), false); + } + else // Line starts with the regex introducer. + { + // This is a char-regex, or a sequence of line-regex syntax + // characters or both (in this specific order). So we will + // add regex (with optional special characters) or special + // literal. + // + size_t p (ln.find (re, 1)); + if (p == string::npos) + { + // No regex, just a sequence of syntax characters. + // + string spec (ln, 1); + if (spec.empty ()) + fail (l) << "no syntax line characters"; + + // Append special literal. 
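// To illustrate the classification being done here, with '%' as the
// introducer the here-document regex lines
//
//   %foo.*%i   <- char-regex 'foo.*' with the 'i' (icase) flag
//   bar        <- textual literal line
//   %%         <- empty regex, i.e., matches a blank line
//   %?         <- regex syntax characters only: appended as special literal
//
// end up as the corresponding regex_lines entries (illustration only).
//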
+ // + rre.lines.emplace_back ( + l.line, l.column, move (spec), true); + } + else + { + // Regex (probably with syntax characters). + // + regex_parts re; + + // Empty regex is a special case repesenting a blank line. + // + if (p == 1) + // Position to optional specal characters of an empty + // regex. + // + ++p; + else + // Can't fail as all the pre-conditions verified + // (non-empty with both introducers in place), so no + // description required. + // + re = parse_regex (ln, l, "", &p); + + // Append regex with optional special characters. + // + rre.lines.emplace_back (l.line, l.column, + move (re.value), move (re.flags), + string (ln, p)); + } + } + } + } + } + + // We should expand the whole line at once so this would normally be + // a newline but can also be an end-of-stream. + // + if (tt == type::newline) + next (t, tt); + else + assert (tt == type::eos); + } + + if (tt == type::eos) + fail (t) << "missing here-document end marker '" << em << "'"; + + if (pre_parse_) + { + // Strip the indentation prefix if there is one. + // + assert (replay_ == replay::save); + + if (!sp.empty ()) + { + size_t sn (sp.size ()); + + for (; ri != replay_data_.size (); ++ri) + { + token& rt (replay_data_[ri].token); + + if (rt.type == type::newline) // Blank + continue; + + if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0) + fail (rt) << "unindented here-document line"; + + // If the word is equal to the strip prefix then we have to drop + // the token. Note that simply making it an empty word won't + // have the same semantics. For instance, it would trigger + // concatenated expansion. + // + if (rt.value.size () == sn) + replay_data_.erase (replay_data_.begin () + ri); + else + { + rt.value.erase (0, sn); + rt.column += sn; + ++ri; + } + + // Skip until next newline. + // + for (; replay_data_[ri].token.type != type::newline; ++ri) ; + } + } + } + else + { + // Add final newline unless suppressed. + // + if (mod.find (':') == string::npos) + { + if (re) + // Note that the position is synthetic, but that's ok as we don't + // expect any diagnostics to refer this line. + // + rre.lines.emplace_back (l.line, l.column, string (), false); + else if (rs) + *rs += '\n'; + else + rs = "\n"; + } + + // Finalize regex lines. + // + if (re) + { + // Empty regex matches nothing, so not of much use. + // + if (rre.lines.empty ()) + fail (l) << "empty here-document regex"; + + rre.intro = re; + } + } + + return re + ? parsed_doc (move (rre), l.line, l.column) + : parsed_doc (rs ? move (*rs) : string (), l.line, l.column); + } + + size_t parser:: + quoted () const + { + size_t r (0); + + if (replay_ != replay::play) + r = lexer_->quoted (); + else + { + // Examine tokens we have replayed since last reset. + // + for (size_t i (replay_quoted_); i != replay_i_; ++i) + if (replay_data_[i].token.qtype != quote_type::unquoted) + ++r; + } + + return r; + } + + void parser:: + reset_quoted (token& cur) + { + if (replay_ != replay::play) + lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0); + else + { + replay_quoted_ = replay_i_ - 1; + + // Must be the same token. 
+ // + assert (replay_data_[replay_quoted_].token.qtype == cur.qtype); + } + } + + void parser:: + set_lexer (lexer* l) + { + lexer_ = l; + build2::parser::lexer_ = l; + } + + static redirect_aliases no_redirect_aliases; + + void parser:: + apply_value_attributes (const variable* var, + value& lhs, + value&& rhs, + const string& attributes, + token_type kind, + const path_name& name) + { + path_ = &name; + + istringstream is (attributes); + + // Note that the redirect alias information is not used in the + // attributes lexer mode. + // + lexer l (is, name, lexer_mode::attributes, no_redirect_aliases); + + set_lexer (&l); + + token t; + type tt; + + next_with_attributes (t, tt); // Enable `[` recognition. + + if (tt != type::lsbrace && tt != type::eos) + fail (t) << "expected '[' instead of " << t; + + attributes_push (t, tt, true); + + if (tt != type::eos) + fail (t) << "trailing junk after ']'"; + + build2::parser::apply_value_attributes (var, lhs, move (rhs), kind); + } + + line_type parser:: + pre_parse_line_start (token& t, token_type& tt, lexer_mode stm) + { + replay_save (); // Start saving tokens from the current one. + next (t, tt); + + // Decide whether this is a variable assignment or a command. + // + // It is an assignment if the first token is an unquoted name and + // the next token is an assign/append/prepend operator. Assignment + // to a computed variable name must use the set builtin. + // + // Note also that special commands take precedence over variable + // assignments. + // + line_type r (line_type::cmd); // Default. + + if (tt == type::word && t.qtype == quote_type::unquoted) + { + const string& n (t.value); + + if (n == "if") r = line_type::cmd_if; + else if (n == "if!") r = line_type::cmd_ifn; + else if (n == "elif") r = line_type::cmd_elif; + else if (n == "elif!") r = line_type::cmd_elifn; + else if (n == "else") r = line_type::cmd_else; + else if (n == "end") r = line_type::cmd_end; + else + { + // Switch the recognition of leading variable assignments for + // the next token. This is safe to do because we know we + // cannot be in the quoted mode (since the current token is + // not quoted). + // + type p (peek (stm)); + + if (p == type::assign || p == type::prepend || p == type::append) + { + r = line_type::var; + + // Note that the missing command program is detected later, by + // parse_command_expr(). + // + if (n.empty ()) + fail (t) << "missing variable name"; + } + } + } + + return r; + } + + bool parser:: + exec_lines (lines::const_iterator i, lines::const_iterator e, + const function& exec_set, + const function& exec_cmd, + const function& exec_if, + size_t& li, + variable_pool* var_pool) + { + try + { + token t; + type tt; + for (; i != e; ++i) + { + const line& ln (*i); + line_type lt (ln.type); + + assert (path_ == nullptr); + + // Copy the tokens and start playing. + // + replay_data (replay_tokens (ln.tokens)); + + // We don't really need to change the mode since we already know + // the line type. + // + next (t, tt); + const location ll (get_location (t)); + + switch (lt) + { + case line_type::var: + { + // Enter the variable into the pool if this is not done during + // the script parsing. Note that in this case the pool is + // expected to be provided. 
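// In other words, a derived parser either enters its variables during
// pre-parsing (so that ln.var is already set) or passes a variable_pool
// here and the name is entered lazily on first execution (in which case,
// as noted in the header, the insertion is not MT-safe).
//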
+ // + const variable* var (ln.var); + + if (var == nullptr) + { + assert (var_pool != nullptr); + + var = &var_pool->insert (t.value); + } + + exec_set (*var, t, tt, ll); + + replay_stop (); + break; + } + case line_type::cmd: + { + bool single (false); + + if (li == 1) + { + lines::const_iterator j (i); + for (++j; j != e && j->type == line_type::var; ++j) ; + + if (j == e) // We have no another command. + single = true; + } + + exec_cmd (t, tt, li++, single, ll); + + replay_stop (); + break; + } + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + { + next (t, tt); // Skip to start of command. + + bool take; + if (lt != line_type::cmd_else) + { + take = exec_if (t, tt, li++, ll); + + if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) + take = !take; + } + else + { + assert (tt == type::newline); + take = true; + } + + replay_stop (); + + // If end is true, then find the 'end' line. Otherwise, find + // the next if-else line. If skip is true then increment the + // command line index. + // + auto next = [e, &li] (lines::const_iterator j, + bool end, + bool skip) -> lines::const_iterator + { + // We need to be aware of nested if-else chains. + // + size_t n (0); + + for (++j; j != e; ++j) + { + line_type lt (j->type); + + if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) + ++n; + + // If we are nested then we just wait until we get back + // to the surface. + // + if (n == 0) + { + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + if (end) break; + // Fall through. + case line_type::cmd_end: return j; + default: break; + } + } + + if (lt == line_type::cmd_end) + --n; + + if (skip) + { + // Note that we don't count else and end as commands. + // + switch (lt) + { + case line_type::cmd: + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: ++li; break; + default: break; + } + } + } + + assert (false); // Missing end. + return e; + }; + + // If we are taking this branch then we need to parse all the + // lines until the next if-else line and then skip all the + // lines until the end (unless next is already end). + // + // Otherwise, we need to skip all the lines until the next + // if-else line and then continue parsing. + // + if (take) + { + // Next if-else. + // + lines::const_iterator j (next (i, false, false)); + if (!exec_lines (i + 1, j, + exec_set, exec_cmd, exec_if, + li, + var_pool)) + return false; + + i = j->type == line_type::cmd_end ? j : next (j, true, true); + } + else + { + i = next (i, false, true); + if (i->type != line_type::cmd_end) + --i; // Continue with this line (e.g., elif or else). + } + + break; + } + case line_type::cmd_end: + { + assert (false); + } + } + } + + return true; + } + catch (const exit& e) + { + // Bail out if the script is exited with the failure status. Otherwise + // exit the lines execution normally. 
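// (Roughly: a plain `exit` terminates the script successfully, in which
// case the remaining lines are simply not executed, while exit with a
// description reports it and terminates with the failure status, which is
// translated into the failed exception below. See the exit builtin for
// the exact semantics.)
//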
+ // + if (!e.status) + throw failed (); + + replay_stop (); + return false; + } + } + + // parser::parsed_doc + // + parser::parsed_doc:: + parsed_doc (string s, uint64_t l, uint64_t c) + : str (move (s)), re (false), end_line (l), end_column (c) + { + } + + parser::parsed_doc:: + parsed_doc (regex_lines&& r, uint64_t l, uint64_t c) + : regex (move (r)), re (true), end_line (l), end_column (c) + { + } + + parser::parsed_doc:: + parsed_doc (parsed_doc&& d) + : re (d.re), end_line (d.end_line), end_column (d.end_column) + { + if (re) + new (®ex) regex_lines (move (d.regex)); + else + new (&str) string (move (d.str)); + } + + parser::parsed_doc:: + ~parsed_doc () + { + if (re) + regex.~regex_lines (); + else + str.~string (); + } + } +} diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx new file mode 100644 index 0000000..a63ecde --- /dev/null +++ b/libbuild2/script/parser.hxx @@ -0,0 +1,189 @@ +// file : libbuild2/script/parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCRIPT_PARSER_HXX +#define LIBBUILD2_SCRIPT_PARSER_HXX + +#include +#include +#include + +#include +#include + +#include +#include // redirect_aliases +#include + +namespace build2 +{ + namespace script + { + class lexer; + struct lexer_mode; + + class parser: protected build2::parser + { + public: + parser (context& c): build2::parser (c) {} + + // Helpers. + // + // Parse attribute string and perform attribute-guided assignment. + // Issue diagnostics and throw failed in case of an error. + // + void + apply_value_attributes (const variable*, // Optional. + value& lhs, + value&& rhs, + const string& attributes, + token_type assign_kind, + const path_name&); // For diagnostics. + + using build2::parser::apply_value_attributes; + + // Commonly used parsing functions. Issue diagnostics and throw failed + // in case of an error. + // + // Usually (but not always) parse functions receive the token/type + // from which it should start consuming and in return the token/type + // should contain the first token that has not been consumed. + // + // Functions that are called parse_*() rather than pre_parse_*() can be + // used for both stages. + // + protected: + value + parse_variable_line (token&, token_type&); + + // Ordered sequence of here-document redirects that we can expect to + // see after the command line. + // + struct here_redirect + { + size_t expr; // Index in command_expr. + size_t pipe; // Index in command_pipe. + int fd; // Redirect fd (0 - in, 1 - out, 2 - err). + }; + + struct here_doc + { + // Redirects that share here_doc. Most of the time we will have no + // more than 2 (2 - for the roundtrip cases). Doesn't refer overridden + // redirects and thus can be empty. + // + small_vector redirects; + + string end; + bool literal; // Literal (single-quote). + string modifiers; + + // Regex introducer ('\0' if not a regex, so can be used as bool). + // + char regex; + + // Regex global flags. Meaningful if regex != '\0'. + // + string regex_flags; + }; + using here_docs = vector; + + pair + parse_command_expr (token&, token_type&, const redirect_aliases&); + + command_exit + parse_command_exit (token&, token_type&); + + void + parse_here_documents (token&, token_type&, + pair&); + + struct parsed_doc + { + union + { + string str; // Here-document literal. + regex_lines regex; // Here-document regex. + }; + + bool re; // True if regex. + uint64_t end_line; // Here-document end marker location. 
+ uint64_t end_column; + + parsed_doc (string, uint64_t line, uint64_t column); + parsed_doc (regex_lines&&, uint64_t line, uint64_t column); + parsed_doc (parsed_doc&&); // Note: move constuctible-only type. + ~parsed_doc (); + }; + + parsed_doc + parse_here_document (token&, token_type&, + const string&, + const string& mode, + char re_intro); // '\0' if not a regex. + + // Start pre-parsing a script line returning its type, detected based on + // the first two tokens. Use the specified lexer mode to peek the second + // token. + // + line_type + pre_parse_line_start (token&, token_type&, lexer_mode); + + // Execute. + // + protected: + // Return false if the execution of the script should be terminated with + // the success status (e.g., as a result of encountering the exit + // builtin). For unsuccessful termination the failed exception is thrown. + // + using exec_set_function = void (const variable&, + token&, token_type&, + const location&); + + using exec_cmd_function = void (token&, token_type&, + size_t li, + bool single, + const location&); + + using exec_if_function = bool (token&, token_type&, + size_t li, + const location&); + + // If a parser implementation doesn't pre-enter variables into a pool + // during the pre-parsing phase, then they are entered during the + // execution phase and so the variable pool must be provided. Note that + // in this case the variable pool insertions are not MT-safe. + // + bool + exec_lines (lines::const_iterator b, lines::const_iterator e, + const function&, + const function&, + const function&, + size_t& li, + variable_pool* = nullptr); + + // Set lexer pointers for both the current and the base classes. + // + protected: + void + set_lexer (lexer*); + + // Number of quoted tokens since last reset. Note that this includes + // the peeked token, if any. + // + protected: + size_t + quoted () const; + + void + reset_quoted (token& current); + + size_t replay_quoted_; + + protected: + lexer* lexer_ = nullptr; + }; + } +} + +#endif // LIBBUILD2_SCRIPT_PARSER_HXX diff --git a/libbuild2/script/regex.cxx b/libbuild2/script/regex.cxx new file mode 100644 index 0000000..3f796b6 --- /dev/null +++ b/libbuild2/script/regex.cxx @@ -0,0 +1,436 @@ +// file : libbuild2/script/regex.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include + +using namespace std; + +namespace build2 +{ + namespace script + { + namespace regex + { + static_assert (alignof (char_string) % 4 == 0, + "unexpected char_string alignment"); + + static_assert (alignof (char_regex) % 4 == 0, + "unexpected char_regex alignment"); + + static_assert (sizeof (uintptr_t) > sizeof (int16_t), + "unexpected uintptr_t size"); + + const line_char line_char::nul (0); + const line_char line_char::eof (-1); + + // line_char + // + // We package the special character into uintptr_t with the following + // steps: + // + // - narrow down int value to int16_t (preserves all the valid values) + // + // - convert to uint16_t (bitwise representation stays the same, but no + // need to bother with signed value widening, leftmost bits loss on + // left shift, etc) + // + // - convert to uintptr_t (storage type) + // + // - shift left by two bits (the operation is fully reversible as + // uintptr_t is wider then uint16_t) + // + line_char:: + line_char (int c) + : data_ ( + (static_cast ( + static_cast ( + static_cast (c))) << 2) | + static_cast (line_type::special)) + { + // @@ How can we allow anything for basic_regex but only subset + // for our own code? 
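// A standalone round-trip check of the packing scheme described above.
// Illustration only: the names are made up, and the tag OR-ed in is 0
// because line_type::special is the first enumerator (see regex.hxx).
//
#include <cassert>
#include <cstdint>

namespace sketch
{
  inline std::uintptr_t
  pack_special (int c)
  {
    return (static_cast<std::uintptr_t> (
              static_cast<std::uint16_t> (
                static_cast<std::int16_t> (c))) << 2) | 0u; // special tag
  }

  inline int
  unpack_special (std::uintptr_t d)
  {
    // Reverse the steps: drop the 2-bit type tag, truncate to 16 bits,
    // then recover the sign via int16_t (which is what special() does).
    //
    return static_cast<std::int16_t> (static_cast<std::uint16_t> (d >> 2));
  }

  inline void
  round_trip ()
  {
    assert (unpack_special (pack_special (-1))  == -1);  // EOF survives.
    assert (unpack_special (pack_special (0))   == 0);   // nul survives.
    assert (unpack_special (pack_special ('|')) == '|');
  }
}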
+ // + const char ex[] = "pn\n\r"; + + assert (c == 0 || // Null character. + + // EOF. Note that is also passed by msvcrt as _Meta_eos + // enum value. + // + c == -1 || + + // libstdc++ line/paragraph separators. + // + c == u'\u2028' || c == u'\u2029' || + + (c > 0 && c <= 255 && ( + // Supported regex special characters. + // + syntax (c) || + + // libstdc++ look-ahead tokens, newline chars. + // + string::traits_type::find (ex, 4, c) != nullptr))); + } + + line_char:: + line_char (const char_string& s, line_pool& p) + : line_char (&(*p.strings.emplace (s).first)) + { + } + + line_char:: + line_char (char_string&& s, line_pool& p) + : line_char (&(*p.strings.emplace (move (s)).first)) + { + } + + line_char:: + line_char (char_regex r, line_pool& p) + // Note: in C++17 can write as p.regexes.emplace_front(move (r)) + // + : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r)))) + { + } + + bool + line_char::syntax (char c) + { + return string::traits_type::find ( + "()|.*+?{}\\0123456789,=!", 23, c) != nullptr; + } + + bool + operator== (const line_char& l, const line_char& r) + { + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt == rt) + { + bool res (true); + + switch (lt) + { + case line_type::special: res = l.special () == r.special (); break; + case line_type::regex: assert (false); break; + + // Note that we use pointers (rather than vales) comparison + // assuming that the strings must belong to the same pool. + // + case line_type::literal: res = l.literal () == r.literal (); break; + } + + return res; + } + + // Match literal with regex. + // + if (lt == line_type::literal && rt == line_type::regex) + return regex_match (*l.literal (), *r.regex ()); + else if (rt == line_type::literal && lt == line_type::regex) + return regex_match (*r.literal (), *l.regex ()); + + return false; + } + + bool + operator< (const line_char& l, const line_char& r) + { + if (l == r) + return false; + + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt != rt) + return lt < rt; + + bool res (false); + + switch (lt) + { + case line_type::special: res = l.special () < r.special (); break; + case line_type::literal: res = *l.literal () < *r.literal (); break; + case line_type::regex: assert (false); break; + } + + return res; + } + + // line_char_locale + // + + // An exemplar locale with the std::ctype facet. It is used + // for the subsequent line char locale objects creation (see below) + // which normally ends up with a shallow copy of a reference-counted + // object. + // + // Note that creating the line char locales from the exemplar is not + // merely an optimization: there is a data race in the libstdc++ (at + // least as of GCC 9.1) implementation of the locale(const locale&, + // Facet*) constructor (bug #91057). + // + // Also note that we install the facet in init() rather than during + // the object creation to avoid a race with the std::locale-related + // global variables initialization. + // + static locale line_char_locale_exemplar; + + void + init () + { + line_char_locale_exemplar = + locale (locale (), + new std::ctype ()); // Hidden by ctype bitmask. + } + + line_char_locale:: + line_char_locale () + : locale (line_char_locale_exemplar) + { + // Make sure init() has been called. + // + // Note: has_facet() is hidden by a private function in libc++. + // + assert (std::has_facet> (*this)); + } + + // char_regex + // + // Transform regex according to the extended flags {idot}. 
If regex is + // malformed then keep transforming, so the resulting string is + // malformed the same way. We expect the error to be reported by the + // char_regex ctor. + // + static string + transform (const string& s, char_flags f) + { + assert ((f & char_flags::idot) != char_flags::none); + + string r; + bool escape (false); + bool cclass (false); + + for (char c: s) + { + // Inverse escaping for a dot which is out of the char class + // brackets. + // + bool inverse (c == '.' && !cclass); + + // Handle the escape case. Note that we delay adding the backslash + // since we may have to inverse things. + // + if (escape) + { + if (!inverse) + r += '\\'; + + r += c; + escape = false; + + continue; + } + else if (c == '\\') + { + escape = true; + continue; + } + + // Keep track of being inside the char class brackets, escape if + // inversion. Note that we never inverse square brackets. + // + if (c == '[' && !cclass) + cclass = true; + else if (c == ']' && cclass) + cclass = false; + else if (inverse) + r += '\\'; + + r += c; + } + + if (escape) // Regex is malformed but that's not our problem. + r += '\\'; + + return r; + } + + static char_regex::flag_type + to_std_flags (char_flags f) + { + // Note that ECMAScript flag is implied in the absense of a grammar + // flag. + // + return (f & char_flags::icase) != char_flags::none + ? char_regex::icase + : char_regex::flag_type (); + } + + char_regex:: + char_regex (const char_string& s, char_flags f) + : base_type ((f & char_flags::idot) != char_flags::none + ? transform (s, f) + : s, + to_std_flags (f)) + { + } + } + } +} + +namespace std +{ + using namespace build2::script::regex; + + // char_traits + // + line_char* char_traits:: + assign (char_type* s, size_t n, char_type c) + { + for (size_t i (0); i != n; ++i) + s[i] = c; + return s; + } + + line_char* char_traits:: + move (char_type* d, const char_type* s, size_t n) + { + if (n > 0 && d != s) + { + // If d < s then it can't be in [s, s + n) range and so using copy() is + // safe. Otherwise d + n is out of (s, s + n] range and so using + // copy_backward() is safe. + // + if (d < s) + std::copy (s, s + n, d); // Hidden by char_traits::copy(). + else + copy_backward (s, s + n, d + n); + } + + return d; + } + + line_char* char_traits:: + copy (char_type* d, const char_type* s, size_t n) + { + std::copy (s, s + n, d); // Hidden by char_traits::copy(). + return d; + } + + int char_traits:: + compare (const char_type* s1, const char_type* s2, size_t n) + { + for (size_t i (0); i != n; ++i) + { + if (s1[i] < s2[i]) + return -1; + else if (s2[i] < s1[i]) + return 1; + } + + return 0; + } + + size_t char_traits:: + length (const char_type* s) + { + size_t i (0); + while (s[i] != char_type::nul) + ++i; + + return i; + } + + const line_char* char_traits:: + find (const char_type* s, size_t n, const char_type& c) + { + for (size_t i (0); i != n; ++i) + { + if (s[i] == c) + return s + i; + } + + return nullptr; + } + + // ctype + // + locale::id ctype::id; + + const line_char* ctype:: + is (const char_type* b, const char_type* e, mask* m) const + { + while (b != e) + { + const char_type& c (*b++); + + *m++ = c.type () == line_type::special && c.special () >= 0 && + build2::digit (static_cast (c.special ())) + ? 
digit + : 0; + } + + return e; + } + + const line_char* ctype:: + scan_is (mask m, const char_type* b, const char_type* e) const + { + for (; b != e; ++b) + { + if (is (m, *b)) + return b; + } + + return e; + } + + const line_char* ctype:: + scan_not (mask m, const char_type* b, const char_type* e) const + { + for (; b != e; ++b) + { + if (!is (m, *b)) + return b; + } + + return e; + } + + const char* ctype:: + widen (const char* b, const char* e, char_type* c) const + { + while (b != e) + *c++ = widen (*b++); + + return e; + } + + const line_char* ctype:: + narrow (const char_type* b, const char_type* e, char def, char* c) const + { + while (b != e) + *c++ = narrow (*b++, def); + + return e; + } + + // regex_traits + // + int regex_traits:: + value (char_type c, int radix) const + { + assert (radix == 8 || radix == 10 || radix == 16); + + if (c.type () != line_type::special) + return -1; + + const char digits[] = "0123456789ABCDEF"; + const char* d (string::traits_type::find (digits, radix, c.special ())); + return d != nullptr ? static_cast (d - digits) : -1; + } +} diff --git a/libbuild2/script/regex.hxx b/libbuild2/script/regex.hxx new file mode 100644 index 0000000..30d3363 --- /dev/null +++ b/libbuild2/script/regex.hxx @@ -0,0 +1,678 @@ +// file : libbuild2/script/regex.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCRIPT_REGEX_HXX +#define LIBBUILD2_SCRIPT_REGEX_HXX + +#include +#include +#include +#include // basic_string +#include // make_unsigned, enable_if, is_* +#include + +#include +#include + +namespace build2 +{ + namespace script + { + namespace regex + { + using char_string = std::basic_string; + + enum class char_flags: uint16_t + { + icase = 0x1, // Case-insensitive match. + idot = 0x2, // Invert '.' escaping. + + none = 0 + }; + + // Restricts valid standard flags to just {icase}, extends with custom + // flags {idot}. + // + class char_regex: public std::basic_regex + { + public: + using base_type = std::basic_regex; + + char_regex (const char_string&, char_flags = char_flags::none); + }; + + // Newlines are line separators and are not part of the line: + // + // lineline + // + // Specifically, this means that a customary trailing newline creates a + // trailing blank line. + // + // All characters can inter-compare (though there cannot be regex + // characters in the output, only in line_regex). + // + // Note that we assume that line_regex and the input to regex_match() + // use the same pool. + // + struct line_pool + { + // Note that we assume the pool can be moved without invalidating + // pointers to any already pooled entities. + // + std::unordered_set strings; + std::list regexes; + }; + + enum class line_type + { + special, + literal, + regex + }; + + struct line_char + { + // Steal last two bits from the pointer to store the type. + // + private: + std::uintptr_t data_; + + public: + line_type + type () const {return static_cast (data_ & 0x3);} + + int + special () const + { + // Stored as (shifted) int16_t. Perform steps reversed to those + // that are described in the comment for the corresponding ctor. + // Note that the intermediate cast to uint16_t is required to + // portably preserve the -1 special character. + // + return static_cast (static_cast (data_ >> 2)); + } + + const char_string* + literal () const + { + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. 
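// (That is, the pooled string's address is stored with its two low bits,
// which the alignment static_asserts in regex.cxx guarantee to be zero,
// reused as the type tag; masking them off recovers the pointer.)
//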
+ // + return reinterpret_cast ( + data_ & ~std::uintptr_t (0x3)); + } + + const char_regex* + regex () const + { + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast ( + data_ & ~std::uintptr_t (0x3)); + } + + static const line_char nul; + static const line_char eof; + + // Note: creates an uninitialized value. + // + line_char () = default; + + // Create a special character. The argument value must be one of the + // following ones: + // + // 0 (nul character) + // -1 (EOF) + // [()|.*+?{}\0123456789,=!] (excluding []) + // + // Note that the constructor is implicit to allow basic_regex to + // implicitly construct line_chars from special char literals (in + // particular libstdc++ appends them to an internal line_string). + // + // Also note that we extend the valid characters set (see above) with + // 'p', 'n' (used by libstdc++ for positive/negative look-ahead + // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used + // by libstdc++ for newline/newparagraph matching). + // + line_char (int); + + // Create a literal character. + // + // Don't copy string if already pooled. + // + explicit + line_char (const char_string&, line_pool&); + + explicit + line_char (char_string&&, line_pool&); + + explicit + line_char (const char_string* s) // Assume already pooled. + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_string address is a + // multiple of four. + // + : data_ (reinterpret_cast (s) | + static_cast (line_type::literal)) {} + + // Create a regex character. + // + explicit + line_char (char_regex, line_pool&); + + explicit + line_char (const char_regex* r) // Assume already pooled. + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_regex address is a + // multiple of four. + // + : data_ (reinterpret_cast (r) | + static_cast (line_type::regex)) {} + + // Provide basic_regex with the ability to use line_char in a context + // where a char value is expected (e.g., as a function argument). + // + // libstdc++ seems to cast special line_chars only (and such a + // conversion is meanigfull). + // + // msvcrt casts line_chars of arbitrary types instead. The only + // reasonable strategy is to return a value that differs from any + // other that can be encountered in a regex expression and so will + // unlikelly be misinterpreted. + // + operator char () const + { + return type () == line_type::special ? special () : '\a'; // BELL. + } + + // Return true if the character is a syntax (special) one. + // + static bool + syntax (char); + + // Provide basic_regex (such as from msvcrt) with the ability to + // explicitly cast line_chars to implementation-specific numeric + // types (enums, msvcrt's _Uelem, etc). + // + template + explicit + operator T () const + { + assert (type () == line_type::special); + return static_cast (special ()); + } + }; + + // Perform "deep" characters comparison (for example match literal + // character with a regex character), rather than just compare them + // literally. At least one argument must be of a type other than regex + // as there is no operator==() defined to compare regexes. Characters + // of the literal type must share the same pool (strings are compared + // by pointers not by values). 
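// A small usage sketch of this "deep" comparison, assuming only the
// declarations from this header (the function name is made up):
//
// #include <libbuild2/script/regex.hxx>
//
// using namespace build2::script::regex;
//
// bool
// literal_matches_regex ()
// {
//   line_pool p;
//
//   line_char l (char_string ("foo123"), p);                  // Literal.
//   line_char r (char_regex (char_string ("foo[0-9]+")), p);  // Regex.
//
//   return l == r; // true: the literal is matched against the regex.
// }
//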
+ // + bool + operator== (const line_char&, const line_char&); + + // Return false if arguments are equal (operator==() returns true). + // Otherwise if types are different return the value implying that + // special < literal < regex. If types are special or literal return + // the result of the respective characters or strings comparison. At + // least one argument must be of a type other than regex as there is no + // operator<() defined to compare regexes. + // + // While not very natural operation for the class we have, we have to + // provide some meaningfull semantics for such a comparison as it is + // required by the char_traits specialization. While we + // could provide it right in that specialization, let's keep it here + // for basic_regex implementations that potentially can compare + // line_chars as they compare them with expressions of other types (see + // below). + // + bool + operator< (const line_char&, const line_char&); + + inline bool + operator!= (const line_char& l, const line_char& r) + { + return !(l == r); + } + + inline bool + operator<= (const line_char& l, const line_char& r) + { + return l < r || l == r; + } + + // Provide basic_regex (such as from msvcrt) with the ability to + // compare line_char to a value of an integral or + // implementation-specific enum type. In the absense of the following + // template operators, such a comparisons would be ambigious for + // integral types (given that there are implicit conversions + // int->line_char and line_char->char) and impossible for enums. + // + // Note that these == and < operators can succeed only for a line_char + // of the special type. For other types they always return false. That + // in particular leads to the following case: + // + // (lc != c) != (lc < c || c < lc). + // + // Note that we can not assert line_char is of the special type as + // basic_regex (such as from libc++) may need the ability to check if + // arbitrary line_char belongs to some special characters range (like + // ['0', '9']). + // + template + struct line_char_cmp + : public std::enable_if::value || + (std::is_enum::value && + !std::is_same::value)> {}; + + template ::type> + bool + operator== (const line_char& l, const T& r) + { + return l.type () == line_type::special && + static_cast (l.special ()) == r; + } + + template ::type> + bool + operator== (const T& l, const line_char& r) + { + return r.type () == line_type::special && + static_cast (r.special ()) == l; + } + + template ::type> + bool + operator!= (const line_char& l, const T& r) + { + return !(l == r); + } + + template ::type> + bool + operator!= (const T& l, const line_char& r) + { + return !(l == r); + } + + template ::type> + bool + operator< (const line_char& l, const T& r) + { + return l.type () == line_type::special && + static_cast (l.special ()) < r; + } + + template ::type> + bool + operator< (const T& l, const line_char& r) + { + return r.type () == line_type::special && + l < static_cast (r.special ()); + } + + template ::type> + inline bool + operator<= (const line_char& l, const T& r) + { + return l < r || l == r; + } + + template ::type> + inline bool + operator<= (const T& l, const line_char& r) + { + return l < r || l == r; + } + + using line_string = std::basic_string; + + // Locale that has ctype facet installed. Used in the + // regex_traits specialization (see below). + // + class line_char_locale: public std::locale + { + public: + // Create a copy of the global C++ locale. 
+ // + line_char_locale (); + }; + + // Initialize the script regex global state. Should be called once + // prior to creating objects of types from this namespace. Note: not + // thread-safe. + // + void + init (); + } + } +} + +// Standard template specializations for line_char that are required for the +// basic_regex instantiation. +// +namespace std +{ + template <> + class char_traits + { + public: + using char_type = build2::script::regex::line_char; + using int_type = char_type; + using off_type = char_traits::off_type; + using pos_type = char_traits::pos_type; + using state_type = char_traits::state_type; + + static void + assign (char_type& c1, const char_type& c2) {c1 = c2;} + + static char_type* + assign (char_type*, size_t, char_type); + + // Note that eq() and lt() are not constexpr (as required by C++11) + // because == and < operators for char_type are not constexpr. + // + static bool + eq (const char_type& l, const char_type& r) {return l == r;} + + static bool + lt (const char_type& l, const char_type& r) {return l < r;} + + static char_type* + move (char_type*, const char_type*, size_t); + + static char_type* + copy (char_type*, const char_type*, size_t); + + static int + compare (const char_type*, const char_type*, size_t); + + static size_t + length (const char_type*); + + static const char_type* + find (const char_type*, size_t, const char_type&); + + static constexpr char_type + to_char_type (const int_type& c) {return c;} + + static constexpr int_type + to_int_type (const char_type& c) {return int_type (c);} + + // Note that the following functions are not constexpr (as required by + // C++11) because their return expressions are not constexpr. + // + static bool + eq_int_type (const int_type& l, const int_type& r) {return l == r;} + + static int_type eof () {return char_type::eof;} + + static int_type + not_eof (const int_type& c) + { + return c != char_type::eof ? c : char_type::nul; + } + }; + + // ctype<> must be derived from both ctype_base and locale::facet (the later + // supports ref-counting used by the std::locale implementation internally). + // + // msvcrt for some reason also derives ctype_base from locale::facet which + // produces "already a base-class" warning and effectivelly breaks the + // reference counting. So we derive from ctype_base only in this case. + // + template <> + class ctype: public ctype_base +#if !defined(_MSC_VER) || _MSC_VER >= 2000 + , public locale::facet +#endif + { + // Used by the implementation only. + // + using line_type = build2::script::regex::line_type; + + public: + using char_type = build2::script::regex::line_char; + + static locale::id id; + +#if !defined(_MSC_VER) || _MSC_VER >= 2000 + explicit + ctype (size_t refs = 0): locale::facet (refs) {} +#else + explicit + ctype (size_t refs = 0): ctype_base (refs) {} +#endif + + // While unnecessary, let's keep for completeness. + // + virtual + ~ctype () override = default; + + // The C++ standard requires the following functions to call their virtual + // (protected) do_*() counterparts that provide the real implementations. + // The only purpose for this indirection is to provide a user with the + // ability to customize existing (standard) ctype facets. As we do not + // provide such an ability, for simplicity we will omit the do_*() + // functions and provide the implementations directly. This should be safe + // as nobody except us could call those protected functions. 
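+ // For example, a caller would go through the facet installed in
+ // line_char_locale (a hypothetical snippet along the lines of the tests):
+ //
+ //   line_char_locale l;
+ //   const auto& t (use_facet<ctype<line_char>> (l));
+ //
+ //   t.is (ctype_base::digit, line_char ('0')); // true
+ //   t.is (ctype_base::digit, line_char ('?')); // false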
+ // + bool + is (mask m, char_type c) const + { + return m == + (c.type () == line_type::special && c.special () >= 0 && + build2::digit (static_cast (c.special ())) + ? digit + : 0); + } + + const char_type* + is (const char_type*, const char_type*, mask*) const; + + const char_type* + scan_is (mask, const char_type*, const char_type*) const; + + const char_type* + scan_not (mask, const char_type*, const char_type*) const; + + char_type + toupper (char_type c) const {return c;} + + const char_type* + toupper (char_type*, const char_type* e) const {return e;} + + char_type + tolower (char_type c) const {return c;} + + const char_type* + tolower (char_type*, const char_type* e) const {return e;} + + char_type + widen (char c) const {return char_type (c);} + + const char* + widen (const char*, const char*, char_type*) const; + + char + narrow (char_type c, char def) const + { + return c.type () == line_type::special ? c.special () : def; + } + + const char_type* + narrow (const char_type*, const char_type*, char, char*) const; + }; + + // Note: the current application locale must be POSIX. Otherwise the + // behavior is undefined. + // + template <> + class regex_traits + { + public: + using char_type = build2::script::regex::line_char; + using string_type = build2::script::regex::line_string; + using locale_type = build2::script::regex::line_char_locale; + using char_class_type = regex_traits::char_class_type; + + // Workaround for msvcrt bugs. For some reason it assumes such a members + // to be present in a regex_traits specialization. + // +#if defined(_MSC_VER) && _MSC_VER < 2000 + static const ctype_base::mask _Ch_upper = ctype_base::upper; + static const ctype_base::mask _Ch_alpha = ctype_base::alpha; + + // Unsigned numeric type. msvcrt normally casts characters to this type + // for comparing with some numeric values or for calculating an index in + // some bit array. Luckily that all relates to the character class + // handling that we don't support. + // + using _Uelem = unsigned int; +#endif + + regex_traits () = default; // Unnecessary but let's keep for completeness. + + static size_t + length (const char_type* p) {return string_type::traits_type::length (p);} + + char_type + translate (char_type c) const {return c;} + + // Case-insensitive matching is not supported by line_regex. So there is no + // reason for the function to be called. + // + char_type + translate_nocase (char_type c) const {assert (false); return c;} + + // Return a sort-key - the exact copy of [b, e). + // + template + string_type + transform (I b, I e) const {return string_type (b, e);} + + // Return a case-insensitive sort-key. Case-insensitive matching is not + // supported by line_regex. So there is no reason for the function to be + // called. + // + template + string_type + transform_primary (I b, I e) const + { + assert (false); + return string_type (b, e); + } + + // POSIX regex grammar and collating elements (e.g., [.tilde.]) in + // particular are not supported. So there is no reason for the function to + // be called. + // + template + string_type + lookup_collatename (I, I) const {assert (false); return string_type ();} + + // Character classes (e.g., [:lower:]) are not supported. So there is no + // reason for the function to be called. + // + template + char_class_type + lookup_classname (I, I, bool = false) const + { + assert (false); + return char_class_type (); + } + + // Return false as we don't support character classes (e.g., [:lower:]). 
+ // + bool + isctype (char_type, char_class_type) const {return false;} + + int + value (char_type, int) const; + + // Return the locale passed as an argument as we do not expect anything + // other than POSIX locale, that we also assume to be imbued by default. + // + locale_type + imbue (locale_type l) {return l;} + + locale_type + getloc () const {return locale_type ();} + }; + + // We assume line_char to be an unsigned type and express that with the + // following specialization used by basic_regex implementations. + // + // libstdc++ defines unsigned CharT type (regex_traits template parameter) + // to use as an index in some internal cache regardless if the cache is used + // for this specialization (and the cache is used only if CharT is char). + // + template <> + struct make_unsigned + { + using type = build2::script::regex::line_char; + }; + + // When used with libc++ the linker complains that it can't find + // __match_any_but_newline::__exec() function. The problem is + // that the function is only specialized for char and wchar_t + // (LLVM bug #31409). As line_char has no notion of the newline character we + // specialize the class template to behave as the __match_any + // instantiation does (that luckily has all the functions in place). + // +#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000 + template <> + class __match_any_but_newline + : public __match_any + { + public: + using base = __match_any; + using base::base; + }; +#endif +} + +namespace build2 +{ + namespace script + { + namespace regex + { + class line_regex: public std::basic_regex + { + public: + using base_type = std::basic_regex; + + using base_type::base_type; + + line_regex () = default; + + // Move string regex together with the pool used to create it. + // + line_regex (line_string&& s, line_pool&& p) + // No move-string ctor for base_type, so emulate it. + // + : base_type (s), pool (move (p)) {s.clear ();} + + // Move constuctible/assignable-only type. 
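+ // For example (hypothetical):
+ //
+ //   line_regex r1 (move (ls), move (pl)); // Steals the string and pool.
+ //   line_regex r2 (move (r1));            // Ok.
+ //   line_regex r3 (r2);                   // Error: deleted copy ctor.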
+ // + line_regex (line_regex&&) = default; + line_regex (const line_regex&) = delete; + line_regex& operator= (line_regex&&) = default; + line_regex& operator= (const line_regex&) = delete; + + public: + line_pool pool; + }; + } + } +} + +#include + +#endif // LIBBUILD2_SCRIPT_REGEX_HXX diff --git a/libbuild2/script/regex.ixx b/libbuild2/script/regex.ixx new file mode 100644 index 0000000..e72b578 --- /dev/null +++ b/libbuild2/script/regex.ixx @@ -0,0 +1,31 @@ +// file : libbuild2/script/regex.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + namespace script + { + namespace regex + { + inline char_flags + operator&= (char_flags& x, char_flags y) + { + return x = static_cast ( + static_cast (x) & static_cast (y)); + } + + inline char_flags + operator|= (char_flags& x, char_flags y) + { + return x = static_cast ( + static_cast (x) | static_cast (y)); + } + + inline char_flags + operator& (char_flags x, char_flags y) {return x &= y;} + + inline char_flags + operator| (char_flags x, char_flags y) {return x |= y;} + } + } +} diff --git a/libbuild2/script/regex.test.cxx b/libbuild2/script/regex.test.cxx new file mode 100644 index 0000000..36d47e1 --- /dev/null +++ b/libbuild2/script/regex.test.cxx @@ -0,0 +1,303 @@ +// file : libbuild2/script/regex.test.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include +#include // is_* + +#include + +using namespace std; +using namespace build2::script::regex; + +int +main () +{ + using lc = line_char; + using ls = line_string; + using lr = line_regex; + using cf = char_flags; + using cr = char_regex; + + init (); // Initializes the script regex global state. + + // Test line_char. + // + { + static_assert (is_trivial::value && + is_standard_layout::value && + !is_array::value, + "line_char must be char-like"); + + // Zero-initialed line_char should be the null-char as required by + // char_traits<>::length() specification. + // + assert (lc () == lc::nul); + + line_pool p; + + assert (lc::eof == -1); + assert (lc::nul == 0); + + enum meta {mn = 'n', mp = 'p'}; + + // Special roundtrip. + // + assert (lc ('0').special () == '0'); + assert (lc (0).special () == 0); + assert (lc (-1).special () == -1); + assert (lc ('p').special () == 'p'); + assert (lc (u'\u2028').special () == u'\u2028'); + + // Special comparison. + // + assert (lc ('0') == lc ('0')); + assert (lc ('0') == '0'); + assert (lc ('n') == mn); + assert (mn == static_cast (lc ('n'))); + + assert (lc ('0') != lc ('1')); + assert (lc ('0') != '1'); + assert (lc ('n') != mp); + assert (lc ('0') != lc ("0", p)); + assert (lc ('0') != lc (cr ("0"), p)); + + assert (lc ('0') < lc ('1')); + assert (lc ('0') < '1'); + assert (lc ('1') < lc ("0", p)); + assert (lc ('n') < mp); + + assert (lc ('0') <= '1'); + assert (lc ('0') <= lc ('1')); + assert (lc ('n') <= mn); + assert (lc ('1') <= lc ("0", p)); + + // Literal roundtrip. + // + assert (*lc ("abc", p).literal () == "abc"); + + // Literal comparison. + // + assert (lc ("a", p) == lc ("a", p)); + assert (lc ("a", p).literal () == lc ("a", p).literal ()); + assert (char (lc ("a", p)) == '\a'); + + assert (lc ("a", p) != lc ("b", p)); + assert (!(lc ("a", p) != lc (cr ("a"), p))); + assert (lc ("a", p) != lc (cr ("b"), p)); + + assert (lc ("a", p) < lc ("b", p)); + assert (!(lc ("a", p) < lc (cr ("a"), p))); + + assert (lc ("a", p) <= lc ("b", p)); + assert (lc ("a", p) <= lc (cr ("a"), p)); + assert (lc ("a", p) < lc (cr ("c"), p)); + + // Regex roundtrip. 
+ // + assert (regex_match ("abc", *lc (cr ("abc"), p).regex ())); + + // Regex flags. + // + // icase + // + assert (regex_match ("ABC", cr ("abc", cf::icase))); + + // idot + // + assert (!regex_match ("a", cr ("[.]", cf::idot))); + assert (!regex_match ("a", cr ("[\\.]", cf::idot))); + + assert (regex_match ("a", cr ("."))); + assert (!regex_match ("a", cr (".", cf::idot))); + assert (regex_match ("a", cr ("\\.", cf::idot))); + assert (!regex_match ("a", cr ("\\."))); + + // regex::transform() + // + // The function is static and we can't test it directly. So we will test + // it indirectly via regex matches. + // + // @@ Would be nice to somehow address the inability to test internals (not + // exposed via headers). As a part of utility library support? + // + assert (regex_match (".a[.", cr (".\\.\\[[.]", cf::idot))); + assert (regex_match (".a[.", cr (".\\.\\[[\\.]", cf::idot))); + assert (!regex_match ("ba[.", cr (".\\.\\[[.]", cf::idot))); + assert (!regex_match (".a[b", cr (".\\.\\[[.]", cf::idot))); + assert (!regex_match (".a[b", cr (".\\.\\[[\\.]", cf::idot))); + + // Regex comparison. + // + assert (lc ("a", p) == lc (cr ("a|b"), p)); + assert (lc (cr ("a|b"), p) == lc ("a", p)); + } + + // Test char_traits. + // + { + using ct = char_traits; + using vc = vector; + + lc c; + ct::assign (c, '0'); + assert (c == ct::char_type ('0')); + + assert (ct::to_char_type (c) == c); + assert (ct::to_int_type (c) == c); + + assert (ct::eq_int_type (c, c)); + assert (!ct::eq_int_type (c, lc::eof)); + + assert (ct::eof () == lc::eof); + + assert (ct::not_eof (c) == c); + assert (ct::not_eof (lc::eof) != lc::eof); + + ct::assign (&c, 1, '1'); + assert (c == ct::int_type ('1')); + + assert (ct::eq (lc ('0'), lc ('0'))); + assert (ct::lt (lc ('0'), lc ('1'))); + + vc v1 ({'0', '1', '2'}); + vc v2 (3, lc::nul); + + assert (ct::find (v1.data (), 3, '1') == v1.data () + 1); + + ct::copy (v2.data (), v1.data (), 3); + assert (v2 == v1); + + v2.push_back (lc::nul); + assert (ct::length (v2.data ()) == 3); + + // Overlaping ranges. + // + ct::move (v1.data () + 1, v1.data (), 2); + assert (v1 == vc ({'0', '0', '1'})); + + v1 = vc ({'0', '1', '2'}); + ct::move (v1.data (), v1.data () + 1, 2); + assert (v1 == vc ({'1', '2', '2'})); + } + + // Test line_char_locale and ctype (only non-trivial functions). + // + { + using ct = ctype; + + line_char_locale l; + + // It is better not to create q facet on stack as it is + // reference-countable. + // + const ct& t (use_facet (l)); + line_pool p; + + assert (t.is (ct::digit, '0')); + assert (!t.is (ct::digit, '?')); + assert (!t.is (ct::digit, lc ("0", p))); + + const lc chars[] = { '0', '?' }; + ct::mask m[2]; + + const lc* b (chars); + const lc* e (chars + 2); + + // Cast flag value to mask type and compare to mask. + // + auto fl = [] (ct::mask m, ct::mask f) {return m == f;}; + + t.is (b, e, m); + assert (fl (m[0], ct::digit) && fl (m[1], 0)); + + assert (t.scan_is (ct::digit, b, e) == b); + assert (t.scan_is (0, b, e) == b + 1); + + assert (t.scan_not (ct::digit, b, e) == b + 1); + assert (t.scan_not (0, b, e) == b); + + { + char nr[] = "0?"; + lc wd[2]; + t.widen (nr, nr + 2, wd); + assert (wd[0] == b[0] && wd[1] == b[1]); + } + + { + lc wd[] = {'0', lc ("a", p)}; + char nr[2]; + t.narrow (wd, wd + 2, '-', nr); + assert (nr[0] == '0' && nr[1] == '-'); + } + } + + // Test regex_traits. Functions other that value() are trivial. + // + { + regex_traits t; + + const int radix[] = {8, 10}; // Radix 16 is not supported by line_char. 
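+ // (The special character set does not include 'A'-'F', so a line_char
+ // for a hex digit above 9 presumably cannot be created and value() can
+ // only be exercised for the '0'-'9' characters.)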
+ const char digits[] = "0123456789ABCDEF"; + + for (size_t r (0); r < 2; ++r) + { + for (int i (0); i < radix[r]; ++i) + assert (t.value (digits[i], radix[r]) == i); + } + } + + // Test line_regex construction. + // + { + line_pool p; + lr r1 ({lc ("foo", p), lc (cr ("ba(r|z)"), p)}, move (p)); + + lr r2 (move (r1)); + assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2)); + assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2)); + } + + // Test line_regex match. + // + { + line_pool p; + + const lc foo ("foo", p); + const lc bar ("bar", p); + const lc baz ("baz", p); + const lc blank ("", p); + + assert (regex_match (ls ({foo, bar}), lr ({foo, bar}))); + assert (!regex_match (ls ({foo, baz}), lr ({foo, bar}))); + + assert (regex_match (ls ({bar, foo}), + lr ({'(', foo, '|', bar, ')', '+'}))); + + assert (regex_match (ls ({foo, foo, bar}), + lr ({'(', foo, ')', '\\', '1', bar}))); + + assert (regex_match (ls ({foo}), lr ({lc (cr ("fo+"), p)}))); + assert (regex_match (ls ({foo}), lr ({lc (cr (".*"), p)}))); + assert (regex_match (ls ({blank}), lr ({lc (cr (".*"), p)}))); + + assert (regex_match (ls ({blank, blank, foo}), + lr ({blank, '*', foo, blank, '*'}))); + + assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'}))); + + assert (regex_match (ls ({blank, blank}), + lr ({blank, '*', foo, '?', blank, '*'}))); + + assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'}))); + assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'}))); + + assert (regex_match (ls ({foo, foo}), + lr ({foo, '{', '1', ',', '2', '}'}))); + + assert (!regex_match (ls ({foo, foo}), + lr ({foo, '{', '3', ',', '4', '}'}))); + + assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo}))); + assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo}))); + } +} diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx new file mode 100644 index 0000000..38436b9 --- /dev/null +++ b/libbuild2/script/run.cxx @@ -0,0 +1,2020 @@ +// file : libbuild2/script/run.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // streamsize + +#include +#include +#include // fdopen_mode, fddup() +#include // path_search() +#include + +#include +#include + +#include +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace script + { + string + diag_path (const path& d) + { + string r ("'"); + + r += stream_verb_map ().path < 1 + ? diag_relative (d) + : d.representation (); + + r += '\''; + return r; + } + + string + diag_path (const dir_name_view& dn) + { + string r; + if (dn.name != nullptr && *dn.name) + { + r += **dn.name; + r += ' '; + } + + assert (dn.path != nullptr); + + r += diag_path (*dn.path); + return r; + } + + // Return the environment temporary directory, creating it if it doesn't + // exist. + // + static inline const dir_path& + temp_dir (environment& env) + { + if (env.temp_dir.empty ()) + env.create_temp_dir (); + + return env.temp_dir; + } + + // Normalize a path. Also make the relative path absolute using the + // specified directory unless it is already absolute. + // + static path + normalize (path p, const dir_path& d, const location& l) + { + path r (p.absolute () ? move (p) : d / move (p)); + + try + { + r.normalize (); + } + catch (const invalid_path& e) + { + fail (l) << "invalid file path " << e.path; + } + + return r; + } + + // Check if a path is not empty, the referenced file exists and is not + // empty. 
+ // + static bool + non_empty (const path& p, const location& ll) + { + if (p.empty () || !exists (p)) + return false; + + try + { + ifdstream is (p); + return is.peek () != ifdstream::traits_type::eof (); + } + catch (const io_error& e) + { + // While there can be no fault of the script command being currently + // executed let's add the location anyway to ease the + // troubleshooting. And let's stick to that principle down the road. + // + fail (ll) << "unable to read " << p << ": " << e << endf; + } + } + + // If the file exists, not empty and not larger than 4KB print it to the + // diag record. The file content goes from the new line and is not + // indented. + // + static void + print_file (diag_record& d, const path& p, const location& ll) + { + if (exists (p)) + { + try + { + ifdstream is (p, ifdstream::badbit); + + if (is.peek () != ifdstream::traits_type::eof ()) + { + char buf[4096 + 1]; // Extra byte is for terminating '\0'. + + // Note that the string is always '\0'-terminated with a maximum + // sizeof (buf) - 1 bytes read. + // + is.getline (buf, sizeof (buf), '\0'); + + // Print if the file fits 4KB-size buffer. Note that if it + // doesn't the failbit is set. + // + if (is.eof ()) + { + // Suppress the trailing newline character as the diag record + // adds it's own one when flush. + // + streamsize n (is.gcount ()); + assert (n > 0); + + // Note that if the file contains '\0' it will also be counted + // by gcount(). But even in the worst case we will stay in the + // buffer boundaries (and so not crash). + // + if (buf[n - 1] == '\n') + buf[n - 1] = '\0'; + + d << '\n' << buf; + } + } + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << p << ": " << e; + } + } + } + + // Save a string to the file. Fail if exception is thrown by underlying + // operations. + // + static void + save (const path& p, const string& s, const location& ll) + { + try + { + ofdstream os (p); + os << s; + os.close (); + } + catch (const io_error& e) + { + fail (ll) << "unable to write to " << p << ": " << e; + } + } + + // Transform string according to here-* redirect modifiers from the {/} + // set. + // + static string + transform (const string& s, + bool regex, + const string& modifiers, + environment& env) + { + if (modifiers.find ('/') == string::npos) + return s; + + // For targets other than Windows leave the string intact. + // + if (env.host.class_ != "windows") + return s; + + // Convert forward slashes to Windows path separators (escape for + // regex). + // + string r; + for (size_t p (0);;) + { + size_t sp (s.find ('/', p)); + + if (sp != string::npos) + { + r.append (s, p, sp - p); + r.append (regex ? "\\\\" : "\\"); + p = sp + 1; + } + else + { + r.append (s, p, sp); + break; + } + } + + return r; + } + + // Return true if the script temporary directory is not created yet (and + // so cannot contain any path), a path is not under the temporary + // directory or this directory will not be removed on failure. + // + static inline bool + avail_on_failure (const path& p, const environment& env) + { + return env.temp_dir.empty () || + env.temp_dir_keep || + !p.sub (env.temp_dir); + } + + // Check if the script command output matches the expected result + // (redirect value). Noop for redirect types other than none, here_*. 
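+ // For example (hypothetically), for the expected output specified as the
+ // here-document:
+ //
+ //   cmd >>EOO
+ //   foo
+ //   EOO
+ //
+ // the 'foo' line is saved next to the cached stdout (with the .orig
+ // extension) and the two files are compared using the diff utility (see
+ // below for details).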
+ // + static bool + check_output (const path& pr, + const path& op, + const path& ip, + const redirect& rd, + const location& ll, + environment& env, + bool diag, + const char* what) + { + auto input_info = [&ip, &ll, &env] (diag_record& d) + { + if (non_empty (ip, ll) && avail_on_failure (ip, env)) + d << info << "stdin: " << ip; + }; + + auto output_info = [&what, &ll, &env] (diag_record& d, + const path& p, + const char* prefix = "", + const char* suffix = "") + { + if (non_empty (p, ll)) + { + if (avail_on_failure (p, env)) + d << info << prefix << what << suffix << ": " << p; + } + else + d << info << prefix << what << suffix << " is empty"; + }; + + if (rd.type == redirect_type::none) + { + // Check that there is no output produced. + // + assert (!op.empty ()); + + if (!non_empty (op, ll)) + return true; + + if (diag) + { + diag_record d (error (ll)); + d << pr << " unexpectedly writes to " << what; + + if (avail_on_failure (op, env)) + d << info << what << ": " << op; + + input_info (d); + + // Print cached output. + // + print_file (d, op, ll); + } + + // Fall through (to return false). + // + } + else if (rd.type == redirect_type::here_str_literal || + rd.type == redirect_type::here_doc_literal || + (rd.type == redirect_type::file && + rd.file.mode == redirect_fmode::compare)) + { + // The expected output is provided as a file or as a string. Save the + // string to a file in the later case. + // + assert (!op.empty ()); + + path eop; + + if (rd.type == redirect_type::file) + eop = normalize (rd.file.path, *env.work_dir.path, ll); + else + { + eop = path (op + ".orig"); + + save (eop, + transform (rd.str, false /* regex */, rd.modifiers (), env), + ll); + + env.clean_special (eop); + } + + // Use the diff utility for comparison. + // + path dp ("diff"); + process_path pp (run_search (dp, true)); + + cstrings args {pp.recall_string ()}; + + // If both files being compared won't be available on failure, then + // instruct diff not to print the file paths. It seems that the only + // way to achieve this is to abandon the output unified format in the + // favor of the minimal output, which normally is still informative + // enough for the troubleshooting (contains the difference line + // numbers, etc). + // + if (avail_on_failure (eop, env) || avail_on_failure (op, env)) + args.push_back ("-u"); + + // Ignore Windows newline fluff if that's what we are running on. + // + if (env.host.class_ == "windows") + args.push_back ("--strip-trailing-cr"); + + args.push_back (eop.string ().c_str ()); + args.push_back (op.string ().c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + try + { + // Save diff's stdout to a file for troubleshooting and for the + // optional (if not too large) printing (at the end of + // diagnostics). + // + path ep (op + ".diff"); + auto_fd efd; + + try + { + efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create); + env.clean_special (ep); + } + catch (const io_error& e) + { + fail (ll) << "unable to write to " << ep << ": " << e; + } + + // Diff utility prints the differences to stdout. But for the + // user it is a part of the script failure diagnostics so let's + // redirect stdout to stderr. + // + process p (pp, args.data (), 0, 2, efd.get ()); + efd.reset (); + + if (p.wait ()) + return true; + + assert (p.exit); + const process_exit& pe (*p.exit); + + // Note that both POSIX and GNU diff report error by exiting with + // the code > 1. 
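+ // (That is, 0 means no differences, 1 means differences were found, and
+ // anything greater indicates trouble running the comparison itself.)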
+ // + if (!pe.normal () || pe.code () > 1) + { + diag_record d (fail (ll)); + print_process (d, args); + d << " " << pe; + + print_file (d, ep, ll); + } + + // Output doesn't match the expected result. + // + if (diag) + { + diag_record d (error (ll)); + d << pr << " " << what << " doesn't match expected"; + + output_info (d, op); + output_info (d, eop, "expected "); + output_info (d, ep, "", " diff"); + input_info (d); + + print_file (d, ep, ll); + } + + // Fall through (to return false). + // + } + catch (const process_error& e) + { + error (ll) << "unable to execute " << pp << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + } + else if (rd.type == redirect_type::here_str_regex || + rd.type == redirect_type::here_doc_regex) + { + // The overall plan is: + // + // 1. Create regex line string. While creating it's line characters + // transform regex lines according to the redirect modifiers. + // + // 2. Create line regex using the line string. If creation fails + // then save the (transformed) regex redirect to a file for + // troubleshooting. + // + // 3. Parse the output into the literal line string. + // + // 4. Match the output line string with the line regex. + // + // 5. If match fails save the (transformed) regex redirect to a file + // for troubleshooting. + // + using namespace regex; + + assert (!op.empty ()); + + // Create regex line string. + // + line_pool pool; + line_string rls; + const regex_lines rl (rd.regex); + + // Parse regex flags. + // + // When add support for new flags don't forget to update + // parse_regex(). + // + auto parse_flags = [] (const string& f) -> char_flags + { + char_flags r (char_flags::none); + + for (char c: f) + { + switch (c) + { + case 'd': r |= char_flags::idot; break; + case 'i': r |= char_flags::icase; break; + default: assert (false); // Error so should have been checked. + } + } + + return r; + }; + + // Return original regex line with the transformation applied. + // + auto line = [&rl, &rd, &env] (const regex_line& l) -> string + { + string r; + if (l.regex) // Regex (possibly empty), + { + r += rl.intro; + r += transform (l.value, true /* regex */, rd.modifiers (), env); + r += rl.intro; + r += l.flags; + } + else if (!l.special.empty ()) // Special literal. + r += rl.intro; + else // Textual literal. + r += transform (l.value, false /* regex */, rd.modifiers (), env); + + r += l.special; + return r; + }; + + // Return regex line location. + // + // Note that we rely on the fact that the command and regex lines + // are always belong to the same file. + // + auto loc = [&ll] (uint64_t line, uint64_t column) -> location + { + location r (ll); + r.line = line; + r.column = column; + return r; + }; + + // Save the regex to file for troubleshooting, return the file path + // it have been saved to. + // + // Note that we save the regex on line regex creation failure or if + // the program output doesn't match. + // + auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path + { + path rp (op + ".regex"); + + // Encode here-document regex global flags if present as a file + // name suffix. For example if icase and idot flags are specified + // the name will look like: + // + // stdout.regex-di + // + if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ()) + rp += '-' + rl.flags; + + // Note that if would be more efficient to directly write chunks + // to file rather than to compose a string first. Hower we don't + // bother (about performance) for the sake of the code as we + // already failed. 
+ // + string s; + for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); + i != e; ++i) + { + if (i != b) s += '\n'; + s += line (*i); + } + + save (rp, s, ll); + return rp; + }; + + // Finally create regex line string. + // + // Note that diagnostics doesn't refer to the program path as it is + // irrelevant to failures at this stage. + // + char_flags gf (parse_flags (rl.flags)); // Regex global flags. + + for (const auto& l: rl.lines) + { + if (l.regex) // Regex (with optional special characters). + { + line_char c; + + // Empty regex is a special case repesenting the blank line. + // + if (l.value.empty ()) + c = line_char ("", pool); + else + { + try + { + string s (transform (l.value, + true /* regex */, + rd.modifiers (), + env)); + + c = line_char ( + char_regex (s, gf | parse_flags (l.flags)), pool); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful. + // + diag_record d (fail (loc (l.line, l.column))); + + if (rd.type == redirect_type::here_str_regex) + d << "invalid " << what << " regex redirect" << e << + info << "regex: '" << line (l) << "'"; + else + d << "invalid char-regex in " << what << " regex redirect" + << e << + info << "regex line: '" << line (l) << "'"; + + d << endf; + } + } + + rls += c; // Append blank literal or regex line char. + } + else if (!l.special.empty ()) // Special literal. + { + // Literal can not be followed by special characters in the same + // line. + // + assert (l.value.empty ()); + } + else // Textual literal. + { + // Append literal line char. + // + rls += line_char (transform (l.value, + false /* regex */, + rd.modifiers (), + env), + pool); + } + + for (char c: l.special) + { + if (line_char::syntax (c)) + rls += line_char (c); // Append special line char. + else + fail (loc (l.line, l.column)) + << "invalid syntax character '" << c << "' in " << what + << " regex redirect" << + info << "regex line: '" << line (l) << "'"; + } + } + + // Create line regex. + // + line_regex regex; + + try + { + regex = line_regex (move (rls), move (pool)); + } + catch (const regex_error& e) + { + // Note that line regex creation can not fail for here-string + // redirect as it doesn't have syntax line chars. That in + // particular means that end_line and end_column are meaningful. + // + assert (rd.type == redirect_type::here_doc_regex); + + diag_record d (fail (loc (rd.end_line, rd.end_column))); + + // Print regex_error description if meaningful. + // + d << "invalid " << what << " regex redirect" << e; + + // It would be a waste to save the regex into the file just to + // remove it. + // + if (env.temp_dir_keep) + output_info (d, save_regex (), "", " regex"); + } + + // Parse the output into the literal line string. + // + line_string ls; + + try + { + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (getline() failed to extract any character). + // + // Note that newlines are treated as line-chars separators. That + // in particular means that the trailing newline produces a blank + // line-char (empty literal). Empty output produces the zero-length + // line-string. + // + // Also note that we strip the trailing CR characters (otherwise + // can mismatch when, for example, cross-testing). + // + ifdstream is (op, ifdstream::badbit); + is.peek (); // Sets eofbit for an empty stream. 
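+ // So, for example, the output:
+ //
+ //   foo
+ //   bar
+ //
+ // (with the customary trailing newline) is parsed into the line-string
+ // {"foo", "bar", ""} while the empty output is parsed into the empty
+ // line-string.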
+ + while (!is.eof ()) + { + string s; + getline (is, s); + + // It is safer to strip CRs in cycle, as msvcrt unexplainably + // adds too much trailing junk to the system_error descriptions, + // and so it can appear in programs output. For example: + // + // ...: Invalid data.\r\r\n + // + // Note that our custom operator<<(ostream&, const exception&) + // removes this junk. + // + while (!s.empty () && s.back () == '\r') + s.pop_back (); + + ls += line_char (move (s), regex.pool); + } + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << op << ": " << e; + } + + // Match the output with the regex. + // + if (regex_match (ls, regex)) // Doesn't throw. + return true; + + // Output doesn't match the regex. + // + // Unless the temporary directory is removed on failure, we save the + // regex to file for troubleshooting regardless of whether we print + // the diagnostics or not. We, however, register it for cleanup in the + // later case (the expression may still succeed, we can be evaluating + // the if condition, etc). + // + optional rp; + if (env.temp_dir_keep) + rp = save_regex (); + + if (diag) + { + diag_record d (error (ll)); + d << pr << " " << what << " doesn't match regex"; + + output_info (d, op); + + if (rp) + output_info (d, *rp, "", " regex"); + + input_info (d); + + // Print cached output. + // + print_file (d, op, ll); + } + else if (rp) + env.clean_special (*rp); + + // Fall through (to return false). + // + } + else // Noop. + return true; + + return false; + } + + // The exit pseudo-builtin: exit the script successfully, or print the + // diagnostics and exit the script unsuccessfully. Always throw exit + // exception. + // + // exit [] + // + [[noreturn]] static void + exit_builtin (const strings& args, const location& ll) + { + auto i (args.begin ()); + auto e (args.end ()); + + // Process arguments. + // + // If no argument is specified, then exit successfully. Otherwise, + // print the diagnostics and exit unsuccessfully. + // + if (i == e) + throw exit (true); + + const string& s (*i++); + + if (i != e) + fail (ll) << "unexpected argument '" << *i << "'"; + + error (ll) << s; + throw exit (false); + } + + // The set pseudo-builtin: set variable from the stdin input. + // + // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [] + // + static void + set_builtin (environment& env, + const strings& args, + auto_fd in, + const location& ll) + { + try + { + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (read operation failed to extract any + // character). + // + ifdstream cin (move (in), ifdstream::badbit); + + // Parse arguments. + // + cli::vector_scanner scan (args); + set_options ops (scan); + + if (ops.whitespace () && ops.newline ()) + fail (ll) << "both -n|--newline and -w|--whitespace specified"; + + if (!scan.more ()) + fail (ll) << "missing variable name"; + + string a (scan.next ()); // Either attributes or variable name. + const string* ats (!scan.more () ? nullptr : &a); + string vname (!scan.more () ? move (a) : scan.next ()); + + if (scan.more ()) + fail (ll) << "unexpected argument '" << scan.next () << "'"; + + if (ats != nullptr && ats->empty ()) + fail (ll) << "empty variable attributes"; + + if (vname.empty ()) + fail (ll) << "empty variable name"; + + // Read the input. + // + cin.peek (); // Sets eofbit for an empty stream. + + names ns; + while (!cin.eof ()) + { + // Read next element that depends on the whitespace mode being + // enabled or not. 
For the later case it also make sense to strip + // the trailing CRs that can appear while, for example, + // cross-testing Windows target or as a part of msvcrt junk + // production (see above). + // + string s; + if (ops.whitespace ()) + cin >> s; + else + { + getline (cin, s); + + while (!s.empty () && s.back () == '\r') + s.pop_back (); + } + + // If failbit is set then we read nothing into the string as eof is + // reached. That in particular means that the stream has trailing + // whitespaces (possibly including newlines) if the whitespace mode + // is enabled, or the trailing newline otherwise. If so then + // we append the "blank" to the variable value in the exact mode + // prior to bailing out. + // + if (cin.fail ()) + { + if (ops.exact ()) + { + if (ops.whitespace () || ops.newline ()) + ns.emplace_back (move (s)); // Reuse empty string. + else if (ns.empty ()) + ns.emplace_back ("\n"); + else + ns[0].value += '\n'; + } + + break; + } + + if (ops.whitespace () || ops.newline () || ns.empty ()) + ns.emplace_back (move (s)); + else + { + ns[0].value += '\n'; + ns[0].value += s; + } + } + + cin.close (); + + env.set_variable (move (vname), + move (ns), + ats != nullptr ? *ats : empty_string, + ll); + } + catch (const io_error& e) + { + fail (ll) << "set: " << e; + } + catch (const cli::exception& e) + { + fail (ll) << "set: " << e; + } + } + + // Sorted array of builtins that support filesystem entries cleanup. + // + static const char* cleanup_builtins[] = { + "cp", "ln", "mkdir", "mv", "touch"}; + + static inline bool + cleanup_builtin (const string& name) + { + return binary_search ( + cleanup_builtins, + cleanup_builtins + + sizeof (cleanup_builtins) / sizeof (*cleanup_builtins), + name); + } + + static bool + run_pipe (environment& env, + command_pipe::const_iterator bc, + command_pipe::const_iterator ec, + auto_fd ifd, + size_t ci, size_t li, const location& ll, + bool diag) + { + if (bc == ec) // End of the pipeline. + return true; + + // The overall plan is to run the first command in the pipe, reading + // its input from the file descriptor passed (or, for the first + // command, according to stdin redirect specification) and redirecting + // its output to the right-hand part of the pipe recursively. Fail if + // the right-hand part fails. Otherwise check the process exit code, + // match stderr (and stdout for the last command in the pipe) according + // to redirect specification(s) and fail if any of the above fails. + // + const command& c (*bc); + + // Register the command explicit cleanups. Verify that the path being + // cleaned up is a sub-path of the script working directory. Fail if + // this is not the case. + // + for (const auto& cl: c.cleanups) + { + const path& p (cl.path); + path np (normalize (p, *env.work_dir.path, ll)); + + const string& ls (np.leaf ().string ()); + bool wc (ls == "*" || ls == "**" || ls == "***"); + const path& cp (wc ? np.directory () : np); + const dir_path* sd (env.sandbox_dir.path); + + if (sd != nullptr && !cp.sub (*sd)) + fail (ll) << (wc ? "wildcard" : + p.to_directory () ? "directory" : + "file") + << " cleanup " << p << " is out of " + << diag_path (env.sandbox_dir); + + env.clean ({cl.type, move (np)}, false); + } + + bool eq (c.exit.comparison == exit_comparison::eq); + + // If stdin file descriptor is not open then this is the first pipeline + // command. 
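+ // (For the first command the descriptor is opened below according to the
+ // stdin redirect while for the subsequent commands it is the read end of
+ // the pipe created for the preceding command. So, for example, in
+ // `cmd1 | cmd2` only cmd1 is processed with ifd being -1 at this point.)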
+ // + bool first (ifd.get () == -1); + + command_pipe::const_iterator nc (bc + 1); + bool last (nc == ec); + + const string& program (c.program.string ()); + + const redirect& in ((c.in ? *c.in : env.in).effective ()); + + const redirect* out (!last + ? nullptr // stdout is piped. + : &(c.out ? *c.out : env.out).effective ()); + + const redirect& err ((c.err ? *c.err : env.err).effective ()); + + auto process_args = [&c] () -> cstrings + { + cstrings args {c.program.string ().c_str ()}; + + for (const auto& a: c.arguments) + args.push_back (a.c_str ()); + + args.push_back (nullptr); + return args; + }; + + // Prior to opening file descriptors for command input/output + // redirects let's check if the command is the exit builtin. Being a + // builtin syntactically it differs from the regular ones in a number + // of ways. It doesn't communicate with standard streams, so + // redirecting them is meaningless. It may appear only as a single + // command in a pipeline. It doesn't return any value and stops the + // script execution, so checking its exit status is meaningless as + // well. That all means we can short-circuit here calling the builtin + // and bailing out right after that. Checking that the user didn't + // specify any redirects or exit code check sounds like a right thing + // to do. + // + if (program == "exit") + { + // In case the builtin is erroneously pipelined from the other + // command, we will close stdin gracefully (reading out the stream + // content), to make sure that the command doesn't print any + // unwanted diagnostics about IO operation failure. + // + // Note that dtor will ignore any errors (which is what we want). + // + ifdstream is (move (ifd), fdstream_mode::skip); + + if (!first || !last) + fail (ll) << "exit builtin must be the only pipe command"; + + if (c.in) + fail (ll) << "exit builtin stdin cannot be redirected"; + + if (c.out) + fail (ll) << "exit builtin stdout cannot be redirected"; + + if (c.err) + fail (ll) << "exit builtin stderr cannot be redirected"; + + // We can't make sure that there is no exit code check. Let's, at + // least, check that non-zero code is not expected. + // + if (eq != (c.exit.code == 0)) + fail (ll) << "exit builtin exit code cannot be non-zero"; + + if (verb >= 2) + print_process (process_args ()); + + exit_builtin (c.arguments, ll); // Throws exit exception. + } + + // Create a unique path for a command standard stream cache file. + // + auto std_path = [&env, &ci, &li, &ll] (const char* n) -> path + { + using std::to_string; + + path p (n); + + // 0 if belongs to a single-line script, otherwise is the command line + // number (start from one) in the script. + // + if (li > 0) + p += "-" + to_string (li); + + // 0 if belongs to a single-command expression, otherwise is the + // command number (start from one) in the expression. + // + // Note that the name like stdin-N can relate to N-th command of a + // single-line script or to N-th single-command line of multi-line + // script. These cases are mutually exclusive and so are unambiguous. + // + if (ci > 0) + p += "-" + to_string (ci); + + return normalize (move (p), temp_dir (env), ll); + }; + + // If this is the first pipeline command, then open stdin descriptor + // according to the redirect specified. + // + path isp; + + if (!first) + assert (!c.in); // No redirect expected. + else + { + // Open a file for passing to the command stdin. 
+ // + auto open_stdin = [&isp, &ifd, &ll] () + { + assert (!isp.empty ()); + + try + { + ifd = fdopen (isp, fdopen_mode::in); + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << isp << ": " << e; + } + }; + + switch (in.type) + { + case redirect_type::pass: + { + try + { + ifd = fddup (0); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate stdin: " << e; + } + + break; + } + case redirect_type::none: + // Somehow need to make sure that the child process doesn't read + // from stdin. That is tricky to do in a portable way. Here we + // suppose that the program which (erroneously) tries to read some + // data from stdin being redirected to /dev/null fails not being + // able to read the expected data, and so the command doesn't pass + // through. + // + // @@ Obviously doesn't cover the case when the process reads + // whatever available. + // @@ Another approach could be not to redirect stdin and let the + // process to hang which can be interpreted as a command failure. + // @@ Both ways are quite ugly. Is there some better way to do + // this? + // + // Fall through. + // + case redirect_type::null: + { + ifd = open_null (); + break; + } + case redirect_type::file: + { + isp = normalize (in.file.path, *env.work_dir.path, ll); + + open_stdin (); + break; + } + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + // We could write to the command stdin directly but instead will + // cache the data for potential troubleshooting. + // + isp = std_path ("stdin"); + + save (isp, + transform (in.str, false /* regex */, in.modifiers (), env), + ll); + + env.clean_special (isp); + + open_stdin (); + break; + } + case redirect_type::trace: + case redirect_type::merge: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + case redirect_type::here_doc_ref: assert (false); break; + } + } + + assert (ifd.get () != -1); + + // Prior to opening file descriptors for command outputs redirects + // let's check if the command is the set builtin. Being a builtin + // syntactically it differs from the regular ones in a number of ways. + // It either succeeds or terminates abnormally, so redirecting stderr + // is meaningless. It also never produces any output and may appear + // only as a terminal command in a pipeline. That means we can + // short-circuit here calling the builtin and returning right after + // that. Checking that the user didn't specify any meaningless + // redirects or exit code check sounds as a right thing to do. + // + if (program == "set") + { + if (!last) + fail (ll) << "set builtin must be the last pipe command"; + + if (c.out) + fail (ll) << "set builtin stdout cannot be redirected"; + + if (c.err) + fail (ll) << "set builtin stderr cannot be redirected"; + + if (eq != (c.exit.code == 0)) + fail (ll) << "set builtin exit code cannot be non-zero"; + + if (verb >= 2) + print_process (process_args ()); + + set_builtin (env, c.arguments, move (ifd), ll); + return true; + } + + // Open a file for command output redirect if requested explicitly + // (file overwrite/append redirects) or for the purpose of the output + // validation (none, here_*, file comparison redirects), register the + // file for cleanup, return the file descriptor. Interpret trace + // redirect according to the verbosity level (as null if below 2, as + // pass otherwise). 
Return nullfd, standard stream descriptor duplicate + // or null-device descriptor for merge, pass or null redirects + // respectively (not opening any file). + // + auto open = [&env, &ll, &std_path] (const redirect& r, + int dfd, + path& p) -> auto_fd + { + assert (dfd == 1 || dfd == 2); + const char* what (dfd == 1 ? "stdout" : "stderr"); + + fdopen_mode m (fdopen_mode::out | fdopen_mode::create); + + redirect_type rt (r.type != redirect_type::trace + ? r.type + : verb < 2 + ? redirect_type::null + : redirect_type::pass); + switch (rt) + { + case redirect_type::pass: + { + try + { + return fddup (dfd); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate " << what << ": " << e; + } + } + + case redirect_type::null: return open_null (); + + // Duplicate the paired file descriptor later. + // + case redirect_type::merge: return nullfd; + + case redirect_type::file: + { + // For the cmp mode the user-provided path refers a content to + // match against, rather than a content to be produced (as for + // overwrite and append modes). And so for cmp mode we redirect + // the process output to a temporary file. + // + p = r.file.mode == redirect_fmode::compare + ? std_path (what) + : normalize (r.file.path, *env.work_dir.path, ll); + + m |= r.file.mode == redirect_fmode::append + ? fdopen_mode::at_end + : fdopen_mode::truncate; + + break; + } + + case redirect_type::none: + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + p = std_path (what); + m |= fdopen_mode::truncate; + break; + } + + case redirect_type::trace: + case redirect_type::here_doc_ref: assert (false); break; + } + + auto_fd fd; + + try + { + fd = fdopen (p, m); + + if ((m & fdopen_mode::at_end) != fdopen_mode::at_end) + { + if (rt == redirect_type::file) + env.clean ({cleanup_type::always, p}, true); + else + env.clean_special (p); + } + } + catch (const io_error& e) + { + fail (ll) << "unable to write to " << p << ": " << e; + } + + return fd; + }; + + path osp; + fdpipe ofd; + + // If this is the last command in the pipeline than redirect the + // command process stdout to a file. Otherwise create a pipe and + // redirect the stdout to the write-end of the pipe. The read-end will + // be passed as stdin for the next command in the pipeline. + // + // @@ Shouldn't we allow the here-* and file output redirects for a + // command with pipelined output? Say if such redirect is present + // then the process output is redirected to a file first (as it is + // when no output pipelined), and only after the process exit code + // and the output are validated the next command in the pipeline is + // executed taking the file as an input. This could be usefull for + // script failures investigation and, for example, for validation + // "tightening". + // + if (last) + ofd.out = open (*out, 1, osp); + else + { + assert (!c.out); // No redirect expected. + ofd = open_pipe (); + } + + path esp; + auto_fd efd (open (err, 2, esp)); + + // Merge standard streams. + // + bool mo (out != nullptr && out->type == redirect_type::merge); + if (mo || err.type == redirect_type::merge) + { + auto_fd& self (mo ? ofd.out : efd); + auto_fd& other (mo ? efd : ofd.out); + + try + { + assert (self.get () == -1 && other.get () != -1); + self = fddup (other.get ()); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate " << (mo ? 
"stderr" : "stdout") + << ": " << e; + } + } + + // All descriptors should be open to the date. + // + assert (ofd.out.get () != -1 && efd.get () != -1); + + optional exit; + builtin_function* bf (builtins.find (program)); + + bool success; + + if (bf != nullptr) + { + // Execute the builtin. + // + if (verb >= 2) + print_process (process_args ()); + + // Some of the script builtins (cp, mkdir, etc) extend libbutl + // builtins (via callbacks) registering/moving cleanups for the + // filesystem entries they create/move, unless explicitly requested + // not to do so via the --no-cleanup option. + // + // Let's "wrap up" the cleanup-related flags into the single object + // to rely on "small function object" optimization. + // + struct cleanup + { + // Whether the cleanups are enabled for the builtin. Can be set to + // false by the parse_option callback if --no-cleanup is + // encountered. + // + bool enabled = true; + + // Whether to register cleanup for a filesystem entry being + // created/updated depending on its existence. Calculated by the + // create pre-hook and used by the subsequent post-hook. + // + bool add; + + // Whether to move existing cleanups for the filesystem entry + // being moved, rather than to erase them. Calculated by the move + // pre-hook and used by the subsequent post-hook. + // + bool move; + }; + + // nullopt if the builtin doesn't support cleanups. + // + optional cln; + + if (cleanup_builtin (program)) + cln = cleanup (); + + builtin_callbacks bcs { + + // create + // + // Unless cleanups are suppressed, test that the filesystem entry + // doesn't exist (pre-hook) and, if that's the case, register the + // cleanup for the newly created filesystem entry (post-hook). + // + [&env, &cln] (const path& p, bool pre) + { + // Cleanups must be supported by a filesystem entry-creating + // builtin. + // + assert (cln); + + if (cln->enabled) + { + if (pre) + cln->add = !butl::entry_exists (p); + else if (cln->add) + env.clean ({cleanup_type::always, p}, true /* implicit */); + } + }, + + // move + // + // Validate the source and destination paths (pre-hook) and, + // unless suppressed, adjust the cleanups that are sub-paths of + // the source path (post-hook). + // + [&env, &cln] (const path& from, const path& to, bool force, bool pre) + { + // Cleanups must be supported by a filesystem entry-moving + // builtin. + // + assert (cln); + + if (pre) + { + const dir_path& wd (*env.work_dir.path); + const dir_path* sd (env.sandbox_dir.path); + + auto fail = [] (const string& d) {throw runtime_error (d);}; + + if (sd != nullptr && !from.sub (*sd) && !force) + fail (diag_path (from) + " is out of " + + diag_path (env.sandbox_dir)); + + auto check_wd = [&wd, &env, fail] (const path& p) + { + if (wd.sub (path_cast (p))) + fail (diag_path (p) + " contains " + + diag_path (env.work_dir)); + }; + + check_wd (from); + check_wd (to); + + // Unless cleanups are disabled, "move" the matching cleanups + // if the destination path doesn't exist and it is a sub-path + // of the working directory and just remove them otherwise. + // + if (cln->enabled) + cln->move = !butl::entry_exists (to) && + (sd == nullptr || to.sub (*sd)); + } + else if (cln->enabled) + { + // Move or remove the matching cleanups (see above). + // + // Note that it's not enough to just change the cleanup paths. + // We also need to make sure that these cleanups happen before + // the destination directory (or any of its parents) cleanup, + // that is potentially registered. 
To achieve that we can just + // relocate these cleanup entries to the end of the list, + // preserving their mutual order. Remember that cleanups in + // the list are executed in the reversed order. + // + cleanups cs; + + // Remove the source path sub-path cleanups from the list, + // adjusting/caching them if required (see above). + // + for (auto i (env.cleanups.begin ()); i != env.cleanups.end (); ) + { + script::cleanup& c (*i); + path& p (c.path); + + if (p.sub (from)) + { + if (cln->move) + { + // Note that we need to preserve the cleanup path + // trailing separator which indicates the removal + // method. Also note that leaf(), in particular, does + // that. + // + p = p != from + ? to / p.leaf (path_cast (from)) + : p.to_directory () + ? path_cast (to) + : to; + + cs.push_back (move (c)); + } + + i = env.cleanups.erase (i); + } + else + ++i; + } + + // Re-insert the adjusted cleanups at the end of the list. + // + env.cleanups.insert (env.cleanups.end (), + make_move_iterator (cs.begin ()), + make_move_iterator (cs.end ())); + + } + }, + + // remove + // + // Validate the filesystem entry path (pre-hook). + // + [&env] (const path& p, bool force, bool pre) + { + if (pre) + { + const dir_path& wd (*env.work_dir.path); + const dir_path* sd (env.sandbox_dir.path); + + auto fail = [] (const string& d) {throw runtime_error (d);}; + + if (sd != nullptr && !p.sub (*sd) && !force) + fail (diag_path (p) + " is out of " + + diag_path (env.sandbox_dir)); + + if (wd.sub (path_cast (p))) + fail (diag_path (p) + " contains " + + diag_path (env.work_dir)); + } + }, + + // parse_option + // + [&cln] (const strings& args, size_t i) + { + // Parse --no-cleanup, if it is supported by the builtin. + // + if (cln && args[i] == "--no-cleanup") + { + cln->enabled = false; + return 1; + } + + return 0; + }, + + // sleep + // + // Deactivate the thread before going to sleep. + // + [&env] (const duration& d) + { + // If/when required we could probably support the precise sleep + // mode (e.g., via an option). + // + env.context.sched.sleep (d); + } + }; + + try + { + uint8_t r; // Storage. + builtin b (bf (r, + c.arguments, + move (ifd), move (ofd.out), move (efd), + *env.work_dir.path, + bcs)); + + success = run_pipe (env, + nc, + ec, + move (ofd.in), + ci + 1, li, ll, diag); + + exit = process_exit (b.wait ()); + } + catch (const system_error& e) + { + fail (ll) << "unable to execute " << c.program << " builtin: " + << e << endf; + } + } + else + { + // Execute the process. + // + cstrings args (process_args ()); + + // Resolve the relative not simple program path against the script's + // working directory. The simple one will be left for the process + // path search machinery. Also strip the potential leading `^`, + // indicating that this is an external program rather than a + // builtin. + // + path p; + + try + { + p = path (args[0]); + + if (p.relative ()) + { + auto program = [&p, &args] (path pp) + { + p = move (pp); + args[0] = p.string ().c_str (); + }; + + if (p.simple ()) + { + const string& s (p.string ()); + + // Don't end up with an empty path. + // + if (s.size () > 1 && s[0] == '^') + program (path (s, 1, s.size () - 1)); + } + else + program (*env.work_dir.path / p); + } + } + catch (const invalid_path& e) + { + fail (ll) << "invalid program path " << e.path; + } + + try + { + process_path pp (process::path_search (args[0])); + + // Note: the builtin-escaping character '^' is not printed. 
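The program name pre-processing above (stripping the leading '^' that forces an external program over a builtin and completing a relative non-simple path against the working directory) could be sketched on its own roughly as follows, using std::filesystem instead of the build2 path types; illustrative only, the function name is made up.

#include <string>
#include <utility>     // move
#include <filesystem>

namespace fs = std::filesystem;

static fs::path
preprocess_program (std::string name, const fs::path& work_dir)
{
  fs::path p (std::move (name));

  if (p.is_relative ())
  {
    // A simple name (no directory part) is left to the PATH search, except
    // that the leading '^' is stripped (but don't end up with an empty
    // name).
    //
    if (p.parent_path ().empty ())
    {
      std::string s (p.string ());

      if (s.size () > 1 && s[0] == '^')
        p = fs::path (s.substr (1));
    }
    else
      p = work_dir / p;
  }

  return p;
}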
+ // + if (verb >= 2) + print_process (args); + + process pr ( + pp, + args.data (), + {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, + env.work_dir.path->string ().c_str ()); + + ifd.reset (); + ofd.out.reset (); + efd.reset (); + + success = run_pipe (env, + nc, + ec, + move (ofd.in), + ci + 1, li, ll, diag); + + pr.wait (); + + exit = move (pr.exit); + } + catch (const process_error& e) + { + error (ll) << "unable to execute " << args[0] << ": " << e; + + if (e.child) + std::exit (1); + + throw failed (); + } + } + + assert (exit); + + // If the righ-hand side pipeline failed than the whole pipeline fails, + // and no further checks are required. + // + if (!success) + return false; + + const path& pr (c.program); + + // If there is no valid exit code available by whatever reason then we + // print the proper diagnostics, dump stderr (if cached and not too + // large) and fail the whole script. Otherwise if the exit code is not + // correct then we print diagnostics if requested and fail the + // pipeline. + // + bool valid (exit->normal ()); + + // On Windows the exit code can be out of the valid codes range being + // defined as uint16_t. + // +#ifdef _WIN32 + if (valid) + valid = exit->code () < 256; +#endif + + success = valid && eq == (exit->code () == c.exit.code); + + if (!valid || (!success && diag)) + { + // In the presense of a valid exit code we print the diagnostics and + // return false rather than throw. + // + diag_record d (valid ? error (ll) : fail (ll)); + + if (!exit->normal ()) + d << pr << " " << *exit; + else + { + uint16_t ec (exit->code ()); // Make sure is printed as integer. + + if (!valid) + d << pr << " exit code " << ec << " out of 0-255 range"; + else if (!success) + { + if (diag) + d << pr << " exit code " << ec << (eq ? " != " : " == ") + << static_cast (c.exit.code); + } + else + assert (false); + } + + if (non_empty (esp, ll) && avail_on_failure (esp, env)) + d << info << "stderr: " << esp; + + if (non_empty (osp, ll) && avail_on_failure (osp, env)) + d << info << "stdout: " << osp; + + if (non_empty (isp, ll) && avail_on_failure (isp, env)) + d << info << "stdin: " << isp; + + // Print cached stderr. + // + print_file (d, esp, ll); + } + + // If exit code is correct then check if the standard outputs match the + // expectations. Note that stdout is only redirected to file for the + // last command in the pipeline. + // + // The thinking behind matching stderr first is that if it mismatches, + // then the program probably misbehaves (executes wrong functionality, + // etc) in which case its stdout doesn't really matter. + // + if (success) + success = + check_output (pr, esp, isp, err, ll, env, diag, "stderr") && + (!last || + check_output (pr, osp, isp, *out, ll, env, diag, "stdout")); + + return success; + } + + static bool + run_expr (environment& env, + const command_expr& expr, + size_t li, const location& ll, + bool diag) + { + // Commands are numbered sequentially throughout the expression + // starting with 1. Number 0 means the command is a single one. + // + size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1 + ? 0 + : 1); + + // If there is no ORs to the right of a pipe then the pipe failure is + // fatal for the whole expression. In particular, the pipe must print + // the diagnostics on failure (if generally allowed). So we find the + // pipe that "switches on" the diagnostics potential printing. + // + command_expr::const_iterator trailing_ands; // Undefined if diag is + // disallowed. 
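A minimal illustration of how such an expression is evaluated left to right with equal operator precedence and short-circuiting (the same logic as in the loop that follows); the bool value stands in for actually running a pipe and the names are invented for the sketch.

#include <vector>
#include <cstdio>

enum class expr_op {log_or, log_and};

struct term
{
  expr_op op;  // OR-ed to an implied false for the first term.
  bool value;  // Stands in for the result of running this term's pipe.
};

static bool
evaluate (const std::vector<term>& expr)
{
  bool r (false);

  for (const term& t: expr)
  {
    bool or_op (t.op == expr_op::log_or);

    // Short-circuit if the term's result would be OR-ed with true or
    // AND-ed with false.
    //
    if (!((or_op && r) || (!or_op && !r)))
      r = t.value;
  }

  return r;
}

int
main ()
{
  // Corresponds to: false || true && false (all three terms are run).
  //
  std::vector<term> e {{expr_op::log_or, false},
                       {expr_op::log_or, true},
                       {expr_op::log_and, false}};

  std::printf ("%d\n", evaluate (e)); // Prints 0.
}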
+ if (diag) + { + auto i (expr.crbegin ()); + for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ; + trailing_ands = i.base (); + } + + bool r (false); + bool print (false); + + for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i) + { + if (diag && i + 1 == trailing_ands) + print = true; + + const command_pipe& p (i->pipe); + bool or_op (i->op == expr_operator::log_or); + + // Short-circuit if the pipe result must be OR-ed with true or AND-ed + // with false. + // + if (!((or_op && r) || (!or_op && !r))) + r = run_pipe ( + env, p.begin (), p.end (), auto_fd (), ci, li, ll, print); + + ci += p.size (); + } + + return r; + } + + void + run (environment& env, + const command_expr& expr, + size_t li, const location& ll) + { + // Note that we don't print the expression at any verbosity level + // assuming that the caller does this, potentially providing some + // additional information (command type, etc). + // + if (!run_expr (env, expr, li, ll, true /* diag */)) + throw failed (); // Assume diagnostics is already printed. + } + + bool + run_if (environment& env, + const command_expr& expr, + size_t li, const location& ll) + { + // Note that we don't print the expression here (see above). + // + return run_expr (env, expr, li, ll, false /* diag */); + } + + void + clean (environment& env, const location& ll) + { + context& ctx (env.context); + const dir_path& wdir (*env.work_dir.path); + + // Note that we operate with normalized paths here. + // + // Remove special files. The order is not important as we don't + // expect directories here. + // + for (const path& p: env.special_cleanups) + { + // Remove the file if exists. Fail otherwise. + // + if (rmfile (ctx, p, 3) == rmfile_status::not_exist) + fail (ll) << "registered for cleanup special file " << p + << " does not exist"; + } + + // Remove files and directories in the order opposite to the order of + // cleanup registration. + // + for (const auto& c: reverse_iterate (env.cleanups)) + { + cleanup_type t (c.type); + + // Skip whenever the path exists or not. + // + if (t == cleanup_type::never) + continue; + + const path& cp (c.path); + + // Wildcard with the last component being '***' (without trailing + // separator) matches all files and sub-directories recursively as + // well as the start directories itself. So we will recursively + // remove the directories that match the parent (for the original + // path) directory wildcard. + // + bool recursive (cp.leaf ().representation () == "***"); + const path& p (!recursive ? cp : cp.directory ()); + + // Remove files or directories using wildcard. + // + if (path_pattern (p)) + { + bool removed (false); + + auto rm = [&cp, recursive, &removed, &ll, &ctx, &wdir] + (path&& pe, const string&, bool interm) + { + if (!interm) + { + // While removing the entry we can get not_exist due to + // racing conditions, but that's ok if somebody did our job. + // Note that we still set the removed flag to true in this + // case. + // + removed = true; // Will be meaningless on failure. + + if (pe.to_directory ()) + { + dir_path d (path_cast (pe)); + + if (!recursive) + { + rmdir_status r (rmdir (ctx, d, 3)); + + if (r != rmdir_status::not_empty) + return true; + + diag_record dr (fail (ll)); + dr << "registered for cleanup directory " << d + << " is not empty"; + + print_dir (dr, d, ll); + dr << info << "wildcard: '" << cp << "'"; + } + else + { + // Don't remove the working directory (it will be removed + // by the dedicated cleanup). 
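A small sketch of how a registered cleanup wildcard could be split into the pattern to search and the recursive flag, as done above for the trailing '***' component; std::filesystem stands in for the build2 path type and the function name is made up.

#include <utility>
#include <filesystem>

namespace fs = std::filesystem;

// {pattern, recursive}
//
// classify_cleanup ("build/***") -> {"build",   true}   remove recursively,
//                                                       including "build"
// classify_cleanup ("build/*")   -> {"build/*", false}  immediate entries
//
static std::pair<fs::path, bool>
classify_cleanup (const fs::path& cp)
{
  bool recursive (cp.filename () == "***");
  return {recursive ? cp.parent_path () : cp, recursive};
}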
+ // + // Cast to uint16_t to avoid ambiguity with + // libbutl::rmdir_r(). + // + rmdir_status r (rmdir_r (ctx, d, d != wdir, 3)); + + if (r != rmdir_status::not_empty) + return true; + + // The directory is unlikely to be current but let's keep + // for completeness. + // + fail (ll) << "registered for cleanup wildcard " << cp + << " matches the current directory"; + } + } + else + rmfile (ctx, pe, 3); + } + + return true; + }; + + // Note that here we rely on the fact that recursive iterating + // goes depth-first (which make sense for the cleanup). + // + try + { + // Doesn't follow symlinks. + // + path_search (p, + rm, + dir_path () /* start */, + path_match_flags::none); + } + catch (const system_error& e) + { + fail (ll) << "unable to cleanup wildcard " << cp << ": " << e; + } + + // Removal of no filesystem entries is not an error for 'maybe' + // cleanup type. + // + if (removed || t == cleanup_type::maybe) + continue; + + fail (ll) << "registered for cleanup wildcard " << cp + << " doesn't match any " + << (recursive + ? "path" + : p.to_directory () + ? "directory" + : "file"); + } + + // Remove the directory if exists and empty. Fail otherwise. + // Removal of non-existing directory is not an error for 'maybe' + // cleanup type. + // + if (p.to_directory ()) + { + dir_path d (path_cast (p)); + bool wd (d == wdir); + + // Don't remove the working directory for the recursive cleanup + // (it will be removed by the dedicated one). + // + // Note that the root working directory contains the + // .buildignore file (see above). + // + // @@ If 'd' is a file then will fail with a diagnostics having + // no location info. Probably need to add an optional location + // parameter to rmdir() function. The same problem exists for + // a file cleanup when try to rmfile() directory instead of + // file. + // + rmdir_status r (recursive + ? rmdir_r (ctx, d, !wd, static_cast (3)) + : rmdir (ctx, d, 3)); + + if (r == rmdir_status::success || + (r == rmdir_status::not_exist && t == cleanup_type::maybe)) + continue; + + diag_record dr (fail (ll)); + dr << "registered for cleanup directory " << d + << (r == rmdir_status::not_exist ? " does not exist" : + !recursive ? " is not empty" + : " is current"); + + if (r == rmdir_status::not_empty) + print_dir (dr, d, ll); + } + + // Remove the file if exists. Fail otherwise. Removal of + // non-existing file is not an error for 'maybe' cleanup type. + // + if (rmfile (ctx, p, 3) == rmfile_status::not_exist && + t == cleanup_type::always) + fail (ll) << "registered for cleanup file " << p + << " does not exist"; + } + } + + void + print_dir (diag_record& dr, const dir_path& p, const location& ll) + { + try + { + size_t n (0); + for (const dir_entry& de: dir_iterator (p, + false /* ignore_dangling */)) + { + if (n++ < 10) + dr << '\n' << (de.ltype () == entry_type::directory + ? 
path_cast (de.path ()) + : de.path ()); + } + + if (n > 10) + dr << "\nand " << n - 10 << " more file(s)"; + } + catch (const system_error& e) + { + fail (ll) << "unable to iterate over " << p << ": " << e; + } + } + } +} diff --git a/libbuild2/script/run.hxx b/libbuild2/script/run.hxx new file mode 100644 index 0000000..477dd88 --- /dev/null +++ b/libbuild2/script/run.hxx @@ -0,0 +1,75 @@ +// file : libbuild2/script/run.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCRIPT_RUN_HXX +#define LIBBUILD2_SCRIPT_RUN_HXX + +#include +#include + +#include + +namespace build2 +{ + namespace script + { + // An exception that can be thrown by an expression running function to + // exit the script (for example, as a result of executing the exit builtin + // by the below run*() functions). The status indicates whether the + // execution should be considered to have succeeded or failed. + // + struct exit + { + bool status; + + explicit + exit (bool s): status (s) {} + }; + + // Helpers. + // + + // Command expression running functions. + // + // Index is the 1-base index of this command line in the command list. + // If it is 0 then it means there is only one command. This information + // can be used, for example, to derive file names. + // + // Location is the start position of this command line in the script. It + // can be used in diagnostics. + // + void + run (environment&, const command_expr&, size_t index, const location&); + + bool + run_if (environment&, const command_expr&, size_t, const location&); + + // Perform the registered special file cleanups in the direct order and + // then the regular cleanups in the reverse order. + // + void + clean (environment&, const location&); + + // Print first 10 directory sub-entries to the diag record. The directory + // must exist. Is normally used while issuing diagnostics on non-empty + // directory removal failure. + // + void + print_dir (diag_record&, const dir_path&, const location&); + + // Return the quoted path representation with the preserved trailing + // directory separator. The path is relative if the verbosity level is + // less than 3. + // + string + diag_path (const path&); + + // Same as above, but prepends the path with a name, if present. The path + // must be not NULL. + // + string + diag_path (const dir_name_view&); + } +} + +#endif // LIBBUILD2_SCRIPT_RUN_HXX diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx new file mode 100644 index 0000000..c85bfd3 --- /dev/null +++ b/libbuild2/script/script.cxx @@ -0,0 +1,659 @@ +// file : libbuild2/script/script.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include // strchr() + +using namespace std; + +namespace build2 +{ + namespace script + { + ostream& + operator<< (ostream& o, line_type lt) + { + const char* s (nullptr); + + switch (lt) + { + case line_type::var: s = "variable"; break; + case line_type::cmd: s = "command"; break; + case line_type::cmd_if: s = "'if'"; break; + case line_type::cmd_ifn: s = "'if!'"; break; + case line_type::cmd_elif: s = "'elif'"; break; + case line_type::cmd_elifn: s = "'elif!'"; break; + case line_type::cmd_else: s = "'else'"; break; + case line_type::cmd_end: s = "'end'"; break; + } + + return o << s; + } + + void + dump (ostream& os, const string& ind, const lines& ls) + { + // For each line print its tokens literal representation trying to + // reproduce the quoting. Consider mixed quoting as double quoting + // since the information is lost. 
+ // + // Also additionally indent the if-branch lines. + // + string if_ind; + + for (const line& l: ls) + { + // Before printing indentation, decrease it if the else or end line is + // reached. + // + switch (l.type) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + case line_type::cmd_end: + { + size_t n (if_ind.size ()); + assert (n >= 2); + if_ind.resize (n - 2); + break; + } + default: break; + } + + // Print indentations. + // + os << ind << if_ind; + + // After printing indentation, increase it for if/else branch. + // + switch (l.type) + { + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: if_ind += " "; break; + default: break; + } + + // '"' or '\'' if we are inside the quoted token sequence and '\0' + // otherwise. Thus, can be used as bool. + // + char qseq ('\0'); + + for (const replay_token& rt: l.tokens) + { + const token& t (rt.token); + + // '"' or '\'' if the token is quoted and '\0' otherwise. Thus, + // can be used as bool. + // + char qtok ('\0'); + + switch (t.qtype) + { + case quote_type::unquoted: qtok = '\0'; break; + case quote_type::single: qtok = '\''; break; + case quote_type::mixed: + case quote_type::double_: qtok = '"'; break; + } + + // If being inside a quoted token sequence we have reached a token + // quoted differently or the newline, then we probably made a + // mistake misinterpreting some previous partially quoted token, for + // example f"oo" as "foo. If that's the case, all we can do is to + // end the sequence adding the trailing quote. + // + // Note that a token inside the quoted sequence may well be + // unquoted, so for example "$foo" is lexed as: + // + // token quoting complete notes + // '' " no + // $ " yes + // 'foo' Unquoted since lexed in variable mode. + // '' " no + // \n + // + if (qseq && + ((qtok && qtok != qseq) || t.type == token_type::newline)) + { + os << qseq; + qseq = '\0'; + } + + // Left and right token quotes (can be used as bool). + // + char lq ('\0'); + char rq ('\0'); + + // If the token is quoted, then determine if/which quotes should be + // present on its sides and track the quoted token sequence. + // + if (qtok) + { + if (t.qcomp) // Complete token quoting. + { + // If we are inside a quoted token sequence then do noting. + // Otherwise just quote the current token not starting a + // sequence. + // + if (!qseq) + { + lq = qtok; + rq = qtok; + } + } + else // Partial token quoting. + { + // Note that we can not always reproduce the original tokens + // representation for partial quoting. For example, the two + // following tokens are lexed into the identical token objects: + // + // "foo + // f"oo" + // + // We will always assume that the partially quoted token either + // starts or ends the quoted token sequence. Sometimes this ends + // up unexpectedly, but seems there is not much we can do: + // + // f"oo" "ba"r -> "foo bar" + // + if (!qseq) // Start quoted sequence. + { + lq = qtok; + qseq = qtok; + } + else // End quoted sequence. + { + rq = qtok; + qseq = '\0'; + } + } + } + + // Print the space character prior to the separated token, unless + // it is a first like token or the newline. + // + if (t.separated && + t.type != token_type::newline && + &rt != &l.tokens[0]) + os << ' '; + + if (lq) os << lq; // Print the left quote, if required. + + // Escape the special characters, unless the token in not a word or + // is single-quoted. 
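The escaping performed for word tokens (see the loop just below) could be reduced to a small standalone helper along these lines; the quote argument is '\0', '\'' or '"', and the function name is invented for the sketch.

#include <string>
#include <cstring>

static std::string
escape_word (const std::string& w, char quote)
{
  if (quote == '\'')    // Single-quoted words are printed as is.
    return w;

  // Inside double quotes only '\' and '"' are special while an unquoted
  // word also needs the operator/redirect characters escaped.
  //
  const char* special (quote == '"' ? "\\\"" : "|&<>=\\\"");

  std::string r;
  for (char c: w)
  {
    if (std::strchr (special, c) != nullptr)
      r += '\\';

    r += c;
  }

  return r;
}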
Note that the special character set depends on + // whether the word is double-quoted or unquoted. + // + if (t.type == token_type::word && qtok != '\'') + { + for (char c: t.value) + { + if (strchr (qtok ? "\\\"" : "|&<>=\\\"", c) != nullptr) + os << '\\'; + + os << c; + } + } + else + t.printer (os, t, print_mode::raw); + + if (rq) os << rq; // Print the right quote, if required. + } + } + } + + // Quote if empty or contains spaces or any of the special characters. + // Note that we use single quotes since double quotes still allow + // expansion. + // + // @@ What if it contains single quotes? + // + static void + to_stream_q (ostream& o, const string& s) + { + if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos) + o << '\'' << s << '\''; + else + o << s; + }; + + void + to_stream (ostream& o, const command& c, command_to_stream m) + { + auto print_path = [&o] (const path& p) + { + using build2::operator<<; + + ostringstream s; + stream_verb (s, stream_verb (o)); + s << p; + + to_stream_q (o, s.str ()); + }; + + auto print_redirect = [&o, print_path] (const redirect& r, int fd) + { + const redirect& er (r.effective ()); + + // Print the none redirect (no data allowed) if/when the respective + // syntax is invented. + // + if (er.type == redirect_type::none) + return; + + o << ' '; + + // Print the redirect file descriptor. + // + if (fd == 2) + o << fd; + + // Print the redirect original representation and the modifiers, if + // present. + // + r.token.printer (o, r.token, print_mode::raw); + + // Print the rest of the redirect (file path, etc). + // + switch (er.type) + { + case redirect_type::none: assert (false); break; + case redirect_type::here_doc_ref: assert (false); break; + + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: break; + case redirect_type::merge: o << er.fd; break; + + case redirect_type::file: + { + print_path (er.file.path); + break; + } + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + if (er.type == redirect_type::here_doc_literal) + o << er.end; + else + { + const string& v (er.str); + to_stream_q (o, + er.modifiers ().find (':') == string::npos + ? string (v, 0, v.size () - 1) // Strip newline. + : v); + } + + break; + } + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + const regex_lines& re (er.regex); + + if (er.type == redirect_type::here_doc_regex) + o << re.intro + er.end + re.intro + re.flags; + else + { + assert (!re.lines.empty ()); // Regex can't be empty. + + regex_line l (re.lines[0]); + to_stream_q (o, re.intro + l.value + re.intro + l.flags); + } + + break; + } + } + }; + + auto print_doc = [&o] (const redirect& r) + { + o << endl; + + if (r.type == redirect_type::here_doc_literal) + o << r.str; + else + { + assert (r.type == redirect_type::here_doc_regex); + + const regex_lines& rl (r.regex); + + for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); + i != e; ++i) + { + if (i != b) + o << endl; + + const regex_line& l (*i); + + if (l.regex) // Regex (possibly empty), + o << rl.intro << l.value << rl.intro << l.flags; + else if (!l.special.empty ()) // Special literal. + o << rl.intro; + else // Textual literal. + o << l.value; + + o << l.special; + } + } + + o << (r.modifiers ().find (':') == string::npos ? "" : "\n") << r.end; + }; + + if ((m & command_to_stream::header) == command_to_stream::header) + { + // Program. + // + to_stream_q (o, c.program.string ()); + + // Arguments. 
+ // + for (const string& a: c.arguments) + { + o << ' '; + to_stream_q (o, a); + } + + // Redirects. + // + if (c.in) + print_redirect (*c.in, 0); + + if (c.out) + print_redirect (*c.out, 1); + + if (c.err) + print_redirect (*c.err, 2); + + for (const auto& p: c.cleanups) + { + o << " &"; + + if (p.type != cleanup_type::always) + o << (p.type == cleanup_type::maybe ? '?' : '!'); + + print_path (p.path); + } + + if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0) + { + switch (c.exit.comparison) + { + case exit_comparison::eq: o << " == "; break; + case exit_comparison::ne: o << " != "; break; + } + + o << static_cast (c.exit.code); + } + } + + if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) + { + // Here-documents. + // + if (c.in && + (c.in->type == redirect_type::here_doc_literal || + c.in->type == redirect_type::here_doc_regex)) + print_doc (*c.in); + + if (c.out && + (c.out->type == redirect_type::here_doc_literal || + c.out->type == redirect_type::here_doc_regex)) + print_doc (*c.out); + + if (c.err && + (c.err->type == redirect_type::here_doc_literal || + c.err->type == redirect_type::here_doc_regex)) + print_doc (*c.err); + } + } + + void + to_stream (ostream& o, const command_pipe& p, command_to_stream m) + { + if ((m & command_to_stream::header) == command_to_stream::header) + { + for (auto b (p.begin ()), i (b); i != p.end (); ++i) + { + if (i != b) + o << " | "; + + to_stream (o, *i, command_to_stream::header); + } + } + + if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) + { + for (const command& c: p) + to_stream (o, c, command_to_stream::here_doc); + } + } + + void + to_stream (ostream& o, const command_expr& e, command_to_stream m) + { + if ((m & command_to_stream::header) == command_to_stream::header) + { + for (auto b (e.begin ()), i (b); i != e.end (); ++i) + { + if (i != b) + { + switch (i->op) + { + case expr_operator::log_or: o << " || "; break; + case expr_operator::log_and: o << " && "; break; + } + } + + to_stream (o, i->pipe, command_to_stream::header); + } + } + + if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) + { + for (const expr_term& t: e) + to_stream (o, t.pipe, command_to_stream::here_doc); + } + } + + // redirect + // + redirect:: + redirect (redirect_type t) + : type (t) + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: + case redirect_type::merge: break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: new (&str) string (); break; + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + new (®ex) regex_lines (); + break; + } + + case redirect_type::file: new (&file) file_type (); break; + + case redirect_type::here_doc_ref: assert (false); break; + } + } + + redirect:: + redirect (redirect&& r) noexcept + : type (r.type), + token (move (r.token)), + end (move (r.end)), + end_line (r.end_line), + end_column (r.end_column) + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: break; + + case redirect_type::merge: fd = r.fd; break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + new (&str) string (move (r.str)); + break; + } + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + new (®ex) regex_lines (move (r.regex)); + break; + } + case redirect_type::file: + { + new (&file) file_type 
(move (r.file)); + break; + } + case redirect_type::here_doc_ref: + { + new (&ref) reference_wrapper (r.ref); + break; + } + } + } + + redirect& redirect:: + operator= (redirect&& r) noexcept + { + if (this != &r) + { + this->~redirect (); + new (this) redirect (move (r)); // Assume noexcept move-constructor. + } + return *this; + } + + redirect:: + ~redirect () + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: + case redirect_type::merge: break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: str.~string (); break; + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: regex.~regex_lines (); break; + + case redirect_type::file: file.~file_type (); break; + + case redirect_type::here_doc_ref: + { + ref.~reference_wrapper (); + break; + } + } + } + + redirect:: + redirect (const redirect& r) + : type (r.type), + token (r.token), + end (r.end), + end_line (r.end_line), + end_column (r.end_column) + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: break; + + case redirect_type::merge: fd = r.fd; break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + new (&str) string (r.str); + break; + } + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + new (®ex) regex_lines (r.regex); + break; + } + case redirect_type::file: + { + new (&file) file_type (r.file); + break; + } + case redirect_type::here_doc_ref: + { + new (&ref) reference_wrapper (r.ref); + break; + } + } + } + + redirect& redirect:: + operator= (const redirect& r) + { + if (this != &r) + *this = redirect (r); // Reduce to move-assignment. + return *this; + } + + // environment + // + void environment:: + clean (script::cleanup c, bool implicit) + { + using script::cleanup; + + assert (!implicit || c.type == cleanup_type::always); + + const path& p (c.path); + + if (sandbox_dir.path != nullptr && !p.sub (*sandbox_dir.path)) + { + if (implicit) + return; + else + assert (false); // Error so should have been checked. + } + + auto pr = [&p] (const cleanup& v) -> bool {return v.path == p;}; + auto i (find_if (cleanups.begin (), cleanups.end (), pr)); + + if (i == cleanups.end ()) + cleanups.emplace_back (move (c)); + else if (!implicit) + i->type = c.type; + } + + void environment:: + clean_special (path p) + { + special_cleanups.emplace_back (move (p)); + } + } +} diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx new file mode 100644 index 0000000..f4998b7 --- /dev/null +++ b/libbuild2/script/script.hxx @@ -0,0 +1,471 @@ +// file : libbuild2/script/script.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCRIPT_SCRIPT_HXX +#define LIBBUILD2_SCRIPT_SCRIPT_HXX + +#include +#include +#include + +#include +#include + +namespace build2 +{ + namespace script + { + // Pre-parsed representation. + // + + enum class line_type + { + var, + cmd, + cmd_if, + cmd_ifn, + cmd_elif, + cmd_elifn, + cmd_else, + cmd_end + }; + + ostream& + operator<< (ostream&, line_type); + + struct line + { + line_type type; + replay_tokens tokens; + + union + { + const variable* var; // Pre-entered for line_type::var. + }; + }; + + // Most of the time we will have just one line (a command). + // + using lines = small_vector; + + // Print the script lines, trying to reproduce their original (non- + // expanded) representation. 
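The redirect special member functions above manage a union with non-trivial members by hand: placement new in the constructors, explicit destructor calls in the destructor, and move-assignment reduced to destroy-plus-move-construct. A minimal two-alternative version of that technique, for illustration only (not build2 code):

#include <new>        // Placement new.
#include <string>
#include <utility>    // move

class value
{
public:
  enum class kind {number, text} k;

  union
  {
    int number;
    std::string text; // Non-trivial member: must be managed manually.
  };

  explicit value (int n): k (kind::number), number (n) {}

  explicit value (std::string s): k (kind::text)
  {
    new (&text) std::string (std::move (s));
  }

  value (value&& v) noexcept
    : k (v.k)
  {
    if (k == kind::text)
      new (&text) std::string (std::move (v.text));
    else
      number = v.number;
  }

  value&
  operator= (value&& v) noexcept
  {
    if (this != &v)
    {
      this->~value ();
      new (this) value (std::move (v)); // Assume noexcept move-constructor.
    }
    return *this;
  }

  ~value ()
  {
    if (k == kind::text)
      text.~basic_string ();
  }
};

The copy constructor and copy assignment in the patch follow the same pattern and, as the comment in script.hxx notes, exist only to work around older compilers.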
+ // + // Note that the exact spacing and partial quoting may not be restored due + // to the information loss. + // + void + dump (ostream&, const string& ind, const lines&); + + // Parse object model. + // + + // redirect + // + enum class redirect_type + { + // No data is allowed to be read or written. + // + // Note that redirect of this type cannot be currently specified on the + // script command line and can only be set via the environment object + // as a default redirect (see below). + // + none, + + pass, + null, + trace, + merge, + here_str_literal, + here_str_regex, + here_doc_literal, + here_doc_regex, + here_doc_ref, // Reference to here_doc literal or regex. + file, + }; + + // Pre-parsed (but not instantiated) regex lines. The idea here is that + // we should be able to re-create their (more or less) exact text + // representation for diagnostics but also instantiate without any + // re-parsing. + // + struct regex_line + { + // If regex is true, then value is the regex expression. Otherwise, it + // is a literal. Note that special characters can be present in both + // cases. For example, //+ is a regex, while /+ is a literal, both + // with '+' as a special character. Flags are only valid for regex. + // Literals falls apart into textual (has no special characters) and + // special (has just special characters instead) ones. For example + // foo is a textual literal, while /.+ is a special one. Note that + // literal must not have value and special both non-empty. + // + bool regex; + + string value; + string flags; + string special; + + uint64_t line; + uint64_t column; + + // Create regex with optional special characters. + // + regex_line (uint64_t l, uint64_t c, + string v, string f, string s = string ()) + : regex (true), + value (move (v)), + flags (move (f)), + special (move (s)), + line (l), + column (c) {} + + // Create a literal, either text or special. + // + regex_line (uint64_t l, uint64_t c, string v, bool s) + : regex (false), + value (s ? string () : move (v)), + special (s ? move (v) : string ()), + line (l), + column (c) {} + }; + + struct regex_lines + { + char intro; // Introducer character. + string flags; // Global flags (here-document). + + small_vector lines; + }; + + // Output file redirect mode. + // + enum class redirect_fmode + { + compare, + overwrite, + append + }; + + struct redirect + { + redirect_type type; + + struct file_type + { + using path_type = build2::path; + path_type path; + redirect_fmode mode; // Meaningless for input redirect. + }; + + union + { + int fd; // Merge-to descriptor. + string str; // Note: with trailing newline, if requested. + regex_lines regex; // Note: with trailing blank, if requested. + file_type file; + reference_wrapper ref; // Note: no chains. + }; + + // Modifiers and the original representation (potentially an alias). + // + build2::token token; + + string end; // Here-document end marker (no regex intro/flags). + uint64_t end_line; // Here-document end marker location. + uint64_t end_column; + + // Create redirect of a type other than reference. + // + explicit + redirect (redirect_type); + + // Create redirect of the reference type. + // + redirect (redirect_type t, const redirect& r, build2::token tk) + : type (redirect_type::here_doc_ref), + ref (r), + token (move (tk)) + { + // There is no support (and need) for reference chains. + // + assert (t == redirect_type::here_doc_ref && + r.type != redirect_type::here_doc_ref); + } + + // Create redirect of the merge type. 
+ // + // Note that it's the caller's responsibility to make sure that the file + // descriptor is valid for this redirect (2 for stdout, etc). + // + redirect (redirect_type t, int f) + : type (redirect_type::merge), fd (f) + { + assert (t == redirect_type::merge && (f == 1 || f == 2)); + } + + redirect (redirect&&) noexcept; + redirect& operator= (redirect&&) noexcept; + + // @@ Defining optional movable-only redirects in the command class make + // the older C++ compilers (GCC 4.9, Clang 4, VC 15) fail to compile the + // command vector manipulating code. Thus, we make the redirect class + // copyable to workaround the issue. + // + redirect (const redirect&); + redirect& operator= (const redirect&); + + ~redirect (); + + const redirect& + effective () const noexcept + { + return type == redirect_type::here_doc_ref ? ref.get () : *this; + } + + const string& + modifiers () const noexcept + { + return token.value; + } + }; + + // cleanup + // + enum class cleanup_type + { + always, // &foo - cleanup, fail if does not exist. + maybe, // &?foo - cleanup, ignore if does not exist. + never // &!foo - don’t cleanup, ignore if doesn’t exist. + }; + + // File or directory to be automatically cleaned up at the end of the + // script execution. If the path ends with a trailing slash, then it is + // assumed to be a directory, otherwise -- a file. A directory that is + // about to be cleaned up must be empty. + // + // The last component in the path may contain a wildcard that have the + // following semantics: + // + // dir/* - remove all immediate files + // dir/*/ - remove all immediate sub-directories (must be empty) + // dir/** - remove all files recursively + // dir/**/ - remove all sub-directories recursively (must be empty) + // dir/*** - remove directory dir with all files and sub-directories + // recursively + // + struct cleanup + { + cleanup_type type; + build2::path path; + }; + using cleanups = vector; + + // command_exit + // + enum class exit_comparison {eq, ne}; + + struct command_exit + { + // C/C++ don't apply constraints on program exit code other than it + // being of type int. + // + // POSIX specifies that only the least significant 8 bits shall be + // available from wait() and waitpid(); the full value shall be + // available from waitid() (read more at _Exit, _exit Open Group + // spec). + // + // While the Linux man page for waitid() doesn't mention any + // deviations from the standard, the FreeBSD implementation (as of + // version 11.0) only returns 8 bits like the other wait*() calls. + // + // Windows supports 32-bit exit codes. + // + // Note that in shells some exit values can have special meaning so + // using them can be a source of confusion. For bash values in the + // [126, 255] range are such a special ones (see Appendix E, "Exit + // Codes With Special Meanings" in the Advanced Bash-Scripting Guide). + // + exit_comparison comparison; + uint8_t code; + }; + + // command + // + struct command + { + path program; + strings arguments; + + optional in; + optional out; + optional err; + + script::cleanups cleanups; + + command_exit exit {exit_comparison::eq, 0}; + }; + + enum class command_to_stream: uint16_t + { + header = 0x01, + here_doc = 0x02, // Note: printed on a new line. 
+ all = header | here_doc + }; + + void + to_stream (ostream&, const command&, command_to_stream); + + ostream& + operator<< (ostream&, const command&); + + // command_pipe + // + using command_pipe = vector; + + void + to_stream (ostream&, const command_pipe&, command_to_stream); + + ostream& + operator<< (ostream&, const command_pipe&); + + // command_expr + // + enum class expr_operator {log_or, log_and}; + + struct expr_term + { + expr_operator op; // OR-ed to an implied false for the first term. + command_pipe pipe; + }; + + using command_expr = vector; + + void + to_stream (ostream&, const command_expr&, command_to_stream); + + ostream& + operator<< (ostream&, const command_expr&); + + // Script execution environment. + // + class environment + { + public: + build2::context& context; + + // The platform script programs run on. + // + const target_triplet& host; + + // The work directory is used as the builtin/process CWD and to complete + // relative paths. Any attempt to remove or move this directory (or its + // parent directory) using the rm or mv builtins will fail. Must be an + // absolute path. + // + const dir_name_view work_dir; + + // If the sanbox directory is not NULL, then any attempt to remove or + // move a filesystem entry outside this directory using an explicit + // cleanup or the rm/mv builtins will fail, unless the --force option is + // specified for the builtin. Must be an absolute path. + // + const dir_name_view sandbox_dir; + + // The temporary directory is used by the script running machinery to + // create special files. Must be an absolute path, unless empty. Can be + // empty until the create_temp_dir() function call, which can be used + // for creating this directory on demand. + // + const dir_path& temp_dir; + + // If true, the temporary directory will not be removed on the script + // failure. In particular, this allows the script running machinery to + // refer to the special files in diagnostics. + // + const bool temp_dir_keep; + + // Default process streams redirects. + // + // If a stream redirect is not specified on the script command line, + // then the respective redirect data member will be used as the default. + // + const redirect in; + const redirect out; + const redirect err; + + environment (build2::context& ctx, + const target_triplet& h, + const dir_name_view& wd, + const dir_name_view& sd, + const dir_path& td, bool tk, + redirect&& i = redirect (redirect_type::pass), + redirect&& o = redirect (redirect_type::pass), + redirect&& e = redirect (redirect_type::pass)) + : context (ctx), host (h), + work_dir (wd), sandbox_dir (sd), temp_dir (td), temp_dir_keep (tk), + in (move (i)), out (move (o)), err (move (e)) + { + } + + // Create environment without the sandbox. + // + environment (build2::context& ctx, + const target_triplet& h, + const dir_name_view& wd, + const dir_path& td, bool tk, + redirect&& i = redirect (redirect_type::pass), + redirect&& o = redirect (redirect_type::pass), + redirect&& e = redirect (redirect_type::pass)) + : environment (ctx, h, + wd, dir_name_view (), td, tk, + move (i), move (o), move (e)) + { + } + + // Cleanup. + // + public: + script::cleanups cleanups; + paths special_cleanups; + + // Register a cleanup. If the cleanup is explicit, then override the + // cleanup type if this path is already registered. Ignore implicit + // registration of a path outside root directory (see below). + // + void + clean (cleanup, bool implicit); + + // Register cleanup of a special file. 
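The registration policy described for clean() above (an explicit registration overrides the type of an already registered path, an implicit one never does, and an implicit path outside the sandbox is silently ignored) can be sketched on its own as follows; std::filesystem and the made-up names stand in for the build2 types.

#include <vector>
#include <optional>
#include <algorithm>
#include <filesystem>

namespace fs = std::filesystem;

enum class cleanup_kind {always, maybe, never};

struct cleanup_entry
{
  cleanup_kind type;
  fs::path path;
};

// Return true if p is lexically equal to or inside the directory d.
//
static bool
inside (const fs::path& p, const fs::path& d)
{
  fs::path r (p.lexically_relative (d));
  return !r.empty () && *r.begin () != fs::path ("..");
}

static void
register_cleanup (std::vector<cleanup_entry>& cleanups,
                  cleanup_entry c,
                  bool implicit,
                  const std::optional<fs::path>& sandbox)
{
  // An implicit registration of a path outside the sandbox is ignored (an
  // explicit one would have been diagnosed earlier).
  //
  if (implicit && sandbox && !inside (c.path, *sandbox))
    return;

  auto i (std::find_if (cleanups.begin (), cleanups.end (),
                        [&c] (const cleanup_entry& e)
                        {
                          return e.path == c.path;
                        }));

  if (i == cleanups.end ())
    cleanups.push_back (std::move (c));
  else if (!implicit)
    i->type = c.type; // Explicit registration overrides the type.
}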
Such files are created to + // maintain the script running machinery and must be removed first, not + // to interfere with the user-defined wildcard cleanups if the working + // and temporary directories are the same. + // + void + clean_special (path); + + public: + // Set variable value with optional (non-empty) attributes. + // + virtual void + set_variable (string&& name, + names&&, + const string& attrs, + const location&) = 0; + + // Create the temporary directory and set the temp_dir reference target + // to its path. Must only be called if temp_dir is empty. + // + virtual void + create_temp_dir () = 0; + + public: + virtual + ~environment () = default; + }; + } +} + +#include + +#endif // LIBBUILD2_SCRIPT_SCRIPT_HXX diff --git a/libbuild2/script/script.ixx b/libbuild2/script/script.ixx new file mode 100644 index 0000000..56043b2 --- /dev/null +++ b/libbuild2/script/script.ixx @@ -0,0 +1,56 @@ +// file : libbuild2/script/script.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + namespace script + { + inline command_to_stream + operator&= (command_to_stream& x, command_to_stream y) + { + return x = static_cast ( + static_cast (x) & static_cast (y)); + } + + inline command_to_stream + operator|= (command_to_stream& x, command_to_stream y) + { + return x = static_cast ( + static_cast (x) | static_cast (y)); + } + + inline command_to_stream + operator& (command_to_stream x, command_to_stream y) {return x &= y;} + + inline command_to_stream + operator| (command_to_stream x, command_to_stream y) {return x |= y;} + + + // command + // + inline ostream& + operator<< (ostream& o, const command& c) + { + to_stream (o, c, command_to_stream::all); + return o; + } + + // command_pipe + // + inline ostream& + operator<< (ostream& o, const command_pipe& p) + { + to_stream (o, p, command_to_stream::all); + return o; + } + + // command_expr + // + inline ostream& + operator<< (ostream& o, const command_expr& e) + { + to_stream (o, e, command_to_stream::all); + return o; + } + } +} diff --git a/libbuild2/script/token.cxx b/libbuild2/script/token.cxx new file mode 100644 index 0000000..1c612a5 --- /dev/null +++ b/libbuild2/script/token.cxx @@ -0,0 +1,53 @@ +// file : libbuild2/script/token.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +using namespace std; + +namespace build2 +{ + namespace script + { + void + token_printer (ostream& os, const token& t, print_mode m) + { + const string& v (t.value); + + // Only quote non-name tokens for diagnostics. + // + const char* q (m == print_mode::diagnostics ? "'" : ""); + + switch (t.type) + { + case token_type::clean: os << q << '&' << v << q; break; + case token_type::pipe: os << q << '|' << q; break; + + case token_type::in_pass: os << q << "<|" << q; break; + case token_type::in_null: os << q << "<-" << q; break; + case token_type::in_file: os << q << "<=" << q; break; + case token_type::in_doc: os << q << "<<=" << v << q; break; + case token_type::in_str: os << q << "<<<=" << v << q; break; + + case token_type::out_pass: os << q << ">|" << q; break; + case token_type::out_null: os << q << ">-" << q; break; + case token_type::out_trace: os << q << ">!" << q; break; + case token_type::out_merge: os << q << ">&" << q; break; + case token_type::out_file_ovr: os << q << ">=" << q; break; + case token_type::out_file_app: os << q << ">+" << q; break; + case token_type::out_file_cmp: os << q << ">?" << q; break; + case token_type::out_doc: os << q << ">>?" 
<< v << q; break; + case token_type::out_str: os << q << ">>>?" << v << q; break; + + case token_type::in_l: os << q << '<' << v << q; break; + case token_type::in_ll: os << q << "<<" << v << q; break; + case token_type::in_lll: os << q << "<<<" << v << q; break; + case token_type::out_g: os << q << '>' << v << q; break; + case token_type::out_gg: os << q << ">>" << v << q; break; + case token_type::out_ggg: os << q << ">>>" << v << q; break; + + default: build2::token_printer (os, t, m); + } + } + } +} diff --git a/libbuild2/script/token.hxx b/libbuild2/script/token.hxx new file mode 100644 index 0000000..0186bd9 --- /dev/null +++ b/libbuild2/script/token.hxx @@ -0,0 +1,66 @@ +// file : libbuild2/script/token.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCRIPT_TOKEN_HXX +#define LIBBUILD2_SCRIPT_TOKEN_HXX + +#include +#include + +#include + +namespace build2 +{ + namespace script + { + struct token_type: build2::token_type + { + using base_type = build2::token_type; + + enum + { + // NOTE: remember to update token_printer()! + + pipe = base_type::value_next, // | + clean, // &{?!} (modifiers in value) + + in_pass, // <| + in_null, // <- + in_file, // <= + in_doc, // <<={:/} (modifiers in value) + in_str, // <<<={:/} (modifiers in value) + + out_pass, // >| + out_null, // >- + out_trace, // >! + out_merge, // >& + out_file_ovr, // >= + out_file_app, // >+ + out_file_cmp, // >? + out_doc, // >>?{:/~} (modifiers in value) + out_str, // >>>?{:/~} (modifiers in value) + + // The modifiers are in the token value, if the redirect the alias + // resolves to supports the modifiers. + // + in_l, // < + in_ll, // << + in_lll, // <<< + out_g, // > + out_gg, // >> + out_ggg, // >>> + + value_next + }; + + token_type () = default; + token_type (value_type v): base_type (v) {} + token_type (base_type v): base_type (v) {} + }; + + void + token_printer (ostream&, const token&, print_mode); + } +} + +#endif // LIBBUILD2_SCRIPT_TOKEN_HXX diff --git a/libbuild2/target-key.hxx b/libbuild2/target-key.hxx index 0096d46..62bcc25 100644 --- a/libbuild2/target-key.hxx +++ b/libbuild2/target-key.hxx @@ -32,10 +32,18 @@ namespace build2 bool is_a () const {return type->is_a ();} bool is_a (const target_type& tt) const {return type->is_a (tt);} - // Return the target name or a pair of names if out-qualified. + // Append/return the target name or a pair of names if out-qualified. // + void + as_name (names&) const; + names - as_name () const; + as_name () const + { + names r; + as_name (r); + return r; + } }; inline bool diff --git a/libbuild2/target.cxx b/libbuild2/target.cxx index 83ed4a5..b9cfea7 100644 --- a/libbuild2/target.cxx +++ b/libbuild2/target.cxx @@ -44,11 +44,9 @@ namespace build2 // target_key // - names target_key:: - as_name () const + void target_key:: + as_name (names& r) const { - names r; - string v (*name); target::combine_name (v, ext, false /* @@ TODO: what to do? */); @@ -56,11 +54,9 @@ namespace build2 if (!out->empty ()) { - r.front ().pair = '@'; + r.back ().pair = '@'; r.push_back (build2::name (*out)); } - - return r; } // target_state @@ -964,8 +960,8 @@ namespace build2 phase_switch ps (t.ctx, run_phase::load); // This is subtle: while we were fussing around another thread may - // have loaded the buildfile. So re-test now that we are in exclusive - // phase. + // have loaded the buildfile. So re-test now that we are in an + // exclusive phase. 
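The re-test mentioned in this comment is the usual check/acquire-exclusivity/re-check pattern. Reduced to a plain std::shared_mutex (which here merely stands in for the build2 phase machinery), it looks roughly like this; the map and names are invented for the sketch.

#include <map>
#include <mutex>        // unique_lock
#include <string>
#include <shared_mutex>

static std::shared_mutex mutex;  // Stands in for the phase machinery.
static std::map<std::string, std::string> targets;

static const std::string&
find_or_load (const std::string& name)
{
  {
    std::shared_lock<std::shared_mutex> l (mutex); // Shared "match" phase.

    auto i (targets.find (name));
    if (i != targets.end ())
      return i->second;
  }

  // Switch to the exclusive "load" phase. Another thread may have done the
  // work while we were re-acquiring, so re-test before loading.
  //
  std::unique_lock<std::shared_mutex> l (mutex);

  auto i (targets.find (name));
  if (i == targets.end ())
    i = targets.emplace (name, "loaded " + name).first;

  return i->second;
}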
// if (e == nullptr) e = search_existing_target (t.ctx, pk); diff --git a/libbuild2/target.hxx b/libbuild2/target.hxx index 9975f33..72b7acc 100644 --- a/libbuild2/target.hxx +++ b/libbuild2/target.hxx @@ -264,6 +264,9 @@ namespace build2 names as_name () const; + void + as_name (names&) const; + // Scoping. // public: @@ -410,6 +413,11 @@ namespace build2 value& append (const variable&); + // Ad hoc recipes. + // + public: + vector adhoc_recipes; + // Target operation state. // public: @@ -578,6 +586,9 @@ namespace build2 // This function can only be called during execution if we have observed // (synchronization-wise) that this target has been executed. // + // It can also be called during the serial load phase (but make sure you + // understand what you are doing). + // target_state executed_state (action, bool fail = true) const; diff --git a/libbuild2/target.ixx b/libbuild2/target.ixx index bb30c9c..611e562 100644 --- a/libbuild2/target.ixx +++ b/libbuild2/target.ixx @@ -36,6 +36,12 @@ namespace build2 return key ().as_name (); } + inline void target:: + as_name (names& r) const + { + return key ().as_name (r); + } + inline auto target:: prerequisites () const -> const prerequisites_type& { @@ -114,8 +120,6 @@ namespace build2 inline pair target:: matched_state_impl (action a) const { - assert (ctx.phase == run_phase::match); - // Note that the "tried" state is "final". // const opstate& s (state[a]); @@ -138,13 +142,14 @@ namespace build2 inline target_state target:: executed_state_impl (action a) const { - assert (ctx.phase == run_phase::execute); return (group_state (a) ? group->state : state)[a].state; } inline target_state target:: matched_state (action a, bool fail) const { + assert (ctx.phase == run_phase::match); + // Note that the target could be being asynchronously re-matched. // pair r (matched_state_impl (a)); @@ -158,6 +163,8 @@ namespace build2 inline pair target:: try_matched_state (action a, bool fail) const { + assert (ctx.phase == run_phase::match); + pair r (matched_state_impl (a)); if (fail && r.first && r.second == target_state::failed) @@ -169,6 +176,8 @@ namespace build2 inline target_state target:: executed_state (action a, bool fail) const { + assert (ctx.phase == run_phase::execute || ctx.phase == run_phase::load); + target_state r (executed_state_impl (a)); if (fail && r == target_state::failed) @@ -193,6 +202,8 @@ namespace build2 inline bool target:: unchanged (action a) const { + assert (ctx.phase == run_phase::match); + return matched_state_impl (a).second == target_state::unchanged; } diff --git a/libbuild2/test/init.cxx b/libbuild2/test/init.cxx index 16891c6..7a07e76 100644 --- a/libbuild2/test/init.cxx +++ b/libbuild2/test/init.cxx @@ -14,8 +14,6 @@ #include #include -#include // script::regex::init() - using namespace std; using namespace butl; @@ -226,8 +224,6 @@ namespace build2 const module_functions* build2_test_load () { - script::regex::init (); - return mod_functions; } } diff --git a/libbuild2/test/script/builtin-options.cxx b/libbuild2/test/script/builtin-options.cxx deleted file mode 100644 index 6b6afe0..0000000 --- a/libbuild2/test/script/builtin-options.cxx +++ /dev/null @@ -1,667 +0,0 @@ -// -*- C++ -*- -// -// This file was generated by CLI, a command line interface -// compiler for C++. -// - -// Begin prologue. -// -// -// End prologue. 
- -#include - -#include -#include -#include -#include -#include -#include - -namespace build2 -{ - namespace test - { - namespace script - { - namespace cli - { - // unknown_option - // - unknown_option:: - ~unknown_option () throw () - { - } - - void unknown_option:: - print (::std::ostream& os) const - { - os << "unknown option '" << option ().c_str () << "'"; - } - - const char* unknown_option:: - what () const throw () - { - return "unknown option"; - } - - // unknown_argument - // - unknown_argument:: - ~unknown_argument () throw () - { - } - - void unknown_argument:: - print (::std::ostream& os) const - { - os << "unknown argument '" << argument ().c_str () << "'"; - } - - const char* unknown_argument:: - what () const throw () - { - return "unknown argument"; - } - - // missing_value - // - missing_value:: - ~missing_value () throw () - { - } - - void missing_value:: - print (::std::ostream& os) const - { - os << "missing value for option '" << option ().c_str () << "'"; - } - - const char* missing_value:: - what () const throw () - { - return "missing option value"; - } - - // invalid_value - // - invalid_value:: - ~invalid_value () throw () - { - } - - void invalid_value:: - print (::std::ostream& os) const - { - os << "invalid value '" << value ().c_str () << "' for option '" - << option ().c_str () << "'"; - - if (!message ().empty ()) - os << ": " << message ().c_str (); - } - - const char* invalid_value:: - what () const throw () - { - return "invalid option value"; - } - - // eos_reached - // - void eos_reached:: - print (::std::ostream& os) const - { - os << what (); - } - - const char* eos_reached:: - what () const throw () - { - return "end of argument stream reached"; - } - - // scanner - // - scanner:: - ~scanner () - { - } - - // argv_scanner - // - bool argv_scanner:: - more () - { - return i_ < argc_; - } - - const char* argv_scanner:: - peek () - { - if (i_ < argc_) - return argv_[i_]; - else - throw eos_reached (); - } - - const char* argv_scanner:: - next () - { - if (i_ < argc_) - { - const char* r (argv_[i_]); - - if (erase_) - { - for (int i (i_ + 1); i < argc_; ++i) - argv_[i - 1] = argv_[i]; - - --argc_; - argv_[argc_] = 0; - } - else - ++i_; - - return r; - } - else - throw eos_reached (); - } - - void argv_scanner:: - skip () - { - if (i_ < argc_) - ++i_; - else - throw eos_reached (); - } - - // vector_scanner - // - bool vector_scanner:: - more () - { - return i_ < v_.size (); - } - - const char* vector_scanner:: - peek () - { - if (i_ < v_.size ()) - return v_[i_].c_str (); - else - throw eos_reached (); - } - - const char* vector_scanner:: - next () - { - if (i_ < v_.size ()) - return v_[i_++].c_str (); - else - throw eos_reached (); - } - - void vector_scanner:: - skip () - { - if (i_ < v_.size ()) - ++i_; - else - throw eos_reached (); - } - - template - struct parser - { - static void - parse (X& x, bool& xs, scanner& s) - { - using namespace std; - - const char* o (s.next ()); - if (s.more ()) - { - string v (s.next ()); - istringstream is (v); - if (!(is >> x && is.peek () == istringstream::traits_type::eof ())) - throw invalid_value (o, v); - } - else - throw missing_value (o); - - xs = true; - } - }; - - template <> - struct parser - { - static void - parse (bool& x, scanner& s) - { - s.next (); - x = true; - } - }; - - template <> - struct parser - { - static void - parse (std::string& x, bool& xs, scanner& s) - { - const char* o (s.next ()); - - if (s.more ()) - x = s.next (); - else - throw missing_value (o); - - xs = true; - } - }; - - 
template - struct parser > - { - static void - parse (std::vector& c, bool& xs, scanner& s) - { - X x; - bool dummy; - parser::parse (x, dummy, s); - c.push_back (x); - xs = true; - } - }; - - template - struct parser > - { - static void - parse (std::set& c, bool& xs, scanner& s) - { - X x; - bool dummy; - parser::parse (x, dummy, s); - c.insert (x); - xs = true; - } - }; - - template - struct parser > - { - static void - parse (std::map& m, bool& xs, scanner& s) - { - const char* o (s.next ()); - - if (s.more ()) - { - std::string ov (s.next ()); - std::string::size_type p = ov.find ('='); - - K k = K (); - V v = V (); - std::string kstr (ov, 0, p); - std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ())); - - int ac (2); - char* av[] = - { - const_cast (o), 0 - }; - - bool dummy; - if (!kstr.empty ()) - { - av[1] = const_cast (kstr.c_str ()); - argv_scanner s (0, ac, av); - parser::parse (k, dummy, s); - } - - if (!vstr.empty ()) - { - av[1] = const_cast (vstr.c_str ()); - argv_scanner s (0, ac, av); - parser::parse (v, dummy, s); - } - - m[k] = v; - } - else - throw missing_value (o); - - xs = true; - } - }; - - template - void - thunk (X& x, scanner& s) - { - parser::parse (x.*M, s); - } - - template - void - thunk (X& x, scanner& s) - { - parser::parse (x.*M, x.*S, s); - } - } - } - } -} - -#include -#include - -namespace build2 -{ - namespace test - { - namespace script - { - // set_options - // - - set_options:: - set_options () - : exact_ (), - newline_ (), - whitespace_ () - { - } - - set_options:: - set_options (int& argc, - char** argv, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (argc, argv, erase); - _parse (s, opt, arg); - } - - set_options:: - set_options (int start, - int& argc, - char** argv, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (start, argc, argv, erase); - _parse (s, opt, arg); - } - - set_options:: - set_options (int& argc, - char** argv, - int& end, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (argc, argv, erase); - _parse (s, opt, arg); - end = s.end (); - } - - set_options:: - set_options (int start, - int& argc, - char** argv, - int& end, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (start, argc, argv, erase); - _parse (s, opt, arg); - end = s.end (); - } - - set_options:: - set_options (::build2::test::script::cli::scanner& s, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - _parse (s, opt, arg); - } - - typedef - std::map - _cli_set_options_map; - - static _cli_set_options_map _cli_set_options_map_; - - struct _cli_set_options_map_init - { - _cli_set_options_map_init () - { - _cli_set_options_map_["--exact"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::exact_ >; - _cli_set_options_map_["-e"] = - &::build2::test::script::cli::thunk< set_options, bool, 
&set_options::exact_ >; - _cli_set_options_map_["--newline"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::newline_ >; - _cli_set_options_map_["-n"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::newline_ >; - _cli_set_options_map_["--whitespace"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::whitespace_ >; - _cli_set_options_map_["-w"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::whitespace_ >; - } - }; - - static _cli_set_options_map_init _cli_set_options_map_init_; - - bool set_options:: - _parse (const char* o, ::build2::test::script::cli::scanner& s) - { - _cli_set_options_map::const_iterator i (_cli_set_options_map_.find (o)); - - if (i != _cli_set_options_map_.end ()) - { - (*(i->second)) (*this, s); - return true; - } - - return false; - } - - bool set_options:: - _parse (::build2::test::script::cli::scanner& s, - ::build2::test::script::cli::unknown_mode opt_mode, - ::build2::test::script::cli::unknown_mode arg_mode) - { - // Can't skip combined flags (--no-combined-flags). - // - assert (opt_mode != ::build2::test::script::cli::unknown_mode::skip); - - bool r = false; - bool opt = true; - - while (s.more ()) - { - const char* o = s.peek (); - - if (std::strcmp (o, "--") == 0) - { - opt = false; - s.skip (); - r = true; - continue; - } - - if (opt) - { - if (_parse (o, s)) - { - r = true; - continue; - } - - if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') - { - // Handle combined option values. - // - std::string co; - if (const char* v = std::strchr (o, '=')) - { - co.assign (o, 0, v - o); - ++v; - - int ac (2); - char* av[] = - { - const_cast (co.c_str ()), - const_cast (v) - }; - - ::build2::test::script::cli::argv_scanner ns (0, ac, av); - - if (_parse (co.c_str (), ns)) - { - // Parsed the option but not its value? - // - if (ns.end () != 2) - throw ::build2::test::script::cli::invalid_value (co, v); - - s.next (); - r = true; - continue; - } - else - { - // Set the unknown option and fall through. - // - o = co.c_str (); - } - } - - // Handle combined flags. - // - char cf[3]; - { - const char* p = o + 1; - for (; *p != '\0'; ++p) - { - if (!((*p >= 'a' && *p <= 'z') || - (*p >= 'A' && *p <= 'Z') || - (*p >= '0' && *p <= '9'))) - break; - } - - if (*p == '\0') - { - for (p = o + 1; *p != '\0'; ++p) - { - std::strcpy (cf, "-"); - cf[1] = *p; - cf[2] = '\0'; - - int ac (1); - char* av[] = - { - cf - }; - - ::build2::test::script::cli::argv_scanner ns (0, ac, av); - - if (!_parse (cf, ns)) - break; - } - - if (*p == '\0') - { - // All handled. - // - s.next (); - r = true; - continue; - } - else - { - // Set the unknown option and fall through. - // - o = cf; - } - } - } - - switch (opt_mode) - { - case ::build2::test::script::cli::unknown_mode::skip: - { - s.skip (); - r = true; - continue; - } - case ::build2::test::script::cli::unknown_mode::stop: - { - break; - } - case ::build2::test::script::cli::unknown_mode::fail: - { - throw ::build2::test::script::cli::unknown_option (o); - } - } - - break; - } - } - - switch (arg_mode) - { - case ::build2::test::script::cli::unknown_mode::skip: - { - s.skip (); - r = true; - continue; - } - case ::build2::test::script::cli::unknown_mode::stop: - { - break; - } - case ::build2::test::script::cli::unknown_mode::fail: - { - throw ::build2::test::script::cli::unknown_argument (o); - } - } - - break; - } - - return r; - } - } - } -} - -// Begin epilogue. -// -// -// End epilogue. 
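The generated option-parsing interface whose implementation ends here (and which, per the diffstat, is re-added under libbuild2/script/) is normally driven by the `set` pseudo-builtin: a scanner walks the builtin's arguments, set_options consumes the leading flags, and everything from scanner::end() onwards is treated as positional. The following is a minimal illustrative sketch, not part of the patch: parse_set_args and main are hypothetical names, and the include uses the pre-move header location shown above, assuming the relocated header keeps the same interface.

#include <cstddef>
#include <string>
#include <vector>
#include <iostream>

#include <libbuild2/test/script/builtin-options.hxx> // Pre-move location.

// Hypothetical driver, for illustration only: parse the `set` builtin
// flags (-e|--exact, -n|--newline, -w|--whitespace) from an argument
// vector and report what is left over.
//
static void
parse_set_args (const std::vector<std::string>& args)
{
  using namespace build2::test::script;

  // Scan the vector from the beginning; an unknown option fails and the
  // first non-option argument stops option parsing.
  //
  cli::vector_scanner scan (args);
  set_options ops (scan,
                   cli::unknown_mode::fail,
                   cli::unknown_mode::stop);

  std::cout << "exact: "      << ops.exact ()      << '\n'
            << "newline: "    << ops.newline ()    << '\n'
            << "whitespace: " << ops.whitespace () << '\n';

  // Positional arguments (e.g., the variable name) start at scan.end ().
  //
  for (std::size_t i (scan.end ()); i != args.size (); ++i)
    std::cout << "arg: " << args[i] << '\n';
}

int
main ()
{
  parse_set_args ({"--newline", "-e", "var"});
}

With unknown_mode::stop for arguments, option parsing stops at "var", so scan.end () points at the first positional argument, which is how the builtin separates its flags from the variable name that follows them.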
- diff --git a/libbuild2/test/script/builtin-options.hxx b/libbuild2/test/script/builtin-options.hxx deleted file mode 100644 index 44e129a..0000000 --- a/libbuild2/test/script/builtin-options.hxx +++ /dev/null @@ -1,345 +0,0 @@ -// -*- C++ -*- -// -// This file was generated by CLI, a command line interface -// compiler for C++. -// - -#ifndef LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX -#define LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX - -// Begin prologue. -// -// -// End prologue. - -#include -#include -#include -#include -#include - -#ifndef CLI_POTENTIALLY_UNUSED -# if defined(_MSC_VER) || defined(__xlC__) -# define CLI_POTENTIALLY_UNUSED(x) (void*)&x -# else -# define CLI_POTENTIALLY_UNUSED(x) (void)x -# endif -#endif - -namespace build2 -{ - namespace test - { - namespace script - { - namespace cli - { - class unknown_mode - { - public: - enum value - { - skip, - stop, - fail - }; - - unknown_mode (value); - - operator value () const - { - return v_; - } - - private: - value v_; - }; - - // Exceptions. - // - - class exception: public std::exception - { - public: - virtual void - print (::std::ostream&) const = 0; - }; - - ::std::ostream& - operator<< (::std::ostream&, const exception&); - - class unknown_option: public exception - { - public: - virtual - ~unknown_option () throw (); - - unknown_option (const std::string& option); - - const std::string& - option () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string option_; - }; - - class unknown_argument: public exception - { - public: - virtual - ~unknown_argument () throw (); - - unknown_argument (const std::string& argument); - - const std::string& - argument () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string argument_; - }; - - class missing_value: public exception - { - public: - virtual - ~missing_value () throw (); - - missing_value (const std::string& option); - - const std::string& - option () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string option_; - }; - - class invalid_value: public exception - { - public: - virtual - ~invalid_value () throw (); - - invalid_value (const std::string& option, - const std::string& value, - const std::string& message = std::string ()); - - const std::string& - option () const; - - const std::string& - value () const; - - const std::string& - message () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string option_; - std::string value_; - std::string message_; - }; - - class eos_reached: public exception - { - public: - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - }; - - // Command line argument scanner interface. - // - // The values returned by next() are guaranteed to be valid - // for the two previous arguments up until a call to a third - // peek() or next(). 
- // - class scanner - { - public: - virtual - ~scanner (); - - virtual bool - more () = 0; - - virtual const char* - peek () = 0; - - virtual const char* - next () = 0; - - virtual void - skip () = 0; - }; - - class argv_scanner: public scanner - { - public: - argv_scanner (int& argc, char** argv, bool erase = false); - argv_scanner (int start, int& argc, char** argv, bool erase = false); - - int - end () const; - - virtual bool - more (); - - virtual const char* - peek (); - - virtual const char* - next (); - - virtual void - skip (); - - private: - int i_; - int& argc_; - char** argv_; - bool erase_; - }; - - class vector_scanner: public scanner - { - public: - vector_scanner (const std::vector&, std::size_t start = 0); - - std::size_t - end () const; - - void - reset (std::size_t start = 0); - - virtual bool - more (); - - virtual const char* - peek (); - - virtual const char* - next (); - - virtual void - skip (); - - private: - const std::vector& v_; - std::size_t i_; - }; - - template - struct parser; - } - } - } -} - -namespace build2 -{ - namespace test - { - namespace script - { - class set_options - { - public: - set_options (); - - set_options (int& argc, - char** argv, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (int start, - int& argc, - char** argv, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (int& argc, - char** argv, - int& end, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (int start, - int& argc, - char** argv, - int& end, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (::build2::test::script::cli::scanner&, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - // Option accessors. - // - const bool& - exact () const; - - const bool& - newline () const; - - const bool& - whitespace () const; - - // Implementation details. - // - protected: - bool - _parse (const char*, ::build2::test::script::cli::scanner&); - - private: - bool - _parse (::build2::test::script::cli::scanner&, - ::build2::test::script::cli::unknown_mode option, - ::build2::test::script::cli::unknown_mode argument); - - public: - bool exact_; - bool newline_; - bool whitespace_; - }; - } - } -} - -#include - -// Begin epilogue. -// -// -// End epilogue. - -#endif // LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX diff --git a/libbuild2/test/script/builtin-options.ixx b/libbuild2/test/script/builtin-options.ixx deleted file mode 100644 index bdb95b4..0000000 --- a/libbuild2/test/script/builtin-options.ixx +++ /dev/null @@ -1,188 +0,0 @@ -// -*- C++ -*- -// -// This file was generated by CLI, a command line interface -// compiler for C++. -// - -// Begin prologue. 
-// -// -// End prologue. - -#include - -namespace build2 -{ - namespace test - { - namespace script - { - namespace cli - { - // unknown_mode - // - inline unknown_mode:: - unknown_mode (value v) - : v_ (v) - { - } - - // exception - // - inline ::std::ostream& - operator<< (::std::ostream& os, const exception& e) - { - e.print (os); - return os; - } - - // unknown_option - // - inline unknown_option:: - unknown_option (const std::string& option) - : option_ (option) - { - } - - inline const std::string& unknown_option:: - option () const - { - return option_; - } - - // unknown_argument - // - inline unknown_argument:: - unknown_argument (const std::string& argument) - : argument_ (argument) - { - } - - inline const std::string& unknown_argument:: - argument () const - { - return argument_; - } - - // missing_value - // - inline missing_value:: - missing_value (const std::string& option) - : option_ (option) - { - } - - inline const std::string& missing_value:: - option () const - { - return option_; - } - - // invalid_value - // - inline invalid_value:: - invalid_value (const std::string& option, - const std::string& value, - const std::string& message) - : option_ (option), - value_ (value), - message_ (message) - { - } - - inline const std::string& invalid_value:: - option () const - { - return option_; - } - - inline const std::string& invalid_value:: - value () const - { - return value_; - } - - inline const std::string& invalid_value:: - message () const - { - return message_; - } - - // argv_scanner - // - inline argv_scanner:: - argv_scanner (int& argc, char** argv, bool erase) - : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase) - { - } - - inline argv_scanner:: - argv_scanner (int start, int& argc, char** argv, bool erase) - : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase) - { - } - - inline int argv_scanner:: - end () const - { - return i_; - } - - // vector_scanner - // - inline vector_scanner:: - vector_scanner (const std::vector& v, std::size_t i) - : v_ (v), i_ (i) - { - } - - inline std::size_t vector_scanner:: - end () const - { - return i_; - } - - inline void vector_scanner:: - reset (std::size_t i) - { - i_ = i; - } - } - } - } -} - -namespace build2 -{ - namespace test - { - namespace script - { - // set_options - // - - inline const bool& set_options:: - exact () const - { - return this->exact_; - } - - inline const bool& set_options:: - newline () const - { - return this->newline_; - } - - inline const bool& set_options:: - whitespace () const - { - return this->whitespace_; - } - } - } -} - -// Begin epilogue. -// -// -// End epilogue. diff --git a/libbuild2/test/script/builtin.cli b/libbuild2/test/script/builtin.cli deleted file mode 100644 index 42b26d2..0000000 --- a/libbuild2/test/script/builtin.cli +++ /dev/null @@ -1,25 +0,0 @@ -// file : libbuild2/test/script/builtin.cli -// license : MIT; see accompanying LICENSE file - -// Note that options in this file are undocumented because we generate neither -// the usage printing code nor man pages. Instead, they are documented in the -// Testscript Language Manual's builtin descriptions. -// -namespace build2 -{ - namespace test - { - namespace script - { - // Pseudo-builtin options. 
- // - - class set_options - { - bool --exact|-e; - bool --newline|-n; - bool --whitespace|-w; - }; - } - } -} diff --git a/libbuild2/test/script/lexer+command-expansion.test.testscript b/libbuild2/test/script/lexer+command-expansion.test.testscript deleted file mode 100644 index 2cb6587..0000000 --- a/libbuild2/test/script/lexer+command-expansion.test.testscript +++ /dev/null @@ -1,247 +0,0 @@ -# file : libbuild2/test/script/lexer+command-expansion.test.testscript -# license : MIT; see accompanying LICENSE file - -test.arguments = command-expansion - -: pass-redirect -: -{ - : in - : - $* <:"0<|" >>EOO - '0' - <| - EOO - - : arg-in - : - $* <:"0 <|" >>EOO - '0 ' - <| - EOO - - : out - : - $* <:"1>|" >>EOO - '1' - >| - EOO - - : arg-out - : - $* <:"1 >|" >>EOO - '1 ' - >| - EOO -} - -: null-redirect -: -{ - : in - : - $* <:"0<-" >>EOO - '0' - <- - EOO - - : arg-in - : - $* <:"0 <-" >>EOO - '0 ' - <- - EOO - - : out - : - $* <:"1>-" >>EOO - '1' - >- - EOO - - : arg-out - : - $* <:"1 >-" >>EOO - '1 ' - >- - EOO -} - -: trace-redirect -: -{ - : out - : - $* <:"1>!" >>EOO - '1' - >! - EOO - - : arg-out - : - $* <:"1 >!" >>EOO - '1 ' - >! - EOO -} - -: merge-redirect -: -{ - : out - : - $* <:"1>&2" >>EOO - '1' - >& - '2' - EOO - - : arg-out - : - $* <:"1 >&2" >>EOO - '1 ' - >& - '2' - EOO -} - -: str-redirect -: -{ - : in - : - { - : newline - : - $* <:"0>EOO - '0' - < - 'a b' - EOO - - : no-newline - : - $* <:"0<:a b" >>EOO - '0' - <: - 'a b' - EOO - } - - : out - : - { - : newline - : - $* <:"1>a b" >>EOO - '1' - > - 'a b' - EOO - - : no-newline - : - $* <:"1>:a b" >>EOO - '1' - >: - 'a b' - EOO - } -} - -: doc-redirect -: -{ - : in - : - { - : newline - : - $* <:"0<>EOO - '0' - << - 'E O I' - EOO - - : no-newline - : - $* <:"0<<:E O I" >>EOO - '0' - <<: - 'E O I' - EOO - } - - : out - : - { - : newline - : - $* <:"1>>E O O" >>EOO - '1' - >> - 'E O O' - EOO - - : no-newline - : - $* <:"1>>:E O O" >>EOO - '1' - >>: - 'E O O' - EOO - } -} - -: file-redirect -: -{ - : in - : - $* <:"0<<>EOO - '0' - <<< - 'a b' - EOO - - : out - : - $* <:"1>=a b" >>EOO - '1' - >= - 'a b' - EOO - - : out-app - : - $* <:"1>+a b" >>EOO - '1' - >+ - 'a b' - EOO -} - -: cleanup -: -{ - : always - : - $* <:"&file" >>EOO - & - 'file' - EOO - - : maybe - : - $* <:"&?file" >>EOO - &? - 'file' - EOO - - : never - : - $* <:"&!file" >>EOO - &! - 'file' - EOO -} diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx index 26d77b5..a94109b 100644 --- a/libbuild2/test/script/lexer.cxx +++ b/libbuild2/test/script/lexer.cxx @@ -15,8 +15,16 @@ namespace build2 { using type = token_type; + build2::script::redirect_aliases lexer::redirect_aliases { + type (type::in_str), + type (type::in_doc), + type (type::in_file), + type (type::out_str), + type (type::out_doc), + type (type::out_file_cmp)}; + void lexer:: - mode (base_mode m, char ps, optional esc) + mode (base_mode m, char ps, optional esc, uintptr_t data) { bool a (false); // attributes @@ -77,43 +85,6 @@ namespace build2 s2 = " "; break; } - - case lexer_mode::command_expansion: - { - // Note that whitespaces are not word separators in this mode. - // - s1 = "|&<>"; - s2 = " "; - s = false; - break; - } - case lexer_mode::here_line_single: - { - // This one is like a single-quoted string except it treats - // newlines as a separator. We also treat quotes as literals. - // - // Note that it might be tempting to enable line continuation - // escapes. 
However, we will then have to also enable escaping of - // the backslash, which makes it a lot less tempting. - // - s1 = "\n"; - s2 = " "; - esc = ""; // Disable escape sequences. - s = false; - q = false; - break; - } - case lexer_mode::here_line_double: - { - // This one is like a double-quoted string except it treats - // newlines as a separator. We also treat quotes as literals. - // - s1 = "$(\n"; - s2 = " "; - s = false; - q = false; - break; - } case lexer_mode::description_line: { // This one is like a single-quoted string and has an ad hoc @@ -138,7 +109,7 @@ namespace build2 } assert (ps == '\0'); - state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: @@ -152,17 +123,12 @@ namespace build2 case lexer_mode::first_token: case lexer_mode::second_token: case lexer_mode::variable_line: - case lexer_mode::command_expansion: - case lexer_mode::here_line_single: - case lexer_mode::here_line_double: r = next_line (); break; case lexer_mode::description_line: r = next_description (); break; - default: - r = base_lexer::next (); - break; + default: return base_lexer::next (); } if (r.qtype != quote_type::unquoted) @@ -174,7 +140,7 @@ namespace build2 token lexer:: next_line () { - bool sep (skip_spaces ()); + bool sep (skip_spaces ().first); xchar c (get ()); uint64_t ln (c.line), cn (c.column); @@ -182,38 +148,9 @@ namespace build2 state st (state_.top ()); // Make copy (see first/second_token). lexer_mode m (st.mode); - auto make_token = [&sep, &m, ln, cn] (type t, string v = string ()) + auto make_token = [&sep, ln, cn] (type t) { - bool q (m == lexer_mode::here_line_double); - - return token (t, move (v), sep, - (q ? quote_type::double_ : quote_type::unquoted), q, - ln, cn, - token_printer); - }; - - auto make_token_with_modifiers = - [&make_token, this] (type t, - const char* mods, // To recorgnize. - const char* stop = nullptr) // To stop after. - { - string v; - if (mods != nullptr) - { - for (xchar p (peek ()); - (strchr (mods, p) != nullptr && // Modifier. - strchr (v.c_str (), p) == nullptr); // Not already seen. - p = peek ()) - { - get (); - v += p; - - if (stop != nullptr && strchr (stop, p) != nullptr) - break; - } - } - - return make_token (t, move (v)); + return token (t, sep, ln, cn, token_printer); }; // Handle attributes (do it first to make sure the flag is cleared @@ -240,32 +177,23 @@ namespace build2 // NOTE: remember to update mode() if adding new special characters. - if (m != lexer_mode::command_expansion) + switch (c) { - switch (c) + case '\n': { - case '\n': - { - // Expire variable value mode at the end of the line. - // - if (m == lexer_mode::variable_line) - state_.pop (); + // Expire variable value mode at the end of the line. + // + if (m == lexer_mode::variable_line) + state_.pop (); - sep = true; // Treat newline as always separated. - return make_token (type::newline); - } + sep = true; // Treat newline as always separated. + return make_token (type::newline); } - } - if (m != lexer_mode::here_line_single) - { - switch (c) - { - // Variable expansion, function call, and evaluation context. - // - case '$': return make_token (type::dollar); - case '(': return make_token (type::lparen); - } + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); } // Line separators. @@ -313,133 +241,14 @@ namespace build2 } } - // Command operators/separators. 
+ // Command operators. // if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token || - m == lexer_mode::command_expansion) + m == lexer_mode::second_token) { - switch (c) - { - // |, || - // - case '|': - { - if (peek () == '|') - { - get (); - return make_token (type::log_or); - } - else - return make_token (type::pipe); - } - // &, && - // - case '&': - { - xchar p (peek ()); - - if (p == '&') - { - get (); - return make_token (type::log_and); - } - - // These modifiers are mutually exclusive so stop after seeing - // either one. - // - return make_token_with_modifiers (type::clean, "!?", "!?"); - } - // < - // - case '<': - { - type r (type::in_str); - xchar p (peek ()); - - if (p == '|' || p == '-' || p == '<') - { - get (); - - switch (p) - { - case '|': return make_token (type::in_pass); - case '-': return make_token (type::in_null); - case '<': - { - r = type::in_doc; - p = peek (); - - if (p == '<') - { - get (); - r = type::in_file; - } - break; - } - } - } - - // Handle modifiers. - // - const char* mods (nullptr); - switch (r) - { - case type::in_str: - case type::in_doc: mods = ":/"; break; - } - - return make_token_with_modifiers (r, mods); - } - // > - // - case '>': - { - type r (type::out_str); - xchar p (peek ()); - - if (p == '|' || p == '-' || p == '!' || p == '&' || - p == '=' || p == '+' || p == '>') - { - get (); - - switch (p) - { - case '|': return make_token (type::out_pass); - case '-': return make_token (type::out_null); - case '!': return make_token (type::out_trace); - case '&': return make_token (type::out_merge); - case '=': return make_token (type::out_file_ovr); - case '+': return make_token (type::out_file_app); - case '>': - { - r = type::out_doc; - p = peek (); - - if (p == '>') - { - get (); - r = type::out_file_cmp; - } - break; - } - } - } - - // Handle modifiers. - // - const char* mods (nullptr); - const char* stop (nullptr); - switch (r) - { - case type::out_str: - case type::out_doc: mods = ":/~"; stop = "~"; break; - } - - return make_token_with_modifiers (r, mods, stop); - } - } + if (optional t = next_cmd_op (c, sep)) + return move (*t); } // Dot, plus/minus, and left/right curly braces. diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx index 5763e3b..452e794 100644 --- a/libbuild2/test/script/lexer.hxx +++ b/libbuild2/test/script/lexer.hxx @@ -7,7 +7,7 @@ #include #include -#include +#include #include @@ -17,9 +17,9 @@ namespace build2 { namespace script { - struct lexer_mode: build2::lexer_mode + struct lexer_mode: build2::script::lexer_mode { - using base_type = build2::lexer_mode; + using base_type = build2::script::lexer_mode; enum { @@ -27,22 +27,18 @@ namespace build2 first_token, // Expires at the end of the token. second_token, // Expires at the end of the token. variable_line, // Expires at the end of the line. - command_expansion, - here_line_single, - here_line_double, description_line // Expires at the end of the line. }; lexer_mode () = default; lexer_mode (value_type v): base_type (v) {} - lexer_mode (base_type v): base_type (v) {} + lexer_mode (build2::lexer_mode v): base_type (v) {} }; - class lexer: public build2::lexer + class lexer: public build2::script::lexer { public: - using base_lexer = build2::lexer; - using base_mode = build2::lexer_mode; + using base_lexer = build2::script::lexer; // Note that neither the name nor escape arguments are copied. 
// @@ -52,28 +48,25 @@ namespace build2 const char* escapes = nullptr) : base_lexer (is, name, 1 /* line */, nullptr /* escapes */, - false /* set_mode */) + false /* set_mode */, + redirect_aliases) { mode (m, '\0', escapes); } virtual void - mode (base_mode, + mode (build2::lexer_mode, char = '\0', - optional = nullopt) override; - - // Number of quoted (double or single) tokens since last reset. - // - size_t - quoted () const {return quoted_;} - - void - reset_quoted (size_t q) {quoted_ = q;} + optional = nullopt, + uintptr_t = 0) override; virtual token next () override; - protected: + public: + static redirect_aliases_type redirect_aliases; + + private: token next_line (); @@ -82,9 +75,6 @@ namespace build2 virtual token word (state, bool) override; - - protected: - size_t quoted_; }; } } diff --git a/libbuild2/test/script/lexer.test.cxx b/libbuild2/test/script/lexer.test.cxx index 1512e58..9c64616 100644 --- a/libbuild2/test/script/lexer.test.cxx +++ b/libbuild2/test/script/lexer.test.cxx @@ -32,9 +32,6 @@ namespace build2 else if (s == "first-token") m = lexer_mode::first_token; else if (s == "second-token") m = lexer_mode::second_token; else if (s == "variable-line") m = lexer_mode::variable_line; - else if (s == "command-expansion") m = lexer_mode::command_expansion; - else if (s == "here-line-single") m = lexer_mode::here_line_single; - else if (s == "here-line-double") m = lexer_mode::here_line_double; else if (s == "description-line") m = lexer_mode::description_line; else if (s == "variable") m = lexer_mode::variable; else assert (false); @@ -46,14 +43,10 @@ namespace build2 // Some modes auto-expire so we need something underneath. // - bool u (m == lexer_mode::first_token || - m == lexer_mode::second_token || - m == lexer_mode::variable_line || - m == lexer_mode::description_line || - m == lexer_mode::variable); + bool u (m != lexer_mode::command_line); path_name in (""); - lexer l (cin, in, u ? lexer_mode::command_line : m); + lexer l (cin, in, lexer_mode::command_line); if (u) l.mode (m); @@ -63,7 +56,7 @@ namespace build2 { // Print each token on a separate line without quoting operators. 
// - t.printer (cout, t, false); + t.printer (cout, t, print_mode::normal); cout << endl; } } diff --git a/libbuild2/test/script/parser+exit.test.testscript b/libbuild2/test/script/parser+exit.test.testscript index c6327df..44728a5 100644 --- a/libbuild2/test/script/parser+exit.test.testscript +++ b/libbuild2/test/script/parser+exit.test.testscript @@ -22,5 +22,5 @@ EOO $* <>EOE != 0 cmd != 1 <"foo" EOI -testscript:1:10: error: unexpected '<' after command exit status +testscript:1:10: error: expected newline instead of '<' EOE diff --git a/libbuild2/test/script/parser+redirect.test.testscript b/libbuild2/test/script/parser+redirect.test.testscript index 3858808..79530e0 100644 --- a/libbuild2/test/script/parser+redirect.test.testscript +++ b/libbuild2/test/script/parser+redirect.test.testscript @@ -49,7 +49,7 @@ : portable-path : $* <>EOO - cmd </EOO_ 2>/EOE_ + cmd <>/EOO_ 2>>/EOE_ foo EOI_ bar @@ -57,7 +57,7 @@ baz EOE_ EOI - cmd </EOO_ 2>/EOE_ + cmd <>/EOO_ 2>>/EOE_ foo EOI_ bar @@ -113,13 +113,13 @@ : portable-path : $* <>EOO - cmd >/~%EOF% 2>/~%EOE% + cmd >>/~%EOF% 2>>/~%EOE% foo EOF bar EOE EOI - cmd >/~%EOF% 2>/~%EOE% + cmd >>/~%EOF% 2>>/~%EOE% foo EOF bar diff --git a/libbuild2/test/script/parser+regex.test.testscript b/libbuild2/test/script/parser+regex.test.testscript index 8627304..db418b3 100644 --- a/libbuild2/test/script/parser+regex.test.testscript +++ b/libbuild2/test/script/parser+regex.test.testscript @@ -162,9 +162,12 @@ EOE EOO - : no-newline + : no-newline-str : $* <'cmd >:~/fo*/' >'cmd >:~/fo*/' + + : no-newline-doc + : $* <>EOO cmd 2>>:~/EOE/ foo diff --git a/libbuild2/test/script/parser+variable.test.testscript b/libbuild2/test/script/parser+variable.test.testscript new file mode 100644 index 0000000..3751a5f --- /dev/null +++ b/libbuild2/test/script/parser+variable.test.testscript @@ -0,0 +1,19 @@ +# file : libbuild2/test/script/parser+variable.test.testscript +# license : MIT; see accompanying LICENSE file + +: assignment +: +$* <>EOO +a = b +echo $a +EOI +echo b +EOO + +: empty-name +: +$* <>EOE != 0 += b +EOI +testscript:1:1: error: missing variable name +EOE diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx index 06cefc7..f663c11 100644 --- a/libbuild2/test/script/parser.cxx +++ b/libbuild2/test/script/parser.cxx @@ -3,8 +3,6 @@ #include -#include - #include // sched, keep_going #include @@ -316,7 +314,7 @@ namespace build2 // Determine the line type/start token. // line_type lt; - type st (type::eos); + type st (type::eos); // Later, can only be set to plus or minus. switch (tt) { @@ -372,51 +370,7 @@ namespace build2 } default: { - // Either variable assignment or test command. - // - replay_save (); // Start saving tokens from the current one. - next (t, tt); - - // Decide whether this is a variable assignment or a command. - // - // It is an assignment if the first token is an unquoted name and - // the next token is an assign/append/prepend operator. Assignment - // to a computed variable name must use the set builtin. - // - // Note also thatspecial commands take precedence over variable - // assignments. - // - lt = line_type::cmd; // Default. 
- - if (tt == type::word && t.qtype == quote_type::unquoted) - { - const string& n (t.value); - - if (n == "if") lt = line_type::cmd_if; - else if (n == "if!") lt = line_type::cmd_ifn; - else if (n == "elif") lt = line_type::cmd_elif; - else if (n == "elif!") lt = line_type::cmd_elifn; - else if (n == "else") lt = line_type::cmd_else; - else if (n == "end") lt = line_type::cmd_end; - else - { - // Switch the recognition of leading variable assignments for - // the next token. This is safe to do because we know we - // cannot be in the quoted mode (since the current token is - // not quoted). - // - type p (peek (lexer_mode::second_token)); - - if (p == type::assign || - p == type::prepend || - p == type::append) - { - lt = line_type::var; - st = p; - } - } - } - + lt = pre_parse_line_start (t, tt, lexer_mode::second_token); break; } } @@ -435,7 +389,7 @@ namespace build2 // string& n (t.value); - if (n == "*" || n == "~" || n == "@" || digit (n)) + if (special_variable (n)) fail (t) << "attempt to set '" << n << "' variable directly"; // Pre-enter the variables now while we are executing serially. @@ -444,6 +398,11 @@ namespace build2 ln.var = &script_->var_pool.insert (move (n)); next (t, tt); // Assignment kind. + + // We cannot reuse the value mode since it will recognize `{` + // which we want to treat as a literal. + // + mode (lexer_mode::variable_line); parse_variable_line (t, tt); semi = (tt == type::semi); @@ -469,7 +428,7 @@ namespace build2 pair p; if (lt != line_type::cmd_else && lt != line_type::cmd_end) - p = parse_command_expr (t, tt); + p = parse_command_expr (t, tt, lexer::redirect_aliases); // Colon and semicolon are only valid in test command lines and // after 'end' in if-else. Note that we still recognize them @@ -1039,7 +998,7 @@ namespace build2 const path_name* op (path_); path_ = &pn; - lexer* ol (lexer_); + build2::script::lexer* ol (lexer_); set_lexer (&l); string oip (id_prefix_); @@ -1281,35 +1240,6 @@ namespace build2 return r; } - value parser:: - parse_variable_line (token& t, type& tt) - { - // enter: assignment - // leave: newline or semi - - // We cannot reuse the value mode since it will recognize `{` which we - // want to treat as a literal. - // - mode (lexer_mode::variable_line); - next_with_attributes (t, tt); - - // Parse value attributes if any. Note that it's ok not to have - // anything after the attributes (e.g., foo=[null]). - // - attributes_push (t, tt, true); - - // @@ PAT: Should we expand patterns? Note that it will only be - // simple ones since we have disabled {}. Also, what would be the - // pattern base directory? - // - return tt != type::newline && tt != type::semi - ? parse_value (t, tt, - pattern_mode::ignore, - "variable value", - nullptr) - : value (names ()); - } - command_expr parser:: parse_command_line (token& t, type& tt) { @@ -1318,7 +1248,8 @@ namespace build2 // Note: this one is only used during execution. - pair p (parse_command_expr (t, tt)); + pair p ( + parse_command_expr (t, tt, lexer::redirect_aliases)); switch (tt) { @@ -1334,1671 +1265,214 @@ namespace build2 return move (p.first); } - // Parse the regular expression representation (non-empty string value - // framed with introducer characters and optionally followed by flag - // characters from the {di} set, for example '/foo/id') into - // components. Also return end-of-parsing position if requested, - // otherwise treat any unparsed characters left as an error. // - struct regex_parts + // Execute. 
+ // + + void parser:: + execute (script& s, runner& r) { - string value; - char intro; - string flags; // Combination of characters from {di} set. + assert (s.state == scope_state::unknown); - // Create a special empty object. - // - regex_parts (): intro ('\0') {} + auto g ( + make_exception_guard ( + [&s] () {s.state = scope_state::failed;})); - regex_parts (string v, char i, string f) - : value (move (v)), intro (i), flags (move (f)) {} - }; + if (!s.empty ()) + execute (s, s, r); + else + s.state = scope_state::passed; + } - static regex_parts - parse_regex (const string& s, - const location& l, - const char* what, - size_t* end = nullptr) + void parser:: + execute (scope& sc, script& s, runner& r) { - if (s.empty ()) - fail (l) << "no introducer character in " << what; - - size_t p (s.find (s[0], 1)); // Find terminating introducer. - - if (p == string::npos) - fail (l) << "no closing introducer character in " << what; + path_ = nullptr; // Set by replays. - size_t rn (p - 1); // Regex length. - if (rn == 0) - fail (l) << what << " is empty"; + pre_parse_ = false; - // Find end-of-flags position. - // - size_t fp (++p); // Save flags starting position. - for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ; + set_lexer (nullptr); - // If string end is not reached then report invalid flags, unless - // end-of-parsing position is requested (which means regex is just a - // prefix). - // - if (s[p] != '\0' && end == nullptr) - fail (l) << "junk at the end of " << what; + script_ = &s; + runner_ = &r; + group_ = nullptr; + id_map_ = nullptr; + include_set_ = nullptr; + scope_ = ≻ - if (end != nullptr) - *end = p; + //@@ PAT TODO: set pbase_? - return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp)); + exec_scope_body (); } - pair parser:: - parse_command_expr (token& t, type& tt) + static void + execute_impl (scope& s, script& scr, runner& r) { - // enter: first token of the command line - // leave: - - command_expr expr; - - // OR-ed to an implied false for the first term. - // - expr.push_back ({expr_operator::log_or, command_pipe ()}); - - command c; // Command being assembled. - - // Make sure the command makes sense. - // - auto check_command = [&c, this] (const location& l, bool last) + try { - if (c.out.type == redirect_type::merge && - c.err.type == redirect_type::merge) - fail (l) << "stdout and stderr redirected to each other"; - - if (!last && c.out.type != redirect_type::none) - fail (l) << "stdout is both redirected and piped"; - }; - - // Check that the introducer character differs from '/' if the - // portable path modifier is specified. Must be called before - // parse_regex() (see below) to make sure its diagnostics is - // meaningful. - // - // Note that the portable path modifier assumes '/' to be a valid - // regex character and so makes it indistinguishable from the - // terminating introducer. - // - auto check_regex_mod = [this] (const string& mod, - const string& re, - const location& l, - const char* what) + parser p (scr.test_target.ctx); + p.execute (s, scr, r); + } + catch (const failed&) { - // Handles empty regex properly. - // - if (mod.find ('/') != string::npos && re[0] == '/') - fail (l) << "portable path modifier and '/' introducer in " - << what; - }; + s.state = scope_state::failed; + } + } - // Pending positions where the next word should go. 
- // - enum class pending - { - none, - program, - in_string, - in_document, - in_file, - out_merge, - out_string, - out_str_regex, - out_document, - out_doc_regex, - out_file, - err_merge, - err_string, - err_str_regex, - err_document, - err_doc_regex, - err_file, - clean - }; - pending p (pending::program); - string mod; // Modifiers for pending in_* and out_* positions. - here_docs hd; // Expected here-documents. + void parser:: + exec_scope_body () + { + runner_->enter (*scope_, scope_->start_loc_); - // Add the next word to either one of the pending positions or to - // program arguments by default. + // Note that we rely on "small function object" optimization for the + // exec_*() lambdas. // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) + auto exec_set = [this] (const variable& var, + token& t, build2::script::token_type& tt, + const location&) { - auto add_merge = [&l, this] (redirect& r, const string& w, int fd) - { - try - { - size_t n; - if (stoi (w, &n) == fd && n == w.size ()) - { - r.fd = fd; - return; - } - } - catch (const exception&) {} // Fall through. - - fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect " - << "file descriptor must be " << fd; - }; - - auto add_here_str = [] (redirect& r, string&& w) - { - if (r.modifiers.find (':') == string::npos) - w += '\n'; - r.str = move (w); - }; - - auto add_here_str_regex = [&l, &check_regex_mod] ( - redirect& r, int fd, string&& w) - { - const char* what (nullptr); - switch (fd) - { - case 1: what = "stdout regex redirect"; break; - case 2: what = "stderr regex redirect"; break; - } - - check_regex_mod (r.modifiers, w, l, what); - - regex_parts rp (parse_regex (w, l, what)); - - regex_lines& re (r.regex); - re.intro = rp.intro; - - re.lines.emplace_back ( - l.line, l.column, move (rp.value), move (rp.flags)); - - // Add final blank line unless suppressed. - // - // Note that the position is synthetic, but that's ok as we don't - // expect any diagnostics to refer this line. - // - if (r.modifiers.find (':') == string::npos) - re.lines.emplace_back (l.line, l.column, string (), false); - }; + next (t, tt); + type kind (tt); // Assignment kind. - auto parse_path = [&l, this] (string&& w, const char* what) -> path - { - try - { - path p (move (w)); + // We cannot reuse the value mode (see above for details). + // + mode (lexer_mode::variable_line); + value rhs (parse_variable_line (t, tt)); - if (!p.empty ()) - { - p.normalize (); - return p; - } + if (tt == type::semi) + next (t, tt); - fail (l) << "empty " << what << endf; - } - catch (const invalid_path& e) - { - fail (l) << "invalid " << what << " '" << e.path << "'" << endf; - } - }; + assert (tt == type::newline); - auto add_file = [&parse_path] (redirect& r, int fd, string&& w) - { - const char* what (nullptr); - switch (fd) - { - case 0: what = "stdin redirect path"; break; - case 1: what = "stdout redirect path"; break; - case 2: what = "stderr redirect path"; break; - } + // Assign. + // + value& lhs (kind == type::assign + ? scope_->assign (var) + : scope_->append (var)); - r.file.path = parse_path (move (w), what); - }; + apply_value_attributes (&var, lhs, move (rhs), kind); - switch (p) + // If we change any of the test.* values, then reset the $*, $N + // special aliases. 
+ // + if (var.name == script_->test_var.name || + var.name == script_->options_var.name || + var.name == script_->arguments_var.name || + var.name == script_->redirects_var.name || + var.name == script_->cleanups_var.name) { - case pending::none: c.arguments.push_back (move (w)); break; - case pending::program: - c.program = parse_path (move (w), "program path"); - break; - - case pending::out_merge: add_merge (c.out, w, 2); break; - case pending::err_merge: add_merge (c.err, w, 1); break; - - case pending::in_string: add_here_str (c.in, move (w)); break; - case pending::out_string: add_here_str (c.out, move (w)); break; - case pending::err_string: add_here_str (c.err, move (w)); break; - - case pending::out_str_regex: - { - add_here_str_regex (c.out, 1, move (w)); - break; - } - case pending::err_str_regex: - { - add_here_str_regex (c.err, 2, move (w)); - break; - } - - // These are handled specially below. - // - case pending::in_document: - case pending::out_document: - case pending::err_document: - case pending::out_doc_regex: - case pending::err_doc_regex: assert (false); break; - - case pending::in_file: add_file (c.in, 0, move (w)); break; - case pending::out_file: add_file (c.out, 1, move (w)); break; - case pending::err_file: add_file (c.err, 2, move (w)); break; - - case pending::clean: - { - cleanup_type t; - switch (mod[0]) // Ok, if empty - { - case '!': t = cleanup_type::never; break; - case '?': t = cleanup_type::maybe; break; - default: t = cleanup_type::always; break; - } - - c.cleanups.push_back ( - {t, parse_path (move (w), "cleanup path")}); - break; - } + scope_->reset_special (); } - - p = pending::none; - mod.clear (); }; - // Make sure we don't have any pending positions to fill. + // Is set later, right before the exec_lines() call. // - auto check_pending = [&p, this] (const location& l) + command_type ct; + + auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt, + size_t li, + bool single, + const location& ll) { - const char* what (nullptr); + // We use the 0 index to signal that this is the only command. + // Note that we only do this for test commands. 
+ // + if (ct == command_type::test && single) + li = 0; - switch (p) - { - case pending::none: break; - case pending::program: what = "program"; break; - case pending::in_string: what = "stdin here-string"; break; - case pending::in_document: what = "stdin here-document end"; break; - case pending::in_file: what = "stdin file"; break; - case pending::out_merge: what = "stdout file descriptor"; break; - case pending::out_string: what = "stdout here-string"; break; - case pending::out_document: what = "stdout here-document end"; break; - case pending::out_file: what = "stdout file"; break; - case pending::err_merge: what = "stderr file descriptor"; break; - case pending::err_string: what = "stderr here-string"; break; - case pending::err_document: what = "stderr here-document end"; break; - case pending::err_file: what = "stderr file"; break; - case pending::clean: what = "cleanup path"; break; - - case pending::out_str_regex: - { - what = "stdout here-string regex"; - break; - } - case pending::err_str_regex: - { - what = "stderr here-string regex"; - break; - } - case pending::out_doc_regex: - { - what = "stdout here-document regex end"; - break; - } - case pending::err_doc_regex: - { - what = "stderr here-document regex end"; - break; - } - } + command_expr ce ( + parse_command_line (t, static_cast (tt))); - if (what != nullptr) - fail (l) << "missing " << what; + runner_->run (*scope_, ce, ct, li, ll); }; - // Parse the redirect operator. - // - auto parse_redirect = - [&c, &expr, &p, &mod, &hd, this] (token& t, const location& l) + auto exec_if = [this] (token& t, build2::script::token_type& tt, + size_t li, + const location& ll) { - // Our semantics is the last redirect seen takes effect. - // - assert (p == pending::none && mod.empty ()); + command_expr ce ( + parse_command_line (t, static_cast (tt))); - // See if we have the file descriptor. + // Assume if-else always involves multiple commands. // - unsigned long fd (3); - if (!t.separated) - { - if (c.arguments.empty ()) - fail (l) << "missing redirect file descriptor"; - - const string& s (c.arguments.back ()); + return runner_->run_if (*scope_, ce, li, ll); + }; - try - { - size_t n; - fd = stoul (s, &n); + size_t li (1); - if (n != s.size () || fd > 2) - throw invalid_argument (string ()); - } - catch (const exception&) - { - fail (l) << "invalid redirect file descriptor '" << s << "'"; - } + if (test* t = dynamic_cast (scope_)) + { + ct = command_type::test; - c.arguments.pop_back (); - } + exec_lines (t->tests_.begin (), t->tests_.end (), + exec_set, exec_cmd, exec_if, + li); + } + else if (group* g = dynamic_cast (scope_)) + { + ct = command_type::setup; - type tt (t.type); + bool exec_scope (exec_lines (g->setup_.begin (), g->setup_.end (), + exec_set, exec_cmd, exec_if, + li)); - // Validate/set default file descriptor. - // - switch (tt) + if (exec_scope) { - case type::in_pass: - case type::in_null: - case type::in_str: - case type::in_doc: - case type::in_file: - { - if ((fd = fd == 3 ? 
0 : fd) != 0) - fail (l) << "invalid in redirect file descriptor " << fd; - - if (!expr.back ().pipe.empty ()) - fail (l) << "stdin is both piped and redirected"; + atomic_count task_count (0); + wait_guard wg (g->root.test_target.ctx, task_count); - break; - } - case type::out_pass: - case type::out_null: - case type::out_trace: - case type::out_merge: - case type::out_str: - case type::out_doc: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: + // Start asynchronous execution of inner scopes keeping track of + // how many we have handled. + // + for (unique_ptr& chain: g->scopes) { - if ((fd = fd == 3 ? 1 : fd) == 0) - fail (l) << "invalid out redirect file descriptor " << fd; - - break; - } - } - - mod = move (t.value); - - redirect_type rt (redirect_type::none); - switch (tt) - { - case type::in_pass: - case type::out_pass: rt = redirect_type::pass; break; - - case type::in_null: - case type::out_null: rt = redirect_type::null; break; + // Check if this scope is ignored (e.g., via config.test). + // + if (!runner_->test (*chain) || !exec_scope) + { + chain = nullptr; + continue; + } - case type::out_trace: rt = redirect_type::trace; break; + // Pick a scope from the if-else chain. + // + // In fact, we are going to drop all but the selected (if any) + // scope. This way we can re-examine the scope states later. It + // will also free some memory. + // + unique_ptr* ps; + for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain) + { + scope& s (**ps); - case type::out_merge: rt = redirect_type::merge; break; + if (!s.if_cond_) // Unconditional. + { + assert (s.if_chain == nullptr); + break; + } - case type::in_str: - case type::out_str: - { - bool re (mod.find ('~') != string::npos); - assert (tt == type::out_str || !re); + line l (move (*s.if_cond_)); + line_type lt (l.type); - rt = re - ? redirect_type::here_str_regex - : redirect_type::here_str_literal; + replay_data (move (l.tokens)); - break; - } + token t; + type tt; - case type::in_doc: - case type::out_doc: - { - bool re (mod.find ('~') != string::npos); - assert (tt == type::out_doc || !re); + next (t, tt); + const location ll (get_location (t)); + next (t, tt); // Skip to start of command. - rt = re - ? redirect_type::here_doc_regex - : redirect_type::here_doc_literal; - - break; - } - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: rt = redirect_type::file; break; - } - - redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err); - redirect_type overriden (r.type); - - r = redirect (rt); - - // Don't move as still may be used for pending here-document end - // marker processing. 
- // - r.modifiers = mod; - - switch (rt) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - break; - case redirect_type::merge: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_merge; break; - case 2: p = pending::err_merge; break; - } - break; - case redirect_type::here_str_literal: - switch (fd) - { - case 0: p = pending::in_string; break; - case 1: p = pending::out_string; break; - case 2: p = pending::err_string; break; - } - break; - case redirect_type::here_str_regex: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_str_regex; break; - case 2: p = pending::err_str_regex; break; - } - break; - case redirect_type::here_doc_literal: - switch (fd) - { - case 0: p = pending::in_document; break; - case 1: p = pending::out_document; break; - case 2: p = pending::err_document; break; - } - break; - case redirect_type::here_doc_regex: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_doc_regex; break; - case 2: p = pending::err_doc_regex; break; - } - break; - case redirect_type::file: - switch (fd) - { - case 0: p = pending::in_file; break; - case 1: p = pending::out_file; break; - case 2: p = pending::err_file; break; - } - - // Also sets for stdin, but this is harmless. - // - r.file.mode = tt == type::out_file_ovr - ? redirect_fmode::overwrite - : (tt == type::out_file_app - ? redirect_fmode::append - : redirect_fmode::compare); - - break; - - case redirect_type::here_doc_ref: assert (false); break; - } - - // If we are overriding a here-document, then remove the reference - // to this command redirect from the corresponding here_doc object. - // - if (!pre_parse_ && - (overriden == redirect_type::here_doc_literal || - overriden == redirect_type::here_doc_regex)) - { - size_t e (expr.size () - 1); - size_t p (expr.back ().pipe.size ()); - int f (static_cast (fd)); - - for (here_doc& d: hd) - { - small_vector& rs (d.redirects); - - auto i (find_if (rs.begin (), rs.end (), - [e, p, f] (const here_redirect& r) - { - return r.expr == e && - r.pipe == p && - r.fd == f; - })); - - if (i != rs.end ()) - { - rs.erase (i); - break; - } - } - } - }; - - // Set pending cleanup type. - // - auto parse_clean = [&p, &mod] (token& t) - { - p = pending::clean; - mod = move (t.value); - }; - - const location ll (get_location (t)); // Line location. - - // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, semicolon, exit status comparison, etc). - // - location l (ll); - names ns; // Reuse to reduce allocations. - - for (bool done (false); !done; l = get_location (t)) - { - switch (tt) - { - case type::semi: - case type::colon: - case type::newline: - { - done = true; - break; - } - - case type::equal: - case type::not_equal: - { - if (!pre_parse_) - check_pending (l); - - c.exit = parse_command_exit (t, tt); - - // Only a limited set of things can appear after the exit status - // so we check this here. 
- // - switch (tt) - { - case type::semi: - case type::colon: - case type::newline: - - case type::pipe: - case type::log_or: - case type::log_and: - break; - default: - fail (t) << "unexpected " << t << " after command exit status"; - } - - break; - } - - case type::pipe: - case type::log_or: - case type::log_and: - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::in_doc: - case type::out_str: - case type::out_doc: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - - case type::clean: - { - if (pre_parse_) - { - // The only things we need to handle here are the here-document - // and here-document regex end markers since we need to know - // how many of them to pre-parse after the command. - // - switch (tt) - { - case type::in_doc: - case type::out_doc: - mod = move (t.value); - - bool re (mod.find ('~') != string::npos); - const char* what (re - ? "here-document regex end marker" - : "here-document end marker"); - - // We require the end marker to be a literal, unquoted word. - // In particularm, we don't allow quoted because of cases - // like foo"$bar" (where we will see word 'foo'). - // - next (t, tt); - - // We require the end marker to be an unquoted or completely - // quoted word. The complete quoting becomes important for - // cases like foo"$bar" (where we will see word 'foo'). - // - // For good measure we could have also required it to be - // separated from the following token, but out grammar - // allows one to write >>EOO;. The problematic sequence - // would be >>FOO$bar -- on reparse it will be expanded - // as a single word. - // - if (tt != type::word || t.value.empty ()) - fail (t) << "expected " << what; - - peek (); - const token& p (peeked ()); - if (!p.separated) - { - switch (p.type) - { - case type::dollar: - case type::lparen: - fail (p) << what << " must be literal"; - } - } - - quote_type qt (t.qtype); - switch (qt) - { - case quote_type::unquoted: - qt = quote_type::single; // Treat as single-quoted. - break; - case quote_type::single: - case quote_type::double_: - if (t.qcomp) - break; - // Fall through. - case quote_type::mixed: - fail (t) << "partially-quoted " << what; - } - - regex_parts r; - string end (move (t.value)); - - if (re) - { - check_regex_mod (mod, end, l, what); - - r = parse_regex (end, l, what); - end = move (r.value); // The "cleared" end marker. - } - - bool literal (qt == quote_type::single); - bool shared (false); - - for (const auto& d: hd) - { - if (d.end == end) - { - auto check = [&t, &end, &re, this] (bool c, - const char* what) - { - if (!c) - fail (t) << "different " << what - << " for shared here-document " - << (re ? "regex '" : "'") << end << "'"; - }; - - check (d.modifiers == mod, "modifiers"); - check (d.literal == literal, "quoting"); - - if (re) - { - check (d.regex == r.intro, "introducers"); - check (d.regex_flags == r.flags, "global flags"); - } - - shared = true; - break; - } - } - - if (!shared) - hd.push_back ( - here_doc { - {}, - move (end), - literal, - move (mod), - r.intro, move (r.flags)}); - - break; - } - - next (t, tt); - break; - } - - // If this is one of the operators/separators, check that we - // don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the inner loop below. 
- // - switch (tt) - { - case type::pipe: - case type::log_or: - case type::log_and: - { - // Check that the previous command makes sense. - // - check_command (l, tt != type::pipe); - expr.back ().pipe.push_back (move (c)); - - c = command (); - p = pending::program; - - if (tt != type::pipe) - { - expr_operator o (tt == type::log_or - ? expr_operator::log_or - : expr_operator::log_and); - expr.push_back ({o, command_pipe ()}); - } - - break; - } - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::in_doc: - case type::out_str: - case type::out_doc: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (t, l); - break; - } - - case type::clean: - { - parse_clean (t); - break; - } - - default: assert (false); break; - } - - next (t, tt); - break; - } - default: - { - // Here-document end markers are literal (we verified that above - // during pre-parsing) and we need to know whether they were - // quoted. So handle this case specially. - // - { - int fd; - switch (p) - { - case pending::in_document: fd = 0; break; - case pending::out_document: - case pending::out_doc_regex: fd = 1; break; - case pending::err_document: - case pending::err_doc_regex: fd = 2; break; - default: fd = -1; break; - } - - if (fd != -1) - { - here_redirect rd { - expr.size () - 1, expr.back ().pipe.size (), fd}; - - string end (move (t.value)); - - regex_parts r; - - if (p == pending::out_doc_regex || - p == pending::err_doc_regex) - { - // We can't fail here as we already parsed all the end - // markers during pre-parsing stage, and so no need in the - // description. - // - r = parse_regex (end, l, ""); - end = move (r.value); // The "cleared" end marker. - } - - bool shared (false); - for (auto& d: hd) - { - // No need to check that redirects that share here-document - // have the same modifiers, etc. That have been done during - // pre-parsing. - // - if (d.end == end) - { - d.redirects.emplace_back (rd); - shared = true; - break; - } - } - - if (!shared) - hd.push_back ( - here_doc { - {rd}, - move (end), - (t.qtype == quote_type::unquoted || - t.qtype == quote_type::single), - move (mod), - r.intro, move (r.flags)}); - - p = pending::none; - mod.clear (); - - next (t, tt); - break; - } - } - - // Parse the next chunk as simple names to get expansion, etc. - // Note that we do it in the chunking mode to detect whether - // anything in each chunk is quoted. - // - // @@ PAT: should we support pattern expansion? This is even - // fuzzier than the variable case above. Though this is the - // shell semantics. Think what happens when we do rm *.txt? - // - reset_quoted (t); - parse_names (t, tt, - ns, - pattern_mode::ignore, - true, - "command line", - nullptr); - - if (pre_parse_) // Nothing else to do if we are pre-parsing. - break; - - // Process what we got. Determine whether anything inside was - // quoted (note that the current token is "next" and is not part - // of this). - // - bool q ((quoted () - - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); - - for (name& n: ns) - { - string s; - - try - { - s = value_traits::convert (move (n), nullptr); - } - catch (const invalid_argument&) - { - diag_record dr (fail (l)); - dr << "invalid string value "; - to_stream (dr.os, n, true); // Quote. - } - - // If it is a quoted chunk, then we add the word as is. - // Otherwise we re-lex it. 
But if the word doesn't contain any - // interesting characters (operators plus quotes/escapes), - // then no need to re-lex. - // - // NOTE: update quoting (script.cxx:to_stream_q()) if adding - // any new characters. - // - if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) - add_word (move (s), l); - else - { - // If the chunk re-parsing results in error, our diagnostics - // will look like this: - // - // :1:4: error: stdout merge redirect file descriptor must be 2 - // testscript:2:5: info: while parsing string '1>&a' - // - auto df = make_diag_frame ( - [s, &l](const diag_record& dr) - { - dr << info (l) << "while parsing string '" << s << "'"; - }); - - // When re-lexing we do "effective escaping" and only for - // ['"\] (quotes plus the backslash itself). In particular, - // there is no way to escape redirects, operators, etc. The - // idea is to prefer quoting except for passing literal - // quotes, for example: - // - // args = \"&foo\" - // cmd $args # cmd &foo - // - // args = 'x=\"foo bar\"' - // cmd $args # cmd x="foo bar" - // - - istringstream is (s); - path_name in (""); - lexer lex (is, in, - lexer_mode::command_expansion, - "\'\"\\"); - - // Treat the first "sub-token" as always separated from what - // we saw earlier. - // - // Note that this is not "our" token so we cannot do - // fail(t). Rather we should do fail(l). - // - token t (lex.next ()); - location l (build2::get_location (t, in)); - t.separated = true; - - string w; - bool f (t.type == type::eos); // If the whole thing is empty. - - for (; t.type != type::eos; t = lex.next ()) - { - type tt (t.type); - l = build2::get_location (t, in); - - // Re-lexing double-quotes will recognize $, ( inside as - // tokens so we have to reverse them back. Since we don't - // treat spaces as separators we can be sure we will get - // it right. - // - switch (tt) - { - case type::dollar: w += '$'; continue; - case type::lparen: w += '('; continue; - } - - // Retire the current word. We need to distinguish between - // empty and non-existent (e.g., > vs >""). - // - if (!w.empty () || f) - { - add_word (move (w), l); - f = false; - } - - if (tt == type::word) - { - w = move (t.value); - f = true; - continue; - } - - // If this is one of the operators/separators, check that - // we don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the outer loop above. - // - switch (tt) - { - case type::pipe: - case type::log_or: - case type::log_and: - { - // Check that the previous command makes sense. - // - check_command (l, tt != type::pipe); - expr.back ().pipe.push_back (move (c)); - - c = command (); - p = pending::program; - - if (tt != type::pipe) - { - expr_operator o (tt == type::log_or - ? expr_operator::log_or - : expr_operator::log_and); - expr.push_back ({o, command_pipe ()}); - } - - break; - } - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (t, l); - break; - } - - case type::clean: - { - parse_clean (t); - break; - } - - case type::in_doc: - case type::out_doc: - { - fail (l) << "here-document redirect in expansion"; - break; - } - } - } - - // Don't forget the last word. 
- // - if (!w.empty () || f) - add_word (move (w), l); - } - } - - ns.clear (); - break; - } - } - } - - if (!pre_parse_) - { - // Verify we don't have anything pending to be filled and the - // command makes sense. - // - check_pending (l); - check_command (l, true); - - expr.back ().pipe.push_back (move (c)); - } - - return make_pair (move (expr), move (hd)); - } - - command_exit parser:: - parse_command_exit (token& t, type& tt) - { - // enter: equal/not_equal - // leave: token after exit status (one parse_names() chunk) - - exit_comparison comp (tt == type::equal - ? exit_comparison::eq - : exit_comparison::ne); - - // The next chunk should be the exit status. - // - next (t, tt); - location l (get_location (t)); - names ns (parse_names (t, tt, - pattern_mode::ignore, - true, - "exit status", - nullptr)); - unsigned long es (256); - - if (!pre_parse_) - { - try - { - if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) - es = stoul (ns[0].value); - } - catch (const exception&) {} // Fall through. - - if (es > 255) - { - diag_record dr; - - dr << fail (l) << "expected exit status instead of "; - to_stream (dr.os, ns, true); // Quote. - - dr << info << "exit status is an unsigned integer less than 256"; - } - } - - return command_exit {comp, static_cast (es)}; - } - - void parser:: - parse_here_documents (token& t, type& tt, - pair& p) - { - // enter: newline - // leave: newline - - // Parse here-document fragments in the order they were mentioned on - // the command line. - // - for (here_doc& h: p.second) - { - // Switch to the here-line mode which is like single/double-quoted - // string but recognized the newline as a separator. - // - mode (h.literal - ? lexer_mode::here_line_single - : lexer_mode::here_line_double); - next (t, tt); - - parsed_doc v ( - parse_here_document (t, tt, h.end, h.modifiers, h.regex)); - - // If all the here-document redirects are overridden, then we just - // drop the fragment. - // - if (!pre_parse_ && !h.redirects.empty ()) - { - auto i (h.redirects.cbegin ()); - - command& c (p.first[i->expr].pipe[i->pipe]); - redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err); - - if (v.re) - { - assert (r.type == redirect_type::here_doc_regex); - - r.regex = move (v.regex); - r.regex.flags = move (h.regex_flags); - } - else - { - assert (r.type == redirect_type::here_doc_literal); - - r.str = move (v.str); - } - - r.end = move (h.end); - r.end_line = v.end_line; - r.end_column = v.end_column; - - // Note that our references cannot be invalidated because the - // command_expr/command-pipe vectors already contain all their - // elements. - // - for (++i; i != h.redirects.cend (); ++i) - { - command& c (p.first[i->expr].pipe[i->pipe]); - - (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) = - redirect (redirect_type::here_doc_ref, r); - } - } - - expire_mode (); - } - } - - parser::parsed_doc parser:: - parse_here_document (token& t, type& tt, - const string& em, - const string& mod, - char re) - { - // enter: first token on first line - // leave: newline (after end marker) - - // String literal. Note that when decide if to terminate the previously - // added line with a newline, we need to distinguish a yet empty result - // and the one that has a single blank line added. - // - optional rs; - - regex_lines rre; - - // Here-documents can be indented. The leading whitespaces of the end - // marker line (called strip prefix) determine the indentation. 
Every - // other line in the here-document should start with this prefix which - // is automatically stripped. The only exception is a blank line. - // - // The fact that the strip prefix is only known at the end, after - // seeing all the lines, is rather inconvenient. As a result, the way - // we implement this is a bit hackish (though there is also something - // elegant about it): at the end of the pre-parse stage we are going - // re-examine the sequence of tokens that comprise this here-document - // and "fix up" the first token of each line by stripping the prefix. - // - string sp; - - // Remember the position of the first token in this here-document. - // - size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0); - - // We will use the location of the first token on the line for the - // regex diagnostics. At the end of the loop it will point to the - // beginning of the end marker. - // - location l; - - while (tt != type::eos) - { - l = get_location (t); - - // Check if this is the end marker. For starters, it should be a - // single, unquoted word followed by a newline. - // - if (tt == type::word && - t.qtype == quote_type::unquoted && - peek () == type::newline) - { - const string& v (t.value); - - size_t vn (v.size ()); - size_t en (em.size ()); - - // Then check that it ends with the end marker. - // - if (vn >= en && v.compare (vn - en, en, em) == 0) - { - // Now check that the prefix only contains whitespaces. - // - size_t n (vn - en); - - if (v.find_first_not_of (" \t") >= n) - { - assert (pre_parse_ || n == 0); // Should have been stripped. - - if (n != 0) - sp.assign (v, 0, n); // Save the strip prefix. - - next (t, tt); // Get the newline. - break; - } - } - } - - // Expand the line (can be blank). - // - // @@ PAT: one could argue that if we do it in variables, then we - // should do it here as well. Though feels bizarre. - // - names ns (tt != type::newline - ? parse_names (t, tt, - pattern_mode::ignore, - false, - "here-document line", - nullptr) - : names ()); - - if (!pre_parse_) - { - // What shall we do if the expansion results in multiple names? - // For, example if the line contains just the variable expansion - // and it is of type strings. Adding all the elements space- - // separated seems like the natural thing to do. - // - string s; - for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) - { - string n; - - try - { - n = value_traits::convert (move (*i), nullptr); - } - catch (const invalid_argument&) - { - fail (l) << "invalid string value '" << *i << "'"; - } - - if (i == b) - s = move (n); - else - { - s += ' '; - s += n; - } - } - - if (!re) - { - // Add newline after previous line. - // - if (rs) - { - *rs += '\n'; - *rs += s; - } - else - rs = move (s); - } - else - { - // Due to expansion we can end up with multiple lines. If empty - // then will add a blank textual literal. - // - for (size_t p (0); p != string::npos; ) - { - string ln; - size_t np (s.find ('\n', p)); - - if (np != string::npos) - { - ln = string (s, p, np - p); - p = np + 1; - } - else - { - ln = string (s, p); - p = np; - } - - if (ln[0] != re) // Line doesn't start with regex introducer. - { - // This is a line-char literal (covers blank lines as well). - // - // Append textual literal. - // - rre.lines.emplace_back (l.line, l.column, move (ln), false); - } - else // Line starts with the regex introducer. - { - // This is a char-regex, or a sequence of line-regex syntax - // characters or both (in this specific order). 
So we will - // add regex (with optional special characters) or special - // literal. - // - size_t p (ln.find (re, 1)); - if (p == string::npos) - { - // No regex, just a sequence of syntax characters. - // - string spec (ln, 1); - if (spec.empty ()) - fail (l) << "no syntax line characters"; - - // Append special literal. - // - rre.lines.emplace_back ( - l.line, l.column, move (spec), true); - } - else - { - // Regex (probably with syntax characters). - // - regex_parts re; - - // Empty regex is a special case repesenting a blank line. - // - if (p == 1) - // Position to optional specal characters of an empty - // regex. - // - ++p; - else - // Can't fail as all the pre-conditions verified - // (non-empty with both introducers in place), so no - // description required. - // - re = parse_regex (ln, l, "", &p); - - // Append regex with optional special characters. - // - rre.lines.emplace_back (l.line, l.column, - move (re.value), move (re.flags), - string (ln, p)); - } - } - } - } - } - - // We should expand the whole line at once so this would normally be - // a newline but can also be an end-of-stream. - // - if (tt == type::newline) - next (t, tt); - else - assert (tt == type::eos); - } - - if (tt == type::eos) - fail (t) << "missing here-document end marker '" << em << "'"; - - if (pre_parse_) - { - // Strip the indentation prefix if there is one. - // - assert (replay_ == replay::save); - - if (!sp.empty ()) - { - size_t sn (sp.size ()); - - for (; ri != replay_data_.size (); ++ri) - { - token& rt (replay_data_[ri].token); - - if (rt.type == type::newline) // Blank - continue; - - if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0) - fail (rt) << "unindented here-document line"; - - // If the word is equal to the strip prefix then we have to drop - // the token. Note that simply making it an empty word won't - // have the same semantics. For instance, it would trigger - // concatenated expansion. - // - if (rt.value.size () == sn) - replay_data_.erase (replay_data_.begin () + ri); - else - { - rt.value.erase (0, sn); - rt.column += sn; - ++ri; - } - - // Skip until next newline. - // - for (; replay_data_[ri].token.type != type::newline; ++ri) ; - } - } - } - else - { - // Add final newline unless suppressed. - // - if (mod.find (':') == string::npos) - { - if (re) - // Note that the position is synthetic, but that's ok as we don't - // expect any diagnostics to refer this line. - // - rre.lines.emplace_back (l.line, l.column, string (), false); - else if (rs) - *rs += '\n'; - else - rs = "\n"; - } - - // Finalize regex lines. - // - if (re) - { - // Empty regex matches nothing, so not of much use. - // - if (rre.lines.empty ()) - fail (l) << "empty here-document regex"; - - rre.intro = re; - } - } - - return re - ? parsed_doc (move (rre), l.line, l.column) - : parsed_doc (rs ? move (*rs) : string (), l.line, l.column); - } - - // - // Execute. - // - - void parser:: - execute (script& s, runner& r) - { - assert (s.state == scope_state::unknown); - - auto g ( - make_exception_guard ( - [&s] () {s.state = scope_state::failed;})); - - if (!s.empty ()) - execute (s, s, r); - else - s.state = scope_state::passed; - } - - void parser:: - execute (scope& sc, script& s, runner& r) - { - path_ = nullptr; // Set by replays. - - pre_parse_ = false; - - set_lexer (nullptr); - - script_ = &s; - runner_ = &r; - group_ = nullptr; - id_map_ = nullptr; - include_set_ = nullptr; - scope_ = ≻ - - //@@ PAT TODO: set pbase_? 
- - exec_scope_body (); - } - - static void - execute_impl (scope& s, script& scr, runner& r) - { - try - { - parser p (scr.test_target.ctx); - p.execute (s, scr, r); - } - catch (const failed&) - { - s.state = scope_state::failed; - } - } - - void parser:: - exec_scope_body () - { - size_t li (0); - - runner_->enter (*scope_, scope_->start_loc_); - - if (test* t = dynamic_cast (scope_)) - { - exec_lines ( - t->tests_.begin (), t->tests_.end (), li, command_type::test); - } - else if (group* g = dynamic_cast (scope_)) - { - bool exec_scope ( - exec_lines ( - g->setup_.begin (), g->setup_.end (), li, command_type::setup)); - - if (exec_scope) - { - atomic_count task_count (0); - wait_guard wg (g->root.test_target.ctx, task_count); - - // Start asynchronous execution of inner scopes keeping track of - // how many we have handled. - // - for (unique_ptr& chain: g->scopes) - { - // Check if this scope is ignored (e.g., via config.test). - // - if (!runner_->test (*chain) || !exec_scope) - { - chain = nullptr; - continue; - } - - // Pick a scope from the if-else chain. - // - // In fact, we are going to drop all but the selected (if any) - // scope. This way we can re-examine the scope states later. It - // will also free some memory. - // - unique_ptr* ps; - for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain) - { - scope& s (**ps); - - if (!s.if_cond_) // Unconditional. - { - assert (s.if_chain == nullptr); - break; - } - - line l (move (*s.if_cond_)); - line_type lt (l.type); - - replay_data (move (l.tokens)); - - token t; - type tt; - - next (t, tt); - const location ll (get_location (t)); - next (t, tt); // Skip to start of command. - - bool take; - if (lt != line_type::cmd_else) - { - // Note: the line index count continues from setup. - // - command_expr ce (parse_command_line (t, tt)); + bool take; + if (lt != line_type::cmd_else) + { + // Note: the line index count continues from setup. + // + command_expr ce (parse_command_line (t, tt)); try { - take = runner_->run_if (*scope_, ce, ++li, ll); + take = runner_->run_if (*scope_, ce, li++, ll); } catch (const exit_scope& e) { @@ -3106,8 +1580,11 @@ namespace build2 } } - exec_lines ( - g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown); + ct = command_type::teardown; + + exec_lines (g->tdown_.begin (), g->tdown_.end (), + exec_set, exec_cmd, exec_if, + li); } else assert (false); @@ -3117,239 +1594,23 @@ namespace build2 scope_->state = scope_state::passed; } - bool parser:: - exec_lines (lines::iterator i, lines::iterator e, - size_t& li, - command_type ct) - { - try - { - token t; - type tt; - - for (; i != e; ++i) - { - line& ln (*i); - line_type lt (ln.type); - - assert (path_ == nullptr); - - // Set the tokens and start playing. - // - replay_data (move (ln.tokens)); - - // We don't really need to change the mode since we already know - // the line type. - // - next (t, tt); - const location ll (get_location (t)); - - switch (lt) - { - case line_type::var: - { - // Parse. - // - string name (move (t.value)); - - next (t, tt); - type kind (tt); // Assignment kind. - - value rhs (parse_variable_line (t, tt)); - - if (tt == type::semi) - next (t, tt); - - assert (tt == type::newline); - - // Assign. - // - const variable& var (*ln.var); - - value& lhs (kind == type::assign - ? scope_->assign (var) - : scope_->append (var)); - - build2::parser::apply_value_attributes ( - &var, lhs, move (rhs), kind); - - // If we changes any of the test.* values, then reset the $*, - // $N special aliases. 
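// A minimal standalone sketch of the if-else chain selection loop in
// exec_scope_body() above: the chain is held as a singly-linked list of
// unique_ptr and everything but the selected alternative is dropped. The
// node type is a reduced stand-in (conditions collapsed into a precomputed
// flag); names are illustrative only.
//
#include <cassert>
#include <memory>

namespace sketch
{
  struct scope
  {
    bool take;                       // Result of evaluating the condition.
    std::unique_ptr<scope> if_chain; // Next alternative (elif/else), if any.
  };

  // Drop all alternatives but the selected one (if any) and return it.
  //
  scope*
  select (std::unique_ptr<scope>& chain)
  {
    std::unique_ptr<scope>* ps (&chain);

    for (; *ps != nullptr; ps = &(*ps)->if_chain)
      if ((*ps)->take)
        break;

    if (*ps == nullptr)
    {
      chain = nullptr; // Nothing selected: drop the whole chain.
      return nullptr;
    }

    (*ps)->if_chain = nullptr; // Drop the remaining alternatives.

    if (ps != &chain)
      chain = std::move (*ps); // Keep only the selected scope.

    return chain.get ();
  }
}

int
main ()
{
  using namespace sketch;

  std::unique_ptr<scope> c (new scope {false, nullptr});
  c->if_chain.reset (new scope {true, nullptr});

  scope* s (select (c));
  assert (s != nullptr && s->take && s->if_chain == nullptr);
}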
- // - if (var.name == script_->test_var.name || - var.name == script_->options_var.name || - var.name == script_->arguments_var.name || - var.name == script_->redirects_var.name || - var.name == script_->cleanups_var.name) - { - scope_->reset_special (); - } - - replay_stop (); - break; - } - case line_type::cmd: - { - // We use the 0 index to signal that this is the only command. - // Note that we only do this for test commands. - // - if (ct == command_type::test && li == 0) - { - lines::iterator j (i); - for (++j; j != e && j->type == line_type::var; ++j) ; - - if (j != e) // We have another command. - ++li; - } - else - ++li; - - command_expr ce (parse_command_line (t, tt)); - runner_->run (*scope_, ce, ct, li, ll); - - replay_stop (); - break; - } - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - { - next (t, tt); // Skip to start of command. - - bool take; - if (lt != line_type::cmd_else) - { - // Assume if-else always involves multiple commands. - // - command_expr ce (parse_command_line (t, tt)); - take = runner_->run_if (*scope_, ce, ++li, ll); - - if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) - take = !take; - } - else - { - assert (tt == type::newline); - take = true; - } - - replay_stop (); - - // If end is true, then find the 'end' line. Otherwise, find - // the next if-else line. If skip is true then increment the - // command line index. - // - auto next = [e, &li] - (lines::iterator j, bool end, bool skip) -> lines::iterator - { - // We need to be aware of nested if-else chains. - // - size_t n (0); - - for (++j; j != e; ++j) - { - line_type lt (j->type); - - if (lt == line_type::cmd_if || - lt == line_type::cmd_ifn) - ++n; - - // If we are nested then we just wait until we get back - // to the surface. - // - if (n == 0) - { - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - if (end) break; - // Fall through. - case line_type::cmd_end: return j; - default: break; - } - } - - if (lt == line_type::cmd_end) - --n; - - if (skip) - { - // Note that we don't count else and end as commands. - // - switch (lt) - { - case line_type::cmd: - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: ++li; break; - default: break; - } - } - } - - assert (false); // Missing end. - return e; - }; - - // If we are taking this branch then we need to parse all the - // lines until the next if-else line and then skip all the - // lines until the end (unless next is already end). - // - // Otherwise, we need to skip all the lines until the next - // if-else line and then continue parsing. - // - if (take) - { - lines::iterator j (next (i, false, false)); // Next if-else. - if (!exec_lines (i + 1, j, li, ct)) - return false; - - i = j->type == line_type::cmd_end ? j : next (j, true, true); - } - else - { - i = next (i, false, true); - if (i->type != line_type::cmd_end) - --i; // Continue with this line (e.g., elif or else). - } - - break; - } - case line_type::cmd_end: - { - assert (false); - } - } - } - - return true; - } - catch (const exit_scope& e) - { - // Bail out if the scope is exited with the failure status. Otherwise - // leave the scope normally. - // - if (!e.status) - throw failed (); - - replay_stop (); - return false; - } - } - // // The rest. // + // When add a special variable don't forget to update lexer::word(). 
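// A minimal standalone sketch of the nesting-aware scan performed by the
// next() lambda above: finding the 'end' line that closes an if block while
// skipping nested if blocks, using a depth counter. The line_type enum is a
// reduced stand-in (no elif/else handling or command counting); names are
// illustrative only.
//
#include <cassert>
#include <cstddef>
#include <vector>

namespace sketch
{
  enum class line_type {cmd, cmd_if, cmd_end};

  // Return the index of the 'end' line closing the block started at i
  // (which must be a cmd_if), taking nested if blocks into account.
  //
  std::size_t
  find_end (const std::vector<line_type>& ls, std::size_t i)
  {
    std::size_t n (0); // Nesting level.

    for (++i; i != ls.size (); ++i)
    {
      line_type lt (ls[i]);

      if (lt == line_type::cmd_if)
        ++n;
      else if (lt == line_type::cmd_end)
      {
        if (n == 0)
          return i;

        --n;
      }
    }

    return ls.size (); // Missing end (diagnosed elsewhere).
  }
}

int
main ()
{
  using namespace sketch;
  using lt = line_type;

  std::vector<lt> ls {
    lt::cmd_if, lt::cmd, lt::cmd_if, lt::cmd_end, lt::cmd, lt::cmd_end};

  assert (find_end (ls, 0) == 5);
  assert (find_end (ls, 2) == 3);
}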
+ // + bool parser:: + special_variable (const string& n) noexcept + { + return n == "*" || n == "~" || n == "@" || digit (n); + } + lookup parser:: lookup_variable (name&& qual, string&& name, const location& loc) { - assert (!pre_parse_); + if (pre_parse_) + return lookup (); if (!qual.empty ()) fail (loc) << "qualified variable name"; @@ -3381,40 +1642,6 @@ namespace build2 : script_->lookup_in_buildfile (name); } - size_t parser:: - quoted () const - { - size_t r (0); - - if (replay_ != replay::play) - r = lexer_->quoted (); - else - { - // Examine tokens we have replayed since last reset. - // - for (size_t i (replay_quoted_); i != replay_i_; ++i) - if (replay_data_[i].token.qtype != quote_type::unquoted) - ++r; - } - - return r; - } - - void parser:: - reset_quoted (token& cur) - { - if (replay_ != replay::play) - lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0); - else - { - replay_quoted_ = replay_i_ - 1; - - // Must be the same token. - // - assert (replay_data_[replay_quoted_].token.qtype == cur.qtype); - } - } - const string& parser:: insert_id (string id, location l) { @@ -3426,76 +1653,6 @@ namespace build2 return p.first->first; } - - void parser:: - set_lexer (lexer* l) - { - lexer_ = l; - base_parser::lexer_ = l; - } - - void parser:: - apply_value_attributes (const variable* var, - value& lhs, - value&& rhs, - const string& attributes, - token_type kind, - const path_name& name) - { - path_ = &name; - - istringstream is (attributes); - lexer l (is, name, lexer_mode::attributes); - set_lexer (&l); - - token t; - type tt; - - next_with_attributes (t, tt); // Enable `[` recognition. - - if (tt != type::lsbrace && tt != type::eos) - fail (t) << "expected '[' instead of " << t; - - attributes_push (t, tt, true); - - if (tt != type::eos) - fail (t) << "trailing junk after ']'"; - - build2::parser::apply_value_attributes (var, lhs, move (rhs), kind); - } - - // parser::parsed_doc - // - parser::parsed_doc:: - parsed_doc (string s, uint64_t l, uint64_t c) - : str (move (s)), re (false), end_line (l), end_column (c) - { - } - - parser::parsed_doc:: - parsed_doc (regex_lines&& r, uint64_t l, uint64_t c) - : regex (move (r)), re (true), end_line (l), end_column (c) - { - } - - parser::parsed_doc:: - parsed_doc (parsed_doc&& d) - : re (d.re), end_line (d.end_line), end_column (d.end_column) - { - if (re) - new (®ex) regex_lines (move (d.regex)); - else - new (&str) string (move (d.str)); - } - - parser::parsed_doc:: - ~parsed_doc () - { - if (re) - regex.~regex_lines (); - else - str.~string (); - } } } } diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx index ed3c926..aa64943 100644 --- a/libbuild2/test/script/parser.hxx +++ b/libbuild2/test/script/parser.hxx @@ -8,9 +8,10 @@ #include #include -#include #include +#include + #include #include @@ -20,15 +21,14 @@ namespace build2 { namespace script { - class lexer; class runner; - class parser: protected build2::parser + class parser: public build2::script::parser { // Pre-parse. Issue diagnostics and throw failed in case of an error. // public: - parser (context& c): build2::parser (c) {} + parser (context& c): build2::script::parser (c) {} void pre_parse (script&); @@ -36,19 +36,6 @@ namespace build2 void pre_parse (istream&, script&); - // Helpers. - // - // Parse attribute string and perform attribute-guided assignment. - // Issue diagnostics and throw failed in case of an error. - // - void - apply_value_attributes (const variable*, // Optional. 
- value& lhs, - value&& rhs, - const string& attributes, - token_type assign_kind, - const path_name&); // For diagnostics. - // Recursive descent parser. // // Usually (but not always) parse functions receive the token/type @@ -101,83 +88,14 @@ namespace build2 description parse_trailing_description (token&, token_type&); - value - parse_variable_line (token&, token_type&); - command_expr parse_command_line (token&, token_type&); - // Ordered sequence of here-document redirects that we can expect to - // see after the command line. - // - struct here_redirect - { - size_t expr; // Index in command_expr. - size_t pipe; // Index in command_pipe. - int fd; // Redirect fd (0 - in, 1 - out, 2 - err). - }; - - struct here_doc - { - // Redirects that share here_doc. Most of the time we will have no - // more than 2 (2 - for the roundtrip test cases). Doesn't refer - // overridden redirects and thus can be empty. - // - small_vector redirects; - - string end; - bool literal; // Literal (single-quote). - string modifiers; - - // Regex introducer ('\0' if not a regex, so can be used as bool). - // - char regex; - - // Regex global flags. Meaningful if regex != '\0'. - // - string regex_flags; - }; - using here_docs = vector; - - pair - parse_command_expr (token&, token_type&); - - command_exit - parse_command_exit (token&, token_type&); - - void - parse_here_documents (token&, token_type&, - pair&); - - struct parsed_doc - { - union - { - string str; // Here-document literal. - regex_lines regex; // Here-document regex. - }; - - bool re; // True if regex. - uint64_t end_line; // Here-document end marker location. - uint64_t end_column; - - parsed_doc (string, uint64_t line, uint64_t column); - parsed_doc (regex_lines&&, uint64_t line, uint64_t column); - parsed_doc (parsed_doc&&); // Note: move constuctible-only type. - ~parsed_doc (); - }; - - parsed_doc - parse_here_document (token&, token_type&, - const string&, - const string& mode, - char re_intro); // '\0' if not a regex. - // Execute. Issue diagnostics and throw failed in case of an error. // public: void - execute (script& s, runner& r); + execute (script&, runner&); void execute (scope&, script&, runner&); @@ -186,13 +104,11 @@ namespace build2 void exec_scope_body (); - // Return false if the execution of the scope should be terminated - // with the success status (e.g., as a result of encountering the exit - // builtin). For unsuccessful termination the failed exception should - // be thrown. + // Helpers. // - bool - exec_lines (lines::iterator, lines::iterator, size_t&, command_type); + public: + static bool + special_variable (const string&) noexcept; // Customization hooks. // @@ -200,33 +116,13 @@ namespace build2 virtual lookup lookup_variable (name&&, string&&, const location&) override; - // Number of quoted tokens since last reset. Note that this includes - // the peeked token, if any. - // - protected: - size_t - quoted () const; - - void - reset_quoted (token& current); - - size_t replay_quoted_; - // Insert id into the id map checking for duplicates. // protected: const string& insert_id (string, location); - // Set lexer pointers for both the current and the base classes. - // - protected: - void - set_lexer (lexer* l); - protected: - using base_parser = build2::parser; - script* script_; // Pre-parse state. @@ -238,7 +134,7 @@ namespace build2 id_map* id_map_; include_set* include_set_; // Testscripts already included in this // scope. Must be absolute and normalized. 
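// A minimal standalone sketch of the tagged-union technique used by
// parsed_doc above (one object holding either the literal string or the
// regex representation, with a manual discriminator, placement new, and an
// explicit destructor call). The regex half is reduced to an int; names are
// illustrative only.
//
#include <cassert>
#include <new>     // Placement new.
#include <string>
#include <utility> // move()

namespace sketch
{
  struct doc
  {
    union
    {
      std::string str; // Literal here-document text.
      int         num; // Stand-in for the regex representation.
    };

    bool re; // Discriminator: true if num is the active member.

    explicit doc (std::string s): str (std::move (s)), re (false) {}
    explicit doc (int n): num (n), re (true) {}

    doc (doc&& d): re (d.re)
    {
      // Only the active member may be constructed here...
      //
      if (re)
        new (&num) int (d.num);
      else
        new (&str) std::string (std::move (d.str));
    }

    ~doc ()
    {
      // ...and only the active member may be destroyed.
      //
      if (!re)
        str.~basic_string ();
    }

    doc (const doc&) = delete;
    doc& operator= (const doc&) = delete;
    doc& operator= (doc&&) = delete;
  };
}

int
main ()
{
  sketch::doc a (std::string ("foo"));
  sketch::doc b (std::move (a));
  assert (!b.re && b.str == "foo");

  sketch::doc c (42);
  assert (c.re && c.num == 42);
}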
- lexer* lexer_; + string id_prefix_; // Auto-derived id prefix. // Execute state. diff --git a/libbuild2/test/script/regex.cxx b/libbuild2/test/script/regex.cxx deleted file mode 100644 index 92dd8f1..0000000 --- a/libbuild2/test/script/regex.cxx +++ /dev/null @@ -1,439 +0,0 @@ -// file : libbuild2/test/script/regex.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include - -#include - -using namespace std; - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - static_assert (alignof (char_string) % 4 == 0, - "unexpected char_string alignment"); - - static_assert (alignof (char_regex) % 4 == 0, - "unexpected char_regex alignment"); - - static_assert (sizeof (uintptr_t) > sizeof (int16_t), - "unexpected uintptr_t size"); - - const line_char line_char::nul (0); - const line_char line_char::eof (-1); - - // line_char - // - // We package the special character into uintptr_t with the following - // steps: - // - // - narrow down int value to int16_t (preserves all the valid values) - // - // - convert to uint16_t (bitwise representation stays the same, but no - // need to bother with signed value widening, leftmost bits loss on - // left shift, etc) - // - // - convert to uintptr_t (storage type) - // - // - shift left by two bits (the operation is fully reversible as - // uintptr_t is wider then uint16_t) - // - line_char:: - line_char (int c) - : data_ ( - (static_cast ( - static_cast ( - static_cast (c))) << 2) | - static_cast (line_type::special)) - { - // @@ How can we allow anything for basic_regex but only subset - // for our own code? - // - const char ex[] = "pn\n\r"; - - assert (c == 0 || // Null character. - - // EOF. Note that is also passed by msvcrt as _Meta_eos - // enum value. - // - c == -1 || - - // libstdc++ line/paragraph separators. - // - c == u'\u2028' || c == u'\u2029' || - - (c > 0 && c <= 255 && ( - // Supported regex special characters. - // - syntax (c) || - - // libstdc++ look-ahead tokens, newline chars. - // - string::traits_type::find (ex, 4, c) != nullptr))); - } - - line_char:: - line_char (const char_string& s, line_pool& p) - : line_char (&(*p.strings.emplace (s).first)) - { - } - - line_char:: - line_char (char_string&& s, line_pool& p) - : line_char (&(*p.strings.emplace (move (s)).first)) - { - } - - line_char:: - line_char (char_regex r, line_pool& p) - // Note: in C++17 can write as p.regexes.emplace_front(move (r)) - // - : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r)))) - { - } - - bool - line_char::syntax (char c) - { - return string::traits_type::find ( - "()|.*+?{}\\0123456789,=!", 23, c) != nullptr; - } - - bool - operator== (const line_char& l, const line_char& r) - { - line_type lt (l.type ()); - line_type rt (r.type ()); - - if (lt == rt) - { - bool res (true); - - switch (lt) - { - case line_type::special: res = l.special () == r.special (); break; - case line_type::regex: assert (false); break; - - // Note that we use pointers (rather than vales) comparison - // assuming that the strings must belong to the same pool. - // - case line_type::literal: res = l.literal () == r.literal (); break; - } - - return res; - } - - // Match literal with regex. 
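// A minimal standalone sketch of the packing scheme described above: a small
// (possibly negative) character value and a 2-bit type tag stored in one
// uintptr_t (int -> int16_t -> uint16_t -> uintptr_t, shifted left by two
// bits, tag in the low bits). Names are illustrative only.
//
#include <cassert>
#include <cstdint>

namespace sketch
{
  enum class tag: std::uintptr_t {special = 0, literal = 1, regex = 2};

  inline std::uintptr_t
  pack (int c, tag t)
  {
    return (static_cast<std::uintptr_t> (
              static_cast<std::uint16_t> (
                static_cast<std::int16_t> (c))) << 2) |
           static_cast<std::uintptr_t> (t);
  }

  inline tag
  type (std::uintptr_t d) {return static_cast<tag> (d & 0x3);}

  inline int
  special (std::uintptr_t d)
  {
    // Reverse the packing steps. The intermediate cast to uint16_t is what
    // preserves negative values such as EOF (-1).
    //
    return static_cast<std::int16_t> (static_cast<std::uint16_t> (d >> 2));
  }
}

int
main ()
{
  using namespace sketch;

  std::uintptr_t eof (pack (-1, tag::special));

  assert (type (eof) == tag::special);
  assert (special (eof) == -1);

  assert (special (pack ('0', tag::special)) == '0');
}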
- // - if (lt == line_type::literal && rt == line_type::regex) - return regex_match (*l.literal (), *r.regex ()); - else if (rt == line_type::literal && lt == line_type::regex) - return regex_match (*r.literal (), *l.regex ()); - - return false; - } - - bool - operator< (const line_char& l, const line_char& r) - { - if (l == r) - return false; - - line_type lt (l.type ()); - line_type rt (r.type ()); - - if (lt != rt) - return lt < rt; - - bool res (false); - - switch (lt) - { - case line_type::special: res = l.special () < r.special (); break; - case line_type::literal: res = *l.literal () < *r.literal (); break; - case line_type::regex: assert (false); break; - } - - return res; - } - - // line_char_locale - // - - // An exemplar locale with the std::ctype facet. It is used - // for the subsequent line char locale objects creation (see below) - // which normally ends up with a shallow copy of a reference-counted - // object. - // - // Note that creating the line char locales from the exemplar is not - // merely an optimization: there is a data race in the libstdc++ (at - // least as of GCC 9.1) implementation of the locale(const locale&, - // Facet*) constructor (bug #91057). - // - // Also note that we install the facet in init() rather than during - // the object creation to avoid a race with the std::locale-related - // global variables initialization. - // - static locale line_char_locale_exemplar; - - void - init () - { - line_char_locale_exemplar = - locale (locale (), - new std::ctype ()); // Hidden by ctype bitmask. - } - - line_char_locale:: - line_char_locale () - : locale (line_char_locale_exemplar) - { - // Make sure init() has been called. - // - // Note: has_facet() is hidden by a private function in libc++. - // - assert (std::has_facet> (*this)); - } - - // char_regex - // - // Transform regex according to the extended flags {idot}. If regex is - // malformed then keep transforming, so the resulting string is - // malformed the same way. We expect the error to be reported by the - // char_regex ctor. - // - static string - transform (const string& s, char_flags f) - { - assert ((f & char_flags::idot) != char_flags::none); - - string r; - bool escape (false); - bool cclass (false); - - for (char c: s) - { - // Inverse escaping for a dot which is out of the char class - // brackets. - // - bool inverse (c == '.' && !cclass); - - // Handle the escape case. Note that we delay adding the backslash - // since we may have to inverse things. - // - if (escape) - { - if (!inverse) - r += '\\'; - - r += c; - escape = false; - - continue; - } - else if (c == '\\') - { - escape = true; - continue; - } - - // Keep track of being inside the char class brackets, escape if - // inversion. Note that we never inverse square brackets. - // - if (c == '[' && !cclass) - cclass = true; - else if (c == ']' && cclass) - cclass = false; - else if (inverse) - r += '\\'; - - r += c; - } - - if (escape) // Regex is malformed but that's not our problem. - r += '\\'; - - return r; - } - - static char_regex::flag_type - to_std_flags (char_flags f) - { - // Note that ECMAScript flag is implied in the absense of a grammar - // flag. - // - return (f & char_flags::icase) != char_flags::none - ? char_regex::icase - : char_regex::flag_type (); - } - - char_regex:: - char_regex (const char_string& s, char_flags f) - : base_type ((f & char_flags::idot) != char_flags::none - ? 
transform (s, f) - : s, - to_std_flags (f)) - { - } - } - } - } -} - -namespace std -{ - using namespace build2::test::script::regex; - - // char_traits - // - line_char* char_traits:: - assign (char_type* s, size_t n, char_type c) - { - for (size_t i (0); i != n; ++i) - s[i] = c; - return s; - } - - line_char* char_traits:: - move (char_type* d, const char_type* s, size_t n) - { - if (n > 0 && d != s) - { - // If d < s then it can't be in [s, s + n) range and so using copy() is - // safe. Otherwise d + n is out of (s, s + n] range and so using - // copy_backward() is safe. - // - if (d < s) - std::copy (s, s + n, d); // Hidden by char_traits::copy(). - else - copy_backward (s, s + n, d + n); - } - - return d; - } - - line_char* char_traits:: - copy (char_type* d, const char_type* s, size_t n) - { - std::copy (s, s + n, d); // Hidden by char_traits::copy(). - return d; - } - - int char_traits:: - compare (const char_type* s1, const char_type* s2, size_t n) - { - for (size_t i (0); i != n; ++i) - { - if (s1[i] < s2[i]) - return -1; - else if (s2[i] < s1[i]) - return 1; - } - - return 0; - } - - size_t char_traits:: - length (const char_type* s) - { - size_t i (0); - while (s[i] != char_type::nul) - ++i; - - return i; - } - - const line_char* char_traits:: - find (const char_type* s, size_t n, const char_type& c) - { - for (size_t i (0); i != n; ++i) - { - if (s[i] == c) - return s + i; - } - - return nullptr; - } - - // ctype - // - locale::id ctype::id; - - const line_char* ctype:: - is (const char_type* b, const char_type* e, mask* m) const - { - while (b != e) - { - const char_type& c (*b++); - - *m++ = c.type () == line_type::special && c.special () >= 0 && - build2::digit (static_cast (c.special ())) - ? digit - : 0; - } - - return e; - } - - const line_char* ctype:: - scan_is (mask m, const char_type* b, const char_type* e) const - { - for (; b != e; ++b) - { - if (is (m, *b)) - return b; - } - - return e; - } - - const line_char* ctype:: - scan_not (mask m, const char_type* b, const char_type* e) const - { - for (; b != e; ++b) - { - if (!is (m, *b)) - return b; - } - - return e; - } - - const char* ctype:: - widen (const char* b, const char* e, char_type* c) const - { - while (b != e) - *c++ = widen (*b++); - - return e; - } - - const line_char* ctype:: - narrow (const char_type* b, const char_type* e, char def, char* c) const - { - while (b != e) - *c++ = narrow (*b++, def); - - return e; - } - - // regex_traits - // - int regex_traits:: - value (char_type c, int radix) const - { - assert (radix == 8 || radix == 10 || radix == 16); - - if (c.type () != line_type::special) - return -1; - - const char digits[] = "0123456789ABCDEF"; - const char* d (string::traits_type::find (digits, radix, c.special ())); - return d != nullptr ? static_cast (d - digits) : -1; - } -} diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx deleted file mode 100644 index 4114ea4..0000000 --- a/libbuild2/test/script/regex.hxx +++ /dev/null @@ -1,684 +0,0 @@ -// file : libbuild2/test/script/regex.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX -#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX - -#include -#include -#include -#include // basic_string -#include // make_unsigned, enable_if, is_* -#include - -#include -#include - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - using char_string = std::basic_string; - - enum class char_flags: uint16_t - { - icase = 0x1, // Case-insensitive match. 
- idot = 0x2, // Invert '.' escaping. - - none = 0 - }; - - // Restricts valid standard flags to just {icase}, extends with custom - // flags {idot}. - // - class char_regex: public std::basic_regex - { - public: - using base_type = std::basic_regex; - - char_regex (const char_string&, char_flags = char_flags::none); - }; - - // Newlines are line separators and are not part of the line: - // - // lineline - // - // Specifically, this means that a customary trailing newline creates a - // trailing blank line. - // - // All characters can inter-compare (though there cannot be regex - // characters in the output, only in line_regex). - // - // Note that we assume that line_regex and the input to regex_match() - // use the same pool. - // - struct line_pool - { - // Note that we assume the pool can be moved without invalidating - // pointers to any already pooled entities. - // - std::unordered_set strings; - std::list regexes; - }; - - enum class line_type - { - special, - literal, - regex - }; - - struct line_char - { - // Steal last two bits from the pointer to store the type. - // - private: - std::uintptr_t data_; - - public: - line_type - type () const {return static_cast (data_ & 0x3);} - - int - special () const - { - // Stored as (shifted) int16_t. Perform steps reversed to those - // that are described in the comment for the corresponding ctor. - // Note that the intermediate cast to uint16_t is required to - // portably preserve the -1 special character. - // - return static_cast (static_cast (data_ >> 2)); - } - - const char_string* - literal () const - { - // Note that 2 rightmost bits are used for packaging line_char - // type. Read the comment for the corresponding ctor for details. - // - return reinterpret_cast ( - data_ & ~std::uintptr_t (0x3)); - } - - const char_regex* - regex () const - { - // Note that 2 rightmost bits are used for packaging line_char - // type. Read the comment for the corresponding ctor for details. - // - return reinterpret_cast ( - data_ & ~std::uintptr_t (0x3)); - } - - static const line_char nul; - static const line_char eof; - - // Note: creates an uninitialized value. - // - line_char () = default; - - // Create a special character. The argument value must be one of the - // following ones: - // - // 0 (nul character) - // -1 (EOF) - // [()|.*+?{}\0123456789,=!] (excluding []) - // - // Note that the constructor is implicit to allow basic_regex to - // implicitly construct line_chars from special char literals (in - // particular libstdc++ appends them to an internal line_string). - // - // Also note that we extend the valid characters set (see above) with - // 'p', 'n' (used by libstdc++ for positive/negative look-ahead - // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used - // by libstdc++ for newline/newparagraph matching). - // - line_char (int); - - // Create a literal character. - // - // Don't copy string if already pooled. - // - explicit - line_char (const char_string&, line_pool&); - - explicit - line_char (char_string&&, line_pool&); - - explicit - line_char (const char_string* s) // Assume already pooled. - // - // Steal two bits from the pointer to package line_char type. - // Assume (and statically assert) that char_string address is a - // multiple of four. - // - : data_ (reinterpret_cast (s) | - static_cast (line_type::literal)) {} - - // Create a regex character. - // - explicit - line_char (char_regex, line_pool&); - - explicit - line_char (const char_regex* r) // Assume already pooled. 
- // - // Steal two bits from the pointer to package line_char type. - // Assume (and statically assert) that char_regex address is a - // multiple of four. - // - : data_ (reinterpret_cast (r) | - static_cast (line_type::regex)) {} - - // Provide basic_regex with the ability to use line_char in a context - // where a char value is expected (e.g., as a function argument). - // - // libstdc++ seems to cast special line_chars only (and such a - // conversion is meanigfull). - // - // msvcrt casts line_chars of arbitrary types instead. The only - // reasonable strategy is to return a value that differs from any - // other that can be encountered in a regex expression and so will - // unlikelly be misinterpreted. - // - operator char () const - { - return type () == line_type::special ? special () : '\a'; // BELL. - } - - // Return true if the character is a syntax (special) one. - // - static bool - syntax (char); - - // Provide basic_regex (such as from msvcrt) with the ability to - // explicitly cast line_chars to implementation-specific numeric - // types (enums, msvcrt's _Uelem, etc). - // - template - explicit - operator T () const - { - assert (type () == line_type::special); - return static_cast (special ()); - } - }; - - // Perform "deep" characters comparison (for example match literal - // character with a regex character), rather than just compare them - // literally. At least one argument must be of a type other than regex - // as there is no operator==() defined to compare regexes. Characters - // of the literal type must share the same pool (strings are compared - // by pointers not by values). - // - bool - operator== (const line_char&, const line_char&); - - // Return false if arguments are equal (operator==() returns true). - // Otherwise if types are different return the value implying that - // special < literal < regex. If types are special or literal return - // the result of the respective characters or strings comparison. At - // least one argument must be of a type other than regex as there is no - // operator<() defined to compare regexes. - // - // While not very natural operation for the class we have, we have to - // provide some meaningfull semantics for such a comparison as it is - // required by the char_traits specialization. While we - // could provide it right in that specialization, let's keep it here - // for basic_regex implementations that potentially can compare - // line_chars as they compare them with expressions of other types (see - // below). - // - bool - operator< (const line_char&, const line_char&); - - inline bool - operator!= (const line_char& l, const line_char& r) - { - return !(l == r); - } - - inline bool - operator<= (const line_char& l, const line_char& r) - { - return l < r || l == r; - } - - // Provide basic_regex (such as from msvcrt) with the ability to - // compare line_char to a value of an integral or - // implementation-specific enum type. In the absense of the following - // template operators, such a comparisons would be ambigious for - // integral types (given that there are implicit conversions - // int->line_char and line_char->char) and impossible for enums. - // - // Note that these == and < operators can succeed only for a line_char - // of the special type. For other types they always return false. That - // in particular leads to the following case: - // - // (lc != c) != (lc < c || c < lc). 
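// A minimal standalone sketch of the enable_if-constrained comparison
// operator templates described above, which restrict the overloads to
// integral and enum right-hand sides. The wrapper type is a toy stand-in for
// line_char and the extra is_same exclusion of the original trait is omitted
// for brevity; names are illustrative only.
//
#include <cassert>
#include <type_traits>

namespace sketch
{
  struct ch
  {
    int v;
  };

  template <typename T>
  struct ch_cmp: std::enable_if<std::is_integral<T>::value ||
                                std::is_enum<T>::value> {};

  template <typename T, typename = typename ch_cmp<T>::type>
  bool
  operator== (const ch& l, const T& r) {return static_cast<T> (l.v) == r;}

  template <typename T, typename = typename ch_cmp<T>::type>
  bool
  operator== (const T& l, const ch& r) {return r == l;}
}

int
main ()
{
  using namespace sketch;

  enum meta {mn = 'n'};

  ch c {'n'};

  assert (c == 'n'); // char (integral).
  assert (c == mn);  // Unscoped enum.
  assert (110 == c); // int.
}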
- // - // Note that we can not assert line_char is of the special type as - // basic_regex (such as from libc++) may need the ability to check if - // arbitrary line_char belongs to some special characters range (like - // ['0', '9']). - // - template - struct line_char_cmp - : public std::enable_if::value || - (std::is_enum::value && - !std::is_same::value)> {}; - - template ::type> - bool - operator== (const line_char& l, const T& r) - { - return l.type () == line_type::special && - static_cast (l.special ()) == r; - } - - template ::type> - bool - operator== (const T& l, const line_char& r) - { - return r.type () == line_type::special && - static_cast (r.special ()) == l; - } - - template ::type> - bool - operator!= (const line_char& l, const T& r) - { - return !(l == r); - } - - template ::type> - bool - operator!= (const T& l, const line_char& r) - { - return !(l == r); - } - - template ::type> - bool - operator< (const line_char& l, const T& r) - { - return l.type () == line_type::special && - static_cast (l.special ()) < r; - } - - template ::type> - bool - operator< (const T& l, const line_char& r) - { - return r.type () == line_type::special && - l < static_cast (r.special ()); - } - - template ::type> - inline bool - operator<= (const line_char& l, const T& r) - { - return l < r || l == r; - } - - template ::type> - inline bool - operator<= (const T& l, const line_char& r) - { - return l < r || l == r; - } - - using line_string = std::basic_string; - - // Locale that has ctype facet installed. Used in the - // regex_traits specialization (see below). - // - class line_char_locale: public std::locale - { - public: - // Create a copy of the global C++ locale. - // - line_char_locale (); - }; - - // Initialize the testscript regex global state. Should be called once - // prior to creating objects of types from this namespace. Note: not - // thread-safe. - // - void - init (); - } - } - } -} - -// Standard template specializations for line_char that are required for the -// basic_regex instantiation. -// -namespace std -{ - template <> - class char_traits - { - public: - using char_type = build2::test::script::regex::line_char; - using int_type = char_type; - using off_type = char_traits::off_type; - using pos_type = char_traits::pos_type; - using state_type = char_traits::state_type; - - static void - assign (char_type& c1, const char_type& c2) {c1 = c2;} - - static char_type* - assign (char_type*, size_t, char_type); - - // Note that eq() and lt() are not constexpr (as required by C++11) - // because == and < operators for char_type are not constexpr. - // - static bool - eq (const char_type& l, const char_type& r) {return l == r;} - - static bool - lt (const char_type& l, const char_type& r) {return l < r;} - - static char_type* - move (char_type*, const char_type*, size_t); - - static char_type* - copy (char_type*, const char_type*, size_t); - - static int - compare (const char_type*, const char_type*, size_t); - - static size_t - length (const char_type*); - - static const char_type* - find (const char_type*, size_t, const char_type&); - - static constexpr char_type - to_char_type (const int_type& c) {return c;} - - static constexpr int_type - to_int_type (const char_type& c) {return int_type (c);} - - // Note that the following functions are not constexpr (as required by - // C++11) because their return expressions are not constexpr. 
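// A minimal standalone sketch of the overlap-safe move() logic from the
// char_traits specialization above, shown on plain ints: if the destination
// starts before the source it cannot lie inside [s, s + n), so a forward
// copy is safe; otherwise a backward copy is. Names are illustrative only.
//
#include <algorithm> // copy(), copy_backward(), equal()
#include <cassert>
#include <cstddef>

namespace sketch
{
  int*
  traits_move (int* d, const int* s, std::size_t n)
  {
    if (n > 0 && d != s)
    {
      if (d < s)
        std::copy (s, s + n, d);
      else
        std::copy_backward (s, s + n, d + n);
    }

    return d;
  }
}

int
main ()
{
  int a[] = {0, 1, 2, 3, 4};

  // Shift {0, 1, 2} right by one element (overlapping ranges).
  //
  sketch::traits_move (a + 1, a, 3);

  int e[] = {0, 0, 1, 2, 4};
  assert (std::equal (a, a + 5, e));
}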
- // - static bool - eq_int_type (const int_type& l, const int_type& r) {return l == r;} - - static int_type eof () {return char_type::eof;} - - static int_type - not_eof (const int_type& c) - { - return c != char_type::eof ? c : char_type::nul; - } - }; - - // ctype<> must be derived from both ctype_base and locale::facet (the later - // supports ref-counting used by the std::locale implementation internally). - // - // msvcrt for some reason also derives ctype_base from locale::facet which - // produces "already a base-class" warning and effectivelly breaks the - // reference counting. So we derive from ctype_base only in this case. - // - template <> - class ctype: public ctype_base -#if !defined(_MSC_VER) || _MSC_VER >= 2000 - , public locale::facet -#endif - { - // Used by the implementation only. - // - using line_type = build2::test::script::regex::line_type; - - public: - using char_type = build2::test::script::regex::line_char; - - static locale::id id; - -#if !defined(_MSC_VER) || _MSC_VER >= 2000 - explicit - ctype (size_t refs = 0): locale::facet (refs) {} -#else - explicit - ctype (size_t refs = 0): ctype_base (refs) {} -#endif - - // While unnecessary, let's keep for completeness. - // - virtual - ~ctype () override = default; - - // The C++ standard requires the following functions to call their virtual - // (protected) do_*() counterparts that provide the real implementations. - // The only purpose for this indirection is to provide a user with the - // ability to customize existing (standard) ctype facets. As we do not - // provide such an ability, for simplicity we will omit the do_*() - // functions and provide the implementations directly. This should be safe - // as nobody except us could call those protected functions. - // - bool - is (mask m, char_type c) const - { - return m == - (c.type () == line_type::special && c.special () >= 0 && - build2::digit (static_cast (c.special ())) - ? digit - : 0); - } - - const char_type* - is (const char_type*, const char_type*, mask*) const; - - const char_type* - scan_is (mask, const char_type*, const char_type*) const; - - const char_type* - scan_not (mask, const char_type*, const char_type*) const; - - char_type - toupper (char_type c) const {return c;} - - const char_type* - toupper (char_type*, const char_type* e) const {return e;} - - char_type - tolower (char_type c) const {return c;} - - const char_type* - tolower (char_type*, const char_type* e) const {return e;} - - char_type - widen (char c) const {return char_type (c);} - - const char* - widen (const char*, const char*, char_type*) const; - - char - narrow (char_type c, char def) const - { - return c.type () == line_type::special ? c.special () : def; - } - - const char_type* - narrow (const char_type*, const char_type*, char, char*) const; - }; - - // Note: the current application locale must be POSIX. Otherwise the - // behavior is undefined. - // - template <> - class regex_traits - { - public: - using char_type = build2::test::script::regex::line_char; - using string_type = build2::test::script::regex::line_string; - using locale_type = build2::test::script::regex::line_char_locale; - using char_class_type = regex_traits::char_class_type; - - // Workaround for msvcrt bugs. For some reason it assumes such a members - // to be present in a regex_traits specialization. 
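// A minimal standalone sketch of the general pattern behind line_char_locale
// above: installing a reference-counted custom facet into a locale and
// retrieving it with use_facet()/has_facet(). The facet here is a trivial
// example of our own, not a build2 type.
//
#include <cassert>
#include <cstddef>
#include <locale>

namespace sketch
{
  class tag_facet: public std::locale::facet
  {
  public:
    static std::locale::id id; // Required by use_facet()/has_facet().

    explicit
    tag_facet (int t, std::size_t refs = 0)
        : std::locale::facet (refs), tag_ (t) {}

    int tag () const {return tag_;}

  private:
    int tag_;
  };

  std::locale::id tag_facet::id;
}

int
main ()
{
  using namespace sketch;

  // The locale owns the facet (reference-counted); no explicit delete.
  //
  std::locale l (std::locale (), new tag_facet (42));

  assert (std::has_facet<tag_facet> (l));
  assert (std::use_facet<tag_facet> (l).tag () == 42);
}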
- // -#if defined(_MSC_VER) && _MSC_VER < 2000 - static const ctype_base::mask _Ch_upper = ctype_base::upper; - static const ctype_base::mask _Ch_alpha = ctype_base::alpha; - - // Unsigned numeric type. msvcrt normally casts characters to this type - // for comparing with some numeric values or for calculating an index in - // some bit array. Luckily that all relates to the character class - // handling that we don't support. - // - using _Uelem = unsigned int; -#endif - - regex_traits () = default; // Unnecessary but let's keep for completeness. - - static size_t - length (const char_type* p) {return string_type::traits_type::length (p);} - - char_type - translate (char_type c) const {return c;} - - // Case-insensitive matching is not supported by line_regex. So there is no - // reason for the function to be called. - // - char_type - translate_nocase (char_type c) const {assert (false); return c;} - - // Return a sort-key - the exact copy of [b, e). - // - template - string_type - transform (I b, I e) const {return string_type (b, e);} - - // Return a case-insensitive sort-key. Case-insensitive matching is not - // supported by line_regex. So there is no reason for the function to be - // called. - // - template - string_type - transform_primary (I b, I e) const - { - assert (false); - return string_type (b, e); - } - - // POSIX regex grammar and collating elements (e.g., [.tilde.]) in - // particular are not supported. So there is no reason for the function to - // be called. - // - template - string_type - lookup_collatename (I, I) const {assert (false); return string_type ();} - - // Character classes (e.g., [:lower:]) are not supported. So there is no - // reason for the function to be called. - // - template - char_class_type - lookup_classname (I, I, bool = false) const - { - assert (false); - return char_class_type (); - } - - // Return false as we don't support character classes (e.g., [:lower:]). - // - bool - isctype (char_type, char_class_type) const {return false;} - - int - value (char_type, int) const; - - // Return the locale passed as an argument as we do not expect anything - // other than POSIX locale, that we also assume to be imbued by default. - // - locale_type - imbue (locale_type l) {return l;} - - locale_type - getloc () const {return locale_type ();} - }; - - // We assume line_char to be an unsigned type and express that with the - // following specialization used by basic_regex implementations. - // - // libstdc++ defines unsigned CharT type (regex_traits template parameter) - // to use as an index in some internal cache regardless if the cache is used - // for this specialization (and the cache is used only if CharT is char). - // - template <> - struct make_unsigned - { - using type = build2::test::script::regex::line_char; - }; - - // When used with libc++ the linker complains that it can't find - // __match_any_but_newline::__exec() function. The problem is - // that the function is only specialized for char and wchar_t - // (LLVM bug #31409). As line_char has no notion of the newline character we - // specialize the class template to behave as the __match_any - // instantiation does (that luckily has all the functions in place). 
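// A minimal standalone sketch of the digit-value lookup behind
// regex_traits<>::value() above, reduced to plain char: the radix-sized
// prefix of a single digit table serves radixes 8, 10, and 16. Names are
// illustrative only.
//
#include <cassert>
#include <cstring> // memchr()

namespace sketch
{
  // Return the numeric value of c in the given radix (8, 10, or 16) or -1.
  //
  int
  value (char c, int radix)
  {
    const char digits[] = "0123456789ABCDEF";

    const void* d (
      std::memchr (digits, c, static_cast<std::size_t> (radix)));

    return d != nullptr
      ? static_cast<int> (static_cast<const char*> (d) - digits)
      : -1;
  }
}

int
main ()
{
  assert (sketch::value ('7', 8)  == 7);
  assert (sketch::value ('9', 8)  == -1); // Not an octal digit.
  assert (sketch::value ('F', 16) == 15);
  assert (sketch::value ('z', 10) == -1);
}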
- // -#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000 - template <> - class __match_any_but_newline - : public __match_any - { - public: - using base = __match_any; - using base::base; - }; -#endif -} - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - class line_regex: public std::basic_regex - { - public: - using base_type = std::basic_regex; - - using base_type::base_type; - - line_regex () = default; - - // Move string regex together with the pool used to create it. - // - line_regex (line_string&& s, line_pool&& p) - // No move-string ctor for base_type, so emulate it. - // - : base_type (s), pool (move (p)) {s.clear ();} - - // Move constuctible/assignable-only type. - // - line_regex (line_regex&&) = default; - line_regex (const line_regex&) = delete; - line_regex& operator= (line_regex&&) = default; - line_regex& operator= (const line_regex&) = delete; - - public: - line_pool pool; - }; - } - } - } -} - -#include - -#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX diff --git a/libbuild2/test/script/regex.ixx b/libbuild2/test/script/regex.ixx deleted file mode 100644 index 46db9db..0000000 --- a/libbuild2/test/script/regex.ixx +++ /dev/null @@ -1,34 +0,0 @@ -// file : libbuild2/test/script/regex.ixx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - inline char_flags - operator&= (char_flags& x, char_flags y) - { - return x = static_cast ( - static_cast (x) & static_cast (y)); - } - - inline char_flags - operator|= (char_flags& x, char_flags y) - { - return x = static_cast ( - static_cast (x) | static_cast (y)); - } - - inline char_flags - operator& (char_flags x, char_flags y) {return x &= y;} - - inline char_flags - operator| (char_flags x, char_flags y) {return x |= y;} - } - } - } -} diff --git a/libbuild2/test/script/regex.test.cxx b/libbuild2/test/script/regex.test.cxx deleted file mode 100644 index 5a93c53..0000000 --- a/libbuild2/test/script/regex.test.cxx +++ /dev/null @@ -1,303 +0,0 @@ -// file : libbuild2/test/script/regex.test.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include -#include // is_* - -#include - -using namespace std; -using namespace build2::test::script::regex; - -int -main () -{ - using lc = line_char; - using ls = line_string; - using lr = line_regex; - using cf = char_flags; - using cr = char_regex; - - init (); // Initializes the testscript regex global state. - - // Test line_char. - // - { - static_assert (is_trivial::value && - is_standard_layout::value && - !is_array::value, - "line_char must be char-like"); - - // Zero-initialed line_char should be the null-char as required by - // char_traits<>::length() specification. - // - assert (lc () == lc::nul); - - line_pool p; - - assert (lc::eof == -1); - assert (lc::nul == 0); - - enum meta {mn = 'n', mp = 'p'}; - - // Special roundtrip. - // - assert (lc ('0').special () == '0'); - assert (lc (0).special () == 0); - assert (lc (-1).special () == -1); - assert (lc ('p').special () == 'p'); - assert (lc (u'\u2028').special () == u'\u2028'); - - // Special comparison. 
- // - assert (lc ('0') == lc ('0')); - assert (lc ('0') == '0'); - assert (lc ('n') == mn); - assert (mn == static_cast (lc ('n'))); - - assert (lc ('0') != lc ('1')); - assert (lc ('0') != '1'); - assert (lc ('n') != mp); - assert (lc ('0') != lc ("0", p)); - assert (lc ('0') != lc (cr ("0"), p)); - - assert (lc ('0') < lc ('1')); - assert (lc ('0') < '1'); - assert (lc ('1') < lc ("0", p)); - assert (lc ('n') < mp); - - assert (lc ('0') <= '1'); - assert (lc ('0') <= lc ('1')); - assert (lc ('n') <= mn); - assert (lc ('1') <= lc ("0", p)); - - // Literal roundtrip. - // - assert (*lc ("abc", p).literal () == "abc"); - - // Literal comparison. - // - assert (lc ("a", p) == lc ("a", p)); - assert (lc ("a", p).literal () == lc ("a", p).literal ()); - assert (char (lc ("a", p)) == '\a'); - - assert (lc ("a", p) != lc ("b", p)); - assert (!(lc ("a", p) != lc (cr ("a"), p))); - assert (lc ("a", p) != lc (cr ("b"), p)); - - assert (lc ("a", p) < lc ("b", p)); - assert (!(lc ("a", p) < lc (cr ("a"), p))); - - assert (lc ("a", p) <= lc ("b", p)); - assert (lc ("a", p) <= lc (cr ("a"), p)); - assert (lc ("a", p) < lc (cr ("c"), p)); - - // Regex roundtrip. - // - assert (regex_match ("abc", *lc (cr ("abc"), p).regex ())); - - // Regex flags. - // - // icase - // - assert (regex_match ("ABC", cr ("abc", cf::icase))); - - // idot - // - assert (!regex_match ("a", cr ("[.]", cf::idot))); - assert (!regex_match ("a", cr ("[\\.]", cf::idot))); - - assert (regex_match ("a", cr ("."))); - assert (!regex_match ("a", cr (".", cf::idot))); - assert (regex_match ("a", cr ("\\.", cf::idot))); - assert (!regex_match ("a", cr ("\\."))); - - // regex::transform() - // - // The function is static and we can't test it directly. So we will test - // it indirectly via regex matches. - // - // @@ Would be nice to somehow address the inability to test internals (not - // exposed via headers). As a part of utility library support? - // - assert (regex_match (".a[.", cr (".\\.\\[[.]", cf::idot))); - assert (regex_match (".a[.", cr (".\\.\\[[\\.]", cf::idot))); - assert (!regex_match ("ba[.", cr (".\\.\\[[.]", cf::idot))); - assert (!regex_match (".a[b", cr (".\\.\\[[.]", cf::idot))); - assert (!regex_match (".a[b", cr (".\\.\\[[\\.]", cf::idot))); - - // Regex comparison. - // - assert (lc ("a", p) == lc (cr ("a|b"), p)); - assert (lc (cr ("a|b"), p) == lc ("a", p)); - } - - // Test char_traits. - // - { - using ct = char_traits; - using vc = vector; - - lc c; - ct::assign (c, '0'); - assert (c == ct::char_type ('0')); - - assert (ct::to_char_type (c) == c); - assert (ct::to_int_type (c) == c); - - assert (ct::eq_int_type (c, c)); - assert (!ct::eq_int_type (c, lc::eof)); - - assert (ct::eof () == lc::eof); - - assert (ct::not_eof (c) == c); - assert (ct::not_eof (lc::eof) != lc::eof); - - ct::assign (&c, 1, '1'); - assert (c == ct::int_type ('1')); - - assert (ct::eq (lc ('0'), lc ('0'))); - assert (ct::lt (lc ('0'), lc ('1'))); - - vc v1 ({'0', '1', '2'}); - vc v2 (3, lc::nul); - - assert (ct::find (v1.data (), 3, '1') == v1.data () + 1); - - ct::copy (v2.data (), v1.data (), 3); - assert (v2 == v1); - - v2.push_back (lc::nul); - assert (ct::length (v2.data ()) == 3); - - // Overlaping ranges. - // - ct::move (v1.data () + 1, v1.data (), 2); - assert (v1 == vc ({'0', '0', '1'})); - - v1 = vc ({'0', '1', '2'}); - ct::move (v1.data (), v1.data () + 1, 2); - assert (v1 == vc ({'1', '2', '2'})); - } - - // Test line_char_locale and ctype (only non-trivial functions). 
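The icase and idot flag tests above have direct analogs in std::regex, which char_regex wraps. A small runnable illustration assuming only the standard library (ECMAScript grammar); the idot behaviour shown is the default escaping convention that the flag inverts:

#include <cassert>
#include <regex>

int main ()
{
  // Case-insensitive matching, as with char_flags::icase.
  //
  assert (std::regex_match ("ABC", std::regex ("abc", std::regex::icase)));

  // By default '.' is a metacharacter and '\.' is a literal dot; the idot
  // flag above inverts this convention for char_regex.
  //
  assert (std::regex_match ("x", std::regex (".")));
  assert (!std::regex_match ("x", std::regex ("\\.")));
  assert (std::regex_match (".", std::regex ("\\.")));
}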
- // - { - using ct = ctype; - - line_char_locale l; - - // It is better not to create q facet on stack as it is - // reference-countable. - // - const ct& t (use_facet (l)); - line_pool p; - - assert (t.is (ct::digit, '0')); - assert (!t.is (ct::digit, '?')); - assert (!t.is (ct::digit, lc ("0", p))); - - const lc chars[] = { '0', '?' }; - ct::mask m[2]; - - const lc* b (chars); - const lc* e (chars + 2); - - // Cast flag value to mask type and compare to mask. - // - auto fl = [] (ct::mask m, ct::mask f) {return m == f;}; - - t.is (b, e, m); - assert (fl (m[0], ct::digit) && fl (m[1], 0)); - - assert (t.scan_is (ct::digit, b, e) == b); - assert (t.scan_is (0, b, e) == b + 1); - - assert (t.scan_not (ct::digit, b, e) == b + 1); - assert (t.scan_not (0, b, e) == b); - - { - char nr[] = "0?"; - lc wd[2]; - t.widen (nr, nr + 2, wd); - assert (wd[0] == b[0] && wd[1] == b[1]); - } - - { - lc wd[] = {'0', lc ("a", p)}; - char nr[2]; - t.narrow (wd, wd + 2, '-', nr); - assert (nr[0] == '0' && nr[1] == '-'); - } - } - - // Test regex_traits. Functions other that value() are trivial. - // - { - regex_traits t; - - const int radix[] = {8, 10}; // Radix 16 is not supported by line_char. - const char digits[] = "0123456789ABCDEF"; - - for (size_t r (0); r < 2; ++r) - { - for (int i (0); i < radix[r]; ++i) - assert (t.value (digits[i], radix[r]) == i); - } - } - - // Test line_regex construction. - // - { - line_pool p; - lr r1 ({lc ("foo", p), lc (cr ("ba(r|z)"), p)}, move (p)); - - lr r2 (move (r1)); - assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2)); - assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2)); - } - - // Test line_regex match. - // - { - line_pool p; - - const lc foo ("foo", p); - const lc bar ("bar", p); - const lc baz ("baz", p); - const lc blank ("", p); - - assert (regex_match (ls ({foo, bar}), lr ({foo, bar}))); - assert (!regex_match (ls ({foo, baz}), lr ({foo, bar}))); - - assert (regex_match (ls ({bar, foo}), - lr ({'(', foo, '|', bar, ')', '+'}))); - - assert (regex_match (ls ({foo, foo, bar}), - lr ({'(', foo, ')', '\\', '1', bar}))); - - assert (regex_match (ls ({foo}), lr ({lc (cr ("fo+"), p)}))); - assert (regex_match (ls ({foo}), lr ({lc (cr (".*"), p)}))); - assert (regex_match (ls ({blank}), lr ({lc (cr (".*"), p)}))); - - assert (regex_match (ls ({blank, blank, foo}), - lr ({blank, '*', foo, blank, '*'}))); - - assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'}))); - - assert (regex_match (ls ({blank, blank}), - lr ({blank, '*', foo, '?', blank, '*'}))); - - assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'}))); - assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'}))); - - assert (regex_match (ls ({foo, foo}), - lr ({foo, '{', '1', ',', '2', '}'}))); - - assert (!regex_match (ls ({foo, foo}), - lr ({foo, '{', '3', ',', '4', '}'}))); - - assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo}))); - assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo}))); - } -} diff --git a/libbuild2/test/script/runner.cxx b/libbuild2/test/script/runner.cxx index b40dea8..03a1f0e 100644 --- a/libbuild2/test/script/runner.cxx +++ b/libbuild2/test/script/runner.cxx @@ -3,696 +3,17 @@ #include -#include // streamsize - -#include -#include -#include // fdopen_mode, fddup() -#include // path_search() -#include - -#include -#include -#include +#include #include -#include -#include -#include - -using namespace std; -using namespace butl; - namespace build2 { namespace test { namespace 
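The ctype tests above exercise the same interface the standard std::ctype<char> facet provides, just instantiated for line_char. For comparison, the equivalent calls against the classic locale (standard behaviour, not taken from this patch):

#include <cassert>
#include <locale>

int main ()
{
  const std::locale& l (std::locale::classic ());
  const std::ctype<char>& ct (std::use_facet<std::ctype<char>> (l));

  assert (ct.is (std::ctype_base::digit, '0'));
  assert (!ct.is (std::ctype_base::digit, '?'));

  const char s[] = "0?";

  // First character classified/not classified as a digit.
  //
  assert (ct.scan_is  (std::ctype_base::digit, s, s + 2) == s);
  assert (ct.scan_not (std::ctype_base::digit, s, s + 2) == s + 1);
}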
script { - // Normalize a path. Also make the relative path absolute using the - // scope's working directory unless it is already absolute. - // - static path - normalize (path p, const scope& sp, const location& l) - { - path r (p.absolute () ? move (p) : sp.wd_path / move (p)); - - try - { - r.normalize (); - } - catch (const invalid_path& e) - { - fail (l) << "invalid file path " << e.path; - } - - return r; - } - - // Check if a path is not empty, the referenced file exists and is not - // empty. - // - static bool - non_empty (const path& p, const location& ll) - { - if (p.empty () || !exists (p)) - return false; - - try - { - ifdstream is (p); - return is.peek () != ifdstream::traits_type::eof (); - } - catch (const io_error& e) - { - // While there can be no fault of the test command being currently - // executed let's add the location anyway to ease the - // troubleshooting. And let's stick to that principle down the road. - // - fail (ll) << "unable to read " << p << ": " << e << endf; - } - } - - // If the file exists, not empty and not larger than 4KB print it to the - // diag record. The file content goes from the new line and is not - // indented. - // - static void - print_file (diag_record& d, const path& p, const location& ll) - { - if (exists (p)) - { - try - { - ifdstream is (p, ifdstream::badbit); - - if (is.peek () != ifdstream::traits_type::eof ()) - { - char buf[4096 + 1]; // Extra byte is for terminating '\0'. - - // Note that the string is always '\0'-terminated with a maximum - // sizeof (buf) - 1 bytes read. - // - is.getline (buf, sizeof (buf), '\0'); - - // Print if the file fits 4KB-size buffer. Note that if it - // doesn't the failbit is set. - // - if (is.eof ()) - { - // Suppress the trailing newline character as the diag record - // adds it's own one when flush. - // - streamsize n (is.gcount ()); - assert (n > 0); - - // Note that if the file contains '\0' it will also be counted - // by gcount(). But even in the worst case we will stay in the - // buffer boundaries (and so not crash). - // - if (buf[n - 1] == '\n') - buf[n - 1] = '\0'; - - d << '\n' << buf; - } - } - } - catch (const io_error& e) - { - fail (ll) << "unable to read " << p << ": " << e; - } - } - } - - // Print first 10 directory sub-entries to the diag record. The directory - // must exist. - // - static void - print_dir (diag_record& d, const dir_path& p, const location& ll) - { - try - { - size_t n (0); - for (const dir_entry& de: dir_iterator (p, - false /* ignore_dangling */)) - { - if (n++ < 10) - d << '\n' << (de.ltype () == entry_type::directory - ? path_cast (de.path ()) - : de.path ()); - } - - if (n > 10) - d << "\nand " << n - 10 << " more file(s)"; - } - catch (const system_error& e) - { - fail (ll) << "unable to iterate over " << p << ": " << e; - } - } - - // Save a string to the file. Fail if exception is thrown by underlying - // operations. - // - static void - save (const path& p, const string& s, const location& ll) - { - try - { - ofdstream os (p); - os << s; - os.close (); - } - catch (const io_error& e) - { - fail (ll) << "unable to write to " << p << ": " << e; - } - } - - // Return the value of the test.target variable. - // - static inline const target_triplet& - test_target (const script& s) - { - // @@ Would be nice to use cached value from test::common_data. - // - if (auto r = cast_null (s.test_target["test.target"])) - return *r; - - // We set it to default value in init() so it can only be NULL if the - // user resets it. 
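The non_empty() helper above reduces to opening the file and peeking for EOF. A simplified standalone analog using std::ifstream instead of butl's ifdstream; the file name in main() is hypothetical:

#include <fstream>
#include <iostream>
#include <string>

// Return true if the file can be opened and is not empty.
//
static bool
non_empty (const std::string& p)
{
  std::ifstream is (p);
  return is.is_open () && is.peek () != std::ifstream::traits_type::eof ();
}

int main ()
{
  std::cout << (non_empty ("stdout") ? "has output" : "empty or missing")
            << '\n';
}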
- // - fail << "invalid test.target value" << endf; - } - - // Transform string according to here-* redirect modifiers from the {/} - // set. - // - static string - transform (const string& s, - bool regex, - const string& modifiers, - const script& scr) - { - if (modifiers.find ('/') == string::npos) - return s; - - // For targets other than Windows leave the string intact. - // - if (test_target (scr).class_ != "windows") - return s; - - // Convert forward slashes to Windows path separators (escape for - // regex). - // - string r; - for (size_t p (0);;) - { - size_t sp (s.find ('/', p)); - - if (sp != string::npos) - { - r.append (s, p, sp - p); - r.append (regex ? "\\\\" : "\\"); - p = sp + 1; - } - else - { - r.append (s, p, sp); - break; - } - } - - return r; - } - - // Check if the test command output matches the expected result (redirect - // value). Noop for redirect types other than none, here_*. - // - static bool - check_output (const path& pr, - const path& op, - const path& ip, - const redirect& rd, - const location& ll, - scope& sp, - bool diag, - const char* what) - { - auto input_info = [&ip, &ll] (diag_record& d) - { - if (non_empty (ip, ll)) - d << info << "stdin: " << ip; - }; - - auto output_info = [&what, &ll] (diag_record& d, - const path& p, - const char* prefix = "", - const char* suffix = "") - { - if (non_empty (p, ll)) - d << info << prefix << what << suffix << ": " << p; - else - d << info << prefix << what << suffix << " is empty"; - }; - - if (rd.type == redirect_type::none) - { - // Check that there is no output produced. - // - assert (!op.empty ()); - - if (!non_empty (op, ll)) - return true; - - if (diag) - { - diag_record d (error (ll)); - d << pr << " unexpectedly writes to " << what << - info << what << ": " << op; - - input_info (d); - - // Print cached output. - // - print_file (d, op, ll); - } - - // Fall through (to return false). - // - } - else if (rd.type == redirect_type::here_str_literal || - rd.type == redirect_type::here_doc_literal || - (rd.type == redirect_type::file && - rd.file.mode == redirect_fmode::compare)) - { - // The expected output is provided as a file or as a string. Save the - // string to a file in the later case. - // - assert (!op.empty ()); - - path eop; - - if (rd.type == redirect_type::file) - eop = normalize (rd.file.path, sp, ll); - else - { - eop = path (op + ".orig"); - save (eop, transform (rd.str, false, rd.modifiers, sp.root), ll); - sp.clean_special (eop); - } - - // Use the diff utility for comparison. - // - path dp ("diff"); - process_path pp (run_search (dp, true)); - - cstrings args {pp.recall_string (), "-u"}; - - // Ignore Windows newline fluff if that's what we are running on. - // - if (test_target (sp.root).class_ == "windows") - args.push_back ("--strip-trailing-cr"); - - args.push_back (eop.string ().c_str ()); - args.push_back (op.string ().c_str ()); - args.push_back (nullptr); - - if (verb >= 2) - print_process (args); - - try - { - // Save diff's stdout to a file for troubleshooting and for the - // optional (if not too large) printing (at the end of - // diagnostics). - // - path ep (op + ".diff"); - auto_fd efd; - - try - { - efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create); - sp.clean_special (ep); - } - catch (const io_error& e) - { - fail (ll) << "unable to write to " << ep << ": " << e; - } - - // Diff utility prints the differences to stdout. But for the - // user it is a part of the test failure diagnostics so let's - // redirect stdout to stderr. 
- // - process p (pp, args.data (), 0, 2, efd.get ()); - efd.reset (); - - if (p.wait ()) - return true; - - assert (p.exit); - const process_exit& pe (*p.exit); - - // Note that both POSIX and GNU diff report error by exiting with - // the code > 1. - // - if (!pe.normal () || pe.code () > 1) - { - diag_record d (fail (ll)); - print_process (d, args); - d << " " << pe; - } - - // Output doesn't match the expected result. - // - if (diag) - { - diag_record d (error (ll)); - d << pr << " " << what << " doesn't match expected"; - - output_info (d, op); - output_info (d, eop, "expected "); - output_info (d, ep, "", " diff"); - input_info (d); - - print_file (d, ep, ll); - } - - // Fall through (to return false). - // - } - catch (const process_error& e) - { - error (ll) << "unable to execute " << pp << ": " << e; - - if (e.child) - exit (1); - - throw failed (); - } - } - else if (rd.type == redirect_type::here_str_regex || - rd.type == redirect_type::here_doc_regex) - { - // The overall plan is: - // - // 1. Create regex line string. While creating it's line characters - // transform regex lines according to the redirect modifiers. - // - // 2. Create line regex using the line string. If creation fails - // then save the (transformed) regex redirect to a file for - // troubleshooting. - // - // 3. Parse the output into the literal line string. - // - // 4. Match the output line string with the line regex. - // - // 5. If match fails save the (transformed) regex redirect to a file - // for troubleshooting. - // - using namespace regex; - - assert (!op.empty ()); - - // Create regex line string. - // - line_pool pool; - line_string rls; - const regex_lines rl (rd.regex); - - // Parse regex flags. - // - // When add support for new flags don't forget to update - // parse_regex(). - // - auto parse_flags = [] (const string& f) -> char_flags - { - char_flags r (char_flags::none); - - for (char c: f) - { - switch (c) - { - case 'd': r |= char_flags::idot; break; - case 'i': r |= char_flags::icase; break; - default: assert (false); // Error so should have been checked. - } - } - - return r; - }; - - // Return original regex line with the transformation applied. - // - auto line = [&rl, &rd, &sp] (const regex_line& l) -> string - { - string r; - if (l.regex) // Regex (possibly empty), - { - r += rl.intro; - r += transform (l.value, true, rd.modifiers, sp.root); - r += rl.intro; - r += l.flags; - } - else if (!l.special.empty ()) // Special literal. - r += rl.intro; - else // Textual literal. - r += transform (l.value, false, rd.modifiers, sp.root); - - r += l.special; - return r; - }; - - // Return regex line location. - // - // Note that we rely on the fact that the command and regex lines - // are always belong to the same testscript file. - // - auto loc = [&ll] (uint64_t line, uint64_t column) -> location - { - location r (ll); - r.line = line; - r.column = column; - return r; - }; - - // Save the regex to file for troubleshooting, return the file path - // it have been saved to. - // - // Note that we save the regex on line regex creation failure or if - // the program output doesn't match. - // - auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path - { - path rp (op + ".regex"); - - // Encode here-document regex global flags if present as a file - // name suffix. 
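The expected-output comparison above runs diff -u and treats exit code 0 as a match, 1 as a mismatch, and anything greater as an error. A much-simplified sketch of the same idea via std::system; the real code uses butl::process, caches the diff output in a file, and distinguishes the error case, whereas here any non-zero status simply means "not equal" and the file names are hypothetical:

#include <cstdlib>
#include <string>

static bool
diff_equal (const std::string& expected, const std::string& actual)
{
  std::string cmd ("diff -u '" + expected + "' '" + actual + "'");
  return std::system (cmd.c_str ()) == 0;
}

int main ()
{
  return diff_equal ("stdout.orig", "stdout") ? 0 : 1;
}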
For example if icase and idot flags are specified - // the name will look like: - // - // test/1/stdout.regex-di - // - if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ()) - rp += '-' + rl.flags; - - // Note that if would be more efficient to directly write chunks - // to file rather than to compose a string first. Hower we don't - // bother (about performance) for the sake of the code as we - // already failed. - // - string s; - for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); - i != e; ++i) - { - if (i != b) s += '\n'; - s += line (*i); - } - - save (rp, s, ll); - return rp; - }; - - // Finally create regex line string. - // - // Note that diagnostics doesn't refer to the program path as it is - // irrelevant to failures at this stage. - // - char_flags gf (parse_flags (rl.flags)); // Regex global flags. - - for (const auto& l: rl.lines) - { - if (l.regex) // Regex (with optional special characters). - { - line_char c; - - // Empty regex is a special case repesenting the blank line. - // - if (l.value.empty ()) - c = line_char ("", pool); - else - { - try - { - string s (transform (l.value, true, rd.modifiers, sp.root)); - - c = line_char ( - char_regex (s, gf | parse_flags (l.flags)), pool); - } - catch (const regex_error& e) - { - // Print regex_error description if meaningful. - // - diag_record d (fail (loc (l.line, l.column))); - - if (rd.type == redirect_type::here_str_regex) - d << "invalid " << what << " regex redirect" << e << - info << "regex: '" << line (l) << "'"; - else - d << "invalid char-regex in " << what << " regex redirect" - << e << - info << "regex line: '" << line (l) << "'"; - - d << endf; - } - } - - rls += c; // Append blank literal or regex line char. - } - else if (!l.special.empty ()) // Special literal. - { - // Literal can not be followed by special characters in the same - // line. - // - assert (l.value.empty ()); - } - else // Textual literal. - { - // Append literal line char. - // - rls += line_char ( - transform (l.value, false, rd.modifiers, sp.root), pool); - } - - for (char c: l.special) - { - if (line_char::syntax (c)) - rls += line_char (c); // Append special line char. - else - fail (loc (l.line, l.column)) - << "invalid syntax character '" << c << "' in " << what - << " regex redirect" << - info << "regex line: '" << line (l) << "'"; - } - } - - // Create line regex. - // - line_regex regex; - - try - { - regex = line_regex (move (rls), move (pool)); - } - catch (const regex_error& e) - { - // Note that line regex creation can not fail for here-string - // redirect as it doesn't have syntax line chars. That in - // particular means that end_line and end_column are meaningful. - // - assert (rd.type == redirect_type::here_doc_regex); - - diag_record d (fail (loc (rd.end_line, rd.end_column))); - - // Print regex_error description if meaningful. - // - d << "invalid " << what << " regex redirect" << e; - - output_info (d, save_regex (), "", " regex"); - } - - // Parse the output into the literal line string. - // - line_string ls; - - try - { - // Do not throw when eofbit is set (end of stream reached), and - // when failbit is set (getline() failed to extract any character). - // - // Note that newlines are treated as line-chars separators. That - // in particular means that the trailing newline produces a blank - // line-char (empty literal). Empty output produces the zero-length - // line-string. - // - // Also note that we strip the trailing CR characters (otherwise - // can mismatch when cross-test). 
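Creating a char_regex (and later the line_regex) can throw std::regex_error, which the code above converts into testscript diagnostics with the regex line location attached. The underlying standard pattern, in isolation:

#include <iostream>
#include <regex>

int main ()
{
  try
  {
    std::regex re ("([a-z"); // Invalid: unterminated bracket expression.
    (void) re;
  }
  catch (const std::regex_error& e)
  {
    // The real code wraps this into fail(loc) with the redirect location
    // and the offending regex line.
    //
    std::cerr << "invalid regex: " << e.what () << '\n';
  }
}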
- // - ifdstream is (op, ifdstream::badbit); - is.peek (); // Sets eofbit for an empty stream. - - while (!is.eof ()) - { - string s; - getline (is, s); - - // It is safer to strip CRs in cycle, as msvcrt unexplainably - // adds too much trailing junk to the system_error descriptions, - // and so it can appear in programs output. For example: - // - // ...: Invalid data.\r\r\n - // - // Note that our custom operator<<(ostream&, const exception&) - // removes this junk. - // - while (!s.empty () && s.back () == '\r') - s.pop_back (); - - ls += line_char (move (s), regex.pool); - } - } - catch (const io_error& e) - { - fail (ll) << "unable to read " << op << ": " << e; - } - - // Match the output with the regex. - // - if (regex_match (ls, regex)) // Doesn't throw. - return true; - - // Output doesn't match the regex. We save the regex to file for - // troubleshooting regardless of whether we print the diagnostics or - // not. We, however, register it for cleanup in the later case (the - // expression may still succeed, we can be evaluating the if - // condition, etc). - // - path rp (save_regex ()); - - if (diag) - { - diag_record d (error (ll)); - d << pr << " " << what << " doesn't match regex"; - - output_info (d, op); - output_info (d, rp, "", " regex"); - input_info (d); - - // Print cached output. - // - print_file (d, op, ll); - } - else - sp.clean_special (rp); - - // Fall through (to return false). - // - } - else // Noop. - return true; - - return false; - } + using namespace build2::script; bool default_runner:: test (scope& s) const @@ -703,7 +24,7 @@ namespace build2 void default_runner:: enter (scope& sp, const location&) { - context& ctx (sp.root.target_scope.ctx); + context& ctx (sp.context); auto df = make_diag_frame ( [&sp](const diag_record& dr) @@ -730,29 +51,25 @@ namespace build2 sp.parent == nullptr ? mkdir_buildignore ( ctx, - sp.wd_path, + *sp.work_dir.path, sp.root.target_scope.root_scope ()->root_extra->buildignore_file, 2) - : mkdir (sp.wd_path, 2)); + : mkdir (*sp.work_dir.path, 2)); if (r == mkdir_status::already_exists) - fail << "working directory " << sp.wd_path << " already exists" << + fail << diag_path (sp.work_dir) << " already exists" << info << "are tests stomping on each other's feet?"; // We don't change the current directory here but indicate that the // scope test commands will be executed in that directory. // if (verb >= 2) - text << "cd " << sp.wd_path; - - sp.clean ({cleanup_type::always, sp.wd_path}, true); + text << "cd " << *sp.work_dir.path; } void default_runner:: leave (scope& sp, const location& ll) { - context& ctx (sp.root.target_scope.ctx); - auto df = make_diag_frame ( [&sp](const diag_record& dr) { @@ -766,200 +83,30 @@ namespace build2 // if (common_.after == output_after::clean) { - // Note that we operate with normalized paths here. - // - // Remove special files. The order is not important as we don't - // expect directories here. - // - for (const auto& p: sp.special_cleanups) - { - // Remove the file if exists. Fail otherwise. - // - if (rmfile (ctx, p, 3) == rmfile_status::not_exist) - fail (ll) << "registered for cleanup special file " << p - << " does not exist"; - } - - // Remove files and directories in the order opposite to the order of - // cleanup registration. - // - for (const auto& c: reverse_iterate (sp.cleanups)) - { - cleanup_type t (c.type); + clean (sp, ll); - // Skip whenever the path exists or not. 
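The output-parsing loop above has three subtleties: peek() to set eofbit on an empty stream, newline splitting so that a trailing newline yields a trailing blank element, and stripping of any trailing CRs. The same loop over a plain istringstream, standalone:

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

int main ()
{
  std::istringstream is ("foo\r\nbar\n");
  std::vector<std::string> ls;

  is.peek (); // Sets eofbit for an empty stream.

  while (!is.eof ())
  {
    std::string s;
    std::getline (is, s);

    while (!s.empty () && s.back () == '\r')
      s.pop_back ();

    ls.push_back (std::move (s));
  }

  // The trailing newline produces a trailing blank element.
  //
  assert ((ls == std::vector<std::string> {"foo", "bar", ""}));
}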
- // - if (t == cleanup_type::never) - continue; - - const path& cp (c.path); - - // Wildcard with the last component being '***' (without trailing - // separator) matches all files and sub-directories recursively as - // well as the start directories itself. So we will recursively - // remove the directories that match the parent (for the original - // path) directory wildcard. - // - bool recursive (cp.leaf ().representation () == "***"); - const path& p (!recursive ? cp : cp.directory ()); - - // Remove files or directories using wildcard. - // - if (path_pattern (p)) - { - bool removed (false); - - auto rm = [&cp, recursive, &removed, &sp, &ll, &ctx] - (path&& pe, const string&, bool interm) - { - if (!interm) - { - // While removing the entry we can get not_exist due to - // racing conditions, but that's ok if somebody did our job. - // Note that we still set the removed flag to true in this - // case. - // - removed = true; // Will be meaningless on failure. - - if (pe.to_directory ()) - { - dir_path d (path_cast (pe)); - - if (!recursive) - { - rmdir_status r (rmdir (ctx, d, 3)); - - if (r != rmdir_status::not_empty) - return true; - - diag_record dr (fail (ll)); - dr << "registered for cleanup directory " << d - << " is not empty"; - - print_dir (dr, d, ll); - dr << info << "wildcard: '" << cp << "'"; - } - else - { - // Don't remove the working directory (it will be removed - // by the dedicated cleanup). - // - // Cast to uint16_t to avoid ambiguity with - // libbutl::rmdir_r(). - // - rmdir_status r (rmdir_r (ctx, d, d != sp.wd_path, 3)); - - if (r != rmdir_status::not_empty) - return true; - - // The directory is unlikely to be current but let's keep - // for completeness. - // - fail (ll) << "registered for cleanup wildcard " << cp - << " matches the current directory"; - } - } - else - rmfile (ctx, pe, 3); - } - - return true; - }; - - // Note that here we rely on the fact that recursive iterating - // goes depth-first (which make sense for the cleanup). - // - try - { - // Doesn't follow symlinks. - // - path_search (p, - rm, - dir_path () /* start */, - path_match_flags::none); - } - catch (const system_error& e) - { - fail (ll) << "unable to cleanup wildcard " << cp << ": " << e; - } + context& ctx (sp.context); - // Removal of no filesystem entries is not an error for 'maybe' - // cleanup type. - // - if (removed || t == cleanup_type::maybe) - continue; + rmdir_status r ( + sp.parent == nullptr + ? rmdir_buildignore (ctx, + *sp.work_dir.path, + sp.root.target_scope.root_scope ()-> + root_extra->buildignore_file, + 2) + : rmdir (ctx, *sp.work_dir.path, 2)); - fail (ll) << "registered for cleanup wildcard " << cp - << " doesn't match any " - << (recursive - ? "path" - : p.to_directory () - ? "directory" - : "file"); - } - - // Remove the directory if exists and empty. Fail otherwise. - // Removal of non-existing directory is not an error for 'maybe' - // cleanup type. - // - if (p.to_directory ()) - { - dir_path d (path_cast (p)); - bool wd (d == sp.wd_path); - - // Trace the scope working directory removal with the verbosity - // level 2 (that was used for its creation). For other - // directories use level 3 (as for other cleanups). - // - int v (wd ? 2 : 3); - - // Don't remove the working directory for the recursive cleanup - // (it will be removed by the dedicated one). - // - // Note that the root working directory contains the - // .buildignore file (see above). - // - // @@ If 'd' is a file then will fail with a diagnostics having - // no location info. 
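For a 'dir/***' cleanup the net effect is a recursive removal of the matched directory. A rough standalone analog using std::filesystem::remove_all; the directory name is hypothetical, and the real code iterates with path_search()/rmdir_r() so it can skip the working directory and report precise, location-aware diagnostics:

#include <cstdint>
#include <filesystem>
#include <iostream>
#include <system_error>

int main ()
{
  namespace fs = std::filesystem;

  fs::path d ("test-scratch"); // Hypothetical directory.

  std::error_code ec;
  std::uintmax_t n (fs::remove_all (d, ec));

  if (ec)
    std::cerr << "unable to cleanup " << d << ": " << ec.message () << '\n';
  else
    std::cout << "removed " << n << " filesystem entries\n";
}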
Probably need to add an optional location - // parameter to rmdir() function. The same problem exists for - // a file cleanup when try to rmfile() directory instead of - // file. - // - rmdir_status r ( - recursive - ? rmdir_r (ctx, d, !wd, static_cast (v)) - : (wd && sp.parent == nullptr - ? rmdir_buildignore ( - ctx, - d, - sp.root.target_scope.root_scope ()->root_extra-> - buildignore_file, - v) - : rmdir (ctx, d, v))); - - if (r == rmdir_status::success || - (r == rmdir_status::not_exist && t == cleanup_type::maybe)) - continue; - - diag_record dr (fail (ll)); - dr << "registered for cleanup directory " << d - << (r == rmdir_status::not_exist - ? " does not exist" - : !recursive - ? " is not empty" - : " is current"); + if (r != rmdir_status::success) + { + diag_record dr (fail (ll)); - if (r == rmdir_status::not_empty) - print_dir (dr, d, ll); - } + dr << diag_path (sp.work_dir) + << (r == rmdir_status::not_exist + ? " does not exist" + : " is not empty"); - // Remove the file if exists. Fail otherwise. Removal of - // non-existing file is not an error for 'maybe' cleanup type. - // - if (rmfile (ctx, p, 3) == rmfile_status::not_exist && - t == cleanup_type::always) - fail (ll) << "registered for cleanup file " << p - << " does not exist"; + if (r == rmdir_status::not_empty) + print_dir (dr, *sp.work_dir.path, ll); } } @@ -968,1041 +115,57 @@ namespace build2 // if (verb >= 2) text << "cd " << (sp.parent != nullptr - ? sp.parent->wd_path - : sp.wd_path.directory ()); + ? *sp.parent->work_dir.path + : sp.work_dir.path->directory ()); } - // The exit pseudo-builtin: exit the current scope successfully, or - // print the diagnostics and exit the current scope and all the outer - // scopes unsuccessfully. Always throw exit_scope exception. - // - // exit [] - // - [[noreturn]] static void - exit_builtin (const strings& args, const location& ll) + void default_runner:: + run (scope& sp, + const command_expr& expr, command_type ct, + size_t li, const location& ll) { - auto i (args.begin ()); - auto e (args.end ()); - - // Process arguments. - // - // If no argument is specified, then exit successfully. Otherwise, - // print the diagnostics and exit unsuccessfully. + // Noop for teardown commands if keeping tests output is requested. // - if (i == e) - throw exit_scope (true); - - const string& s (*i++); - - if (i != e) - fail (ll) << "unexpected argument '" << *i << "'"; - - error (ll) << s; - throw exit_scope (false); - } + if (ct == command_type::teardown && + common_.after == output_after::keep) + return; - // The set pseudo-builtin: set variable from the stdin input. - // - // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [] - // - static void - set_builtin (scope& sp, - const strings& args, - auto_fd in, - const location& ll) - { - try + if (verb >= 3) { - // Do not throw when eofbit is set (end of stream reached), and - // when failbit is set (read operation failed to extract any - // character). - // - ifdstream cin (move (in), ifdstream::badbit); - - // Parse arguments. - // - cli::vector_scanner scan (args); - set_options ops (scan); - - if (ops.whitespace () && ops.newline ()) - fail (ll) << "both -n|--newline and -w|--whitespace specified"; - - if (!scan.more ()) - fail (ll) << "missing variable name"; - - string a (scan.next ()); // Either attributes or variable name. - const string* ats (!scan.more () ? nullptr : &a); - const string& vname (!scan.more () ? 
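The exit pseudo-builtin above (and the exit_scope type it throws) is the exception-as-control-flow pattern: throw a small status-carrying object and let the scope driver catch it. Reduced to its essence:

#include <iostream>

// Status-carrying exception used to unwind to the scope boundary
// (analogous to exit_scope/exit above).
//
struct exit_scope
{
  bool status;

  explicit
  exit_scope (bool s): status (s) {}
};

[[noreturn]] static void
exit_builtin (bool ok)
{
  throw exit_scope (ok);
}

int main ()
{
  try
  {
    exit_builtin (true);
  }
  catch (const exit_scope& e)
  {
    std::cout << (e.status ? "scope succeeded" : "scope failed") << '\n';
  }
}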
a : scan.next ()); - - if (scan.more ()) - fail (ll) << "unexpected argument '" << scan.next () << "'"; - - if (ats != nullptr && ats->empty ()) - fail (ll) << "empty variable attributes"; + char c ('\0'); - if (vname.empty ()) - fail (ll) << "empty variable name"; + switch (ct) + { + case command_type::test: c = ' '; break; + case command_type::setup: c = '+'; break; + case command_type::teardown: c = '-'; break; + } - // Read the input. - // - cin.peek (); // Sets eofbit for an empty stream. + text << ": " << c << expr; + } - names ns; - while (!cin.eof ()) + // Print test id once per test expression. + // + auto df = make_diag_frame ( + [&sp](const diag_record& dr) { - // Read next element that depends on the whitespace mode being - // enabled or not. For the later case it also make sense to strip - // the trailing CRs that can appear while cross-testing Windows - // target or as a part of msvcrt junk production (see above). + // Let's not depend on how the path representation can be improved + // for readability on printing. // - string s; - if (ops.whitespace ()) - cin >> s; - else - { - getline (cin, s); + dr << info << "test id: " << sp.id_path.posix_string (); + }); - while (!s.empty () && s.back () == '\r') - s.pop_back (); - } + build2::script::run (sp, expr, li, ll); + } - // If failbit is set then we read nothing into the string as eof is - // reached. That in particular means that the stream has trailing - // whitespaces (possibly including newlines) if the whitespace mode - // is enabled, or the trailing newline otherwise. If so then - // we append the "blank" to the variable value in the exact mode - // prior to bailing out. - // - if (cin.fail ()) - { - if (ops.exact ()) - { - if (ops.whitespace () || ops.newline ()) - ns.emplace_back (move (s)); // Reuse empty string. - else if (ns.empty ()) - ns.emplace_back ("\n"); - else - ns[0].value += '\n'; - } - - break; - } - - if (ops.whitespace () || ops.newline () || ns.empty ()) - ns.emplace_back (move (s)); - else - { - ns[0].value += '\n'; - ns[0].value += s; - } - } - - cin.close (); - - // Set the variable value and attributes. Note that we need to aquire - // unique lock before potentially changing the script's variable - // pool. The obtained variable reference can safelly be used with no - // locking as the variable pool is an associative container - // (underneath) and we are only adding new variables into it. - // - ulock ul (sp.root.var_pool_mutex); - const variable& var (sp.root.var_pool.insert (move (vname))); - ul.unlock (); - - value& lhs (sp.assign (var)); - - // If there are no attributes specified then the variable assignment - // is straightforward. Otherwise we will use the build2 parser helper - // function. - // - if (ats == nullptr) - lhs.assign (move (ns), &var); - else - { - // If there is an error in the attributes string, our diagnostics - // will look like this: - // - // :1:1 error: unknown value attribute x - // testscript:10:1 info: while parsing attributes '[x]' - // - auto df = make_diag_frame ( - [ats, &ll](const diag_record& dr) - { - dr << info (ll) << "while parsing attributes '" << *ats << "'"; - }); - - parser p (sp.root.test_target.ctx); - p.apply_value_attributes (&var, - lhs, - value (move (ns)), - *ats, - token_type::assign, - path_name ("")); - } - } - catch (const io_error& e) - { - fail (ll) << "set: " << e; - } - catch (const cli::exception& e) - { - fail (ll) << "set: " << e; - } - } - - // Sorted array of builtins that support filesystem entries cleanup. 
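The value-reading part of the set builtin boils down to two modes: whitespace-separated tokens versus whole lines (with the CR stripping and exact-mode trailing-blank handling layered on top). The two basic modes in isolation, with an istringstream standing in for the builtin's stdin:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main ()
{
  std::istringstream cin ("foo bar\nbaz\n");
  bool whitespace (true); // -w|--whitespace vs -n|--newline.

  std::vector<std::string> ns;
  std::string s;

  if (whitespace)
  {
    while (cin >> s)              // Whitespace-separated elements.
      ns.push_back (s);
  }
  else
  {
    while (std::getline (cin, s)) // One element per line.
      ns.push_back (s);
  }

  for (const std::string& n: ns)
    std::cout << n << '\n';
}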
- // - static const char* cleanup_builtins[] = { - "cp", "ln", "mkdir", "mv", "touch"}; - - static inline bool - cleanup_builtin (const string& name) - { - return binary_search ( - cleanup_builtins, - cleanup_builtins + - sizeof (cleanup_builtins) / sizeof (*cleanup_builtins), - name); - } - - static bool - run_pipe (scope& sp, - command_pipe::const_iterator bc, - command_pipe::const_iterator ec, - auto_fd ifd, - size_t ci, size_t li, const location& ll, - bool diag) + bool default_runner:: + run_if (scope& sp, + const command_expr& expr, + size_t li, const location& ll) { - if (bc == ec) // End of the pipeline. - return true; - - // The overall plan is to run the first command in the pipe, reading - // its input from the file descriptor passed (or, for the first - // command, according to stdin redirect specification) and redirecting - // its output to the right-hand part of the pipe recursively. Fail if - // the right-hand part fails. Otherwise check the process exit code, - // match stderr (and stdout for the last command in the pipe) according - // to redirect specification(s) and fail if any of the above fails. - // - const command& c (*bc); - - // Register the command explicit cleanups. Verify that the path being - // cleaned up is a sub-path of the testscript working directory. Fail - // if this is not the case. - // - for (const auto& cl: c.cleanups) - { - const path& p (cl.path); - path np (normalize (p, sp, ll)); - - const string& ls (np.leaf ().string ()); - bool wc (ls == "*" || ls == "**" || ls == "***"); - const path& cp (wc ? np.directory () : np); - const dir_path& wd (sp.root.wd_path); - - if (!cp.sub (wd)) - fail (ll) << (wc - ? "wildcard" - : p.to_directory () - ? "directory" - : "file") - << " cleanup " << p << " is out of working directory " - << wd; - - sp.clean ({cl.type, move (np)}, false); - } - - const redirect& in (c.in.effective ()); - const redirect& out (c.out.effective ()); - const redirect& err (c.err.effective ()); - bool eq (c.exit.comparison == exit_comparison::eq); - - // If stdin file descriptor is not open then this is the first pipeline - // command. - // - bool first (ifd.get () == -1); - - command_pipe::const_iterator nc (bc + 1); - bool last (nc == ec); - - const string& program (c.program.string ()); - - // Prior to opening file descriptors for command input/output - // redirects let's check if the command is the exit builtin. Being a - // builtin syntactically it differs from the regular ones in a number - // of ways. It doesn't communicate with standard streams, so - // redirecting them is meaningless. It may appear only as a single - // command in a pipeline. It doesn't return any value and stops the - // scope execution, so checking its exit status is meaningless as - // well. That all means we can short-circuit here calling the builtin - // and bailing out right after that. Checking that the user didn't - // specify any redirects or exit code check sounds like a right thing - // to do. - // - if (program == "exit") - { - // In case the builtin is erroneously pipelined from the other - // command, we will close stdin gracefully (reading out the stream - // content), to make sure that the command doesn't print any - // unwanted diagnostics about IO operation failure. - // - // Note that dtor will ignore any errors (which is what we want). 
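cleanup_builtin() above relies on the builtin names being listed in sorted order so that std::binary_search can be used directly with a std::string key. The same lookup, self-contained:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>

static const char* cleanup_builtins[] = {"cp", "ln", "mkdir", "mv", "touch"};

static bool
cleanup_builtin (const std::string& name)
{
  return std::binary_search (std::begin (cleanup_builtins),
                             std::end (cleanup_builtins),
                             name);
}

int main ()
{
  assert (cleanup_builtin ("mkdir"));
  assert (!cleanup_builtin ("cat"));
}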
- // - ifdstream is (move (ifd), fdstream_mode::skip); - - if (!first || !last) - fail (ll) << "exit builtin must be the only pipe command"; - - if (in.type != redirect_type::none) - fail (ll) << "exit builtin stdin cannot be redirected"; - - if (out.type != redirect_type::none) - fail (ll) << "exit builtin stdout cannot be redirected"; - - if (err.type != redirect_type::none) - fail (ll) << "exit builtin stderr cannot be redirected"; - - // We can't make sure that there is no exit code check. Let's, at - // least, check that non-zero code is not expected. - // - if (eq != (c.exit.code == 0)) - fail (ll) << "exit builtin exit code cannot be non-zero"; - - exit_builtin (c.arguments, ll); // Throws exit_scope exception. - } - - // Create a unique path for a command standard stream cache file. - // - auto std_path = [&sp, &ci, &li, &ll] (const char* n) -> path - { - path p (n); - - // 0 if belongs to a single-line test scope, otherwise is the - // command line number (start from one) in the test scope. - // - if (li > 0) - p += "-" + to_string (li); - - // 0 if belongs to a single-command expression, otherwise is the - // command number (start from one) in the expression. - // - // Note that the name like stdin-N can relate to N-th command of a - // single-line test or to N-th single-command line of multi-line - // test. These cases are mutually exclusive and so are unambiguous. - // - if (ci > 0) - p += "-" + to_string (ci); - - return normalize (move (p), sp, ll); - }; - - // If this is the first pipeline command, then open stdin descriptor - // according to the redirect specified. - // - path isp; - - if (!first) - assert (in.type == redirect_type::none); // No redirect expected. - else - { - // Open a file for passing to the command stdin. - // - auto open_stdin = [&isp, &ifd, &ll] () - { - assert (!isp.empty ()); - - try - { - ifd = fdopen (isp, fdopen_mode::in); - } - catch (const io_error& e) - { - fail (ll) << "unable to read " << isp << ": " << e; - } - }; - - switch (in.type) - { - case redirect_type::pass: - { - try - { - ifd = fddup (0); - } - catch (const io_error& e) - { - fail (ll) << "unable to duplicate stdin: " << e; - } - - break; - } - - case redirect_type::none: - // Somehow need to make sure that the child process doesn't read - // from stdin. That is tricky to do in a portable way. Here we - // suppose that the program which (erroneously) tries to read some - // data from stdin being redirected to /dev/null fails not being - // able to read the expected data, and so the test doesn't pass - // through. - // - // @@ Obviously doesn't cover the case when the process reads - // whatever available. - // @@ Another approach could be not to redirect stdin and let the - // process to hang which can be interpreted as a test failure. - // @@ Both ways are quite ugly. Is there some better way to do - // this? - // - // Fall through. - // - case redirect_type::null: - { - ifd = open_null (); - break; - } - - case redirect_type::file: - { - isp = normalize (in.file.path, sp, ll); - - open_stdin (); - break; - } - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - { - // We could write to the command stdin directly but instead will - // cache the data for potential troubleshooting. 
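The std_path() lambda above derives the cache file name from the stream name plus the optional line and command numbers (0 meaning "the only one" and therefore omitted). The naming scheme as a plain function, without the normalization against the scope's working directory:

#include <cassert>
#include <cstddef>
#include <string>

static std::string
std_path (const char* n, std::size_t li, std::size_t ci)
{
  std::string p (n);

  if (li > 0)          // Command line number within a multi-line test.
    p += '-' + std::to_string (li);

  if (ci > 0)          // Command number within a multi-command expression.
    p += '-' + std::to_string (ci);

  return p;
}

int main ()
{
  assert (std_path ("stdin", 0, 0)  == "stdin");
  assert (std_path ("stdout", 2, 3) == "stdout-2-3");
}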
- // - isp = std_path ("stdin"); - - save ( - isp, transform (in.str, false, in.modifiers, sp.root), ll); - - sp.clean_special (isp); - - open_stdin (); - break; - } - case redirect_type::trace: - case redirect_type::merge: - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - case redirect_type::here_doc_ref: assert (false); break; - } - } - - assert (ifd.get () != -1); - - // Prior to opening file descriptors for command outputs redirects - // let's check if the command is the set builtin. Being a builtin - // syntactically it differs from the regular ones in a number of ways. - // It either succeeds or terminates abnormally, so redirecting stderr - // is meaningless. It also never produces any output and may appear - // only as a terminal command in a pipeline. That means we can - // short-circuit here calling the builtin and returning right after - // that. Checking that the user didn't specify any meaningless - // redirects or exit code check sounds as a right thing to do. - // - if (program == "set") - { - if (!last) - fail (ll) << "set builtin must be the last pipe command"; - - if (out.type != redirect_type::none) - fail (ll) << "set builtin stdout cannot be redirected"; - - if (err.type != redirect_type::none) - fail (ll) << "set builtin stderr cannot be redirected"; - - if (eq != (c.exit.code == 0)) - fail (ll) << "set builtin exit code cannot be non-zero"; - - set_builtin (sp, c.arguments, move (ifd), ll); - return true; - } - - // Open a file for command output redirect if requested explicitly - // (file overwrite/append redirects) or for the purpose of the output - // validation (none, here_*, file comparison redirects), register the - // file for cleanup, return the file descriptor. Interpret trace - // redirect according to the verbosity level (as null if below 2, as - // pass otherwise). Return nullfd, standard stream descriptor duplicate - // or null-device descriptor for merge, pass or null redirects - // respectively (not opening any file). - // - auto open = [&sp, &ll, &std_path] (const redirect& r, - int dfd, - path& p) -> auto_fd - { - assert (dfd == 1 || dfd == 2); - const char* what (dfd == 1 ? "stdout" : "stderr"); - - fdopen_mode m (fdopen_mode::out | fdopen_mode::create); - - redirect_type rt (r.type != redirect_type::trace - ? r.type - : verb < 2 - ? redirect_type::null - : redirect_type::pass); - switch (rt) - { - case redirect_type::pass: - { - try - { - return fddup (dfd); - } - catch (const io_error& e) - { - fail (ll) << "unable to duplicate " << what << ": " << e; - } - } - - case redirect_type::null: return open_null (); - - // Duplicate the paired file descriptor later. - // - case redirect_type::merge: return nullfd; - - case redirect_type::file: - { - // For the cmp mode the user-provided path refers a content to - // match against, rather than a content to be produced (as for - // overwrite and append modes). And so for cmp mode we redirect - // the process output to a temporary file. - // - p = r.file.mode == redirect_fmode::compare - ? std_path (what) - : normalize (r.file.path, sp, ll); - - m |= r.file.mode == redirect_fmode::append - ? 
fdopen_mode::at_end - : fdopen_mode::truncate; - - break; - } - - case redirect_type::none: - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - p = std_path (what); - m |= fdopen_mode::truncate; - break; - } - - case redirect_type::trace: - case redirect_type::here_doc_ref: assert (false); break; - } - - auto_fd fd; - - try - { - fd = fdopen (p, m); - - if ((m & fdopen_mode::at_end) != fdopen_mode::at_end) - { - if (rt == redirect_type::file) - sp.clean ({cleanup_type::always, p}, true); - else - sp.clean_special (p); - } - } - catch (const io_error& e) - { - fail (ll) << "unable to write to " << p << ": " << e; - } - - return fd; - }; - - path osp; - fdpipe ofd; - - // If this is the last command in the pipeline than redirect the - // command process stdout to a file. Otherwise create a pipe and - // redirect the stdout to the write-end of the pipe. The read-end will - // be passed as stdin for the next command in the pipeline. - // - // @@ Shouldn't we allow the here-* and file output redirects for a - // command with pipelined output? Say if such redirect is present - // then the process output is redirected to a file first (as it is - // when no output pipelined), and only after the process exit code - // and the output are validated the next command in the pipeline is - // executed taking the file as an input. This could be usefull for - // test failures investigation and for tests "tightening". - // - if (last) - ofd.out = open (out, 1, osp); - else - { - assert (out.type == redirect_type::none); // No redirect expected. - ofd = open_pipe (); - } - - path esp; - auto_fd efd (open (err, 2, esp)); - - // Merge standard streams. - // - bool mo (out.type == redirect_type::merge); - if (mo || err.type == redirect_type::merge) - { - auto_fd& self (mo ? ofd.out : efd); - auto_fd& other (mo ? efd : ofd.out); - - try - { - assert (self.get () == -1 && other.get () != -1); - self = fddup (other.get ()); - } - catch (const io_error& e) - { - fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout") - << ": " << e; - } - } - - // All descriptors should be open to the date. - // - assert (ofd.out.get () != -1 && efd.get () != -1); - - optional exit; - builtin_function* bf (builtins.find (program)); - - bool success; - - auto process_args = [&c] () -> cstrings - { - cstrings args {c.program.string ().c_str ()}; - - for (const auto& a: c.arguments) - args.push_back (a.c_str ()); - - args.push_back (nullptr); - return args; - }; - - if (bf != nullptr) - { - // Execute the builtin. - // - if (verb >= 2) - print_process (process_args ()); - - // Some of the testscript builtins (cp, mkdir, etc) extend libbutl - // builtins (via callbacks) registering/moving cleanups for the - // filesystem entries they create/move, unless explicitly requested - // not to do so via the --no-cleanup option. - // - // Let's "wrap up" the cleanup-related flags into the single object - // to rely on "small function object" optimization. - // - struct cleanup - { - // Whether the cleanups are enabled for the builtin. Can be set to - // false by the parse_option callback if --no-cleanup is - // encountered. - // - bool enabled = true; - - // Whether to register cleanup for a filesystem entry being - // created/updated depending on its existence. Calculated by the - // create pre-hook and used by the subsequent post-hook. 
- // - bool add; - - // Whether to move existing cleanups for the filesystem entry - // being moved, rather than to erase them. Calculated by the move - // pre-hook and used by the subsequent post-hook. - // - bool move; - }; - - // nullopt if the builtin doesn't support cleanups. - // - optional cln; - - if (cleanup_builtin (program)) - cln = cleanup (); - - builtin_callbacks bcs { - - // create - // - // Unless cleanups are suppressed, test that the filesystem entry - // doesn't exist (pre-hook) and, if that's the case, register the - // cleanup for the newly created filesystem entry (post-hook). - // - [&sp, &cln] (const path& p, bool pre) - { - // Cleanups must be supported by a filesystem entry-creating - // builtin. - // - assert (cln); - - if (cln->enabled) - { - if (pre) - cln->add = !butl::entry_exists (p); - else if (cln->add) - sp.clean ({cleanup_type::always, p}, true /* implicit */); - } - }, - - // move - // - // Validate the source and destination paths (pre-hook) and, - // unless suppressed, adjust the cleanups that are sub-paths of - // the source path (post-hook). - // - [&sp, &cln] - (const path& from, const path& to, bool force, bool pre) - { - // Cleanups must be supported by a filesystem entry-moving - // builtin. - // - assert (cln); - - if (pre) - { - const dir_path& wd (sp.wd_path); - const dir_path& rwd (sp.root.wd_path); - - auto fail = [] (const string& d) {throw runtime_error (d);}; - - if (!from.sub (rwd) && !force) - fail ("'" + from.representation () + - "' is out of working directory '" + rwd.string () + - "'"); - - auto check_wd = [&wd, fail] (const path& p) - { - if (wd.sub (path_cast (p))) - fail ("'" + p.string () + - "' contains test working directory '" + - wd.string () + "'"); - }; - - check_wd (from); - check_wd (to); - - // Unless cleanups are disabled, "move" the matching cleanups - // if the destination path doesn't exist and it is a sub-path - // of the working directory and just remove them otherwise. - // - if (cln->enabled) - cln->move = !butl::entry_exists (to) && to.sub (rwd); - } - else if (cln->enabled) - { - // Move or remove the matching cleanups (see above). - // - // Note that it's not enough to just change the cleanup paths. - // We also need to make sure that these cleanups happen before - // the destination directory (or any of its parents) cleanup, - // that is potentially registered. To achieve that we can just - // relocate these cleanup entries to the end of the list, - // preserving their mutual order. Remember that cleanups in - // the list are executed in the reversed order. - // - cleanups cs; - - // Remove the source path sub-path cleanups from the list, - // adjusting/caching them if required (see above). - // - for (auto i (sp.cleanups.begin ()); i != sp.cleanups.end (); ) - { - build2::test::script::cleanup& c (*i); - path& p (c.path); - - if (p.sub (from)) - { - if (cln->move) - { - // Note that we need to preserve the cleanup path - // trailing separator which indicates the removal - // method. Also note that leaf(), in particular, does - // that. - // - p = p != from - ? to / p.leaf (path_cast (from)) - : p.to_directory () - ? path_cast (to) - : to; - - cs.push_back (move (c)); - } - - i = sp.cleanups.erase (i); - } - else - ++i; - } - - // Re-insert the adjusted cleanups at the end of the list. - // - sp.cleanups.insert (sp.cleanups.end (), - make_move_iterator (cs.begin ()), - make_move_iterator (cs.end ())); - - } - }, - - // remove - // - // Validate the filesystem entry path (pre-hook). 
- // - [&sp] (const path& p, bool force, bool pre) - { - if (pre) - { - const dir_path& wd (sp.wd_path); - const dir_path& rwd (sp.root.wd_path); - - auto fail = [] (const string& d) {throw runtime_error (d);}; - - if (!p.sub (rwd) && !force) - fail ("'" + p.representation () + - "' is out of working directory '" + rwd.string () + - "'"); - - if (wd.sub (path_cast (p))) - fail ("'" + p.string () + - "' contains test working directory '" + wd.string () + - "'"); - } - }, - - // parse_option - // - [&cln] (const strings& args, size_t i) - { - // Parse --no-cleanup, if it is supported by the builtin. - // - if (cln && args[i] == "--no-cleanup") - { - cln->enabled = false; - return 1; - } - - return 0; - }, - - // sleep - // - // Deactivate the thread before going to sleep. - // - [&sp] (const duration& d) - { - // If/when required we could probably support the precise sleep - // mode (e.g., via an option). - // - sp.root.test_target.ctx.sched.sleep (d); - } - }; - - try - { - uint8_t r; // Storage. - builtin b (bf (r, - c.arguments, - move (ifd), move (ofd.out), move (efd), - sp.wd_path, - bcs)); - - success = run_pipe (sp, - nc, - ec, - move (ofd.in), - ci + 1, li, ll, diag); - - exit = process_exit (b.wait ()); - } - catch (const system_error& e) - { - fail (ll) << "unable to execute " << c.program << " builtin: " - << e << endf; - } - } - else - { - // Execute the process. - // - cstrings args (process_args ()); - - // Resolve the relative not simple program path against the scope's - // working directory. The simple one will be left for the process - // path search machinery. Also strip the potential leading `^`, - // indicating that this is an external program rather than a - // builtin. - // - path p; - - try - { - p = path (args[0]); - - if (p.relative ()) - { - auto program = [&p, &args] (path pp) - { - p = move (pp); - args[0] = p.string ().c_str (); - }; - - if (p.simple ()) - { - const string& s (p.string ()); - - // Don't end up with an empty path. - // - if (s.size () > 1 && s[0] == '^') - program (path (s, 1, s.size () - 1)); - } - else - program (sp.wd_path / p); - } - } - catch (const invalid_path& e) - { - fail (ll) << "invalid program path " << e.path; - } - - try - { - process_path pp (process::path_search (args[0])); - - // Note: the builtin-escaping character '^' is not printed. - // - if (verb >= 2) - print_process (args); - - process pr ( - pp, - args.data (), - {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, - sp.wd_path.string ().c_str ()); - - ifd.reset (); - ofd.out.reset (); - efd.reset (); - - success = run_pipe (sp, - nc, - ec, - move (ofd.in), - ci + 1, li, ll, diag); - - pr.wait (); - - exit = move (pr.exit); - } - catch (const process_error& e) - { - error (ll) << "unable to execute " << args[0] << ": " << e; - - if (e.child) - std::exit (1); - - throw failed (); - } - } - - assert (exit); - - // If the righ-hand side pipeline failed than the whole pipeline fails, - // and no further checks are required. - // - if (!success) - return false; - - const path& pr (c.program); - - // If there is no valid exit code available by whatever reason then we - // print the proper diagnostics, dump stderr (if cached and not too - // large) and fail the whole test. Otherwise if the exit code is not - // correct then we print diagnostics if requested and fail the - // pipeline. - // - bool valid (exit->normal ()); - - // On Windows the exit code can be out of the valid codes range being - // defined as uint16_t. 
- // -#ifdef _WIN32 - if (valid) - valid = exit->code () < 256; -#endif - - success = valid && eq == (exit->code () == c.exit.code); - - if (!valid || (!success && diag)) - { - // In the presense of a valid exit code we print the diagnostics and - // return false rather than throw. - // - diag_record d (valid ? error (ll) : fail (ll)); - - if (!exit->normal ()) - d << pr << " " << *exit; - else - { - uint16_t ec (exit->code ()); // Make sure is printed as integer. - - if (!valid) - d << pr << " exit code " << ec << " out of 0-255 range"; - else if (!success) - { - if (diag) - d << pr << " exit code " << ec << (eq ? " != " : " == ") - << static_cast (c.exit.code); - } - else - assert (false); - } - - if (non_empty (esp, ll)) - d << info << "stderr: " << esp; - - if (non_empty (osp, ll)) - d << info << "stdout: " << osp; - - if (non_empty (isp, ll)) - d << info << "stdin: " << isp; - - // Print cached stderr. - // - print_file (d, esp, ll); - } - - // If exit code is correct then check if the standard outputs match the - // expectations. Note that stdout is only redirected to file for the - // last command in the pipeline. - // - // The thinking behind matching stderr first is that if it mismatches, - // then the program probably misbehaves (executes wrong functionality, - // etc) in which case its stdout doesn't really matter. - // - if (success) - success = - check_output (pr, esp, isp, err, ll, sp, diag, "stderr") && - (!last || - check_output (pr, osp, isp, out, ll, sp, diag, "stdout")); - - return success; - } + if (verb >= 3) + text << ": ?" << expr; - static bool - run_expr (scope& sp, - const command_expr& expr, - size_t li, const location& ll, - bool diag) - { // Print test id once per test expression. // auto df = make_diag_frame ( @@ -2014,90 +177,7 @@ namespace build2 dr << info << "test id: " << sp.id_path.posix_string (); }); - // Commands are numbered sequentially throughout the expression - // starting with 1. Number 0 means the command is a single one. - // - size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1 - ? 0 - : 1); - - // If there is no ORs to the right of a pipe then the pipe failure is - // fatal for the whole expression. In particular, the pipe must print - // the diagnostics on failure (if generally allowed). So we find the - // pipe that "switches on" the diagnostics potential printing. - // - command_expr::const_iterator trailing_ands; // Undefined if diag is - // disallowed. - if (diag) - { - auto i (expr.crbegin ()); - for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ; - trailing_ands = i.base (); - } - - bool r (false); - bool print (false); - - for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i) - { - if (diag && i + 1 == trailing_ands) - print = true; - - const command_pipe& p (i->pipe); - bool or_op (i->op == expr_operator::log_or); - - // Short-circuit if the pipe result must be OR-ed with true or AND-ed - // with false. - // - if (!((or_op && r) || (!or_op && !r))) - r = run_pipe ( - sp, p.begin (), p.end (), auto_fd (), ci, li, ll, print); - - ci += p.size (); - } - - return r; - } - - void default_runner:: - run (scope& sp, - const command_expr& expr, command_type ct, - size_t li, - const location& ll) - { - // Noop for teardown commands if keeping tests output is requested. 
- // - if (ct == command_type::teardown && - common_.after == output_after::keep) - return; - - if (verb >= 3) - { - char c ('\0'); - - switch (ct) - { - case command_type::test: c = ' '; break; - case command_type::setup: c = '+'; break; - case command_type::teardown: c = '-'; break; - } - - text << ": " << c << expr; - } - - if (!run_expr (sp, expr, li, ll, true)) - throw failed (); // Assume diagnostics is already printed. - } - - bool default_runner:: - run_if (scope& sp, - const command_expr& expr, - size_t li, const location& ll) - { - if (verb >= 3) - text << ": ?" << expr; - - return run_expr (sp, expr, li, ll, false); + return build2::script::run_if (sp, expr, li, ll); } } } diff --git a/libbuild2/test/script/runner.hxx b/libbuild2/test/script/runner.hxx index af37f56..22cae4e 100644 --- a/libbuild2/test/script/runner.hxx +++ b/libbuild2/test/script/runner.hxx @@ -7,6 +7,8 @@ #include #include +#include // exit + #include namespace build2 @@ -17,18 +19,7 @@ namespace build2 namespace script { - // An exception that can be thrown by a runner to exit the scope (for - // example, as a result of executing the exit builtin). The status - // indicates whether the scope should be considered to have succeeded - // or failed. - // - struct exit_scope - { - bool status; - - explicit - exit_scope (bool s): status (s) {} - }; + using exit_scope = build2::script::exit; class runner { diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx index 79b8bca..34d4723 100644 --- a/libbuild2/test/script/script.cxx +++ b/libbuild2/test/script/script.cxx @@ -8,6 +8,8 @@ #include #include +#include + using namespace std; namespace build2 @@ -16,414 +18,60 @@ namespace build2 { namespace script { - ostream& - operator<< (ostream& o, line_type lt) - { - const char* s (nullptr); - - switch (lt) - { - case line_type::var: s = "variable"; break; - case line_type::cmd: s = "command"; break; - case line_type::cmd_if: s = "'if'"; break; - case line_type::cmd_ifn: s = "'if!'"; break; - case line_type::cmd_elif: s = "'elif'"; break; - case line_type::cmd_elifn: s = "'elif!'"; break; - case line_type::cmd_else: s = "'else'"; break; - case line_type::cmd_end: s = "'end'"; break; - } - - return o << s; - } - - // Quote if empty or contains spaces or any of the special characters. - // Note that we use single quotes since double quotes still allow - // expansion. - // - // @@ What if it contains single quotes? - // - static void - to_stream_q (ostream& o, const string& s) - { - if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos) - o << '\'' << s << '\''; - else - o << s; - }; - - void - to_stream (ostream& o, const command& c, command_to_stream m) - { - auto print_path = [&o] (const path& p) - { - using build2::operator<<; - - ostringstream s; - stream_verb (s, stream_verb (o)); - s << p; - - to_stream_q (o, s.str ()); - }; - - auto print_redirect = - [&o, print_path] (const redirect& r, const char* prefix) - { - o << ' ' << prefix; - - size_t n (string::traits_type::length (prefix)); - assert (n > 0); - - char d (prefix[n - 1]); // Redirect direction. 
- - switch (r.type) - { - case redirect_type::none: assert (false); break; - case redirect_type::pass: o << '|'; break; - case redirect_type::null: o << '-'; break; - case redirect_type::trace: o << '!'; break; - case redirect_type::merge: o << '&' << r.fd; break; - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - { - bool doc (r.type == redirect_type::here_doc_literal); - - // For here-document add another '>' or '<'. Note that here end - // marker never needs to be quoted. - // - if (doc) - o << d; - - o << r.modifiers; - - if (doc) - o << r.end; - else - { - const string& v (r.str); - to_stream_q (o, - r.modifiers.find (':') == string::npos - ? string (v, 0, v.size () - 1) // Strip newline. - : v); - } - - break; - } - - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - bool doc (r.type == redirect_type::here_doc_regex); - - // For here-document add another '>' or '<'. Note that here end - // marker never needs to be quoted. - // - if (doc) - o << d; - - o << r.modifiers; - - const regex_lines& re (r.regex); - - if (doc) - o << re.intro + r.end + re.intro + re.flags; - else - { - assert (!re.lines.empty ()); // Regex can't be empty. - - regex_line l (re.lines[0]); - to_stream_q (o, re.intro + l.value + re.intro + l.flags); - } - - break; - } - - case redirect_type::file: - { - // For stdin or stdout-comparison redirect add '>>' or '<<' (and - // so make it '<<<' or '>>>'). Otherwise add '+' or '=' (and so - // make it '>+' or '>='). - // - if (d == '<' || r.file.mode == redirect_fmode::compare) - o << d << d; - else - o << (r.file.mode == redirect_fmode::append ? '+' : '='); - - print_path (r.file.path); - break; - } - - case redirect_type::here_doc_ref: assert (false); break; - } - }; - - auto print_doc = [&o] (const redirect& r) - { - o << endl; - - if (r.type == redirect_type::here_doc_literal) - o << r.str; - else - { - assert (r.type == redirect_type::here_doc_regex); - - const regex_lines& rl (r.regex); - - for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); - i != e; ++i) - { - if (i != b) - o << endl; - - const regex_line& l (*i); - - if (l.regex) // Regex (possibly empty), - o << rl.intro << l.value << rl.intro << l.flags; - else if (!l.special.empty ()) // Special literal. - o << rl.intro; - else // Textual literal. - o << l.value; - - o << l.special; - } - } - - o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end; - }; - - if ((m & command_to_stream::header) == command_to_stream::header) - { - // Program. - // - to_stream_q (o, c.program.string ()); - - // Arguments. - // - for (const string& a: c.arguments) - { - o << ' '; - to_stream_q (o, a); - } - - // Redirects. - // - if (c.in.effective ().type != redirect_type::none) - print_redirect (c.in.effective (), "<"); - - if (c.out.effective ().type != redirect_type::none) - print_redirect (c.out.effective (), ">"); - - if (c.err.effective ().type != redirect_type::none) - print_redirect (c.err.effective (), "2>"); - - for (const auto& p: c.cleanups) - { - o << " &"; - - if (p.type != cleanup_type::always) - o << (p.type == cleanup_type::maybe ? '?' : '!'); - - print_path (p.path); - } - - if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0) - { - switch (c.exit.comparison) - { - case exit_comparison::eq: o << " == "; break; - case exit_comparison::ne: o << " != "; break; - } - - o << static_cast (c.exit.code); - } - } - - if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) - { - // Here-documents. 
- // - if (c.in.type == redirect_type::here_doc_literal || - c.in.type == redirect_type::here_doc_regex) - print_doc (c.in); - - if (c.out.type == redirect_type::here_doc_literal || - c.out.type == redirect_type::here_doc_regex) - print_doc (c.out); - - if (c.err.type == redirect_type::here_doc_literal || - c.err.type == redirect_type::here_doc_regex) - print_doc (c.err); - } - } - - void - to_stream (ostream& o, const command_pipe& p, command_to_stream m) - { - if ((m & command_to_stream::header) == command_to_stream::header) - { - for (auto b (p.begin ()), i (b); i != p.end (); ++i) - { - if (i != b) - o << " | "; - - to_stream (o, *i, command_to_stream::header); - } - } - - if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) - { - for (const command& c: p) - to_stream (o, c, command_to_stream::here_doc); - } - } - - void - to_stream (ostream& o, const command_expr& e, command_to_stream m) - { - if ((m & command_to_stream::header) == command_to_stream::header) - { - for (auto b (e.begin ()), i (b); i != e.end (); ++i) - { - if (i != b) - { - switch (i->op) - { - case expr_operator::log_or: o << " || "; break; - case expr_operator::log_and: o << " && "; break; - } - } - - to_stream (o, i->pipe, command_to_stream::header); - } - } - - if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) - { - for (const expr_term& t: e) - to_stream (o, t.pipe, command_to_stream::here_doc); - } - } - - // redirect + // scope_base // - redirect:: - redirect (redirect_type t) - : type (t) + scope_base:: + scope_base (script& s) + : root (s), + vars (s.test_target.ctx, false /* global */) { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - case redirect_type::merge: break; - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: new (&str) string (); break; - - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - new (®ex) regex_lines (); - break; - } - - case redirect_type::file: new (&file) file_type (); break; - - case redirect_type::here_doc_ref: assert (false); break; - } + vars.assign (root.wd_var) = dir_path (); } - redirect:: - redirect (redirect&& r) - : type (r.type), - modifiers (move (r.modifiers)), - end (move (r.end)), - end_line (r.end_line), - end_column (r.end_column) + const dir_path* scope_base:: + wd_path () const { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: break; - - case redirect_type::merge: fd = r.fd; break; - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - { - new (&str) string (move (r.str)); - break; - } - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - new (®ex) regex_lines (move (r.regex)); - break; - } - case redirect_type::file: - { - new (&file) file_type (move (r.file)); - break; - } - case redirect_type::here_doc_ref: - { - new (&ref) reference_wrapper (r.ref); - break; - } - } + return &cast (vars[root.wd_var]); } - redirect:: - ~redirect () + const target_triplet& scope_base:: + test_tt () const { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - case redirect_type::merge: break; + if (auto r = + cast_null (root.test_target["test.target"])) + return *r; - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: str.~string (); break; - - case 
redirect_type::here_str_regex: - case redirect_type::here_doc_regex: regex.~regex_lines (); break; - - case redirect_type::file: file.~file_type (); break; - - case redirect_type::here_doc_ref: - { - ref.~reference_wrapper (); - break; - } - } - } - - redirect& redirect:: - operator= (redirect&& r) - { - if (this != &r) - { - this->~redirect (); - new (this) redirect (move (r)); // Assume noexcept move-constructor. - } - return *this; + // We set it to default value in init() so it can only be NULL if the + // user resets it. + // + fail << "invalid test.target value" << endf; } // scope // + static const optional wd_name ("test working directory"); + static const optional sd_name ("working directory"); + scope:: scope (const string& id, scope* p, script& r) - : parent (p), - root (r), - vars (r.test_target.ctx, false /* global */), - id_path (cast (assign (root.id_var) = path ())), - wd_path (cast (assign (root.wd_var) = dir_path ())) - + : scope_base (r), + // + // Note that root.work_dir is not yet constructed if we are + // creating the root scope (p is NULL). Also note that + // root.test_target is always constructed to date. + // + environment (root.test_target.ctx, + test_tt (), + dir_name_view (wd_path (), &wd_name), + dir_name_view ( + p != nullptr ? root.work_dir.path : wd_path (), + &sd_name), + *wd_path (), true /* temp_dir_keep */, + redirect (redirect_type::none), + redirect (redirect_type::none), + redirect (redirect_type::none)), + parent (p), + id_path (cast (assign (root.id_var) = path ())) { // Construct the id_path as a string to ensure POSIX form. In fact, // the only reason we keep it as a path is to be able to easily get id @@ -443,38 +91,64 @@ namespace build2 // (handled in an ad hoc way). // if (p != nullptr) - const_cast (wd_path) = dir_path (p->wd_path) /= id; + const_cast (*work_dir.path) = + dir_path (*p->work_dir.path) /= id; } void scope:: - clean (cleanup c, bool implicit) + set_variable (string&& nm, + names&& val, + const string& attrs, + const location& ll) { - using std::find; // Hidden by scope::find(). + // Check if we are trying to modify any of the special variables. + // + if (parser::special_variable (nm)) + fail (ll) << "attempt to set '" << nm << "' variable directly"; + + // Set the variable value and attributes. Note that we need to aquire + // unique lock before potentially changing the script's variable + // pool. The obtained variable reference can safelly be used with no + // locking as the variable pool is an associative container + // (underneath) and we are only adding new variables into it. + // + ulock ul (root.var_pool_mutex); + const variable& var (root.var_pool.insert (move (nm))); + ul.unlock (); - assert (!implicit || c.type == cleanup_type::always); + value& lhs (assign (var)); - const path& p (c.path); - if (!p.sub (root.wd_path)) + // If there are no attributes specified then the variable assignment + // is straightforward. Otherwise we will use the build2 parser helper + // function. + // + if (attrs.empty ()) + lhs.assign (move (val), &var); + else { - if (implicit) - return; - else - assert (false); // Error so should have been checked. + // If there is an error in the attributes string, our diagnostics + // will look like this: + // + // :1:1 error: unknown value attribute x + // testscript:10:1 info: while parsing attributes '[x]' + // + // Note that the attributes parsing error is the only reason for a + // failure. 
+ // + auto df = make_diag_frame ( + [attrs, &ll](const diag_record& dr) + { + dr << info (ll) << "while parsing attributes '" << attrs << "'"; + }); + + parser p (context); + p.apply_value_attributes (&var, + lhs, + value (move (val)), + attrs, + token_type::assign, + path_name ("")); } - - auto pr = [&p] (const cleanup& v) -> bool {return v.path == p;}; - auto i (find_if (cleanups.begin (), cleanups.end (), pr)); - - if (i == cleanups.end ()) - cleanups.emplace_back (move (c)); - else if (!implicit) - i->type = c.type; - } - - void scope:: - clean_special (path p) - { - special_cleanups.emplace_back (move (p)); } // script_base @@ -523,7 +197,8 @@ namespace build2 // Set the script working dir ($~) to $out_base/test/ (id_path // for root is just the id which is empty if st is 'testscript'). // - const_cast (wd_path) = dir_path (rwd) /= id_path.string (); + const_cast (*work_dir.path) = + dir_path (rwd) /= id_path.string (); // Set the test variable at the script level. We do it even if it's // set in the buildfile since they use different types. @@ -625,7 +300,6 @@ namespace build2 return lookup_in_buildfile (var.name); } - lookup scope:: lookup_in_buildfile (const string& n, bool target_only) const { @@ -634,7 +308,7 @@ namespace build2 // in parallel). Plus, if there is no such variable, then we cannot // possibly find any value. // - const variable* pvar (root.test_target.ctx.var_pool.find (n)); + const variable* pvar (context.var_pool.find (n)); if (pvar == nullptr) return lookup_type (); diff --git a/libbuild2/test/script/script.hxx b/libbuild2/test/script/script.hxx index a28ef25..6356501 100644 --- a/libbuild2/test/script/script.hxx +++ b/libbuild2/test/script/script.hxx @@ -12,9 +12,9 @@ #include -#include +#include -#include // replay_tokens +#include namespace build2 { @@ -22,295 +22,14 @@ namespace build2 { namespace script { - class parser; // Required by VC for 'friend class parser' declaration. - - // Pre-parse representation. - // - - enum class line_type - { - var, - cmd, - cmd_if, - cmd_ifn, - cmd_elif, - cmd_elifn, - cmd_else, - cmd_end - }; - - ostream& - operator<< (ostream&, line_type); - - struct line - { - line_type type; - replay_tokens tokens; - - union - { - const variable* var; // Pre-entered for line_type::var. - }; - }; - - // Most of the time we will have just one line (test command). - // - using lines = small_vector; - - // Parse object model. - // - - // redirect - // - enum class redirect_type - { - none, - pass, - null, - trace, - merge, - here_str_literal, - here_str_regex, - here_doc_literal, - here_doc_regex, - here_doc_ref, // Reference to here_doc literal or regex. - file, - }; - - // Pre-parsed (but not instantiated) regex lines. The idea here is that - // we should be able to re-create their (more or less) exact text - // representation for diagnostics but also instantiate without any - // re-parsing. - // - struct regex_line - { - // If regex is true, then value is the regex expression. Otherwise, it - // is a literal. Note that special characters can be present in both - // cases. For example, //+ is a regex, while /+ is a literal, both - // with '+' as a special character. Flags are only valid for regex. - // Literals falls apart into textual (has no special characters) and - // special (has just special characters instead) ones. For example - // foo is a textual literal, while /.+ is a special one. Note that - // literal must not have value and special both non-empty. 
- // - bool regex; - - string value; - string flags; - string special; - - uint64_t line; - uint64_t column; - - // Create regex with optional special characters. - // - regex_line (uint64_t l, uint64_t c, - string v, string f, string s = string ()) - : regex (true), - value (move (v)), - flags (move (f)), - special (move (s)), - line (l), - column (c) {} - - // Create a literal, either text or special. - // - regex_line (uint64_t l, uint64_t c, string v, bool s) - : regex (false), - value (s ? string () : move (v)), - special (s ? move (v) : string ()), - line (l), - column (c) {} - }; - - struct regex_lines - { - char intro; // Introducer character. - string flags; // Global flags (here-document). - - small_vector lines; - }; - - // Output file redirect mode. - // - enum class redirect_fmode - { - compare, - overwrite, - append - }; - - struct redirect - { - redirect_type type; - - struct file_type - { - using path_type = build2::path; - path_type path; - redirect_fmode mode; // Meaningless for input redirect. - }; - - union - { - int fd; // Merge-to descriptor. - string str; // Note: with trailing newline, if requested. - regex_lines regex; // Note: with trailing blank, if requested. - file_type file; - reference_wrapper ref; // Note: no chains. - }; - - string modifiers; // Redirect modifiers. - string end; // Here-document end marker (no regex intro/flags). - uint64_t end_line; // Here-document end marker location. - uint64_t end_column; - - // Create redirect of a type other than reference. - // - explicit - redirect (redirect_type = redirect_type::none); - - // Create redirect of the reference type. - // - redirect (redirect_type t, const redirect& r) - : type (redirect_type::here_doc_ref), ref (r) - { - // There is no support (and need) for reference chains. - // - assert (t == redirect_type::here_doc_ref && - r.type != redirect_type::here_doc_ref); - } - - // Move constuctible/assignable-only type. - // - redirect (redirect&&); - redirect& operator= (redirect&&); - - ~redirect (); - - const redirect& - effective () const noexcept - { - return type == redirect_type::here_doc_ref ? ref.get () : *this; - } - }; - - // cleanup - // - enum class cleanup_type - { - always, // &foo - cleanup, fail if does not exist. - maybe, // &?foo - cleanup, ignore if does not exist. - never // &!foo - don’t cleanup, ignore if doesn’t exist. - }; - - // File or directory to be automatically cleaned up at the end of the - // scope. If the path ends with a trailing slash, then it is assumed to - // be a directory, otherwise -- a file. A directory that is about to be - // cleaned up must be empty. - // - // The last component in the path may contain a wildcard that have the - // following semantics: - // - // dir/* - remove all immediate files - // dir/*/ - remove all immediate sub-directories (must be empty) - // dir/** - remove all files recursively - // dir/**/ - remove all sub-directories recursively (must be empty) - // dir/*** - remove directory dir with all files and sub-directories - // recursively - // - struct cleanup - { - cleanup_type type; - build2::path path; - }; - using cleanups = vector; - - // command_exit - // - enum class exit_comparison {eq, ne}; - - struct command_exit - { - // C/C++ don't apply constraints on program exit code other than it - // being of type int. - // - // POSIX specifies that only the least significant 8 bits shall be - // available from wait() and waitpid(); the full value shall be - // available from waitid() (read more at _Exit, _exit Open Group - // spec). 
- // - // While the Linux man page for waitid() doesn't mention any - // deviations from the standard, the FreeBSD implementation (as of - // version 11.0) only returns 8 bits like the other wait*() calls. - // - // Windows supports 32-bit exit codes. - // - // Note that in shells some exit values can have special meaning so - // using them can be a source of confusion. For bash values in the - // [126, 255] range are such a special ones (see Appendix E, "Exit - // Codes With Special Meanings" in the Advanced Bash-Scripting Guide). - // - exit_comparison comparison; - uint8_t code; - }; - - // command - // - struct command - { - path program; - strings arguments; - - redirect in; - redirect out; - redirect err; - - script::cleanups cleanups; - - command_exit exit {exit_comparison::eq, 0}; - }; - - enum class command_to_stream: uint16_t - { - header = 0x01, - here_doc = 0x02, // Note: printed on a new line. - all = header | here_doc - }; - - void - to_stream (ostream&, const command&, command_to_stream); - - ostream& - operator<< (ostream&, const command&); - - // command_pipe - // - using command_pipe = vector; - - void - to_stream (ostream&, const command_pipe&, command_to_stream); - - ostream& - operator<< (ostream&, const command_pipe&); - - // command_expr - // - enum class expr_operator {log_or, log_and}; - - struct expr_term - { - expr_operator op; // OR-ed to an implied false for the first term. - command_pipe pipe; - }; - - using command_expr = vector; + using build2::script::line; + using build2::script::lines; + using build2::script::redirect; + using build2::script::redirect_type; + using build2::script::line_type; + using build2::script::command_expr; - void - to_stream (ostream&, const command_expr&, command_to_stream); - - ostream& - operator<< (ostream&, const command_expr&); + class parser; // Required by VC for 'friend class parser' declaration. // command_type // @@ -335,31 +54,54 @@ namespace build2 // class script; + class scope_base // Make sure certain things are initialized early. + { + public: + script& root; // Self for the root (script) scope. + + // Note that if we pass the variable name as a string, then it will + // be looked up in the wrong pool. + // + variable_map vars; + + protected: + scope_base (script&); + + const dir_path* + wd_path () const; + + const target_triplet& + test_tt () const; + }; + enum class scope_state {unknown, passed, failed}; - class scope + class scope: public scope_base, public build2::script::environment { public: scope* const parent; // NULL for the root (script) scope. - script& root; // Self for the root (script) scope. // The chain of if-else scope alternatives. See also if_cond_ below. // unique_ptr if_chain; - // Note that if we pass the variable name as a string, then it will - // be looked up in the wrong pool. - // - variable_map vars; - const path& id_path; // Id path ($@, relative in POSIX form). - const dir_path& wd_path; // Working dir ($~, absolute and normalized). optional desc; scope_state state = scope_state::unknown; - test::script::cleanups cleanups; - paths special_cleanups; + + void + set_variable (string&& name, + names&&, + const string& attrs, + const location&) override; + + // Noop since the temporary directory is a working directory and so + // is created before the scope commands execution. + // + virtual void + create_temp_dir () override {assert (false);}; // Variables. 
// @@ -382,17 +124,18 @@ namespace build2 lookup_in_buildfile (const string&, bool target_only = true) const; // Return a value suitable for assignment. If the variable does not - // exist in this scope's map, then a new one with the NULL value is - // added and returned. Otherwise the existing value is returned. + // exist in this scope's variable map, then a new one with the NULL + // value is added and returned. Otherwise the existing value is + // returned. // value& assign (const variable& var) {return vars.assign (var);} // Return a value suitable for append/prepend. If the variable does - // not exist in this scope's map, then outer scopes are searched for - // the same variable. If found then a new variable with the found - // value is added to this scope and returned. Otherwise this function - // proceeds as assign() above. + // not exist in this scope's variable map, then outer scopes are + // searched for the same variable. If found then a new variable with + // the found value is added to this scope and returned. Otherwise this + // function proceeds as assign() above. // value& append (const variable&); @@ -402,27 +145,6 @@ namespace build2 void reset_special (); - // Cleanup. - // - public: - // Register a cleanup. If the cleanup is explicit, then override the - // cleanup type if this path is already registered. Ignore implicit - // registration of a path outside script working directory. - // - void - clean (cleanup, bool implicit); - - // Register cleanup of a special file. Such files are created to - // maintain testscript machinery and must be removed first, not to - // interfere with the user-defined wildcard cleanups. - // - void - clean_special (path p); - - public: - virtual - ~scope () = default; - protected: scope (const string& id, scope* parent, script& root); @@ -567,6 +289,4 @@ namespace build2 } } -#include - #endif // LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX diff --git a/libbuild2/test/script/script.ixx b/libbuild2/test/script/script.ixx deleted file mode 100644 index 38cba29..0000000 --- a/libbuild2/test/script/script.ixx +++ /dev/null @@ -1,59 +0,0 @@ -// file : libbuild2/test/script/script.ixx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -namespace build2 -{ - namespace test - { - namespace script - { - inline command_to_stream - operator&= (command_to_stream& x, command_to_stream y) - { - return x = static_cast ( - static_cast (x) & static_cast (y)); - } - - inline command_to_stream - operator|= (command_to_stream& x, command_to_stream y) - { - return x = static_cast ( - static_cast (x) | static_cast (y)); - } - - inline command_to_stream - operator& (command_to_stream x, command_to_stream y) {return x &= y;} - - inline command_to_stream - operator| (command_to_stream x, command_to_stream y) {return x |= y;} - - - // command - // - inline ostream& - operator<< (ostream& o, const command& c) - { - to_stream (o, c, command_to_stream::all); - return o; - } - - // command_pipe - // - inline ostream& - operator<< (ostream& o, const command_pipe& p) - { - to_stream (o, p, command_to_stream::all); - return o; - } - - // command_expr - // - inline ostream& - operator<< (ostream& o, const command_expr& e) - { - to_stream (o, e, command_to_stream::all); - return o; - } - } - } -} diff --git a/libbuild2/test/script/token.cxx b/libbuild2/test/script/token.cxx index 85fbb06..efeb17b 100644 --- a/libbuild2/test/script/token.cxx +++ b/libbuild2/test/script/token.cxx @@ -12,43 +12,22 @@ namespace build2 namespace script { void - token_printer (ostream& os, 
const token& t, bool d) + token_printer (ostream& os, const token& t, print_mode m) { - const string& v (t.value); - // Only quote non-name tokens for diagnostics. // - const char* q (d ? "'" : ""); + const char* q (m == print_mode::diagnostics ? "'" : ""); switch (t.type) { - case token_type::semi: os << q << ';' << q; break; - - case token_type::dot: os << q << '.' << q; break; - - case token_type::plus: os << q << '+' << q; break; - case token_type::minus: os << q << '-' << q; break; - - case token_type::clean: os << q << '&' << v << q; break; - case token_type::pipe: os << q << '|' << q; break; + case token_type::semi: os << q << ';' << q; break; - case token_type::in_pass: os << q << "<|" << q; break; - case token_type::in_null: os << q << "<-" << q; break; - case token_type::in_str: os << q << '<' << v << q; break; - case token_type::in_doc: os << q << "<<" << v << q; break; - case token_type::in_file: os << q << "<<<" << q; break; + case token_type::dot: os << q << '.' << q; break; - case token_type::out_pass: os << q << ">|" << q; break; - case token_type::out_null: os << q << ">-" << q; break; - case token_type::out_trace: os << q << ">!" << q; break; - case token_type::out_merge: os << q << ">&" << q; break; - case token_type::out_str: os << q << '>' << v << q; break; - case token_type::out_doc: os << q << ">>" << v << q; break; - case token_type::out_file_cmp: os << q << ">>>" << v << q; break; - case token_type::out_file_ovr: os << q << ">=" << v << q; break; - case token_type::out_file_app: os << q << ">+" << v << q; break; + case token_type::plus: os << q << '+' << q; break; + case token_type::minus: os << q << '-' << q; break; - default: build2::token_printer (os, t, d); + default: build2::script::token_printer (os, t, m); } } } diff --git a/libbuild2/test/script/token.hxx b/libbuild2/test/script/token.hxx index 14be0a2..dead796 100644 --- a/libbuild2/test/script/token.hxx +++ b/libbuild2/test/script/token.hxx @@ -7,7 +7,7 @@ #include #include -#include +#include namespace build2 { @@ -15,9 +15,9 @@ namespace build2 { namespace script { - struct token_type: build2::token_type + struct token_type: build2::script::token_type { - using base_type = build2::token_type; + using base_type = build2::script::token_type; enum { @@ -28,35 +28,16 @@ namespace build2 dot, // . plus, // + - minus, // - - - pipe, // | - clean, // &{?!} (modifiers in value) - - in_pass, // <| - in_null, // <- - in_str, // <{:} (modifiers in value) - in_doc, // <<{:} (modifiers in value) - in_file, // <<< - - out_pass, // >| - out_null, // >- - out_trace, // >! - out_merge, // >& - out_str, // >{:~} (modifiers in value) - out_doc, // >>{:~} (modifiers in value) - out_file_cmp, // >>> - out_file_ovr, // >= - out_file_app // >+ + minus // - }; token_type () = default; token_type (value_type v): base_type (v) {} - token_type (base_type v): base_type (v) {} + token_type (build2::token_type v): base_type (v) {} }; void - token_printer (ostream&, const token&, bool); + token_printer (ostream&, const token&, print_mode); } } } diff --git a/libbuild2/token.cxx b/libbuild2/token.cxx index 4975a02..7ce85be 100644 --- a/libbuild2/token.cxx +++ b/libbuild2/token.cxx @@ -8,51 +8,82 @@ using namespace std; namespace build2 { void - token_printer (ostream& os, const token& t, bool d) + token_printer (ostream& os, const token& t, print_mode m) { // Only quote non-name tokens for diagnostics. // - const char* q (d ? "'" : ""); + const char* q (m == print_mode::diagnostics ? 
"'" : ""); + bool r (m == print_mode::raw); switch (t.type) { - case token_type::eos: os << ""; break; - case token_type::newline: os << ""; break; - case token_type::pair_separator: os << ""; break; - case token_type::word: os << '\'' << t.value << '\''; break; - - case token_type::colon: os << q << ':' << q; break; - case token_type::dollar: os << q << '$' << q; break; - case token_type::question: os << q << '?' << q; break; - case token_type::comma: os << q << ',' << q; break; - - case token_type::lparen: os << q << '(' << q; break; - case token_type::rparen: os << q << ')' << q; break; - - case token_type::lcbrace: os << q << '{' << q; break; - case token_type::rcbrace: os << q << '}' << q; break; - - case token_type::lsbrace: os << q << '[' << q; break; - case token_type::rsbrace: os << q << ']' << q; break; - - case token_type::labrace: os << q << '<' << q; break; - case token_type::rabrace: os << q << '>' << q; break; - - case token_type::assign: os << q << '=' << q; break; - case token_type::prepend: os << q << "=+" << q; break; - case token_type::append: os << q << "+=" << q; break; - case token_type::default_assign: os << q << "?=" << q; break; - - case token_type::equal: os << q << "==" << q; break; - case token_type::not_equal: os << q << "!=" << q; break; - case token_type::less: os << q << '<' << q; break; - case token_type::greater: os << q << '>' << q; break; - case token_type::less_equal: os << q << "<=" << q; break; - case token_type::greater_equal: os << q << ">=" << q; break; - - case token_type::log_or: os << q << "||" << q; break; - case token_type::log_and: os << q << "&&" << q; break; - case token_type::log_not: os << q << '!' << q; break; + case token_type::eos: + { + if (!r) + os <<""; + + break; + } + case token_type::newline: + { + os << (r ? "\n" : ""); + break; + } + case token_type::pair_separator: + { + if (r) + os << t.value[0]; + else + os << ""; + + break; + } + case token_type::word: + { + if (r) + os << t.value; + else + os << '\'' << t.value << '\''; + + break; + } + + case token_type::colon: os << q << ':' << q; break; + case token_type::dollar: os << q << '$' << q; break; + case token_type::question: os << q << '?' 
<< q; break; + case token_type::percent: os << q << '%' << q; break; + case token_type::comma: os << q << ',' << q; break; + + case token_type::lparen: os << q << '(' << q; break; + case token_type::rparen: os << q << ')' << q; break; + + case token_type::lcbrace: os << q << '{' << q; break; + case token_type::rcbrace: os << q << '}' << q; break; + + case token_type::multi_lcbrace: os << q << t.value << q; break; + case token_type::multi_rcbrace: os << q << t.value << q; break; + + case token_type::lsbrace: os << q << '[' << q; break; + case token_type::rsbrace: os << q << ']' << q; break; + + case token_type::labrace: os << q << '<' << q; break; + case token_type::rabrace: os << q << '>' << q; break; + + case token_type::assign: os << q << '=' << q; break; + case token_type::prepend: os << q << "=+" << q; break; + case token_type::append: os << q << "+=" << q; break; + case token_type::default_assign: os << q << "?=" << q; break; + + case token_type::equal: os << q << "==" << q; break; + case token_type::not_equal: os << q << "!=" << q; break; + case token_type::less: os << q << '<' << q; break; + case token_type::greater: os << q << '>' << q; break; + case token_type::less_equal: os << q << "<=" << q; break; + case token_type::greater_equal: os << q << ">=" << q; break; + + case token_type::log_or: os << q << "||" << q; break; + case token_type::log_and: os << q << "&&" << q; break; + case token_type::log_not: os << q << '!' << q; break; default: assert (false); // Unhandled extended token. } diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx index e48c088..156e428 100644 --- a/libbuild2/token.hxx +++ b/libbuild2/token.hxx @@ -13,7 +13,6 @@ namespace build2 { - // Token type. // // A line consists of a sequence of words separated by separators and @@ -36,6 +35,7 @@ namespace build2 colon, // : dollar, // $ question, // ? + percent, // % comma, // , lparen, // ( @@ -44,6 +44,9 @@ namespace build2 lcbrace, // { rcbrace, // } + multi_lcbrace, // {{... (value contains the braces) + multi_rcbrace, // }}... (value contains the braces) + lsbrace, // [ rsbrace, // ] @@ -85,20 +88,37 @@ namespace build2 class token; + enum class print_mode + { + // Print eos, newline, and pair separator in the form and other + // tokens as literals, single-quoting the word token. + // + normal, + + // Same as normal but all literals are quoted. + // + diagnostics, + + // Print all tokens as literals with newline represented as '\n' and eos + // as an empty string. + // + raw + }; + LIBBUILD2_SYMEXPORT void - token_printer (ostream&, const token&, bool); + token_printer (ostream&, const token&, print_mode); class token { public: - using printer_type = void (ostream&, const token&, bool diag); + using printer_type = void (ostream&, const token&, print_mode); token_type type; bool separated; // Whitespace-separated from the previous token. // Quoting can be complete, where the token starts and ends with the quote // characters and quoting is contiguous or partial where only some part(s) - // of the token are quoted or quoting continus to the next token. + // of the token are quoted or quoting continues to the next token. // quote_type qtype; bool qcomp; @@ -146,7 +166,25 @@ namespace build2 // Output the token value in a format suitable for diagnostics. 
// inline ostream& - operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;} + operator<< (ostream& o, const token& t) + { + t.printer (o, t, print_mode::diagnostics); + return o; + } + + // Note: these are currently only used for sanity checks. + // + inline bool + operator== (const token& x, const token& y) + { + return x.type == y.type && x.value == y.value; + } + + inline bool + operator!= (const token& x, const token& y) + { + return !(x == y); + } // Context-dependent lexing (see lexer_mode for details). // diff --git a/libbuild2/types.hxx b/libbuild2/types.hxx index d20fa22..6582c3a 100644 --- a/libbuild2/types.hxx +++ b/libbuild2/types.hxx @@ -230,6 +230,7 @@ namespace build2 using butl::path_name_view; using butl::path_name_value; using butl::dir_path; + using butl::dir_name_view; using butl::path_cast; using butl::basic_path; using butl::invalid_path; @@ -347,6 +348,12 @@ namespace build2 location (uint64_t l, uint64_t c): line (l), column (c) {} }; + // Print in the :: form with 0 lines/columns not + // printed. Nothing is printed for an empty location. + // + ostream& + operator<< (ostream&, const location&); + // Similar (and implicit-convertible) to the above but stores a copy of the // path. // @@ -371,7 +378,6 @@ namespace build2 LIBBUILD2_SYMEXPORT ostream& operator<< (ostream&, run_phase); // utility.cxx - } // In order to be found (via ADL) these have to be either in std:: or in diff --git a/libbuild2/types.ixx b/libbuild2/types.ixx index c770842..750c8c7 100644 --- a/libbuild2/types.ixx +++ b/libbuild2/types.ixx @@ -3,6 +3,27 @@ namespace build2 { + // location + // + inline ostream& + operator<< (ostream& o, const location& l) + { + if (!l.empty ()) + { + o << l.file; + + if (l.line != 0) + { + o << ':' << l.line; + + if (l.column != 0) + o << ':' << l.column; + } + } + + return o; + } + // Note that in the constructors we cannot pass the file data member to the // base class constructor as it is not initialized yet (and so its base // path/name pointers are not initialized). 
Thus, we initialize the path diff --git a/libbuild2/utility.cxx b/libbuild2/utility.cxx index a45e901..81f6809 100644 --- a/libbuild2/utility.cxx +++ b/libbuild2/utility.cxx @@ -14,6 +14,8 @@ #include #include +#include // script::regex::init() + using namespace std; using namespace butl; @@ -588,5 +590,7 @@ namespace build2 { fail << "unable to obtain home directory: " << e; } + + script::regex::init (); } } diff --git a/libbuild2/utility.hxx b/libbuild2/utility.hxx index 9800d6c..7a6ada2 100644 --- a/libbuild2/utility.hxx +++ b/libbuild2/utility.hxx @@ -73,6 +73,7 @@ namespace build2 using butl::trim; using butl::next_word; using butl::sanitize_identifier; + using butl::sanitize_strlit; using butl::make_guard; using butl::make_exception_guard; diff --git a/tests/dependency/chain/testscript b/tests/dependency/chain/testscript index 9232840..ac4a946 100644 --- a/tests/dependency/chain/testscript +++ b/tests/dependency/chain/testscript @@ -3,7 +3,7 @@ .include ../../common.testscript -: basic +: basics : $* <>/~%EOE% ./: dir{x}: dir{a} diff --git a/tests/dependency/recipe/buildfile b/tests/dependency/recipe/buildfile new file mode 100644 index 0000000..3dc5452 --- /dev/null +++ b/tests/dependency/recipe/buildfile @@ -0,0 +1,4 @@ +# file : tests/dependency/recipe/buildfile +# license : MIT; see accompanying LICENSE file + +./: testscript $b diff --git a/tests/dependency/recipe/testscript b/tests/dependency/recipe/testscript new file mode 100644 index 0000000..5510e3c --- /dev/null +++ b/tests/dependency/recipe/testscript @@ -0,0 +1,344 @@ +# file : tests/dependency/recipe/testscript +# license : MIT; see accompanying LICENSE file + +.include ../../common.testscript + +# Note: in the parser we have to handle recipes for the with/without +# prerequisites cases separately. So we try to cover both here. 
+ +: basics +: +$* <>/~%EOE% +alias{x}: alias{z} +{{ + cmd +}} +dump alias{x} +EOI +:5:1: dump: +% .+/alias\{x\}: .+/:alias\{z\}% + {{ + cmd + }} +EOE + +: basics-replay +: +$* <>/~%EOE% +alias{x y}: alias{z} +{{ + cmd +}} +dump alias{y} +EOI +:5:1: dump: +% .+/alias\{y\}: .+/:alias\{z\}% + {{ + cmd + }} +EOE + +: basics-header +: +$* <>/~%EOE% +alias{x}: +% +{{ + cmd +}} +dump alias{x} +EOI +:6:1: dump: +% .+/alias\{x\}:% + {{ + cmd + }} +EOE + +: basics-header-replay +: +$* <>/~%EOE% +alias{x y}: +% +{{ + cmd +}} +dump alias{y} +EOI +:6:1: dump: +% .+/alias\{y\}:% + {{ + cmd + }} +EOE + +: basics-lang +: +$* <>/~%EOE% +alias{x}: +{{ c++ + void f (); +}} +dump alias{x} +EOI +:5:1: dump: +% .+/alias\{x\}:% + {{ c++ + void f (); + }} +EOE + +: with-vars +: +$* <>/~%EOE% +alias{x}: +{ + var = x +} +{{ + cmd +}} +dump alias{x} +EOI +:8:1: dump: +% .+/alias\{x\}:% + { + var = x + } + {{ + cmd + }} +EOE + +: with-vars-replay +: +$* <>/~%EOE% +alias{x y}: alias{z} +{ + var = x +} +{{ + cmd +}} +dump alias{y} +EOI +:8:1: dump: +% .+/alias\{y\}: .+/:alias\{z\}% + { + var = x + } + {{ + cmd + }} +EOE + +: with-vars-header +: +$* <>/~%EOE% +alias{x}: alias{z} +{ + var = x +} +% +{{ + cmd +}} +dump alias{x} +EOI +:9:1: dump: +% .+/alias\{x\}: .+/:alias\{z\}% + { + var = x + } + {{ + cmd + }} +EOE + +: with-vars-header-replay +: +$* <>/~%EOE% +alias{x y}: +{ + var = x +} +% +{{ + cmd +}} +dump alias{y} +EOI +:9:1: dump: +% .+/alias\{y\}:% + { + var = x + } + {{ + cmd + }} +EOE + +: chain +: +$* <>/~%EOE% +alias{x}: +{{ + cmd1 +}} +{{{ + cmd2 +}}} +dump alias{x} +EOI +:8:1: dump: +% .+/alias\{x\}:% + {{ + cmd1 + }} + {{{ + cmd2 + }}} +EOE + +: chain-replay +: +$* <>/~%EOE% +alias{x y}: alias{z} +{{ + cmd1 +}} +{{{ + cmd2 +}}} +dump alias{y} +EOI +:8:1: dump: +% .+/alias\{y\}: .+/:alias\{z\}% + {{ + cmd1 + }} + {{{ + cmd2 + }}} +EOE + +: chain-header +: +$* <>/~%EOE% +alias{x}: alias{z} + +{{ + cmd1 +}} + +% +{{{ + cmd2 +}}} +dump alias{x} +EOI +:11:1: dump: +% .+/alias\{x\}: .+/:alias\{z\}% + {{ + cmd1 + }} + {{{ + cmd2 + }}} +EOE + +: chain-header-replay +: +$* <>/~%EOE% +alias{x y}: + +{{ + cmd1 +}} + +% +{{{ + cmd2 +}}} +dump alias{y} +EOI +:11:1: dump: +% .+/alias\{y\}:% + {{ + cmd1 + }} + {{{ + cmd2 + }}} +EOE + +: unterminated +: +$* <>EOE != 0 +alias{x}: +{{{ + cmd +}} +EOI +:5:1: error: unterminated recipe block + :2:1: info: recipe block starts here +EOE + +: expected-lang +: +$* <>EOE != 0 +alias{x}: +{{ $lang + cmd +}} +EOI +:2:4: error: expected recipe language instead of '$' +EOE + +: header-attribute +: +$* <>/~!EOE! +alias{x}: +% [diag=gen] +{{ + cmd +}} +dump alias{x} +EOI +:6:1: dump: +! .+/alias\{x\}:! + % [diag=gen] + {{ + cmd + }} +EOE + +: header-attribute-replay +: +$* <>/~!EOE! +alias{x y}: +% [diag=gen] +{{ + cmd +}} +dump alias{y} +EOI +:6:1: dump: +! .+/alias\{y\}:! + % [diag=gen] + {{ + cmd + }} +EOE + +: header-missing-block +: +$* <>EOE != 0 +alias{x}: +% +{ + cmd +} +EOI +:3:1: error: expected recipe block instead of '{' +EOE diff --git a/tests/test/script/builtin/mv.testscript b/tests/test/script/builtin/mv.testscript index 31e2603..764c1e0 100644 --- a/tests/test/script/builtin/mv.testscript +++ b/tests/test/script/builtin/mv.testscript @@ -21,8 +21,8 @@ : Moving path outside the testscript working directory fails. 
: $c <>/~%EOE% == 1 - %mv: '.+/fail/a/b/c' is out of working directory '.+/fail/test'% + mv ../../a/b/c ./c 2>>/EOE == 1 + mv: 'a/b/c' is out of working directory 'test/' EOE EOI @@ -84,14 +84,14 @@ : src : $c <"mv: '$~' contains test working directory '$~'" != 0 + mv $~ b 2>/"mv: 'test/1/' contains test working directory 'test/1/'" != 0 EOI : dst : $c <"mv: '$~' contains test working directory '$~'" != 0 + mv a "$~" 2>/"mv: 'test/1' contains test working directory 'test/1/'" != 0 EOI } diff --git a/tests/test/script/builtin/rm.testscript b/tests/test/script/builtin/rm.testscript index 5b00042..21ec2a9 100644 --- a/tests/test/script/builtin/rm.testscript +++ b/tests/test/script/builtin/rm.testscript @@ -11,7 +11,7 @@ : Removing scope directory fails. : $c <"rm: '$~' contains test working directory '$~'" == 1 + rm -r ./ 2>/"rm: 'test/1/' contains test working directory 'test/1/'" == 1 EOI } @@ -28,8 +28,8 @@ : Removing path outside the testscript working directory fails. : $c <>/~%EOE% == 1 - %rm: '.+/path/outside-scope/fail/a/b/c' is out of working directory '.+/path/outside-scope/fail/test'% + rm ../../a/b/c 2>>/EOE == 1 + rm: 'a/b/c' is out of working directory 'test/' EOE EOI diff --git a/tests/test/script/builtin/rmdir.testscript b/tests/test/script/builtin/rmdir.testscript index 269dd58..a63a5dd 100644 --- a/tests/test/script/builtin/rmdir.testscript +++ b/tests/test/script/builtin/rmdir.testscript @@ -11,7 +11,7 @@ : Removing scope directory fails. : $c <"rmdir: '$~' contains test working directory '$~'" == 1 + rmdir ./ 2>/"rmdir: 'test/1/' contains test working directory 'test/1/'" == 1 EOI : outside-scope @@ -24,8 +24,8 @@ : Removing directory outside the testscript working directory fails. : $c <>/~%EOE% == 1 - %rmdir: '.+/dir/outside-scope/fail/a/b/c/' is out of working directory '.+/dir/outside-scope/fail/test'% + rmdir ../../a/b/c 2>>/EOE == 1 + rmdir: 'a/b/c/' is out of working directory 'test/' EOE EOI diff --git a/tests/test/script/runner/cleanup.testscript b/tests/test/script/runner/cleanup.testscript index e39ae95..03153e4 100644 --- a/tests/test/script/runner/cleanup.testscript +++ b/tests/test/script/runner/cleanup.testscript @@ -56,7 +56,7 @@ b += --no-column : Test explicit cleanup of a file out of the testscript working directory. : $c <'$* &../../a' && $b 2>>/EOE != 0 - testscript:1: error: file cleanup ../../a is out of working directory test/ + testscript:1: error: file cleanup ../../a is out of working directory 'test/' info: test id: 1 EOE @@ -116,7 +116,7 @@ b += --no-column : Test cleanup of a directory out of the testscript working directory. : $c <'$* &../../a/' && $b 2>>/EOE != 0 - testscript:1: error: directory cleanup ../../a/ is out of working directory test/ + testscript:1: error: directory cleanup ../../a/ is out of working directory 'test/' info: test id: 1 EOE @@ -150,7 +150,7 @@ b += --no-column : dir : $c <'$* -d a/b' && $b 2>>/EOE != 0 - testscript:1: error: registered for cleanup directory test/1/ is not empty + testscript:1: error: test working directory 'test/1/' is not empty a/ info: test id: 1 EOE @@ -220,7 +220,7 @@ b += --no-column : Test cleanup of a wildcard out of the testscript working directory. 
: $c <'$* &../../a/***' && $b 2>>/EOE != 0 - testscript:1: error: wildcard cleanup ../../a/*** is out of working directory test/ + testscript:1: error: wildcard cleanup ../../a/*** is out of working directory 'test/' info: test id: 1 EOE @@ -346,7 +346,7 @@ EOI : Test an implicit cleanup being overwritten with the explicit one, : $c <'$* -o foo >=a &!a' && $b 2>>/EOE != 0 -testscript:1: error: registered for cleanup directory test/1/ is not empty +testscript:1: error: test working directory 'test/1/' is not empty a info: test id: 1 EOE @@ -359,7 +359,7 @@ $c <>/EOE != 0 $* &!a; $* -o foo >=a EOO -testscript:2: error: registered for cleanup directory test/1/ is not empty +testscript:2: error: test working directory 'test/1/' is not empty a info: test id: 1 EOE diff --git a/tests/test/script/runner/redirect.testscript b/tests/test/script/runner/redirect.testscript index b8fe74d..0fe3aa3 100644 --- a/tests/test/script/runner/redirect.testscript +++ b/tests/test/script/runner/redirect.testscript @@ -80,10 +80,14 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : literal : { - $c <'$* -i 0 foo' && $b : out - $c <'$* -e foo 2>foo' && $b : err - $c <'$* -i 1 foo' && $b : inout + $c <'$* -i 0 <<<=foo' && $b : in + $c <'$* -i 0 >>?foo' && $b : out + $c <'$* -o foo >foo' && $b : out-alias + $c <'$* -e foo 2>>>?foo' && $b : err + $c <'$* -e foo 2>foo' && $b : err-alias + $c <'$* -i 1 <<<=foo >>>?foo' && $b : inout + $c <'$* -i 1 foo' && $b : inout-alias : inout-fail : @@ -101,11 +105,16 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. info: test id: 1 EOE - $c <'$* -i 2 foo' && $b : inerr - $c <'$* -i 1 -e bar foo 2>bar' && $b : inout-err - $c <'$* -o "" >""' && $b : empty - $c <'$* -i 1 <:"foo" >:"foo"' && $b : no-newline - $c <'$* -i 1 <:"" >:""' && $b : no-newline-empty + $c <'$* -i 2 <<<=foo 2>>>?foo' && $b : inerr + $c <'$* -i 2 foo' && $b : inerr-alias + $c <'$* -i 1 -e bar <<<=foo 1>>>?foo 2>>>?bar' && $b : inout-err + $c <'$* -i 1 -e bar foo 2>bar' && $b : inout-err-alias + $c <'$* -o "" >>>?""' && $b : empty + $c <'$* -o "" >""' && $b : empty-alias + $c <'$* -i 1 <<<=:"foo" >>>?:"foo"' && $b : no-newline + $c <'$* -i 1 <:"foo" >:"foo"' && $b : no-newline-alias + $c <'$* -i 1 <<<=:"" >>>?:""' && $b : no-newline-empty + $c <'$* -i 1 <:"" >:""' && $b : no-newline-empty-alias : no-newline-fail1 : @@ -132,6 +141,15 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : merge : $c <>?EOE 1>&2 + foo + bar + EOE + EOI + + : merge-alias + : + $c <>EOE 1>&2 foo bar @@ -155,6 +173,10 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. { : match : + $c <'$* -o foo >>>?~/Foo?/i' && $b + + : match-alias + : $c <'$* -o foo >~/Foo?/i' && $b : mismatch @@ -209,6 +231,15 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : in : $c <>?EOO + foo + bar + EOO + EOI + + : out-alias + : + $c <>EOO foo bar @@ -227,6 +267,15 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : err : $c <>?EOO + foo + bar + EOO + EOI + + : err-alias + : + $c <>EOO foo bar @@ -236,6 +285,18 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : inout : $c <>?EOO + foo + bar + EOF + foo + bar + EOO + EOI + + : inout-alias + : + $c <>EOO foo bar @@ -248,6 +309,19 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. 
: inerr : $c <>?EOE + foo + bar + EOF + foo + bar + EOE + EOI + + + : inerr-alias + : + $c <>EOE foo bar @@ -260,6 +334,14 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : empty : $c <>?EOO + EOF + EOO + EOI + + : empty-alias + : + $c <>EOO EOF EOO @@ -268,16 +350,43 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : shared : $c <>?EOF + foo + bar + EOF + EOI + + : shared-alias + : + $c <>EOF foo bar EOF EOI + : shared-in-alias + : + $c <>?EOF + foo + bar + EOF + EOI + + : shared-out-alias + : + $c <>EOF + foo + bar + EOF + EOI + : extra-newline : $c <>EOO + $* -i 1 <<=EOF >>?EOO EOF @@ -287,7 +396,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : no-newline : $c <>:EOO + $* -i 1 <<=:EOF >>?:EOO foo EOF foo @@ -297,7 +406,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : no-newline-fail1 : $c <>~/EOE/ != 0 - $* -i 1 <<:EOF >>EOO + $* -i 1 <<=:EOF >>?EOO foo EOF foo @@ -314,7 +423,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : no-newline-fail2 : $c <>~/EOE/ != 0 - $* -i 1 <>:EOO + $* -i 1 <<=EOF >>?:EOO foo EOF foo @@ -331,7 +440,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : no-newline-empty : $c <>:EOO + $* -i 1 <<=:EOF >>?:EOO EOF EOO EOI @@ -339,7 +448,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : no-newline-extra-newline : $c <>:EOO + $* -i 1 <<=:EOF >>?:EOO EOF @@ -349,7 +458,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : merge : $c <>EOO 2>&1 + $* -i 1 <<=EOF -e baz >>?EOO 2>&1 foo bar EOF @@ -387,6 +496,14 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : in : $c <<"EOI" && $b + \$* -i 1 <<=/EOF >>>?'foo$ps' + foo/ + EOF + EOI + + : in-alias + : + $c <<"EOI" && $b \$* -i 1 <'foo$ps' foo/ EOF @@ -395,6 +512,14 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : out : $c <<"EOI" && $b + \$* -i 1 <<<='foo$ps' >>?/EOO + foo/ + EOO + EOI + + : out-alias + : + $c <<"EOI" && $b \$* -i 1 <'foo$ps' >>/EOO foo/ EOO @@ -403,6 +528,14 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : err : $c <<"EOI" && $b + \$* -i 2 <<<='foo$ps' 2>>?/EOE + foo/ + EOE + EOI + + : err-alias + : + $c <<"EOI" && $b \$* -i 2 <'foo$ps' 2>>/EOE foo/ EOE @@ -419,7 +552,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : match : $c <>~/EOO/i + $* -o foo -o foo -o bar >>?~/EOO/i /FO*/* bar /* @@ -429,7 +562,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : match-empty : $c <>:~/EOO/ + $* >>?:~/EOO/ /.{0} EOO EOI @@ -437,7 +570,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : shared : $c <>~/EOF/ 2>>~/EOF/ + $* -o foo -e foo >>?~/EOF/ 2>>?~/EOF/ foo EOF EOI @@ -445,7 +578,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : mismatch : $c <>/~%EOE%d != 0 - $* -o foo >>~/EOO/ + $* -o foo >>?~/EOO/ bar EOO EOI @@ -459,7 +592,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : mismatch-icase : $c <>/~%EOE%d != 0 - $* -o foo >>~/EOO/i + $* -o foo >>?~/EOO/i bar EOO EOI @@ -479,6 +612,13 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. 
: $c <=out; + $* -i 1 <=out >foo + EOI + + : in-alias + : + $c <=out; $* -i 1 <<foo EOI @@ -490,6 +630,17 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. $c <=out; $* -e bar 2>+out; + $* -i 1 <?out + foo + bar + EOF + EOI + + : match-alias + : + $c <=out; + $* -e bar 2>+out; $* -i 1 <>>out foo bar @@ -500,7 +651,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. : $c <>/~%EOE%d != 0 $* -o foo >=out; - $* -o bar >>>out + $* -o bar >?out EOI %testscript:2: error: ../../../../../driver(.exe)? stdout doesn't match expected% info: stdout: test/1/stdout-2 -- cgit v1.1