From f59d82eb8fda3ddcf790556c6c3615e40ae8b15b Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 3 Oct 2022 21:23:22 +0300 Subject: Add support for 'for' loop second (... | for x) and third (for x <...) forms in script --- .../build/script/lexer+for-loop.test.testscript | 188 ++++++ libbuild2/build/script/lexer.cxx | 31 +- libbuild2/build/script/lexer.hxx | 7 +- libbuild2/build/script/lexer.test.cxx | 1 + libbuild2/build/script/parser+for.test.testscript | 460 +++++++++++++- libbuild2/build/script/parser.cxx | 201 ++++-- libbuild2/build/script/parser.test.cxx | 25 +- libbuild2/build/script/runner.cxx | 13 +- libbuild2/build/script/runner.hxx | 5 + libbuild2/build/script/script.cxx | 2 +- libbuild2/build/script/script.hxx | 4 +- libbuild2/script/builtin-options.cxx | 291 +++++++++ libbuild2/script/builtin-options.hxx | 84 +++ libbuild2/script/builtin-options.ixx | 57 ++ libbuild2/script/builtin.cli | 7 + libbuild2/script/parser.cxx | 373 +++++++++-- libbuild2/script/parser.hxx | 31 +- libbuild2/script/run.cxx | 391 +++++++----- libbuild2/script/run.hxx | 66 +- libbuild2/script/script.cxx | 24 +- libbuild2/script/script.hxx | 18 +- .../test/script/lexer+for-loop.test.testscript | 231 +++++++ libbuild2/test/script/lexer.cxx | 28 +- libbuild2/test/script/lexer.hxx | 9 +- libbuild2/test/script/lexer.test.cxx | 1 + libbuild2/test/script/parser+for.test.testscript | 702 ++++++++++++++++++++- libbuild2/test/script/parser.cxx | 218 ++++--- libbuild2/test/script/parser.test.cxx | 25 +- libbuild2/test/script/runner.cxx | 3 +- libbuild2/test/script/runner.hxx | 5 + libbuild2/test/script/script.cxx | 2 +- libbuild2/test/script/script.hxx | 3 +- libbuild2/utility.hxx | 1 + 33 files changed, 3104 insertions(+), 403 deletions(-) create mode 100644 libbuild2/build/script/lexer+for-loop.test.testscript create mode 100644 libbuild2/test/script/lexer+for-loop.test.testscript (limited to 'libbuild2') diff --git a/libbuild2/build/script/lexer+for-loop.test.testscript b/libbuild2/build/script/lexer+for-loop.test.testscript new file mode 100644 index 0000000..3f8e6b5 --- /dev/null +++ b/libbuild2/build/script/lexer+for-loop.test.testscript @@ -0,0 +1,188 @@ +# file : libbuild2/build/script/lexer+for-loop.test.testscript +# license : MIT; see accompanying LICENSE file + +test.arguments = for-loop + +: redirect +: +{ + : pass + : + $* <"cmd <| 1>|" >>EOO + 'cmd' + <| + '1' + >| + + EOO + + : null + : + $* <"cmd <- 1>-" >>EOO + 'cmd' + <- + '1' + >- + + EOO + + : trace + : + $* <"cmd 1>!" >>EOO + 'cmd' + '1' + >! + + EOO + + : merge + : + $* <"cmd 1>&2" >>EOO + 'cmd' + '1' + >& + '2' + + EOO + + : str + : + $* <"cmd <<<=a 1>>>?b" >>EOO + 'cmd' + <<<= + 'a' + '1' + >>>? + 'b' + + EOO + + : str-nn + : + $* <"cmd <<<=:a 1>>>?:b" >>EOO + 'cmd' + <<<=: + 'a' + '1' + >>>?: + 'b' + + EOO + + : str-nn-alias + : + $* <"cmd <<<:a 1>>>?:b" >>EOO + 'cmd' + <<<: + 'a' + '1' + >>>?: + 'b' + + EOO + + : doc + : + $* <"cmd <>EOO" >>EOO + 'cmd' + << + 'EOI' + '1' + >> + 'EOO' + + EOO + + : doc-nn + : + $* <"cmd <<:EOI 1>>?:EOO" >>EOO + 'cmd' + <<: + 'EOI' + '1' + >>?: + 'EOO' + + EOO + + : file-cmp + : + $* <"cmd <=in >?out 2>?err" >>EOO + 'cmd' + <= + 'in' + >? + 'out' + '2' + >? + 'err' + + EOO + + : file-write + : + $* <"cmd >=out 2>+err" >>EOO + 'cmd' + >= + 'out' + '2' + >+ + 'err' + + EOO +} + +: cleanup +: +{ + : always + : + $* <"cmd &file" >>EOO + 'cmd' + & + 'file' + + EOO + + : maybe + : + $* <"cmd &?file" >>EOO + 'cmd' + &? + 'file' + + EOO + + : never + : + $* <"cmd &!file" >>EOO + 'cmd' + &! + 'file' + + EOO +} + +: for +: +{ + : form-1 + : + $* <"for x: a" >>EOO + 'for' + 'x' + : + 'a' + + EOO + + : form-3 + : + $* <"for <<>EOO + 'for' + <<< + 'a' + 'x' + + EOO +} diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx index d849ac9..5c13239 100644 --- a/libbuild2/build/script/lexer.cxx +++ b/libbuild2/build/script/lexer.cxx @@ -78,6 +78,19 @@ namespace build2 s2 = " "; break; } + case lexer_mode::for_loop: + { + // Leading tokens of the for-loop. Like command_line but + // recognizes colon as a separator and lsbrace like value. + // + // Note that while sensing the form of the for-loop (`for x:...` + // vs `for x <...`) we need to make sure that the pre-parsed token + // types are valid for the execution phase. + // + s1 = ":=!|&<> $(#\t\n"; + s2 = " == "; + break; + } default: { // Recognize special variable names ($>, $<, $~). @@ -109,6 +122,7 @@ namespace build2 case lexer_mode::first_token: case lexer_mode::second_token: case lexer_mode::variable_line: + case lexer_mode::for_loop: r = next_line (); break; default: return base_lexer::next (); @@ -141,7 +155,8 @@ namespace build2 // if (st.lsbrace) { - assert (m == lexer_mode::variable_line); + assert (m == lexer_mode::variable_line || + m == lexer_mode::for_loop); state_.top ().lsbrace = false; // Note: st is a copy. @@ -179,11 +194,20 @@ namespace build2 case '(': return make_token (type::lparen); } + if (m == lexer_mode::for_loop) + { + switch (c) + { + case ':': return make_token (type::colon); + } + } + // Command line operator/separators. // if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token) + m == lexer_mode::second_token || + m == lexer_mode::for_loop) { switch (c) { @@ -205,7 +229,8 @@ namespace build2 // if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token) + m == lexer_mode::second_token || + m == lexer_mode::for_loop) { if (optional t = next_cmd_op (c, sep)) return move (*t); diff --git a/libbuild2/build/script/lexer.hxx b/libbuild2/build/script/lexer.hxx index 646d3b9..313d80a 100644 --- a/libbuild2/build/script/lexer.hxx +++ b/libbuild2/build/script/lexer.hxx @@ -24,9 +24,10 @@ namespace build2 enum { command_line = base_type::value_next, - first_token, // Expires at the end of the token. - second_token, // Expires at the end of the token. - variable_line // Expires at the end of the line. + first_token, // Expires at the end of the token. + second_token, // Expires at the end of the token. + variable_line, // Expires at the end of the line. + for_loop // Used for sensing the for-loop leading tokens. }; lexer_mode () = default; diff --git a/libbuild2/build/script/lexer.test.cxx b/libbuild2/build/script/lexer.test.cxx index e496f94..d8733ba 100644 --- a/libbuild2/build/script/lexer.test.cxx +++ b/libbuild2/build/script/lexer.test.cxx @@ -35,6 +35,7 @@ namespace build2 else if (s == "second-token") m = lexer_mode::second_token; else if (s == "variable-line") m = lexer_mode::variable_line; else if (s == "variable") m = lexer_mode::variable; + else if (s == "for-loop") m = lexer_mode::for_loop; else assert (false); } diff --git a/libbuild2/build/script/parser+for.test.testscript b/libbuild2/build/script/parser+for.test.testscript index 877f958..c5f6587 100644 --- a/libbuild2/build/script/parser+for.test.testscript +++ b/libbuild2/build/script/parser+for.test.testscript @@ -16,7 +16,7 @@ cmd end EOI - buildfile:11:4: error: expected variable name instead of + buildfile:11:1: error: for: missing variable name EOE : untyped @@ -180,3 +180,461 @@ EOE } } + +: form-2 +: +: ... | for x +: +{ + : for + : + { + : status + : + $* <>EOE != 0 + echo 'a b' | for x != 0 + cmd + end + EOI + buildfile:11:20: error: for-loop exit code cannot be checked + EOE + + : not-last + : + $* <>EOE != 0 + echo 'a b' | for x | echo x + cmd + end + EOI + buildfile:11:20: error: for-loop must be last command in a pipe + EOE + + : not-last-relex + : + $* <>EOE != 0 + echo 'a b' | for x|echo x + cmd + end + EOI + buildfile:11:19: error: for-loop must be last command in a pipe + EOE + + : expression-after + : + $* <>EOE != 0 + echo 'a b' | for x && echo x + cmd + end + EOI + buildfile:11:20: error: command expression involving for-loop + EOE + + : expression-after-relex + : + $* <>EOE != 0 + echo 'a b' | for x&&echo x + cmd + end + EOI + buildfile:11:19: error: command expression involving for-loop + EOE + + : expression-before + : + $* <>EOE != 0 + echo 'a b' && echo x | for x + cmd + end + EOI + buildfile:11:24: error: command expression involving for-loop + EOE + + : expression-before-relex + : + $* <>EOE != 0 + echo 'a b' && echo x|for x + cmd + end + EOI + buildfile:11:22: error: command expression involving for-loop + EOE + + : cleanup + : + $* <>EOE != 0 + echo 'a b' | for x &f + cmd + end + EOI + buildfile:11:20: error: cleanup in for-loop + EOE + + : cleanup-relex + : + $* <>EOE != 0 + echo 'a b' | for x&f + cmd + end + EOI + buildfile:11:19: error: cleanup in for-loop + EOE + + : stdout-redirect + : + $* <>EOE != 0 + echo 'a b' | for x >a + cmd + end + EOI + buildfile:11:20: error: output redirect in for-loop + EOE + + : stdout-redirect-relex + : + $* <>EOE != 0 + echo 'a b' | for x>a + cmd + end + EOI + buildfile:11:19: error: output redirect in for-loop + EOE + + : stdin-redirect + : + $* <>EOE != 0 + echo 'a b' | for x >EOE != 0 + echo 'a b' | for + cmd + end + EOI + buildfile:11:1: error: for: missing variable name + EOE + + : untyped + : + $* <>EOO + echo 'a b' | for -w x + cmd $x + end + EOI + echo 'a b' | for -w x + EOO + + : expansion + : + $* <>EOO + vs = a b + echo $vs | for x + cmd $x + end + EOI + echo a b | for x + EOO + + : typed-var + : + $* <>EOO + echo 'a b' | for -w [dir_paths] x + cmd $x + end + EOI + echo 'a b' | for -w [dir_paths] x + EOO + } + + : end + : + { + : without-end + : + $* <>EOE != 0 + echo 'a b' | for x + cmd + EOI + buildfile:13:1: error: expected closing 'end' + EOE + } + + : elif + : + { + : without-if + : + $* <>EOE != 0 + echo 'a b' | for x + elif true + cmd + end + end + EOI + buildfile:12:3: error: 'elif' without preceding 'if' + EOE + } + + : nested + : + { + $* -l -r <>EOO + echo 'a b' | for x # 1 + cmd1 $x # 2 + if ($x == "a") # 3 + cmd2 # 4 + echo x y | for y # 5 + cmd3 # 6 + end + else + cmd4 # 7 + end + cmd5 # 8 + end + cmd6 # 9 + EOI + echo 'a b' | for x # 1 + cmd6 # 9 + EOO + } +} + +: form-3 +: +: for x <... +: +{ + : for + : + { + : status + : + $* <>EOE != 0 + for x >EOE != 0 + for x >EOE != 0 + for >EOE != 0 + for x >EOE != 0 + for >EOE != 0 + echo 'a b' && for x >EOE != 0 + for x >EOE != 0 + for &f x >EOE != 0 + for >EOE != 0 + for x >a + cmd + end + EOI + buildfile:11:7: error: output redirect in for-loop + EOE + + : stdout-redirect-before-var + : + $* <>EOE != 0 + for >a x + cmd + end + EOI + buildfile:11:5: error: output redirect in for-loop + EOE + + : stdout-redirect-relex + : + $* <>EOE != 0 + for x>a + cmd + end + EOI + buildfile:11:6: error: output redirect in for-loop + EOE + + : no-var + : + $* <>EOE != 0 + for >EOO + for -w x <'a b' + cmd $x + end + EOI + for -w x <'a b' + EOO + + : expansion + : + $* <>EOO + vs = a b + for x <$vs + cmd $x + end + EOI + for x b >EOO + for -w [dir_path] x <'a b' + cmd $x + end + EOI + for -w [dir_path] x <'a b' + EOO + } + + : end + : + { + : without-end + : + $* <>EOE != 0 + for x <'a b' + cmd + EOI + buildfile:13:1: error: expected closing 'end' + EOE + } + + : elif + : + { + : without-if + : + $* <>EOE != 0 + for x <'a b' + elif true + cmd + end + end + EOI + buildfile:12:3: error: 'elif' without preceding 'if' + EOE + } + + : nested + : + { + $* -l -r <>EOO + for -w x <'a b' # 1 + cmd1 $x # 2 + if ($x == "a") # 3 + cmd2 # 4 + for -w y <'x y' # 5 + cmd3 # 6 + end + else + cmd4 # 7 + end + cmd5 # 8 + end + cmd6 # 9 + EOI + for -w x <'a b' # 1 + cmd6 # 9 + EOO + } + + : contained + : + { + : eos + : + $* <>EOE != 0 + for x <'a b' + EOI + buildfile:12:1: error: expected closing 'end' + EOE + } +} diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index bcea7e0..035ab6b 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -205,10 +205,11 @@ namespace build2 // enter: next token is peeked at (type in tt) // leave: newline - assert (!fct || - *fct == line_type::cmd_if || - *fct == line_type::cmd_while || - *fct == line_type::cmd_for); + assert (!fct || + *fct == line_type::cmd_if || + *fct == line_type::cmd_while || + *fct == line_type::cmd_for_stream || + *fct == line_type::cmd_for_args); // Determine the line type/start token. // @@ -246,50 +247,107 @@ namespace build2 break; } - case line_type::cmd_for: + // + // See pre_parse_line_start() for details. + // + case line_type::cmd_for_args: assert (false); break; + case line_type::cmd_for_stream: { - // First take care of the variable name. + // First we need to sense the next few tokens and detect which + // form of the loop we are dealing with, the first (for x: ...) + // or the third (x <...) one. Note that the second form (... | for + // x) is handled separately. + // + // @@ Do we diagnose `... | for x: ...`? + // + // If the next token doesn't introduce a variable (doesn't start + // attributes and doesn't look like a variable name), then this is + // the third form. Otherwise, if colon follows the variable name, + // then this is the first form and the third form otherwise. + // + // Note that for the third form we will need to pass the 'for' + // token as a program name to the command expression parsing + // function since it will be gone from the token stream by that + // time. Thus, we save it. // - mode (lexer_mode::normal); + // Note also that in this model it won't be possible to support + // options in the first form. + // + token pt (t); + assert (pt.type == type::word && pt.value == "for"); + mode (lexer_mode::for_loop); next_with_attributes (t, tt); - attributes_push (t, tt); - if (tt != type::word || t.qtype != quote_type::unquoted) - fail (t) << "expected variable name instead of " << t; + // Note that we also consider special variable names (those that + // don't clash with the command line elements like redirects, etc) + // to later fail gracefully. + // + string& n (t.value); - const string& n (t.value); + if (tt == type::lsbrace || // Attributes. + (tt == type::word && // Variable name. + t.qtype == quote_type::unquoted && + (n[0] == '_' || alpha (n[0]) || n == "~"))) + { + attributes_push (t, tt); - if (special_variable (n)) - fail (t) << "attempt to set '" << n << "' variable directly"; + if (tt != type::word || t.qtype != quote_type::unquoted) + fail (t) << "expected variable name instead of " << t; - // We don't pre-enter variables. - // - ln.var = nullptr; + if (special_variable (n)) + fail (t) << "attempt to set '" << n << "' special variable"; - next (t, tt); + if (lexer_->peek_char ().first == ':') + lt = line_type::cmd_for_args; + } - if (tt != type::colon) + if (lt == line_type::cmd_for_stream) // for x <... { - // @@ TMP We will need to fallback to parsing the 'for x <...' - // form instead. + // At this point `t` contains the token that follows the `for` + // token and, potentially, the attributes. Now pre-parse the + // command expression in the command_line lexer mode starting + // from this position and also passing the 'for' token as a + // program name. // - fail (t) << "expected ':' instead of " << t - << " after variable name"; + // Note that the fact that the potential attributes are already + // parsed doesn't affect the command expression pre-parsing. + // Also note that they will be available during the execution + // phase being replayed. + // + expire_mode (); // Expire the for-loop lexer mode. + + parse_command_expr_result r ( + parse_command_expr (t, tt, + lexer::redirect_aliases, + move (pt))); + + assert (r.for_loop); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + + parse_here_documents (t, tt, r); } + else // for x: ... + { + next (t, tt); - expire_mode (); // Expire the normal lexer mode. + assert (tt == type::colon); - // Parse the value similar to the var line type (see above). - // - mode (lexer_mode::variable_line); - parse_variable_line (t, tt); + expire_mode (); // Expire the for-loop lexer mode. - if (tt != type::newline) - fail (t) << "expected newline instead of " << t << " after for"; + // Parse the value similar to the var line type (see above). + // + mode (lexer_mode::variable_line); + parse_variable_line (t, tt); - ++level_; + if (tt != type::newline) + fail (t) << "expected newline instead of " << t << " after for"; + } + ln.var = nullptr; + ++level_; break; } case line_type::cmd_elif: @@ -321,15 +379,24 @@ namespace build2 // Fall through. case line_type::cmd: { - pair p; + parse_command_expr_result r; if (lt != line_type::cmd_else && lt != line_type::cmd_end) - p = parse_command_expr (t, tt, lexer::redirect_aliases); + r = parse_command_expr (t, tt, lexer::redirect_aliases); + + if (r.for_loop) + { + lt = line_type::cmd_for_stream; + ln.var = nullptr; + + ++level_; + } if (tt != type::newline) fail (t) << "expected newline instead of " << t; - parse_here_documents (t, tt, p); + parse_here_documents (t, tt, r); + break; } } @@ -358,7 +425,8 @@ namespace build2 break; } case line_type::cmd_while: - case line_type::cmd_for: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: { tt = peek (lexer_mode::first_token); @@ -396,7 +464,8 @@ namespace build2 break; } case line_type::cmd_while: - case line_type::cmd_for: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: { fct = bt; break; @@ -466,7 +535,9 @@ namespace build2 // enter: peeked first token of next line (type in tt) // leave: newline - assert (lt == line_type::cmd_while || lt == line_type::cmd_for); + assert (lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args); // Parse lines until we see closing 'end'. // @@ -491,12 +562,12 @@ namespace build2 // assert (!pre_parse_); - pair p ( + parse_command_expr_result pr ( parse_command_expr (t, tt, lexer::redirect_aliases)); assert (tt == type::newline); - parse_here_documents (t, tt, p); + parse_here_documents (t, tt, pr); assert (tt == type::newline); // @@ Note that currently running programs via a runner (e.g., see @@ -509,7 +580,7 @@ namespace build2 // passed to the environment constructor, similar to passing the // script deadline. // - return move (p.first); + return move (pr.expr); } // @@ -1121,6 +1192,7 @@ namespace build2 auto exec_cmd = [this] (token& t, build2::script::token_type& tt, const iteration_index* ii, size_t li, bool single, + const function& cf, const location& ll) { // We use the 0 index to signal that this is the only command. @@ -1131,7 +1203,7 @@ namespace build2 command_expr ce ( parse_command_line (t, static_cast (tt))); - runner_->run (*environment_, ce, ii, li, ll); + runner_->run (*environment_, ce, ii, li, cf, ll); }; exec_lines (s.body, exec_cmd); @@ -1184,6 +1256,7 @@ namespace build2 build2::script::token_type& tt, const iteration_index* ii, size_t li, bool /* single */, + const function& cf, const location& ll) { // Note that we never reset the line index to zero (as we do in @@ -1282,14 +1355,17 @@ namespace build2 command_expr ce ( parse_command_line (t, static_cast (tt))); - // Verify that this expression executes the set builtin. + // Verify that this expression executes the set builtin or is a + // for-loop. // if (find_if (ce.begin (), ce.end (), - [] (const expr_term& et) + [&cf] (const expr_term& et) { const process_path& p (et.pipe.back ().program); return p.initial == nullptr && - p.recall.string () == "set"; + (p.recall.string () == "set" || + (cf != nullptr && + p.recall.string () == "for")); }) == ce.end ()) { const replay_tokens& rt (data.scr.depdb_preamble.back ().tokens); @@ -1301,7 +1377,7 @@ namespace build2 info (rt[0].location ()) << "depdb preamble ends here"; } - runner_->run (*environment_, ce, ii, li, ll); + runner_->run (*environment_, ce, ii, li, cf, ll); } }; @@ -2336,18 +2412,36 @@ namespace build2 { // Note: depdb is disallowed inside flow control constructs. // - string s; - build2::script::run (*environment_, - cmd, - nullptr /* iteration_index */, li, - ll, - !file ? &s : nullptr); - if (!file) { - iss.str (move (s)); + function cf ( + [&iss] + (build2::script::environment&, + const strings&, + auto_fd in, + bool pipe, + const optional& dl, + const command& deadline_cmd, + const location& ll) + { + iss.str (stream_read (move (in), + pipe, + dl, + deadline_cmd, + ll)); + }); + + build2::script::run (*environment_, + cmd, + nullptr /* iteration_index */, li, + ll, + cf, false /* last_cmd */); + iss.exceptions (istream::badbit); } + else + build2::script::run ( + *environment_, cmd, nullptr /* iteration_index */, li, ll); } ifdstream ifs (ifdstream::badbit); @@ -2490,7 +2584,8 @@ namespace build2 environment_->set_special_variables (a); } - // When add a special variable don't forget to update lexer::word(). + // When add a special variable don't forget to update lexer::word() and + // for-loop parsing in pre_parse_line(). // bool parser:: special_variable (const string& n) noexcept diff --git a/libbuild2/build/script/parser.test.cxx b/libbuild2/build/script/parser.test.cxx index 061f3f8..7e9c612 100644 --- a/libbuild2/build/script/parser.test.cxx +++ b/libbuild2/build/script/parser.test.cxx @@ -37,11 +37,32 @@ namespace build2 enter (environment&, const location&) override {} virtual void - run (environment&, + run (environment& env, const command_expr& e, const iteration_index* ii, size_t i, - const location&) override + const function& cf, + const location& ll) override { + // If the functions is specified, then just execute it with an empty + // stdin so it can perform the housekeeping (stop replaying tokens, + // increment line index, etc). + // + if (cf != nullptr) + { + assert (e.size () == 1 && !e[0].pipe.empty ()); + + const command& c (e[0].pipe.back ()); + + // Must be enforced by the caller. + // + assert (!c.out && !c.err && !c.exit); + + cf (env, c.arguments, + fdopen_null (), false /* pipe */, + nullopt /* deadline */, c, + ll); + } + cout << e; if (line_ || iterations_) diff --git a/libbuild2/build/script/runner.cxx b/libbuild2/build/script/runner.cxx index 157fc60..c52ef66 100644 --- a/libbuild2/build/script/runner.cxx +++ b/libbuild2/build/script/runner.cxx @@ -97,25 +97,28 @@ namespace build2 run (environment& env, const command_expr& expr, const iteration_index* ii, size_t li, + const function& cf, const location& ll) { if (verb >= 3) text << ": " << expr; // Run the expression if we are not in the dry-run mode or if it - // executes the set or exit builtin and just print the expression - // otherwise at verbosity level 2 and up. + // executes the set or exit builtin or it is a for-loop. Otherwise, + // just print the expression otherwise at verbosity level 2 and up. // if (!env.context.dry_run || find_if (expr.begin (), expr.end (), - [] (const expr_term& et) + [&cf] (const expr_term& et) { const process_path& p (et.pipe.back ().program); return p.initial == nullptr && (p.recall.string () == "set" || - p.recall.string () == "exit"); + p.recall.string () == "exit" || + (cf != nullptr && + p.recall.string () == "for")); }) != expr.end ()) - build2::script::run (env, expr, ii, li, ll); + build2::script::run (env, expr, ii, li, ll, cf); else if (verb >= 2) text << expr; } diff --git a/libbuild2/build/script/runner.hxx b/libbuild2/build/script/runner.hxx index 0652396..ec8a948 100644 --- a/libbuild2/build/script/runner.hxx +++ b/libbuild2/build/script/runner.hxx @@ -32,10 +32,14 @@ namespace build2 // Location is the start position of this command line in the script. // It can be used in diagnostics. // + // Optionally, execute the specified function instead of the last + // pipe command. + // virtual void run (environment&, const command_expr&, const iteration_index*, size_t index, + const function&, const location&) = 0; virtual bool @@ -66,6 +70,7 @@ namespace build2 run (environment&, const command_expr&, const iteration_index*, size_t, + const function&, const location&) override; virtual bool diff --git a/libbuild2/build/script/script.cxx b/libbuild2/build/script/script.cxx index 2e777b4..9d9b5a8 100644 --- a/libbuild2/build/script/script.cxx +++ b/libbuild2/build/script/script.cxx @@ -156,7 +156,7 @@ namespace build2 } void environment:: - set_variable (string&& nm, + set_variable (string nm, names&& val, const string& attrs, const location& ll) diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx index 2c5e6e0..ec27781 100644 --- a/libbuild2/build/script/script.hxx +++ b/libbuild2/build/script/script.hxx @@ -24,11 +24,13 @@ namespace build2 using build2::script::lines; using build2::script::redirect; using build2::script::redirect_type; + using build2::script::command; using build2::script::expr_term; using build2::script::command_expr; using build2::script::iteration_index; using build2::script::deadline; using build2::script::timeout; + using build2::script::command_function; // Forward declarations. // @@ -166,7 +168,7 @@ namespace build2 size_t exec_line = 1; virtual void - set_variable (string&& name, + set_variable (string name, names&&, const string& attrs, const location&) override; diff --git a/libbuild2/script/builtin-options.cxx b/libbuild2/script/builtin-options.cxx index 8e15ddd..9b4067b 100644 --- a/libbuild2/script/builtin-options.cxx +++ b/libbuild2/script/builtin-options.cxx @@ -1076,6 +1076,297 @@ namespace build2 return r; } + + // for_options + // + + for_options:: + for_options () + : exact_ (), + newline_ (), + whitespace_ () + { + } + + for_options:: + for_options (int& argc, + char** argv, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + } + + for_options:: + for_options (int start, + int& argc, + char** argv, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + } + + for_options:: + for_options (int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + for_options:: + for_options (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + for_options:: + for_options (::build2::build::cli::scanner& s, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + _parse (s, opt, arg); + } + + typedef + std::map + _cli_for_options_map; + + static _cli_for_options_map _cli_for_options_map_; + + struct _cli_for_options_map_init + { + _cli_for_options_map_init () + { + _cli_for_options_map_["--exact"] = + &::build2::build::cli::thunk< for_options, &for_options::exact_ >; + _cli_for_options_map_["-e"] = + &::build2::build::cli::thunk< for_options, &for_options::exact_ >; + _cli_for_options_map_["--newline"] = + &::build2::build::cli::thunk< for_options, &for_options::newline_ >; + _cli_for_options_map_["-n"] = + &::build2::build::cli::thunk< for_options, &for_options::newline_ >; + _cli_for_options_map_["--whitespace"] = + &::build2::build::cli::thunk< for_options, &for_options::whitespace_ >; + _cli_for_options_map_["-w"] = + &::build2::build::cli::thunk< for_options, &for_options::whitespace_ >; + } + }; + + static _cli_for_options_map_init _cli_for_options_map_init_; + + bool for_options:: + _parse (const char* o, ::build2::build::cli::scanner& s) + { + _cli_for_options_map::const_iterator i (_cli_for_options_map_.find (o)); + + if (i != _cli_for_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool for_options:: + _parse (::build2::build::cli::scanner& s, + ::build2::build::cli::unknown_mode opt_mode, + ::build2::build::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::build2::build::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + s.skip (); + r = true; + continue; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. + // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast (co.c_str ()), + const_cast (v) + }; + + ::build2::build::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::build2::build::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::build2::build::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. + // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::build2::build::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::cli::unknown_mode::fail: + { + throw ::build2::build::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::build2::build::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::cli::unknown_mode::fail: + { + throw ::build2::build::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } } } diff --git a/libbuild2/script/builtin-options.hxx b/libbuild2/script/builtin-options.hxx index c7cebbc..9361d18 100644 --- a/libbuild2/script/builtin-options.hxx +++ b/libbuild2/script/builtin-options.hxx @@ -253,6 +253,90 @@ namespace build2 vector clear_; bool clear_specified_; }; + + class for_options + { + public: + for_options (); + + for_options (int& argc, + char** argv, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int start, + int& argc, + char** argv, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (::build2::build::cli::scanner&, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + // Option accessors and modifiers. + // + const bool& + exact () const; + + bool& + exact (); + + void + exact (const bool&); + + const bool& + newline () const; + + bool& + newline (); + + void + newline (const bool&); + + const bool& + whitespace () const; + + bool& + whitespace (); + + void + whitespace (const bool&); + + // Implementation details. + // + protected: + bool + _parse (const char*, ::build2::build::cli::scanner&); + + private: + bool + _parse (::build2::build::cli::scanner&, + ::build2::build::cli::unknown_mode option, + ::build2::build::cli::unknown_mode argument); + + public: + bool exact_; + bool newline_; + bool whitespace_; + }; } } diff --git a/libbuild2/script/builtin-options.ixx b/libbuild2/script/builtin-options.ixx index 8f84177..575eb95 100644 --- a/libbuild2/script/builtin-options.ixx +++ b/libbuild2/script/builtin-options.ixx @@ -153,6 +153,63 @@ namespace build2 { this->clear_specified_ = x; } + + // for_options + // + + inline const bool& for_options:: + exact () const + { + return this->exact_; + } + + inline bool& for_options:: + exact () + { + return this->exact_; + } + + inline void for_options:: + exact (const bool& x) + { + this->exact_ = x; + } + + inline const bool& for_options:: + newline () const + { + return this->newline_; + } + + inline bool& for_options:: + newline () + { + return this->newline_; + } + + inline void for_options:: + newline (const bool& x) + { + this->newline_ = x; + } + + inline const bool& for_options:: + whitespace () const + { + return this->whitespace_; + } + + inline bool& for_options:: + whitespace () + { + return this->whitespace_; + } + + inline void for_options:: + whitespace (const bool& x) + { + this->whitespace_ = x; + } } } diff --git a/libbuild2/script/builtin.cli b/libbuild2/script/builtin.cli index 50dd3a0..c993983 100644 --- a/libbuild2/script/builtin.cli +++ b/libbuild2/script/builtin.cli @@ -30,5 +30,12 @@ namespace build2 vector --unset|-u; vector --clear|-c; }; + + class for_options + { + bool --exact|-e; + bool --newline|-n; + bool --whitespace|-w; + }; } } diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 536821b..7989c20 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -4,10 +4,13 @@ #include #include // strchr() +#include #include -#include // exit + +#include // exit, stream_reader #include +#include using namespace std; @@ -140,18 +143,20 @@ namespace build2 return nullopt; } - pair parser:: + parser::parse_command_expr_result parser:: parse_command_expr (token& t, type& tt, - const redirect_aliases& ra) + const redirect_aliases& ra, + optional&& program) { - // enter: first token of the command line + // enter: first (or second, if program) token of the command line // leave: or unknown token command_expr expr; // OR-ed to an implied false for the first term. // - expr.push_back ({expr_operator::log_or, command_pipe ()}); + if (!pre_parse_) + expr.push_back ({expr_operator::log_or, command_pipe ()}); command c; // Command being assembled. @@ -218,8 +223,8 @@ namespace build2 // Add the next word to either one of the pending positions or to // program arguments by default. // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) + auto add_word = [&c, &p, &mod, &check_regex_mod, this] + (string&& w, const location& l) { auto add_merge = [&l, this] (optional& r, const string& w, @@ -697,11 +702,30 @@ namespace build2 const location ll (get_location (t)); // Line location. // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, exit status comparison, etc). + // "terminators" (newline or unknown/unexpected token). // location l (ll); names ns; // Reuse to reduce allocations. + bool for_loop (false); + + if (program) + { + assert (program->type == type::word); + + // Note that here we skip all the parse_program() business since the + // program can only be one of the specially-recognized names. + // + if (program->value == "for") + for_loop = true; + else + assert (false); // Must be specially-recognized program. + + // Save the program name and continue parsing as a command. + // + add_word (move (program->value), get_location (*program)); + } + for (bool done (false); !done; l = get_location (t)) { tt = ra.resolve (tt); @@ -717,6 +741,9 @@ namespace build2 case type::equal: case type::not_equal: { + if (for_loop) + fail (l) << "for-loop exit code cannot be checked"; + if (!pre_parse_) check_pending (l); @@ -747,30 +774,39 @@ namespace build2 } case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Fall through. - case type::in_pass: - case type::out_pass: + case type::clean: + if (for_loop) + fail (l) << "cleanup in for-loop"; + // Fall through. - case type::in_null: + case type::out_pass: case type::out_null: - case type::out_trace: - case type::out_merge: - - case type::in_str: - case type::in_doc: case type::out_str: case type::out_doc: - - case type::in_file: case type::out_file_cmp: case type::out_file_ovr: case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. - case type::clean: + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: { if (pre_parse_) { @@ -968,6 +1004,42 @@ namespace build2 next (t, tt); break; } + case type::lsbrace: + { + // Recompose the attributes into a single command argument. + // + assert (!pre_parse_); + + attributes_push (t, tt, true /* standalone */); + + attributes as (attributes_pop ()); + assert (!as.empty ()); + + ostringstream os; + names storage; + char c ('['); + for (const attribute& a: as) + { + os << c << a.name; + + if (!a.value.null) + { + os << '='; + + storage.clear (); + to_stream (os, + reverse (a.value, storage), + quote_mode::normal, + '@'); + } + + c = ','; + } + os << ']'; + + add_word (os.str (), l); + break; + } default: { // Bail out if this is one of the unknown tokens. @@ -1053,16 +1125,33 @@ namespace build2 bool prog (p == pending::program_first || p == pending::program_next); - // Check if this is the env pseudo-builtin. + // Check if this is the env pseudo-builtin or the for-loop. // bool env (false); - if (prog && tt == type::word && t.value == "env") + if (prog && tt == type::word) { - parsed_env r (parse_env_builtin (t, tt)); - c.cwd = move (r.cwd); - c.variables = move (r.variables); - c.timeout = r.timeout; - env = true; + if (t.value == "env") + { + parsed_env r (parse_env_builtin (t, tt)); + c.cwd = move (r.cwd); + c.variables = move (r.variables); + c.timeout = r.timeout; + env = true; + } + else if (t.value == "for") + { + if (expr.size () > 1) + fail (l) << "command expression involving for-loop"; + + for_loop = true; + + // Save 'for' as a program name and continue parsing as a + // command. + // + add_word (move (t.value), l); + next (t, tt); + continue; + } } // Parse the next chunk as names to get expansion, etc. Note that @@ -1243,9 +1332,16 @@ namespace build2 switch (tt) { case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: { + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Check that the previous command makes sense. // check_command (l, tt != type::pipe); @@ -1265,30 +1361,11 @@ namespace build2 break; } - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (move (t), tt, l); - break; - } - case type::clean: { + if (for_loop) + fail (l) << "cleanup in for-loop"; + parse_clean (t); break; } @@ -1299,6 +1376,27 @@ namespace build2 fail (l) << "here-document redirect in expansion"; break; } + + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. + + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_file: + { + parse_redirect (move (t), tt, l); + break; + } } } @@ -1326,7 +1424,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); } - return make_pair (move (expr), move (hd)); + return parse_command_expr_result {move (expr), move (hd), for_loop}; } parser::parsed_env parser:: @@ -1575,7 +1673,7 @@ namespace build2 void parser:: parse_here_documents (token& t, type& tt, - pair& p) + parse_command_expr_result& pr) { // enter: newline // leave: newline @@ -1583,7 +1681,7 @@ namespace build2 // Parse here-document fragments in the order they were mentioned on // the command line. // - for (here_doc& h: p.second) + for (here_doc& h: pr.docs) { // Switch to the here-line mode which is like single/double-quoted // string but recognized the newline as a separator. @@ -1603,7 +1701,7 @@ namespace build2 { auto i (h.redirects.cbegin ()); - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -1635,7 +1733,7 @@ namespace build2 // for (++i; i != h.redirects.cend (); ++i) { - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional& ir (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -2062,7 +2160,7 @@ namespace build2 else if (n == "elif!") r = line_type::cmd_elifn; else if (n == "else") r = line_type::cmd_else; else if (n == "while") r = line_type::cmd_while; - else if (n == "for") r = line_type::cmd_for; + else if (n == "for") r = line_type::cmd_for_stream; else if (n == "end") r = line_type::cmd_end; else { @@ -2136,10 +2234,11 @@ namespace build2 { line_type lt (j->type); - if (lt == line_type::cmd_if || - lt == line_type::cmd_ifn || - lt == line_type::cmd_while || - lt == line_type::cmd_for) + if (lt == line_type::cmd_if || + lt == line_type::cmd_ifn || + lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args) ++n; // If we are nested then we just wait until we get back @@ -2164,10 +2263,8 @@ namespace build2 if (skip) { - // Note that we don't count else and end as commands. - // - // @@ Note that for the for-loop's second and third forms - // will probably need to increment li. + // Note that we don't count else, end, and 'for x: ...' as + // commands. // switch (lt) { @@ -2176,8 +2273,9 @@ namespace build2 case line_type::cmd_ifn: case line_type::cmd_elif: case line_type::cmd_elifn: - case line_type::cmd_while: ++li; break; - default: break; + case line_type::cmd_for_stream: + case line_type::cmd_while: ++li; break; + default: break; } } } @@ -2221,7 +2319,10 @@ namespace build2 single = true; } - exec_cmd (t, tt, ii, li++, single, ll); + exec_cmd (t, tt, + ii, li++, single, + nullptr /* command_function */, + ll); replay_stop (); break; @@ -2339,7 +2440,147 @@ namespace build2 break; } - case line_type::cmd_for: + case line_type::cmd_for_stream: + { + // The for-loop construct end. Set on the first iteration. + // + lines::const_iterator fe (e); + + // Let's "wrap up" all the required data into the single object + // to rely on the "small function object" optimization. + // + struct + { + lines::const_iterator i; + lines::const_iterator e; + const function& exec_set; + const function& exec_cmd; + const function& exec_cond; + const function& exec_for; + const iteration_index* ii; + size_t& li; + variable_pool* var_pool; + decltype (fcend)& fce; + lines::const_iterator& fe; + } d {i, e, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool, + fcend, + fe}; + + function cf ( + [&d, this] + (environment& env, + const strings& args, + auto_fd in, + bool pipe, + const optional& dl, + const command& deadline_cmd, + const location& ll) + { + namespace cli = build2::build::cli; + + try + { + // Parse arguments. + // + cli::vector_scanner scan (args); + for_options ops (scan); + + // Note: diagnostics consistent with the set builtin. + // + if (ops.whitespace () && ops.newline ()) + fail (ll) << "for: both -n|--newline and " + << "-w|--whitespace specified"; + + if (!scan.more ()) + fail (ll) << "for: missing variable name"; + + // Either attributes or variable name. + // + string a (scan.next ()); + const string* ats (!scan.more () ? nullptr : &a); + string vname (!scan.more () ? move (a) : scan.next ()); + + if (scan.more ()) + fail (ll) << "for: unexpected argument '" + << scan.next () << "'"; + + if (ats != nullptr && ats->empty ()) + fail (ll) << "for: empty variable attributes"; + + if (vname.empty ()) + fail (ll) << "for: empty variable name"; + + // Let's also diagnose the `... | for x:...` misuse which + // can probably be quite common. + // + if (vname.find (':') != string::npos) + fail (ll) << "for: ':' after variable name"; + + stream_reader sr ( + move (in), pipe, + ops.whitespace (), ops.newline (), ops.exact (), + dl, deadline_cmd, + ll); + + // Since the command pipe is parsed, we can stop + // replaying. Note that we should do this before calling + // exec_lines() for the loop body. Also note that we + // should increment the line index before that. + // + replay_stop (); + + size_t fli (++d.li); + iteration_index fi {1, d.ii}; + + for (optional s; (s = sr.next ()); ) + { + d.li = fli; + + // Don't move from the variable name since it is used on + // each iteration. + // + env.set_variable (vname, + names {name (move (*s))}, + ats != nullptr ? *ats : empty_string, + ll); + + // Find the construct end, if it is not found yet. + // + if (d.fe == d.e) + d.fe = d.fce (d.i, true, false); + + if (!exec_lines (d.i + 1, d.fe, + d.exec_set, + d.exec_cmd, + d.exec_cond, + d.exec_for, + &fi, d.li, + d.var_pool)) + { + throw exit (true); + } + + fi.index++; + } + } + catch (const cli::exception& e) + { + fail (ll) << "for: " << e; + } + }); + + exec_cmd (t, tt, ii, li, false /* single */, cf, ll); + + // Position to construct end. + // + i = (fe != e ? fe : fcend (i, true, true)); + + break; + } + case line_type::cmd_for_args: { // Parse the variable name with the potential attributes. // diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx index 3a4c46f..c402d3e 100644 --- a/libbuild2/script/parser.hxx +++ b/libbuild2/script/parser.hxx @@ -97,15 +97,34 @@ namespace build2 }; using here_docs = vector; - pair - parse_command_expr (token&, token_type&, const redirect_aliases&); + struct parse_command_expr_result + { + command_expr expr; // Single pipe for the for-loop. + here_docs docs; + bool for_loop = false; + + parse_command_expr_result () = default; + + parse_command_expr_result (command_expr&& e, + here_docs&& h, + bool f) + : expr (move (e)), docs (move (h)), for_loop (f) {} + }; + + // Pass the first special command program name (token_type::word) if it + // is already pre-parsed. + // + parse_command_expr_result + parse_command_expr (token&, token_type&, + const redirect_aliases&, + optional&& program = nullopt); command_exit parse_command_exit (token&, token_type&); void parse_here_documents (token&, token_type&, - pair&); + parse_command_expr_result&); struct parsed_doc { @@ -135,6 +154,11 @@ namespace build2 // the first two tokens. Use the specified lexer mode to peek the second // token. // + // Always return the cmd_for_stream line type for the for-loop. Note + // that the for-loop form cannot be detected easily, based on the first + // two tokens. Also note that the detection can be specific for the + // script implementation (custom lexing mode, special variables, etc). + // line_type pre_parse_line_start (token&, token_type&, lexer_mode); @@ -169,6 +193,7 @@ namespace build2 using exec_cmd_function = void (token&, token_type&, const iteration_index*, size_t li, bool single, + const function&, const location&); using exec_cond_function = bool (token&, token_type&, diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx index 81abdab..b7f3314 100644 --- a/libbuild2/script/run.cxx +++ b/libbuild2/script/run.cxx @@ -9,7 +9,8 @@ # include // DBG_TERMINATE_PROCESS #endif -#include // streamsize +#include // streamsize +#include // strchr() #include #include @@ -971,81 +972,201 @@ namespace build2 : path (c.program.recall_string ()); } - // Read out the stream content into a string. Throw io_error on the - // underlying OS error. - // - // If the execution deadline is specified, then turn the stream into the - // non-blocking mode reading its content in chunks and with a single - // operation otherwise. If the specified deadline is reached while - // reading the stream, then bail out for the successful deadline and - // fail otherwise. Note that in the former case the result will be - // incomplete, but we leave it to the caller to handle that. - // - // Note that on Windows we can only turn pipe file descriptors into the - // non-blocking mode. Thus, we have no choice but to read from - // descriptors of other types synchronously there. That implies that we - // can potentially block indefinitely reading a file and missing the - // deadline on Windows. Note though, that the user can normally rewrite - // the command, for example, `set foo <<& dl, - const command& deadline_cmd, - const location& ll) + bool ws, bool nl, bool ex, + const optional& dl, + const command& dc, + const location& l) + : whitespace_ (ws), + newline_ (nl), + exact_ (ex), + deadline_cmd_ (dc), + location_ (l) { - string r; - ifdstream cin; - #ifndef _WIN32 if (dl) #else if (dl && pipe) #endif { - fdselect_set fds {in.get ()}; - cin.open (move (in), fdstream_mode::non_blocking); + is_.open (move (in), fdstream_mode::non_blocking); + deadline_ = dl; + } + else + is_.open (move (in)); + } - const timestamp& dlt (dl->value); + optional stream_reader:: + next () + { + if (!is_.is_open ()) + return nullopt; + + // If eos is not reached, then read and return a character. Otherwise + // close the stream and return nullopt. If the deadline is specified and + // is reached, then return nullopt for the successful deadline (as if + // eof is reached) and fail otherwise. + // + // Set the empty_ flag to false after the first character is read. + // + auto get = [this] () -> optional + { + char r; - for (char buf[4096];; ) + if (deadline_) // Reading a character in the non-blocking mode. { - timestamp now (system_clock::now ()); + fdselect_set fds {is_.fd ()}; - if (dlt <= now || ifdselect (fds, dlt - now) == 0) + // Only fallback to ifdselect() if there is no character immediately + // available. + // + for (;;) { - if (!dl->success) - fail (ll) << cmd_path (deadline_cmd) - << " terminated: execution timeout expired"; - else + streamsize n (is_.readsome (&r, 1)); + + if (n == 1) break; + + if (is_.eof ()) + { + is_.close (); + return nullopt; + } + + const timestamp& dlt (deadline_->value); + timestamp now (system_clock::now ()); + + if (dlt <= now || ifdselect (fds, dlt - now) == 0) + { + is_.close (); + + if (!deadline_->success) + fail (location_) << cmd_path (deadline_cmd_) + << " terminated: execution timeout expired"; + else + return nullopt; + } + } + } + else // Reading a character in the blocking mode. + { + if (is_.peek () == ifdstream::traits_type::eof ()) + { + is_.close (); + return nullopt; } - streamsize n (cin.readsome (buf, sizeof (buf))); + is_.get (r); + } - // Bail out if eos is reached. - // - if (n == 0) - break; + empty_ = false; + return r; + }; + + if (whitespace_) // The whitespace mode. + { + const char* sep (" \n\r\t"); + + // Note that we collapse multiple consecutive whitespaces. + // + optional c; + + // Skip the whitespaces. + // + while ((c = get ()) && strchr (sep, *c) != nullptr) ; - r.append (buf, n); + // Bail out for the trailing whitespace(s) or an empty stream. + // + if (!c) + { + // Return the trailing "blank" after the trailing whitespaces in the + // exact mode, unless the stream is empty. + // + return exact_ && !empty_ ? empty_string : optional (); } + + // Read the word until eof or a whitespace character is encountered. + // + string r (1, *c); + while ((c = get ()) && strchr (sep, *c) == nullptr) + r += *c; + + return optional (move (r)); } - else + else // The newline or no-split mode. { - cin.open (move (in)); - r = cin.read_text (); - } + // Note that we don't collapse multiple consecutive newlines. + // + // Note also that we always sanitize CRs, so in the no-split mode we + // need to loop rather than read the whole text at once. + // + optional r; - cin.close (); + do + { + string l; + optional c; - return r; + // Read the line until eof or newline character is encountered. + // + while ((c = get ()) && *c != '\n') + l += *c; + + // Strip the trailing CRs that can appear while, for example, + // cross-testing Windows target or as a part of msvcrt junk + // production (see above). + // + while (!l.empty () && l.back () == '\r') + l.pop_back (); + + // Append the line. + // + if (!l.empty () || // Non-empty. + c || // Empty, non-trailing. + (exact_ && // Empty, trailing, in the exact mode for + !empty_)) // non-empty stream. + { + if (newline_ || !r) + { + r = move (l); + } + else + { + *r += '\n'; + *r += l; + } + } + } + while (!newline_ && is_.is_open ()); + + return r; + } + } + + string + stream_read (auto_fd&& in, + bool pipe, + const optional& dl, + const command& dc, + const location& ll) + { + stream_reader sr (move (in), + pipe, + false /* whitespace */, + false /* newline */, + true /* exact */, + dl, + dc, + ll); + + optional s (sr.next ()); + return s ? move (*s) : empty_string; } // The set pseudo-builtin: set variable from the stdin input. @@ -1087,87 +1208,17 @@ namespace build2 if (vname.empty ()) fail (ll) << "set: empty variable name"; - // Read out the stream content into a string while keeping an eye on - // the deadline. - // - string s (read (move (in), pipe, dl, deadline_cmd, ll)); + stream_reader sr (move (in), pipe, + ops.whitespace (), ops.newline (), ops.exact (), + dl, deadline_cmd, + ll); // Parse the stream content into the variable value. // names ns; - if (!s.empty ()) - { - if (ops.whitespace ()) // The whitespace mode. - { - // Note that we collapse multiple consecutive whitespaces. - // - for (size_t p (0); p != string::npos; ) - { - // Skip the whitespaces. - // - const char* sep (" \n\r\t"); - size_t b (s.find_first_not_of (sep, p)); - - if (b != string::npos) // Word beginning. - { - size_t e (s.find_first_of (sep, b)); // Find the word end. - ns.emplace_back (string (s, b, e != string::npos ? e - b : e)); - - p = e; - } - else // Trailings whitespaces. - { - // Append the trailing "blank" after the trailing whitespaces - // in the exact mode. - // - if (ops.exact ()) - ns.emplace_back (empty_string); - - // Bail out since the end of the string is reached. - // - break; - } - } - } - else // The newline or no-split mode. - { - // Note that we don't collapse multiple consecutive newlines. - // - // Note also that we always sanitize CRs so this loop is always - // needed. - // - for (size_t p (0); p != string::npos; ) - { - size_t e (s.find ('\n', p)); - string l (s, p, e != string::npos ? e - p : e); - - // Strip the trailing CRs that can appear while, for example, - // cross-testing Windows target or as a part of msvcrt junk - // production (see above). - // - while (!l.empty () && l.back () == '\r') - l.pop_back (); - - // Append the line. - // - if (!l.empty () || // Non-empty. - e != string::npos || // Empty, non-trailing. - ops.exact ()) // Empty, trailing, in the exact mode. - { - if (ops.newline () || ns.empty ()) - ns.emplace_back (move (l)); - else - { - ns[0].value += '\n'; - ns[0].value += l; - } - } - - p = e != string::npos ? e + 1 : e; - } - } - } + for (optional s; (s = sr.next ()); ) + ns.emplace_back (move (*s)); env.set_variable (move (vname), move (ns), @@ -1242,7 +1293,7 @@ namespace build2 const iteration_index* ii, size_t li, size_t ci, const location& ll, bool diag, - string* output, + const function& cf, bool last_cmd, optional dl = nullopt, const command* dl_cmd = nullptr, // env -t pipe_command* prev_cmd = nullptr) @@ -1253,8 +1304,10 @@ namespace build2 // if (bc == ec) { - if (output != nullptr) + if (cf != nullptr) { + assert (!last_cmd); // Otherwise we wouldn't be here. + // The pipeline can't be empty. // assert (ifd != nullfd && prev_cmd != nullptr); @@ -1263,15 +1316,14 @@ namespace build2 try { - *output = read (move (ifd), - true /* pipe */, - dl, - dl_cmd != nullptr ? *dl_cmd : c, - ll); + cf (env, strings () /* arguments */, + move (ifd), true /* pipe */, + dl, dl_cmd != nullptr ? *dl_cmd : c, + ll); } catch (const io_error& e) { - fail (ll) << "io error reading " << cmd_path (c) << " output: " + fail (ll) << "unable to read from " << cmd_path (c) << " output: " << e; } } @@ -1329,9 +1381,10 @@ namespace build2 command_pipe::const_iterator nc (bc + 1); bool last (nc == ec); - // Make sure that stdout is not redirected if meant to be read. + // Make sure that stdout is not redirected if meant to be read (last_cmd + // is false) or cannot not be produced (last_cmd is true). // - if (last && output != nullptr && c.out) + if (last && c.out && cf != nullptr) fail (ll) << "stdout cannot be redirected"; // True if the process path is not pre-searched and the program path @@ -1345,7 +1398,7 @@ namespace build2 const redirect& in ((c.in ? *c.in : env.in).effective ()); - const redirect* out (!last || output != nullptr + const redirect* out (!last || (cf != nullptr && !last_cmd) ? nullptr // stdout is piped. : &(c.out ? *c.out : env.out).effective ()); @@ -1413,7 +1466,7 @@ namespace build2 if (c.out) fail (ll) << program << " builtin stdout cannot be redirected"; - if (output != nullptr) + if (cf != nullptr && !last_cmd) fail (ll) << program << " builtin stdout cannot be read"; if (c.err) @@ -1620,7 +1673,7 @@ namespace build2 if (c.out) fail (ll) << "set builtin stdout cannot be redirected"; - if (output != nullptr) + if (cf != nullptr && !last_cmd) fail (ll) << "set builtin stdout cannot be read"; if (c.err) @@ -1640,6 +1693,39 @@ namespace build2 return true; } + // If this is the last command in the pipe and the command function is + // specified for it, then call it. + // + if (last && cf != nullptr && last_cmd) + { + // Must be enforced by the caller. + // + assert (!c.out && !c.err && !c.exit); + + try + { + cf (env, c.arguments, + move (ifd), !first, + dl, dl_cmd != nullptr ? *dl_cmd : c, + ll); + } + catch (const io_error& e) + { + diag_record dr (fail (ll)); + + dr << cmd_path (c) << ": unable to read from "; + + if (prev_cmd != nullptr) + dr << cmd_path (prev_cmd->cmd) << " output"; + else + dr << "stdin"; + + dr << ": " << e; + } + + return true; + } + // Open a file for command output redirect if requested explicitly // (file overwrite/append redirects) or for the purpose of the output // validation (none, here_*, file comparison redirects), register the @@ -2220,7 +2306,7 @@ namespace build2 nc, ec, move (ofd.in), ii, li, ci + 1, ll, diag, - output, + cf, last_cmd, dl, dl_cmd, &pc); @@ -2347,7 +2433,7 @@ namespace build2 nc, ec, move (ofd.in), ii, li, ci + 1, ll, diag, - output, + cf, last_cmd, dl, dl_cmd, &pc); @@ -2487,7 +2573,7 @@ namespace build2 const iteration_index* ii, size_t li, const location& ll, bool diag, - string* output) + const function& cf, bool last_cmd) { // Commands are numbered sequentially throughout the expression // starting with 1. Number 0 means the command is a single one. @@ -2532,7 +2618,7 @@ namespace build2 p.begin (), p.end (), auto_fd (), ii, li, ci, ll, print, - output); + cf, last_cmd); } ci += p.size (); @@ -2546,13 +2632,18 @@ namespace build2 const command_expr& expr, const iteration_index* ii, size_t li, const location& ll, - string* output) + const function& cf, + bool last_cmd) { // Note that we don't print the expression at any verbosity level // assuming that the caller does this, potentially providing some // additional information (command type, etc). // - if (!run_expr (env, expr, ii, li, ll, true /* diag */, output)) + if (!run_expr (env, + expr, + ii, li, ll, + true /* diag */, + cf, last_cmd)) throw failed (); // Assume diagnostics is already printed. } @@ -2561,11 +2652,15 @@ namespace build2 const command_expr& expr, const iteration_index* ii, size_t li, const location& ll, - string* output) + const function& cf, bool last_cmd) { // Note that we don't print the expression here (see above). // - return run_expr (env, expr, ii, li, ll, false /* diag */, output); + return run_expr (env, + expr, + ii, li, ll, + false /* diag */, + cf, last_cmd); } void diff --git a/libbuild2/script/run.hxx b/libbuild2/script/run.hxx index 01b010c..5d46d21 100644 --- a/libbuild2/script/run.hxx +++ b/libbuild2/script/run.hxx @@ -38,22 +38,24 @@ namespace build2 // Location is the start position of this command line in the script. It // can be used in diagnostics. // - // Optionally, save the command output into the referenced variable. In - // this case assume that the expression contains a single pipline. + // Optionally, execute the specified function at the end of the pipe, + // either after the last command or instead of it. // void run (environment&, const command_expr&, const iteration_index*, size_t index, const location&, - string* output = nullptr); + const function& = nullptr, + bool last_cmd = true); bool run_cond (environment&, const command_expr&, const iteration_index*, size_t index, const location&, - string* output = nullptr); + const function& = nullptr, + bool last_cmd = true); // Perform the registered special file cleanups in the direct order and // then the regular cleanups in the reverse order. @@ -80,6 +82,62 @@ namespace build2 // string diag_path (const dir_name_view&); + + // Read out the stream content into a string, optionally splitting the + // input data at whitespaces or newlines in which case return one + // sub-string at a time (see the set builtin options for the splitting + // semantics). Throw io_error on the underlying OS error. + // + // If the execution deadline is specified, then turn the stream into the + // non-blocking mode. If the specified deadline is reached while reading + // the stream, then bail out for the successful deadline and fail + // otherwise. Note that in the former case the result will be incomplete, + // but we leave it to the caller to handle that. + // + // Note that on Windows we can only turn pipe file descriptors into the + // non-blocking mode. Thus, we have no choice but to read from descriptors + // of other types synchronously there. That implies that we can + // potentially block indefinitely reading a file and missing the deadline + // on Windows. Note though, that the user can normally rewrite the + // command, for example, `set foo <<&, + const command& deadline_cmd, + const location&); + + // Return nullopt if eos is reached. + // + optional + next (); + + private: + ifdstream is_; + bool whitespace_; + bool newline_; + bool exact_; + optional deadline_; + const command& deadline_cmd_; + const location& location_; + + bool empty_ = true; // Set to false after the first character is read. + }; + + // Read the stream content using the stream reader in the no-split exact + // mode. + // + string + stream_read (auto_fd&&, + bool pipe, + const optional&, + const command& deadline_cmd, + const location&); } } diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index 33c4c30..b8dfc68 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -20,16 +20,17 @@ namespace build2 switch (lt) { - case line_type::var: s = "variable"; break; - case line_type::cmd: s = "command"; break; - case line_type::cmd_if: s = "'if'"; break; - case line_type::cmd_ifn: s = "'if!'"; break; - case line_type::cmd_elif: s = "'elif'"; break; - case line_type::cmd_elifn: s = "'elif!'"; break; - case line_type::cmd_else: s = "'else'"; break; - case line_type::cmd_while: s = "'while'"; break; - case line_type::cmd_for: s = "'for'"; break; - case line_type::cmd_end: s = "'end'"; break; + case line_type::var: s = "variable"; break; + case line_type::cmd: s = "command"; break; + case line_type::cmd_if: s = "'if'"; break; + case line_type::cmd_ifn: s = "'if!'"; break; + case line_type::cmd_elif: s = "'elif'"; break; + case line_type::cmd_elifn: s = "'elif!'"; break; + case line_type::cmd_else: s = "'else'"; break; + case line_type::cmd_while: s = "'while'"; break; + case line_type::cmd_for_args: s = "'for'"; break; + case line_type::cmd_for_stream: s = "'for'"; break; + case line_type::cmd_end: s = "'end'"; break; } return o << s; @@ -227,7 +228,8 @@ namespace build2 case line_type::cmd_elifn: case line_type::cmd_else: case line_type::cmd_while: - case line_type::cmd_for: fc_ind += " "; break; + case line_type::cmd_for_args: + case line_type::cmd_for_stream: fc_ind += " "; break; default: break; } diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index 5eb4ee9..aa96b7f 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -28,7 +28,8 @@ namespace build2 cmd_elifn, cmd_else, cmd_while, - cmd_for, + cmd_for_args, // `for x: ...` + cmd_for_stream, // `... | for x` and `for x <...` cmd_end }; @@ -42,7 +43,7 @@ namespace build2 union { - const variable* var; // Pre-entered for line_type::var. + const variable* var; // Pre-entered for line_type::{var,cmd_for_*}. }; }; @@ -547,7 +548,7 @@ namespace build2 // Set variable value with optional (non-empty) attributes. // virtual void - set_variable (string&& name, + set_variable (string name, names&&, const string& attrs, const location&) = 0; @@ -580,6 +581,17 @@ namespace build2 ~environment () = default; }; + // Custom command function that can be executed at the end of the pipe. + // Should throw io_error on the underlying OS error. + // + using command_function = void (environment&, + const strings& args, + auto_fd in, + bool pipe, + const optional&, + const command& deadline_cmd, + const location&); + // Helpers. // // Issue diagnostics with the specified prefix and fail if the string diff --git a/libbuild2/test/script/lexer+for-loop.test.testscript b/libbuild2/test/script/lexer+for-loop.test.testscript new file mode 100644 index 0000000..fcd12f7 --- /dev/null +++ b/libbuild2/test/script/lexer+for-loop.test.testscript @@ -0,0 +1,231 @@ +# file : libbuild2/test/script/lexer+for-loop.test.testscript +# license : MIT; see accompanying LICENSE file + +test.arguments = for-loop + +: semi +{ + : immediate + : + $* <"cmd;" >>EOO + 'cmd' + ; + + EOO + + : separated + : + $* <"cmd ;" >>EOO + 'cmd' + ; + + EOO + + : only + : + $* <";" >>EOO + ; + + EOO +} + +: colon +: +{ + : immediate + : + $* <"cmd: dsc" >>EOO + 'cmd' + : + 'dsc' + + EOO + + : separated + : + $* <"cmd :dsc" >>EOO + 'cmd' + : + 'dsc' + + EOO + + : only + : + $* <":" >>EOO + : + + EOO +} + +: redirect +: +{ + : pass + : + $* <"cmd <| 1>|" >>EOO + 'cmd' + <| + '1' + >| + + EOO + + : null + : + $* <"cmd <- 1>-" >>EOO + 'cmd' + <- + '1' + >- + + EOO + + : trace + : + $* <"cmd 1>!" >>EOO + 'cmd' + '1' + >! + + EOO + + : merge + : + $* <"cmd 1>&2" >>EOO + 'cmd' + '1' + >& + '2' + + EOO + + : str + : + $* <"cmd b" >>EOO + 'cmd' + < + 'a' + '1' + > + 'b' + + EOO + + : str-nn + : + $* <"cmd <:a 1>:b" >>EOO + 'cmd' + <: + 'a' + '1' + >: + 'b' + + EOO + + : doc + : + $* <"cmd <>EOO" >>EOO + 'cmd' + << + 'EOI' + '1' + >> + 'EOO' + + EOO + + : doc-nn + : + $* <"cmd <<:EOI 1>>:EOO" >>EOO + 'cmd' + <<: + 'EOI' + '1' + >>: + 'EOO' + + EOO + + : file-cmp + : + $* <"cmd <<>>out 2>>>err" >>EOO + 'cmd' + <<< + 'in' + >>> + 'out' + '2' + >>> + 'err' + + EOO + + : file-write + : + $* <"cmd >=out 2>+err" >>EOO + 'cmd' + >= + 'out' + '2' + >+ + 'err' + + EOO +} + +: cleanup +: +{ + : always + : + $* <"cmd &file" >>EOO + 'cmd' + & + 'file' + + EOO + + : maybe + : + $* <"cmd &?file" >>EOO + 'cmd' + &? + 'file' + + EOO + + : never + : + $* <"cmd &!file" >>EOO + 'cmd' + &! + 'file' + + EOO +} + +: for +: +{ + : form-1 + : + $* <"for x: a" >>EOO + 'for' + 'x' + : + 'a' + + EOO + + : form-3 + : + $* <"for <<>EOO + 'for' + <<< + 'a' + 'x' + + EOO +} diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx index f9c8ac6..9475ad4 100644 --- a/libbuild2/test/script/lexer.cxx +++ b/libbuild2/test/script/lexer.cxx @@ -41,6 +41,12 @@ namespace build2 switch (m) { + case lexer_mode::for_loop: + { + // Leading tokens of the for-loop. Like command_line but also + // recognizes lsbrace like value. + } + // Fall through. case lexer_mode::command_line: { s1 = ":;=!|&<> $(#\t\n"; @@ -122,6 +128,7 @@ namespace build2 case lexer_mode::first_token: case lexer_mode::second_token: case lexer_mode::variable_line: + case lexer_mode::for_loop: r = next_line (); break; case lexer_mode::description_line: @@ -157,7 +164,8 @@ namespace build2 // if (st.lsbrace) { - assert (m == lexer_mode::variable_line); + assert (m == lexer_mode::variable_line || + m == lexer_mode::for_loop); state_.top ().lsbrace = false; // Note: st is a copy. @@ -197,10 +205,11 @@ namespace build2 // Line separators. // - if (m == lexer_mode::command_line || - m == lexer_mode::first_token || - m == lexer_mode::second_token || - m == lexer_mode::variable_line) + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token || + m == lexer_mode::variable_line || + m == lexer_mode::for_loop) { switch (c) { @@ -210,7 +219,8 @@ namespace build2 if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token) + m == lexer_mode::second_token || + m == lexer_mode::for_loop) { switch (c) { @@ -222,7 +232,8 @@ namespace build2 // if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token) + m == lexer_mode::second_token || + m == lexer_mode::for_loop) { switch (c) { @@ -244,7 +255,8 @@ namespace build2 // if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token) + m == lexer_mode::second_token || + m == lexer_mode::for_loop) { if (optional t = next_cmd_op (c, sep)) return move (*t); diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx index 452e794..def269b 100644 --- a/libbuild2/test/script/lexer.hxx +++ b/libbuild2/test/script/lexer.hxx @@ -24,10 +24,11 @@ namespace build2 enum { command_line = base_type::value_next, - first_token, // Expires at the end of the token. - second_token, // Expires at the end of the token. - variable_line, // Expires at the end of the line. - description_line // Expires at the end of the line. + first_token, // Expires at the end of the token. + second_token, // Expires at the end of the token. + variable_line, // Expires at the end of the line. + description_line, // Expires at the end of the line. + for_loop // Used for sensing the for-loop leading tokens. }; lexer_mode () = default; diff --git a/libbuild2/test/script/lexer.test.cxx b/libbuild2/test/script/lexer.test.cxx index 76f102d..ef3ce4d 100644 --- a/libbuild2/test/script/lexer.test.cxx +++ b/libbuild2/test/script/lexer.test.cxx @@ -36,6 +36,7 @@ namespace build2 else if (s == "variable-line") m = lexer_mode::variable_line; else if (s == "description-line") m = lexer_mode::description_line; else if (s == "variable") m = lexer_mode::variable; + else if (s == "for-loop") m = lexer_mode::for_loop; else assert (false); } diff --git a/libbuild2/test/script/parser+for.test.testscript b/libbuild2/test/script/parser+for.test.testscript index 70c1c89..426a39b 100644 --- a/libbuild2/test/script/parser+for.test.testscript +++ b/libbuild2/test/script/parser+for.test.testscript @@ -16,7 +16,7 @@ cmd end EOI - testscript:1:4: error: expected variable name instead of + testscript:1:1: error: for: missing variable name EOE : untyped @@ -311,3 +311,703 @@ testscript:4:1: error: both leading and trailing descriptions EOE } + +: form-2 +: +: ... | for x +: +{ + : for + : + { + : status + : + $* <>EOE != 0 + echo 'a b' | for x != 0 + cmd + end + EOI + testscript:1:20: error: for-loop exit code cannot be checked + EOE + + : not-last + : + $* <>EOE != 0 + echo 'a b' | for x | echo x + cmd + end + EOI + testscript:1:20: error: for-loop must be last command in a pipe + EOE + + : not-last-relex + : + $* <>EOE != 0 + echo 'a b' | for x|echo x + cmd + end + EOI + testscript:1:19: error: for-loop must be last command in a pipe + EOE + + : expression-after + : + $* <>EOE != 0 + echo 'a b' | for x && echo x + cmd + end + EOI + testscript:1:20: error: command expression involving for-loop + EOE + + : expression-after-relex + : + $* <>EOE != 0 + echo 'a b' | for x&&echo x + cmd + end + EOI + testscript:1:19: error: command expression involving for-loop + EOE + + : expression-before + : + $* <>EOE != 0 + echo 'a b' && echo x | for x + cmd + end + EOI + testscript:1:24: error: command expression involving for-loop + EOE + + : expression-before-relex + : + $* <>EOE != 0 + echo 'a b' && echo x|for x + cmd + end + EOI + testscript:1:22: error: command expression involving for-loop + EOE + + : cleanup + : + $* <>EOE != 0 + echo 'a b' | for x &f + cmd + end + EOI + testscript:1:20: error: cleanup in for-loop + EOE + + : cleanup-relex + : + $* <>EOE != 0 + echo 'a b' | for x&f + cmd + end + EOI + testscript:1:19: error: cleanup in for-loop + EOE + + : stdout-redirect + : + $* <>EOE != 0 + echo 'a b' | for x >a + cmd + end + EOI + testscript:1:20: error: output redirect in for-loop + EOE + + : stdout-redirect-relex + : + $* <>EOE != 0 + echo 'a b' | for x>a + cmd + end + EOI + testscript:1:19: error: output redirect in for-loop + EOE + + : stdin-redirect + : + $* <>EOE != 0 + echo 'a b' | for x >EOE != 0 + echo 'a b' | for + cmd + end + EOI + testscript:1:1: error: for: missing variable name + EOE + + : untyped + : + $* <>EOO + echo 'a b' | for -w x + cmd $x + end + EOI + echo 'a b' | for -w x + EOO + + : expansion + : + $* <>EOO + vs = a b + echo $vs | for x + cmd $x + end + EOI + echo a b | for x + EOO + + : typed-var + : + $* <>EOO + echo 'a b' | for -w [dir_path] x + cmd $x + end + EOI + echo 'a b' | for -w [dir_path] x + EOO + } + + : after-semi + : + $* -s <>EOO + cmd1; + echo 'a b' | for x + cmd2 $x + end + EOI + { + { + cmd1 + echo 'a b' | for x + } + } + EOO + + : setup + : + $* -s <>EOO + +echo 'a b' | for x + cmd $x + end + EOI + { + +echo 'a b' | for x + } + EOO + + : tdown + : + $* -s <>EOO + -echo 'a b' | for x + cmd $x + end + EOI + { + -echo 'a b' | for x + } + EOO + + : end + : + { + : without-end + : + $* <>EOE != 0 + echo 'a b' | for x + cmd + EOI + testscript:3:1: error: expected closing 'end' + EOE + } + + : elif + : + { + : without-if + : + $* <>EOE != 0 + echo 'a b' | for x + elif true + cmd + end + end + EOI + testscript:2:3: error: 'elif' without preceding 'if' + EOE + } + + : nested + : + { + $* -l -r <>EOO + echo 'a b' | for x # 1 + cmd1 $x # 2 + if ($x == "a") # 3 + cmd2 # 4 + echo x y | for y # 5 + cmd3 # 6 + end + else + cmd4 # 7 + end + cmd5 # 8 + end; + cmd6 # 9 + EOI + echo 'a b' | for x # 1 + cmd6 # 9 + EOO + } + + : contained + : + { + : semi + : + $* <>EOE != 0 + echo 'a b' | for x + cmd; + cmd + end + EOI + testscript:2:3: error: ';' inside 'for' + EOE + + : colon-leading + : + $* <>EOE != 0 + echo 'a b' | for x + : foo + cmd + end + EOI + testscript:2:3: error: description inside 'for' + EOE + + : colon-trailing + : + $* <>EOE != 0 + echo 'a b' | for x + cmd : foo + end + EOI + testscript:2:3: error: description inside 'for' + EOE + + : eos + : + $* <>EOE != 0 + echo 'a b' | for x + EOI + testscript:2:1: error: expected closing 'end' + EOE + + : scope + : + $* <>EOE != 0 + echo 'a b' | for x + cmd + { + } + end + EOI + testscript:3:3: error: expected closing 'end' + EOE + + : setup + : + $* <>EOE != 0 + echo 'a b' | for x + +cmd + end + EOI + testscript:2:3: error: setup command inside 'for' + EOE + + : tdown + : + $* <>EOE != 0 + echo 'a b' | for x + -cmd + end + EOI + testscript:2:3: error: teardown command inside 'for' + EOE + } + + : leading-and-trailing-description + : + $* <>EOE != 0 + : foo + echo 'a b' | for x + cmd + end : bar + EOI + testscript:4:1: error: both leading and trailing descriptions + EOE +} + +: form-3 +: +: for x <... +: +{ + : for + : + { + : status + : + $* <>EOE != 0 + for x >EOE != 0 + for x >EOE != 0 + for >EOE != 0 + for x >EOE != 0 + for >EOE != 0 + echo 'a b' && for x >EOE != 0 + for x >EOE != 0 + for &f x >EOE != 0 + for >EOE != 0 + for x >a + cmd + end + EOI + testscript:1:7: error: output redirect in for-loop + EOE + + : stdout-redirect-before-var + : + $* <>EOE != 0 + for >a x + cmd + end + EOI + testscript:1:5: error: output redirect in for-loop + EOE + + : stdout-redirect-relex + : + $* <>EOE != 0 + for x>a + cmd + end + EOI + testscript:1:6: error: output redirect in for-loop + EOE + + : no-var + : + $* <>EOE != 0 + for >EOO + for -w x <'a b' + cmd $x + end + EOI + for -w x <'a b' + EOO + + : expansion + : + $* <>EOO + vs = a b + for x <$vs + cmd $x + end + EOI + for x b >EOO + for -w [dir_path] x <'a b' + cmd $x + end + EOI + for -w [dir_path] x <'a b' + EOO + } + + : after-semi + : + $* -s <>EOO + cmd1; + for x <'a b' + cmd2 $x + end + EOI + { + { + cmd1 + for x <'a b' + } + } + EOO + + : setup + : + $* -s <>EOO + +for x <'a b' + cmd $x + end + EOI + { + +for x <'a b' + } + EOO + + : tdown + : + $* -s <>EOO + -for x <'a b' + cmd $x + end + EOI + { + -for x <'a b' + } + EOO + + : end + : + { + : without-end + : + $* <>EOE != 0 + for x <'a b' + cmd + EOI + testscript:3:1: error: expected closing 'end' + EOE + } + + : elif + : + { + : without-if + : + $* <>EOE != 0 + for x <'a b' + elif true + cmd + end + end + EOI + testscript:2:3: error: 'elif' without preceding 'if' + EOE + } + + : nested + : + { + $* -l -r <>EOO + for -w x <'a b' # 1 + cmd1 $x # 2 + if ($x == "a") # 3 + cmd2 # 4 + for -w y <'x y' # 5 + cmd3 # 6 + end + else + cmd4 # 7 + end + cmd5 # 8 + end; + cmd6 # 9 + EOI + for -w x <'a b' # 1 + cmd6 # 9 + EOO + } + + : contained + : + { + : semi + : + $* <>EOE != 0 + for x <'a b' + cmd; + cmd + end + EOI + testscript:2:3: error: ';' inside 'for' + EOE + + : colon-leading + : + $* <>EOE != 0 + for x <'a b' + : foo + cmd + end + EOI + testscript:2:3: error: description inside 'for' + EOE + + : colon-trailing + : + $* <>EOE != 0 + for x <'a b' + cmd : foo + end + EOI + testscript:2:3: error: description inside 'for' + EOE + + : eos + : + $* <>EOE != 0 + for x <'a b' + EOI + testscript:2:1: error: expected closing 'end' + EOE + + : scope + : + $* <>EOE != 0 + for x <'a b' + cmd + { + } + end + EOI + testscript:3:3: error: expected closing 'end' + EOE + + : setup + : + $* <>EOE != 0 + for x <'a b' + +cmd + end + EOI + testscript:2:3: error: setup command inside 'for' + EOE + + : tdown + : + $* <>EOE != 0 + for x <'a b' + -cmd + end + EOI + testscript:2:3: error: teardown command inside 'for' + EOE + } + + : leading-and-trailing-description + : + $* <>EOE != 0 + : foo + for x <'a b' + cmd + end : bar + EOI + testscript:4:1: error: both leading and trailing descriptions + EOE +} diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx index f302aee..f85b185 100644 --- a/libbuild2/test/script/parser.cxx +++ b/libbuild2/test/script/parser.cxx @@ -311,10 +311,11 @@ namespace build2 // enter: next token is peeked at (type in tt) // leave: newline - assert (!fct || - *fct == line_type::cmd_if || - *fct == line_type::cmd_while || - *fct == line_type::cmd_for); + assert (!fct || + *fct == line_type::cmd_if || + *fct == line_type::cmd_while || + *fct == line_type::cmd_for_stream || + *fct == line_type::cmd_for_args); // Note: token is only peeked at. // @@ -324,6 +325,52 @@ namespace build2 // line_type lt; type st (type::eos); // Later, can only be set to plus or minus. + bool semi (false); + + // Parse the command line tail, starting from the newline or the + // potential colon/semicolon token. + // + // Note that colon and semicolon are only valid in test command lines + // and after 'end' in flow control constructs. Note that we always + // recognize them lexically, even when they are not valid tokens per + // the grammar. + // + auto parse_command_tail = [&t, &tt, &st, <, &d, &semi, &ll, this] () + { + if (tt != type::newline) + { + if (lt != line_type::cmd && lt != line_type::cmd_end) + fail (t) << "expected newline instead of " << t; + + switch (st) + { + case type::plus: fail (t) << t << " after setup command" << endf; + case type::minus: fail (t) << t << " after teardown command" << endf; + } + } + + switch (tt) + { + case type::colon: + { + if (d) + fail (ll) << "both leading and trailing descriptions"; + + d = parse_trailing_description (t, tt); + break; + } + case type::semi: + { + semi = true; + replay_pop (); // See above for the reasoning. + next (t, tt); // Get newline. + break; + } + } + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + }; switch (tt) { @@ -371,10 +418,12 @@ namespace build2 { const string& n (t.value); + // Handle the for-loop consistently with pre_parse_line_start(). + // if (n == "if") lt = line_type::cmd_if; else if (n == "if!") lt = line_type::cmd_ifn; else if (n == "while") lt = line_type::cmd_while; - else if (n == "for") lt = line_type::cmd_for; + else if (n == "for") lt = line_type::cmd_for_stream; } break; @@ -388,8 +437,6 @@ namespace build2 // Pre-parse the line keeping track of whether it ends with a semi. // - bool semi (false); - line ln; switch (lt) { @@ -436,47 +483,80 @@ namespace build2 break; } - case line_type::cmd_for: + // + // See pre_parse_line_start() for details. + // + case line_type::cmd_for_args: assert (false); break; + case line_type::cmd_for_stream: { - // First take care of the variable name. There is no reason not to - // support variable attributes. + // First we need to sense the next few tokens and detect which + // form of the for-loop that actually is (see + // libbuild2/build/script/parser.cxx for details). // - mode (lexer_mode::normal); + token pt (t); + assert (pt.type == type::word && pt.value == "for"); + mode (lexer_mode::for_loop); next_with_attributes (t, tt); - attributes_push (t, tt); - - if (tt != type::word || t.qtype != quote_type::unquoted) - fail (t) << "expected variable name instead of " << t; string& n (t.value); - if (special_variable (n)) - fail (t) << "attempt to set '" << n << "' variable directly"; + if (tt == type::lsbrace || // Attributes. + (tt == type::word && // Variable name. + t.qtype == quote_type::unquoted && + (n[0] == '_' || + alpha (n[0]) || + n == "*" || + n == "~" || + n == "@"))) + { + attributes_push (t, tt); - ln.var = &script_->var_pool.insert (move (n)); + if (tt != type::word || t.qtype != quote_type::unquoted) + fail (t) << "expected variable name instead of " << t; - next (t, tt); + if (special_variable (n)) + fail (t) << "attempt to set '" << n << "' variable directly"; - if (tt != type::colon) + if (lexer_->peek_char ().first == ':') + lt = line_type::cmd_for_args; + } + + if (lt == line_type::cmd_for_stream) // for x <... { - // @@ TMP We will need to fallback to parsing the 'for x <...' - // form instead. - // - fail (t) << "expected ':' instead of " << t - << " after variable name"; + ln.var = nullptr; + + expire_mode (); + + parse_command_expr_result r ( + parse_command_expr (t, tt, + lexer::redirect_aliases, + move (pt))); + + assert (r.for_loop); + + parse_command_tail (); + parse_here_documents (t, tt, r); } + else // for x: ... + { + ln.var = &script_->var_pool.insert (move (n)); - expire_mode (); // Expire the normal lexer mode. + next (t, tt); - // Parse the value similar to the var line type (see above), - // except for the fact that we don't expect a trailing semicolon. - // - mode (lexer_mode::variable_line); - parse_variable_line (t, tt); + assert (tt == type::colon); - if (tt != type::newline) - fail (t) << "expected newline instead of " << t << " after for"; + expire_mode (); + + // Parse the value similar to the var line type (see above), + // except for the fact that we don't expect a trailing semicolon. + // + mode (lexer_mode::variable_line); + parse_variable_line (t, tt); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t << " after for"; + } break; } @@ -501,51 +581,20 @@ namespace build2 // Fall through. case line_type::cmd: { - pair p; + parse_command_expr_result r; if (lt != line_type::cmd_else && lt != line_type::cmd_end) - p = parse_command_expr (t, tt, lexer::redirect_aliases); + r = parse_command_expr (t, tt, lexer::redirect_aliases); - // Colon and semicolon are only valid in test command lines and - // after 'end' in a flow control construct. Note that we still - // recognize them lexically, they are just not valid tokens per - // the grammar. - // - if (tt != type::newline) + if (r.for_loop) { - if (lt != line_type::cmd && lt != line_type::cmd_end) - fail (t) << "expected newline instead of " << t; - - switch (st) - { - case type::plus: fail (t) << t << " after setup command" << endf; - case type::minus: fail (t) << t << " after teardown command" << endf; - } - } - - switch (tt) - { - case type::colon: - { - if (d) - fail (ll) << "both leading and trailing descriptions"; - - d = parse_trailing_description (t, tt); - break; - } - case type::semi: - { - semi = true; - replay_pop (); // See above for the reasoning. - next (t, tt); // Get newline. - break; - } + lt = line_type::cmd_for_stream; + ln.var = nullptr; } - if (tt != type::newline) - fail (t) << "expected newline instead of " << t; + parse_command_tail (); + parse_here_documents (t, tt, r); - parse_here_documents (t, tt, p); break; } } @@ -579,7 +628,8 @@ namespace build2 break; } case line_type::cmd_while: - case line_type::cmd_for: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: { semi = pre_parse_loop (t, tt, lt, d, *ls); break; @@ -608,7 +658,8 @@ namespace build2 case line_type::cmd_if: case line_type::cmd_ifn: case line_type::cmd_while: - case line_type::cmd_for: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: { // See if this is a variable-only flow control construct. // @@ -951,7 +1002,7 @@ namespace build2 // size_t i (ls.size ()); - line_type fct; // Flow control type the block type relates to. + line_type fct; // Flow control construct type the block type relates to. switch (bt) { @@ -965,7 +1016,8 @@ namespace build2 break; } case line_type::cmd_while: - case line_type::cmd_for: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: { fct = bt; break; @@ -1068,7 +1120,9 @@ namespace build2 // enter: (previous line) // leave: - assert (lt == line_type::cmd_while || lt == line_type::cmd_for); + assert (lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args); tt = peek (lexer_mode::first_token); @@ -1428,7 +1482,7 @@ namespace build2 // Note: this one is only used during execution. - pair p ( + parse_command_expr_result pr ( parse_command_expr (t, tt, lexer::redirect_aliases)); if (tt == type::colon) @@ -1436,10 +1490,10 @@ namespace build2 assert (tt == type::newline); - parse_here_documents (t, tt, p); + parse_here_documents (t, tt, pr); assert (tt == type::newline); - command_expr r (move (p.first)); + command_expr r (move (pr.expr)); // If the test program runner is specified, then adjust the // expressions to run test programs via this runner. @@ -1582,6 +1636,7 @@ namespace build2 auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt, const iteration_index* ii, size_t li, bool single, + const function& cf, const location& ll) { // We use the 0 index to signal that this is the only command. @@ -1593,7 +1648,7 @@ namespace build2 command_expr ce ( parse_command_line (t, static_cast (tt))); - runner_->run (*scope_, ce, ct, ii, li, ll); + runner_->run (*scope_, ce, ct, ii, li, cf, ll); }; auto exec_cond = [this] (token& t, build2::script::token_type& tt, @@ -1827,7 +1882,8 @@ namespace build2 // The rest. // - // When add a special variable don't forget to update lexer::word(). + // When add a special variable don't forget to update lexer::word() and + // for-loop parsing in pre_parse_line(). // bool parser:: special_variable (const string& n) noexcept diff --git a/libbuild2/test/script/parser.test.cxx b/libbuild2/test/script/parser.test.cxx index ab0aee9..7339346 100644 --- a/libbuild2/test/script/parser.test.cxx +++ b/libbuild2/test/script/parser.test.cxx @@ -100,11 +100,32 @@ namespace build2 } virtual void - run (scope&, + run (scope& env, const command_expr& e, command_type t, const iteration_index* ii, size_t i, - const location&) override + const function& cf, + const location& ll) override { + // If the functions is specified, then just execute it with an empty + // stdin so it can perform the housekeeping (stop replaying tokens, + // increment line index, etc). + // + if (cf != nullptr) + { + assert (e.size () == 1 && !e[0].pipe.empty ()); + + const command& c (e[0].pipe.back ()); + + // Must be enforced by the caller. + // + assert (!c.out && !c.err && !c.exit); + + cf (env, c.arguments, + fdopen_null (), false /* pipe */, + nullopt /* deadline */, c, + ll); + } + const char* s (nullptr); switch (t) diff --git a/libbuild2/test/script/runner.cxx b/libbuild2/test/script/runner.cxx index 42eef04..340ada4 100644 --- a/libbuild2/test/script/runner.cxx +++ b/libbuild2/test/script/runner.cxx @@ -143,6 +143,7 @@ namespace build2 run (scope& sp, const command_expr& expr, command_type ct, const iteration_index* ii, size_t li, + const function& cf, const location& ll) { // Noop for teardown commands if keeping tests output is requested. @@ -176,7 +177,7 @@ namespace build2 dr << info << "test id: " << sp.id_path.posix_string (); }); - build2::script::run (sp, expr, ii, li, ll); + build2::script::run (sp, expr, ii, li, ll, cf); } bool default_runner:: diff --git a/libbuild2/test/script/runner.hxx b/libbuild2/test/script/runner.hxx index 0309a35..687d991 100644 --- a/libbuild2/test/script/runner.hxx +++ b/libbuild2/test/script/runner.hxx @@ -48,10 +48,14 @@ namespace build2 // Location is the start position of this command line in the // testscript. It can be used in diagnostics. // + // Optionally, execute the specified function instead of the last + // pipe command. + // virtual void run (scope&, const command_expr&, command_type, const iteration_index*, size_t index, + const function&, const location&) = 0; virtual bool @@ -88,6 +92,7 @@ namespace build2 run (scope&, const command_expr&, command_type, const iteration_index*, size_t, + const function&, const location&) override; virtual bool diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx index e10afec..bbe5326 100644 --- a/libbuild2/test/script/script.cxx +++ b/libbuild2/test/script/script.cxx @@ -115,7 +115,7 @@ namespace build2 } void scope:: - set_variable (string&& nm, + set_variable (string nm, names&& val, const string& attrs, const location& ll) diff --git a/libbuild2/test/script/script.hxx b/libbuild2/test/script/script.hxx index b75f68e..319a9e2 100644 --- a/libbuild2/test/script/script.hxx +++ b/libbuild2/test/script/script.hxx @@ -32,6 +32,7 @@ namespace build2 using build2::script::environment_vars; using build2::script::deadline; using build2::script::timeout; + using build2::script::command_function; class parser; // Required by VC for 'friend class parser' declaration. @@ -105,7 +106,7 @@ namespace build2 small_vector test_programs; void - set_variable (string&& name, + set_variable (string name, names&&, const string& attrs, const location&) override; diff --git a/libbuild2/utility.hxx b/libbuild2/utility.hxx index a285b03..88ea43d 100644 --- a/libbuild2/utility.hxx +++ b/libbuild2/utility.hxx @@ -91,6 +91,7 @@ namespace build2 // // + using butl::fdopen_null; using butl::open_file_or_stdin; using butl::open_file_or_stdout; -- cgit v1.1