diff options
Diffstat (limited to 'libbuild2/script/parser.cxx')
-rw-r--r-- | libbuild2/script/parser.cxx | 465 |
1 files changed, 376 insertions, 89 deletions
diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 536821b..84d2afc 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -4,10 +4,13 @@ #include <libbuild2/script/parser.hxx> #include <cstring> // strchr() +#include <sstream> #include <libbuild2/variable.hxx> -#include <libbuild2/script/run.hxx> // exit + +#include <libbuild2/script/run.hxx> // exit, stream_reader #include <libbuild2/script/lexer.hxx> +#include <libbuild2/script/builtin-options.hxx> using namespace std; @@ -140,18 +143,20 @@ namespace build2 return nullopt; } - pair<command_expr, parser::here_docs> parser:: + parser::parse_command_expr_result parser:: parse_command_expr (token& t, type& tt, - const redirect_aliases& ra) + const redirect_aliases& ra, + optional<token>&& program) { - // enter: first token of the command line + // enter: first (or second, if program) token of the command line // leave: <newline> or unknown token command_expr expr; // OR-ed to an implied false for the first term. // - expr.push_back ({expr_operator::log_or, command_pipe ()}); + if (!pre_parse_) + expr.push_back ({expr_operator::log_or, command_pipe ()}); command c; // Command being assembled. @@ -218,8 +223,8 @@ namespace build2 // Add the next word to either one of the pending positions or to // program arguments by default. // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) + auto add_word = [&c, &p, &mod, &check_regex_mod, this] + (string&& w, const location& l) { auto add_merge = [&l, this] (optional<redirect>& r, const string& w, @@ -697,11 +702,30 @@ namespace build2 const location ll (get_location (t)); // Line location. // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, exit status comparison, etc). + // "terminators" (newline or unknown/unexpected token). // location l (ll); names ns; // Reuse to reduce allocations. + bool for_loop (false); + + if (program) + { + assert (program->type == type::word); + + // Note that here we skip all the parse_program() business since the + // program can only be one of the specially-recognized names. + // + if (program->value == "for") + for_loop = true; + else + assert (false); // Must be specially-recognized program. + + // Save the program name and continue parsing as a command. + // + add_word (move (program->value), get_location (*program)); + } + for (bool done (false); !done; l = get_location (t)) { tt = ra.resolve (tt); @@ -717,6 +741,9 @@ namespace build2 case type::equal: case type::not_equal: { + if (for_loop) + fail (l) << "for-loop exit code cannot be checked"; + if (!pre_parse_) check_pending (l); @@ -747,30 +774,39 @@ namespace build2 } case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Fall through. - case type::in_pass: - case type::out_pass: + case type::clean: + if (for_loop) + fail (l) << "cleanup in for-loop"; + // Fall through. - case type::in_null: + case type::out_pass: case type::out_null: - case type::out_trace: - case type::out_merge: - - case type::in_str: - case type::in_doc: case type::out_str: case type::out_doc: - - case type::in_file: case type::out_file_cmp: case type::out_file_ovr: case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. - case type::clean: + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: { if (pre_parse_) { @@ -968,6 +1004,42 @@ namespace build2 next (t, tt); break; } + case type::lsbrace: + { + // Recompose the attributes into a single command argument. + // + assert (!pre_parse_); + + attributes_push (t, tt, true /* standalone */); + + attributes as (attributes_pop ()); + assert (!as.empty ()); + + ostringstream os; + names storage; + char c ('['); + for (const attribute& a: as) + { + os << c << a.name; + + if (!a.value.null) + { + os << '='; + + storage.clear (); + to_stream (os, + reverse (a.value, storage, true /* reduce */), + quote_mode::normal, + '@'); + } + + c = ','; + } + os << ']'; + + add_word (os.str (), l); + break; + } default: { // Bail out if this is one of the unknown tokens. @@ -1036,11 +1108,12 @@ namespace build2 hd.push_back ( here_doc { {rd}, - move (end), - (t.qtype == quote_type::unquoted || - t.qtype == quote_type::single), - move (mod), - r.intro, move (r.flags)}); + move (end), + (t.qtype == quote_type::unquoted || + t.qtype == quote_type::single), + move (mod), + r.intro, + move (r.flags)}); p = pending::none; mod.clear (); @@ -1053,16 +1126,34 @@ namespace build2 bool prog (p == pending::program_first || p == pending::program_next); - // Check if this is the env pseudo-builtin. + // Check if this is the env pseudo-builtin or the for-loop. // bool env (false); - if (prog && tt == type::word && t.value == "env") + if (prog && tt == type::word) { - parsed_env r (parse_env_builtin (t, tt)); - c.cwd = move (r.cwd); - c.variables = move (r.variables); - c.timeout = r.timeout; - env = true; + if (t.value == "env") + { + parsed_env r (parse_env_builtin (t, tt)); + c.cwd = move (r.cwd); + c.variables = move (r.variables); + c.timeout = r.timeout; + c.timeout_success = r.timeout_success; + env = true; + } + else if (t.value == "for") + { + if (expr.size () > 1) + fail (l) << "command expression involving for-loop"; + + for_loop = true; + + // Save 'for' as a program name and continue parsing as a + // command. + // + add_word (move (t.value), l); + next (t, tt); + continue; + } } // Parse the next chunk as names to get expansion, etc. Note that @@ -1243,9 +1334,16 @@ namespace build2 switch (tt) { case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: { + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Check that the previous command makes sense. // check_command (l, tt != type::pipe); @@ -1265,30 +1363,11 @@ namespace build2 break; } - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (move (t), tt, l); - break; - } - case type::clean: { + if (for_loop) + fail (l) << "cleanup in for-loop"; + parse_clean (t); break; } @@ -1299,6 +1378,27 @@ namespace build2 fail (l) << "here-document redirect in expansion"; break; } + + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. + + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_file: + { + parse_redirect (move (t), tt, l); + break; + } } } @@ -1326,7 +1426,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); } - return make_pair (move (expr), move (hd)); + return parse_command_expr_result {move (expr), move (hd), for_loop}; } parser::parsed_env parser:: @@ -1502,6 +1602,10 @@ namespace build2 { r.timeout = chrono::seconds (*v); } + else if (o == "-s" || o == "--timeout-success") + { + r.timeout_success = true; + } else if (optional<dir_path> v = dir ("--cwd", "-c")) { r.cwd = move (*v); @@ -1516,6 +1620,9 @@ namespace build2 break; } + if (r.timeout_success && !r.timeout) + fail (l) << "env: -s|--timeout-success specified without -t|--timeout"; + // Parse arguments (variable sets). // for (; i != e; ++i) @@ -1575,7 +1682,7 @@ namespace build2 void parser:: parse_here_documents (token& t, type& tt, - pair<command_expr, here_docs>& p) + parse_command_expr_result& pr) { // enter: newline // leave: newline @@ -1583,7 +1690,7 @@ namespace build2 // Parse here-document fragments in the order they were mentioned on // the command line. // - for (here_doc& h: p.second) + for (here_doc& h: pr.docs) { // Switch to the here-line mode which is like single/double-quoted // string but recognized the newline as a separator. @@ -1603,7 +1710,7 @@ namespace build2 { auto i (h.redirects.cbegin ()); - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional<redirect>& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -1635,7 +1742,7 @@ namespace build2 // for (++i; i != h.redirects.cend (); ++i) { - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional<redirect>& ir (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -2062,7 +2169,7 @@ namespace build2 else if (n == "elif!") r = line_type::cmd_elifn; else if (n == "else") r = line_type::cmd_else; else if (n == "while") r = line_type::cmd_while; - else if (n == "for") r = line_type::cmd_for; + else if (n == "for") r = line_type::cmd_for_stream; else if (n == "end") r = line_type::cmd_end; else { @@ -2136,10 +2243,11 @@ namespace build2 { line_type lt (j->type); - if (lt == line_type::cmd_if || - lt == line_type::cmd_ifn || - lt == line_type::cmd_while || - lt == line_type::cmd_for) + if (lt == line_type::cmd_if || + lt == line_type::cmd_ifn || + lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args) ++n; // If we are nested then we just wait until we get back @@ -2164,10 +2272,8 @@ namespace build2 if (skip) { - // Note that we don't count else and end as commands. - // - // @@ Note that for the for-loop's second and third forms - // will probably need to increment li. + // Note that we don't count else, end, and 'for x: ...' as + // commands. // switch (lt) { @@ -2176,8 +2282,9 @@ namespace build2 case line_type::cmd_ifn: case line_type::cmd_elif: case line_type::cmd_elifn: - case line_type::cmd_while: ++li; break; - default: break; + case line_type::cmd_for_stream: + case line_type::cmd_while: ++li; break; + default: break; } } } @@ -2221,7 +2328,10 @@ namespace build2 single = true; } - exec_cmd (t, tt, ii, li++, single, ll); + exec_cmd (t, tt, + ii, li++, single, + nullptr /* command_function */, + ll); replay_stop (); break; @@ -2339,12 +2449,179 @@ namespace build2 break; } - case line_type::cmd_for: + case line_type::cmd_for_stream: { - // Parse the variable name with the potential attributes. + // The for-loop construct end. Set on the first iteration. // - next_with_attributes (t, tt); - attributes_push (t, tt); + lines::const_iterator fe (e); + + // Let's "wrap up" all the required data into the single object + // to rely on the "small function object" optimization. + // + struct loop_data + { + lines::const_iterator i; + lines::const_iterator e; + const function<exec_set_function>& exec_set; + const function<exec_cmd_function>& exec_cmd; + const function<exec_cond_function>& exec_cond; + const function<exec_for_function>& exec_for; + const iteration_index* ii; + size_t& li; + variable_pool* var_pool; + decltype (fcend)& fce; + lines::const_iterator& fe; + } ld {i, e, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool, + fcend, + fe}; + + function<command_function> cf ( + [&ld, this] + (environment& env, + const strings& args, + auto_fd in, + pipe_command* pipe, + const optional<deadline>& dl, + const location& ll) + { + namespace cli = build2::build::cli; + + try + { + // Parse arguments. + // + cli::vector_scanner scan (args); + for_options ops (scan); + + // Note: diagnostics consistent with the set builtin. + // + if (ops.whitespace () && ops.newline ()) + fail (ll) << "for: both -n|--newline and " + << "-w|--whitespace specified"; + + if (!scan.more ()) + fail (ll) << "for: missing variable name"; + + string vname (scan.next ()); + if (vname.empty ()) + fail (ll) << "for: empty variable name"; + + // Detect patterns analogous to parse_variable_name() (so + // we diagnose `for x[string]`). + // + if (vname.find_first_of ("[*?") != string::npos) + fail (ll) << "for: expected variable name instead of " + << vname; + + // Let's also diagnose the `... | for x:...` misuse which + // can probably be quite common. + // + if (vname.find (':') != string::npos) + fail (ll) << "for: ':' after variable name"; + + string attrs; + if (scan.more ()) + { + attrs = scan.next (); + + if (attrs.empty ()) + fail (ll) << "for: empty variable attributes"; + + if (scan.more ()) + fail (ll) << "for: unexpected argument '" + << scan.next () << "'"; + } + + // Since the command pipe is parsed, we can stop + // replaying. Note that we should do this before calling + // exec_lines() for the loop body. Also note that we + // should increment the line index before that. + // + replay_stop (); + + size_t fli (++ld.li); + iteration_index fi {1, ld.ii}; + + // Let's "wrap up" all the required data into the single + // object to rely on the "small function object" + // optimization. + // + struct + { + loop_data& ld; + environment& env; + const string& vname; + const string& attrs; + const location& ll; + size_t fli; + iteration_index& fi; + + } d {ld, env, vname, attrs, ll, fli, fi}; + + function<void (string&&)> f ( + [&d, this] (string&& s) + { + loop_data& ld (d.ld); + + ld.li = d.fli; + + // Don't move from the variable name since it is used + // on each iteration. + // + d.env.set_variable (d.vname, + names {name (move (s))}, + d.attrs, + d.ll); + + // Find the construct end, if it is not found yet. + // + if (ld.fe == ld.e) + ld.fe = ld.fce (ld.i, true, false); + + if (!exec_lines (ld.i + 1, ld.fe, + ld.exec_set, + ld.exec_cmd, + ld.exec_cond, + ld.exec_for, + &d.fi, ld.li, + ld.var_pool)) + { + throw exit (true); + } + + d.fi.index++; + }); + + read (move (in), + !ops.newline (), ops.newline (), ops.exact (), + f, + pipe, + dl, + ll, + "for"); + } + catch (const cli::exception& e) + { + fail (ll) << "for: " << e; + } + }); + + exec_cmd (t, tt, ii, li, false /* single */, cf, ll); + + // Position to construct end. + // + i = (fe != e ? fe : fcend (i, true, true)); + + break; + } + case line_type::cmd_for_args: + { + // Parse the variable name. + // + next (t, tt); assert (tt == type::word && t.qtype == quote_type::unquoted); @@ -2363,11 +2640,18 @@ namespace build2 var = &var_pool->insert (move (vn)); } - apply_variable_attributes (*var); + // Parse the potential element attributes and skip the colon. + // + next_with_attributes (t, tt); + attributes_push (t, tt); - next (t, tt); // Skip the colon. assert (tt == type::colon); + // Save element attributes so that we can inject them on each + // iteration. + // + attributes val_attrs (attributes_pop ()); + // Parse the value with the potential attributes. // // Note that we don't really need to change the mode since we @@ -2400,30 +2684,33 @@ namespace build2 if (val.type != nullptr) { etype = val.type->element_type; - untypify (val); + + // Note that here we don't want to be reducing empty simple + // values to empty lists. + // + untypify (val, false /* reduce */); } size_t fli (li); iteration_index fi {1, ii}; + names& ns (val.as<names> ()); - // @@ Handle pairs. - // - // Do we need to always lex the variable values (for-loop - // and var lines) pair-character aware? - // - // Can there be any harm if a value with pairs is - // substituted into the command line? - // - for (name& n: val.as<names> ()) + for (auto ni (ns.begin ()), ne (ns.end ()); ni != ne; ++ni) { li = fli; - value v (names {move (n)}); // Untyped. + // Set the variable value. + // + bool pair (ni->pair); + names n; + n.push_back (move (*ni)); + if (pair) n.push_back (move (*++ni)); + value v (move (n)); // Untyped. if (etype != nullptr) typify (v, *etype, var); - exec_for (*var, move (v), ll); + exec_for (*var, move (v), val_attrs, ll); // Find the construct end, if it is not found yet. // @@ -2484,7 +2771,7 @@ namespace build2 } parser::parsed_doc:: - parsed_doc (parsed_doc&& d) + parsed_doc (parsed_doc&& d) noexcept : re (d.re), end_line (d.end_line), end_column (d.end_column) { if (re) |