diff options
Diffstat (limited to 'libbuild2/script/parser.cxx')
-rw-r--r-- | libbuild2/script/parser.cxx | 767 |
1 files changed, 630 insertions, 137 deletions
diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 7722002..84d2afc 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -3,9 +3,14 @@ #include <libbuild2/script/parser.hxx> +#include <cstring> // strchr() +#include <sstream> + #include <libbuild2/variable.hxx> -#include <libbuild2/script/run.hxx> // exit + +#include <libbuild2/script/run.hxx> // exit, stream_reader #include <libbuild2/script/lexer.hxx> +#include <libbuild2/script/builtin-options.hxx> using namespace std; @@ -15,6 +20,33 @@ namespace build2 { using type = token_type; + bool parser:: + need_cmdline_relex (const string& s) + { + for (auto i (s.begin ()), e (s.end ()); i != e; ++i) + { + char c (*i); + + if (c == '\\') + { + if (++i == e) + return false; + + c = *i; + + if (c == '\\' || c == '\'' || c == '\"') + return true; + + // Fall through. + } + + if (strchr ("|<>&\"'", c) != nullptr) + return true; + } + + return false; + } + value parser:: parse_variable_line (token& t, type& tt) { @@ -111,18 +143,20 @@ namespace build2 return nullopt; } - pair<command_expr, parser::here_docs> parser:: + parser::parse_command_expr_result parser:: parse_command_expr (token& t, type& tt, - const redirect_aliases& ra) + const redirect_aliases& ra, + optional<token>&& program) { - // enter: first token of the command line + // enter: first (or second, if program) token of the command line // leave: <newline> or unknown token command_expr expr; // OR-ed to an implied false for the first term. // - expr.push_back ({expr_operator::log_or, command_pipe ()}); + if (!pre_parse_) + expr.push_back ({expr_operator::log_or, command_pipe ()}); command c; // Command being assembled. @@ -189,8 +223,8 @@ namespace build2 // Add the next word to either one of the pending positions or to // program arguments by default. // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) + auto add_word = [&c, &p, &mod, &check_regex_mod, this] + (string&& w, const location& l) { auto add_merge = [&l, this] (optional<redirect>& r, const string& w, @@ -668,11 +702,30 @@ namespace build2 const location ll (get_location (t)); // Line location. // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, exit status comparison, etc). + // "terminators" (newline or unknown/unexpected token). // location l (ll); names ns; // Reuse to reduce allocations. + bool for_loop (false); + + if (program) + { + assert (program->type == type::word); + + // Note that here we skip all the parse_program() business since the + // program can only be one of the specially-recognized names. + // + if (program->value == "for") + for_loop = true; + else + assert (false); // Must be specially-recognized program. + + // Save the program name and continue parsing as a command. + // + add_word (move (program->value), get_location (*program)); + } + for (bool done (false); !done; l = get_location (t)) { tt = ra.resolve (tt); @@ -688,6 +741,9 @@ namespace build2 case type::equal: case type::not_equal: { + if (for_loop) + fail (l) << "for-loop exit code cannot be checked"; + if (!pre_parse_) check_pending (l); @@ -718,30 +774,39 @@ namespace build2 } case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Fall through. - case type::in_pass: - case type::out_pass: + case type::clean: + if (for_loop) + fail (l) << "cleanup in for-loop"; + // Fall through. - case type::in_null: + case type::out_pass: case type::out_null: - case type::out_trace: - case type::out_merge: - - case type::in_str: - case type::in_doc: case type::out_str: case type::out_doc: - - case type::in_file: case type::out_file_cmp: case type::out_file_ovr: case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. - case type::clean: + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: { if (pre_parse_) { @@ -939,6 +1004,42 @@ namespace build2 next (t, tt); break; } + case type::lsbrace: + { + // Recompose the attributes into a single command argument. + // + assert (!pre_parse_); + + attributes_push (t, tt, true /* standalone */); + + attributes as (attributes_pop ()); + assert (!as.empty ()); + + ostringstream os; + names storage; + char c ('['); + for (const attribute& a: as) + { + os << c << a.name; + + if (!a.value.null) + { + os << '='; + + storage.clear (); + to_stream (os, + reverse (a.value, storage, true /* reduce */), + quote_mode::normal, + '@'); + } + + c = ','; + } + os << ']'; + + add_word (os.str (), l); + break; + } default: { // Bail out if this is one of the unknown tokens. @@ -1007,11 +1108,12 @@ namespace build2 hd.push_back ( here_doc { {rd}, - move (end), - (t.qtype == quote_type::unquoted || - t.qtype == quote_type::single), - move (mod), - r.intro, move (r.flags)}); + move (end), + (t.qtype == quote_type::unquoted || + t.qtype == quote_type::single), + move (mod), + r.intro, + move (r.flags)}); p = pending::none; mod.clear (); @@ -1024,16 +1126,34 @@ namespace build2 bool prog (p == pending::program_first || p == pending::program_next); - // Check if this is the env pseudo-builtin. + // Check if this is the env pseudo-builtin or the for-loop. // bool env (false); - if (prog && tt == type::word && t.value == "env") + if (prog && tt == type::word) { - parsed_env r (parse_env_builtin (t, tt)); - c.cwd = move (r.cwd); - c.variables = move (r.variables); - c.timeout = r.timeout; - env = true; + if (t.value == "env") + { + parsed_env r (parse_env_builtin (t, tt)); + c.cwd = move (r.cwd); + c.variables = move (r.variables); + c.timeout = r.timeout; + c.timeout_success = r.timeout_success; + env = true; + } + else if (t.value == "for") + { + if (expr.size () > 1) + fail (l) << "command expression involving for-loop"; + + for_loop = true; + + // Save 'for' as a program name and continue parsing as a + // command. + // + add_word (move (t.value), l); + next (t, tt); + continue; + } } // Parse the next chunk as names to get expansion, etc. Note that @@ -1092,16 +1212,17 @@ namespace build2 // Process what we got. // - // First see if this is a value that should not be re-lexed. The - // long term plan is to only re-lex values of a special type - // representing a canned command line. + // First see if this is a value that should not be re-lexed. We + // only re-lex values of the special `cmdline` type that + // represents a canned command line. // // Otherwise, determine whether anything inside was quoted (note // that the current token is "next" and is not part of this). // - bool q ( - (pr.value && !relex_) || - (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); + bool lex ( + pr.value + ? pr.type != nullptr && pr.type->is_a<cmdline> () + : (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) == 0); for (name& n: ns) { @@ -1115,7 +1236,7 @@ namespace build2 { diag_record dr (fail (l)); dr << "invalid string value "; - to_stream (dr.os, n, true /* quote */); + to_stream (dr.os, n, quote_mode::normal); } // If it is a quoted chunk, then we add the word as is. @@ -1123,10 +1244,7 @@ namespace build2 // interesting characters (operators plus quotes/escapes), // then no need to re-lex. // - // NOTE: update quoting (script.cxx:to_stream_q()) if adding - // any new characters. - // - if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) + if (!lex || !need_cmdline_relex (s)) add_word (move (s), l); else { @@ -1216,9 +1334,16 @@ namespace build2 switch (tt) { case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: { + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Check that the previous command makes sense. // check_command (l, tt != type::pipe); @@ -1238,30 +1363,11 @@ namespace build2 break; } - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (move (t), tt, l); - break; - } - case type::clean: { + if (for_loop) + fail (l) << "cleanup in for-loop"; + parse_clean (t); break; } @@ -1272,6 +1378,27 @@ namespace build2 fail (l) << "here-document redirect in expansion"; break; } + + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. + + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_file: + { + parse_redirect (move (t), tt, l); + break; + } } } @@ -1299,7 +1426,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); } - return make_pair (move (expr), move (hd)); + return parse_command_expr_result {move (expr), move (hd), for_loop}; } parser::parsed_env parser:: @@ -1351,7 +1478,7 @@ namespace build2 { diag_record dr (fail (l)); dr << "invalid string value "; - to_stream (dr.os, n, true /* quote */); + to_stream (dr.os, n, quote_mode::normal); } } @@ -1475,6 +1602,10 @@ namespace build2 { r.timeout = chrono::seconds (*v); } + else if (o == "-s" || o == "--timeout-success") + { + r.timeout_success = true; + } else if (optional<dir_path> v = dir ("--cwd", "-c")) { r.cwd = move (*v); @@ -1489,6 +1620,9 @@ namespace build2 break; } + if (r.timeout_success && !r.timeout) + fail (l) << "env: -s|--timeout-success specified without -t|--timeout"; + // Parse arguments (variable sets). // for (; i != e; ++i) @@ -1537,7 +1671,7 @@ namespace build2 diag_record dr; dr << fail (l) << "expected exit status instead of "; - to_stream (dr.os, ns, true /* quote */); + to_stream (dr.os, ns, quote_mode::normal); dr << info << "exit status is an unsigned integer less than 256"; } @@ -1548,7 +1682,7 @@ namespace build2 void parser:: parse_here_documents (token& t, type& tt, - pair<command_expr, here_docs>& p) + parse_command_expr_result& pr) { // enter: newline // leave: newline @@ -1556,7 +1690,7 @@ namespace build2 // Parse here-document fragments in the order they were mentioned on // the command line. // - for (here_doc& h: p.second) + for (here_doc& h: pr.docs) { // Switch to the here-line mode which is like single/double-quoted // string but recognized the newline as a separator. @@ -1576,7 +1710,7 @@ namespace build2 { auto i (h.redirects.cbegin ()); - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional<redirect>& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -1608,7 +1742,7 @@ namespace build2 // for (++i; i != h.redirects.cend (); ++i) { - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional<redirect>& ir (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -2034,6 +2168,8 @@ namespace build2 else if (n == "elif") r = line_type::cmd_elif; else if (n == "elif!") r = line_type::cmd_elifn; else if (n == "else") r = line_type::cmd_else; + else if (n == "while") r = line_type::cmd_while; + else if (n == "for") r = line_type::cmd_for_stream; else if (n == "end") r = line_type::cmd_end; else { @@ -2064,8 +2200,9 @@ namespace build2 exec_lines (lines::const_iterator i, lines::const_iterator e, const function<exec_set_function>& exec_set, const function<exec_cmd_function>& exec_cmd, - const function<exec_if_function>& exec_if, - size_t& li, + const function<exec_cond_function>& exec_cond, + const function<exec_for_function>& exec_for, + const iteration_index* ii, size_t& li, variable_pool* var_pool) { try @@ -2089,6 +2226,73 @@ namespace build2 next (t, tt); const location ll (get_location (t)); + // If end is true, then find the flow control construct's end ('end' + // line). Otherwise, find the flow control construct's block end + // ('end', 'else', etc). If skip is true then increment the command + // line index. + // + auto fcend = [e, &li] (lines::const_iterator j, + bool end, + bool skip) -> lines::const_iterator + { + // We need to be aware of nested flow control constructs. + // + size_t n (0); + + for (++j; j != e; ++j) + { + line_type lt (j->type); + + if (lt == line_type::cmd_if || + lt == line_type::cmd_ifn || + lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args) + ++n; + + // If we are nested then we just wait until we get back + // to the surface. + // + if (n == 0) + { + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + if (end) break; + // Fall through. + case line_type::cmd_end: return j; + default: break; + } + } + + if (lt == line_type::cmd_end) + --n; + + if (skip) + { + // Note that we don't count else, end, and 'for x: ...' as + // commands. + // + switch (lt) + { + case line_type::cmd: + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_for_stream: + case line_type::cmd_while: ++li; break; + default: break; + } + } + } + + assert (false); // Missing end. + return e; + }; + switch (lt) { case line_type::var: @@ -2124,7 +2328,10 @@ namespace build2 single = true; } - exec_cmd (t, tt, li++, single, ll); + exec_cmd (t, tt, + ii, li++, single, + nullptr /* command_function */, + ll); replay_stop (); break; @@ -2140,7 +2347,7 @@ namespace build2 bool take; if (lt != line_type::cmd_else) { - take = exec_if (t, tt, li++, ll); + take = exec_cond (t, tt, ii, li++, ll); if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) take = !take; @@ -2153,97 +2360,383 @@ namespace build2 replay_stop (); - // If end is true, then find the 'end' line. Otherwise, find - // the next if-else line. If skip is true then increment the - // command line index. + // If we are taking this branch then we need to parse all the + // lines until the next if-else line and then skip all the lines + // until the end (unless we are already at the end). + // + // Otherwise, we need to skip all the lines until the next + // if-else line and then continue parsing. // - auto next = [e, &li] (lines::const_iterator j, - bool end, - bool skip) -> lines::const_iterator + if (take) + { + // Find block end. + // + lines::const_iterator j (fcend (i, false, false)); + + if (!exec_lines (i + 1, j, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool)) + return false; + + // Find construct end. + // + i = j->type == line_type::cmd_end ? j : fcend (j, true, true); + } + else + { + // Find block end. + // + i = fcend (i, false, true); + + if (i->type != line_type::cmd_end) + --i; // Continue with this line (e.g., elif or else). + } + + break; + } + case line_type::cmd_while: + { + // The while-loop construct end. Set on the first iteration. + // + lines::const_iterator we (e); + + size_t wli (li); + + for (iteration_index wi {1, ii};; wi.index++) + { + next (t, tt); // Skip to start of command. + + bool exec (exec_cond (t, tt, &wi, li++, ll)); + + replay_stop (); + + // If the condition evaluates to true, then we need to parse + // all the lines until the end line, prepare for the condition + // reevaluation, and re-iterate. + // + // Otherwise, we need to skip all the lines until the end + // line, bail out from the loop, and continue parsing. + // + if (exec) { - // We need to be aware of nested if-else chains. + // Find the construct end, if it is not found yet. + // + if (we == e) + we = fcend (i, true, false); + + if (!exec_lines (i + 1, we, + exec_set, exec_cmd, exec_cond, exec_for, + &wi, li, + var_pool)) + return false; + + // Prepare for the condition reevaluation. // - size_t n (0); + replay_data (replay_tokens (ln.tokens)); + next (t, tt); + li = wli; + } + else + { + // Position to the construct end, always incrementing the + // line index (skip is true). + // + i = fcend (i, true, true); + break; // Bail out from the while-loop. + } + } + + break; + } + case line_type::cmd_for_stream: + { + // The for-loop construct end. Set on the first iteration. + // + lines::const_iterator fe (e); - for (++j; j != e; ++j) + // Let's "wrap up" all the required data into the single object + // to rely on the "small function object" optimization. + // + struct loop_data + { + lines::const_iterator i; + lines::const_iterator e; + const function<exec_set_function>& exec_set; + const function<exec_cmd_function>& exec_cmd; + const function<exec_cond_function>& exec_cond; + const function<exec_for_function>& exec_for; + const iteration_index* ii; + size_t& li; + variable_pool* var_pool; + decltype (fcend)& fce; + lines::const_iterator& fe; + } ld {i, e, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool, + fcend, + fe}; + + function<command_function> cf ( + [&ld, this] + (environment& env, + const strings& args, + auto_fd in, + pipe_command* pipe, + const optional<deadline>& dl, + const location& ll) + { + namespace cli = build2::build::cli; + + try { - line_type lt (j->type); + // Parse arguments. + // + cli::vector_scanner scan (args); + for_options ops (scan); - if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) - ++n; + // Note: diagnostics consistent with the set builtin. + // + if (ops.whitespace () && ops.newline ()) + fail (ll) << "for: both -n|--newline and " + << "-w|--whitespace specified"; - // If we are nested then we just wait until we get back - // to the surface. + if (!scan.more ()) + fail (ll) << "for: missing variable name"; + + string vname (scan.next ()); + if (vname.empty ()) + fail (ll) << "for: empty variable name"; + + // Detect patterns analogous to parse_variable_name() (so + // we diagnose `for x[string]`). + // + if (vname.find_first_of ("[*?") != string::npos) + fail (ll) << "for: expected variable name instead of " + << vname; + + // Let's also diagnose the `... | for x:...` misuse which + // can probably be quite common. // - if (n == 0) + if (vname.find (':') != string::npos) + fail (ll) << "for: ':' after variable name"; + + string attrs; + if (scan.more ()) { - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - if (end) break; - // Fall through. - case line_type::cmd_end: return j; - default: break; - } + attrs = scan.next (); + + if (attrs.empty ()) + fail (ll) << "for: empty variable attributes"; + + if (scan.more ()) + fail (ll) << "for: unexpected argument '" + << scan.next () << "'"; } - if (lt == line_type::cmd_end) - --n; + // Since the command pipe is parsed, we can stop + // replaying. Note that we should do this before calling + // exec_lines() for the loop body. Also note that we + // should increment the line index before that. + // + replay_stop (); + + size_t fli (++ld.li); + iteration_index fi {1, ld.ii}; - if (skip) + // Let's "wrap up" all the required data into the single + // object to rely on the "small function object" + // optimization. + // + struct { - // Note that we don't count else and end as commands. - // - switch (lt) + loop_data& ld; + environment& env; + const string& vname; + const string& attrs; + const location& ll; + size_t fli; + iteration_index& fi; + + } d {ld, env, vname, attrs, ll, fli, fi}; + + function<void (string&&)> f ( + [&d, this] (string&& s) { - case line_type::cmd: - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: ++li; break; - default: break; - } - } + loop_data& ld (d.ld); + + ld.li = d.fli; + + // Don't move from the variable name since it is used + // on each iteration. + // + d.env.set_variable (d.vname, + names {name (move (s))}, + d.attrs, + d.ll); + + // Find the construct end, if it is not found yet. + // + if (ld.fe == ld.e) + ld.fe = ld.fce (ld.i, true, false); + + if (!exec_lines (ld.i + 1, ld.fe, + ld.exec_set, + ld.exec_cmd, + ld.exec_cond, + ld.exec_for, + &d.fi, ld.li, + ld.var_pool)) + { + throw exit (true); + } + + d.fi.index++; + }); + + read (move (in), + !ops.newline (), ops.newline (), ops.exact (), + f, + pipe, + dl, + ll, + "for"); + } + catch (const cli::exception& e) + { + fail (ll) << "for: " << e; } + }); - assert (false); // Missing end. - return e; - }; + exec_cmd (t, tt, ii, li, false /* single */, cf, ll); - // If we are taking this branch then we need to parse all the - // lines until the next if-else line and then skip all the - // lines until the end (unless next is already end). + // Position to construct end. // - // Otherwise, we need to skip all the lines until the next - // if-else line and then continue parsing. + i = (fe != e ? fe : fcend (i, true, true)); + + break; + } + case line_type::cmd_for_args: + { + // Parse the variable name. // - if (take) + next (t, tt); + + assert (tt == type::word && t.qtype == quote_type::unquoted); + + string vn (move (t.value)); + + // Enter the variable into the pool if this is not done during + // the script parsing (see the var line type handling for + // details). + // + const variable* var (ln.var); + + if (var == nullptr) { - // Next if-else. - // - lines::const_iterator j (next (i, false, false)); - if (!exec_lines (i + 1, j, - exec_set, exec_cmd, exec_if, - li, - var_pool)) - return false; + assert (var_pool != nullptr); - i = j->type == line_type::cmd_end ? j : next (j, true, true); + var = &var_pool->insert (move (vn)); } - else + + // Parse the potential element attributes and skip the colon. + // + next_with_attributes (t, tt); + attributes_push (t, tt); + + assert (tt == type::colon); + + // Save element attributes so that we can inject them on each + // iteration. + // + attributes val_attrs (attributes_pop ()); + + // Parse the value with the potential attributes. + // + // Note that we don't really need to change the mode since we + // are replaying the tokens. + // + value val; + apply_value_attributes (nullptr /* variable */, + val, + parse_variable_line (t, tt), + type::assign); + + replay_stop (); + + // If the value is not NULL then iterate over its elements, + // assigning them to the for-loop variable, and parsing all the + // construct lines afterwards. Then position to the end line of + // the construct and continue parsing. + + // The for-loop construct end. Set on the first iteration. + // + lines::const_iterator fe (e); + + if (val) { - i = next (i, false, true); - if (i->type != line_type::cmd_end) - --i; // Continue with this line (e.g., elif or else). + // If this value is a vector, then save its element type so + // that we can typify each element below. + // + const value_type* etype (nullptr); + + if (val.type != nullptr) + { + etype = val.type->element_type; + + // Note that here we don't want to be reducing empty simple + // values to empty lists. + // + untypify (val, false /* reduce */); + } + + size_t fli (li); + iteration_index fi {1, ii}; + names& ns (val.as<names> ()); + + for (auto ni (ns.begin ()), ne (ns.end ()); ni != ne; ++ni) + { + li = fli; + + // Set the variable value. + // + bool pair (ni->pair); + names n; + n.push_back (move (*ni)); + if (pair) n.push_back (move (*++ni)); + value v (move (n)); // Untyped. + + if (etype != nullptr) + typify (v, *etype, var); + + exec_for (*var, move (v), val_attrs, ll); + + // Find the construct end, if it is not found yet. + // + if (fe == e) + fe = fcend (i, true, false); + + if (!exec_lines (i + 1, fe, + exec_set, exec_cmd, exec_cond, exec_for, + &fi, li, + var_pool)) + return false; + + fi.index++; + } } + // Position to construct end. + // + i = (fe != e ? fe : fcend (i, true, true)); + break; } case line_type::cmd_end: { assert (false); + break; } } } @@ -2278,7 +2771,7 @@ namespace build2 } parser::parsed_doc:: - parsed_doc (parsed_doc&& d) + parsed_doc (parsed_doc&& d) noexcept : re (d.re), end_line (d.end_line), end_column (d.end_column) { if (re) |