diff options
Diffstat (limited to 'libbuild2/test/script/parser.cxx')
-rw-r--r-- | libbuild2/test/script/parser.cxx | 558 |
1 files changed, 404 insertions, 154 deletions
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx index 9e92f3b..337b162 100644 --- a/libbuild2/test/script/parser.cxx +++ b/libbuild2/test/script/parser.cxx @@ -293,22 +293,30 @@ namespace build2 } // Parse a logical line (as well as scope-if since the only way to - // recognize it is to parse the if line). + // recognize it is to parse the if line), handling the flow control + // constructs recursively. // // If one is true then only parse one line returning an indication of - // whether the line ended with a semicolon. If if_line is true then this - // line can be an if-else construct flow control line (else, end, etc). + // whether the line ended with a semicolon. If the flow control + // construct type is specified, then this line is assumed to belong to + // such construct. // bool parser:: pre_parse_line (token& t, type& tt, optional<description>& d, lines* ls, bool one, - bool if_line) + optional<line_type> fct) { // enter: next token is peeked at (type in tt) // leave: newline + assert (!fct || + *fct == line_type::cmd_if || + *fct == line_type::cmd_while || + *fct == line_type::cmd_for_stream || + *fct == line_type::cmd_for_args); + // Note: token is only peeked at. // const location ll (get_location (peeked ())); @@ -317,6 +325,52 @@ namespace build2 // line_type lt; type st (type::eos); // Later, can only be set to plus or minus. + bool semi (false); + + // Parse the command line tail, starting from the newline or the + // potential colon/semicolon token. + // + // Note that colon and semicolon are only valid in test command lines + // and after 'end' in flow control constructs. Note that we always + // recognize them lexically, even when they are not valid tokens per + // the grammar. 
+ // + auto parse_command_tail = [&t, &tt, &st, &lt, &d, &semi, &ll, this] () + { + if (tt != type::newline) + { + if (lt != line_type::cmd && lt != line_type::cmd_end) + fail (t) << "expected newline instead of " << t; + + switch (st) + { + case type::plus: fail (t) << t << " after setup command" << endf; + case type::minus: fail (t) << t << " after teardown command" << endf; + } + } + + switch (tt) + { + case type::colon: + { + if (d) + fail (ll) << "both leading and trailing descriptions"; + + d = parse_trailing_description (t, tt); + break; + } + case type::semi: + { + semi = true; + replay_pop (); // See above for the reasoning. + next (t, tt); // Get newline. + break; + } + } + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + }; switch (tt) { @@ -364,8 +418,12 @@ namespace build2 { const string& n (t.value); - if (n == "if") lt = line_type::cmd_if; - else if (n == "if!") lt = line_type::cmd_ifn; + // Handle the for-loop consistently with pre_parse_line_start(). + // + if (n == "if") lt = line_type::cmd_if; + else if (n == "if!") lt = line_type::cmd_ifn; + else if (n == "while") lt = line_type::cmd_while; + else if (n == "for") lt = line_type::cmd_for_stream; } break; @@ -379,8 +437,6 @@ namespace build2 // Pre-parse the line keeping track of whether it ends with a semi. // - bool semi (false); - line ln; switch (lt) { @@ -407,76 +463,147 @@ namespace build2 mode (lexer_mode::variable_line); parse_variable_line (t, tt); + // Note that the semicolon token is only required during + // pre-parsing to decide which line list the current line should + // go to and provides no additional semantics during the + // execution. Moreover, build2::script::parser::exec_lines() + // doesn't expect this token to be present. Thus, we just drop + // this token from the saved tokens. 
+ // semi = (tt == type::semi); - if (tt == type::semi) + if (semi) + { + replay_pop (); next (t, tt); + } if (tt != type::newline) fail (t) << "expected newline instead of " << t; break; } + // + // See pre_parse_line_start() for details. + // + case line_type::cmd_for_args: assert (false); break; + case line_type::cmd_for_stream: + { + // First we need to sense the next few tokens and detect which + // form of the for-loop that actually is (see + // libbuild2/build/script/parser.cxx for details). + // + token pt (t); + assert (pt.type == type::word && pt.value == "for"); + + mode (lexer_mode::for_loop); + next (t, tt); + + string& n (t.value); + + if (tt == type::word && t.qtype == quote_type::unquoted && + (n[0] == '_' || alpha (n[0]) || // Variable. + n == "*" || n == "~" || n == "@")) // Special variable. + { + // Detect patterns analogous to parse_variable_name() (so we + // diagnose `for x[string]: ...`). + // + if (n.find_first_of ("[*?") != string::npos) + fail (t) << "expected variable name instead of " << n; + + if (special_variable (n)) + fail (t) << "attempt to set '" << n << "' variable directly"; + + if (lexer_->peek_char ().first == '[') + { + token vt (move (t)); + next_with_attributes (t, tt); + + attributes_push (t, tt, + true /* standalone */, + false /* next_token */); + + t = move (vt); + tt = t.type; + } + + if (lexer_->peek_char ().first == ':') + lt = line_type::cmd_for_args; + } + + if (lt == line_type::cmd_for_stream) // for x <... + { + ln.var = nullptr; + + expire_mode (); + + parse_command_expr_result r ( + parse_command_expr (t, tt, + lexer::redirect_aliases, + move (pt))); + + assert (r.for_loop); + + parse_command_tail (); + parse_here_documents (t, tt, r); + } + else // for x: ... + { + ln.var = &script_->var_pool.insert (move (n)); + + next (t, tt); + + assert (tt == type::colon); + + expire_mode (); + + // Parse the value similar to the var line type (see above), + // except for the fact that we don't expect a trailing semicolon. 
+ // + mode (lexer_mode::variable_line); + parse_variable_line (t, tt); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t << " after for"; + } + + break; + } case line_type::cmd_elif: case line_type::cmd_elifn: case line_type::cmd_else: - case line_type::cmd_end: { - if (!if_line) - { + if (!fct || *fct != line_type::cmd_if) fail (t) << lt << " without preceding 'if'"; - } + } + // Fall through. + case line_type::cmd_end: + { + if (!fct) + fail (t) << lt << " without preceding 'if', 'for', or 'while'"; } // Fall through. case line_type::cmd_if: case line_type::cmd_ifn: + case line_type::cmd_while: next (t, tt); // Skip to start of command. // Fall through. case line_type::cmd: { - pair<command_expr, here_docs> p; + parse_command_expr_result r; if (lt != line_type::cmd_else && lt != line_type::cmd_end) - p = parse_command_expr (t, tt, lexer::redirect_aliases); - - // Colon and semicolon are only valid in test command lines and - // after 'end' in if-else. Note that we still recognize them - // lexically, they are just not valid tokens per the grammar. - // - if (tt != type::newline) - { - if (lt != line_type::cmd && lt != line_type::cmd_end) - fail (t) << "expected newline instead of " << t; - - switch (st) - { - case type::plus: fail (t) << t << " after setup command" << endf; - case type::minus: fail (t) << t << " after teardown command" << endf; - } - } + r = parse_command_expr (t, tt, lexer::redirect_aliases); - switch (tt) + if (r.for_loop) { - case type::colon: - { - if (d) - fail (ll) << "both leading and trailing descriptions"; - - d = parse_trailing_description (t, tt); - break; - } - case type::semi: - { - semi = true; - next (t, tt); // Get newline. 
- break; - } + lt = line_type::cmd_for_stream; + ln.var = nullptr; } - if (tt != type::newline) - fail (t) << "expected newline instead of " << t; + parse_command_tail (); + parse_here_documents (t, tt, r); - parse_here_documents (t, tt, p); break; } } @@ -494,24 +621,39 @@ namespace build2 ln.tokens = replay_data (); ls->push_back (move (ln)); - if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) + switch (lt) { - semi = pre_parse_if_else (t, tt, d, *ls); + case line_type::cmd_if: + case line_type::cmd_ifn: + { + semi = pre_parse_if_else (t, tt, d, *ls); - // If this turned out to be scope-if, then ls is empty, semi is - // false, and none of the below logic applies. - // - if (ls->empty ()) - return semi; + // If this turned out to be scope-if, then ls is empty, semi is + // false, and none of the below logic applies. + // + if (ls->empty ()) + return semi; + + break; + } + case line_type::cmd_while: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: + { + semi = pre_parse_loop (t, tt, lt, d, *ls); + break; + } + default: break; } // Unless we were told where to put it, decide where it actually goes. // if (ls == &ls_data) { - // First pre-check variable and variable-if: by themselves (i.e., - // without a trailing semicolon) they are treated as either setup or - // teardown without plus/minus. Also handle illegal line types. + // First pre-check variables and variable-only flow control + // constructs: by themselves (i.e., without a trailing semicolon) + // they are treated as either setup or teardown without + // plus/minus. Also handle illegal line types. // switch (lt) { @@ -524,8 +666,11 @@ namespace build2 } case line_type::cmd_if: case line_type::cmd_ifn: + case line_type::cmd_while: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: { - // See if this is a variable-only command-if. + // See if this is a variable-only flow control construct. 
// if (find_if (ls_data.begin (), ls_data.end (), [] (const line& l) { @@ -549,7 +694,7 @@ namespace build2 fail (ll) << "description before setup/teardown variable"; else fail (ll) << "description before/after setup/teardown " - << "variable-if"; + << "variable-only " << lt; } // If we don't have any nested scopes or teardown commands, @@ -793,7 +938,7 @@ namespace build2 td, &ls, true /* one */, - true /* if_line */)); + line_type::cmd_if)); assert (ls.size () == 1 && ls.back ().type == lt); assert (tt == type::newline); @@ -831,6 +976,99 @@ namespace build2 return false; // We never end with a semi. } + // Pre-parse the flow control construct block line. Fail if the line is + // unexpectedly followed with a semicolon or test description. + // + bool parser:: + pre_parse_block_line (token& t, type& tt, + line_type bt, + optional<description>& d, + lines& ls) + { + // enter: peeked first token of the line (type in tt) + // leave: newline + + const location ll (get_location (peeked ())); + + switch (tt) + { + case type::colon: + fail (ll) << "description inside " << bt << endf; + case type::eos: + case type::rcbrace: + case type::lcbrace: + fail (ll) << "expected closing 'end'" << endf; + case type::plus: + fail (ll) << "setup command inside " << bt << endf; + case type::minus: + fail (ll) << "teardown command inside " << bt << endf; + } + + // Parse one line. Note that this one line can still be multiple lines + // in case of a flow control construct. In this case we want to view + // it as, for example, cmd_if, not cmd_end. Thus remember the start + // position of the next logical line. + // + size_t i (ls.size ()); + + line_type fct; // Flow control construct type the block type relates to. 
+ + switch (bt) + { + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + { + fct = line_type::cmd_if; + break; + } + case line_type::cmd_while: + case line_type::cmd_for_stream: + case line_type::cmd_for_args: + { + fct = bt; + break; + } + default: assert(false); + } + + optional<description> td; + bool semi (pre_parse_line (t, tt, td, &ls, true /* one */, fct)); + + assert (tt == type::newline); + + line_type lt (ls[i].type); + + // First take care of 'end'. + // + if (lt == line_type::cmd_end) + { + if (td) + { + if (d) + fail (ll) << "both leading and trailing descriptions"; + + d = move (td); + } + + return semi; + } + + // For any other line trailing semi or description is illegal. + // + // @@ Not the exact location of semi/colon. + // + if (semi) + fail (ll) << "';' inside " << bt; + + if (td) + fail (ll) << "description inside " << bt; + + return false; + } + bool parser:: pre_parse_if_else_command (token& t, type& tt, optional<description>& d, @@ -839,70 +1077,23 @@ namespace build2 // enter: peeked first token of next line (type in tt) // leave: newline - // Parse lines until we see closing 'end'. Nested if-else blocks are - // handled recursively. + // Parse lines until we see closing 'end'. // for (line_type bt (line_type::cmd_if); // Current block. ; tt = peek (lexer_mode::first_token)) { const location ll (get_location (peeked ())); - - switch (tt) - { - case type::colon: - fail (ll) << "description inside " << bt << endf; - case type::eos: - case type::rcbrace: - case type::lcbrace: - fail (ll) << "expected closing 'end'" << endf; - case type::plus: - fail (ll) << "setup command inside " << bt << endf; - case type::minus: - fail (ll) << "teardown command inside " << bt << endf; - } - - // Parse one line. Note that this one line can still be multiple - // lines in case of if-else. In this case we want to view it as - // cmd_if, not cmd_end. 
Thus remember the start position of the - // next logical line. - // size_t i (ls.size ()); - optional<description> td; - bool semi (pre_parse_line (t, tt, - td, - &ls, - true /* one */, - true /* if_line */)); - assert (tt == type::newline); + bool semi (pre_parse_block_line (t, tt, bt, d, ls)); line_type lt (ls[i].type); // First take care of 'end'. // if (lt == line_type::cmd_end) - { - if (td) - { - if (d) - fail (ll) << "both leading and trailing descriptions"; - - d = move (td); - } - return semi; - } - - // For any other line trailing semi or description is illegal. - // - // @@ Not the exact location of semi/colon. - // - if (semi) - fail (ll) << "';' inside " << bt; - - if (td) - fail (ll) << "description inside " << bt; // Check if-else block sequencing. // @@ -924,6 +1115,40 @@ namespace build2 default: break; } } + + assert (false); // Can't be here. + return false; + } + + bool parser:: + pre_parse_loop (token& t, type& tt, + line_type lt, + optional<description>& d, + lines& ls) + { + // enter: <newline> (previous line) + // leave: <newline> + + assert (lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args); + + tt = peek (lexer_mode::first_token); + + // Parse lines until we see closing 'end'. + // + for (;; tt = peek (lexer_mode::first_token)) + { + size_t i (ls.size ()); + + bool semi (pre_parse_block_line (t, tt, lt, d, ls)); + + if (ls[i].type == line_type::cmd_end) + return semi; + } + + assert (false); // Can't be here. + return false; } void parser:: @@ -1057,7 +1282,7 @@ namespace build2 diag_record dr (fail (dl)); dr << "invalid testscript include path "; - to_stream (dr.os, n, true); // Quote. + to_stream (dr.os, n, quote_mode::normal); } } @@ -1266,21 +1491,18 @@ namespace build2 // Note: this one is only used during execution. 
- pair<command_expr, here_docs> p ( + parse_command_expr_result pr ( parse_command_expr (t, tt, lexer::redirect_aliases)); - switch (tt) - { - case type::colon: parse_trailing_description (t, tt); break; - case type::semi: next (t, tt); break; // Get newline. - } + if (tt == type::colon) + parse_trailing_description (t, tt); assert (tt == type::newline); - parse_here_documents (t, tt, p); + parse_here_documents (t, tt, pr); assert (tt == type::newline); - command_expr r (move (p.first)); + command_expr r (move (pr.expr)); // If the test program runner is specified, then adjust the // expressions to run test programs via this runner. @@ -1387,6 +1609,17 @@ namespace build2 { runner_->enter (*scope_, scope_->start_loc_); + // Set thread-specific current directory override. In particular, this + // makes sure functions like $path.complete() work correctly. + // + auto wdg = make_guard ( + [old = path_traits::thread_current_directory ()] () + { + path_traits::thread_current_directory (old); + }); + + path_traits::thread_current_directory (&scope_->work_dir.path->string ()); + // Note that we rely on "small function object" optimization for the // exec_*() lambdas. // @@ -1402,9 +1635,6 @@ namespace build2 mode (lexer_mode::variable_line); value rhs (parse_variable_line (t, tt)); - if (tt == type::semi) - next (t, tt); - assert (tt == type::newline); // Assign. @@ -1424,8 +1654,9 @@ namespace build2 command_type ct; auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt, - size_t li, + const iteration_index* ii, size_t li, bool single, + const function<command_function>& cf, const location& ll) { // We use the 0 index to signal that this is the only command. 
@@ -1437,19 +1668,35 @@ namespace build2 command_expr ce ( parse_command_line (t, static_cast<token_type&> (tt))); - runner_->run (*scope_, ce, ct, li, ll); + runner_->run (*scope_, ce, ct, ii, li, cf, ll); }; - auto exec_if = [this] (token& t, build2::script::token_type& tt, - size_t li, - const location& ll) + auto exec_cond = [this] (token& t, build2::script::token_type& tt, + const iteration_index* ii, size_t li, + const location& ll) { command_expr ce ( parse_command_line (t, static_cast<token_type&> (tt))); - // Assume if-else always involves multiple commands. + // Assume a flow control construct always involves multiple + // commands. // - return runner_->run_if (*scope_, ce, li, ll); + return runner_->run_cond (*scope_, ce, ii, li, ll); + }; + + auto exec_for = [this] (const variable& var, + value&& val, + const attributes& val_attrs, + const location&) + { + value& lhs (scope_->assign (var)); + + attributes_.push_back (val_attrs); + + apply_value_attributes (&var, lhs, move (val), type::assign); + + if (script_->test_command_var (var.name)) + scope_->reset_special (); }; size_t li (1); @@ -1459,16 +1706,17 @@ namespace build2 ct = command_type::test; exec_lines (t->tests_.begin (), t->tests_.end (), - exec_set, exec_cmd, exec_if, - li); + exec_set, exec_cmd, exec_cond, exec_for, + nullptr /* iteration_index */, li); } else if (group* g = dynamic_cast<group*> (scope_)) { ct = command_type::setup; - bool exec_scope (exec_lines (g->setup_.begin (), g->setup_.end (), - exec_set, exec_cmd, exec_if, - li)); + bool exec_scope ( + exec_lines (g->setup_.begin (), g->setup_.end (), + exec_set, exec_cmd, exec_cond, exec_for, + nullptr /* iteration_index */, li)); if (exec_scope) { @@ -1526,7 +1774,8 @@ namespace build2 try { - take = runner_->run_if (*scope_, ce, li++, ll); + take = runner_->run_cond ( + *scope_, ce, nullptr /* iteration_index */, li++, ll); } catch (const exit_scope& e) { @@ -1593,24 +1842,24 @@ namespace build2 // UBSan workaround. 
// const diag_frame* df (diag_frame::stack ()); - if (!ctx.sched.async (task_count, - [] (const diag_frame* ds, - scope& s, - script& scr, - runner& r) - { - diag_frame::stack_guard dsg (ds); - execute_impl (s, scr, r); - }, - df, - ref (*chain), - ref (*script_), - ref (*runner_))) + if (!ctx->sched->async (task_count, + [] (const diag_frame* ds, + scope& s, + script& scr, + runner& r) + { + diag_frame::stack_guard dsg (ds); + execute_impl (s, scr, r); + }, + df, + ref (*chain), + ref (*script_), + ref (*runner_))) { // Bail out if the scope has failed and we weren't instructed // to keep going. // - if (chain->state == scope_state::failed && !ctx.keep_going) + if (chain->state == scope_state::failed && !ctx->keep_going) throw failed (); } } @@ -1637,8 +1886,8 @@ namespace build2 ct = command_type::teardown; exec_lines (g->tdown_.begin (), g->tdown_.end (), - exec_set, exec_cmd, exec_if, - li); + exec_set, exec_cmd, exec_cond, exec_for, + nullptr /* iteration_index */, li); } else assert (false); @@ -1652,7 +1901,8 @@ namespace build2 // The rest. // - // When add a special variable don't forget to update lexer::word(). + // When add a special variable don't forget to update lexer::word() and + // for-loop parsing in pre_parse_line(). // bool parser:: special_variable (const string& n) noexcept @@ -1661,7 +1911,7 @@ namespace build2 } lookup parser:: - lookup_variable (name&& qual, string&& name, const location& loc) + lookup_variable (names&& qual, string&& name, const location& loc) { if (pre_parse_) return lookup (); |