From 57b10c06925d0bdf6ffb38488ee908f085109e95 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 4 Jul 2019 19:12:15 +0300 Subject: Move config, dist, test, and install modules into library --- build2/test/script/parser.cxx | 3451 ----------------------------------------- 1 file changed, 3451 deletions(-) delete mode 100644 build2/test/script/parser.cxx (limited to 'build2/test/script/parser.cxx') diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx deleted file mode 100644 index 59b950f..0000000 --- a/build2/test/script/parser.cxx +++ /dev/null @@ -1,3451 +0,0 @@ -// file : build2/test/script/parser.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#include - -#include - -#include // sched, keep_going - -#include -#include - -using namespace std; - -namespace build2 -{ - namespace test - { - namespace script - { - using type = token_type; - - // Return true if the string contains only a single digit characters - // (used to detect the special $N variables). - // - static inline bool - digit (const string& s) - { - return s.size () == 1 && butl::digit (s[0]); - } - - // - // Pre-parse. - // - - void parser:: - pre_parse (script& s) - { - const path& p (s.script_target.path ()); - assert (!p.empty ()); // Should have been assigned. - - try - { - ifdstream ifs (p); - pre_parse (ifs, s); - } - catch (const io_error& e) - { - fail << "unable to read testscript " << p << ": " << e << endf; - } - } - - void parser:: - pre_parse (istream& is, script& s) - { - path_ = &*s.paths_.insert (s.script_target.path ()).first; - - pre_parse_ = true; - - lexer l (is, *path_, lexer_mode::command_line); - set_lexer (&l); - - id_prefix_.clear (); - - id_map idm; - include_set ins; - - script_ = &s; - runner_ = nullptr; - group_ = script_; - id_map_ = &idm; - include_set_ = &ins; - scope_ = nullptr; - - //@@ PAT TODO: set pbase_? - - // Start location of the implied script group is the beginning of - // the file. End location -- end of the file. - // - group_->start_loc_ = location (path_, 1, 1); - - token t (pre_parse_scope_body ()); - - if (t.type != type::eos) - fail (t) << "stray " << t; - - group_->end_loc_ = get_location (t); - } - - bool parser:: - pre_parse_demote_group_scope (unique_ptr& s) - { - // See if this turned out to be an explicit test scope. An explicit - // test scope contains a single test, only variable assignments in - // setup and nothing in teardown. Plus only the group can have the - // description. Because we apply this recursively, also disqualify - // a test scope that has an if-condition. - // - // If we have a chain, then all the scopes must be demotable. So we - // first check if this scope is demotable and if so then recurse for - // the next in chain. - // - group& g (static_cast (*s)); - - auto& sc (g.scopes); - auto& su (g.setup_); - auto& td (g.tdown_); - - test* t; - if (sc.size () == 1 && - (t = dynamic_cast (sc.back ().get ())) != nullptr && - find_if ( - su.begin (), su.end (), - [] (const line& l) { - return l.type != line_type::var; - }) == su.end () && - - td.empty () && - !t->desc && - !t->if_cond_) - { - if (g.if_chain != nullptr && - !pre_parse_demote_group_scope (g.if_chain)) - return false; - - // It would have been nice to reuse the test object and only throw - // away the group. However, the merged scope has to use id_path and - // wd_path of the group. So to keep things simple we are going to - // throw away both and create a new test object. - // - // We always use the group's id since the test cannot have a - // user-provided one. - // - unique_ptr m (new test (g.id_path.leaf ().string (), *group_)); - - // Move the description, if-condition, and if-chain. - // - m->desc = move (g.desc); - m->if_cond_ = move (g.if_cond_); - m->if_chain = move (g.if_chain); - - // Merge the lines of the group and the test. - // - if (su.empty ()) - m->tests_ = move (t->tests_); - else - { - m->tests_ = move (su); // Should come first. - m->tests_.insert (m->tests_.end (), - make_move_iterator (t->tests_.begin ()), - make_move_iterator (t->tests_.end ())); - } - - // Use start/end locations of the outer scope. - // - m->start_loc_ = g.start_loc_; - m->end_loc_ = g.end_loc_; - - s = move (m); - return true; - } - - return false; - } - - token parser:: - pre_parse_scope_body () - { - // enter: next token is first token of scope body - // leave: rcbrace or eos (returned) - - token t; - type tt; - - // Parse lines (including nested scopes) until we see '}' or eos. - // - for (;;) - { - // Start lexing each line recognizing leading '.+-{}'. - // - tt = peek (lexer_mode::first_token); - - // Handle description. - // - optional d; - if (tt == type::colon) - d = pre_parse_leading_description (t, tt); - - // Determine the line type by peeking at the first token. - // - switch (tt) - { - case type::eos: - case type::rcbrace: - { - next (t, tt); - - if (d) - fail (t) << "description before " << t; - - return t; - } - case type::lcbrace: - { - // Nested scope. - // - next (t, tt); // Get '{'. - const location sl (get_location (t)); - - // First check that we don't have any teardown commands yet. - // This will detect things like variable assignments between - // scopes. - // - if (!group_->tdown_.empty ()) - { - location tl ( - group_->tdown_.back ().tokens.front ().location ()); - - fail (sl) << "scope after teardown" << - info (tl) << "last teardown line appears here"; - } - - // If there is no user-supplied id, use the line number - // (prefixed with include id) as the scope id. - // - const string& id ( - d && !d->id.empty () - ? d->id - : insert_id (id_prefix_ + to_string (sl.line), sl)); - - unique_ptr g (pre_parse_scope_block (t, tt, id)); - g->desc = move (d); - - pre_parse_demote_group_scope (g); - group_->scopes.push_back (move (g)); - continue; - } - default: - { - pre_parse_line (t, tt, d); - assert (tt == type::newline); - break; - } - } - } - } - - unique_ptr parser:: - pre_parse_scope_block (token& t, type& tt, const string& id) - { - // enter: lcbrace - // leave: newline after rcbrace - - const location sl (get_location (t)); - - if (next (t, tt) != type::newline) - fail (t) << "expected newline after '{'"; - - // Push group. - // - id_map idm; - include_set ins; - - unique_ptr g (new group (id, *group_)); - - id_map* om (id_map_); - id_map_ = &idm; - - include_set* os (include_set_); - include_set_ = &ins; - - group* og (group_); - group_ = g.get (); - - // Parse body. - // - group_->start_loc_ = sl; - token e (pre_parse_scope_body ()); - group_->end_loc_ = get_location (e); - - // Pop group. - // - group_ = og; - include_set_ = os; - id_map_ = om; - - if (e.type != type::rcbrace) - fail (e) << "expected '}' at the end of the scope"; - - if (next (t, tt) != type::newline) - fail (t) << "expected newline after '}'"; - - return g; - } - - // Parse a logical line (as well as scope-if since the only way to - // recognize it is to parse the if line). - // - // If one is true then only parse one line returning an indication of - // whether the line ended with a semicolon. - // - bool parser:: - pre_parse_line (token& t, type& tt, - optional& d, - lines* ls, - bool one) - { - // enter: next token is peeked at (type in tt) - // leave: newline - - // Note: token is only peeked at. - // - const location ll (get_location (peeked ())); - - // Determine the line type/start token. - // - line_type lt; - type st (type::eos); - - switch (tt) - { - case type::dot: - { - // Directive. - // - next (t, tt); // Skip dot. - next (t, tt); // Get the directive name. - - if (tt != type::word || t.qtype != quote_type::unquoted) - fail (t) << "expected directive name instead of " << t; - - // Make sure we are not inside a test (i.e., after semi). - // - if (ls != nullptr) - fail (ll) << "directive after ';'"; - - const string& n (t.value); - - if (n == "include") - pre_parse_directive (t, tt); - else - fail (t) << "unknown directive '" << n << "'"; - - assert (tt == type::newline); - return false; - } - case type::plus: - case type::minus: - { - // Setup/teardown command. - // - st = tt; - - next (t, tt); // Start saving tokens from the next one. - replay_save (); - next (t, tt); - - // See if this is a special command. - // - lt = line_type::cmd; // Default. - - if (tt == type::word && t.qtype == quote_type::unquoted) - { - const string& n (t.value); - - if (n == "if") lt = line_type::cmd_if; - else if (n == "if!") lt = line_type::cmd_ifn; - } - - break; - } - default: - { - // Either variable assignment or test command. - // - replay_save (); // Start saving tokens from the current one. - next (t, tt); - - // Decide whether this is a variable assignment or a command. - // - // It is an assignment if the first token is an unquoted name and - // the next token is an assign/append/prepend operator. Assignment - // to a computed variable name must use the set builtin. - // - // Note also thatspecial commands take precedence over variable - // assignments. - // - lt = line_type::cmd; // Default. - - if (tt == type::word && t.qtype == quote_type::unquoted) - { - const string& n (t.value); - - if (n == "if") lt = line_type::cmd_if; - else if (n == "if!") lt = line_type::cmd_ifn; - else if (n == "elif") lt = line_type::cmd_elif; - else if (n == "elif!") lt = line_type::cmd_elifn; - else if (n == "else") lt = line_type::cmd_else; - else if (n == "end") lt = line_type::cmd_end; - else - { - // Switch the recognition of leading variable assignments for - // the next token. This is safe to do because we know we - // cannot be in the quoted mode (since the current token is - // not quoted). - // - type p (peek (lexer_mode::second_token)); - - if (p == type::assign || - p == type::prepend || - p == type::append) - { - lt = line_type::var; - st = p; - } - } - } - - break; - } - } - - // Pre-parse the line keeping track of whether it ends with a semi. - // - bool semi (false); - - line ln; - switch (lt) - { - case line_type::var: - { - // Check if we are trying to modify any of the special aliases - // ($*, $N, $~, $@). - // - string& n (t.value); - - if (n == "*" || n == "~" || n == "@" || digit (n)) - fail (t) << "attempt to set '" << n << "' variable directly"; - - // Pre-enter the variables now while we are executing serially. - // Once parallel, it becomes a lot harder to do. - // - ln.var = &script_->var_pool.insert (move (n)); - - next (t, tt); // Assignment kind. - parse_variable_line (t, tt); - - semi = (tt == type::semi); - - if (tt == type::semi) - next (t, tt); - - if (tt != type::newline) - fail (t) << "expected newline instead of " << t; - - break; - } - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - case line_type::cmd_end: - next (t, tt); // Skip to start of command. - // Fall through. - case line_type::cmd: - { - pair p; - - if (lt != line_type::cmd_else && lt != line_type::cmd_end) - p = parse_command_expr (t, tt); - - // Colon and semicolon are only valid in test command lines and - // after 'end' in if-else. Note that we still recognize them - // lexically, they are just not valid tokens per the grammar. - // - if (tt != type::newline) - { - if (lt != line_type::cmd && lt != line_type::cmd_end) - fail (t) << "expected newline instead of " << t; - - switch (st) - { - case type::plus: fail (t) << t << " after setup command" << endf; - case type::minus: fail (t) << t << " after teardown command" << endf; - } - } - - switch (tt) - { - case type::colon: - { - if (d) - fail (ll) << "both leading and trailing descriptions"; - - d = parse_trailing_description (t, tt); - break; - } - case type::semi: - { - semi = true; - next (t, tt); // Get newline. - break; - } - } - - if (tt != type::newline) - fail (t) << "expected newline instead of " << t; - - parse_here_documents (t, tt, p); - break; - } - } - - assert (tt == type::newline); - - // Stop saving and get the tokens. - // - lines ls_data; - - if (ls == nullptr) - ls = &ls_data; - - ln.type = lt; - ln.tokens = replay_data (); - ls->push_back (move (ln)); - - if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) - { - semi = pre_parse_if_else (t, tt, d, *ls); - - // If this turned out to be scope-if, then ls is empty, semi is - // false, and none of the below logic applies. - // - if (ls->empty ()) - return semi; - } - - // Unless we were told where to put it, decide where it actually goes. - // - if (ls == &ls_data) - { - // First pre-check variable and variable-if: by themselves (i.e., - // without a trailing semicolon) they are treated as either setup or - // teardown without plus/minus. Also handle illegal line types. - // - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - case line_type::cmd_end: - { - fail (ll) << lt << " without preceding 'if'" << endf; - } - case line_type::cmd_if: - case line_type::cmd_ifn: - { - // See if this is a variable-only command-if. - // - if (find_if (ls_data.begin (), ls_data.end (), - [] (const line& l) { - return l.type == line_type::cmd; - }) != ls_data.end ()) - break; - } - // Fall through. - case line_type::var: - { - // If there is a semicolon after the variable then we assume - // it is part of a test (there is no reason to use semicolons - // after variables in the group scope). Otherwise -- setup or - // teardown. - // - if (!semi) - { - if (d) - { - if (lt == line_type::var) - fail (ll) << "description before setup/teardown variable"; - else - fail (ll) << "description before/after setup/teardown " - << "variable-if"; - } - - // If we don't have any nested scopes or teardown commands, - // then we assume this is a setup, otherwise -- teardown. - // - ls = group_->scopes.empty () && group_->tdown_.empty () - ? &group_->setup_ - : &group_->tdown_; - } - break; - } - default: - break; - } - - // If pre-check didn't change the destination, then it's a test. - // - if (ls == &ls_data) - { - switch (st) - { - // Setup. - // - case type::plus: - { - if (d) - fail (ll) << "description before setup command"; - - if (!group_->scopes.empty ()) - fail (ll) << "setup command after tests"; - - if (!group_->tdown_.empty ()) - fail (ll) << "setup command after teardown"; - - ls = &group_->setup_; - break; - } - - // Teardown. - // - case type::minus: - { - if (d) - fail (ll) << "description before teardown command"; - - ls = &group_->tdown_; - break; - } - - // Test command or variable. - // - default: - { - // First check that we don't have any teardown commands yet. - // This will detect things like variable assignments between - // tests. - // - if (!group_->tdown_.empty ()) - { - location tl ( - group_->tdown_.back ().tokens.front ().location ()); - - fail (ll) << "test after teardown" << - info (tl) << "last teardown line appears here"; - } - break; - } - } - } - - // If the destination changed, then move the data over. - // - if (ls != &ls_data) - ls->insert (ls->end (), - make_move_iterator (ls_data.begin ()), - make_move_iterator (ls_data.end ())); - } - - // If this command ended with a semicolon, then the next one should - // go to the same place. - // - if (semi && !one) - { - tt = peek (lexer_mode::first_token); - const location ll (get_location (peeked ())); - - switch (tt) - { - case type::colon: - fail (ll) << "description inside test" << endf; - case type::eos: - case type::rcbrace: - case type::lcbrace: - fail (ll) << "expected another line after ';'" << endf; - case type::plus: - fail (ll) << "setup command in test" << endf; - case type::minus: - fail (ll) << "teardown command in test" << endf; - default: - semi = pre_parse_line (t, tt, d, ls); - assert (tt == type::newline); // End of last test line. - } - } - - // If this is a test then create implicit test scope. - // - if (ls == &ls_data) - { - // If there is no user-supplied id, use the line number (prefixed - // with include id) as the scope id. - // - const string& id ( - d && !d->id.empty () - ? d->id - : insert_id (id_prefix_ + to_string (ll.line), ll)); - - unique_ptr p (new test (id, *group_)); - - p->desc = move (d); - - p->start_loc_ = ll; - p->tests_ = move (ls_data); - p->end_loc_ = get_location (t); - - group_->scopes.push_back (move (p)); - } - - return semi; - } - - bool parser:: - pre_parse_if_else (token& t, type& tt, - optional& d, - lines& ls) - { - // enter: (previous line) - // leave: - - tt = peek (lexer_mode::first_token); - - return tt == type::lcbrace - ? pre_parse_if_else_scope (t, tt, d, ls) - : pre_parse_if_else_command (t, tt, d, ls); - } - - bool parser:: - pre_parse_if_else_scope (token& t, type& tt, - optional& d, - lines& ls) - { - // enter: peeked token of next line (lcbrace) - // leave: newline - - assert (ls.size () == 1); // The if/if! line. - - // Use if/if! as the entire scope chain location. - // - const location sl (ls.back ().tokens.front ().location ()); - - // First check that we don't have any teardown commands yet. This - // will detect things like variable assignments between scopes. - // - if (!group_->tdown_.empty ()) - { - location tl ( - group_->tdown_.back ().tokens.front ().location ()); - - fail (sl) << "scope after teardown" << - info (tl) << "last teardown line appears here"; - } - - // If there is no user-supplied id, use the line number (prefixed with - // include id) as the scope id. Note that we use the same id for all - // scopes in the chain. - // - const string& id ( - d && !d->id.empty () - ? d->id - : insert_id (id_prefix_ + to_string (sl.line), sl)); - - unique_ptr root; - - // Parse the if-else scope chain. - // - line_type bt (line_type::cmd_if); // Current block. - - for (unique_ptr* ps (&root);; ps = &(*ps)->if_chain) - { - next (t, tt); // Get '{'. - - { - unique_ptr g (pre_parse_scope_block (t, tt, id)); - - // If-condition. - // - g->if_cond_ = move (ls.back ()); - ls.clear (); - - // Description. For now we just duplicate it through the entire - // chain. - // - g->desc = (ps == &root ? d : root->desc); - - *ps = move (g); - } - - // See if what comes next is another chain element. - // - line_type lt (line_type::cmd_end); - - type pt (peek (lexer_mode::first_token)); - const token& p (peeked ()); - const location ll (get_location (p)); - - if (pt == type::word && p.qtype == quote_type::unquoted) - { - if (p.value == "elif") lt = line_type::cmd_elif; - else if (p.value == "elif!") lt = line_type::cmd_elifn; - else if (p.value == "else") lt = line_type::cmd_else; - } - - if (lt == line_type::cmd_end) - break; - - // Check if-else block sequencing. - // - if (bt == line_type::cmd_else) - { - if (lt == line_type::cmd_else || - lt == line_type::cmd_elif || - lt == line_type::cmd_elifn) - fail (ll) << lt << " after " << bt; - } - - // Parse just the condition line using pre_parse_line() in the "one" - // mode and into ls so that it is naturally picked up as if_cond_ on - // the next iteration. - // - optional td; - bool semi (pre_parse_line (t, (tt = pt), td, &ls, true)); - assert (ls.size () == 1 && ls.back ().type == lt); - assert (tt == type::newline); - - // For any of these lines trailing semi or description is illegal. - // - // @@ Not the exact location of semi/colon. - // - if (semi) - fail (ll) << "';' after " << lt; - - if (td) - fail (ll) << "description after " << lt; - - // Make sure what comes next is another scope. - // - tt = peek (lexer_mode::first_token); - - if (tt != type::lcbrace) - fail (ll) << "expected scope after " << lt; - - // Update current if-else block. - // - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: bt = line_type::cmd_elif; break; - case line_type::cmd_else: bt = line_type::cmd_else; break; - default: break; - } - } - - pre_parse_demote_group_scope (root); - group_->scopes.push_back (move (root)); - return false; // We never end with a semi. - } - - bool parser:: - pre_parse_if_else_command (token& t, type& tt, - optional& d, - lines& ls) - { - // enter: peeked first token of next line (type in tt) - // leave: newline - - // Parse lines until we see closing 'end'. Nested if-else blocks are - // handled recursively. - // - for (line_type bt (line_type::cmd_if); // Current block. - ; - tt = peek (lexer_mode::first_token)) - { - const location ll (get_location (peeked ())); - - switch (tt) - { - case type::colon: - fail (ll) << "description inside " << bt << endf; - case type::eos: - case type::rcbrace: - case type::lcbrace: - fail (ll) << "expected closing 'end'" << endf; - case type::plus: - fail (ll) << "setup command inside " << bt << endf; - case type::minus: - fail (ll) << "teardown command inside " << bt << endf; - } - - // Parse one line. Note that this one line can still be multiple - // lines in case of if-else. In this case we want to view it as - // cmd_if, not cmd_end. Thus remember the start position of the - // next logical line. - // - size_t i (ls.size ()); - - optional td; - bool semi (pre_parse_line (t, tt, td, &ls, true)); - assert (tt == type::newline); - - line_type lt (ls[i].type); - - // First take care of 'end'. - // - if (lt == line_type::cmd_end) - { - if (td) - { - if (d) - fail (ll) << "both leading and trailing descriptions"; - - d = move (td); - } - - return semi; - } - - // For any other line trailing semi or description is illegal. - // - // @@ Not the exact location of semi/colon. - // - if (semi) - fail (ll) << "';' inside " << bt; - - if (td) - fail (ll) << "description inside " << bt; - - // Check if-else block sequencing. - // - if (bt == line_type::cmd_else) - { - if (lt == line_type::cmd_else || - lt == line_type::cmd_elif || - lt == line_type::cmd_elifn) - fail (ll) << lt << " after " << bt; - } - - // Update current if-else block. - // - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: bt = line_type::cmd_elif; break; - case line_type::cmd_else: bt = line_type::cmd_else; break; - default: break; - } - } - } - - void parser:: - pre_parse_directive (token& t, type& tt) - { - // enter: directive name - // leave: newline - - string d (t.value); - location l (get_location (t)); - next (t, tt); - - // Suspend pre-parsing since we want to really parse the line, with - // expansion, etc. Also parse the whole line in one go. - // - names args; - - if (tt != type::newline) - { - pre_parse_ = false; - args = parse_names (t, tt, - pattern_mode::expand, - false, - "directive argument", - nullptr); - pre_parse_ = true; - } - - if (tt != type::newline) - fail (t) << t << " after directive"; - - if (d == "include") - pre_parse_include_line (move (args), move (l)); - else - assert (false); // Unhandled directive. - } - - void parser:: - pre_parse_include_line (names args, location dl) - { - auto i (args.begin ()); - - // Process options. - // - bool once (false); - for (; i != args.end () && i->simple (); ++i) - { - if (i->value == "--once") - once = true; - else - break; - } - - // Process arguments. - // - auto include = [&dl, once, this] (string n) // throw invalid_path - { - // It may be tempting to use relative paths in diagnostics but it - // most likely will be misguided. - // - auto enter_path = [this] (string n) -> const path& - { - path p (move (n)); - - if (p.relative ()) - p = path_->directory () / p; - - p.normalize (); - - return *script_->paths_.insert (move (p)).first; - }; - - const path& p (enter_path (move (n))); - - if (include_set_->insert (p).second || !once) - { - try - { - ifdstream ifs (p); - lexer l (ifs, p, lexer_mode::command_line); - - const path* op (path_); - path_ = &p; - - lexer* ol (lexer_); - set_lexer (&l); - - string oip (id_prefix_); - id_prefix_ += to_string (dl.line); - id_prefix_ += '-'; - id_prefix_ += p.leaf ().base ().string (); - id_prefix_ += '-'; - - token t (pre_parse_scope_body ()); - - if (t.type != type::eos) - fail (t) << "stray " << t; - - id_prefix_ = oip; - set_lexer (ol); - path_ = op; - } - catch (const io_error& e) - { - fail (dl) << "unable to read testscript " << p << ": " << e; - } - } - }; - - for (; i != args.end (); ++i) - { - name& n (*i); - - try - { - if (n.simple () && !n.empty ()) - { - include (move (n.value)); - continue; - } - } - catch (const invalid_path&) {} // Fall through. - - diag_record dr (fail (dl)); - dr << "invalid testscript include path "; - to_stream (dr.os, n, true); // Quote. - } - } - - description parser:: - pre_parse_leading_description (token& t, type& tt) - { - // enter: peeked at colon (type in tt) - // leave: peeked at in the first_token mode (type in tt) - - assert (tt == type::colon); - - description r; - location loc (get_location (peeked ())); - - string sp; // Strip prefix. - size_t sn (0); // Strip prefix length. - - for (size_t ln (1); tt == type::colon; ++ln) - { - next (t, tt); // Get ':'. - - mode (lexer_mode::description_line); - next (t, tt); - - // If it is empty, then we get newline right away. - // - const string& l (tt == type::word ? t.value : string ()); - - if (tt == type::word) - next (t, tt); // Get newline. - - assert (tt == type::newline); - - // If this is the first line, then get the "strip prefix", i.e., - // the beginning of the line that contains only whitespaces. If - // the subsequent lines start with the same prefix, then we strip - // it. - // - if (ln == 1) - { - sn = l.find_first_not_of (" \t"); - sp.assign (l, 0, sn == string::npos ? (sn = 0) : sn); - } - - // Apply strip prefix. - // - size_t i (l.compare (0, sn, sp) == 0 ? sn : 0); - - // Strip trailing whitespaces, as a courtesy to the user. - // - size_t j (l.find_last_not_of (" \t")); - j = j != string::npos ? j + 1 : i; - - size_t n (j - i); // [i, j) is our data. - - if (ln == 1) - { - // First line. Ignore if it's blank. - // - if (n == 0) - --ln; // Stay as if on the first line. - else - { - // Otherwise, see if it is the id. Failed that we assume it is - // the summary until we see the next line. - // - (l.find_first_of (" \t.", i) >= j ? r.id : r.summary). - assign (l, i, n); - - // If this is an id then validate it. - // - if (!r.id.empty ()) - { - for (char c: r.id) - { - if (!(alnum (c) || c == '_' || c == '-' || c == '+')) - fail (loc) << "illegal character '" << c - << "' in test id '" << r.id << "'"; - } - } - } - } - else if (ln == 2) - { - // If this is a blank then whatever we have in id/summary is good. - // Otherwise, if we have id, then assume this is summary until we - // see the next line. And if not, then move what we (wrongly) - // assumed to be the summary to details. - // - if (n != 0) - { - if (!r.id.empty ()) - r.summary.assign (l, i, n); - else - { - r.details = move (r.summary); - r.details += '\n'; - r.details.append (l, i, n); - - r.summary.clear (); - } - } - } - // Don't treat line 3 as special if we have given up on id/summary. - // - else if (ln == 3 && r.details.empty ()) - { - // If this is a blank and we have id and/or summary, then we are - // good. Otherwise, if we have both, then move what we (wrongly) - // assumed to be id and summary to details. - // - if (n != 0) - { - if (!r.id.empty () && !r.summary.empty ()) - { - r.details = move (r.id); - r.details += '\n'; - r.details += r.summary; - r.details += '\n'; - - r.id.clear (); - r.summary.clear (); - } - - r.details.append (l, i, n); - } - } - else - { - if (!r.details.empty ()) - r.details += '\n'; - - r.details.append (l, i, n); - } - - tt = peek (lexer_mode::first_token); - } - - // Zap trailing newlines in the details. - // - size_t p (r.details.find_last_not_of ('\n')); - if (p != string::npos && ++p != r.details.size ()) - r.details.resize (p); - - if (r.empty ()) - fail (loc) << "empty description"; - - // Insert id into the id map if we have one. - // - if (!r.id.empty ()) - insert_id (r.id, loc); - - return r; - } - - description parser:: - parse_trailing_description (token& t, type& tt) - { - // enter: colon - // leave: newline - - // Parse one-line trailing description. - // - description r; - - // @@ Would be nice to omit trailing description from replay. - // - const location loc (get_location (t)); - - mode (lexer_mode::description_line); - next (t, tt); - - // If it is empty, then we will get newline right away. - // - if (tt == type::word) - { - string l (move (t.value)); - trim (l); // Strip leading/trailing whitespaces. - - // Decide whether this is id or summary. - // - (l.find_first_of (" \t") == string::npos ? r.id : r.summary) = - move (l); - - next (t, tt); // Get newline. - } - - assert (tt == type::newline); // Lexer mode invariant. - - if (r.empty ()) - fail (loc) << "empty description"; - - // Insert id into the id map if we have one. - // - if (pre_parse_ && !r.id.empty ()) - insert_id (r.id, loc); - - return r; - } - - value parser:: - parse_variable_line (token& t, type& tt) - { - // enter: assignment - // leave: newline or semi - - // We cannot reuse the value mode since it will recognize { which we - // want to treat as a literal. - // - mode (lexer_mode::variable_line); - next (t, tt); - - // Parse value attributes if any. Note that it's ok not to have - // anything after the attributes (e.g., foo=[null]). - // - attributes_push (t, tt, true); - - // @@ PAT: Should we expand patterns? Note that it will only be - // simple ones since we have disabled {}. Also, what would be the - // pattern base directory? - // - return tt != type::newline && tt != type::semi - ? parse_value (t, tt, - pattern_mode::ignore, - "variable value", - nullptr) - : value (names ()); - } - - command_expr parser:: - parse_command_line (token& t, type& tt) - { - // enter: first token of the command line - // leave: - - // Note: this one is only used during execution. - - pair p (parse_command_expr (t, tt)); - - switch (tt) - { - case type::colon: parse_trailing_description (t, tt); break; - case type::semi: next (t, tt); break; // Get newline. - } - - assert (tt == type::newline); - - parse_here_documents (t, tt, p); - assert (tt == type::newline); - - return move (p.first); - } - - // Parse the regular expression representation (non-empty string value - // framed with introducer characters and optionally followed by flag - // characters from the {di} set, for example '/foo/id') into - // components. Also return end-of-parsing position if requested, - // otherwise treat any unparsed characters left as an error. - // - struct regex_parts - { - string value; - char intro; - string flags; // Combination of characters from {di} set. - - // Create a special empty object. - // - regex_parts (): intro ('\0') {} - - regex_parts (string v, char i, string f) - : value (move (v)), intro (i), flags (move (f)) {} - }; - - static regex_parts - parse_regex (const string& s, - const location& l, - const char* what, - size_t* end = nullptr) - { - if (s.empty ()) - fail (l) << "no introducer character in " << what; - - size_t p (s.find (s[0], 1)); // Find terminating introducer. - - if (p == string::npos) - fail (l) << "no closing introducer character in " << what; - - size_t rn (p - 1); // Regex length. - if (rn == 0) - fail (l) << what << " is empty"; - - // Find end-of-flags position. - // - size_t fp (++p); // Save flags starting position. - for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ; - - // If string end is not reached then report invalid flags, unless - // end-of-parsing position is requested (which means regex is just a - // prefix). - // - if (s[p] != '\0' && end == nullptr) - fail (l) << "junk at the end of " << what; - - if (end != nullptr) - *end = p; - - return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp)); - } - - pair parser:: - parse_command_expr (token& t, type& tt) - { - // enter: first token of the command line - // leave: - - command_expr expr; - - // OR-ed to an implied false for the first term. - // - expr.push_back ({expr_operator::log_or, command_pipe ()}); - - command c; // Command being assembled. - - // Make sure the command makes sense. - // - auto check_command = [&c, this] (const location& l, bool last) - { - if (c.out.type == redirect_type::merge && - c.err.type == redirect_type::merge) - fail (l) << "stdout and stderr redirected to each other"; - - if (!last && c.out.type != redirect_type::none) - fail (l) << "stdout is both redirected and piped"; - }; - - // Check that the introducer character differs from '/' if the - // portable path modifier is specified. Must be called before - // parse_regex() (see below) to make sure its diagnostics is - // meaningful. - // - // Note that the portable path modifier assumes '/' to be a valid - // regex character and so makes it indistinguishable from the - // terminating introducer. - // - auto check_regex_mod = [this] (const string& mod, - const string& re, - const location& l, - const char* what) - { - // Handles empty regex properly. - // - if (mod.find ('/') != string::npos && re[0] == '/') - fail (l) << "portable path modifier and '/' introducer in " - << what; - }; - - // Pending positions where the next word should go. - // - enum class pending - { - none, - program, - in_string, - in_document, - in_file, - out_merge, - out_string, - out_str_regex, - out_document, - out_doc_regex, - out_file, - err_merge, - err_string, - err_str_regex, - err_document, - err_doc_regex, - err_file, - clean - }; - pending p (pending::program); - string mod; // Modifiers for pending in_* and out_* positions. - here_docs hd; // Expected here-documents. - - // Add the next word to either one of the pending positions or to - // program arguments by default. - // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) - { - auto add_merge = [&l, this] (redirect& r, const string& w, int fd) - { - try - { - size_t n; - if (stoi (w, &n) == fd && n == w.size ()) - { - r.fd = fd; - return; - } - } - catch (const exception&) {} // Fall through. - - fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect " - << "file descriptor must be " << fd; - }; - - auto add_here_str = [] (redirect& r, string&& w) - { - if (r.modifiers.find (':') == string::npos) - w += '\n'; - r.str = move (w); - }; - - auto add_here_str_regex = [&l, &check_regex_mod] ( - redirect& r, int fd, string&& w) - { - const char* what (nullptr); - switch (fd) - { - case 1: what = "stdout regex redirect"; break; - case 2: what = "stderr regex redirect"; break; - } - - check_regex_mod (r.modifiers, w, l, what); - - regex_parts rp (parse_regex (w, l, what)); - - regex_lines& re (r.regex); - re.intro = rp.intro; - - re.lines.emplace_back ( - l.line, l.column, move (rp.value), move (rp.flags)); - - // Add final blank line unless suppressed. - // - // Note that the position is synthetic, but that's ok as we don't - // expect any diagnostics to refer this line. - // - if (r.modifiers.find (':') == string::npos) - re.lines.emplace_back (l.line, l.column, string (), false); - }; - - auto parse_path = [&l, this] (string&& w, const char* what) -> path - { - try - { - path p (move (w)); - - if (!p.empty ()) - { - p.normalize (); - return p; - } - - fail (l) << "empty " << what << endf; - } - catch (const invalid_path& e) - { - fail (l) << "invalid " << what << " '" << e.path << "'" << endf; - } - }; - - auto add_file = [&parse_path] (redirect& r, int fd, string&& w) - { - const char* what (nullptr); - switch (fd) - { - case 0: what = "stdin redirect path"; break; - case 1: what = "stdout redirect path"; break; - case 2: what = "stderr redirect path"; break; - } - - r.file.path = parse_path (move (w), what); - }; - - switch (p) - { - case pending::none: c.arguments.push_back (move (w)); break; - case pending::program: - c.program = parse_path (move (w), "program path"); - break; - - case pending::out_merge: add_merge (c.out, w, 2); break; - case pending::err_merge: add_merge (c.err, w, 1); break; - - case pending::in_string: add_here_str (c.in, move (w)); break; - case pending::out_string: add_here_str (c.out, move (w)); break; - case pending::err_string: add_here_str (c.err, move (w)); break; - - case pending::out_str_regex: - { - add_here_str_regex (c.out, 1, move (w)); - break; - } - case pending::err_str_regex: - { - add_here_str_regex (c.err, 2, move (w)); - break; - } - - // These are handled specially below. - // - case pending::in_document: - case pending::out_document: - case pending::err_document: - case pending::out_doc_regex: - case pending::err_doc_regex: assert (false); break; - - case pending::in_file: add_file (c.in, 0, move (w)); break; - case pending::out_file: add_file (c.out, 1, move (w)); break; - case pending::err_file: add_file (c.err, 2, move (w)); break; - - case pending::clean: - { - cleanup_type t; - switch (mod[0]) // Ok, if empty - { - case '!': t = cleanup_type::never; break; - case '?': t = cleanup_type::maybe; break; - default: t = cleanup_type::always; break; - } - - c.cleanups.push_back ( - {t, parse_path (move (w), "cleanup path")}); - break; - } - } - - p = pending::none; - mod.clear (); - }; - - // Make sure we don't have any pending positions to fill. - // - auto check_pending = [&p, this] (const location& l) - { - const char* what (nullptr); - - switch (p) - { - case pending::none: break; - case pending::program: what = "program"; break; - case pending::in_string: what = "stdin here-string"; break; - case pending::in_document: what = "stdin here-document end"; break; - case pending::in_file: what = "stdin file"; break; - case pending::out_merge: what = "stdout file descriptor"; break; - case pending::out_string: what = "stdout here-string"; break; - case pending::out_document: what = "stdout here-document end"; break; - case pending::out_file: what = "stdout file"; break; - case pending::err_merge: what = "stderr file descriptor"; break; - case pending::err_string: what = "stderr here-string"; break; - case pending::err_document: what = "stderr here-document end"; break; - case pending::err_file: what = "stderr file"; break; - case pending::clean: what = "cleanup path"; break; - - case pending::out_str_regex: - { - what = "stdout here-string regex"; - break; - } - case pending::err_str_regex: - { - what = "stderr here-string regex"; - break; - } - case pending::out_doc_regex: - { - what = "stdout here-document regex end"; - break; - } - case pending::err_doc_regex: - { - what = "stderr here-document regex end"; - break; - } - } - - if (what != nullptr) - fail (l) << "missing " << what; - }; - - // Parse the redirect operator. - // - auto parse_redirect = - [&c, &expr, &p, &mod, this] (token& t, const location& l) - { - // Our semantics is the last redirect seen takes effect. - // - assert (p == pending::none && mod.empty ()); - - // See if we have the file descriptor. - // - unsigned long fd (3); - if (!t.separated) - { - if (c.arguments.empty ()) - fail (l) << "missing redirect file descriptor"; - - const string& s (c.arguments.back ()); - - try - { - size_t n; - fd = stoul (s, &n); - - if (n != s.size () || fd > 2) - throw invalid_argument (string ()); - } - catch (const exception&) - { - fail (l) << "invalid redirect file descriptor '" << s << "'"; - } - - c.arguments.pop_back (); - } - - type tt (t.type); - - // Validate/set default file descriptor. - // - switch (tt) - { - case type::in_pass: - case type::in_null: - case type::in_str: - case type::in_doc: - case type::in_file: - { - if ((fd = fd == 3 ? 0 : fd) != 0) - fail (l) << "invalid in redirect file descriptor " << fd; - - if (!expr.back ().pipe.empty ()) - fail (l) << "stdin is both piped and redirected"; - - break; - } - case type::out_pass: - case type::out_null: - case type::out_trace: - case type::out_merge: - case type::out_str: - case type::out_doc: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - if ((fd = fd == 3 ? 1 : fd) == 0) - fail (l) << "invalid out redirect file descriptor " << fd; - - break; - } - } - - mod = move (t.value); - - redirect_type rt (redirect_type::none); - switch (tt) - { - case type::in_pass: - case type::out_pass: rt = redirect_type::pass; break; - - case type::in_null: - case type::out_null: rt = redirect_type::null; break; - - case type::out_trace: rt = redirect_type::trace; break; - - case type::out_merge: rt = redirect_type::merge; break; - - case type::in_str: - case type::out_str: - { - bool re (mod.find ('~') != string::npos); - assert (tt == type::out_str || !re); - - rt = re - ? redirect_type::here_str_regex - : redirect_type::here_str_literal; - - break; - } - - case type::in_doc: - case type::out_doc: - { - bool re (mod.find ('~') != string::npos); - assert (tt == type::out_doc || !re); - - rt = re - ? redirect_type::here_doc_regex - : redirect_type::here_doc_literal; - - break; - } - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: rt = redirect_type::file; break; - } - - redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err); - r = redirect (rt); - - // Don't move as still may be used for pending here-document end - // marker processing. - // - r.modifiers = mod; - - switch (rt) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - break; - case redirect_type::merge: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_merge; break; - case 2: p = pending::err_merge; break; - } - break; - case redirect_type::here_str_literal: - switch (fd) - { - case 0: p = pending::in_string; break; - case 1: p = pending::out_string; break; - case 2: p = pending::err_string; break; - } - break; - case redirect_type::here_str_regex: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_str_regex; break; - case 2: p = pending::err_str_regex; break; - } - break; - case redirect_type::here_doc_literal: - switch (fd) - { - case 0: p = pending::in_document; break; - case 1: p = pending::out_document; break; - case 2: p = pending::err_document; break; - } - break; - case redirect_type::here_doc_regex: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_doc_regex; break; - case 2: p = pending::err_doc_regex; break; - } - break; - case redirect_type::file: - switch (fd) - { - case 0: p = pending::in_file; break; - case 1: p = pending::out_file; break; - case 2: p = pending::err_file; break; - } - - // Also sets for stdin, but this is harmless. - // - r.file.mode = tt == type::out_file_ovr - ? redirect_fmode::overwrite - : (tt == type::out_file_app - ? redirect_fmode::append - : redirect_fmode::compare); - - break; - - case redirect_type::here_doc_ref: assert (false); break; - } - }; - - // Set pending cleanup type. - // - auto parse_clean = [&p, &mod] (token& t) - { - p = pending::clean; - mod = move (t.value); - }; - - const location ll (get_location (t)); // Line location. - - // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, semicolon, exit status comparison, etc). - // - location l (ll); - names ns; // Reuse to reduce allocations. - - for (bool done (false); !done; l = get_location (t)) - { - switch (tt) - { - case type::semi: - case type::colon: - case type::newline: - { - done = true; - break; - } - - case type::equal: - case type::not_equal: - { - if (!pre_parse_) - check_pending (l); - - c.exit = parse_command_exit (t, tt); - - // Only a limited set of things can appear after the exit status - // so we check this here. - // - switch (tt) - { - case type::semi: - case type::colon: - case type::newline: - - case type::pipe: - case type::log_or: - case type::log_and: - break; - default: - fail (t) << "unexpected " << t << " after command exit status"; - } - - break; - } - - case type::pipe: - case type::log_or: - case type::log_and: - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::in_doc: - case type::out_str: - case type::out_doc: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - - case type::clean: - { - if (pre_parse_) - { - // The only things we need to handle here are the here-document - // and here-document regex end markers since we need to know - // how many of them to pre-parse after the command. - // - switch (tt) - { - case type::in_doc: - case type::out_doc: - mod = move (t.value); - - bool re (mod.find ('~') != string::npos); - const char* what (re - ? "here-document regex end marker" - : "here-document end marker"); - - // We require the end marker to be a literal, unquoted word. - // In particularm, we don't allow quoted because of cases - // like foo"$bar" (where we will see word 'foo'). - // - next (t, tt); - - // We require the end marker to be an unquoted or completely - // quoted word. The complete quoting becomes important for - // cases like foo"$bar" (where we will see word 'foo'). - // - // For good measure we could have also required it to be - // separated from the following token, but out grammar - // allows one to write >>EOO;. The problematic sequence - // would be >>FOO$bar -- on reparse it will be expanded - // as a single word. - // - if (tt != type::word || t.value.empty ()) - fail (t) << "expected " << what; - - peek (); - const token& p (peeked ()); - if (!p.separated) - { - switch (p.type) - { - case type::dollar: - case type::lparen: - fail (p) << what << " must be literal"; - } - } - - quote_type qt (t.qtype); - switch (qt) - { - case quote_type::unquoted: - qt = quote_type::single; // Treat as single-quoted. - break; - case quote_type::single: - case quote_type::double_: - if (t.qcomp) - break; - // Fall through. - case quote_type::mixed: - fail (t) << "partially-quoted " << what; - } - - regex_parts r; - string end (move (t.value)); - - if (re) - { - check_regex_mod (mod, end, l, what); - - r = parse_regex (end, l, what); - end = move (r.value); // The "cleared" end marker. - } - - bool literal (qt == quote_type::single); - bool shared (false); - - for (const auto& d: hd) - { - if (d.end == end) - { - auto check = [&t, &end, &re, this] (bool c, - const char* what) - { - if (!c) - fail (t) << "different " << what - << " for shared here-document " - << (re ? "regex '" : "'") << end << "'"; - }; - - check (d.modifiers == mod, "modifiers"); - check (d.literal == literal, "quoting"); - - if (re) - { - check (d.regex == r.intro, "introducers"); - check (d.regex_flags == r.flags, "global flags"); - } - - shared = true; - break; - } - } - - if (!shared) - hd.push_back ( - here_doc { - {}, - move (end), - literal, - move (mod), - r.intro, move (r.flags)}); - - break; - } - - next (t, tt); - break; - } - - // If this is one of the operators/separators, check that we - // don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the inner loop below. - // - switch (tt) - { - case type::pipe: - case type::log_or: - case type::log_and: - { - // Check that the previous command makes sense. - // - check_command (l, tt != type::pipe); - expr.back ().pipe.push_back (move (c)); - - c = command (); - p = pending::program; - - if (tt != type::pipe) - { - expr_operator o (tt == type::log_or - ? expr_operator::log_or - : expr_operator::log_and); - expr.push_back ({o, command_pipe ()}); - } - - break; - } - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::in_doc: - case type::out_str: - case type::out_doc: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (t, l); - break; - } - - case type::clean: - { - parse_clean (t); - break; - } - - default: assert (false); break; - } - - next (t, tt); - break; - } - default: - { - // Here-document end markers are literal (we verified that above - // during pre-parsing) and we need to know whether they were - // quoted. So handle this case specially. - // - { - int fd; - switch (p) - { - case pending::in_document: fd = 0; break; - case pending::out_document: - case pending::out_doc_regex: fd = 1; break; - case pending::err_document: - case pending::err_doc_regex: fd = 2; break; - default: fd = -1; break; - } - - if (fd != -1) - { - here_redirect rd { - expr.size () - 1, expr.back ().pipe.size (), fd}; - - string end (move (t.value)); - - regex_parts r; - - if (p == pending::out_doc_regex || - p == pending::err_doc_regex) - { - // We can't fail here as we already parsed all the end - // markers during pre-parsing stage, and so no need in the - // description. - // - r = parse_regex (end, l, ""); - end = move (r.value); // The "cleared" end marker. - } - - bool shared (false); - for (auto& d: hd) - { - // No need to check that redirects that share here-document - // have the same modifiers, etc. That have been done during - // pre-parsing. - // - if (d.end == end) - { - d.redirects.emplace_back (rd); - shared = true; - break; - } - } - - if (!shared) - hd.push_back ( - here_doc { - {rd}, - move (end), - (t.qtype == quote_type::unquoted || - t.qtype == quote_type::single), - move (mod), - r.intro, move (r.flags)}); - - p = pending::none; - mod.clear (); - - next (t, tt); - break; - } - } - - // Parse the next chunk as simple names to get expansion, etc. - // Note that we do it in the chunking mode to detect whether - // anything in each chunk is quoted. - // - // @@ PAT: should we support pattern expansion? This is even - // fuzzier than the variable case above. Though this is the - // shell semantics. Think what happens when we do rm *.txt? - // - reset_quoted (t); - parse_names (t, tt, - ns, - pattern_mode::ignore, - true, - "command line", - nullptr); - - if (pre_parse_) // Nothing else to do if we are pre-parsing. - break; - - // Process what we got. Determine whether anything inside was - // quoted (note that the current token is "next" and is not part - // of this). - // - bool q ((quoted () - - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); - - for (name& n: ns) - { - string s; - - try - { - s = value_traits::convert (move (n), nullptr); - } - catch (const invalid_argument&) - { - diag_record dr (fail (l)); - dr << "invalid string value "; - to_stream (dr.os, n, true); // Quote. - } - - // If it is a quoted chunk, then we add the word as is. - // Otherwise we re-lex it. But if the word doesn't contain any - // interesting characters (operators plus quotes/escapes), - // then no need to re-lex. - // - // NOTE: update quoting (script.cxx:to_stream_q()) if adding - // any new characters. - // - if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) - add_word (move (s), l); - else - { - // If the chunk re-parsing results in error, our diagnostics - // will look like this: - // - // :1:4: error: stdout merge redirect file descriptor must be 2 - // testscript:2:5: info: while parsing string '1>&a' - // - auto df = make_diag_frame ( - [s, &l](const diag_record& dr) - { - dr << info (l) << "while parsing string '" << s << "'"; - }); - - // When re-lexing we do "effective escaping" and only for - // ['"\] (quotes plus the backslash itself). In particular, - // there is no way to escape redirects, operators, etc. The - // idea is to prefer quoting except for passing literal - // quotes, for example: - // - // args = \"&foo\" - // cmd $args # cmd &foo - // - // args = 'x=\"foo bar\"' - // cmd $args # cmd x="foo bar" - // - - path name (""); - istringstream is (s); - lexer lex (is, name, - lexer_mode::command_expansion, - "\'\"\\"); - - // Treat the first "sub-token" as always separated from what - // we saw earlier. - // - // Note that this is not "our" token so we cannot do - // fail(t). Rather we should do fail(l). - // - token t (lex.next ()); - location l (build2::get_location (t, name)); - t.separated = true; - - string w; - bool f (t.type == type::eos); // If the whole thing is empty. - - for (; t.type != type::eos; t = lex.next ()) - { - type tt (t.type); - l = build2::get_location (t, name); - - // Re-lexing double-quotes will recognize $, ( inside as - // tokens so we have to reverse them back. Since we don't - // treat spaces as separators we can be sure we will get - // it right. - // - switch (tt) - { - case type::dollar: w += '$'; continue; - case type::lparen: w += '('; continue; - } - - // Retire the current word. We need to distinguish between - // empty and non-existent (e.g., > vs >""). - // - if (!w.empty () || f) - { - add_word (move (w), l); - f = false; - } - - if (tt == type::word) - { - w = move (t.value); - f = true; - continue; - } - - // If this is one of the operators/separators, check that - // we don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the outer loop above. - // - switch (tt) - { - case type::pipe: - case type::log_or: - case type::log_and: - { - // Check that the previous command makes sense. - // - check_command (l, tt != type::pipe); - expr.back ().pipe.push_back (move (c)); - - c = command (); - p = pending::program; - - if (tt != type::pipe) - { - expr_operator o (tt == type::log_or - ? expr_operator::log_or - : expr_operator::log_and); - expr.push_back ({o, command_pipe ()}); - } - - break; - } - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (t, l); - break; - } - - case type::clean: - { - parse_clean (t); - break; - } - - case type::in_doc: - case type::out_doc: - { - fail (l) << "here-document redirect in expansion"; - break; - } - } - } - - // Don't forget the last word. - // - if (!w.empty () || f) - add_word (move (w), l); - } - } - - ns.clear (); - break; - } - } - } - - if (!pre_parse_) - { - // Verify we don't have anything pending to be filled and the - // command makes sense. - // - check_pending (l); - check_command (l, true); - - expr.back ().pipe.push_back (move (c)); - } - - return make_pair (move (expr), move (hd)); - } - - command_exit parser:: - parse_command_exit (token& t, type& tt) - { - // enter: equal/not_equal - // leave: token after exit status (one parse_names() chunk) - - exit_comparison comp (tt == type::equal - ? exit_comparison::eq - : exit_comparison::ne); - - // The next chunk should be the exit status. - // - next (t, tt); - location l (get_location (t)); - names ns (parse_names (t, tt, - pattern_mode::ignore, - true, - "exit status", - nullptr)); - unsigned long es (256); - - if (!pre_parse_) - { - try - { - if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) - es = stoul (ns[0].value); - } - catch (const exception&) {} // Fall through. - - if (es > 255) - { - diag_record dr; - - dr << fail (l) << "expected exit status instead of "; - to_stream (dr.os, ns, true); // Quote. - - dr << info << "exit status is an unsigned integer less than 256"; - } - } - - return command_exit {comp, static_cast (es)}; - } - - void parser:: - parse_here_documents (token& t, type& tt, - pair& p) - { - // enter: newline - // leave: newline - - // Parse here-document fragments in the order they were mentioned on - // the command line. - // - for (here_doc& h: p.second) - { - // Switch to the here-line mode which is like single/double-quoted - // string but recognized the newline as a separator. - // - mode (h.literal - ? lexer_mode::here_line_single - : lexer_mode::here_line_double); - next (t, tt); - - parsed_doc v ( - parse_here_document (t, tt, h.end, h.modifiers, h.regex)); - - if (!pre_parse_) - { - assert (!h.redirects.empty ()); - auto i (h.redirects.cbegin ()); - - command& c (p.first[i->expr].pipe[i->pipe]); - redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err); - - if (v.re) - { - r.regex = move (v.regex); - r.regex.flags = move (h.regex_flags); - } - else - r.str = move (v.str); - - r.end = move (h.end); - r.end_line = v.end_line; - r.end_column = v.end_column; - - // Note that our references cannot be invalidated because the - // command_expr/command-pipe vectors already contain all their - // elements. - // - for (++i; i != h.redirects.cend (); ++i) - { - command& c (p.first[i->expr].pipe[i->pipe]); - - (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) = - redirect (redirect_type::here_doc_ref, r); - } - } - - expire_mode (); - } - } - - parser::parsed_doc parser:: - parse_here_document (token& t, type& tt, - const string& em, - const string& mod, - char re) - { - // enter: first token on first line - // leave: newline (after end marker) - - // String literal. Note that when decide if to terminate the previously - // added line with a newline, we need to distinguish a yet empty result - // and the one that has a single blank line added. - // - optional rs; - - regex_lines rre; - - // Here-documents can be indented. The leading whitespaces of the end - // marker line (called strip prefix) determine the indentation. Every - // other line in the here-document should start with this prefix which - // is automatically stripped. The only exception is a blank line. - // - // The fact that the strip prefix is only known at the end, after - // seeing all the lines, is rather inconvenient. As a result, the way - // we implement this is a bit hackish (though there is also something - // elegant about it): at the end of the pre-parse stage we are going - // re-examine the sequence of tokens that comprise this here-document - // and "fix up" the first token of each line by stripping the prefix. - // - string sp; - - // Remember the position of the first token in this here-document. - // - size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0); - - // We will use the location of the first token on the line for the - // regex diagnostics. At the end of the loop it will point to the - // beginning of the end marker. - // - location l; - - while (tt != type::eos) - { - l = get_location (t); - - // Check if this is the end marker. For starters, it should be a - // single, unquoted word followed by a newline. - // - if (tt == type::word && - t.qtype == quote_type::unquoted && - peek () == type::newline) - { - const string& v (t.value); - - size_t vn (v.size ()); - size_t en (em.size ()); - - // Then check that it ends with the end marker. - // - if (vn >= en && v.compare (vn - en, en, em) == 0) - { - // Now check that the prefix only contains whitespaces. - // - size_t n (vn - en); - - if (v.find_first_not_of (" \t") >= n) - { - assert (pre_parse_ || n == 0); // Should have been stripped. - - if (n != 0) - sp.assign (v, 0, n); // Save the strip prefix. - - next (t, tt); // Get the newline. - break; - } - } - } - - // Expand the line (can be blank). - // - // @@ PAT: one could argue that if we do it in variables, then we - // should do it here as well. Though feels bizarre. - // - names ns (tt != type::newline - ? parse_names (t, tt, - pattern_mode::ignore, - false, - "here-document line", - nullptr) - : names ()); - - if (!pre_parse_) - { - // What shall we do if the expansion results in multiple names? - // For, example if the line contains just the variable expansion - // and it is of type strings. Adding all the elements space- - // separated seems like the natural thing to do. - // - string s; - for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) - { - string n; - - try - { - n = value_traits::convert (move (*i), nullptr); - } - catch (const invalid_argument&) - { - fail (l) << "invalid string value '" << *i << "'"; - } - - if (i == b) - s = move (n); - else - { - s += ' '; - s += n; - } - } - - if (!re) - { - // Add newline after previous line. - // - if (rs) - { - *rs += '\n'; - *rs += s; - } - else - rs = move (s); - } - else - { - // Due to expansion we can end up with multiple lines. If empty - // then will add a blank textual literal. - // - for (size_t p (0); p != string::npos; ) - { - string ln; - size_t np (s.find ('\n', p)); - - if (np != string::npos) - { - ln = string (s, p, np - p); - p = np + 1; - } - else - { - ln = string (s, p); - p = np; - } - - if (ln[0] != re) // Line doesn't start with regex introducer. - { - // This is a line-char literal (covers blank lines as well). - // - // Append textual literal. - // - rre.lines.emplace_back (l.line, l.column, move (ln), false); - } - else // Line starts with the regex introducer. - { - // This is a char-regex, or a sequence of line-regex syntax - // characters or both (in this specific order). So we will - // add regex (with optional special characters) or special - // literal. - // - size_t p (ln.find (re, 1)); - if (p == string::npos) - { - // No regex, just a sequence of syntax characters. - // - string spec (ln, 1); - if (spec.empty ()) - fail (l) << "no syntax line characters"; - - // Append special literal. - // - rre.lines.emplace_back ( - l.line, l.column, move (spec), true); - } - else - { - // Regex (probably with syntax characters). - // - regex_parts re; - - // Empty regex is a special case repesenting a blank line. - // - if (p == 1) - // Position to optional specal characters of an empty - // regex. - // - ++p; - else - // Can't fail as all the pre-conditions verified - // (non-empty with both introducers in place), so no - // description required. - // - re = parse_regex (ln, l, "", &p); - - // Append regex with optional special characters. - // - rre.lines.emplace_back (l.line, l.column, - move (re.value), move (re.flags), - string (ln, p)); - } - } - } - } - } - - // We should expand the whole line at once so this would normally be - // a newline but can also be an end-of-stream. - // - if (tt == type::newline) - next (t, tt); - else - assert (tt == type::eos); - } - - if (tt == type::eos) - fail (t) << "missing here-document end marker '" << em << "'"; - - if (pre_parse_) - { - // Strip the indentation prefix if there is one. - // - assert (replay_ == replay::save); - - if (!sp.empty ()) - { - size_t sn (sp.size ()); - - for (; ri != replay_data_.size (); ++ri) - { - token& rt (replay_data_[ri].token); - - if (rt.type == type::newline) // Blank - continue; - - if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0) - fail (rt) << "unindented here-document line"; - - // If the word is equal to the strip prefix then we have to drop - // the token. Note that simply making it an empty word won't - // have the same semantics. For instance, it would trigger - // concatenated expansion. - // - if (rt.value.size () == sn) - replay_data_.erase (replay_data_.begin () + ri); - else - { - rt.value.erase (0, sn); - rt.column += sn; - ++ri; - } - - // Skip until next newline. - // - for (; replay_data_[ri].token.type != type::newline; ++ri) ; - } - } - } - else - { - // Add final newline unless suppressed. - // - if (mod.find (':') == string::npos) - { - if (re) - // Note that the position is synthetic, but that's ok as we don't - // expect any diagnostics to refer this line. - // - rre.lines.emplace_back (l.line, l.column, string (), false); - else if (rs) - *rs += '\n'; - else - rs = "\n"; - } - - // Finalize regex lines. - // - if (re) - { - // Empty regex matches nothing, so not of much use. - // - if (rre.lines.empty ()) - fail (l) << "empty here-document regex"; - - rre.intro = re; - } - } - - return re - ? parsed_doc (move (rre), l.line, l.column) - : parsed_doc (rs ? move (*rs) : string (), l.line, l.column); - } - - // - // Execute. - // - - void parser:: - execute (script& s, runner& r) - { - assert (s.state == scope_state::unknown); - - auto g ( - make_exception_guard ( - [&s] () {s.state = scope_state::failed;})); - - if (!s.empty ()) - execute (s, s, r); - else - s.state = scope_state::passed; - } - - void parser:: - execute (scope& sc, script& s, runner& r) - { - path_ = nullptr; // Set by replays. - - pre_parse_ = false; - - set_lexer (nullptr); - - script_ = &s; - runner_ = &r; - group_ = nullptr; - id_map_ = nullptr; - include_set_ = nullptr; - scope_ = ≻ - - //@@ PAT TODO: set pbase_? - - exec_scope_body (); - } - - static void - execute_impl (scope& s, script& scr, runner& r) - { - try - { - parser p; - p.execute (s, scr, r); - } - catch (const failed&) - { - s.state = scope_state::failed; - } - } - - void parser:: - exec_scope_body () - { - size_t li (0); - - runner_->enter (*scope_, scope_->start_loc_); - - if (test* t = dynamic_cast (scope_)) - { - exec_lines ( - t->tests_.begin (), t->tests_.end (), li, command_type::test); - } - else if (group* g = dynamic_cast (scope_)) - { - bool exec_scope ( - exec_lines ( - g->setup_.begin (), g->setup_.end (), li, command_type::setup)); - - if (exec_scope) - { - atomic_count task_count (0); - wait_guard wg (task_count); - - // Start asynchronous execution of inner scopes keeping track of - // how many we have handled. - // - for (unique_ptr& chain: g->scopes) - { - // Check if this scope is ignored (e.g., via config.test). - // - if (!runner_->test (*chain) || !exec_scope) - { - chain = nullptr; - continue; - } - - // Pick a scope from the if-else chain. - // - // In fact, we are going to drop all but the selected (if any) - // scope. This way we can re-examine the scope states later. It - // will also free some memory. - // - unique_ptr* ps; - for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain) - { - scope& s (**ps); - - if (!s.if_cond_) // Unconditional. - { - assert (s.if_chain == nullptr); - break; - } - - line l (move (*s.if_cond_)); - line_type lt (l.type); - - replay_data (move (l.tokens)); - - token t; - type tt; - - next (t, tt); - const location ll (get_location (t)); - next (t, tt); // Skip to start of command. - - bool take; - if (lt != line_type::cmd_else) - { - // Note: the line index count continues from setup. - // - command_expr ce (parse_command_line (t, tt)); - - try - { - take = runner_->run_if (*scope_, ce, ++li, ll); - } - catch (const exit_scope& e) - { - // Bail out if the scope is exited with the failure status. - // Otherwise leave the scope normally. - // - if (!e.status) - throw failed (); - - // Stop iterating through if conditions, and stop executing - // inner scopes. - // - exec_scope = false; - replay_stop (); - break; - } - - if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) - take = !take; - } - else - { - assert (tt == type::newline); - take = true; - } - - replay_stop (); - - if (take) - { - // Count the remaining conditions for the line index. - // - for (scope* r (s.if_chain.get ()); - r != nullptr && - r->if_cond_->type != line_type::cmd_else; - r = r->if_chain.get ()) - ++li; - - s.if_chain.reset (); // Drop remaining scopes. - break; - } - } - - chain.reset (*ps == nullptr || (*ps)->empty () || !exec_scope - ? nullptr - : ps->release ()); - - if (chain != nullptr) - { - // Hand it off to a sub-parser potentially in another thread. - // But we could also have handled it serially in this parser: - // - // scope* os (scope_); - // scope_ = chain.get (); - // exec_scope_body (); - // scope_ = os; - - // Pass our diagnostics stack (this is safe since we are going - // to wait for completion before unwinding the diag stack). - // - // If the scope was executed synchronously, check the status - // and bail out if we weren't asked to keep going. - // - // UBSan workaround. - // - const diag_frame* df (diag_frame::stack ()); - if (!sched.async (task_count, - [] (const diag_frame* ds, - scope& s, - script& scr, - runner& r) - { - diag_frame::stack_guard dsg (ds); - execute_impl (s, scr, r); - }, - df, - ref (*chain), - ref (*script_), - ref (*runner_))) - { - // Bail out if the scope has failed and we weren't instructed - // to keep going. - // - if (chain->state == scope_state::failed && !keep_going) - throw failed (); - } - } - } - - wg.wait (); - - // Re-examine the scopes we have executed collecting their state. - // - for (const unique_ptr& chain: g->scopes) - { - if (chain == nullptr) - continue; - - switch (chain->state) - { - case scope_state::passed: break; - case scope_state::failed: throw failed (); - default: assert (false); - } - } - } - - exec_lines ( - g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown); - } - else - assert (false); - - runner_->leave (*scope_, scope_->end_loc_); - - scope_->state = scope_state::passed; - } - - bool parser:: - exec_lines (lines::iterator i, lines::iterator e, - size_t& li, - command_type ct) - { - try - { - token t; - type tt; - - for (; i != e; ++i) - { - line& ln (*i); - line_type lt (ln.type); - - assert (path_ == nullptr); - - // Set the tokens and start playing. - // - replay_data (move (ln.tokens)); - - // We don't really need to change the mode since we already know - // the line type. - // - next (t, tt); - const location ll (get_location (t)); - - switch (lt) - { - case line_type::var: - { - // Parse. - // - string name (move (t.value)); - - next (t, tt); - type kind (tt); // Assignment kind. - - value rhs (parse_variable_line (t, tt)); - - if (tt == type::semi) - next (t, tt); - - assert (tt == type::newline); - - // Assign. - // - const variable& var (*ln.var); - - value& lhs (kind == type::assign - ? scope_->assign (var) - : scope_->append (var)); - - build2::parser::apply_value_attributes ( - &var, lhs, move (rhs), kind); - - // If we changes any of the test.* values, then reset the $*, - // $N special aliases. - // - if (var.name == script_->test_var.name || - var.name == script_->options_var.name || - var.name == script_->arguments_var.name || - var.name == script_->redirects_var.name || - var.name == script_->cleanups_var.name) - { - scope_->reset_special (); - } - - replay_stop (); - break; - } - case line_type::cmd: - { - // We use the 0 index to signal that this is the only command. - // Note that we only do this for test commands. - // - if (ct == command_type::test && li == 0) - { - lines::iterator j (i); - for (++j; j != e && j->type == line_type::var; ++j) ; - - if (j != e) // We have another command. - ++li; - } - else - ++li; - - command_expr ce (parse_command_line (t, tt)); - runner_->run (*scope_, ce, ct, li, ll); - - replay_stop (); - break; - } - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - { - next (t, tt); // Skip to start of command. - - bool take; - if (lt != line_type::cmd_else) - { - // Assume if-else always involves multiple commands. - // - command_expr ce (parse_command_line (t, tt)); - take = runner_->run_if (*scope_, ce, ++li, ll); - - if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) - take = !take; - } - else - { - assert (tt == type::newline); - take = true; - } - - replay_stop (); - - // If end is true, then find the 'end' line. Otherwise, find - // the next if-else line. If skip is true then increment the - // command line index. - // - auto next = [e, &li] - (lines::iterator j, bool end, bool skip) -> lines::iterator - { - // We need to be aware of nested if-else chains. - // - size_t n (0); - - for (++j; j != e; ++j) - { - line_type lt (j->type); - - if (lt == line_type::cmd_if || - lt == line_type::cmd_ifn) - ++n; - - // If we are nested then we just wait until we get back - // to the surface. - // - if (n == 0) - { - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - if (end) break; - // Fall through. - case line_type::cmd_end: return j; - default: break; - } - } - - if (lt == line_type::cmd_end) - --n; - - if (skip) - { - // Note that we don't count else and end as commands. - // - switch (lt) - { - case line_type::cmd: - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: ++li; break; - default: break; - } - } - } - - assert (false); // Missing end. - return e; - }; - - // If we are taking this branch then we need to parse all the - // lines until the next if-else line and then skip all the - // lines until the end (unless next is already end). - // - // Otherwise, we need to skip all the lines until the next - // if-else line and then continue parsing. - // - if (take) - { - lines::iterator j (next (i, false, false)); // Next if-else. - if (!exec_lines (i + 1, j, li, ct)) - return false; - - i = j->type == line_type::cmd_end ? j : next (j, true, true); - } - else - { - i = next (i, false, true); - if (i->type != line_type::cmd_end) - --i; // Continue with this line (e.g., elif or else). - } - - break; - } - case line_type::cmd_end: - { - assert (false); - } - } - } - - return true; - } - catch (const exit_scope& e) - { - // Bail out if the scope is exited with the failure status. Otherwise - // leave the scope normally. - // - if (!e.status) - throw failed (); - - replay_stop (); - return false; - } - } - - // - // The rest. - // - - lookup parser:: - lookup_variable (name&& qual, string&& name, const location& loc) - { - assert (!pre_parse_); - - if (!qual.empty ()) - fail (loc) << "qualified variable name"; - - // If we have no scope (happens when pre-parsing directives), then we - // only look for buildfile variables. - // - // Otherwise, every variable that is ever set in a script has been - // pre-entered during pre-parse or introduced with the set builtin - // during test execution. Which means that if one is not found in the - // script pool then it can only possibly be set in the buildfile. - // - // Note that we need to acquire the variable pool lock. The pool can - // be changed from multiple threads by the set builtin. The obtained - // variable pointer can safelly be used with no locking as the variable - // pool is an associative container (underneath) and we are only adding - // new variables into it. - // - const variable* pvar (nullptr); - - if (scope_ != nullptr) - { - slock sl (script_->var_pool_mutex); - pvar = script_->var_pool.find (name); - } - - return pvar != nullptr - ? scope_->find (*pvar) - : script_->find_in_buildfile (name); - } - - size_t parser:: - quoted () const - { - size_t r (0); - - if (replay_ != replay::play) - r = lexer_->quoted (); - else - { - // Examine tokens we have replayed since last reset. - // - for (size_t i (replay_quoted_); i != replay_i_; ++i) - if (replay_data_[i].token.qtype != quote_type::unquoted) - ++r; - } - - return r; - } - - void parser:: - reset_quoted (token& cur) - { - if (replay_ != replay::play) - lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0); - else - { - replay_quoted_ = replay_i_ - 1; - - // Must be the same token. - // - assert (replay_data_[replay_quoted_].token.qtype == cur.qtype); - } - } - - const string& parser:: - insert_id (string id, location l) - { - auto p (id_map_->emplace (move (id), move (l))); - - if (!p.second) - fail (l) << "duplicate id " << p.first->first << - info (p.first->second) << "previously used here"; - - return p.first->first; - } - - void parser:: - set_lexer (lexer* l) - { - lexer_ = l; - base_parser::lexer_ = l; - } - - void parser:: - apply_value_attributes (const variable* var, - value& lhs, - value&& rhs, - const string& attributes, - token_type kind, - const path& name) - { - path_ = &name; - - istringstream is (attributes); - lexer l (is, name, lexer_mode::attribute); - set_lexer (&l); - - token t; - type tt; - next (t, tt); - - if (tt != type::lsbrace && tt != type::eos) - fail (t) << "expected '[' instead of " << t; - - attributes_push (t, tt, true); - - if (tt != type::eos) - fail (t) << "trailing junk after ']'"; - - build2::parser::apply_value_attributes (var, lhs, move (rhs), kind); - } - - // parser::parsed_doc - // - parser::parsed_doc:: - parsed_doc (string s, uint64_t l, uint64_t c) - : str (move (s)), re (false), end_line (l), end_column (c) - { - } - - parser::parsed_doc:: - parsed_doc (regex_lines&& r, uint64_t l, uint64_t c) - : regex (move (r)), re (true), end_line (l), end_column (c) - { - } - - parser::parsed_doc:: - parsed_doc (parsed_doc&& d) - : re (d.re), end_line (d.end_line), end_column (d.end_column) - { - if (re) - new (®ex) regex_lines (move (d.regex)); - else - new (&str) string (move (d.str)); - } - - parser::parsed_doc:: - ~parsed_doc () - { - if (re) - regex.~regex_lines (); - else - str.~string (); - } - } - } -} -- cgit v1.1