From 3eb0cd7fe3c1dec0bb3b7e1d225107e55ca4b435 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 4 Nov 2016 08:47:35 +0200 Subject: Various testscript lexer/parser fixes (testscript) --- build2/test/script/lexer | 13 ++-- build2/test/script/lexer.cxx | 61 +++++++++------- build2/test/script/parser | 14 +++- build2/test/script/parser.cxx | 161 +++++++++++++++++++++++++++--------------- 4 files changed, 161 insertions(+), 88 deletions(-) (limited to 'build2/test') diff --git a/build2/test/script/lexer b/build2/test/script/lexer index 5d77ab9..d79ef78 100644 --- a/build2/test/script/lexer +++ b/build2/test/script/lexer @@ -25,8 +25,8 @@ namespace build2 enum { script_line = base_type::value_next, + assign_line, // Auto-expires at the end of the token. variable_line, // Auto-expires at the end of the line. - test_line, command_line, here_line }; @@ -46,14 +46,13 @@ namespace build2 virtual void mode (base_mode, char = '\0') override; - // Return true if we entered the quoted (double or single) mode since - // last reset. + // Number of quoted (double or single) tokens since last reset. // - bool + size_t quoted () const {return quoted_;} void - reset_quoted (bool q) {quoted_ = q;} + reset_quoted (size_t q) {quoted_ = q;} protected: virtual token @@ -63,10 +62,10 @@ namespace build2 next_line (); virtual token - word (bool) override; + word (state, bool) override; protected: - bool quoted_ = false; + size_t quoted_; }; } } diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx index f752f39..f75ad4a 100644 --- a/build2/test/script/lexer.cxx +++ b/build2/test/script/lexer.cxx @@ -25,6 +25,14 @@ namespace build2 { case lexer_mode::script_line: { + s1 = "=!|&<> $(#\t\n"; + s2 = "== "; + break; + } + case lexer_mode::assign_line: + { + // As script_line but with variable assignments. + // s1 = "=+!|&<> $(#\t\n"; s2 = " == "; break; @@ -37,14 +45,7 @@ namespace build2 s2 = " "; break; } - case lexer_mode::test_line: - { - // As script_line but without variable assignments. - // - s1 = "=!|&<> $(#\t\n"; - s2 = "== "; - break; - } + case lexer_mode::command_line: { // Note that whitespaces are not word separators in this mode. @@ -64,10 +65,6 @@ namespace build2 s = false; break; } - case lexer_mode::single_quoted: - case lexer_mode::double_quoted: - quoted_ = true; - // Fall through. default: { // Disable pair separator. @@ -83,15 +80,22 @@ namespace build2 token lexer:: next_impl () { + token r; + switch (state_.top ().mode) { case lexer_mode::script_line: + case lexer_mode::assign_line: case lexer_mode::variable_line: - case lexer_mode::test_line: case lexer_mode::command_line: - case lexer_mode::here_line: return next_line (); - default: return base_lexer::next_impl (); + case lexer_mode::here_line: r = next_line (); break; + default: r = base_lexer::next_impl (); break; } + + if (r.quoted) + ++quoted_; + + return r; } token lexer:: @@ -110,7 +114,14 @@ namespace build2 if (eos (c)) return make_token (type::eos); - lexer_mode m (state_.top ().mode); + state st (state_.top ()); // Make copy (see assign_line). + lexer_mode m (st.mode); + + // Expire the assign mode at the end of the token. Do it early in case + // we push any new mode (e.g., double quote). + // + if (m == lexer_mode::assign_line) + state_.pop (); // NOTE: remember to update mode() if adding new special characters. @@ -148,7 +159,7 @@ namespace build2 // Command line operator/separators. // - if (m == lexer_mode::script_line || m == lexer_mode::test_line) + if (m == lexer_mode::script_line || m == lexer_mode::assign_line) { switch (c) { @@ -169,7 +180,7 @@ namespace build2 // Command operators/separators. // if (m == lexer_mode::script_line || - m == lexer_mode::test_line || + m == lexer_mode::assign_line || m == lexer_mode::command_line) { switch (c) @@ -234,7 +245,7 @@ namespace build2 // Variable assignment (=, +=, =+). // - if (m == lexer_mode::script_line) + if (m == lexer_mode::assign_line) { switch (c) { @@ -262,22 +273,24 @@ namespace build2 // Otherwise it is a word. // unget (c); - return word (sep); + return word (st, sep); } token lexer:: - word (bool sep) + word (state st, bool sep) { + lexer_mode m (st.mode); + // Customized implementation that handles special variable names ($*, // $~, $NNN). // - if (state_.top ().mode != lexer_mode::variable) - return base_lexer::word (sep); + if (m != lexer_mode::variable) + return base_lexer::word (st, sep); xchar c (peek ()); if (c != '*' && c != '~' && !digit (c)) - return base_lexer::word (sep); + return base_lexer::word (st, sep); uint64_t ln (c.line), cn (c.column); string lexeme; diff --git a/build2/test/script/parser b/build2/test/script/parser index f9fbd98..adff8a3 100644 --- a/build2/test/script/parser +++ b/build2/test/script/parser @@ -40,7 +40,7 @@ namespace build2 // protected: void - parse_script (token&, token_type&); + parse_script (); void parse_script_line (token&, token_type&); @@ -63,6 +63,18 @@ namespace build2 virtual lookup lookup_variable (name&&, string&&, const location&) override; + // Number of quoted tokens since last reset. Note that this includes + // the peeked token, if any. + // + protected: + size_t + quoted () const; + + void + reset_quoted (token& current); + + size_t replay_quoted_; + protected: using base_parser = build2::parser; diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index bfd0667..74bacee 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -30,22 +30,28 @@ namespace build2 runner_ = &r; scope_ = script_; - token t; - type tt; - next (t, tt); - - parse_script (t, tt); - - if (tt != type::eos) - fail (t) << "unexpected " << t; + parse_script (); } void parser:: - parse_script (token& t, token_type& tt) + parse_script () { - for (; tt != type::eos; next (t, tt)) + token t; + type tt; + + for (;;) { + // We need to start lexing each line in the assign mode in order to + // recognize assignment operators as separators. + // + mode (lexer_mode::assign_line); + next (t, tt); + + if (tt == type::eos) + break; + parse_script_line (t, tt); + assert (tt == type::newline); } } @@ -53,19 +59,27 @@ namespace build2 parse_script_line (token& t, token_type& tt) { // Decide whether this is a variable assignment or a command. It is a - // variable assignment if the first token is a word and the next is an - // assign/append/prepend operator. Assignment to a computed variable - // name must use the set builtin. + // variable assignment if the first token is an unquoted word and the + // next is an assign/append/prepend operator. Assignment to a computed + // variable name must use the set builtin. // - auto assign = [] (type t) + if (tt == type::word && !t.quoted) { - return t == type::assign || t == type::prepend || t == type::append; - }; + // Switch recognition of variable assignments for one more token. + // This is safe to do because we know we cannot be in the quoted + // mode (since the current token is not quoted). + // + mode (lexer_mode::assign_line); + type p (peek ()); - if (tt == type::word && assign (peek ())) - parse_variable_line (t, tt); - else - parse_test_line (t, tt); + if (p == type::assign || p == type::prepend || p == type::append) + { + parse_variable_line (t, tt); + return; + } + } + + parse_test_line (t, tt); } // Return true if the string contains only digit characters (used to @@ -84,16 +98,16 @@ namespace build2 void parser:: parse_variable_line (token& t, token_type& tt) { - location nl (get_location (t)); string name (move (t.value)); // Check if we are trying to modify any of the special aliases ($*, // $~, $N). // - if (name == "*" || name == "~" || digits (name)) - fail (nl) << "attempt to set '" << name << "' variable directly"; - - const variable& var (script_->var_pool.insert (move (name))); + if (pre_parse_) + { + if (name == "*" || name == "~" || digits (name)) + fail (t) << "attempt to set '" << name << "' variable directly"; + } next (t, tt); type kind (tt); // Assignment kind. @@ -106,39 +120,40 @@ namespace build2 if (tt != type::newline) fail (t) << "unexpected " << t; - value& lhs (kind == type::assign - ? scope_->assign (var) - : scope_->append (var)); + if (!pre_parse_) + { + const variable& var (script_->var_pool.insert (move (name))); - // @@ Need to adjust to make strings the default type. - // - apply_value_attributes (&var, lhs, move (rhs), kind); + value& lhs (kind == type::assign + ? scope_->assign (var) + : scope_->append (var)); - // Handle the $*, $NN special aliases. - // - // The plan is as follows: in this function we detect modification of - // the source variables (test*), and (re)set $* to NULL on this scope - // (this is important to both invalidate any old values but also to - // "stake" the lookup position). This signals to the variable lookup - // function below that the $* and $NN values need to be recalculated - // from their sources. Note that we don't need to invalidate $NN since - // their lookup always checks $* first. - // - if (var.name == script_->test_var.name || - var.name == script_->opts_var.name || - var.name == script_->args_var.name) - { - scope_->assign (script_->cmd_var) = nullptr; + // @@ Need to adjust to make strings the default type. + // + apply_value_attributes (&var, lhs, move (rhs), kind); + + // Handle the $*, $NN special aliases. + // + // The plan is as follows: in this function we detect modification + // of the source variables (test*), and (re)set $* to NULL on this + // scope (this is important to both invalidate any old values but + // also to "stake" the lookup position). This signals to the + // variable lookup function below that the $* and $NN values need to + // be recalculated from their sources. Note that we don't need to + // invalidate $NN since their lookup always checks $* first. + // + if (var.name == script_->test_var.name || + var.name == script_->opts_var.name || + var.name == script_->args_var.name) + { + scope_->assign (script_->cmd_var) = nullptr; + } } } void parser:: parse_test_line (token& t, token_type& tt) { - // Stop recognizing variable assignments. - // - mode (lexer_mode::test_line); - test ts; // Pending positions where the next word should go. @@ -371,12 +386,14 @@ namespace build2 // Note that we do it in the chunking mode to detect whether // anything in each chunk is quoted. // - lexer_->reset_quoted (t.quoted); + reset_quoted (t); parse_names (t, tt, ns, true, "command"); - // Process what we got. + // Process what we got. Determine whether anything inside was + // quoted (note that the current token is not part of it). // - bool q (lexer_->quoted ()); + bool q ((quoted () - (t.quoted ? 1 : 0)) != 0); + for (name& n: ns) { string s; @@ -505,7 +522,7 @@ namespace build2 // While we no longer need to recognize command line operators, we // also don't expect a valid test trailer to contain them. So we are - // going to continue lexing in the test_line mode. + // going to continue lexing in the script_line mode. // if (tt == type::equal || tt == type::not_equal) { @@ -516,8 +533,6 @@ namespace build2 if (tt != type::newline) fail (t) << "unexpected " << t; - expire_mode (); // Done parsing test-line. - // Parse here-document fragments in the order they were mentioned on // the command line. // @@ -702,6 +717,40 @@ namespace build2 return lookup (nv, vars); } + + size_t parser:: + quoted () const + { + size_t r (0); + + if (replay_ != replay::play) + r = lexer_->quoted (); + else + { + // Examine tokens we have replayed since last reset. + // + for (size_t i (replay_quoted_); i != replay_i_; ++i) + if (replay_data_[i].token.quoted) + ++r; + } + + return r; + } + + void parser:: + reset_quoted (token& cur) + { + if (replay_ != replay::play) + lexer_->reset_quoted (cur.quoted ? 1 : 0); + else + { + replay_quoted_ = replay_i_ - 1; + + // Must be the same token. + // + assert (replay_data_[replay_quoted_].token.quoted == cur.quoted); + } + } } } } -- cgit v1.1