From 996136ec9b8d002b7f1cbd2d7eeac850a561143d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 19 Oct 2016 15:08:30 +0200 Subject: Implement two-stage testscript parsing --- build2/test/rule.cxx | 3 +- build2/test/script/lexer | 4 +- build2/test/script/parser | 13 +- build2/test/script/parser.cxx | 223 +++++++++++++++++++------- build2/test/script/script | 19 +++ unit-tests/test/script/lexer/script-line.test | 14 ++ 6 files changed, 217 insertions(+), 59 deletions(-) diff --git a/build2/test/rule.cxx b/build2/test/rule.cxx index 7e4101c..188cc90 100644 --- a/build2/test/rule.cxx +++ b/build2/test/rule.cxx @@ -373,7 +373,8 @@ namespace build2 ifdstream ifs (sp); script::parser p; - p.parse (ifs, sp, s, r); + p.pre_parse (ifs, sp, s); + p.parse (sp, s, r); } catch (const io_error& e) { diff --git a/build2/test/script/lexer b/build2/test/script/lexer index d79ef78..80e6036 100644 --- a/build2/test/script/lexer +++ b/build2/test/script/lexer @@ -31,7 +31,9 @@ namespace build2 here_line }; - using base_type::base_type; + lexer_mode () = default; + lexer_mode (value_type v): base_type (v) {} + lexer_mode (base_type v): base_type (v) {} }; class lexer: public build2::lexer diff --git a/build2/test/script/parser b/build2/test/script/parser index adff8a3..6531aba 100644 --- a/build2/test/script/parser +++ b/build2/test/script/parser @@ -29,7 +29,10 @@ namespace build2 // Issue diagnostics and throw failed in case of an error. // void - parse (istream&, const path& name, script&, runner&); + pre_parse (istream&, const path& name, script&); + + void + parse (const path& name, script&, runner&); // Recursive descent parser. 
// @@ -40,10 +43,16 @@ namespace build2 // protected: void + pre_parse_script (); + + void parse_script (); + line_type + pre_parse_script_line (token&, token_type&); + void - parse_script_line (token&, token_type&); + parse_script_line (token&, token_type&, line_type); void parse_variable_line (token&, token_type&); diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index e515532..ea75d1b 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -18,7 +18,7 @@ namespace build2 using type = token_type; void parser:: - parse (istream& is, const path& p, script& s, runner& r) + pre_parse (istream& is, const path& p, script& s) { path_ = &p; @@ -27,20 +27,43 @@ namespace build2 base_parser::lexer_ = &l; script_ = &s; + runner_ = nullptr; + scope_ = script_; + + pre_parse_ = true; + + pre_parse_script (); + } + + void parser:: + parse (const path& p, script& s, runner& r) + { + path_ = &p; + + lexer_ = nullptr; + base_parser::lexer_ = nullptr; + + script_ = &s; runner_ = &r; scope_ = script_; + pre_parse_ = false; + parse_script (); } void parser:: - parse_script () + pre_parse_script () { token t; type tt; for (;;) { + // Start saving tokens for the next (logical) line. + // + replay_save (); + // We need to start lexing each line in the assign mode in order to // recognize assignment operators as separators. // @@ -50,18 +73,46 @@ namespace build2 if (tt == type::eos) break; - parse_script_line (t, tt); + line_type lt (pre_parse_script_line (t, tt)); assert (tt == type::newline); + + // Stop saving and get the tokens. + // + scope_->lines.push_back (line {lt, replay_data ()}); } + + replay_stop (); // Discard replay of eos. } void parser:: - parse_script_line (token& t, token_type& tt) + parse_script () + { + token t; + type tt; + + for (line& l: scope_->lines) + { + replay_data (move (l.tokens)); // Set the tokens and start playing. + + // We don't really need the assign mode since we already know the + // line type. 
+ // + next (t, tt); + + parse_script_line (t, tt, l.type); + assert (tt == type::newline); + + replay_stop (); // Stop playing. + } + } + + line_type parser:: + pre_parse_script_line (token& t, token_type& tt) { // Decide whether this is a variable assignment or a command. It is a - // variable assignment if the first token is an unquoted word and the - // next is an assign/append/prepend operator. Assignment to a computed - // variable name must use the set builtin. + // variable assignment if the first token is an unquoted word (name) + // and the next is an assign/append/prepend operator. Assignment to a + // computed variable name must use the set builtin. // if (tt == type::word && !t.quoted) { @@ -75,11 +126,22 @@ namespace build2 if (p == type::assign || p == type::prepend || p == type::append) { parse_variable_line (t, tt); - return; + return line_type::variable; } } parse_test_line (t, tt); + return line_type::test; + } + + void parser:: + parse_script_line (token& t, token_type& tt, line_type lt) + { + switch (lt) + { + case line_type::variable: parse_variable_line (t, tt); break; + case line_type::test: parse_test_line (t, tt); break; + } } // Return true if the string contains only digit characters (used to @@ -174,20 +236,21 @@ namespace build2 // Ordered sequence of here-document redirects that we can expect to // see after the command line. // - vector> hd; + struct here_doc + { + redirect* redir; + string end; + }; + vector hd; // Add the next word to either one of the pending positions or // to program arguments by default. 
// auto add_word = [&ts, &p, &hd, this] (string&& w, const location& l) { - auto add_here_end = [&w, &hd, &l, this] (redirect& r) + auto add_here_end = [&hd] (redirect& r, string&& w) { - if (w.empty ()) - fail (l) << "empty here-document end marker"; - - hd.push_back (r); - r.here_end = move (w); + hd.push_back (here_doc {&r, move (w)}); }; switch (p) @@ -208,11 +271,13 @@ namespace build2 } break; } - case pending::in_document: add_here_end (ts.in); break; - case pending::in_string: ts.in.value = move (w); break; - case pending::out_document: add_here_end (ts.out); break; + + case pending::in_document: add_here_end (ts.in, move (w)); break; + case pending::out_document: add_here_end (ts.out, move (w)); break; + case pending::err_document: add_here_end (ts.err, move (w)); break; + + case pending::in_string: ts.in.value = move (w); break; case pending::out_string: ts.out.value = move (w); break; - case pending::err_document: add_here_end (ts.err); break; case pending::err_string: ts.err.value = move (w); break; } @@ -364,6 +429,33 @@ namespace build2 case type::out_string: case type::out_document: { + if (pre_parse_) + { + // The only thing we need to handle here are the here-document + // end markers since we need to know how many of them to pre- + // parse after the command. + // + switch (tt) + { + case type::in_document: + case type::out_document: + // We require the end marker to be a literal, unquoted word. + // In particular, we don't allow quoted because of cases + // like foo"$bar" (where we will see word 'foo'). + // + next (t, tt); + + if (tt != type::word || t.quoted) + fail (l) << "here-document end marker expected"; + + hd.push_back (here_doc {nullptr, move (t.value)}); + break; + } + + next (t, tt); + break; + } + // If this is one of the operators/separators, check that we // don't have any pending locations to be filled. 
// @@ -395,8 +487,12 @@ namespace build2 reset_quoted (t); parse_names (t, tt, ns, true, "command"); + if (pre_parse_) // Nothing else to do if we are pre-parsing. + break; + // Process what we got. Determine whether anything inside was - // quoted (note that the current token is not part of it). + // quoted (note that the current token is "next" and is not part + // of this). // bool q ((quoted () - (t.quoted ? 1 : 0)) != 0); @@ -500,12 +596,14 @@ namespace build2 { case type::in_null: case type::in_string: - case type::in_document: case type::out_null: case type::out_string: - case type::out_document: parse_redirect (t, l); break; + case type::in_document: + case type::out_document: + fail (l) << "here-document redirect in expansion"; + break; } } @@ -524,7 +622,8 @@ namespace build2 // Verify we don't have anything pending to be filled. // - check_pending (l); + if (!pre_parse_) + check_pending (l); // While we no longer need to recognize command line operators, we // also don't expect a valid test trailer to contain them. So we are @@ -539,7 +638,7 @@ namespace build2 // Parse here-document fragments in the order they were mentioned on // the command line. // - for (redirect& r: hd) + for (here_doc& h: hd) { // Switch to the here-line mode which is like double-quoted but // recognized the newline as a separator. @@ -547,14 +646,22 @@ namespace build2 mode (lexer_mode::here_line); next (t, tt); - r.value = parse_here_document (t, tt, r.here_end); + string v (parse_here_document (t, tt, h.end)); + + if (!pre_parse_) + { + redirect& r (*h.redir); + r.value = move (v); + r.here_end = move (h.end); + } expire_mode (); } // Now that we have all the pieces, run the test. 
// - runner_->run (ts); + if (!pre_parse_) + runner_->run (ts); } command_exit parser:: @@ -570,18 +677,19 @@ namespace build2 names ns (parse_names (t, tt, true, "exit status")); unsigned long es (256); - try - { - if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) - es = stoul (ns[0].value); - } - catch (const exception&) + if (!pre_parse_) { - } + try + { + if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) + es = stoul (ns[0].value); + } + catch (const exception&) {} // Fall through. - if (es > 255) - fail (t) << "command exit status expected instead of " << ns << - info << "must be an unsigned integer less than 256"; + if (es > 255) + fail (t) << "exit status expected instead of '" << ns << "'" << + info << "exit status is an unsigned integer less than 256"; + } return command_exit {comp, static_cast (es)}; } @@ -608,29 +716,32 @@ namespace build2 // names ns (parse_names (t, tt, false, "here-document line")); - // What shall we do if the expansion results in multiple names? For, - // example if the line contains just the variable expansion and it - // is of type strings. Adding all the elements space-separated seems - // like the natural thing to do. - // - for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) + if (!pre_parse_) { - string s; - - try - { - s = value_traits::convert (move (*i), nullptr); - } - catch (const invalid_argument&) + // What shall we do if the expansion results in multiple names? + // For example, if the line contains just the variable expansion + // and it is of type strings. Adding all the elements space- + // separated seems like the natural thing to do. 
+ // + for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) { - fail (t) << "invalid string value '" << *i << "'"; - } + string s; - if (i != b) - r += ' '; + try + { + s = value_traits::convert (move (*i), nullptr); + } + catch (const invalid_argument&) + { + fail (t) << "invalid string value '" << *i << "'"; + } + + if (i != b) + r += ' '; - r += s; - r += '\n'; // Here-document line always includes a newline. + r += s; + r += '\n'; // Here-document line always includes a newline. + } } // We should expand the whole line at once so this would normally be @@ -651,6 +762,8 @@ namespace build2 lookup parser:: lookup_variable (name&& qual, string&& name, const location& loc) { + assert (!pre_parse_); + if (!qual.empty ()) fail (loc) << "qualified variable name"; diff --git a/build2/test/script/script b/build2/test/script/script index 590013a..e3afbca 100644 --- a/build2/test/script/script +++ b/build2/test/script/script @@ -12,6 +12,8 @@ #include +#include // replay_tokens + namespace build2 { class target; @@ -20,6 +22,18 @@ namespace build2 { namespace script { + // Pre-parse representation. + // + enum class line_type {variable, test}; + + struct line + { + line_type type; + replay_tokens tokens; + }; + + // Parse object model. + // enum class redirect_type { none, @@ -118,6 +132,11 @@ namespace build2 // value& append (const variable&); + + // Pre-parse. + // + public: + vector lines; }; class script: public scope diff --git a/unit-tests/test/script/lexer/script-line.test b/unit-tests/test/script/lexer/script-line.test index 9739cec..64d7c18 100644 --- a/unit-tests/test/script/lexer/script-line.test +++ b/unit-tests/test/script/lexer/script-line.test @@ -24,3 +24,17 @@ test.arguments += bar $2 $* ($3 == [null]) + +x = [uint64] 001 +foo $x +cmd abc$(x)23 +cmd >>EOO << EOI +foo +$x +bar +EOO +$x$x$x +EOI + +y = >> +cmd "2"$y -- cgit v1.1