aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-10-19 15:08:30 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-11-04 09:26:30 +0200
commit996136ec9b8d002b7f1cbd2d7eeac850a561143d (patch)
tree87bc68bfdf086191725fe211444f71ca4904f6fb
parent56eb8c36b5b408e08760fa18cf893e5e8a613b7a (diff)
Implement two-stage testscript parsing
-rw-r--r--build2/test/rule.cxx3
-rw-r--r--build2/test/script/lexer4
-rw-r--r--build2/test/script/parser13
-rw-r--r--build2/test/script/parser.cxx223
-rw-r--r--build2/test/script/script19
-rw-r--r--unit-tests/test/script/lexer/script-line.test14
6 files changed, 217 insertions, 59 deletions
diff --git a/build2/test/rule.cxx b/build2/test/rule.cxx
index 7e4101c..188cc90 100644
--- a/build2/test/rule.cxx
+++ b/build2/test/rule.cxx
@@ -373,7 +373,8 @@ namespace build2
ifdstream ifs (sp);
script::parser p;
- p.parse (ifs, sp, s, r);
+ p.pre_parse (ifs, sp, s);
+ p.parse (sp, s, r);
}
catch (const io_error& e)
{
diff --git a/build2/test/script/lexer b/build2/test/script/lexer
index d79ef78..80e6036 100644
--- a/build2/test/script/lexer
+++ b/build2/test/script/lexer
@@ -31,7 +31,9 @@ namespace build2
here_line
};
- using base_type::base_type;
+ lexer_mode () = default;
+ lexer_mode (value_type v): base_type (v) {}
+ lexer_mode (base_type v): base_type (v) {}
};
class lexer: public build2::lexer
diff --git a/build2/test/script/parser b/build2/test/script/parser
index adff8a3..6531aba 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -29,7 +29,10 @@ namespace build2
// Issue diagnostics and throw failed in case of an error.
//
void
- parse (istream&, const path& name, script&, runner&);
+ pre_parse (istream&, const path& name, script&);
+
+ void
+ parse (const path& name, script&, runner&);
// Recursive descent parser.
//
@@ -40,10 +43,16 @@ namespace build2
//
protected:
void
+ pre_parse_script ();
+
+ void
parse_script ();
+ line_type
+ pre_parse_script_line (token&, token_type&);
+
void
- parse_script_line (token&, token_type&);
+ parse_script_line (token&, token_type&, line_type);
void
parse_variable_line (token&, token_type&);
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index e515532..ea75d1b 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -18,7 +18,7 @@ namespace build2
using type = token_type;
void parser::
- parse (istream& is, const path& p, script& s, runner& r)
+ pre_parse (istream& is, const path& p, script& s)
{
path_ = &p;
@@ -27,20 +27,43 @@ namespace build2
base_parser::lexer_ = &l;
script_ = &s;
+ runner_ = nullptr;
+ scope_ = script_;
+
+ pre_parse_ = true;
+
+ pre_parse_script ();
+ }
+
+ void parser::
+ parse (const path& p, script& s, runner& r)
+ {
+ path_ = &p;
+
+ lexer_ = nullptr;
+ base_parser::lexer_ = nullptr;
+
+ script_ = &s;
runner_ = &r;
scope_ = script_;
+ pre_parse_ = false;
+
parse_script ();
}
void parser::
- parse_script ()
+ pre_parse_script ()
{
token t;
type tt;
for (;;)
{
+ // Start saving tokens for the next (logical) line.
+ //
+ replay_save ();
+
// We need to start lexing each line in the assign mode in order to
// recognize assignment operators as separators.
//
@@ -50,18 +73,46 @@ namespace build2
if (tt == type::eos)
break;
- parse_script_line (t, tt);
+ line_type lt (pre_parse_script_line (t, tt));
assert (tt == type::newline);
+
+ // Stop saving and get the tokens.
+ //
+ scope_->lines.push_back (line {lt, replay_data ()});
}
+
+ replay_stop (); // Discard replay of eos.
}
void parser::
- parse_script_line (token& t, token_type& tt)
+ parse_script ()
+ {
+ token t;
+ type tt;
+
+ for (line& l: scope_->lines)
+ {
+ replay_data (move (l.tokens)); // Set the tokens and start playing.
+
+ // We don't really need the assign mode since we already know the
+ // line type.
+ //
+ next (t, tt);
+
+ parse_script_line (t, tt, l.type);
+ assert (tt == type::newline);
+
+ replay_stop (); // Stop playing.
+ }
+ }
+
+ line_type parser::
+ pre_parse_script_line (token& t, token_type& tt)
{
// Decide whether this is a variable assignment or a command. It is a
- // variable assignment if the first token is an unquoted word and the
- // next is an assign/append/prepend operator. Assignment to a computed
- // variable name must use the set builtin.
+ // variable assignment if the first token is an unquoted word (name)
+ // and the next is an assign/append/prepend operator. Assignment to a
+ // computed variable name must use the set builtin.
//
if (tt == type::word && !t.quoted)
{
@@ -75,11 +126,22 @@ namespace build2
if (p == type::assign || p == type::prepend || p == type::append)
{
parse_variable_line (t, tt);
- return;
+ return line_type::variable;
}
}
parse_test_line (t, tt);
+ return line_type::test;
+ }
+
+ void parser::
+ parse_script_line (token& t, token_type& tt, line_type lt)
+ {
+ switch (lt)
+ {
+ case line_type::variable: parse_variable_line (t, tt); break;
+ case line_type::test: parse_test_line (t, tt); break;
+ }
}
// Return true if the string contains only digit characters (used to
@@ -174,20 +236,21 @@ namespace build2
// Ordered sequence of here-document redirects that we can expect to
// see after the command line.
//
- vector<reference_wrapper<redirect>> hd;
+ struct here_doc
+ {
+ redirect* redir;
+ string end;
+ };
+ vector<here_doc> hd;
// Add the next word to either one of the pending positions or
// to program arguments by default.
//
auto add_word = [&ts, &p, &hd, this] (string&& w, const location& l)
{
- auto add_here_end = [&w, &hd, &l, this] (redirect& r)
+ auto add_here_end = [&hd] (redirect& r, string&& w)
{
- if (w.empty ())
- fail (l) << "empty here-document end marker";
-
- hd.push_back (r);
- r.here_end = move (w);
+ hd.push_back (here_doc {&r, move (w)});
};
switch (p)
@@ -208,11 +271,13 @@ namespace build2
}
break;
}
- case pending::in_document: add_here_end (ts.in); break;
- case pending::in_string: ts.in.value = move (w); break;
- case pending::out_document: add_here_end (ts.out); break;
+
+ case pending::in_document: add_here_end (ts.in, move (w)); break;
+ case pending::out_document: add_here_end (ts.out, move (w)); break;
+ case pending::err_document: add_here_end (ts.err, move (w)); break;
+
+ case pending::in_string: ts.in.value = move (w); break;
case pending::out_string: ts.out.value = move (w); break;
- case pending::err_document: add_here_end (ts.err); break;
case pending::err_string: ts.err.value = move (w); break;
}
@@ -364,6 +429,33 @@ namespace build2
case type::out_string:
case type::out_document:
{
+ if (pre_parse_)
+ {
+ // The only thing we need to handle here are the here-document
+ // end markers since we need to know how many of the to pre-
+ // parse after the command.
+ //
+ switch (tt)
+ {
+ case type::in_document:
+ case type::out_document:
+ // We require the end marker to be a literal, unquoted word.
+ // In particularm, we don't allow quoted because of cases
+ // like foo"$bar" (where we will see word 'foo').
+ //
+ next (t, tt);
+
+ if (tt != type::word || t.quoted)
+ fail (l) << "here-document end marker expected";
+
+ hd.push_back (here_doc {nullptr, move (t.value)});
+ break;
+ }
+
+ next (t, tt);
+ break;
+ }
+
// If this is one of the operators/separators, check that we
// don't have any pending locations to be filled.
//
@@ -395,8 +487,12 @@ namespace build2
reset_quoted (t);
parse_names (t, tt, ns, true, "command");
+ if (pre_parse_) // Nothing else to do if we are pre-parsing.
+ break;
+
// Process what we got. Determine whether anything inside was
- // quoted (note that the current token is not part of it).
+ // quoted (note that the current token is "next" and is not part
+ // of this).
//
bool q ((quoted () - (t.quoted ? 1 : 0)) != 0);
@@ -500,12 +596,14 @@ namespace build2
{
case type::in_null:
case type::in_string:
- case type::in_document:
case type::out_null:
case type::out_string:
- case type::out_document:
parse_redirect (t, l);
break;
+ case type::in_document:
+ case type::out_document:
+ fail (l) << "here-document redirect in expansion";
+ break;
}
}
@@ -524,7 +622,8 @@ namespace build2
// Verify we don't have anything pending to be filled.
//
- check_pending (l);
+ if (!pre_parse_)
+ check_pending (l);
// While we no longer need to recognize command line operators, we
// also don't expect a valid test trailer to contain them. So we are
@@ -539,7 +638,7 @@ namespace build2
// Parse here-document fragments in the order they were mentioned on
// the command line.
//
- for (redirect& r: hd)
+ for (here_doc& h: hd)
{
// Switch to the here-line mode which is like double-quoted but
// recognized the newline as a separator.
@@ -547,14 +646,22 @@ namespace build2
mode (lexer_mode::here_line);
next (t, tt);
- r.value = parse_here_document (t, tt, r.here_end);
+ string v (parse_here_document (t, tt, h.end));
+
+ if (!pre_parse_)
+ {
+ redirect& r (*h.redir);
+ r.value = move (v);
+ r.here_end = move (h.end);
+ }
expire_mode ();
}
// Now that we have all the pieces, run the test.
//
- runner_->run (ts);
+ if (!pre_parse_)
+ runner_->run (ts);
}
command_exit parser::
@@ -570,18 +677,19 @@ namespace build2
names ns (parse_names (t, tt, true, "exit status"));
unsigned long es (256);
- try
- {
- if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ())
- es = stoul (ns[0].value);
- }
- catch (const exception&)
+ if (!pre_parse_)
{
- }
+ try
+ {
+ if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ())
+ es = stoul (ns[0].value);
+ }
+ catch (const exception&) {} // Fall through.
- if (es > 255)
- fail (t) << "command exit status expected instead of " << ns <<
- info << "must be an unsigned integer less than 256";
+ if (es > 255)
+ fail (t) << "exit status expected instead of '" << ns << "'" <<
+ info << "exit status is an unsigned integer less than 256";
+ }
return command_exit {comp, static_cast<uint8_t> (es)};
}
@@ -608,29 +716,32 @@ namespace build2
//
names ns (parse_names (t, tt, false, "here-document line"));
- // What shall we do if the expansion results in multiple names? For,
- // example if the line contains just the variable expansion and it
- // is of type strings. Adding all the elements space-separated seems
- // like the natural thing to do.
- //
- for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
+ if (!pre_parse_)
{
- string s;
-
- try
- {
- s = value_traits<string>::convert (move (*i), nullptr);
- }
- catch (const invalid_argument&)
+ // What shall we do if the expansion results in multiple names?
+ // For, example if the line contains just the variable expansion
+ // and it is of type strings. Adding all the elements space-
+ // separated seems like the natural thing to do.
+ //
+ for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
{
- fail (t) << "invalid string value '" << *i << "'";
- }
+ string s;
- if (i != b)
- r += ' ';
+ try
+ {
+ s = value_traits<string>::convert (move (*i), nullptr);
+ }
+ catch (const invalid_argument&)
+ {
+ fail (t) << "invalid string value '" << *i << "'";
+ }
+
+ if (i != b)
+ r += ' ';
- r += s;
- r += '\n'; // Here-document line always includes a newline.
+ r += s;
+ r += '\n'; // Here-document line always includes a newline.
+ }
}
// We should expand the whole line at once so this would normally be
@@ -651,6 +762,8 @@ namespace build2
lookup parser::
lookup_variable (name&& qual, string&& name, const location& loc)
{
+ assert (!pre_parse_);
+
if (!qual.empty ())
fail (loc) << "qualified variable name";
diff --git a/build2/test/script/script b/build2/test/script/script
index 590013a..e3afbca 100644
--- a/build2/test/script/script
+++ b/build2/test/script/script
@@ -12,6 +12,8 @@
#include <build2/test/target>
+#include <build2/test/script/token> // replay_tokens
+
namespace build2
{
class target;
@@ -20,6 +22,18 @@ namespace build2
{
namespace script
{
+ // Pre-parse representation.
+ //
+ enum class line_type {variable, test};
+
+ struct line
+ {
+ line_type type;
+ replay_tokens tokens;
+ };
+
+ // Parse object model.
+ //
enum class redirect_type
{
none,
@@ -118,6 +132,11 @@ namespace build2
//
value&
append (const variable&);
+
+ // Pre-parse.
+ //
+ public:
+ vector<line> lines;
};
class script: public scope
diff --git a/unit-tests/test/script/lexer/script-line.test b/unit-tests/test/script/lexer/script-line.test
index 9739cec..64d7c18 100644
--- a/unit-tests/test/script/lexer/script-line.test
+++ b/unit-tests/test/script/lexer/script-line.test
@@ -24,3 +24,17 @@ test.arguments += bar
$2
$*
($3 == [null])
+
+x = [uint64] 001
+foo $x
+cmd abc$(x)23
+cmd >>EOO << EOI
+foo
+$x
+bar
+EOO
+$x$x$x
+EOI
+
+y = >>
+cmd "2"$y