aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-10-13 13:08:31 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-11-04 08:29:23 +0200
commit5381b25c51475c0c7a2f39e9f6efa623f621ef3e (patch)
tree634110f6d81fea2d67f13564a26030f088c154bc
parent18ce15f3aee71debe3f35356c6a739943815da8a (diff)
Continue work on testscript parser
-rw-r--r--build2/test/script/lexer3
-rw-r--r--build2/test/script/lexer.cxx28
-rw-r--r--build2/test/script/parser21
-rw-r--r--build2/test/script/parser.cxx403
-rw-r--r--build2/test/script/script39
-rw-r--r--doc/testscript.cli66
6 files changed, 467 insertions, 93 deletions
diff --git a/build2/test/script/lexer b/build2/test/script/lexer
index de4c84e..f3d8fa9 100644
--- a/build2/test/script/lexer
+++ b/build2/test/script/lexer
@@ -27,7 +27,8 @@ namespace build2
script_line = base_type::value_next,
variable_line,
test_line,
- command_line
+ command_line,
+ here_line
};
using base_type::base_type;
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index 84be7c1..e0a3272 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -25,24 +25,24 @@ namespace build2
{
case lexer_mode::script_line:
{
- s1 = "=+!|&<> $()#\t\n";
- s2 = " == ";
+ s1 = "=+!|&<> $(#\t\n";
+ s2 = " == ";
break;
}
case lexer_mode::variable_line:
{
// Like value except we don't recognize {.
//
- s1 = " $()[]#\t\n";
- s2 = " ";
+ s1 = " $([]#\t\n";
+ s2 = " ";
break;
}
case lexer_mode::test_line:
{
// As script_line but without variable assignments.
//
- s1 = "=!|&<> $()#\t\n";
- s2 = "== ";
+ s1 = "=!|&<> $(#\t\n";
+ s2 = "== ";
break;
}
case lexer_mode::command_line:
@@ -54,6 +54,16 @@ namespace build2
s = false;
break;
}
+ case lexer_mode::here_line:
+ {
+ // This one is like a double-quoted string except it treats
+ // newlines as a separator.
+ //
+ s1 = "$(\n";
+ s2 = " ";
+ s = false;
+ break;
+ }
case lexer_mode::single_quoted:
case lexer_mode::double_quoted:
quoted_ = true;
@@ -77,8 +87,9 @@ namespace build2
case lexer_mode::script_line:
case lexer_mode::variable_line:
case lexer_mode::test_line:
- case lexer_mode::command_line: return next_line ();
- default: return base_lexer::next_impl ();
+ case lexer_mode::command_line:
+ case lexer_mode::here_line: return next_line ();
+ default: return base_lexer::next_impl ();
}
}
@@ -110,7 +121,6 @@ namespace build2
//
case '$': return token (type::dollar, sep, ln, cn);
case '(': return token (type::lparen, sep, ln, cn);
- case ')': return token (type::rparen, sep, ln, cn);
}
}
diff --git a/build2/test/script/parser b/build2/test/script/parser
index 720a077..0ba4710 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -25,11 +25,9 @@ namespace build2
class parser: protected build2::parser
{
public:
- using script_type = test::script::script;
-
// Issue diagnostics and throw failed in case of an error.
//
- script_type
+ script
parse (istream&, const path& name, target& test, target& script);
// Recursive descent parser.
@@ -40,25 +38,28 @@ namespace build2
//
protected:
void
- script (token&, token_type&);
+ parse_script (token&, token_type&);
void
- script_line (token&, token_type&);
+ parse_script_line (token&, token_type&);
void
- variable_line (token&, token_type&, string);
+ parse_variable_line (token&, token_type&, string);
void
- test_line (token&, token_type&, names_type, location);
+ parse_test_line (token&, token_type&, names_type, location);
- void
- command_exit (token&, token_type&);
+ command_exit
+ parse_command_exit (token&, token_type&);
+
+ string
+ parse_here_document (token&, token_type&, const string&);
protected:
using base_parser = build2::parser;
lexer* lexer_;
- script_type* script_;
+ script* script_;
};
}
}
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index aba9f9a..ce867b3 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -25,14 +25,14 @@ namespace build2
lexer_ = &l;
base_parser::lexer_ = &l;
- script_type r (test_t, script_t);
+ script r (test_t, script_t);
script_ = &r;
token t (type::eos, false, 0, 0);
type tt;
next (t, tt);
- script (t, tt);
+ parse_script (t, tt);
if (tt != type::eos)
fail (t) << "unexpected " << t;
@@ -41,16 +41,16 @@ namespace build2
}
void parser::
- script (token& t, token_type& tt)
+ parse_script (token& t, token_type& tt)
{
while (tt != type::eos)
{
- script_line (t, tt);
+ parse_script_line (t, tt);
}
}
void parser::
- script_line (token& t, token_type& tt)
+ parse_script_line (token& t, token_type& tt)
{
// Parse first chunk. Keep track of whether anything in it was quoted.
//
@@ -98,14 +98,14 @@ namespace build2
if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ())
fail (nl) << "variable name expected instead of '" << ns << "'";
- variable_line (t, tt, move (ns[0].value));
+ parse_variable_line (t, tt, move (ns[0].value));
}
else
- test_line (t, tt, move (ns), move (nl));
+ parse_test_line (t, tt, move (ns), move (nl));
}
void parser::
- variable_line (token& t, token_type& tt, string name)
+ parse_variable_line (token& t, token_type& tt, string name)
{
type kind (tt); // Assignment kind.
const variable_type& var (script_->var_pool.insert (move (name)));
@@ -125,18 +125,196 @@ namespace build2
}
void parser::
- test_line (token& t, token_type& tt, names_type ns, location nl)
+ parse_test_line (token& t, token_type& tt, names_type ns, location nl)
{
// Stop recognizing variable assignments.
//
mode (lexer_mode::test_line);
+ test ts;
+
+ // Pending positions where the next word should go.
+ //
+ enum class pending
+ {
+ none,
+ program,
+ in_string,
+ in_document,
+ out_string,
+ out_document,
+ err_string,
+ err_document
+ };
+ pending p (pending::program);
+
+ // Ordered sequence of here-document redirects that we can expect to
+ // see after the command line. We temporarily store the end marker
+ // as the redirect's value.
+ //
+ vector<reference_wrapper<redirect>> hd;
+
+ // Add the next word to either one of the pending positions or
+ // to program arguments by default.
+ //
+ // A word in the program position is converted to a path; an empty or
+ // invalid path is diagnosed at the supplied location. A word in one of
+ // the *_string or *_document positions becomes the value of the
+ // corresponding redirect. For the *_document positions the word is the
+ // here-document end marker and the redirect is also appended to hd so
+ // that the document itself can be parsed after the command line. In all
+ // cases the pending position is reset to none afterwards.
+ //
+ // NOTE(review): if the same stream is given a here-document redirect more
+ // than once, hd ends up referring to the same redirect object twice --
+ // confirm that this is handled as intended.
+ //
+ auto add_word = [&ts, &p, &hd, this] (string&& w, const location& l)
+ {
+ switch (p)
+ {
+ case pending::none: ts.arguments.push_back (move (w)); break;
+ case pending::program:
+ {
+ try
+ {
+ ts.program = path (move (w));
+
+ if (ts.program.empty ())
+ fail (l) << "empty program path";
+ }
+ catch (const invalid_path& e)
+ {
+ fail (l) << "invalid program path '" << e.path << "'";
+ }
+ break;
+ }
+ // For each stream the document case registers the redirect in hd and
+ // then shares the value assignment with the string case.
+ //
+ case pending::in_document: hd.push_back (ts.in); // Fall through.
+ case pending::in_string: ts.in.value = move (w); break;
+
+ case pending::out_document: hd.push_back (ts.out); // Fall through.
+ case pending::out_string: ts.out.value = move (w); break;
+
+ case pending::err_document: hd.push_back (ts.err); // Fall through.
+ case pending::err_string: ts.err.value = move (w); break;
+ }
+
+ p = pending::none;
+ };
+
+        // Make sure we don't have any pending positions to fill and fail at
+        // the specified location naming the missing element otherwise.
+        //
+        // Note that the pending position must be captured by reference: it
+        // is changed by add_word and parse_redirect after this lambda has
+        // been created. A by-value capture would freeze it at its initial
+        // value (program) and make every check fail with "missing program".
+        //
+        auto check_pending = [&p, this] (const location& l)
+        {
+          const char* what (nullptr);
+
+          switch (p)
+          {
+          case pending::none:                                            break;
+          case pending::program:      what = "program";                  break;
+          case pending::in_string:    what = "stdin here-string";        break;
+          case pending::in_document:  what = "stdin here-document end";  break;
+          case pending::out_string:   what = "stdout here-string";       break;
+          case pending::out_document: what = "stdout here-document end"; break;
+          case pending::err_string:   what = "stderr here-string";       break;
+          case pending::err_document: what = "stderr here-document end"; break;
+          }
+
+          if (what != nullptr)
+            fail (l) << "missing " << what;
+        };
+
+        // Parse the redirect operator.
+        //
+        // The token is expected to be one of the in_*/out_* redirect types.
+        // If it is not separated from the preceding word, then that word
+        // (the last argument added so far) must be the file descriptor (0,
+        // 1, or 2) and it is removed from the arguments. Otherwise the
+        // descriptor defaults to 0 for the input redirects and to 1 for the
+        // output ones. For the here-string and here-document redirects the
+        // corresponding pending position is set so that the next word
+        // becomes the redirect's value.
+        //
+        auto parse_redirect =
+          [&ts, &p, this] (const token& t, const location& l)
+        {
+          // Our semantics is the last redirect seen takes effect.
+          //
+          assert (p == pending::none);
+
+          // See if we have the file descriptor. The value 3 means it was
+          // not specified.
+          //
+          unsigned long fd (3);
+          if (!t.separated)
+          {
+            // The descriptor is the preceding word so there must be one.
+            // Checking this first also keeps us from calling back() on an
+            // empty vector.
+            //
+            if (ts.arguments.empty ())
+              fail (l) << "missing redirect file descriptor";
+
+            const string& s (ts.arguments.back ());
+
+            try
+            {
+              size_t n;
+              fd = stoul (s, &n);
+
+              // Trailing junk and descriptors other than 0, 1, 2 are
+              // diagnosed the same way as a non-number.
+              //
+              if (n != s.size () || fd > 2)
+                throw invalid_argument (string ());
+            }
+            catch (const exception&)
+            {
+              fail (l) << "invalid redirect file descriptor '" << s << "'";
+            }
+
+            ts.arguments.pop_back ();
+          }
+
+          type tt (t.type);
+
+          // Validate/set default file descriptor.
+          //
+          switch (tt)
+          {
+          case type::in_null:
+          case type::in_string:
+          case type::in_document:
+            {
+              if ((fd = fd == 3 ? 0 : fd) != 0)
+                fail (l) << "invalid in redirect file descriptor " << fd;
+
+              break;
+            }
+          case type::out_null:
+          case type::out_string:
+          case type::out_document:
+            {
+              if ((fd = fd == 3 ? 1 : fd) == 0)
+                fail (l) << "invalid out redirect file descriptor " << fd;
+
+              break;
+            }
+          }
+
+          // Initialize to none so that the value is never indeterminate
+          // should the token turn out not to be a redirect.
+          //
+          redirect_type rt (redirect_type::none);
+          switch (tt)
+          {
+          case type::in_null:
+          case type::out_null:     rt = redirect_type::null;          break;
+          case type::in_string:
+          case type::out_string:   rt = redirect_type::here_string;   break;
+          case type::in_document:
+          case type::out_document: rt = redirect_type::here_document; break;
+          }
+
+          redirect& r (fd == 0 ? ts.in : fd == 1 ? ts.out : ts.err);
+          r.type = rt;
+
+          // Arrange for the next word to be stored as the redirect's value
+          // (the string itself or the here-document end marker).
+          //
+          switch (rt)
+          {
+          case redirect_type::none:
+          case redirect_type::null:
+            break;
+          case redirect_type::here_string:
+            switch (fd)
+            {
+            case 0: p = pending::in_string;  break;
+            case 1: p = pending::out_string; break;
+            case 2: p = pending::err_string; break;
+            }
+            break;
+          case redirect_type::here_document:
+            switch (fd)
+            {
+            case 0: p = pending::in_document;  break;
+            case 1: p = pending::out_document; break;
+            case 2: p = pending::err_document; break;
+            }
+            break;
+          }
+        };
+
// Keep parsing chunks of the command line until we see the newline or
// the exit status comparison.
//
- strings cmd;
-
- do
+ for (bool done (false); !done; )
{
// Process words that we already have.
//
@@ -161,7 +339,7 @@ namespace build2
// re-lex.
//
if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
- cmd.push_back (move (s));
+ add_word (move (s), nl);
else
{
// Come up with a "path" that contains both the original
@@ -188,19 +366,28 @@ namespace build2
string w;
bool f (true); // In case the whole thing is empty.
- for (token t (lex.next ()); t.type != type::eos; t = lex.next ())
+
+ // Treat the first "sub-token" as always separated from what we
+ // saw earlier.
+ //
+ // Note that this is not "our" token so we cannot do fail(t).
+ // Rather we should do fail(l).
+ //
+ token t (lex.next ());
+ location l (build2::get_location (t, name));
+ t.separated = true;
+
+ for (; t.type != type::eos; t = lex.next ())
{
- // Note that this is not "our" token so we cannot do fail(t).
- // Rather we should do fail(l).
- //
- location l (build2::get_location (t, lex.name ()));
+ type tt (t.type);
+ l = build2::get_location (t, name);
// Re-lexing double-quotes will recognize $, ( inside as
// tokens so we have to reverse them back. Since we don't
// treat spaces as separators we can be sure we will get it
// right.
//
- switch (t.type)
+ switch (tt)
{
case type::dollar: w += '$'; continue;
case type::lparen: w += '('; continue;
@@ -211,28 +398,33 @@ namespace build2
//
if (!w.empty () || f)
{
- cmd.push_back (move (w));
+ add_word (move (w), l);
f = false;
}
- switch (t.type)
+ if (tt == type::name)
{
- case type::name: w = move (t.value); f = true; break;
+ w = move (t.value);
+ f = true;
+ continue;
+ }
- // @@ TODO
- //
- case type::pipe:
- case type::clean:
- case type::log_and:
- case type::log_or:
+ // If this is one of the operators/separators, check that we
+ // don't have any pending locations to be filled.
+ //
+ check_pending (l);
+ // Note: there is another one in the outer loop below.
+ //
+ switch (tt)
+ {
case type::in_null:
case type::in_string:
case type::in_document:
-
case type::out_null:
case type::out_string:
case type::out_document:
+ parse_redirect (t, l);
break;
}
}
@@ -240,43 +432,164 @@ namespace build2
// Don't forget the last word.
//
if (!w.empty () || f)
- cmd.push_back (move (w));
+ add_word (move (w), l);
}
}
- if (tt == type::newline ||
- tt == type::equal ||
- tt == type::not_equal)
- break;
+ switch (tt)
+ {
+ case type::equal:
+ case type::not_equal:
+ case type::newline:
+ {
+ done = true;
+ break;
+ }
+ default:
+ {
+ // Parse the next chunk.
+ //
+ ns.clear ();
+ lexer_->reset_quoted (t.quoted);
+ nl = get_location (t);
+ names (t, tt, ns, true);
+ continue;
+ }
+ }
- // Parse the next chunk.
+ // If this is one of the operators/separators, check that we don't
+ // have any pending locations to be filled.
//
- ns.clear ();
- lexer_->reset_quoted (t.quoted);
- names (t, tt, ns, true);
+ check_pending (nl);
- } while (true);
+ // Note: there is another one in the inner loop above.
+ //
+ switch (tt)
+ {
+ case type::in_null:
+ case type::in_string:
+ case type::in_document:
+ case type::out_null:
+ case type::out_string:
+ case type::out_document:
+ parse_redirect (t, get_location (t));
+ next (t, tt);
+ break;
+ }
+ }
- //@@ switch mode (we no longer want to recognize command operators)?
+ // Verify we don't have anything pending to be filled.
+ //
+ check_pending (nl);
+ // While we no longer need to recognize command line operators, we
+ // also don't expect a valid test trailer to contain them. So we are
+ // going to continue lexing in the test_line mode.
+ //
if (tt == type::equal || tt == type::not_equal)
{
- command_exit (t, tt);
+ next (t, tt);
+ ts.exit = parse_command_exit (t, tt);
}
- // here-document
+ if (tt != type::newline)
+ fail (t) << "unexpected " << t;
+
+ expire_mode (); // Done parsing test-line.
+
+ // Parse here-document fragments in the order they were mentioned on
+ // the command line. The end marker is temporarily stored as the
+ // redirect's value.
+ //
+ if (!hd.empty ())
+ {
+ // Switch to the here-line mode which is like double-quoted but
+ // recognizes the newline as a separator.
+ //
+ mode (lexer_mode::here_line);
+ next (t, tt);
+
+ for (redirect& r: hd)
+ r.value = parse_here_document (t, tt, r.value);
+
+ expire_mode ();
+ }
}
- void parser::
- command_exit (token& t, token_type& tt)
+ command_exit parser::
+ parse_command_exit (token& t, token_type& tt)
{
// The next chunk should be the exit status.
//
- next (t, tt);
names_type ns (names (t, tt, true));
//@@ TODO: validate to be single, simple, non-empty name that
// converts to integer (is exit status always non-negative).
+
+ return command_exit {exit_comparison::eq, 0};
+ }
+
+      // Parse a here-document fragment terminated by the end marker em and
+      // return its text.
+      //
+      // Each line is expanded and appended to the result followed by a
+      // newline. The end marker is an unquoted name equal to em that is
+      // immediately followed by a newline. On success the current token is
+      // the one following the end marker line. Fail if the end of stream is
+      // reached before the end marker is seen.
+      //
+      string parser::
+      parse_here_document (token& t, token_type& tt, const string& em)
+      {
+        string r;
+
+        while (tt != type::eos)
+        {
+          // Check if this is the end marker.
+          //
+          if (tt == type::name &&
+              !t.quoted        &&
+              t.value == em    &&
+              peek () == type::newline)
+          {
+            next (t, tt); // Get the newline.
+            break;
+          }
+
+          // Expand the line.
+          //
+          names_type ns (names (t, tt));
+
+          // What shall we do if the expansion results in multiple names?
+          // For example, if the line contains just the variable expansion
+          // and it is of type strings. Adding all the elements
+          // space-separated seems like the natural thing to do.
+          //
+          for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
+          {
+            string s;
+
+            try
+            {
+              s = value_traits<string>::convert (move (*i), nullptr);
+            }
+            catch (const invalid_argument&)
+            {
+              fail (t) << "invalid string value '" << *i << "'";
+            }
+
+            if (i != b)
+              r += ' ';
+
+            r += s;
+          }
+
+          // Here-document line always includes a newline. Note that it is
+          // added once per line (after all the names have been joined), not
+          // once per name, and also for lines that expand to nothing
+          // (presumably blank lines -- assumes names() yields no names for
+          // them).
+          //
+          r += '\n';
+
+          // We should expand the whole line at once so this would normally
+          // be a newline but can also be an end-of-stream.
+          //
+          if (tt == type::newline)
+            next (t, tt);
+          else
+            assert (tt == type::eos);
+        }
+
+        if (tt == type::eos)
+          fail (t) << "missing here-document end marker '" << em << "'";
+
+        // Here the current token is the newline after the end marker; move
+        // past it.
+        //
+        next (t, tt);
+        return r;
+      }
}
}
diff --git a/build2/test/script/script b/build2/test/script/script
index de81fa6..cda4feb 100644
--- a/build2/test/script/script
+++ b/build2/test/script/script
@@ -18,6 +18,45 @@ namespace build2
{
namespace script
{
+ // Kind of redirect applied to one of the command's standard streams.
+ // The parser maps the in_null/out_null, in_string/out_string, and
+ // in_document/out_document tokens to null, here_string, and here_document,
+ // respectively; none is the default of a redirect.
+ //
+ enum class redirect_type
+ {
+ none,
+ null,
+ here_string, // Value is the string.
+ here_document // Value is the document.
+ };
+
+ // Redirect of a single stream: its kind plus the associated text, if any.
+ //
+ // Note that while a test line is being parsed, the value of a
+ // here-document redirect temporarily holds the end marker; the parser
+ // replaces it with the document text once the document has been parsed.
+ //
+ struct redirect
+ {
+ redirect_type type = redirect_type::none;
+ string value;
+ };
+
+ // A command: the program path, its arguments, and the redirects. The
+ // parser stores redirects for file descriptors 0, 1, and 2 in the in,
+ // out, and err members, respectively.
+ //
+ struct command
+ {
+ path program;
+ strings arguments;
+
+ redirect in;
+ redirect out;
+ redirect err;
+ };
+
+ // How the exit status is compared to the expected one (presumably eq
+ // and ne correspond to the == and != operators -- confirm against the
+ // parser once the exit status parsing is implemented).
+ //
+ enum class exit_comparison {eq, ne};
+
+ // Expected exit status of a command together with the comparison to
+ // apply. The defaults are comparison eq and status 0.
+ //
+ struct command_exit
+ {
+ // @@ Need to understand what type we should use for status.
+
+ exit_comparison comparison = exit_comparison::eq;
+ uint16_t status = 0;
+ };
+
+ // A test: a command plus its exit status check.
+ //
+ struct test: command
+ {
+ command_exit exit;
+ };
+
class script
{
public:
diff --git a/doc/testscript.cli b/doc/testscript.cli
index 1aff571..a5aef9d 100644
--- a/doc/testscript.cli
+++ b/doc/testscript.cli
@@ -711,23 +711,33 @@ command-exit: ('=='|'!=') <exit-status>
command: <path> (' '+ <arg>)* {stdin? stdout? stderr?}
-stdin: '0'? ('<!'|\
- '<' <text>|\
- '<<' <here-end>)
+stdin: '0'?('<!'|\
+ '<' <text>|\
+ '<<' <here-end>)
-stdout: '1'? ('>!'|\
- '>' <text>|\
- '>>' <here-end>)
+stdout: '1'?('>!'|\
+ '>' <text>|\
+ '>>' <here-end>)
-stderr: '2' ('>!'|\
- '>' <text>|\
- '>>' <here-end>)
+stderr: '2'('>!'|\
+ '>' <text>|\
+ '>>' <here-end>)
here-document:
<text>*
<here-end>
\
+Note that if specified, a file descriptor must not be separated from the
+redirect operator by whitespace. For example, the following command
+has \c{2} as an argument and redirects \c{stdout}, not \c{stderr}.
+
+A here-line is like a double-quoted string but recognizes newlines as separators.
+
+\
+$* 2 >!
+\
+
\
script:
(script-scope|script-line)*
@@ -782,25 +792,25 @@ command-pipe: command ('|' command)*
command: <path> (' '+ <arg>)* {stdin? stdout? stderr? cleanup*}
-stdin: '0'? ('<!'|\
- '<?'|\
- '<' <text>|\
- '<<' <here-end>|\
- '<<<' <file>)
-
-stdout: '1'? ('>!'|\
- '>?'|\
- '>&' '2'|\
- '>' <text>|\
- '>>' <here-end>|\
- ('>>>'|'>>>&') <file>)
-
-stderr: '2' ('>!'|\
- '>?'|\
- '>&' '1' |\
- '>' <text>|\
- '>>' <here-end>|\
- ('>>>'|'>>>&') <file>)
+stdin: ('<!'|\
+ '<?'|\
+ '<' <text>|\
+ '<<' <here-end>|\
+ '<<<' <file>)
+
+stdout: ('>!'|\
+ '>?'|\
+ '>&' '2'|\
+ '>' <text>|\
+ '>>' <here-end>|\
+ ('>>>'|'>>>&') <file>)
+
+stderr: '2' ('>!'|\
+ '>?'|\
+ '>&' '1' |\
+ '>' <text>|\
+ '>>' <here-end>|\
+ ('>>>'|'>>>&') <file>)
cleanup: '&' (<file>|<dir>)