about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2016-11-04 08:47:35 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2016-11-04 09:26:26 +0200
commit 3eb0cd7fe3c1dec0bb3b7e1d225107e55ca4b435 (patch)
tree f7c06695b1c8974ab0b4777b416db7343f5ebbae
parent 1270101f4267ecd187bb604190d004daaae341b7 (diff)
Various testscript lexer/parser fixes (testscript)
-rw-r--r-- build2/test/script/lexer 13
-rw-r--r-- build2/test/script/lexer.cxx 61
-rw-r--r-- build2/test/script/parser 14
-rw-r--r-- build2/test/script/parser.cxx 161
-rw-r--r-- unit-tests/test/script/lexer/driver.cxx 2
5 files changed, 162 insertions, 89 deletions
diff --git a/build2/test/script/lexer b/build2/test/script/lexer
index 5d77ab9..d79ef78 100644
--- a/build2/test/script/lexer
+++ b/build2/test/script/lexer
@@ -25,8 +25,8 @@ namespace build2
enum
{
script_line = base_type::value_next,
+ assign_line, // Auto-expires at the end of the token.
variable_line, // Auto-expires at the end of the line.
- test_line,
command_line,
here_line
};
@@ -46,14 +46,13 @@ namespace build2
virtual void
mode (base_mode, char = '\0') override;
- // Return true if we entered the quoted (double or single) mode since
- // last reset.
+ // Number of quoted (double or single) tokens since last reset.
//
- bool
+ size_t
quoted () const {return quoted_;}
void
- reset_quoted (bool q) {quoted_ = q;}
+ reset_quoted (size_t q) {quoted_ = q;}
protected:
virtual token
@@ -63,10 +62,10 @@ namespace build2
next_line ();
virtual token
- word (bool) override;
+ word (state, bool) override;
protected:
- bool quoted_ = false;
+ size_t quoted_;
};
}
}
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index f752f39..f75ad4a 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -25,6 +25,14 @@ namespace build2
{
case lexer_mode::script_line:
{
+ s1 = "=!|&<> $(#\t\n";
+ s2 = "== ";
+ break;
+ }
+ case lexer_mode::assign_line:
+ {
+ // As script_line but with variable assignments.
+ //
s1 = "=+!|&<> $(#\t\n";
s2 = " == ";
break;
@@ -37,14 +45,7 @@ namespace build2
s2 = " ";
break;
}
- case lexer_mode::test_line:
- {
- // As script_line but without variable assignments.
- //
- s1 = "=!|&<> $(#\t\n";
- s2 = "== ";
- break;
- }
+
case lexer_mode::command_line:
{
// Note that whitespaces are not word separators in this mode.
@@ -64,10 +65,6 @@ namespace build2
s = false;
break;
}
- case lexer_mode::single_quoted:
- case lexer_mode::double_quoted:
- quoted_ = true;
- // Fall through.
default:
{
// Disable pair separator.
@@ -83,15 +80,22 @@ namespace build2
token lexer::
next_impl ()
{
+ token r;
+
switch (state_.top ().mode)
{
case lexer_mode::script_line:
+ case lexer_mode::assign_line:
case lexer_mode::variable_line:
- case lexer_mode::test_line:
case lexer_mode::command_line:
- case lexer_mode::here_line: return next_line ();
- default: return base_lexer::next_impl ();
+ case lexer_mode::here_line: r = next_line (); break;
+ default: r = base_lexer::next_impl (); break;
}
+
+ if (r.quoted)
+ ++quoted_;
+
+ return r;
}
token lexer::
@@ -110,7 +114,14 @@ namespace build2
if (eos (c))
return make_token (type::eos);
- lexer_mode m (state_.top ().mode);
+ state st (state_.top ()); // Make copy (see assign_line).
+ lexer_mode m (st.mode);
+
+ // Expire the assign mode at the end of the token. Do it early in case
+ // we push any new mode (e.g., double quote).
+ //
+ if (m == lexer_mode::assign_line)
+ state_.pop ();
// NOTE: remember to update mode() if adding new special characters.
@@ -148,7 +159,7 @@ namespace build2
// Command line operator/separators.
//
- if (m == lexer_mode::script_line || m == lexer_mode::test_line)
+ if (m == lexer_mode::script_line || m == lexer_mode::assign_line)
{
switch (c)
{
@@ -169,7 +180,7 @@ namespace build2
// Command operators/separators.
//
if (m == lexer_mode::script_line ||
- m == lexer_mode::test_line ||
+ m == lexer_mode::assign_line ||
m == lexer_mode::command_line)
{
switch (c)
@@ -234,7 +245,7 @@ namespace build2
// Variable assignment (=, +=, =+).
//
- if (m == lexer_mode::script_line)
+ if (m == lexer_mode::assign_line)
{
switch (c)
{
@@ -262,22 +273,24 @@ namespace build2
// Otherwise it is a word.
//
unget (c);
- return word (sep);
+ return word (st, sep);
}
token lexer::
- word (bool sep)
+ word (state st, bool sep)
{
+ lexer_mode m (st.mode);
+
// Customized implementation that handles special variable names ($*,
// $~, $NNN).
//
- if (state_.top ().mode != lexer_mode::variable)
- return base_lexer::word (sep);
+ if (m != lexer_mode::variable)
+ return base_lexer::word (st, sep);
xchar c (peek ());
if (c != '*' && c != '~' && !digit (c))
- return base_lexer::word (sep);
+ return base_lexer::word (st, sep);
uint64_t ln (c.line), cn (c.column);
string lexeme;
diff --git a/build2/test/script/parser b/build2/test/script/parser
index f9fbd98..adff8a3 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -40,7 +40,7 @@ namespace build2
//
protected:
void
- parse_script (token&, token_type&);
+ parse_script ();
void
parse_script_line (token&, token_type&);
@@ -63,6 +63,18 @@ namespace build2
virtual lookup
lookup_variable (name&&, string&&, const location&) override;
+ // Number of quoted tokens since last reset. Note that this includes
+ // the peeked token, if any.
+ //
+ protected:
+ size_t
+ quoted () const;
+
+ void
+ reset_quoted (token& current);
+
+ size_t replay_quoted_;
+
protected:
using base_parser = build2::parser;
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index bfd0667..74bacee 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -30,22 +30,28 @@ namespace build2
runner_ = &r;
scope_ = script_;
- token t;
- type tt;
- next (t, tt);
-
- parse_script (t, tt);
-
- if (tt != type::eos)
- fail (t) << "unexpected " << t;
+ parse_script ();
}
void parser::
- parse_script (token& t, token_type& tt)
+ parse_script ()
{
- for (; tt != type::eos; next (t, tt))
+ token t;
+ type tt;
+
+ for (;;)
{
+ // We need to start lexing each line in the assign mode in order to
+ // recognize assignment operators as separators.
+ //
+ mode (lexer_mode::assign_line);
+ next (t, tt);
+
+ if (tt == type::eos)
+ break;
+
parse_script_line (t, tt);
+ assert (tt == type::newline);
}
}
@@ -53,19 +59,27 @@ namespace build2
parse_script_line (token& t, token_type& tt)
{
// Decide whether this is a variable assignment or a command. It is a
- // variable assignment if the first token is a word and the next is an
- // assign/append/prepend operator. Assignment to a computed variable
- // name must use the set builtin.
+ // variable assignment if the first token is an unquoted word and the
+ // next is an assign/append/prepend operator. Assignment to a computed
+ // variable name must use the set builtin.
//
- auto assign = [] (type t)
+ if (tt == type::word && !t.quoted)
{
- return t == type::assign || t == type::prepend || t == type::append;
- };
+ // Switch recognition of variable assignments for one more token.
+ // This is safe to do because we know we cannot be in the quoted
+ // mode (since the current token is not quoted).
+ //
+ mode (lexer_mode::assign_line);
+ type p (peek ());
- if (tt == type::word && assign (peek ()))
- parse_variable_line (t, tt);
- else
- parse_test_line (t, tt);
+ if (p == type::assign || p == type::prepend || p == type::append)
+ {
+ parse_variable_line (t, tt);
+ return;
+ }
+ }
+
+ parse_test_line (t, tt);
}
// Return true if the string contains only digit characters (used to
@@ -84,16 +98,16 @@ namespace build2
void parser::
parse_variable_line (token& t, token_type& tt)
{
- location nl (get_location (t));
string name (move (t.value));
// Check if we are trying to modify any of the special aliases ($*,
// $~, $N).
//
- if (name == "*" || name == "~" || digits (name))
- fail (nl) << "attempt to set '" << name << "' variable directly";
-
- const variable& var (script_->var_pool.insert (move (name)));
+ if (pre_parse_)
+ {
+ if (name == "*" || name == "~" || digits (name))
+ fail (t) << "attempt to set '" << name << "' variable directly";
+ }
next (t, tt);
type kind (tt); // Assignment kind.
@@ -106,39 +120,40 @@ namespace build2
if (tt != type::newline)
fail (t) << "unexpected " << t;
- value& lhs (kind == type::assign
- ? scope_->assign (var)
- : scope_->append (var));
+ if (!pre_parse_)
+ {
+ const variable& var (script_->var_pool.insert (move (name)));
- // @@ Need to adjust to make strings the default type.
- //
- apply_value_attributes (&var, lhs, move (rhs), kind);
+ value& lhs (kind == type::assign
+ ? scope_->assign (var)
+ : scope_->append (var));
- // Handle the $*, $NN special aliases.
- //
- // The plan is as follows: in this function we detect modification of
- // the source variables (test*), and (re)set $* to NULL on this scope
- // (this is important to both invalidate any old values but also to
- // "stake" the lookup position). This signals to the variable lookup
- // function below that the $* and $NN values need to be recalculated
- // from their sources. Note that we don't need to invalidate $NN since
- // their lookup always checks $* first.
- //
- if (var.name == script_->test_var.name ||
- var.name == script_->opts_var.name ||
- var.name == script_->args_var.name)
- {
- scope_->assign (script_->cmd_var) = nullptr;
+ // @@ Need to adjust to make strings the default type.
+ //
+ apply_value_attributes (&var, lhs, move (rhs), kind);
+
+ // Handle the $*, $NN special aliases.
+ //
+ // The plan is as follows: in this function we detect modification
+ // of the source variables (test*), and (re)set $* to NULL on this
+ // scope (this is important to both invalidate any old values but
+ // also to "stake" the lookup position). This signals to the
+ // variable lookup function below that the $* and $NN values need to
+ // be recalculated from their sources. Note that we don't need to
+ // invalidate $NN since their lookup always checks $* first.
+ //
+ if (var.name == script_->test_var.name ||
+ var.name == script_->opts_var.name ||
+ var.name == script_->args_var.name)
+ {
+ scope_->assign (script_->cmd_var) = nullptr;
+ }
}
}
void parser::
parse_test_line (token& t, token_type& tt)
{
- // Stop recognizing variable assignments.
- //
- mode (lexer_mode::test_line);
-
test ts;
// Pending positions where the next word should go.
@@ -371,12 +386,14 @@ namespace build2
// Note that we do it in the chunking mode to detect whether
// anything in each chunk is quoted.
//
- lexer_->reset_quoted (t.quoted);
+ reset_quoted (t);
parse_names (t, tt, ns, true, "command");
- // Process what we got.
+ // Process what we got. Determine whether anything inside was
+ // quoted (note that the current token is not part of it).
//
- bool q (lexer_->quoted ());
+ bool q ((quoted () - (t.quoted ? 1 : 0)) != 0);
+
for (name& n: ns)
{
string s;
@@ -505,7 +522,7 @@ namespace build2
// While we no longer need to recognize command line operators, we
// also don't expect a valid test trailer to contain them. So we are
- // going to continue lexing in the test_line mode.
+ // going to continue lexing in the script_line mode.
//
if (tt == type::equal || tt == type::not_equal)
{
@@ -516,8 +533,6 @@ namespace build2
if (tt != type::newline)
fail (t) << "unexpected " << t;
- expire_mode (); // Done parsing test-line.
-
// Parse here-document fragments in the order they were mentioned on
// the command line.
//
@@ -702,6 +717,40 @@ namespace build2
return lookup (nv, vars);
}
+
+ size_t parser::
+ quoted () const
+ {
+ size_t r (0);
+
+ if (replay_ != replay::play)
+ r = lexer_->quoted ();
+ else
+ {
+ // Examine tokens we have replayed since last reset.
+ //
+ for (size_t i (replay_quoted_); i != replay_i_; ++i)
+ if (replay_data_[i].token.quoted)
+ ++r;
+ }
+
+ return r;
+ }
+
+ void parser::
+ reset_quoted (token& cur)
+ {
+ if (replay_ != replay::play)
+ lexer_->reset_quoted (cur.quoted ? 1 : 0);
+ else
+ {
+ replay_quoted_ = replay_i_ - 1;
+
+ // Must be the same token.
+ //
+ assert (replay_data_[replay_quoted_].token.quoted == cur.quoted);
+ }
+ }
}
}
}
diff --git a/unit-tests/test/script/lexer/driver.cxx b/unit-tests/test/script/lexer/driver.cxx
index 7e237ed..b80eed9 100644
--- a/unit-tests/test/script/lexer/driver.cxx
+++ b/unit-tests/test/script/lexer/driver.cxx
@@ -30,8 +30,8 @@ namespace build2
string s (argv[1]);
if (s == "script-line") m = lexer_mode::script_line;
+ else if (s == "assign-line") m = lexer_mode::assign_line;
else if (s == "variable-line") m = lexer_mode::variable_line;
- else if (s == "test-line") m = lexer_mode::test_line;
else if (s == "command-line") m = lexer_mode::command_line;
else if (s == "here-line") m = lexer_mode::here_line;
else if (s == "variable") m = lexer_mode::variable;