aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-11-04 08:47:26 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-11-04 09:26:26 +0200
commit1270101f4267ecd187bb604190d004daaae341b7 (patch)
tree61ba12ad9c699019996f0ad6e6aa6348fd48740a
parentb2cde46e0540126fe8a4dc94a2b9722663aa45c5 (diff)
Various testscript lexer/parser fixes
-rw-r--r--build2/lexer41
-rw-r--r--build2/lexer.cxx30
-rw-r--r--build2/parser2
3 files changed, 45 insertions, 28 deletions
diff --git a/build2/lexer b/build2/lexer
index c5c3857..f7f7b82 100644
--- a/build2/lexer
+++ b/build2/lexer
@@ -31,6 +31,12 @@ namespace build2
// automatically reset after the end of the line. The variable mode is reset
// after the word token. And the eval mode is reset after the closing ')'.
//
+ // Note that normally it is only safe to switch mode when the current token
+ // is not quoted (or, more generally, when you are not in the double-quoted
+ // mode) unless the mode treats the double-quote as a separator (e.g.,
+ // variable name mode). Failing that, your mode (which will now be the top of
+ // the mode stack) will prevent proper recognition of the closing quote.
+ //
// Extendable/inheritable enum-like class.
//
@@ -102,6 +108,23 @@ namespace build2
peek_char ();
protected:
+ struct state
+ {
+ lexer_mode mode;
+
+ char sep_pair;
+ bool sep_space; // Are whitespaces separators (see skip_spaces())?
+
+ // Word separator characters. For two-character sequence put the first
+ // one in sep_first and the second one in the corresponding position of
+ // sep_second. If it's a single-character sequence, then put space in
+ // sep_second. If there are multiple sequences that start with the same
+ // character, then repeat the first character in sep_first.
+ //
+ const char* sep_first;
+ const char* sep_second;
+ };
+
// If you extend the lexer and add a custom lexer mode, then you must
// override next_impl() and handle the custom mode there.
//
@@ -115,7 +138,7 @@ namespace build2
next_quoted ();
virtual token
- word (bool separated);
+ word (state, bool separated);
// Return true if we have seen any spaces. Skipped empty lines
// don't count. In other words, we are only interested in spaces
@@ -161,22 +184,6 @@ namespace build2
const char* escapes_;
void (*processor_) (token&, const lexer&);
- struct state
- {
- lexer_mode mode;
-
- char sep_pair;
- bool sep_space; // Are whitespaces separators (see skip_spaces())?
-
- // Word separator characters. For two-character sequence put the first
- // one in sep_first and the second one in the corresponding position of
- // sep_second. If it's a single-character sequence, then put space in
- // sep_second. If there are multiple sequences that start with the same
- // character, then repeat the first character in sep_first.
- //
- const char* sep_first;
- const char* sep_second;
- };
std::stack<state> state_;
bool sep_; // True if we skipped spaces in peek().
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
index b188396..c84b102 100644
--- a/build2/lexer.cxx
+++ b/build2/lexer.cxx
@@ -78,7 +78,8 @@ namespace build2
token lexer::
next_impl ()
{
- lexer_mode m (state_.top ().mode);
+ const state& st (state_.top ());
+ lexer_mode m (st.mode);
// For some modes we have dedicated imlementations of next().
//
@@ -108,7 +109,7 @@ namespace build2
// Handle pair separator.
//
if ((m == lexer_mode::normal || m == lexer_mode::value) &&
- c == state_.top ().sep_pair)
+ c == st.sep_pair)
return make_token (type::pair_separator);
switch (c)
@@ -168,7 +169,7 @@ namespace build2
// Otherwise it is a word.
//
unget (c);
- return word (sep);
+ return word (st, sep);
}
token lexer::
@@ -180,6 +181,8 @@ namespace build2
if (eos (c))
fail (c) << "unterminated evaluation context";
+ const state& st (state_.top ());
+
uint64_t ln (c.line), cn (c.column);
auto make_token = [sep, ln, cn] (type t)
@@ -193,7 +196,7 @@ namespace build2
// Handle pair separator.
//
- if (c == state_.top ().sep_pair)
+ if (c == st.sep_pair)
return make_token (type::pair_separator);
// Note: we don't treat [ and ] as special here. Maybe can use them for
@@ -242,7 +245,7 @@ namespace build2
// Otherwise it is a word.
//
unget (c);
- return word (sep);
+ return word (st, sep);
}
token lexer::
@@ -264,13 +267,13 @@ namespace build2
// Otherwise it is a word.
//
unget (c);
- return word (false);
+ return word (state_.top (), false);
}
token lexer::
- word (bool sep)
+ word (state st, bool sep)
{
- lexer_mode m (state_.top ().mode);
+ lexer_mode m (st.mode);
xchar c (peek ());
assert (!eos (c));
@@ -337,7 +340,9 @@ namespace build2
{
get ();
state_.pop ();
- m = state_.top ().mode;
+
+ st = state_.top ();
+ m = st.mode;
continue;
}
}
@@ -366,7 +371,6 @@ namespace build2
{
// First check if it's a pair separator.
//
- const state& st (state_.top ());
if (c == st.sep_pair)
done = true;
else
@@ -421,7 +425,11 @@ namespace build2
case '\"':
{
get ();
- mode ((m = lexer_mode::double_quoted));
+
+ mode (lexer_mode::double_quoted);
+ st = state_.top ();
+ m = st.mode;
+
quoted = true;
continue;
}
diff --git a/build2/parser b/build2/parser
index 89f42b8..f038b97 100644
--- a/build2/parser
+++ b/build2/parser
@@ -445,6 +445,8 @@ namespace build2
const fail_mark<failed> fail;
protected:
+ bool pre_parse_ = false;
+
bool boot_;
const path* path_; // Current path.