Various testscript lexer/parser fixes

author: Boris Kolpackov <boris@codesynthesis.com> 2016-11-04 08:47:26 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2016-11-04 09:26:26 +0200
commit: 1270101f4267ecd187bb604190d004daaae341b7 (patch)
tree: 61ba12ad9c699019996f0ad6e6aa6348fd48740a /build2/lexer
parent: b2cde46e0540126fe8a4dc94a2b9722663aa45c5 (diff)
1 files changed, 24 insertions, 17 deletions
diff --git a/build2/lexer b/build2/lexer
index c5c3857..f7f7b82 100644
--- a/build2/lexer
+++ b/build2/lexer
@@ -31,6 +31,12 @@ namespace build2
   // automatically reset after the end of the line. The variable mode is reset
   // after the word token. And the eval mode is reset after the closing ')'.
   //
+  // Note that normally it is only safe to switch mode when the current token
+  // is not quoted (or, more generally, when you are not in the double-quoted
+  // mode) unless the mode treats the double-quote as a separator (e.g.,
+  // variable name mode). Failing that, your mode (which now will be the top of
+  // the mode stack) will prevent proper recognition of the closing quote.
+  //
 
   // Extendable/inheritable enum-like class.
   //
@@ -102,6 +108,23 @@ namespace build2
     peek_char ();
 
   protected:
+    struct state
+    {
+      lexer_mode mode;
+
+      char sep_pair;
+      bool sep_space; // Are whitespaces separators (see skip_spaces())?
+
+      // Word separator characters. For two-character sequence put the first
+      // one in sep_first and the second one in the corresponding position of
+      // sep_second. If it's a single-character sequence, then put space in
+      // sep_second. If there are multiple sequences that start with the same
+      // character, then repeat the first character in sep_first.
+      //
+      const char* sep_first;
+      const char* sep_second;
+    };
+
     // If you extend the lexer and add a custom lexer mode, then you must
     // override next_impl() and handle the custom mode there.
     //
@@ -115,7 +138,7 @@ namespace build2
     next_quoted ();
 
     virtual token
-    word (bool separated);
+    word (state, bool separated);
 
     // Return true if we have seen any spaces. Skipped empty lines
     // don't count. In other words, we are only interested in spaces
@@ -161,22 +184,6 @@ namespace build2
     const char* escapes_;
     void (*processor_) (token&, const lexer&);
 
-    struct state
-    {
-      lexer_mode mode;
-
-      char sep_pair;
-      bool sep_space; // Are whitespaces separators (see skip_spaces())?
-
-      // Word separator characters. For two-character sequence put the first
-      // one in sep_first and the second one in the corresponding position of
-      // sep_second. If it's a single-character sequence, then put space in
-      // sep_second. If there are multiple sequences that start with the same
-      // character, then repeat the first character in sep_first.
-      //
-      const char* sep_first;
-      const char* sep_second;
-    };
     std::stack<state> state_;
 
     bool sep_; // True if we skipped spaces in peek().
author	Boris Kolpackov <boris@codesynthesis.com>	2016-11-04 08:47:26 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2016-11-04 09:26:26 +0200
commit	1270101f4267ecd187bb604190d004daaae341b7 (patch)
tree	61ba12ad9c699019996f0ad6e6aa6348fd48740a /build2/lexer
parent	b2cde46e0540126fe8a4dc94a2b9722663aa45c5 (diff)