aboutsummaryrefslogtreecommitdiff
path: root/build2/test/script/lexer.cxx
diff options
context:
space:
mode:
author: Boris Kolpackov <boris@codesynthesis.com> 2016-10-12 14:53:32 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2016-11-04 08:29:23 +0200
commit: 18ce15f3aee71debe3f35356c6a739943815da8a (patch)
tree: 828dea05101c11c5b0b6974b25447226db37debf /build2/test/script/lexer.cxx
parent: f423dbc95239cc88021d5d332ad19eeecc6e11e8 (diff)
Initial work on testscript lexer/parser
Diffstat (limited to 'build2/test/script/lexer.cxx')
-rw-r--r--build2/test/script/lexer.cxx249
1 files changed, 249 insertions, 0 deletions
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
new file mode 100644
index 0000000..84be7c1
--- /dev/null
+++ b/build2/test/script/lexer.cxx
@@ -0,0 +1,249 @@
+// file : build2/test/script/lexer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/test/script/lexer>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ using type = token_type;
+
+ // Switch the lexer to mode m.
+ //
+ // For the testscript line modes (script_line, variable_line, test_line,
+ // and command_line) push a new state that carries this mode's separator
+ // tables. Every other mode is handed over to the base lexer. The second
+ // (pair separator) argument is ignored: the pair separator is always
+ // disabled ('\0') here.
+ //
+ void lexer::
+ mode (base_mode m, char)
+ {
+   // s1 lists the characters that start a separator. s2 is parallel to s1:
+   // for each character in s1 it holds the second character that must
+   // follow, with a space apparently standing for "no second character"
+   // (NOTE(review): the consumer of these tables is in the base lexer;
+   // confirm). Because the two are matched up by position, s2 must be
+   // exactly as long as s1.
+   //
+   const char* s1 (nullptr);
+   const char* s2 (nullptr);
+   bool s (true); // Cleared for modes where whitespaces don't separate words.
+
+   switch (m)
+   {
+   case lexer_mode::script_line:
+     {
+       // 14 characters each.
+       //
+       s1 = "=+!|&<> $()#\t\n";
+       s2 = " ==           ";
+       break;
+     }
+   case lexer_mode::variable_line:
+     {
+       // Like value except we don't recognize {.
+       //
+       // 9 characters each.
+       //
+       s1 = " $()[]#\t\n";
+       s2 = "         ";
+       break;
+     }
+   case lexer_mode::test_line:
+     {
+       // As script_line but without variable assignments.
+       //
+       // 13 characters each.
+       //
+       s1 = "=!|&<> $()#\t\n";
+       s2 = "==           ";
+       break;
+     }
+   case lexer_mode::command_line:
+     {
+       // Note that whitespaces are not word separators in this mode.
+       //
+       // 4 characters each.
+       //
+       s1 = "|&<>";
+       s2 = "    ";
+       s = false;
+       break;
+     }
+   case lexer_mode::single_quoted:
+   case lexer_mode::double_quoted:
+     quoted_ = true;
+     // Fall through.
+   default:
+     {
+       // Disable pair separator.
+       //
+       base_lexer::mode (m, '\0');
+
+       // The base lexer owns this mode, so we are done. Continuing to the
+       // push below would stack one more state for the same mode with
+       // null separator tables.
+       //
+       return;
+     }
+   }
+
+   state_.push (state {m, '\0', s, s1, s2});
+ }
+
+ // Return the next token for the mode on top of the state stack: the
+ // testscript line modes are scanned by next_line() while every other
+ // mode is left to the base lexer.
+ //
+ token lexer::
+ next_impl ()
+ {
+   lexer_mode m (state_.top ().mode);
+
+   const bool line (m == lexer_mode::script_line   ||
+                    m == lexer_mode::variable_line ||
+                    m == lexer_mode::test_line     ||
+                    m == lexer_mode::command_line);
+
+   if (line)
+     return next_line ();
+
+   return base_lexer::next_impl ();
+ }
+
+ // Scan the next token in one of the testscript line modes (script_line,
+ // variable_line, test_line, or command_line).
+ //
+ // After skipping spaces, the next character (plus at most one character
+ // of lookahead) is matched against the special characters that the
+ // current mode recognizes. If nothing matches, the character is put back
+ // and the token is scanned as a name.
+ //
+ token lexer::
+ next_line ()
+ {
+   bool sep (skip_spaces ());
+
+   xchar c (get ());
+   uint64_t ln (c.line), cn (c.column);
+
+   // Every token made here starts at the position of c and has the same
+   // separation flag.
+   //
+   auto make = [sep, ln, cn] (type t) {return token (t, sep, ln, cn);};
+
+   if (eos (c))
+     return make (type::eos);
+
+   lexer_mode m (state_.top ().mode);
+
+   const bool in_script   (m == lexer_mode::script_line);
+   const bool in_variable (m == lexer_mode::variable_line);
+   const bool in_test     (m == lexer_mode::test_line);
+   const bool in_command  (m == lexer_mode::command_line);
+
+   // NOTE: remember to update mode() if adding new special characters.
+
+   // Newline as well as variable expansion, function call, and evaluation
+   // context (everywhere except command_line).
+   //
+   if (!in_command)
+   {
+     if (c == '\n') return make (type::newline);
+     if (c == '$')  return make (type::dollar);
+     if (c == '(')  return make (type::lparen);
+     if (c == ')')  return make (type::rparen);
+   }
+
+   // Attributes.
+   //
+   if (in_variable)
+   {
+     if (c == '[') return make (type::lsbrace);
+     if (c == ']') return make (type::rsbrace);
+   }
+
+   // Comparison (==, !=).
+   //
+   // A '=' or '!' that is not followed by '=' is not consumed here and is
+   // left for the checks below.
+   //
+   if (in_script || in_test)
+   {
+     if ((c == '=' || c == '!') && peek () == '=')
+     {
+       get ();
+       return make (c == '=' ? type::equal : type::not_equal);
+     }
+   }
+
+   // Command operators/separators.
+   //
+   if (in_script || in_test || in_command)
+   {
+     // |, ||
+     //
+     if (c == '|')
+     {
+       if (peek () == '|')
+       {
+         get ();
+         return make (type::log_or);
+       }
+
+       return make (type::pipe);
+     }
+
+     // &, &&
+     //
+     if (c == '&')
+     {
+       if (peek () == '&')
+       {
+         get ();
+         return make (type::log_and);
+       }
+
+       return make (type::clean);
+     }
+
+     // <, <!, <<
+     //
+     if (c == '<')
+     {
+       xchar p (peek ());
+
+       if (p == '!')
+       {
+         get ();
+         return make (type::in_null);
+       }
+
+       if (p == '<')
+       {
+         get ();
+         return make (type::in_document);
+       }
+
+       return make (type::in_string);
+     }
+
+     // >, >!, >>
+     //
+     if (c == '>')
+     {
+       xchar p (peek ());
+
+       if (p == '!')
+       {
+         get ();
+         return make (type::out_null);
+       }
+
+       if (p == '>')
+       {
+         get ();
+         return make (type::out_document);
+       }
+
+       return make (type::out_string);
+     }
+   }
+
+   // Variable assignment (=, +=, =+).
+   //
+   if (in_script)
+   {
+     if (c == '=')
+     {
+       if (peek () == '+')
+       {
+         get ();
+         return make (type::prepend);
+       }
+
+       return make (type::assign);
+     }
+
+     // A '+' that is not followed by '=' is part of a name.
+     //
+     if (c == '+' && peek () == '=')
+     {
+       get ();
+       return make (type::append);
+     }
+   }
+
+   // Otherwise it is a name.
+   //
+   unget (c);
+   return name (sep);
+ }
+ }
+ }
+}