aboutsummaryrefslogtreecommitdiff
path: root/build2/test/script/lexer.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'build2/test/script/lexer.cxx')
-rw-r--r--build2/test/script/lexer.cxx551
1 files changed, 0 insertions, 551 deletions
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
deleted file mode 100644
index 2470fcc..0000000
--- a/build2/test/script/lexer.cxx
+++ /dev/null
@@ -1,551 +0,0 @@
-// file : build2/test/script/lexer.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <build2/test/script/lexer.hxx>
-
-#include <cstring> // strchr()
-
-using namespace std;
-
-namespace build2
-{
- namespace test
- {
- namespace script
- {
- using type = token_type;
-
- void lexer::
- mode (base_mode m, char ps, optional<const char*> esc)
- {
- const char* s1 (nullptr);
- const char* s2 (nullptr);
- bool s (true);
- bool n (true);
- bool q (true);
-
- if (!esc)
- {
- assert (!state_.empty ());
- esc = state_.top ().escapes;
- }
-
- switch (m)
- {
- case lexer_mode::command_line:
- {
- s1 = ":;=!|&<> $(#\t\n";
- s2 = " == ";
- break;
- }
- case lexer_mode::first_token:
- {
- // First token on the script line. Like command_line but
- // recognizes leading '.+-{}' as tokens as well as variable
- // assignments as separators.
- //
- // Note that to recognize only leading '.+-{}' we shouldn't add
- // them to the separator strings.
- //
- s1 = ":;=+!|&<> $(#\t\n";
- s2 = " == ";
- break;
- }
- case lexer_mode::second_token:
- {
- // Second token on the script line. Like command_line but
- // recognizes leading variable assignments.
- //
- // Note that to recognize only leading assignments we shouldn't
- // add them to the separator strings (so this is identical to
- // command_line).
- //
- s1 = ":;=!|&<> $(#\t\n";
- s2 = " == ";
- break;
- }
- case lexer_mode::variable_line:
- {
- // Like value except we recognize ';' and don't recognize '{'.
- // Note that we don't recognize ':' since having a trailing
- // variable assignment is illegal.
- //
- s1 = "; $([]#\t\n";
- s2 = " ";
- break;
- }
-
- case lexer_mode::command_expansion:
- {
- // Note that whitespaces are not word separators in this mode.
- //
- s1 = "|&<>";
- s2 = " ";
- s = false;
- break;
- }
- case lexer_mode::here_line_single:
- {
- // This one is like a single-quoted string except it treats
- // newlines as a separator. We also treat quotes as literals.
- //
- // Note that it might be tempting to enable line continuation
- // escapes. However, we will then have to also enable escaping of
- // the backslash, which makes it a lot less tempting.
- //
- s1 = "\n";
- s2 = " ";
- esc = ""; // Disable escape sequences.
- s = false;
- q = false;
- break;
- }
- case lexer_mode::here_line_double:
- {
- // This one is like a double-quoted string except it treats
- // newlines as a separator. We also treat quotes as literals.
- //
- s1 = "$(\n";
- s2 = " ";
- s = false;
- q = false;
- break;
- }
- case lexer_mode::description_line:
- {
- // This one is like a single-quoted string and has an ad hoc
- // implementation.
- //
- break;
- }
- default:
- {
- // Make sure pair separators are only enabled where we expect
- // them.
- //
- // @@ Should we disable pair separators in the eval mode?
- //
- assert (ps == '\0' ||
- m == lexer_mode::eval ||
- m == lexer_mode::attribute);
-
- base_lexer::mode (m, ps, esc);
- return;
- }
- }
-
- assert (ps == '\0');
- state_.push (state {m, ps, s, n, q, *esc, s1, s2});
- }
-
- token lexer::
- next ()
- {
- token r;
-
- switch (state_.top ().mode)
- {
- case lexer_mode::command_line:
- case lexer_mode::first_token:
- case lexer_mode::second_token:
- case lexer_mode::variable_line:
- case lexer_mode::command_expansion:
- case lexer_mode::here_line_single:
- case lexer_mode::here_line_double:
- r = next_line ();
- break;
- case lexer_mode::description_line:
- r = next_description ();
- break;
- default:
- r = base_lexer::next ();
- break;
- }
-
- if (r.qtype != quote_type::unquoted)
- ++quoted_;
-
- return r;
- }
-
- token lexer::
- next_line ()
- {
- bool sep (skip_spaces ());
-
- xchar c (get ());
- uint64_t ln (c.line), cn (c.column);
-
- if (eos (c))
- return token (type::eos, sep, ln, cn, token_printer);
-
- state st (state_.top ()); // Make copy (see first/second_token).
- lexer_mode m (st.mode);
-
- auto make_token = [&sep, &m, ln, cn] (type t, string v = string ())
- {
- bool q (m == lexer_mode::here_line_double);
-
- return token (t, move (v), sep,
- (q ? quote_type::double_ : quote_type::unquoted), q,
- ln, cn,
- token_printer);
- };
-
- auto make_token_with_modifiers =
- [&make_token, this] (type t,
- const char* mods, // To recorgnize.
- const char* stop = nullptr) // To stop after.
- {
- string v;
- if (mods != nullptr)
- {
- for (xchar p (peek ());
- (strchr (mods, p) != nullptr && // Modifier.
- strchr (v.c_str (), p) == nullptr); // Not already seen.
- p = peek ())
- {
- get ();
- v += p;
-
- if (stop != nullptr && strchr (stop, p) != nullptr)
- break;
- }
- }
-
- return make_token (t, move (v));
- };
-
- // Expire certain modes at the end of the token. Do it early in case
- // we push any new mode (e.g., double quote).
- //
- if (m == lexer_mode::first_token || m == lexer_mode::second_token)
- state_.pop ();
-
- // NOTE: remember to update mode() if adding new special characters.
-
- if (m != lexer_mode::command_expansion)
- {
- switch (c)
- {
- case '\n':
- {
- // Expire variable value mode at the end of the line.
- //
- if (m == lexer_mode::variable_line)
- state_.pop ();
-
- sep = true; // Treat newline as always separated.
- return make_token (type::newline);
- }
- }
- }
-
- if (m != lexer_mode::here_line_single)
- {
- switch (c)
- {
- // Variable expansion, function call, and evaluation context.
- //
- case '$': return make_token (type::dollar);
- case '(': return make_token (type::lparen);
- }
- }
-
-
- if (m == lexer_mode::variable_line)
- {
- switch (c)
- {
- // Attributes.
- //
- case '[': return make_token (type::lsbrace);
- case ']': return make_token (type::rsbrace);
- }
- }
-
- // Line separators.
- //
- if (m == lexer_mode::command_line ||
- m == lexer_mode::first_token ||
- m == lexer_mode::second_token ||
- m == lexer_mode::variable_line)
- {
- switch (c)
- {
- case ';': return make_token (type::semi);
- }
- }
-
- if (m == lexer_mode::command_line ||
- m == lexer_mode::first_token ||
- m == lexer_mode::second_token)
- {
- switch (c)
- {
- case ':': return make_token (type::colon);
- }
- }
-
- // Command line operator/separators.
- //
- if (m == lexer_mode::command_line ||
- m == lexer_mode::first_token ||
- m == lexer_mode::second_token)
- {
- switch (c)
- {
- // Comparison (==, !=).
- //
- case '=':
- case '!':
- {
- if (peek () == '=')
- {
- get ();
- return make_token (c == '=' ? type::equal : type::not_equal);
- }
- }
- }
- }
-
- // Command operators/separators.
- //
- if (m == lexer_mode::command_line ||
- m == lexer_mode::first_token ||
- m == lexer_mode::second_token ||
- m == lexer_mode::command_expansion)
- {
- switch (c)
- {
- // |, ||
- //
- case '|':
- {
- if (peek () == '|')
- {
- get ();
- return make_token (type::log_or);
- }
- else
- return make_token (type::pipe);
- }
- // &, &&
- //
- case '&':
- {
- xchar p (peek ());
-
- if (p == '&')
- {
- get ();
- return make_token (type::log_and);
- }
-
- // These modifiers are mutually exclusive so stop after seeing
- // either one.
- //
- return make_token_with_modifiers (type::clean, "!?", "!?");
- }
- // <
- //
- case '<':
- {
- type r (type::in_str);
- xchar p (peek ());
-
- if (p == '|' || p == '-' || p == '<')
- {
- get ();
-
- switch (p)
- {
- case '|': return make_token (type::in_pass);
- case '-': return make_token (type::in_null);
- case '<':
- {
- r = type::in_doc;
- p = peek ();
-
- if (p == '<')
- {
- get ();
- r = type::in_file;
- }
- break;
- }
- }
- }
-
- // Handle modifiers.
- //
- const char* mods (nullptr);
- switch (r)
- {
- case type::in_str:
- case type::in_doc: mods = ":/"; break;
- }
-
- return make_token_with_modifiers (r, mods);
- }
- // >
- //
- case '>':
- {
- type r (type::out_str);
- xchar p (peek ());
-
- if (p == '|' || p == '-' || p == '!' || p == '&' ||
- p == '=' || p == '+' || p == '>')
- {
- get ();
-
- switch (p)
- {
- case '|': return make_token (type::out_pass);
- case '-': return make_token (type::out_null);
- case '!': return make_token (type::out_trace);
- case '&': return make_token (type::out_merge);
- case '=': return make_token (type::out_file_ovr);
- case '+': return make_token (type::out_file_app);
- case '>':
- {
- r = type::out_doc;
- p = peek ();
-
- if (p == '>')
- {
- get ();
- r = type::out_file_cmp;
- }
- break;
- }
- }
- }
-
- // Handle modifiers.
- //
- const char* mods (nullptr);
- const char* stop (nullptr);
- switch (r)
- {
- case type::out_str:
- case type::out_doc: mods = ":/~"; stop = "~"; break;
- }
-
- return make_token_with_modifiers (r, mods, stop);
- }
- }
- }
-
- // Dot, plus/minus, and left/right curly braces.
- //
- if (m == lexer_mode::first_token)
- {
- switch (c)
- {
- case '.': return make_token (type::dot);
- case '+': return make_token (type::plus);
- case '-': return make_token (type::minus);
- case '{': return make_token (type::lcbrace);
- case '}': return make_token (type::rcbrace);
- }
- }
-
- // Variable assignment (=, +=, =+).
- //
- if (m == lexer_mode::second_token)
- {
- switch (c)
- {
- case '=':
- {
- if (peek () == '+')
- {
- get ();
- return make_token (type::prepend);
- }
- else
- return make_token (type::assign);
- }
- case '+':
- {
- if (peek () == '=')
- {
- get ();
- return make_token (type::append);
- }
- }
- }
- }
-
- // Otherwise it is a word.
- //
- unget (c);
- return word (st, sep);
- }
-
- token lexer::
- next_description ()
- {
- xchar c (peek ());
-
- if (eos (c))
- fail (c) << "expected newline at the end of description line";
-
- uint64_t ln (c.line), cn (c.column);
-
- if (c == '\n')
- {
- get ();
- state_.pop (); // Expire the description mode.
- return token (type::newline, true, ln, cn, token_printer);
- }
-
- string lexeme;
-
- // For now no line continutions though we could support them.
- //
- for (; !eos (c) && c != '\n'; c = peek ())
- {
- get ();
- lexeme += c;
- }
-
- return token (move (lexeme),
- false,
- quote_type::unquoted, false,
- ln, cn);
- }
-
- token lexer::
- word (state st, bool sep)
- {
- lexer_mode m (st.mode);
-
- // Customized implementation that handles special variable names ($*,
- // $N, $~, $@).
- //
- if (m != lexer_mode::variable)
- return base_lexer::word (st, sep);
-
- xchar c (peek ());
-
- if (c != '*' && c != '~' && c != '@' && !digit (c))
- return base_lexer::word (st, sep);
-
- get ();
-
- if (digit (c) && digit (peek ()))
- fail (c) << "multi-digit special variable name";
-
- state_.pop (); // Expire the variable mode.
- return token (string (1, c),
- sep,
- quote_type::unquoted, false,
- c.line, c.column);
- }
- }
- }
-}