From e5827facbfbfe90eae1b71c355a08bf61e2f6e1a Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 7 May 2020 22:30:17 +0300 Subject: Factor out generic script parsing/executing functionality from build2::test::script namespace --- libbuild2/script/lexer.cxx | 339 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 libbuild2/script/lexer.cxx (limited to 'libbuild2/script/lexer.cxx') diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx new file mode 100644 index 0000000..9351197 --- /dev/null +++ b/libbuild2/script/lexer.cxx @@ -0,0 +1,339 @@ +// file : libbuild2/script/lexer.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // strchr() + +using namespace std; + +namespace build2 +{ + namespace script + { + using type = token_type; + + void lexer:: + mode (base_mode m, char ps, optional esc) + { + bool a (false); // attributes + + const char* s1 (nullptr); + const char* s2 (nullptr); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::command_expansion: + { + // Note that whitespaces are not word separators in this mode. + // + s1 = "|&<>"; + s2 = " "; + s = false; + break; + } + case lexer_mode::here_line_single: + { + // This one is like a single-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + // Note that it might be tempting to enable line continuation + // escapes. However, we will then have to also enable escaping of + // the backslash, which makes it a lot less tempting. + // + s1 = "\n"; + s2 = " "; + esc = ""; // Disable escape sequences. + s = false; + q = false; + break; + } + case lexer_mode::here_line_double: + { + // This one is like a double-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + s1 = "$(\n"; + s2 = " "; + s = false; + q = false; + break; + } + default: + { + // Make sure pair separators are only enabled where we expect + // them. + // + // @@ Should we disable pair separators in the eval mode? + // + assert (ps == '\0' || + m == lexer_mode::eval || + m == lexer_mode::attribute_value); + + base_lexer::mode (m, ps, esc); + return; + } + } + + assert (ps == '\0'); + state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); + } + + token lexer:: + next () + { + token r; + + switch (state_.top ().mode) + { + case lexer_mode::command_expansion: + case lexer_mode::here_line_single: + case lexer_mode::here_line_double: + r = next_line (); + break; + default: + r = base_lexer::next (); + break; + } + + if (r.qtype != quote_type::unquoted) + ++quoted_; + + return r; + } + + token lexer:: + next_line () + { + bool sep (skip_spaces ()); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + const state& st (state_.top ()); + lexer_mode m (st.mode); + + auto make_token = [&sep, &m, ln, cn] (type t) + { + bool q (m == lexer_mode::here_line_double); + + return token (t, string (), sep, + (q ? quote_type::double_ : quote_type::unquoted), q, + ln, cn, + token_printer); + }; + + if (eos (c)) + return make_token (type::eos); + + // NOTE: remember to update mode() if adding new special characters. + + if (m != lexer_mode::command_expansion) + { + switch (c) + { + case '\n': + { + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + } + } + + if (m != lexer_mode::here_line_single) + { + switch (c) + { + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + } + + // Command operators. + // + if (m == lexer_mode::command_expansion) + { + if (optional t = next_cmd_op (c, sep, m)) + return move (*t); + } + + // Otherwise it is a word. + // + unget (c); + return word (st, sep); + } + + optional lexer:: + next_cmd_op (const xchar& c, bool sep, lexer_mode m) + { + auto make_token = [&sep, &m, &c] (type t, string v = string ()) + { + bool q (m == lexer_mode::here_line_double); + + return token (t, move (v), sep, + (q ? quote_type::double_ : quote_type::unquoted), q, + c.line, c.column, + token_printer); + }; + + auto make_token_with_modifiers = + [&make_token, this] (type t, + const char* mods, // To recorgnize. + const char* stop = nullptr) // To stop after. + { + string v; + if (mods != nullptr) + { + for (xchar p (peek ()); + (strchr (mods, p) != nullptr && // Modifier. + strchr (v.c_str (), p) == nullptr); // Not already seen. + p = peek ()) + { + get (); + v += p; + + if (stop != nullptr && strchr (stop, p) != nullptr) + break; + } + } + + return make_token (t, move (v)); + }; + + switch (c) + { + // |, || + // + case '|': + { + if (peek () == '|') + { + get (); + return make_token (type::log_or); + } + else + return make_token (type::pipe); + } + // &, && + // + case '&': + { + xchar p (peek ()); + + if (p == '&') + { + get (); + return make_token (type::log_and); + } + + // These modifiers are mutually exclusive so stop after seeing + // either one. + // + return make_token_with_modifiers (type::clean, "!?", "!?"); + } + // < + // + case '<': + { + type r (type::in_str); + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '<') + { + get (); + + switch (p) + { + case '|': return make_token (type::in_pass); + case '-': return make_token (type::in_null); + case '<': + { + r = type::in_doc; + p = peek (); + + if (p == '<') + { + get (); + r = type::in_file; + } + break; + } + } + } + + // Handle modifiers. + // + const char* mods (nullptr); + switch (r) + { + case type::in_str: + case type::in_doc: mods = ":/"; break; + } + + return make_token_with_modifiers (r, mods); + } + // > + // + case '>': + { + type r (type::out_str); + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '!' || p == '&' || + p == '=' || p == '+' || p == '>') + { + get (); + + switch (p) + { + case '|': return make_token (type::out_pass); + case '-': return make_token (type::out_null); + case '!': return make_token (type::out_trace); + case '&': return make_token (type::out_merge); + case '=': return make_token (type::out_file_ovr); + case '+': return make_token (type::out_file_app); + case '>': + { + r = type::out_doc; + p = peek (); + + if (p == '>') + { + get (); + r = type::out_file_cmp; + } + break; + } + } + } + + // Handle modifiers. + // + const char* mods (nullptr); + const char* stop (nullptr); + switch (r) + { + case type::out_str: + case type::out_doc: mods = ":/~"; stop = "~"; break; + } + + return make_token_with_modifiers (r, mods, stop); + } + } + + return nullopt; + } + } +} -- cgit v1.1