diff options
Diffstat (limited to 'libbuild2/build/script/lexer.cxx')
-rw-r--r-- | libbuild2/build/script/lexer.cxx | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx new file mode 100644 index 0000000..7b8bdd4 --- /dev/null +++ b/libbuild2/build/script/lexer.cxx @@ -0,0 +1,270 @@ +// file : libbuild2/build/script/lexer.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/build/script/lexer.hxx> + +using namespace std; + +namespace build2 +{ + namespace build + { + namespace script + { + using type = token_type; + + build2::script::redirect_aliases lexer::redirect_aliases { + type (type::in_file), + type (type::in_doc), + type (type::in_str), + type (type::out_file_ovr), + type (type::out_file_app), + nullopt}; + + void lexer:: + mode (build2::lexer_mode m, + char ps, + optional<const char*> esc, + uintptr_t data) + { + bool a (false); // attributes + + const char* s1 (nullptr); + const char* s2 (nullptr); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::command_line: + { + s1 = "=!|&<> $(#\t\n"; + s2 = "== "; + break; + } + case lexer_mode::first_token: + { + // First token on the script line. Like command_line but + // recognizes variable assignments as separators. + // + s1 = "=+!|&<> $(#\t\n"; + s2 = " == "; + break; + } + case lexer_mode::second_token: + { + // Second token on the script line. Like command_line but + // recognizes leading variable assignments. + // + // Note that to recognize only leading assignments we shouldn't + // add them to the separator strings (so this is identical to + // command_line). + // + s1 = "=!|&<> $(#\t\n"; + s2 = "== "; + break; + } + case lexer_mode::variable_line: + { + // Like value except we don't recognize '{'. + // + s1 = " $(#\t\n"; + s2 = " "; + break; + } + default: + { + base_lexer::mode (m, ps, esc); + return; + } + } + + assert (ps == '\0'); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); + } + + token lexer:: + next () + { + token r; + + switch (state_.top ().mode) + { + case lexer_mode::command_line: + case lexer_mode::first_token: + case lexer_mode::second_token: + case lexer_mode::variable_line: + r = next_line (); + break; + default: return base_lexer::next (); + } + + if (r.qtype != quote_type::unquoted) + ++quoted_; + + return r; + } + + token lexer:: + next_line () + { + bool sep (skip_spaces ().first); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + state st (state_.top ()); // Make copy (see first/second_token). + lexer_mode m (st.mode); + + auto make_token = [&sep, ln, cn] (type t) + { + return token (t, sep, ln, cn, token_printer); + }; + + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). + // + if (st.attributes) + { + assert (m == lexer_mode::variable_line); + + state_.top ().attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } + + if (eos (c)) + return make_token (type::eos); + + // Expire certain modes at the end of the token. Do it early in case + // we push any new mode (e.g., double quote). + // + if (m == lexer_mode::first_token || m == lexer_mode::second_token) + state_.pop (); + + // NOTE: remember to update mode() if adding new special characters. + + switch (c) + { + case '\n': + { + // Expire variable value mode at the end of the line. + // + if (m == lexer_mode::variable_line) + state_.pop (); + + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + + // Command line operator/separators. + // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token) + { + switch (c) + { + // Comparison (==, !=). + // + case '=': + case '!': + { + if (peek () == '=') + { + get (); + return make_token (c == '=' ? type::equal : type::not_equal); + } + } + } + } + + // Command operators. + // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token) + { + if (optional<token> t = next_cmd_op (c, sep)) + return move (*t); + } + + // Variable assignment (=, +=, =+). + // + if (m == lexer_mode::second_token) + { + switch (c) + { + case '=': + { + if (peek () == '+') + { + get (); + return make_token (type::prepend); + } + else + return make_token (type::assign); + } + case '+': + { + if (peek () == '=') + { + get (); + return make_token (type::append); + } + } + } + } + + // Otherwise it is a word. + // + unget (c); + return word (st, sep); + } + + token lexer:: + word (state st, bool sep) + { + lexer_mode m (st.mode); + + // Customized implementation that handles special variable names ($>, + // $<, $~). + // + // @@ TODO: $(<), $(>): feels like this will have to somehow be + // handled at the top-level lexer level. Maybe provide a + // string of one-char special variable names as state::data? + // + if (m != lexer_mode::variable) + return base_lexer::word (st, sep); + + xchar c (peek ()); + + if (c != '>' && c != '<' && c != '~') + return base_lexer::word (st, sep); + + get (); + + state_.pop (); // Expire the variable mode. + return token (string (1, c), + sep, + quote_type::unquoted, false, + c.line, c.column); + } + } + } +} |