diff options
Diffstat (limited to 'libbuild2/lexer.hxx')
-rw-r--r-- | libbuild2/lexer.hxx | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx new file mode 100644 index 0000000..f987071 --- /dev/null +++ b/libbuild2/lexer.hxx @@ -0,0 +1,207 @@ +// file : libbuild2/lexer.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_LEXER_HXX +#define LIBBUILD2_LEXER_HXX + +#include <stack> + +#include <libbutl/char-scanner.mxx> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/token.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Context-dependent lexing mode. In the value mode we don't treat certain + // characters (e.g., '+', '=') as special so that we can use them in the + // variable values, e.g., 'foo = g++'. In contrast, in the variable mode, we + // restrict certain characters (e.g., '/') from appearing in the name. The + // attribute mode is like value except it doesn't treat '{' and '}' as + // special (so we cannot have name groups in attributes). The eval mode is + // used in the evaluation context. Quoted modes are internal and should not + // be set explicitly. + // + // Note that the normal, value, and eval modes split words separated by the + // pair character (to disable pairs one can pass '\0' as a pair character). + // + // The alternative modes must be set manually. The value mode automatically + // expires after the end of the line. The attribute mode expires after the + // closing ']'. The variable mode expires after the word token. And the eval + // mode expires after the closing ')'. + // + // Note that normally it is only safe to switch mode when the current token + // is not quoted (or, more generally, when you are not in the double-quoted + // mode) unless the mode treats the double-quote as a separator (e.g., + // variable name mode). 
Failed that your mode (which now will be the top of + // the mode stack) will prevent proper recognition of the closing quote. + // + + // Extendable/inheritable enum-like class. + // + struct lexer_mode: lexer_mode_base + { + using base_type = lexer_mode_base; + + enum + { + normal = base_type::value_next, + variable, + value, + attribute, + eval, + single_quoted, + double_quoted, + buildspec, + + value_next + }; + + lexer_mode () = default; + lexer_mode (value_type v): base_type (v) {} + lexer_mode (base_type v): base_type (v) {} + }; + + class LIBBUILD2_SYMEXPORT lexer: public butl::char_scanner + { + public: + // If escapes is not NULL then only escape sequences with characters from + // this string are considered "effective escapes" with all others passed + // through as is. Note that the escapes string is not copied. + // + lexer (istream& is, + const path& name, + uint64_t line = 1, // Start line in the stream. + const char* escapes = nullptr) + : lexer (is, name, line, escapes, true /* set_mode */) {} + + const path& + name () const {return name_;} + + // Note: sets mode for the next token. The second argument can be used to + // specify the pair separator character (if the mode supports pairs). If + // escapes is not specified, then inherit the current mode's (though a mode + // can also override it). + // + virtual void + mode (lexer_mode, + char pair_separator = '\0', + optional<const char*> escapes = nullopt); + + // Expire the current mode early. + // + void + expire_mode () {state_.pop ();} + + lexer_mode + mode () const {return state_.top ().mode;} + + char + pair_separator () const {return state_.top ().sep_pair;} + + // Scanner. Note that it is ok to call next() again after getting eos. + // + // If you extend the lexer and add a custom lexer mode, then you must + // override next() and handle the custom mode there. + // + virtual token + next (); + + // Peek at the first character of the next token. 
Return the character + // or '\0' if the next token will be eos. Also return an indicator of + // whether the next token will be separated. + // + pair<char, bool> + peek_char (); + + protected: + struct state + { + lexer_mode mode; + + char sep_pair; + bool sep_space; // Are whitespaces separators (see skip_spaces())? + bool sep_newline; // Is newline special (see skip_spaces())? + bool quotes; // Recognize quoted fragments. + + const char* escapes; // Effective escape sequences to recognize. + + // Word separator characters. For a two-character sequence, put the first + // one in sep_first and the second one in the corresponding position of + // sep_second. If it's a single-character sequence, then put space in + // sep_second. If there are multiple sequences that start with the same + // character, then repeat the first character in sep_first. + // + const char* sep_first; + const char* sep_second; + }; + + token + next_eval (); + + token + next_quoted (); + + // Lex a word assuming current is the top state (which may already have + // been "expired" from the top). + // + virtual token + word (state current, bool separated); + + // Return true if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces + // that are on the same line as the following non-space character. + // + bool + skip_spaces (); + + // Diagnostics. + // + protected: + fail_mark fail; + + // Lexer state. + // + protected: + lexer (istream& is, + const path& name, + uint64_t line, + const char* escapes, + bool set_mode) + : char_scanner (is, true /* crlf */, line), + fail ("error", &name_), + name_ (name), + sep_ (false) + { + if (set_mode) + mode (lexer_mode::normal, '@', escapes); + } + + const path name_; + std::stack<state> state_; + + bool sep_; // True if we skipped spaces in peek(). + }; +} + +// Diagnostics plumbing. 
+// +namespace butl // ADL +{ + // Return the location (path, line, column) of the scanned character c. + // The data argument is cast to a pointer to the path and must not be NULL. + // + inline build2::location + get_location (const butl::char_scanner::xchar& c, const void* data) + { + using namespace build2; + + assert (data != nullptr); // Must point to a path, e.g., &lexer::name_. + return location (static_cast<const path*> (data), c.line, c.column); + } +} + +#endif // LIBBUILD2_LEXER_HXX |