diff options
Diffstat (limited to 'build2/lexer')
-rw-r--r-- | build2/lexer | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/build2/lexer b/build2/lexer new file mode 100644 index 0000000..51f3e56 --- /dev/null +++ b/build2/lexer @@ -0,0 +1,138 @@ +// file : build2/lexer -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUILD2_LEXER +#define BUILD2_LEXER + +#include <stack> +#include <string> +#include <iosfwd> +#include <cstddef> // size_t +#include <cstdint> // uint64_t +#include <cassert> +#include <exception> + +#include <butl/char-scanner> + +#include <build2/types> +#include <build2/utility> + +#include <build2/token> +#include <build2/diagnostics> + +namespace build2 +{ + // Context-dependent lexing mode. In the value mode we don't treat + // certain characters (e.g., +, =) as special so that we can use + // them in the variable values, e.g., 'foo = g++'. In contrast, in + // the variable mode, we restrict certain character (e.g., /) from + // appearing in the name. The pairs mode is just like value except + // that we split names separated by the pair character. The eval + // mode is used in the evaluation context. + // + // The alternnative modes must be set manually. The value and pairs + // modes are automatically reset after the end of the line. The + // variable mode is reset after the name token. And the eval mode + // is reset after the closing ')'. + // + // Quoted is an internal mode and should not be set explicitly. + // + enum class lexer_mode {normal, variable, value, pairs, eval, quoted}; + + class lexer: protected butl::char_scanner + { + public: + lexer (std::istream& is, + const std::string& name, + void (*processor) (token&, const lexer&) = nullptr) + : char_scanner (is), fail (name), processor_ (processor), sep_ (false) + { + mode_.push (lexer_mode::normal); + } + + const std::string& + name () const {return fail.name_;} + + // Note: sets mode for the next token. If mode is pairs, then + // the second argument specifies the separator character. + // + void + mode (lexer_mode m, char pair_separator = '=') + { + mode_.push (m); + pair_separator_ = pair_separator; + } + + // Expire the current mode early. + // + void + expire_mode () {mode_.pop ();} + + lexer_mode + mode () const {return mode_.top ();} + + char + pair_separator () const {return pair_separator_;} + + // Scanner. + // + token + next (); + + // Peek at the first character of the next token. Return the character + // or 0 if the next token will be eos. Also return an indicator of + // whether the next token will be separated. + // + pair<char, bool> + peek_char (); + + private: + token + next_impl (); + + token + next_eval (); + + token + next_quoted (); + + token + name (bool separated); + + // Return true if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces + // that are on the same line as the following non-space character. + // + bool + skip_spaces (); + + xchar + escape (); + + // Diagnostics. + // + private: + struct fail_mark_base: build2::fail_mark_base<failed> + { + fail_mark_base (const std::string& n): name_ (n) {} + + location_prologue + operator() (const xchar&) const; + + std::string name_; + }; + typedef diag_mark<fail_mark_base> fail_mark; + + private: + fail_mark fail; + + void (*processor_) (token&, const lexer&); + + std::stack<lexer_mode> mode_; + char pair_separator_; + bool sep_; // True if we skipped spaces in peek(). + }; +} + +#endif // BUILD2_LEXER |