From 9fb791e9fad6c63fc1dac49f4d05ae63b8a3db9b Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 5 Jan 2016 11:55:15 +0200 Subject: Rename build directory/namespace to build2 --- build2/lexer | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 build2/lexer (limited to 'build2/lexer') diff --git a/build2/lexer b/build2/lexer new file mode 100644 index 0000000..51f3e56 --- /dev/null +++ b/build2/lexer @@ -0,0 +1,138 @@ +// file : build2/lexer -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUILD2_LEXER +#define BUILD2_LEXER + +#include +#include +#include +#include // size_t +#include // uint64_t +#include +#include + +#include + +#include +#include + +#include +#include + +namespace build2 +{ + // Context-dependent lexing mode. In the value mode we don't treat + // certain characters (e.g., +, =) as special so that we can use + // them in the variable values, e.g., 'foo = g++'. In contrast, in + // the variable mode, we restrict certain characters (e.g., /) from + // appearing in the name. The pairs mode is just like value except + // that we split names separated by the pair character. The eval + // mode is used in the evaluation context. + // + // The alternative modes must be set manually. The value and pairs + // modes are automatically reset after the end of the line. The + // variable mode is reset after the name token. And the eval mode + // is reset after the closing ')'. + // + // Quoted is an internal mode and should not be set explicitly. 
+ // + enum class lexer_mode {normal, variable, value, pairs, eval, quoted}; + + class lexer: protected butl::char_scanner + { + public: + lexer (std::istream& is, + const std::string& name, + void (*processor) (token&, const lexer&) = nullptr) + : char_scanner (is), fail (name), processor_ (processor), sep_ (false) + { + mode_.push (lexer_mode::normal); + } + + const std::string& + name () const {return fail.name_;} + + // Note: sets mode for the next token. If mode is pairs, then + // the second argument specifies the separator character. + // + void + mode (lexer_mode m, char pair_separator = '=') + { + mode_.push (m); + pair_separator_ = pair_separator; + } + + // Expire the current mode early. + // + void + expire_mode () {mode_.pop ();} + + lexer_mode + mode () const {return mode_.top ();} + + char + pair_separator () const {return pair_separator_;} + + // Scanner. + // + token + next (); + + // Peek at the first character of the next token. Return the character + // or 0 if the next token will be eos. Also return an indicator of + // whether the next token will be separated. + // + pair + peek_char (); + + private: + token + next_impl (); + + token + next_eval (); + + token + next_quoted (); + + token + name (bool separated); + + // Return true if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces + // that are on the same line as the following non-space character. + // + bool + skip_spaces (); + + xchar + escape (); + + // Diagnostics. + // + private: + struct fail_mark_base: build2::fail_mark_base + { + fail_mark_base (const std::string& n): name_ (n) {} + + location_prologue + operator() (const xchar&) const; + + std::string name_; + }; + typedef diag_mark fail_mark; + + private: + fail_mark fail; + + void (*processor_) (token&, const lexer&); + + std::stack mode_; + char pair_separator_; + bool sep_; // True if we skipped spaces in peek(). + }; +} + +#endif // BUILD2_LEXER -- cgit v1.1