// file : build/lexer -*- C++ -*- // copyright : Copyright (c) 2014-2015 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #ifndef BUILD_LEXER #define BUILD_LEXER #include #include #include // size_t #include // uint64_t #include #include #include #include #include namespace build { // Context-dependent lexing mode. In the value mode we don't treat // certain characters (e.g., +, =) as special so that we can use // them in the variable values, e.g., 'foo = g++'. In contrast, // in the variable mode, we restrict certain character (e.g., /) // from appearing in the name. The pairs mode is just like value // except that we split names separated by the pair character. // The alternnative modes must be set manually. The value and // pairs modes are automatically reset after the end of the line. // The variable mode is automatically reset after the name token. // Quoted is an internal mode and should not be explicitly set. // enum class lexer_mode {normal, quoted, variable, value, pairs}; class lexer: protected butl::char_scanner { public: lexer (std::istream& is, const std::string& name) : char_scanner (is), fail (name) {mode_.push (lexer_mode::normal);} const std::string& name () const {return fail.name_;} // Note: sets mode for the next token. If mode is pairs, then // the second argument specifies the separator character. // void mode (lexer_mode m, char pair_separator = '=') { mode_.push (m); pair_separator_ = pair_separator; } // Expire the current mode early. // void expire_mode () {mode_.pop ();} lexer_mode mode () const {return mode_.top ();} char pair_separator () const {return pair_separator_;} // Scanner. // token next (); private: token name (bool separated); void single_quote (std::string&); bool double_quote (std::string&); // Return true we have seen any spaces. Skipped empty lines don't // count. In other words, we are only interested in spaces that // are on the same line as the following non-space character. // bool skip_spaces (); xchar escape (); // Diagnostics. // private: struct fail_mark_base: build::fail_mark_base { fail_mark_base (const std::string& n): name_ (n) {} location_prologue operator() (const xchar&) const; std::string name_; }; typedef diag_mark fail_mark; private: fail_mark fail; // Currently, the maximum mode nesting is 4: {normal, value, quoted, // variable}. // struct mode_stack { static const size_t max_size = 4; void push (lexer_mode m) {assert (n_ != max_size); d_[n_++] = m;} void pop () {assert (n_ != 0); n_--;} lexer_mode top () const {return d_[n_ - 1];} private: size_t n_ = 0; lexer_mode d_[max_size]; }; mode_stack mode_; char pair_separator_; }; } #endif // BUILD_LEXER