// file : build/lexer -*- C++ -*- // copyright : Copyright (c) 2014-2015 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #ifndef BUILD_LEXER #define BUILD_LEXER #include #include #include // uint64_t #include #include #include namespace build { // Context-dependent lexing mode. In the value mode we don't treat // certain characters (e.g., +, =) as special so that we can use // them in the variable values, e.g., 'foo = g++'. In contrast, // in the variable mode, we restrict certain character (e.g., /) // from appearing in the name. The pairs mode is just like value // except that we split names separated by the pair character. // The pairs mode must be set manually. // enum class lexer_mode {normal, value, variable, pairs}; class lexer { public: lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {} const std::string& name () const {return fail.name_;} // Note: sets mode for the next token. If mode is pairs, then // the second argument specifies the separator character. // void mode (lexer_mode m, char pair_separator = '=') { next_mode_ = m; pair_separator_ = pair_separator; } lexer_mode mode () const {return mode_;} // Scanner. // token next (); private: class xchar { public: typedef std::char_traits traits_type; typedef traits_type::int_type int_type; typedef traits_type::char_type char_type; xchar (int_type v, std::uint64_t l, std::uint64_t c) : v_ (v), l_ (l), c_ (c) {} operator char_type () const {return static_cast (v_);} int_type value () const {return v_;} std::uint64_t line () const {return l_;} std::uint64_t column () const {return c_;} private: int_type v_; std::uint64_t l_; std::uint64_t c_; }; token name (xchar, bool separated); // Return true we have seen any spaces. Skipped empty lines don't // count. In other words, we are only interested in spaces that // are on the same line as the following non-space character. // bool skip_spaces (); xchar escape (); // Character interface. // private: xchar peek (); xchar get (); void unget (const xchar&); // Tests. // bool is_eos (const xchar& c) const { return c.value () == xchar::traits_type::eof (); } // Diagnostics. // private: struct fail_mark_base: build::fail_mark_base { fail_mark_base (const std::string& n): name_ (n) {} location_prologue operator() (const xchar&) const; std::string name_; }; typedef diag_mark fail_mark; private: std::istream& is_; fail_mark fail; std::uint64_t l_ {1}; std::uint64_t c_ {1}; bool unget_ {false}; xchar buf_ {0, 0, 0}; bool eos_ {false}; lexer_mode mode_ {lexer_mode::normal}; char pair_separator_; lexer_mode next_mode_ {lexer_mode::normal}; // Switch to for next token. lexer_mode prev_mode_; // Return to after current mode expires. }; } #endif // BUILD_LEXER