// file : build/lexer.cxx -*- C++ -*- // copyright : Copyright (c) 2014-2015 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #include using namespace std; namespace build { token lexer:: next () { if (mode_ != next_mode_) { prev_mode_ = mode_; mode_ = next_mode_; } bool sep (skip_spaces ()); xchar c (get ()); uint64_t ln (c.line ()), cn (c.column ()); if (is_eos (c)) return token (token_type::eos, sep, ln, cn); switch (c) { // NOTE: remember to update name() if adding new punctuations. // case '\n': { // Restore the normal mode at the end of the line. // if (mode_ == lexer_mode::value || mode_ == lexer_mode::pairs) mode_ = next_mode_ = lexer_mode::normal; return token (token_type::newline, sep, ln, cn); } case '{': { return token (token_type::lcbrace, sep, ln, cn); } case '}': { return token (token_type::rcbrace, sep, ln, cn); } case '$': { // The following name is lexed in the variable mode. // next_mode_ = lexer_mode::variable; return token (token_type::dollar, sep, ln, cn); } case '(': { return token (token_type::lparen, sep, ln, cn); } case ')': { return token (token_type::rparen, sep, ln, cn); } } // Handle pair separator. // if (mode_ == lexer_mode::pairs && c == pair_separator_) return token (token_type::pair_separator, sep, ln, cn); // The following characters are not treated as special in the // value or pairs mode. // if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs) { // NOTE: remember to update name() if adding new punctuations. // switch (c) { case ':': { return token (token_type::colon, sep, ln, cn); } case '+': { if (get () != '=') fail (c) << "expected = after +"; next_mode_ = lexer_mode::value; return token (token_type::plus_equal, sep, ln, cn); } case '=': { next_mode_ = lexer_mode::value; return token (token_type::equal, sep, ln, cn); } } } // Otherwise it is a name. // return name (c, sep); } token lexer:: name (xchar c, bool sep) { uint64_t ln (c.line ()), cn (c.column ()); string lexeme; lexeme += (c != '\\' ? c : escape ()); for (c = peek (); !is_eos (c); c = peek ()) { bool done (false); // Handle pair separator. // if (mode_ == lexer_mode::pairs && c == pair_separator_) break; // The following characters are not treated as special in the // value or pairs mode. // if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs) { switch (c) { case ':': case '+': case '=': { done = true; break; } } if (done) break; } // While these extra characters are treated as the name end in // the variable mode. // if (mode_ == lexer_mode::variable) { switch (c) { case '/': { done = true; break; } } if (done) break; } switch (c) { case ' ': case '\t': case '\n': case '#': case '{': case '}': case '$': case '(': case ')': { done = true; break; } case '\\': { get (); lexeme += escape (); break; } default: { get (); lexeme += c; break; } } if (done) break; } if (mode_ == lexer_mode::variable) next_mode_ = prev_mode_; return token (lexeme, sep, ln, cn); } bool lexer:: skip_spaces () { bool r (false); xchar c (peek ()); bool start (c.column () == 1); for (; !is_eos (c); c = peek ()) { switch (c) { case ' ': case '\t': { r = true; break; } case '\n': { // Skip empty lines. // if (start) { r = false; break; } return r; } case '#': { get (); // Read until newline or eos. // for (c = peek (); !is_eos (c) && c != '\n'; c = peek ()) get (); r = true; continue; } case '\\': { get (); if (peek () == '\n') { r = true; break; } unget (c); // Fall through. } default: return r; // Not a space. } get (); } return r; } lexer::xchar lexer:: escape () { xchar c (get ()); if (is_eos (c)) fail (c) << "unterminated escape sequence"; return c; } lexer::xchar lexer:: peek () { if (unget_) return buf_; else { if (eos_) return xchar (xchar::traits_type::eof (), l_, c_); else { xchar::int_type v (is_.peek ()); if (v == xchar::traits_type::eof ()) eos_ = true; return xchar (v, l_, c_); } } } lexer::xchar lexer:: get () { if (unget_) { unget_ = false; return buf_; } else { // When is_.get () returns eof, the failbit is also set (stupid, // isn't?) which may trigger an exception. To work around this // we will call peek() first and only call get() if it is not // eof. But we can only call peek() on eof once; any subsequent // calls will spoil the failbit (even more stupid). // xchar c (peek ()); if (!is_eos (c)) { is_.get (); if (c == '\n') { l_++; c_ = 1; } else c_++; } return c; } } void lexer:: unget (const xchar& c) { // Because iostream::unget cannot work once eos is reached, // we have to provide our own implementation. // buf_ = c; unget_ = true; } location_prologue lexer::fail_mark_base:: operator() (const xchar& c) const { return build::fail_mark_base::operator() ( location (name_.c_str (), c.line (), c.column ())); } }