From 4a9ee48613cf5c59e071400280b62358eb79987e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 2 Mar 2015 09:52:59 +0200 Subject: Indicate whether token is separated from previous one by whitespaces --- build/lexer | 32 ++++++----- build/lexer.cxx | 159 ++++++++++++++++++++++++++++++------------------------- build/parser | 2 +- build/parser.cxx | 2 +- build/token | 14 +++-- 5 files changed, 117 insertions(+), 92 deletions(-) (limited to 'build') diff --git a/build/lexer b/build/lexer index 1723ae0..67a94c8 100644 --- a/build/lexer +++ b/build/lexer @@ -20,11 +20,11 @@ namespace build public: lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {} + // Scanner. + // token next (); - // Character interface. - // private: class xchar { @@ -50,6 +50,22 @@ namespace build std::uint64_t c_; }; + token + name (xchar, bool separated); + + // Return true if we have seen any spaces. Skipped empty lines don't + // count. In other words, we are only interested in spaces that + // are on the same line as the following non-space character. + // + bool + skip_spaces (); + + xchar + escape (); + + // Character interface. + // + private: xchar peek (); @@ -67,18 +83,6 @@ namespace build return c.value () == xchar::traits_type::eof (); } - // Scanner. - // - private: - xchar - escape (); - - void - skip_spaces (); - - token - name (xchar); - // Diagnostics. 
// private: diff --git a/build/lexer.cxx b/build/lexer.cxx index 6836322..4e29b43 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -11,13 +11,13 @@ namespace build token lexer:: next () { - skip_spaces (); + bool sep (skip_spaces ()); xchar c (get ()); uint64_t ln (c.line ()), cn (c.column ()); if (is_eos (c)) - return token (token_type::eos, ln, cn); + return token (token_type::eos, sep, ln, cn); switch (c) { @@ -30,15 +30,15 @@ namespace build if (mode_ == mode::value) mode_ = mode::normal; - return token (token_type::newline, ln, cn); + return token (token_type::newline, sep, ln, cn); } case '{': { - return token (token_type::lcbrace, ln, cn); + return token (token_type::lcbrace, sep, ln, cn); } case '}': { - return token (token_type::rcbrace, ln, cn); + return token (token_type::rcbrace, sep, ln, cn); } } @@ -53,12 +53,12 @@ namespace build { case ':': { - return token (token_type::colon, ln, cn); + return token (token_type::colon, sep, ln, cn); } case '=': { mode_ = mode::value; - return token (token_type::equal, ln, cn); + return token (token_type::equal, sep, ln, cn); } case '+': { @@ -66,79 +66,18 @@ namespace build fail (c) << "expected = after +"; mode_ = mode::value; - return token (token_type::plus_equal, ln, cn); + return token (token_type::plus_equal, sep, ln, cn); } } } // Otherwise it is a name. // - return name (c); - } - - lexer::xchar lexer:: - escape () - { - xchar c (get ()); - - if (is_eos (c)) - fail (c) << "unterminated escape sequence"; - - return c; - } - - void lexer:: - skip_spaces () - { - xchar c (peek ()); - bool start (c.column () == 1); - - for (; !is_eos (c); c = peek ()) - { - switch (c) - { - case ' ': - case '\t': - break; - case '\n': - { - // Skip empty lines. - // - if (start) - break; - - return; - } - case '#': - { - get (); - - // Read until newline or eos. 
- // - for (c = peek (); !is_eos (c) && c != '\n'; c = peek ()) - get (); - continue; - } - case '\\': - { - get (); - - if (peek () == '\n') - break; - - unget (c); - // Fall through. - } - default: - return; // Not a space. - } - - get (); - } + return name (c, sep); } token lexer:: - name (xchar c) + name (xchar c, bool sep) { uint64_t ln (c.line ()), cn (c.column ()); string lexeme; @@ -198,7 +137,83 @@ namespace build break; } - return token (lexeme, ln, cn); + return token (lexeme, sep, ln, cn); + } + + bool lexer:: + skip_spaces () + { + bool r (false); + + xchar c (peek ()); + bool start (c.column () == 1); + + for (; !is_eos (c); c = peek ()) + { + switch (c) + { + case ' ': + case '\t': + { + r = true; + break; + } + case '\n': + { + // Skip empty lines. + // + if (start) + { + r = false; + break; + } + + return r; + } + case '#': + { + get (); + + // Read until newline or eos. + // + for (c = peek (); !is_eos (c) && c != '\n'; c = peek ()) + get (); + + r = true; + continue; + } + case '\\': + { + get (); + + if (peek () == '\n') + { + r = true; + break; + } + + unget (c); + // Fall through. + } + default: + return r; // Not a space. 
+ } + + get (); + } + + return r; + } + + lexer::xchar lexer:: + escape () + { + xchar c (get ()); + + if (is_eos (c)) + fail (c) << "unterminated escape sequence"; + + return c; } lexer::xchar lexer:: diff --git a/build/parser b/build/parser index 4f099fe..53fc83c 100644 --- a/build/parser +++ b/build/parser @@ -77,7 +77,7 @@ namespace build lexer* lexer_; scope* scope_; - token peek_ {token_type::eos, 0, 0}; + token peek_ {token_type::eos, false, 0, 0}; bool peeked_ {false}; std::unordered_set include_; diff --git a/build/parser.cxx b/build/parser.cxx index d887d5f..6eca0e6 100644 --- a/build/parser.cxx +++ b/build/parser.cxx @@ -62,7 +62,7 @@ namespace build lexer_ = &l; scope_ = &s; - token t (type::eos, 0, 0); + token t (type::eos, false, 0, 0); type tt; next (t, tt); diff --git a/build/token b/build/token index a071987..e6930ff 100644 --- a/build/token +++ b/build/token @@ -31,6 +31,11 @@ namespace build token_type type () const {return t_;} + // Token is whitespace-separated from the previous token. + // + bool + separated () const {return s_;} + std::string const& name () const {assert (t_ == token_type::name); return n_;} @@ -38,14 +43,15 @@ namespace build std::uint64_t column () const {return c_;} public: - token (token_type t, std::uint64_t l, std::uint64_t c) - : t_ (t), l_ (l), c_ (c) {} + token (token_type t, bool s, std::uint64_t l, std::uint64_t c) + : t_ (t), s_ (s), l_ (l), c_ (c) {} - token (std::string n, std::uint64_t l, std::uint64_t c) - : t_ (token_type::name), n_ (std::move (n)), l_ (l), c_ (c) {} + token (std::string n, bool s, std::uint64_t l, std::uint64_t c) + : t_ (token_type::name), s_ (s), n_ (std::move (n)), l_ (l), c_ (c) {} private: token_type t_; + bool s_; std::string n_; std::uint64_t l_; -- cgit v1.1