From b6e02f4224975a6425f62095bc35478e8866db77 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 24 May 2017 13:26:13 +0200 Subject: Various improvements to char_scanner --- libbutl/char-scanner.cxx | 68 +++++++++++++++++++----------------------------- libbutl/char-scanner.hxx | 38 ++++++++++++++++++++------- libbutl/char-scanner.ixx | 32 +++++++++++++++++++++++ 3 files changed, 87 insertions(+), 51 deletions(-) create mode 100644 libbutl/char-scanner.ixx diff --git a/libbutl/char-scanner.cxx b/libbutl/char-scanner.cxx index cbc2503..42a72cc 100644 --- a/libbutl/char-scanner.cxx +++ b/libbutl/char-scanner.cxx @@ -14,44 +14,44 @@ namespace butl peek () -> xchar { if (unget_) - return buf_; - else - { - if (eos_) - return xchar (xchar::traits_type::eof (), line, column); - else - { - xchar::int_type v (is_.peek ()); + return ungetc_; - if (v == xchar::traits_type::eof ()) - eos_ = true; - else if (crlf_ && v == 0x0D) - { - is_.get (); - xchar::int_type v1 (is_.peek ()); + if (unpeek_) + return unpeekc_; - if (v1 != '\n') - { - unget_ = true; - buf_ = '\n'; - } + if (eos_) + return xchar (xchar::traits_type::eof (), line, column); - v = '\n'; - } + xchar::int_type v (is_.peek ()); - return xchar (v, line, column); + if (v == xchar::traits_type::eof ()) + eos_ = true; + else if (crlf_ && v == 0x0D) + { + is_.get (); + xchar::int_type v1 (is_.peek ()); + + if (v1 != '\n') + { + // We need to make sure subsequent calls to peek() return newline. + // + unpeek_ = true; + unpeekc_ = xchar ('\n', line, column); } + + v = '\n'; } + + return xchar (v, line, column); } - auto char_scanner:: - get () -> xchar + void char_scanner:: + get (const xchar& c) { if (unget_) - { unget_ = false; - return buf_; - } + else if (unpeek_) + unpeek_ = false; else { // When is_.get () returns eof, the failbit is also set (stupid, @@ -60,8 +60,6 @@ namespace butl // eof. But we can only call peek() on eof once; any subsequent // calls will spoil the failbit (even more stupid). // - xchar c (peek ()); - if (!eos (c)) { is_.get (); @@ -74,18 +72,6 @@ namespace butl else column++; } - - return c; } } - - void char_scanner:: - unget (const xchar& c) - { - // Because iostream::unget cannot work once eos is reached, - // we have to provide our own implementation. - // - buf_ = c; - unget_ = true; - } } diff --git a/libbutl/char-scanner.hxx b/libbutl/char-scanner.hxx index 71f8313..e71f286 100644 --- a/libbutl/char-scanner.hxx +++ b/libbutl/char-scanner.hxx @@ -33,8 +33,12 @@ namespace butl // public: - // Extended character. It includes line/column information - // and is capable of representing EOF. + // Extended character. It includes line/column information and is capable + // of representing EOF. + // + // Note that implicit conversion of EOF to char_type results in NUL + // character (which means in most cases it is safe to compare xchar to + // char without checking for EOF). // class xchar { @@ -47,7 +51,12 @@ namespace butl std::uint64_t line; std::uint64_t column; - operator char_type () const {return static_cast (value);} + operator char_type () const + { + return value != traits_type::eof () + ? static_cast (value) + : char_type (0); + } xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0) : value (v), line (l), column (c) {} @@ -57,6 +66,9 @@ namespace butl get (); void + get (const xchar& peeked); // Get previously peeked character (faster). + + void unget (const xchar&); // Note that if there is an "ungot" character, peek() will return @@ -71,20 +83,26 @@ namespace butl static bool eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} - // Line and column of the furthest seen (either via get() or - // peek()) character. + // Line and column of the next character to be extracted from the stream + // by peek() or get(). // - std::uint64_t line {1}; - std::uint64_t column {1}; + std::uint64_t line = 1; + std::uint64_t column = 1; protected: std::istream& is_; + bool crlf_; + bool eos_ = false; + + bool unget_ = false; + bool unpeek_ = false; - bool unget_ {false}; - xchar buf_ = '\0'; - bool eos_ {false}; + xchar ungetc_ = '\0'; + xchar unpeekc_ = '\0'; }; } +#include + #endif // LIBBUTL_CHAR_SCANNER_HXX diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx new file mode 100644 index 0000000..2d96207 --- /dev/null +++ b/libbutl/char-scanner.ixx @@ -0,0 +1,32 @@ +// file : libbutl/char-scanner.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace butl +{ + inline auto char_scanner:: + get () -> xchar + { + if (unget_) + { + unget_ = false; + return ungetc_; + } + else + { + xchar c (peek ()); + get (c); + return c; + } + } + + inline void char_scanner:: + unget (const xchar& c) + { + // Because iostream::unget cannot work once eos is reached, we have to + // provide our own implementation. + // + unget_ = true; + ungetc_ = c; + } +} -- cgit v1.1