diff options
Diffstat (limited to 'butl')
-rw-r--r-- | butl/buildfile | 2 | ||||
-rw-r--r-- | butl/char-scanner | 82 | ||||
-rw-r--r-- | butl/char-scanner.cxx | 78 |
3 files changed, 161 insertions, 1 deletions
diff --git a/butl/buildfile b/butl/buildfile index e9fc900..a1b5a32 100644 --- a/butl/buildfile +++ b/butl/buildfile @@ -2,6 +2,6 @@ # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -lib{butl}: cxx{fdstream filesystem path process timestamp} +lib{butl}: cxx{char-scanner fdstream filesystem path process timestamp} cxx.poptions += -I$src_root lib{butl}: cxx.export.poptions = -I$src_root diff --git a/butl/char-scanner b/butl/char-scanner new file mode 100644 index 0000000..3c8cdbe --- /dev/null +++ b/butl/char-scanner @@ -0,0 +1,82 @@ +// file : butl/char-scanner -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUTL_CHAR_SCANNER +#define BUTL_CHAR_SCANNER + +#include <string> // char_traits +#include <iosfwd> +#include <cstdint> // uint64_t + +namespace butl +{ + // Low-level character stream scanner. Normally used as a base for + // higher-level lexers. + // + class char_scanner + { + public: + char_scanner (std::istream& is): is_ (is) {} + + char_scanner (const char_scanner&) = delete; + char_scanner& operator= (const char_scanner&) = delete; + + // Scanner interface. + // + public: + + // Extended character. It includes line/column information + // and is capable of representing EOF. + // + class xchar + { + public: + typedef std::char_traits<char> traits_type; + typedef traits_type::int_type int_type; + typedef traits_type::char_type char_type; + + int_type value; + std::uint64_t line; + std::uint64_t column; + + operator char_type () const {return static_cast<char_type> (value);} + + xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0) + : value (v), line (l), column (c) {} + }; + + xchar + get (); + + void + unget (const xchar&); + + // Note that if there is an "ungot" character, peek() will return + // that. + // + xchar + peek (); + + // Tests. In the future we can add tests like alpha(), alnum(), + // etc. 
+ // + static bool + eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} + + // Line and column of the furthest seen (either via get() or + // peek()) character. + // + std::uint64_t line {1}; + std::uint64_t column {1}; + + protected: + std::istream& is_; + + bool unget_ {false}; + xchar buf_ = '\0'; + bool eos_ {false}; + }; +} + +#endif // BUTL_CHAR_SCANNER diff --git a/butl/char-scanner.cxx b/butl/char-scanner.cxx new file mode 100644 index 0000000..1561ad1 --- /dev/null +++ b/butl/char-scanner.cxx @@ -0,0 +1,78 @@ +// file : butl/char-scanner.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <butl/char-scanner> + +#include <istream> + +using namespace std; + +namespace butl +{ + auto char_scanner:: + peek () -> xchar + { + if (unget_) + return buf_; + else + { + if (eos_) + return xchar (xchar::traits_type::eof (), line, column); + else + { + xchar::int_type v (is_.peek ()); + + if (v == xchar::traits_type::eof ()) + eos_ = true; + + return xchar (v, line, column); + } + } + } + + auto char_scanner:: + get () -> xchar + { + if (unget_) + { + unget_ = false; + return buf_; + } + else + { + // When is_.get () returns eof, the failbit is also set (stupid, + // isn't it?) which may trigger an exception. To work around this + // we will call peek() first and only call get() if it is not + // eof. But we can only call peek() on eof once; any subsequent + // calls will spoil the failbit (even more stupid). + // + xchar c (peek ()); + + if (!eos (c)) + { + is_.get (); + + if (c == '\n') + { + line++; + column = 1; + } + else + column++; + } + + return c; + } + } + + void char_scanner:: + unget (const xchar& c) + { + // Because iostream::unget cannot work once eos is reached, + // we have to provide our own implementation. + // + buf_ = c; + unget_ = true; + } +} |