From c09cd7512491cee1e82c1ad8128ce9fd4bc3f79b Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 22 Sep 2017 23:32:28 +0200 Subject: Initial modularization with both Clang and VC hacks Note: gave up on VC about half way though. --- libbutl/char-scanner.mxx | 141 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 libbutl/char-scanner.mxx (limited to 'libbutl/char-scanner.mxx') diff --git a/libbutl/char-scanner.mxx b/libbutl/char-scanner.mxx new file mode 100644 index 0000000..af4dad9 --- /dev/null +++ b/libbutl/char-scanner.mxx @@ -0,0 +1,141 @@ +// file : libbutl/char-scanner.mxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef __cpp_modules +#pragma once +#endif + +// C includes. + +#ifndef __cpp_lib_modules +#include // char_traits +#include // uint64_t +#include +#endif + +// Other includes. + +#ifdef __cpp_modules +export module butl.char_scanner; +#ifdef __cpp_lib_modules +import std.core; +import std.io; +#endif +import butl.fdstream; +#else +#include +#endif + +#include + +LIBBUTL_MODEXPORT namespace butl +{ + // Low-level character stream scanner. Normally used as a base for + // higher-level lexers. + // + class LIBBUTL_SYMEXPORT char_scanner + { + public: + // If the crlf argument is true, then recognize Windows newlines (0x0D + // 0x0A) and convert them to just '\n' (0x0A). Note that a standalone + // 0x0D is treated "as if" it was followed by 0x0A. + // + // Note also that if the stream happens to be ifdstream, then it includes + // a number of optimizations that assume nobody else is messing with the + // stream. + // + char_scanner (std::istream& is, bool crlf = true); + + char_scanner (const char_scanner&) = delete; + char_scanner& operator= (const char_scanner&) = delete; + + // Scanner interface. + // + public: + + // Extended character. It includes line/column information and is capable + // of representing EOF. + // + // Note that implicit conversion of EOF to char_type results in NUL + // character (which means in most cases it is safe to compare xchar to + // char without checking for EOF). + // + class xchar + { + public: + using traits_type = std::char_traits; + using int_type = traits_type::int_type; + using char_type = traits_type::char_type; + + int_type value; + std::uint64_t line; + std::uint64_t column; + + operator char_type () const + { + return value != traits_type::eof () + ? static_cast (value) + : char_type (0); + } + + xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0) + : value (v), line (l), column (c) {} + }; + + xchar + get (); + + void + get (const xchar& peeked); // Get previously peeked character (faster). + + void + unget (const xchar&); + + // Note that if there is an "ungot" character, peek() will return + // that. + // + xchar + peek (); + + // Tests. In the future we can add tests line alpha(), alnum(), + // etc. + // + static bool + eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} + + // Line and column of the next character to be extracted from the stream + // by peek() or get(). + // + std::uint64_t line = 1; + std::uint64_t column = 1; + + protected: + using int_type = xchar::int_type; + using char_type = xchar::char_type; + + int_type + peek_ (); + + void + get_ (); + + protected: + std::istream& is_; + + fdbuf* buf_; // NULL if not ifdstream. + const char_type* gptr_; + const char_type* egptr_; + + bool crlf_; + bool eos_ = false; + + bool unget_ = false; + bool unpeek_ = false; + + xchar ungetc_ = '\0'; + xchar unpeekc_ = '\0'; + }; +} + +#include -- cgit v1.1