From 5ae9686adac1508873f2d980e84becd3496244c2 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 26 Feb 2020 17:16:45 +0300 Subject: Add notion of validator to char_scanner and make sure manifest is UTF-8 This involves implementing utf8_validator and UTF-8 utility functions and using them during the manifest parsing, serialization, and rewriting. --- libbutl/char-scanner.cxx | 126 ----------------------------------------------- 1 file changed, 126 deletions(-) delete mode 100644 libbutl/char-scanner.cxx (limited to 'libbutl/char-scanner.cxx') diff --git a/libbutl/char-scanner.cxx b/libbutl/char-scanner.cxx deleted file mode 100644 index 85416e5..0000000 --- a/libbutl/char-scanner.cxx +++ /dev/null @@ -1,126 +0,0 @@ -// file : libbutl/char-scanner.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include // char_traits -#include // uint64_t -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.char_scanner; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.fdstream; -#endif - -#endif - -using namespace std; - -namespace butl -{ - char_scanner:: - char_scanner (istream& is, bool crlf, uint64_t l, uint64_t p) - : line (l), - column (1), - position (p), - is_ (is), - buf_ (dynamic_cast (is.rdbuf ())), - gptr_ (nullptr), - egptr_ (nullptr), - crlf_ (crlf) - { - } - - auto char_scanner:: - peek () -> xchar - { - if (unget_) - return ungetc_; - - if (unpeek_) - return unpeekc_; - - if (eos_) - return xchar (xchar::traits_type::eof (), line, column, position); - - int_type v (peek_ ()); - - if (v == xchar::traits_type::eof ()) - eos_ = true; - else if (crlf_ && v == '\r') - { - int_type v1; - do - { - get_ (); - v1 = peek_ (); - } - while (v1 == '\r'); - - if (v1 != '\n') - { - // We need to make sure subsequent calls to peek() return newline. - // - unpeek_ = true; - unpeekc_ = xchar ('\n', line, column, position); - - if (v1 == xchar::traits_type::eof ()) - eos_ = true; - } - - v = '\n'; - } - - return xchar (v, line, column, position); - } - - void char_scanner:: - get (const xchar& c) - { - if (unget_) - unget_ = false; - else - { - if (unpeek_) - { - unpeek_ = false; - } - // When is_.get () returns eof, the failbit is also set (stupid, - // isn't?) which may trigger an exception. To work around this - // we will call peek() first and only call get() if it is not - // eof. But we can only call peek() on eof once; any subsequent - // calls will spoil the failbit (even more stupid). - // - else if (!eos (c)) - get_ (); - - if (!eos (c)) - { - if (c == '\n') - { - line++; - column = 1; - } - else - column++; - - position = pos_ (); - } - } - } -} -- cgit v1.1