diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2020-02-26 17:16:45 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-02-26 17:17:49 +0300 |
commit | 5ae9686adac1508873f2d980e84becd3496244c2 (patch) | |
tree | d7c88e678b29ed6bb7ae30b74bd01aa2b5d2e9a8 /libbutl/manifest-parser.mxx | |
parent | afb726d2d59b3715960a8647738860f40e37cf4f (diff) |
Add notion of validator to char_scanner and make sure manifest is UTF-8
This involves implementing utf8_validator and UTF-8 utility functions and
using them during the manifest parsing, serialization, and rewriting.
Diffstat (limited to 'libbutl/manifest-parser.mxx')
-rw-r--r-- | libbutl/manifest-parser.mxx | 33 |
1 file changed, 31 insertions, 2 deletions
diff --git a/libbutl/manifest-parser.mxx b/libbutl/manifest-parser.mxx index adf6181..77addff 100644 --- a/libbutl/manifest-parser.mxx +++ b/libbutl/manifest-parser.mxx @@ -25,10 +25,12 @@ export module butl.manifest_parser; import std.core; import std.io; #endif +import butl.utf8; import butl.optional; import butl.char_scanner; import butl.manifest_types; #else +#include <libbutl/utf8.mxx> #include <libbutl/optional.mxx> #include <libbutl/char-scanner.mxx> #include <libbutl/manifest-types.mxx> @@ -54,7 +56,8 @@ LIBBUTL_MODEXPORT namespace butl std::string description; }; - class LIBBUTL_SYMEXPORT manifest_parser: protected butl::char_scanner + class LIBBUTL_SYMEXPORT manifest_parser: + protected char_scanner<utf8_validator> { public: // The filter, if specified, is called by next() prior to returning the @@ -69,7 +72,10 @@ LIBBUTL_MODEXPORT namespace butl manifest_parser (std::istream& is, const std::string& name, std::function<filter_function> filter = {}) - : char_scanner (is), name_ (name), filter_ (std::move (filter)) {} + : char_scanner (is, + utf8_validator (codepoint_types::graphic, U"\n\r\t")), + name_ (name), + filter_ (std::move (filter)) {} const std::string& name () const {return name_;} @@ -97,6 +103,8 @@ LIBBUTL_MODEXPORT namespace butl split_comment (const std::string&); private: + using base = char_scanner<utf8_validator>; + void parse_next (manifest_name_value&); @@ -114,12 +122,33 @@ LIBBUTL_MODEXPORT namespace butl std::pair<xchar, std::uint64_t> skip_spaces (); + // As base::get() but in case of an invalid character throws + // manifest_parsing. + // + xchar + get (const char* what); + + // Get previously peeked character (faster). + // + void + get (const xchar&); + + // As base::peek() but in case of an invalid character throws + // manifest_parsing. 
+ // + xchar + peek (const char* what); + private: const std::string name_; const std::function<filter_function> filter_; enum {start, body, end} s_ = start; std::string version_; // Current format version. + + // Buffer for a get()/peek() potential error. + // + std::string ebuf_; }; // Parse and return a single manifest. Throw manifest_parsing in case of an |