diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2020-02-26 17:16:45 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-02-26 17:17:49 +0300 |
commit | 5ae9686adac1508873f2d980e84becd3496244c2 (patch) | |
tree | d7c88e678b29ed6bb7ae30b74bd01aa2b5d2e9a8 /libbutl/manifest-rewriter.cxx | |
parent | afb726d2d59b3715960a8647738860f40e37cf4f (diff) |
Add notion of validator to char_scanner and make sure manifest is UTF-8
This involves implementing utf8_validator and UTF-8 utility functions and
using them during the manifest parsing, serialization, and rewriting.
Diffstat (limited to 'libbutl/manifest-rewriter.cxx')
-rw-r--r-- | libbutl/manifest-rewriter.cxx | 22 |
1 file changed, 19 insertions, 3 deletions
```diff
diff --git a/libbutl/manifest-rewriter.cxx b/libbutl/manifest-rewriter.cxx
index ba0c866..e38d5f4 100644
--- a/libbutl/manifest-rewriter.cxx
+++ b/libbutl/manifest-rewriter.cxx
@@ -30,8 +30,10 @@ import butl.fdstream;
 import butl.manifest_types;
 #endif
 
+import butl.utility; // utf8_length()
 import butl.manifest_serializer;
 #else
+#include <libbutl/utility.mxx>
 #include <libbutl/manifest-serializer.mxx>
 #endif
 
@@ -101,8 +103,16 @@ namespace butl
 
       manifest_serializer s (os, path_.string (), long_lines_);
 
+      // Note that the name can be surrounded with the ASCII whitespace
+      // characters and the start_pos refers to the first character in the
+      // line.
+      //
+      // Also note that we assume the already serialized name to be a valid
+      // UTF-8 byte string and so utf8_length() may not throw.
+      //
       s.write_value (nv.value,
-                     static_cast<size_t> (nv.colon_pos - nv.start_pos + 2));
+                     static_cast<size_t> (nv.colon_pos - nv.start_pos) -
+                     (nv.name.size () - utf8_length (nv.name)) + 2);
     }
 
     os << suffix;
@@ -128,15 +138,21 @@ namespace butl
     os << '\n';
 
     manifest_serializer s (os, path_.string (), long_lines_);
-    s.write_name (nv.name);
+    size_t n (s.write_name (nv.name));
 
     os << ':';
 
     if (!nv.value.empty ())
     {
       os << ' ';
+
+      // Note that the name can be surrounded with the ASCII whitespace
+      // characters and the start_pos refers to the first character in the
+      // line.
+      //
       s.write_value (nv.value,
-                     static_cast<size_t> (nv.colon_pos - nv.start_pos + 2));
+                     static_cast<size_t> (nv.colon_pos - nv.start_pos) -
+                     (nv.name.size () - n) + 2);
     }
 
     os << suffix;
```