about | summary | refs | log | tree | commit | diff
path: root/tests/manifest-parser
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2020-02-26 17:16:45 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2020-02-26 17:17:49 +0300
commit 5ae9686adac1508873f2d980e84becd3496244c2 (patch)
tree d7c88e678b29ed6bb7ae30b74bd01aa2b5d2e9a8 /tests/manifest-parser
parent afb726d2d59b3715960a8647738860f40e37cf4f (diff)
Add notion of validator to char_scanner and make sure manifest is UTF-8
This involves implementing utf8_validator and UTF-8 utility functions and using them during the manifest parsing, serialization, and rewriting.
Diffstat (limited to 'tests/manifest-parser')
-rw-r--r-- tests/manifest-parser/driver.cxx 40
1 files changed, 39 insertions, 1 deletions
diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx
index 57674cb..a34f2b7 100644
--- a/tests/manifest-parser/driver.cxx
+++ b/tests/manifest-parser/driver.cxx
@@ -40,6 +40,9 @@ namespace butl
static bool
equal (const optional<pairs>& actual, const optional<pairs>& expected);
+ static pairs
+ parse (const char* m, manifest_parser::filter_function f = {});
+
// Test manifest as it is represented in the stream, including format
// version and end-of-manifest values.
//
@@ -188,6 +191,41 @@ namespace butl
assert (p.first == "" && p.second == "comment");
}
+ // UTF-8.
+ //
+ assert (test (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0",
+ {{"","1"},
+ {"\xD0\xB0y\xD0\xB0", "\xD0\xB0z\xD0\xB0"},
+ {"",""},
+ {"",""}}));
+
+ assert (fail (":1\n#\xD0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0"));
+ assert (fail (":1\n#\xD0\xB0\n\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0"));
+ assert (fail (":1\n#\xD0\xB0\n\xD0y\xD0\xB0:\xD0\xB0z\xD0\xB0"));
+ assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0:\xD0\xB0z\xD0\xB0"));
+ assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0z\xD0\xB0"));
+ assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0"));
+ assert (fail (":1\r\r\xB0#\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0"));
+ assert (fail (":1\r\xD0#\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0"));
+ assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0\r\xD0"));
+
+ // Test parsing failure for manifest with multi-byte UTF-8 sequences
+ // (the column is properly reported, etc).
+ //
+ try
+ {
+ parse (":1\na\xD0\xB0\xD0\xB0\xFE");
+ assert (false);
+ }
+ catch (const manifest_parsing& e)
+ {
+ assert (e.line == 2 &&
+ e.column == 4 &&
+ e.description ==
+ "invalid manifest name: "
+ "invalid UTF-8 sequence first byte (0xFE)");
+ }
+
// Filtering.
//
assert (test (":1\na: abc\nb: bca\nc: cab",
@@ -281,7 +319,7 @@ namespace butl
}
static pairs
- parse (const char* m, manifest_parser::filter_function f = {})
+ parse (const char* m, manifest_parser::filter_function f)
{
istringstream is (m);
is.exceptions (istream::failbit | istream::badbit);