diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2021-09-28 19:24:31 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2021-09-28 20:29:59 +0300 |
commit | df1ef68cd8e8582724ce1192bfc202e0b9aeaf0c (patch) | |
tree | b731ca4c68e60c00c7e7d499dbf4868ee7b71f44 /libbutl/manifest-parser.hxx | |
parent | 7a4fc37f264cdb67f2f83fa92703c869215bbc86 (diff) |
Get rid of C++ modules related code and rename *.mxx files to *.hxx
Diffstat (limited to 'libbutl/manifest-parser.hxx')
-rw-r--r-- | libbutl/manifest-parser.hxx | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/libbutl/manifest-parser.hxx b/libbutl/manifest-parser.hxx new file mode 100644 index 0000000..d53eb42 --- /dev/null +++ b/libbutl/manifest-parser.hxx @@ -0,0 +1,160 @@ +// file : libbutl/manifest-parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include <string> +#include <vector> +#include <iosfwd> +#include <cstdint> // uint64_t +#include <utility> // pair, move() +#include <stdexcept> // runtime_error +#include <functional> + +#include <libbutl/utf8.hxx> +#include <libbutl/optional.hxx> +#include <libbutl/char-scanner.hxx> +#include <libbutl/manifest-types.hxx> + +#include <libbutl/export.hxx> + +namespace butl +{ + class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error + { + public: + manifest_parsing (const std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + manifest_parsing (const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + class LIBBUTL_SYMEXPORT manifest_parser: + protected char_scanner<utf8_validator> + { + public: + // The filter, if specified, is called by next() prior to returning the + // pair to the caller. If the filter returns false, then the pair is + // discarded. + // + // Note that the filter should handle the end-of-manifest pairs (see + // below) carefully, so next() doesn't end up with an infinite cycle. + // + using filter_function = bool (manifest_name_value&); + + manifest_parser (std::istream& is, + const std::string& name, + std::function<filter_function> filter = {}) + : char_scanner (is, + utf8_validator (codepoint_types::graphic, U"\n\r\t")), + name_ (name), + filter_ (std::move (filter)) {} + + const std::string& + name () const {return name_;} + + // The first returned pair is special "start-of-manifest" with empty name + // and value being the format version: {"", "<ver>"}. After that we have a + // sequence of ordinary pairs which are the manifest. At the end of the + // manifest we have the special "end-of-manifest" pair with empty name and + // value: {"", ""}. After that we can either get another start-of-manifest + // pair (in which case the whole sequence repeats from the beginning) or + // we get another end-of-manifest-like pair which signals the end of + // stream (aka EOF) and which we will call the end-of-stream pair. To put + // it another way, the parse sequence always has the following form: + // + // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""} + // + manifest_name_value + next (); + + // Split the manifest value, optionally followed by ';' character and a + // comment into the value/comment pair. Note that ';' characters in the + // value must be escaped by the backslash. + // + static std::pair<std::string, std::string> + split_comment (const std::string&); + + private: + using base = char_scanner<utf8_validator>; + + void + parse_next (manifest_name_value&); + + void + parse_name (manifest_name_value&); + + void + parse_value (manifest_name_value&); + + // Skip spaces and return the first peeked non-space character and the + // starting position of the line it belongs to. If the later is not + // available (skipped spaces are all in the middle of a line, we are at + // eos, etc.), then fallback to the first peeked character position. + // + std::pair<xchar, std::uint64_t> + skip_spaces (); + + // As base::get() but in case of an invalid character throws + // manifest_parsing. + // + xchar + get (const char* what); + + // Get previously peeked character (faster). + // + void + get (const xchar&); + + // As base::peek() but in case of an invalid character throws + // manifest_parsing. + // + xchar + peek (const char* what); + + private: + const std::string name_; + const std::function<filter_function> filter_; + + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + // Buffer for a get()/peek() potential error. + // + std::string ebuf_; + }; + + // Parse and return a single manifest. Throw manifest_parsing in case of an + // error. + // + // Note that the returned manifest doesn't contain the format version nor + // the end-of-manifest/stream pairs. + // + LIBBUTL_SYMEXPORT std::vector<manifest_name_value> + parse_manifest (manifest_parser&); + + // As above but append the manifest values to an existing list. + // + LIBBUTL_SYMEXPORT void + parse_manifest (manifest_parser&, std::vector<manifest_name_value>&); + + // As above but return nullopt if eos is reached before reading any values. + // + LIBBUTL_SYMEXPORT optional<std::vector<manifest_name_value>> + try_parse_manifest (manifest_parser&); + + // As above but append the manifest values to an existing list returning + // false if eos is reached before reading any values. + // + LIBBUTL_SYMEXPORT bool + try_parse_manifest (manifest_parser&, std::vector<manifest_name_value>&); +} + +#include <libbutl/manifest-parser.ixx> |