diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2018-12-15 17:23:37 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2018-12-15 17:27:56 +0300 |
commit | 3bf1846063ad30ecc0fc90d34490bf70776faef0 (patch) | |
tree | 8a2a248be187671f8d9fd75d3367bfb9de782e45 | |
parent | 5bff24a8862f61e40f827591be5c81228efab4c6 (diff) |
Add manifest_rewriter class
-rw-r--r-- | libbutl/char-scanner.cxx | 25 | ||||
-rw-r--r-- | libbutl/char-scanner.ixx | 6 | ||||
-rw-r--r-- | libbutl/char-scanner.mxx | 34 | ||||
-rw-r--r-- | libbutl/manifest-forward.hxx | 4 | ||||
-rw-r--r-- | libbutl/manifest-parser.cxx | 33 | ||||
-rw-r--r-- | libbutl/manifest-parser.mxx | 25 | ||||
-rw-r--r-- | libbutl/manifest-rewriter.cxx | 144 | ||||
-rw-r--r-- | libbutl/manifest-rewriter.mxx | 75 | ||||
-rw-r--r-- | libbutl/manifest-serializer.cxx | 107 | ||||
-rw-r--r-- | libbutl/manifest-serializer.mxx | 23 | ||||
-rw-r--r-- | libbutl/manifest-types.mxx | 49 | ||||
-rw-r--r-- | tests/manifest-rewriter/buildfile | 8 | ||||
-rw-r--r-- | tests/manifest-rewriter/driver.cxx | 185 |
13 files changed, 619 insertions, 99 deletions
diff --git a/libbutl/char-scanner.cxx b/libbutl/char-scanner.cxx index aa9e1b7..763a3cb 100644 --- a/libbutl/char-scanner.cxx +++ b/libbutl/char-scanner.cxx @@ -35,9 +35,10 @@ using namespace std; namespace butl { char_scanner:: - char_scanner (istream& is, bool crlf, uint64_t l) + char_scanner (istream& is, bool crlf, uint64_t l, uint64_t p) : line (l), column (1), + position (p), is_ (is), buf_ (dynamic_cast<fdbuf*> (is.rdbuf ())), gptr_ (nullptr), @@ -56,7 +57,7 @@ namespace butl return unpeekc_; if (eos_) - return xchar (xchar::traits_type::eof (), line, column); + return xchar (xchar::traits_type::eof (), line, column, position); int_type v (peek_ ()); @@ -77,13 +78,16 @@ namespace butl // We need to make sure subsequent calls to peek() return newline. // unpeek_ = true; - unpeekc_ = xchar ('\n', line, column); + unpeekc_ = xchar ('\n', line, column, position); + + if (v1 == xchar::traits_type::eof ()) + eos_ = true; } v = '\n'; } - return xchar (v, line, column); + return xchar (v, line, column, position); } void char_scanner:: @@ -91,20 +95,23 @@ namespace butl { if (unget_) unget_ = false; - else if (unpeek_) - unpeek_ = false; else { + if (unpeek_) + { + unpeek_ = false; + } // When is_.get () returns eof, the failbit is also set (stupid, // isn't?) which may trigger an exception. To work around this // we will call peek() first and only call get() if it is not // eof. But we can only call peek() on eof once; any subsequent // calls will spoil the failbit (even more stupid). // - if (!eos (c)) - { + else if (!eos (c)) get_ (); + if (!eos (c)) + { if (c == '\n') { line++; @@ -112,6 +119,8 @@ namespace butl } else column++; + + position = pos_ (); } } } diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx index a33cf2b..ce57a05 100644 --- a/libbutl/char-scanner.ixx +++ b/libbutl/char-scanner.ixx @@ -65,4 +65,10 @@ namespace butl if (save_ != nullptr && c != xchar::traits_type::eof ()) save_->push_back (static_cast<char_type> (c)); } + + inline std::uint64_t char_scanner:: + pos_ () const + { + return buf_ != nullptr ? buf_->tellg () : 0; + } } diff --git a/libbutl/char-scanner.mxx b/libbutl/char-scanner.mxx index 58d8ebc..2cd4487 100644 --- a/libbutl/char-scanner.mxx +++ b/libbutl/char-scanner.mxx @@ -46,10 +46,14 @@ LIBBUTL_MODEXPORT namespace butl // a number of optimizations that assume nobody else is messing with the // stream. // - // The line argument can be used to override the start line in the stream - // (useful when re-scanning data saved with the save_* facility). + // The line and position arguments can be used to override the start line + // and position in the stream (useful when re-scanning data saved with the + // save_* facility). // - char_scanner (std::istream& is, bool crlf = true, std::uint64_t line = 1); + char_scanner (std::istream& is, + bool crlf = true, + std::uint64_t line = 1, + std::uint64_t position = 0); char_scanner (const char_scanner&) = delete; char_scanner& operator= (const char_scanner&) = delete; @@ -58,8 +62,8 @@ LIBBUTL_MODEXPORT namespace butl // public: - // Extended character. It includes line/column information and is capable - // of representing EOF. + // Extended character. It includes line/column/position information and is + // capable of representing EOF. // // Note that implicit conversion of EOF to char_type results in NUL // character (which means in most cases it is safe to compare xchar to @@ -76,6 +80,11 @@ LIBBUTL_MODEXPORT namespace butl std::uint64_t line; std::uint64_t column; + // Logical character position (see ifdstream for details on the logical + // part) if the scanned stream is ifdstream and always zero otherwise. + // + std::uint64_t position; + operator char_type () const { return value != traits_type::eof () @@ -83,8 +92,11 @@ LIBBUTL_MODEXPORT namespace butl : char_type (0); } - xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0) - : value (v), line (l), column (c) {} + xchar (int_type v, + std::uint64_t l = 0, + std::uint64_t c = 0, + std::uint64_t p = 0) + : value (v), line (l), column (c), position (p) {} }; xchar @@ -108,11 +120,12 @@ LIBBUTL_MODEXPORT namespace butl static bool eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} - // Line and column of the next character to be extracted from the stream - // by peek() or get(). + // Line, column and position of the next character to be extracted from + // the stream by peek() or get(). // std::uint64_t line; std::uint64_t column; + std::uint64_t position; // Ability to save raw data as it is being scanned. Note that the // character is only saved when it is got, not peeked. @@ -156,6 +169,9 @@ LIBBUTL_MODEXPORT namespace butl void get_ (); + std::uint64_t + pos_ () const; + protected: std::istream& is_; diff --git a/libbutl/manifest-forward.hxx b/libbutl/manifest-forward.hxx index babcb4b..84162cc 100644 --- a/libbutl/manifest-forward.hxx +++ b/libbutl/manifest-forward.hxx @@ -10,10 +10,10 @@ namespace butl class manifest_serializer; class manifest_name_value; - // The way manifest implementation should proceed when unknown value name is + // The way manifest implementation should proceed when unknown value is // encountered during parsing. // - enum class unknown_name_mode + enum class manifest_unknown_mode { skip, stop, diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx index f2567f3..20932e3 100644 --- a/libbutl/manifest-parser.cxx +++ b/libbutl/manifest-parser.cxx @@ -29,6 +29,7 @@ import std.core; import std.io; #endif import butl.char_scanner; +import butl.manifest_types; #endif #endif @@ -44,9 +45,12 @@ namespace butl next () { if (s_ == end) - return name_value {"", "", line, column, line, column}; + return name_value { + "", "", line, column, line, column, position, position, position}; - xchar c (skip_spaces ()); + auto clp (skip_spaces ()); + xchar c (clp.first); + uint64_t start_pos (clp.second); // Here is the problem: if we are in the 'body' state (that is, // we are parsing inside the manifest) and we see the special @@ -62,13 +66,18 @@ namespace butl if (s_ == body && c == ':') { s_ = start; - return name_value {"", "", c.line, c.column, c.line, c.column}; + + return name_value {"", "", + c.line, c.column, c.line, c.column, + start_pos, c.position, c.position}; } // Regardless of the state, what should come next is a name, // potentially the special empty one. // name_value r; + r.start_pos = start_pos; + parse_name (r); skip_spaces (); @@ -87,12 +96,16 @@ namespace butl // r.value_line = r.name_line; r.value_column = r.name_column; + r.colon_pos = r.start_pos; + r.end_pos = r.start_pos; return r; } if (c != ':') throw parsing (name_, c.line, c.column, "':' expected after name"); + r.colon_pos = c.position; + skip_spaces (); parse_value (r); @@ -102,6 +115,8 @@ namespace butl // assert (c == '\n' || eos (c)); + r.end_pos = c.position; + if (c == '\n') get (); @@ -384,11 +399,12 @@ namespace butl v.resize (n); } - manifest_parser::xchar manifest_parser:: + pair<manifest_parser::xchar, uint64_t> manifest_parser:: skip_spaces () { xchar c (peek ()); bool start (c.column == 1); + uint64_t lp (c.position); for (; !eos (c); c = peek ()) { @@ -402,8 +418,9 @@ namespace butl // Skip empty lines. // if (!start) - return c; + return make_pair (c, lp); + lp = c.position + 1; break; } case '#': @@ -412,7 +429,7 @@ namespace butl // of the line (sans leading spaces). // if (!start) - return c; + return make_pair (c, lp); get (); @@ -424,13 +441,13 @@ namespace butl continue; } default: - return c; // Not a space. + return make_pair (c, lp); // Not a space. } get (); } - return c; + return make_pair (c, lp); } // manifest_parsing diff --git a/libbutl/manifest-parser.mxx b/libbutl/manifest-parser.mxx index 4af7dba..7fc4ee3 100644 --- a/libbutl/manifest-parser.mxx +++ b/libbutl/manifest-parser.mxx @@ -25,8 +25,10 @@ import std.core; import std.io; #endif import butl.char_scanner; +import butl.manifest_types; #else #include <libbutl/char-scanner.mxx> +#include <libbutl/manifest-types.mxx> #endif #include <libbutl/export.hxx> @@ -47,22 +49,6 @@ LIBBUTL_MODEXPORT namespace butl std::string description; }; - class manifest_name_value - { - public: - std::string name; - std::string value; - - std::uint64_t name_line; - std::uint64_t name_column; - - std::uint64_t value_line; - std::uint64_t value_column; - - bool - empty () const {return name.empty () && value.empty ();} - }; - class LIBBUTL_SYMEXPORT manifest_parser: protected butl::char_scanner { public: @@ -102,9 +88,12 @@ LIBBUTL_MODEXPORT namespace butl void parse_value (manifest_name_value&); - // Skip spaces and return the first peeked non-space character. + // Skip spaces and return the first peeked non-space character and the + // starting position of the line it belongs to. If the later is not + // available (skipped spaces are all in the middle of a line, we are at + // eos, etc.), then fallback to the first peeked character position. // - xchar + std::pair<xchar, std::uint64_t> skip_spaces (); private: diff --git a/libbutl/manifest-rewriter.cxx b/libbutl/manifest-rewriter.cxx new file mode 100644 index 0000000..1e1c3d5 --- /dev/null +++ b/libbutl/manifest-rewriter.cxx @@ -0,0 +1,144 @@ +// file : libbutl/manifest-rewriter.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef __cpp_modules +#include <libbutl/manifest-rewriter.mxx> +#endif + +#include <cassert> + +// C includes. + +#ifndef __cpp_lib_modules +#include <string> +#include <cstdint> // uint64_t +#endif + +// Other includes. + +#ifdef __cpp_modules +module butl.manifest_rewriter; + +// Only imports additional to interface. +#ifdef __clang__ +#ifdef __cpp_lib_modules +import std.core; +#endif +import butl.path; +import butl.fdstream; +import butl.manifest_types; +#endif + +import butl.manifest_serializer; +#else +#include <libbutl/manifest-serializer.mxx> +#endif + +using namespace std; + +namespace butl +{ + manifest_rewriter:: + manifest_rewriter (path p) + : path_ (move (p)), + fd_ (fdopen (path_, + fdopen_mode::in | + fdopen_mode::out | + fdopen_mode::exclusive)) + { + } + + // Seek the file descriptor to the specified logical position and truncate + // the file. Return the file suffix (cached prior to truncating) starting + // from the specified position. + // + static string + truncate (auto_fd& fd, uint64_t pos, uint64_t suffix_pos) + { + string r; + { + // Temporary move the descriptor into the stream. + // + ifdstream is (move (fd)); + fdbuf& buf (static_cast<fdbuf&> (*is.rdbuf ())); + + // Read suffix. + // + buf.seekg (suffix_pos); + r = is.read_text (); + + // Seek to the specified position and move the file descriptor back. + // + buf.seekg (pos); + fd = is.release (); + } + + // Truncate the file starting from the current position. Note that we need + // to use the physical position rather than logical. + // + fdtruncate (fd.get (), fdseek (fd.get (), 0, fdseek_mode::cur)); + return r; + } + + void manifest_rewriter:: + replace (const manifest_name_value& nv) + { + assert (nv.colon_pos != 0); // Sanity check. + + // Truncate right after the value colon. + // + string suffix (truncate (fd_, nv.colon_pos + 1, nv.end_pos)); + + // Temporary move the descriptor into the stream. + // + ofdstream os (move (fd_)); + + if (!nv.value.empty ()) + { + os << ' '; + + manifest_serializer s (os, path_.string ()); + s.write_value (nv.value, nv.colon_pos - nv.start_pos + 2); + } + + os << suffix; + + // Move the file descriptor back. + // + fd_ = os.release (); // Note: flushes the buffer. + } + + void manifest_rewriter:: + insert (const manifest_name_value& pos, const manifest_name_value& nv) + { + assert (pos.end_pos != 0); // Sanity check. + + // We could have just started writing over the suffix but the truncation + // doesn't hurt. + // + string suffix (truncate (fd_, pos.end_pos, pos.end_pos)); + + // Temporary move the descriptor into the stream. + // + ofdstream os (move (fd_)); + os << '\n'; + + manifest_serializer s (os, path_.string ()); + s.write_name (nv.name); + + os << ':'; + + if (!nv.value.empty ()) + { + os << ' '; + s.write_value (nv.value, nv.colon_pos - nv.start_pos + 2); + } + + os << suffix; + + // Move the file descriptor back. + // + fd_ = os.release (); // Note: flushes the buffer. + } +} diff --git a/libbutl/manifest-rewriter.mxx b/libbutl/manifest-rewriter.mxx new file mode 100644 index 0000000..3261ef2 --- /dev/null +++ b/libbutl/manifest-rewriter.mxx @@ -0,0 +1,75 @@ +// file : libbutl/manifest-rewriter.mxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef __cpp_modules +#pragma once +#endif + +// C includes. + +#ifndef __cpp_lib_modules +#endif + +// Other includes. + +#ifdef __cpp_modules +export module butl.manifest_rewriter; +#ifdef __cpp_lib_modules +#endif +import butl.path; +import butl.fdstream; +import butl.manifest_types; +#else +#include <libbutl/path.mxx> +#include <libbutl/fdstream.mxx> +#include <libbutl/manifest-types.mxx> +#endif + +#include <libbutl/export.hxx> + +LIBBUTL_MODEXPORT namespace butl +{ + // Rewrite a hand-written manifest file preserving formatting, comments, + // etc., of the unaffected parts. The general workflow is as follows: + // + // 1. Parse the manifest file using manifest_parser into a sequence of + // name/value pairs and their positions. + // + // 2. Create an instance of manifest_rewriter for the manifest file. This + // opens the file in read/write mode with exclusive access. + // + // 3. Iterate over this sequence in reverse and apply changes to the desired + // name/value pairs using the below API. Doing this in reverse makes sure + // the positions obtained on step 1 remain valid. + // + // Note that if an exception is thrown by replace() or insert(), then the + // writer is no longer usable and there is no guarantees that the file is + // left in a consistent state. + // + class LIBBUTL_SYMEXPORT manifest_rewriter + { + public: + manifest_rewriter (path); + + // Replace the existing value at the specified position (specifically, + // between colon_pos and end_pos) with the specified new value. The new + // value is serialized as if by manifest_serializer. + // + void + replace (const manifest_name_value&); + + // Insert a new name/value after the specified position (specifically, + // after end_pos). To insert before the first value, use the special + // start-of-manifest value as position. The new name/value is serialized + // as if by manifest_serializer. Throw manifest_serialization exception + // on error. + // + void + insert (const manifest_name_value& pos, const manifest_name_value&); + + private: + path path_; + auto_fd fd_; + }; +} diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx index 3d73f47..2d2e722 100644 --- a/libbutl/manifest-serializer.cxx +++ b/libbutl/manifest-serializer.cxx @@ -86,55 +86,13 @@ namespace butl break; } - check_name (n); - - os_ << n << ':'; + write_name (n); + os_ << ':'; if (!v.empty ()) { os_ << ' '; - - // Consider both \r and \n characters as line separators, and the - // \r\n characters sequence as a single line separator. - // - auto nl = [&v] (size_t p = 0) {return v.find_first_of ("\r\n", p);}; - - // Use the multi-line mode in any of the following cases: - // - // - name is too long (say longer than 37 (78/2 - 2) characters; - // we cannot start on the next line since that would start the - // multi-line mode) - // - value contains newlines - // - value contains leading/trailing whitespaces - // - if (n.size () > 37 || nl () != string::npos || - v.front () == ' ' || v.front () == '\t' || - v.back () == ' ' || v.back () == '\t') - { - os_ << "\\" << endl; // Multi-line mode introductor. - - // Chunk the value into fragments separated by newlines. - // - for (size_t i (0), p (nl ()); ; p = nl (i)) - { - if (p == string::npos) - { - // Last chunk. - // - write_value (0, v.c_str () + i, v.size () - i); - break; - } - - write_value (0, v.c_str () + i, p - i); - os_ << endl; - - i = p + (v[p] == '\r' && v[p + 1] == '\n' ? 2 : 1); - } - - os_ << endl << "\\"; // Multi-line mode terminator. - } - else - write_value (n.size () + 2, v.c_str (), v.size ()); + write_value (v, n.size () + 2); } os_ << endl; @@ -187,8 +145,11 @@ namespace butl } void manifest_serializer:: - check_name (const string& n) + write_name (const string& n) { + if (n.empty ()) + throw serialization (name_, "empty name"); + if (n[0] == '#') throw serialization (name_, "name starts with '#'"); @@ -200,14 +161,16 @@ namespace butl case '\t': case '\r': case '\n': throw serialization (name_, "name contains whitespace"); - case ':': throw serialization (name_, "name contains ':'"); - default: break; + case ':': throw serialization (name_, "name contains ':'"); + default: break; } } + + os_ << n; } void manifest_serializer:: - write_value (size_t cl, const char* s, size_t n) + write_value (const char* s, size_t n, size_t cl) { char c ('\0'); @@ -282,6 +245,52 @@ namespace butl os_ << '\\'; } + void manifest_serializer:: + write_value (const string& v, size_t cl) + { + // Consider both \r and \n characters as line separators, and the + // \r\n characters sequence as a single line separator. + // + auto nl = [&v] (size_t p = 0) {return v.find_first_of ("\r\n", p);}; + + // Use the multi-line mode in any of the following cases: + // + // - column offset is too large (say greater than 39 (78/2) characters; we + // cannot start on the next line since that would start the multi-line + // mode) + // - value contains newlines + // - value contains leading/trailing whitespaces + // + if (cl > 39 || nl () != string::npos || + v.front () == ' ' || v.front () == '\t' || + v.back () == ' ' || v.back () == '\t') + { + os_ << "\\" << endl; // Multi-line mode introductor. + + // Chunk the value into fragments separated by newlines. + // + for (size_t i (0), p (nl ()); ; p = nl (i)) + { + if (p == string::npos) + { + // Last chunk. + // + write_value (v.c_str () + i, v.size () - i, 0); + break; + } + + write_value (v.c_str () + i, p - i, 0); + os_ << endl; + + i = p + (v[p] == '\r' && v[p + 1] == '\n' ? 2 : 1); + } + + os_ << endl << "\\"; // Multi-line mode terminator. + } + else + write_value (v.c_str (), v.size (), cl); + } + // manifest_serialization // diff --git a/libbutl/manifest-serializer.mxx b/libbutl/manifest-serializer.mxx index a1864d6..66ca398 100644 --- a/libbutl/manifest-serializer.mxx +++ b/libbutl/manifest-serializer.mxx @@ -75,15 +75,28 @@ LIBBUTL_MODEXPORT namespace butl merge_comment (const std::string& value, const std::string& comment); private: + friend class manifest_rewriter; + + // Validate and write a name. + // + void + write_name (const std::string&); + + // Write a value assuming the current line already has the specified + // offset. If the resulting line length would be too large then the + // multi-line representation will be used. It is assumed that the name, + // followed by the colon, is already written. + // void - check_name (const std::string&); + write_value (const std::string&, std::size_t offset); - // Write 'n' characters from 's' (assuming there are no newlines) - // split into multiple lines at or near the 78 characters - // boundary. The first line starts at the 'column' offset. + // Write the specified number of characters from the specified string + // (assuming there are no newlines) split into multiple lines at or near + // the 78 characters boundary. Assume the current line already has the + // specified offset. // void - write_value (std::size_t column, const char* s, std::size_t n); + write_value (const char* s, std::size_t n, std::size_t offset); private: enum {start, body, end} s_ = start; diff --git a/libbutl/manifest-types.mxx b/libbutl/manifest-types.mxx new file mode 100644 index 0000000..f9050ab --- /dev/null +++ b/libbutl/manifest-types.mxx @@ -0,0 +1,49 @@ +// file : libbutl/manifest-types.mxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef __cpp_modules +#pragma once +#endif + +// C includes. + +#ifndef __cpp_lib_modules +#include <string> +#include <cstdint> // uint64_t +#endif + +// Other includes. + +#ifdef __cpp_modules +export module butl.manifest_types; +#ifdef __cpp_lib_modules +import std.core; +#endif +#else +#endif + +#include <libbutl/export.hxx> + +LIBBUTL_MODEXPORT namespace butl +{ + class manifest_name_value + { + public: + std::string name; + std::string value; + + std::uint64_t name_line; + std::uint64_t name_column; + + std::uint64_t value_line; + std::uint64_t value_column; + + std::uint64_t start_pos; // Position of name/value-starting character. + std::uint64_t colon_pos; // Position of name/value-separating ':'. + std::uint64_t end_pos; // Position of name/value-terminating '\n' or EOF. + + bool + empty () const {return name.empty () && value.empty ();} + }; +} diff --git a/tests/manifest-rewriter/buildfile b/tests/manifest-rewriter/buildfile new file mode 100644 index 0000000..4c6e90f --- /dev/null +++ b/tests/manifest-rewriter/buildfile @@ -0,0 +1,8 @@ +# file : tests/manifest-rewriter/buildfile +# copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +import libs = libbutl%lib{butl} +libs += $stdmod_lib + +exe{driver}: {hxx cxx}{*} $libs diff --git a/tests/manifest-rewriter/driver.cxx b/tests/manifest-rewriter/driver.cxx new file mode 100644 index 0000000..f206c22 --- /dev/null +++ b/tests/manifest-rewriter/driver.cxx @@ -0,0 +1,185 @@ +// file : tests/manifest-rewriter/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cassert> + +#ifndef __cpp_lib_modules +#include <vector> +#include <string> +#include <cstdint> // uint64_t +#include <utility> // move() +#include <iostream> +#include <exception> +#endif + +// Other includes. + +#ifdef __cpp_modules +#ifdef __cpp_lib_modules +import std.core; +import std.io; +#endif +import butl.path; +import butl.optional; +import butl.fdstream; +import butl.manifest_parser; +import butl.manifest_rewriter; +#else +#include <libbutl/path.mxx> +#include <libbutl/optional.mxx> +#include <libbutl/fdstream.mxx> +#include <libbutl/manifest-parser.mxx> +#include <libbutl/manifest-rewriter.mxx> +#endif + +using namespace std; + +namespace butl +{ + using butl::optional; + using butl::nullopt; + + // Value rewriting or insertion command. + // + struct edit_cmd + { + string name; + string value; + optional<string> after; // Rewrite an existing value if nullopt. + + edit_cmd (string n, string v) + : name (move (n)), value (move (v)) {} + + edit_cmd (string n, string v, string a) + : name (move (n)), value (move (v)), after (move (a)) {} + }; + + using edit_cmds = vector<edit_cmd>; + + // Dump the manifest into the file, edit and return the resulting manifest. + // + // The file will stay in the filesystem for troubleshooting in case of an + // assertion failure and will be deleted otherwise. + // + static path temp_file (path::temp_path ("butl-manifest-rewriter")); + + static string + edit (const char* manifest, const edit_cmds&); + + int + main () + { + auto_rmfile rm (temp_file); + + assert (edit (":1\n# Comment\n# Comment\n a : b \n# Comment\n\nc:d\n", + {{"a", "xyz"}}) == + ":1\n# Comment\n# Comment\n a : xyz\n# Comment\n\nc:d\n"); + + assert (edit (":1\n\n a: b\n", {{"a", "xyz"}}) == ":1\n\n a: xyz\n"); + assert (edit (":1\na: b", {{"a", "xyz"}}) == ":1\na: xyz"); + + assert (edit (":1\na:b\nc:d\ne:f", + {{"a", "xyz"}, edit_cmd {"x", "y", "c"}, {"e", "123"}}) == + ":1\na: xyz\nc:d\nx: y\ne: 123"); + + assert (edit (":1\na: b", {{"a", "xy\nz"}}) == ":1\na: \\\nxy\nz\n\\"); + + assert (edit (":1\n", {{"a", "b", ""}}) == ":1\na: b\n"); + + assert (edit (":1\n abc: b", + {{"abc", "xyz"}}) == + ":1\n abc: \\\nxyz\n\\"); + + // Test editing of manifests that contains CR characters. + // + assert (edit (":1\r\na: b\r\r\n", {{"a", "xyz"}}) == ":1\r\na: xyz\r\r\n"); + + assert (edit (":1\ra: b\r", {{"a", "xyz"}}) == ":1\ra: xyz\r"); + + assert (edit (":1\na: \\s", {{"a", "xyz"}}) == ":1\na: xyz"); + + assert (edit (":1\na: \\\nx\ny\nz\n\\\r", {{"a", "b"}}) == ":1\na: b\r"); + + return 0; + } + + static string + edit (const char* manifest, const edit_cmds& cmds) + { + { + ofdstream os (temp_file); + os << manifest; + os.close (); + } + + struct insertion + { + manifest_name_value value; + optional<manifest_name_value> pos; // Rewrite existing value if nullopt. + }; + + vector<insertion> insertions; + { + ifdstream is (temp_file); + manifest_parser p (is, temp_file.string ()); + + for (manifest_name_value nv; !(nv = p.next ()).empty (); ) + { + for (const edit_cmd& c: cmds) + { + if (c.after) + { + if (nv.name == *c.after) + { + // Note: new value lines, columns and positions are all zero as + // are not used for an insertion. + // + insertions.push_back ( + insertion { + manifest_name_value {c.name, c.value, 0, 0, 0, 0, 0, 0, 0}, + move (nv)}); + + break; + } + } + else if (nv.name == c.name) + { + nv.value = c.value; + insertions.push_back (insertion {move (nv), nullopt /* pos */}); + break; + } + } + } + } + + { + manifest_rewriter rw (temp_file); + + for (const auto& ins: reverse_iterate (insertions)) + { + if (ins.pos) + rw.insert (*ins.pos, ins.value); + else + rw.replace (ins.value); + } + } + + ifdstream is (temp_file); + return is.read_text (); + } +} + +int +main () +{ + try + { + return butl::main (); + } + catch (const exception& e) + { + cerr << e << endl; + return 1; + } +} |