diff options
Diffstat (limited to 'butl/manifest-parser.cxx')
-rw-r--r-- | butl/manifest-parser.cxx | 387 |
1 files changed, 0 insertions, 387 deletions
diff --git a/butl/manifest-parser.cxx b/butl/manifest-parser.cxx deleted file mode 100644 index 6b50d6b..0000000 --- a/butl/manifest-parser.cxx +++ /dev/null @@ -1,387 +0,0 @@ -// file : butl/manifest-parser.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#include <butl/manifest-parser> - -#include <cassert> -#include <sstream> - -using namespace std; - -namespace butl -{ - using parsing = manifest_parsing; - using name_value = manifest_name_value; - - name_value manifest_parser:: - next () - { - if (s_ == end) - return name_value {"", "", line, column, line, column}; - - xchar c (skip_spaces ()); - - // Here is the problem: if we are in the 'body' state (that is, - // we are parsing inside the manifest) and we see the special - // empty name, then before returning the "start" pair for the - // next manifest, we have to return the "end" pair. One way - // would be to cache the "start" pair and return it on the - // next call of next(). But that would require quite a bit - // of extra logic. The alternative is to detect the beginning - // of the empty name before parsing too far. This way, the - // next call to next() will start parsing where we left of - // and return the "start" pair naturally. - // - if (s_ == body && c == ':') - { - s_ = start; - return name_value {"", "", c.line, c.column, c.line, c.column}; - } - - // Regardless of the state, what should come next is a name, - // potentially the special empty one. - // - name_value r; - parse_name (r); - - skip_spaces (); - c = get (); - - if (eos (c)) - { - // This is ok as long as the name is empty. - // - if (!r.name.empty ()) - throw parsing (name_, c.line, c.column, "':' expected after name"); - - s_ = end; - - // The "end" pair. - // - r.value_line = r.name_line; - r.value_column = r.name_column; - return r; - } - - if (c != ':') - throw parsing (name_, c.line, c.column, "':' expected after name"); - - skip_spaces (); - parse_value (r); - - c = peek (); - - // The character after the value should be either a newline or eos. - // - assert (c == '\n' || eos (c)); - - if (c == '\n') - get (); - - // Now figure out whether what we've got makes sense, depending - // on the state we are in. - // - if (s_ == start) - { - // Start of the (next) manifest. The first pair should be the - // special empty name/format version. - // - if (!r.name.empty ()) - throw parsing (name_, r.name_line, r.name_column, - "format version pair expected"); - - // The version value is only mandatory for the first manifest in - // a sequence. - // - if (r.value.empty ()) - { - if (version_.empty ()) - throw parsing (name_, r.value_line, r.value_column, - "format version value expected"); - r.value = version_; - } - else - { - version_ = r.value; // Update with the latest. - - if (version_ != "1") - throw parsing (name_, r.value_line, r.value_column, - "unsupported format version " + version_); - } - - s_ = body; - } - else - { - // Parsing the body of the manifest. - // - - // Should have been handled by the special case above. - // - assert (!r.name.empty ()); - } - - return r; - } - - void manifest_parser:: - parse_name (name_value& r) - { - xchar c (peek ()); - - r.name_line = c.line; - r.name_column = c.column; - - for (; !eos (c); c = peek ()) - { - if (c == ':' || c == ' ' || c == '\t' || c == '\n') - break; - - r.name += c; - get (); - } - } - - void manifest_parser:: - parse_value (name_value& r) - { - xchar c (peek ()); - - r.value_line = c.line; - r.value_column = c.column; - - string& v (r.value); - string::size_type n (0); // Size of last non-space character (simple mode). - - // Detect the multi-line mode introductor. - // - bool ml (false); - if (c == '\\') - { - get (); - xchar p (peek ()); - - if (p == '\n') - { - get (); // Newline is not part of the value so skip it. - c = peek (); - ml = true; - } - else if (eos (p)) - ml = true; - else - unget (c); - } - - // Multi-line value starts from the line that follows the name. - // - if (ml) - { - r.value_line = c.line; - r.value_column = c.column; - } - - // The nl flag signals that the preceding character was a "special - // newline", that is, a newline that was part of the milti-line mode - // introductor or an escape sequence. - // - for (bool nl (ml); !eos (c); c = peek ()) - { - // Detect the special "\n\\\n" sequence. In the multi-line mode, - // this is a "terminator". In the simple mode, this is a way to - // specify a newline. - // - // The key idea here is this: if we "swallowed" any characters - // (i.e., called get() without a matching unget()), then we - // have to restart the loop in order to do all the tests for - // the next character. Also, for this to work, we can only - // add one character to v, which limits us to maximum three - // characters look-ahead: one in v, one "ungot", and one - // peeked. - // - // The first block handles the special sequence that starts with - // a special newline. In multi-line mode, this is an "immediate - // termination" where we "use" the newline from the introductor. - // Note also that in the simple mode the special sequence can - // only start with a special (i.e., escaped) newline. - // - if (nl) - { - nl = false; - - if (c == '\\') - { - get (); - xchar c1 (peek ()); - - if (c1 == '\n' || eos (c1)) - { - if (ml) - break; - else - { - if (c1 == '\n') - get (); - - v += '\n'; // Literal newline. - n = v.size (); - continue; // Restart from the next character. - } - } - else - unget (c); // Fall through. - } - } - - if (c == '\n') - { - if (ml) - { - get (); - xchar c1 (peek ()); - - if (c1 == '\\') - { - get (); - xchar c2 (peek ()); - - if (c2 == '\n' || eos (c2)) - break; - else - { - v += '\n'; - unget (c1); - continue; // Restart from c1 (slash). - } - } - else - unget (c); // Fall through. - } - else - break; // Simple value terminator. - } - - // Detect the newline escape sequence. The same look-ahead - // approach as above. - // - if (c == '\\') - { - get (); - xchar c1 (peek ()); - - if (c1 == '\n' || eos (c1)) - { - if (c1 == '\n') - { - get (); - nl = true; // This is a special newline. - } - continue; // Restart from the next character. - } - else if (c1 == '\\') - { - get (); - xchar c2 (peek ()); - - if (c2 == '\n' || eos (c1)) - { - v += '\\'; - n = v.size (); - // Restart from c2 (newline/eos). - } - else - { - v += '\\'; - n = v.size (); - unget (c1); // Restart from c1 (second slash). - } - - continue; - } - else - unget (c); // Fall through. - } - - get (); - v += c; - - if (!ml && c != ' ' && c != '\t') - n = v.size (); - } - - // Cut off trailing whitespaces. - // - if (!ml) - v.resize (n); - } - - manifest_parser::xchar manifest_parser:: - skip_spaces () - { - xchar c (peek ()); - bool start (c.column == 1); - - for (; !eos (c); c = peek ()) - { - switch (c) - { - case ' ': - case '\t': - break; - case '\n': - { - // Skip empty lines. - // - if (!start) - return c; - - break; - } - case '#': - { - // We only recognize '#' as a start of a comment at the beginning - // of the line (sans leading spaces). - // - if (!start) - return c; - - get (); - - // Read until newline or eos. - // - for (c = peek (); !eos (c) && c != '\n'; c = peek ()) - get (); - - continue; - } - default: - return c; // Not a space. - } - - get (); - } - - return c; - } - - // manifest_parsing - // - - static string - format (const string& n, uint64_t l, uint64_t c, const string& d) - { - ostringstream os; - if (!n.empty ()) - os << n << ':'; - os << l << ':' << c << ": error: " << d; - return os.str (); - } - - manifest_parsing:: - manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d) - : runtime_error (format (n, l, c, d)), - name (n), line (l), column (c), description (d) - { - } -} |