From efd76ff778c0b7b1f8cb9e0485bb9b4b62b149a7 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 8 Sep 2015 12:37:39 +0200 Subject: Implement single quote support --- build/bootstrap.build | 1 + build/lexer | 5 ++++- build/lexer.cxx | 39 ++++++++++++++++++++++++++++++++---- build/root.build | 7 +++++++ tests/lexer/buildfile | 4 +++- tests/lexer/driver.cxx | 54 +++++++++++++++++++++++++++++++++++++++++--------- 6 files changed, 95 insertions(+), 15 deletions(-) diff --git a/build/bootstrap.build b/build/bootstrap.build index 381b45e..05aac3e 100644 --- a/build/bootstrap.build +++ b/build/bootstrap.build @@ -4,3 +4,4 @@ project = build2 subprojects = # No subprojects. using config +using test diff --git a/build/lexer b/build/lexer index e5676cc..9a0582d 100644 --- a/build/lexer +++ b/build/lexer @@ -61,7 +61,10 @@ namespace build private: token - name (xchar, bool separated); + name (bool separated); + + void + single_quote (std::string& lexeme); // Return true we have seen any spaces. Skipped empty lines don't // count. In other words, we are only interested in spaces that diff --git a/build/lexer.cxx b/build/lexer.cxx index 88628d3..4151087 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -94,17 +94,20 @@ namespace build // Otherwise it is a name. // - return name (c, sep); + unget (c); + return name (sep); } token lexer:: - name (xchar c, bool sep) + name (bool sep) { + xchar c (peek ()); + assert (!eos (c)); + uint64_t ln (c.line), cn (c.column); string lexeme; - lexeme += (c != '\\' ? c : escape ()); - for (c = peek (); !eos (c); c = peek ()) + for (; !eos (c); c = peek ()) { bool done (false); @@ -173,6 +176,11 @@ namespace build lexeme += escape (); break; } + case '\'': + { + single_quote (lexeme); + break; + } default: { get (); @@ -185,12 +193,35 @@ namespace build break; } + // The first character shall not be a separator (we shouldn't have + // been called if that's the case). + // + assert (c.line != ln || c.column != cn); + if (mode_ == lexer_mode::variable) next_mode_ = prev_mode_; return token (lexeme, sep, ln, cn); } + // Assuming the next character is the opening single quote, scan + // the stream until the closing quote (or eos), accumulating + // characters in between in lexeme. Fail if eos is reached before + // the closing quote. + // + void lexer:: + single_quote (string& lexeme) + { + xchar c (get ()); // Opening quote mark. + assert (c == '\''); + + for (c = get (); !eos (c) && c != '\''; c = get ()) + lexeme += c; + + if (eos (c)) + fail (c) << "unterminated single-quoted sequence"; + } + bool lexer:: skip_spaces () { diff --git a/build/root.build b/build/root.build index 5af4637..1afbe80 100644 --- a/build/root.build +++ b/build/root.build @@ -11,3 +11,10 @@ cxx.ext = cxx cxx.std = 14 cxx.poptions += -I$src_root + +# All exe{} in tests/ are, well, tests. +# +tests/: +{ + test.exe = true +} diff --git a/tests/lexer/buildfile b/tests/lexer/buildfile index 3aca207..a6976cf 100644 --- a/tests/lexer/buildfile +++ b/tests/lexer/buildfile @@ -2,4 +2,6 @@ # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -exe{driver}: cxx{driver ../../../build/{lexer diagnostics utility}} +import libs = libbutl%lib{butl} + +exe{driver}: cxx{driver ../../build/{lexer diagnostics utility}} $libs diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx index f77656d..fb5efc3 100644 --- a/tests/lexer/driver.cxx +++ b/tests/lexer/driver.cxx @@ -14,11 +14,14 @@ using namespace std; using namespace build; -typedef vector tokens; +using tokens = vector; static tokens lex (const char*); +ostream& +operator<< (ostream&, const tokens&); + int main () { @@ -54,10 +57,28 @@ main () assert (lex ("fo\\ o\\:") == tokens ({"fo o:", ""})); assert (lex ("foo\\\nbar") == tokens ({"foo\nbar", ""})); assert (lex ("foo \\\nbar") == tokens ({"foo", "bar", ""})); + assert (lex ("\\'foo") == tokens ({"'foo", ""})); assert (lex (" \\") == tokens ({""})); assert (lex (" foo\\") == tokens ({""})); + + // Quoting. + // + assert (lex ("''") == tokens ({"", ""})); + assert (lex ("'foo'") == tokens ({"foo", ""})); + assert (lex ("'foo bar'") == tokens ({"foo bar", ""})); + assert (lex ("'foo 'bar") == tokens ({"foo bar", ""})); + assert (lex ("foo' bar'") == tokens ({"foo bar", ""})); + assert (lex ("'foo ''bar'") == tokens ({"foo bar", ""})); + assert (lex ("foo' 'bar") == tokens ({"foo bar", ""})); + assert (lex ("'foo\nbar'") == tokens ({"foo\nbar", ""})); + assert (lex ("'#:${}()=+\n'") == tokens ({"#:${}()=+\n", ""})); + assert (lex ("'\"'") == tokens ({"\"", ""})); + assert (lex ("'\\'") == tokens ({"\\", ""})); + + assert (lex ("'foo bar") == tokens ({""})); + // Combinations. // assert (lex ("foo: bar") == tokens ({"foo", ":", "bar", ""})); @@ -87,21 +108,27 @@ lex (const char* s) { for (token t (l.next ());; t = l.next ()) { - const char* v (nullptr); + string v; switch (t.type ()) { - case token_type::eos: v= ""; break; - case token_type::newline: v = "\n"; break; - case token_type::colon: v = ":"; break; - case token_type::lcbrace: v = "{"; break; - case token_type::rcbrace: v = "}"; break; - case token_type::name: v = t.name ().c_str (); break; + case token_type::eos: v = ""; break; + case token_type::newline: v = "\n"; break; + case token_type::pair_separator: v = l.pair_separator (); break; + case token_type::colon: v = ":"; break; + case token_type::lcbrace: v = "{"; break; + case token_type::rcbrace: v = "}"; break; + case token_type::equal: v = "="; break; + case token_type::plus_equal: v = "+="; break; + case token_type::dollar: v = "$"; break; + case token_type::lparen: v = "("; break; + case token_type::rparen: v = ")"; break; + case token_type::name: v = t.name ().c_str (); break; } // cerr << t.line () << ':' << t.column () << ':' << v << endl; - r.push_back (v); + r.push_back (move (v)); if (t.type () == token_type::eos) break; @@ -118,3 +145,12 @@ lex (const char* s) return r; } + +ostream& +operator<< (ostream& os, const tokens& ts) +{ + for (const string& t: ts) + os << '"' << t << '"' << ' '; + + return os; +} -- cgit v1.1