aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2015-09-08 12:37:39 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2015-09-08 12:37:39 +0200
commit efd76ff778c0b7b1f8cb9e0485bb9b4b62b149a7 (patch)
tree 81551df937191dde3503cdaa81fc472e120fa3c5
parent b66d30af9fb5c50966183820f8ed7af6b8791a2e (diff)
Implement single quote support
-rw-r--r-- build/bootstrap.build 1
-rw-r--r-- build/lexer 5
-rw-r--r-- build/lexer.cxx 39
-rw-r--r-- build/root.build 7
-rw-r--r-- tests/lexer/buildfile 4
-rw-r--r-- tests/lexer/driver.cxx 54
6 files changed, 95 insertions, 15 deletions
diff --git a/build/bootstrap.build b/build/bootstrap.build
index 381b45e..05aac3e 100644
--- a/build/bootstrap.build
+++ b/build/bootstrap.build
@@ -4,3 +4,4 @@
project = build2
subprojects = # No subprojects.
using config
+using test
diff --git a/build/lexer b/build/lexer
index e5676cc..9a0582d 100644
--- a/build/lexer
+++ b/build/lexer
@@ -61,7 +61,10 @@ namespace build
private:
token
- name (xchar, bool separated);
+ name (bool separated);
+
+ void
+ single_quote (std::string& lexeme);
// Return true if we have seen any spaces. Skipped empty lines don't
// count. In other words, we are only interested in spaces that
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 88628d3..4151087 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -94,17 +94,20 @@ namespace build
// Otherwise it is a name.
//
- return name (c, sep);
+ unget (c);
+ return name (sep);
}
token lexer::
- name (xchar c, bool sep)
+ name (bool sep)
{
+ xchar c (peek ());
+ assert (!eos (c));
+
uint64_t ln (c.line), cn (c.column);
string lexeme;
- lexeme += (c != '\\' ? c : escape ());
- for (c = peek (); !eos (c); c = peek ())
+ for (; !eos (c); c = peek ())
{
bool done (false);
@@ -173,6 +176,11 @@ namespace build
lexeme += escape ();
break;
}
+ case '\'':
+ {
+ single_quote (lexeme);
+ break;
+ }
default:
{
get ();
@@ -185,12 +193,35 @@ namespace build
break;
}
+ // The first character shall not be a separator (we shouldn't have
+ // been called if that's the case).
+ //
+ assert (c.line != ln || c.column != cn);
+
if (mode_ == lexer_mode::variable)
next_mode_ = prev_mode_;
return token (lexeme, sep, ln, cn);
}
+ // Assuming the next character is the opening single quote, consume
+ // the stream up to and including the closing quote, appending the
+ // characters in between to lexeme verbatim (no escape processing).
+ // Fail if eos is reached before the closing quote.
+ //
+ void lexer::
+ single_quote (string& lexeme)
+ {
+ xchar c (get ()); // Opening quote mark.
+ assert (c == '\''); // Caller contract: we are positioned on the opening quote.
+
+ for (c = get (); !eos (c) && c != '\''; c = get ()) // The get() that yields the closing quote consumes it.
+ lexeme += c; // Everything else, including newlines and backslashes, is taken as is.
+
+ if (eos (c)) // The loop stopped on end of stream rather than on a closing quote.
+ fail (c) << "unterminated single-quoted sequence"; // NOTE(review): fail() is declared outside this diff; presumably it reports at c's position and does not return -- confirm.
+ }
+
bool lexer::
skip_spaces ()
{
diff --git a/build/root.build b/build/root.build
index 5af4637..1afbe80 100644
--- a/build/root.build
+++ b/build/root.build
@@ -11,3 +11,10 @@ cxx.ext = cxx
cxx.std = 14
cxx.poptions += -I$src_root
+
+# All exe{} in tests/ are, well, tests.
+#
+tests/:
+{
+ test.exe = true
+}
diff --git a/tests/lexer/buildfile b/tests/lexer/buildfile
index 3aca207..a6976cf 100644
--- a/tests/lexer/buildfile
+++ b/tests/lexer/buildfile
@@ -2,4 +2,6 @@
# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-exe{driver}: cxx{driver ../../../build/{lexer diagnostics utility}}
+import libs = libbutl%lib{butl}
+
+exe{driver}: cxx{driver ../../build/{lexer diagnostics utility}} $libs
diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx
index f77656d..fb5efc3 100644
--- a/tests/lexer/driver.cxx
+++ b/tests/lexer/driver.cxx
@@ -14,11 +14,14 @@
using namespace std;
using namespace build;
-typedef vector<string> tokens;
+using tokens = vector<string>;
static tokens
lex (const char*);
+ostream&
+operator<< (ostream&, const tokens&);
+
int
main ()
{
@@ -54,10 +57,28 @@ main ()
assert (lex ("fo\\ o\\:") == tokens ({"fo o:", ""}));
assert (lex ("foo\\\nbar") == tokens ({"foo\nbar", ""}));
assert (lex ("foo \\\nbar") == tokens ({"foo", "bar", ""}));
+ assert (lex ("\\'foo") == tokens ({"'foo", ""}));
assert (lex (" \\") == tokens ({"<lexer error>"}));
assert (lex (" foo\\") == tokens ({"<lexer error>"}));
+
+ // Quoting.
+ //
+ assert (lex ("''") == tokens ({"", ""}));
+ assert (lex ("'foo'") == tokens ({"foo", ""}));
+ assert (lex ("'foo bar'") == tokens ({"foo bar", ""}));
+ assert (lex ("'foo 'bar") == tokens ({"foo bar", ""}));
+ assert (lex ("foo' bar'") == tokens ({"foo bar", ""}));
+ assert (lex ("'foo ''bar'") == tokens ({"foo bar", ""}));
+ assert (lex ("foo' 'bar") == tokens ({"foo bar", ""}));
+ assert (lex ("'foo\nbar'") == tokens ({"foo\nbar", ""}));
+ assert (lex ("'#:${}()=+\n'") == tokens ({"#:${}()=+\n", ""}));
+ assert (lex ("'\"'") == tokens ({"\"", ""}));
+ assert (lex ("'\\'") == tokens ({"\\", ""}));
+
+ assert (lex ("'foo bar") == tokens ({"<lexer error>"}));
+
// Combinations.
//
assert (lex ("foo: bar") == tokens ({"foo", ":", "bar", ""}));
@@ -87,21 +108,27 @@ lex (const char* s)
{
for (token t (l.next ());; t = l.next ())
{
- const char* v (nullptr);
+ string v;
switch (t.type ())
{
- case token_type::eos: v= ""; break;
- case token_type::newline: v = "\n"; break;
- case token_type::colon: v = ":"; break;
- case token_type::lcbrace: v = "{"; break;
- case token_type::rcbrace: v = "}"; break;
- case token_type::name: v = t.name ().c_str (); break;
+ case token_type::eos: v = ""; break;
+ case token_type::newline: v = "\n"; break;
+ case token_type::pair_separator: v = l.pair_separator (); break;
+ case token_type::colon: v = ":"; break;
+ case token_type::lcbrace: v = "{"; break;
+ case token_type::rcbrace: v = "}"; break;
+ case token_type::equal: v = "="; break;
+ case token_type::plus_equal: v = "+="; break;
+ case token_type::dollar: v = "$"; break;
+ case token_type::lparen: v = "("; break;
+ case token_type::rparen: v = ")"; break;
+ case token_type::name: v = t.name ().c_str (); break;
}
// cerr << t.line () << ':' << t.column () << ':' << v << endl;
- r.push_back (v);
+ r.push_back (move (v));
if (t.type () == token_type::eos)
break;
@@ -118,3 +145,12 @@ lex (const char* s)
return r;
}
+
+ostream&
+operator<< (ostream& os, const tokens& ts) // Debugging aid: print a token list to os.
+{
+ for (const string& t: ts) // Tokens are written in order.
+ os << '"' << t << '"' << ' '; // Each one is double-quoted and followed by a space (so there is a trailing space).
+
+ return os; // Return the stream so that insertions can be chained.
+}