aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build/lexer13
-rw-r--r--build/lexer.cxx99
-rw-r--r--tests/lexer/driver.cxx24
-rw-r--r--tests/quote/buildfile18
-rw-r--r--tests/quote/test.out11
-rwxr-xr-xtests/quote/test.sh3
6 files changed, 148 insertions, 20 deletions
diff --git a/build/lexer b/build/lexer
index 1e253fd..0740f14 100644
--- a/build/lexer
+++ b/build/lexer
@@ -28,8 +28,9 @@ namespace build
// The alternative modes must be set manually. The value and
// pairs modes are automatically reset after the end of the line.
// The variable mode is automatically reset after the name token.
+ // Quoted is an internal mode and should not be explicitly set.
//
- enum class lexer_mode {normal, variable, value, pairs};
+ enum class lexer_mode {normal, quoted, variable, value, pairs};
class lexer: protected butl::char_scanner
{
@@ -71,7 +72,10 @@ namespace build
name (bool separated);
void
- single_quote (std::string& lexeme);
+ single_quote (std::string&);
+
+ bool
+ double_quote (std::string&);
// Return true if we have seen any spaces. Skipped empty lines don't
// count. In other words, we are only interested in spaces that
@@ -100,11 +104,12 @@ namespace build
private:
fail_mark fail;
- // Currently, the maximum mode nesting is 3: {normal, value, variable}.
+ // Currently, the maximum mode nesting is 4: {normal, value, quoted,
+ // variable}.
//
struct mode_stack
{
- static const size_t max_size = 3;
+ static const size_t max_size = 4;
void push (lexer_mode m) {assert (n_ != max_size); d_[n_++] = m;}
void pop () {assert (n_ != 0); n_--;}
diff --git a/build/lexer.cxx b/build/lexer.cxx
index f4733be..9c76377 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -11,6 +11,33 @@ namespace build
token lexer::
next ()
{
+ lexer_mode m (mode_.top ());
+
+ // If we are in the quoted mode, then this means we have seen a
+ // variable expansion ($) and had to "break" the quoted sequence
+ // into multiple "concatenated" tokens. So what we have now is
+ // the "tail" of that quoted sequence which we need to continue
+ // scanning. To make this work auto-magically (well, almost) we
+ // are going to use a little trick: we will "pretend" that the
+ // next character is the opening quote. After all, a sequence
+ // like "$foo bar" is semantically equivalent to "$foo"" bar".
+ //
+ if (m == lexer_mode::quoted)
+ {
+ xchar c (peek ());
+
+ // Detect the beginning of the "break". After that, we rely
+ // on the caller switching to the variable mode.
+ //
+ if (c != '$')
+ {
+ mode_.pop (); // As if we saw closing quote.
+ c.value = '"'; // Keep line/column information.
+ unget (c);
+ return name (false);
+ }
+ }
+
bool sep (skip_spaces ());
xchar c (get ());
@@ -19,8 +46,6 @@ namespace build
if (eos (c))
return token (token_type::eos, sep, ln, cn);
- lexer_mode m (mode_.top ());
-
switch (c)
{
// NOTE: remember to update name() if adding new punctuations.
@@ -175,8 +200,25 @@ namespace build
break;
}
case '\'':
+ case '\"':
{
- single_quote (lexeme);
+ // If we are in the variable mode, then treat quotes as just
+ // another separator.
+ //
+ if (m == lexer_mode::variable)
+ done = true;
+ else
+ {
+ get ();
+
+ if (c == '\'')
+ single_quote (lexeme);
+ else
+ {
+ mode_.push (lexer_mode::quoted);
+ done = double_quote (lexeme);
+ }
+ }
break;
}
default:
@@ -191,11 +233,6 @@ namespace build
break;
}
- // The first character shall not be a separator (we shouldn't have
- // been called if that's the case).
- //
- assert (c.line != ln || c.column != cn);
-
// Expire variable mode at the end of the name.
//
if (m == lexer_mode::variable)
@@ -204,24 +241,56 @@ namespace build
return token (lexeme, sep, ln, cn);
}
- // Assuming the next character is the opening single quote, scan
- // the stream until the closing quote (or eos), accumulating
- // characters in between in lexeme. Fail if eos is reached before
- // the closing quote.
+ // Assuming the previous character is the opening single quote, scan
+ // the stream until the closing quote or eos, accumulating characters
+ // in between in lexeme. Fail if eos is reached before the closing
+ // quote.
//
void lexer::
single_quote (string& lexeme)
{
- xchar c (get ()); // Opening quote mark.
- assert (c == '\'');
+ xchar c (get ());
- for (c = get (); !eos (c) && c != '\''; c = get ())
+ for (; !eos (c) && c != '\''; c = get ())
lexeme += c;
if (eos (c))
fail (c) << "unterminated single-quoted sequence";
}
+ // Assuming the previous character is the opening double quote, scan
+ // the stream until the closing quote, $, or eos, accumulating
+ // characters in between in lexeme. Return false if we stopped
+ // because of the closing quote (which means the normal name
+ // scanning can continue) and true if we stopped at $ (meaning this
+ // name is done and what follows is another token). Fail if eos is
+ // reached before the closing quote.
+ //
+ bool lexer::
+ double_quote (string& lexeme)
+ {
+ xchar c (peek ());
+
+ for (; !eos (c); c = peek ())
+ {
+ if (c == '$')
+ return true;
+
+ get ();
+
+ if (c == '"')
+ {
+ mode_.pop (); // Expire quoted mode.
+ return false;
+ }
+
+ lexeme += c;
+ }
+
+ fail (c) << "unterminated double-quoted sequence";
+ return false; // Never reached.
+ }
+
bool lexer::
skip_spaces ()
{
diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx
index fb5efc3..e3543da 100644
--- a/tests/lexer/driver.cxx
+++ b/tests/lexer/driver.cxx
@@ -63,7 +63,7 @@ main ()
assert (lex (" foo\\") == tokens ({"<lexer error>"}));
- // Quoting.
+ // Quoting ''.
//
assert (lex ("''") == tokens ({"", ""}));
assert (lex ("'foo'") == tokens ({"foo", ""}));
@@ -79,6 +79,28 @@ main ()
assert (lex ("'foo bar") == tokens ({"<lexer error>"}));
+ // Quoting "".
+ //
+ assert (lex ("\"\"") == tokens ({"", ""}));
+ assert (lex ("\"foo\"") == tokens ({"foo", ""}));
+ assert (lex ("\"foo bar\"") == tokens ({"foo bar", ""}));
+ assert (lex ("\"foo \"bar") == tokens ({"foo bar", ""}));
+ assert (lex ("foo\" bar\"") == tokens ({"foo bar", ""}));
+ assert (lex ("\"foo \"\"bar\"") == tokens ({"foo bar", ""}));
+ assert (lex ("foo\" \"bar") == tokens ({"foo bar", ""}));
+ assert (lex ("\"foo\nbar\"") == tokens ({"foo\nbar", ""}));
+ assert (lex ("\"#:{}()=+\n\"") == tokens ({"#:{}()=+\n", ""}));
+ assert (lex ("\"'\"") == tokens ({"'", ""}));
+ assert (lex ("\"\\\"") == tokens ({"\\", ""}));
+
+ assert (lex ("\"$\"") == tokens ({"", "$", "", ""}));
+ assert (lex ("\"foo$bar\"") == tokens ({"foo", "$", "bar", ""}));
+ assert (lex ("foo\"$\"bar") == tokens ({"foo", "$", "bar", ""}));
+ assert (lex ("f\"oo$ba\"r") == tokens ({"foo", "$", "bar", ""}));
+
+ assert (lex ("\"foo bar") == tokens ({"<lexer error>"}));
+ assert (lex ("\"foo $bar") == tokens ({"foo ", "$", "<lexer error>"}));
+
// Combinations.
//
assert (lex ("foo: bar") == tokens ({"foo", ":", "bar", ""}));
diff --git a/tests/quote/buildfile b/tests/quote/buildfile
new file mode 100644
index 0000000..931bc36
--- /dev/null
+++ b/tests/quote/buildfile
@@ -0,0 +1,18 @@
+print "foo bar"
+print "foo
+bar"
+
+foo = "fo o"
+bar = " bar "
+
+print "$foo"
+print "$bar"
+print "$foo $bar"
+print "$foo$bar"
+
+print "[ $foo ]"
+print "[ $bar ]"
+print "[ $foo $bar ]"
+print "[ $foo/$bar ]"
+
+./:
diff --git a/tests/quote/test.out b/tests/quote/test.out
new file mode 100644
index 0000000..802f28f
--- /dev/null
+++ b/tests/quote/test.out
@@ -0,0 +1,11 @@
+foo bar
+foo
+bar
+fo o
+ bar
+fo o bar
+fo o bar
+[ fo o ]
+[ bar ]
+[ fo o bar ]
+[ fo o/ bar ]
diff --git a/tests/quote/test.sh b/tests/quote/test.sh
new file mode 100755
index 0000000..145ea6b
--- /dev/null
+++ b/tests/quote/test.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+valgrind -q b -q | diff test.out -