diff options
-rw-r--r-- | build/lexer | 13 | ||||
-rw-r--r-- | build/lexer.cxx | 99 | ||||
-rw-r--r-- | tests/lexer/driver.cxx | 24 | ||||
-rw-r--r-- | tests/quote/buildfile | 18 | ||||
-rw-r--r-- | tests/quote/test.out | 11 | ||||
-rwxr-xr-x | tests/quote/test.sh | 3 |
6 files changed, 148 insertions, 20 deletions
diff --git a/build/lexer b/build/lexer index 1e253fd..0740f14 100644 --- a/build/lexer +++ b/build/lexer @@ -28,8 +28,9 @@ namespace build // The alternnative modes must be set manually. The value and // pairs modes are automatically reset after the end of the line. // The variable mode is automatically reset after the name token. + // Quoted is an internal mode and should not be explicitly set. // - enum class lexer_mode {normal, variable, value, pairs}; + enum class lexer_mode {normal, quoted, variable, value, pairs}; class lexer: protected butl::char_scanner { @@ -71,7 +72,10 @@ namespace build name (bool separated); void - single_quote (std::string& lexeme); + single_quote (std::string&); + + bool + double_quote (std::string&); // Return true we have seen any spaces. Skipped empty lines don't // count. In other words, we are only interested in spaces that @@ -100,11 +104,12 @@ namespace build private: fail_mark fail; - // Currently, the maximum mode nesting is 3: {normal, value, variable}. + // Currently, the maximum mode nesting is 4: {normal, value, quoted, + // variable}. // struct mode_stack { - static const size_t max_size = 3; + static const size_t max_size = 4; void push (lexer_mode m) {assert (n_ != max_size); d_[n_++] = m;} void pop () {assert (n_ != 0); n_--;} diff --git a/build/lexer.cxx b/build/lexer.cxx index f4733be..9c76377 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -11,6 +11,33 @@ namespace build token lexer:: next () { + lexer_mode m (mode_.top ()); + + // If we are in the quoted mode, then this means we have seen a + // variable expansion ($) and had to "break" the quoted sequence + // into multiple "concatenated" tokens. So what we have now is + // the "tail" of that quoted sequence which we need to continue + // scanning. To make this work auto-magically (well, almost) we + // are going to use a little trick: we will "pretend" that the + // next character is the opening quote. After all, a sequence + // like "$foo bar" is semantically equivalent to "$foo"" bar". + // + if (m == lexer_mode::quoted) + { + xchar c (peek ()); + + // Detect the beginning of the "break". After that, we rely + // on the caller switching to the variable mode. + // + if (c != '$') + { + mode_.pop (); // As if we saw closing quote. + c.value = '"'; // Keep line/column information. + unget (c); + return name (false); + } + } + bool sep (skip_spaces ()); xchar c (get ()); @@ -19,8 +46,6 @@ namespace build if (eos (c)) return token (token_type::eos, sep, ln, cn); - lexer_mode m (mode_.top ()); - switch (c) { // NOTE: remember to update name() if adding new punctuations. @@ -175,8 +200,25 @@ namespace build break; } case '\'': + case '\"': { - single_quote (lexeme); + // If we are in the variable mode, then treat quotes as just + // another separator. + // + if (m == lexer_mode::variable) + done = true; + else + { + get (); + + if (c == '\'') + single_quote (lexeme); + else + { + mode_.push (lexer_mode::quoted); + done = double_quote (lexeme); + } + } break; } default: @@ -191,11 +233,6 @@ namespace build break; } - // The first character shall not be a separator (we shouldn't have - // been called if that's the case). - // - assert (c.line != ln || c.column != cn); - // Expire variable mode at the end of the name. // if (m == lexer_mode::variable) @@ -204,24 +241,56 @@ namespace build return token (lexeme, sep, ln, cn); } - // Assuming the next character is the opening single quote, scan - // the stream until the closing quote (or eos), accumulating - // characters in between in lexeme. Fail if eos is reached before - // the closing quote. + // Assuming the previous character is the opening single quote, scan + // the stream until the closing quote or eos, accumulating characters + // in between in lexeme. Fail if eos is reached before the closing + // quote. // void lexer:: single_quote (string& lexeme) { - xchar c (get ()); // Opening quote mark. - assert (c == '\''); + xchar c (get ()); - for (c = get (); !eos (c) && c != '\''; c = get ()) + for (; !eos (c) && c != '\''; c = get ()) lexeme += c; if (eos (c)) fail (c) << "unterminated single-quoted sequence"; } + // Assuming the previous character is the opening double quote, scan + // the stream until the closing quote, $, or eos, accumulating + // characters in between in lexeme. Return false if we stopped + // because of the closing quote (which means the normal name + // scanning can continue) and true if we stopped at $ (meaning this + // name is done and what follows is another token). Fail if eos is + // reached before the closing quote. + // + bool lexer:: + double_quote (string& lexeme) + { + xchar c (peek ()); + + for (; !eos (c); c = peek ()) + { + if (c == '$') + return true; + + get (); + + if (c == '"') + { + mode_.pop (); // Expire quoted mode. + return false; + } + + lexeme += c; + } + + fail (c) << "unterminated double-quoted sequence"; + return false; // Never reached. + } + bool lexer:: skip_spaces () { diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx index fb5efc3..e3543da 100644 --- a/tests/lexer/driver.cxx +++ b/tests/lexer/driver.cxx @@ -63,7 +63,7 @@ main () assert (lex (" foo\\") == tokens ({"<lexer error>"})); - // Quoting. + // Quoting ''. // assert (lex ("''") == tokens ({"", ""})); assert (lex ("'foo'") == tokens ({"foo", ""})); @@ -79,6 +79,28 @@ main () assert (lex ("'foo bar") == tokens ({"<lexer error>"})); + // Quoting "". + // + assert (lex ("\"\"") == tokens ({"", ""})); + assert (lex ("\"foo\"") == tokens ({"foo", ""})); + assert (lex ("\"foo bar\"") == tokens ({"foo bar", ""})); + assert (lex ("\"foo \"bar") == tokens ({"foo bar", ""})); + assert (lex ("foo\" bar\"") == tokens ({"foo bar", ""})); + assert (lex ("\"foo \"\"bar\"") == tokens ({"foo bar", ""})); + assert (lex ("foo\" \"bar") == tokens ({"foo bar", ""})); + assert (lex ("\"foo\nbar\"") == tokens ({"foo\nbar", ""})); + assert (lex ("\"#:{}()=+\n\"") == tokens ({"#:{}()=+\n", ""})); + assert (lex ("\"'\"") == tokens ({"'", ""})); + assert (lex ("\"\\\"") == tokens ({"\\", ""})); + + assert (lex ("\"$\"") == tokens ({"", "$", "", ""})); + assert (lex ("\"foo$bar\"") == tokens ({"foo", "$", "bar", ""})); + assert (lex ("foo\"$\"bar") == tokens ({"foo", "$", "bar", ""})); + assert (lex ("f\"oo$ba\"r") == tokens ({"foo", "$", "bar", ""})); + + assert (lex ("\"foo bar") == tokens ({"<lexer error>"})); + assert (lex ("\"foo $bar") == tokens ({"foo ", "$", "<lexer error>"})); + // Combinations. // assert (lex ("foo: bar") == tokens ({"foo", ":", "bar", ""})); diff --git a/tests/quote/buildfile b/tests/quote/buildfile new file mode 100644 index 0000000..931bc36 --- /dev/null +++ b/tests/quote/buildfile @@ -0,0 +1,18 @@ +print "foo bar" +print "foo +bar" + +foo = "fo o" +bar = " bar " + +print "$foo" +print "$bar" +print "$foo $bar" +print "$foo$bar" + +print "[ $foo ]" +print "[ $bar ]" +print "[ $foo $bar ]" +print "[ $foo/$bar ]" + +./: diff --git a/tests/quote/test.out b/tests/quote/test.out new file mode 100644 index 0000000..802f28f --- /dev/null +++ b/tests/quote/test.out @@ -0,0 +1,11 @@ +foo bar +foo +bar +fo o + bar +fo o bar +fo o bar +[ fo o ] +[ bar ] +[ fo o bar ] +[ fo o/ bar ] diff --git a/tests/quote/test.sh b/tests/quote/test.sh new file mode 100755 index 0000000..145ea6b --- /dev/null +++ b/tests/quote/test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +valgrind -q b -q | diff test.out - |