diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2015-09-09 10:20:52 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2015-09-09 10:20:52 +0200 |
commit | e3b6dc455ab5c98606e38983bd19426ae346f469 (patch) | |
tree | 62f145eac81c7c6f955ca9e63df17aa07c392c11 | |
parent | ccca13f8eadef31f2df873cb505ebca98501c45a (diff) |
Reimplement double quote lexing to avoid "implied quote" trick
-rw-r--r-- | build/lexer | 9 | ||||
-rw-r--r-- | build/lexer.cxx | 191 | ||||
-rw-r--r-- | tests/lexer/driver.cxx | 1 | ||||
-rw-r--r-- | tests/quote/buildfile | 4 | ||||
-rw-r--r-- | tests/quote/test.out | 3 | ||||
-rwxr-xr-x | tests/quote/test.sh | 2 |
6 files changed, 105 insertions, 105 deletions
diff --git a/build/lexer b/build/lexer index 0740f14..37c7807 100644 --- a/build/lexer +++ b/build/lexer @@ -69,13 +69,10 @@ namespace build private: token - name (bool separated); - - void - single_quote (std::string&); + next_quoted (); - bool - double_quote (std::string&); + token + name (bool separated); // Return true we have seen any spaces. Skipped empty lines don't // count. In other words, we are only interested in spaces that diff --git a/build/lexer.cxx b/build/lexer.cxx index 9c76377..6da18eb 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -13,29 +13,12 @@ namespace build { lexer_mode m (mode_.top ()); - // If we are in the quoted mode, then this means we have seen a - // variable expansion ($) and had to "break" the quoted sequence - // into multiple "concatenated" tokens. So what we have now is - // the "tail" of that quoted sequence which we need to continue - // scanning. To make this work auto-magically (well, almost) we - // are going to use a little trick: we will "pretend" that the - // next character is the opening quote. After all, a sequence - // like "$foo bar" is semantically equivalent to "$foo"" bar". + // For some modes we have dedicated imlementations of next(). // - if (m == lexer_mode::quoted) + switch (m) { - xchar c (peek ()); - - // Detect the beginning of the "break". After that, we rely - // on the caller switching to the variable mode. - // - if (c != '$') - { - mode_.pop (); // As if we saw closing quote. - c.value = '"'; // Keep line/column information. - unget (c); - return name (false); - } + case lexer_mode::quoted: return next_quoted (); + default: break; } bool sep (skip_spaces ()); @@ -120,6 +103,23 @@ namespace build } token lexer:: + next_quoted () + { + xchar c (peek ()); + + if (eos (c)) + fail (c) << "unterminated double-quoted sequence"; + + uint64_t ln (c.line), cn (c.column); + + switch (c) + { + case '$': get (); return token (token_type::dollar, false, ln, cn); + default: return name (false); + } + } + + token lexer:: name (bool sep) { xchar c (peek ()); @@ -140,9 +140,11 @@ namespace build break; // The following characters are not treated as special in the - // value or pairs mode. + // value/pairs and quoted modes. // - if (m != lexer_mode::value && m != lexer_mode::pairs) + if (m != lexer_mode::value && + m != lexer_mode::pairs && + m != lexer_mode::quoted) { switch (c) { @@ -178,61 +180,104 @@ namespace build break; } + // If we are quoted, these are ordinary characters. + // + if (m != lexer_mode::quoted) + { + switch (c) + { + case ' ': + case '\t': + case '\n': + case '#': + case '{': + case '}': + case '(': + case ')': + { + done = true; + break; + } + case '\\': + { + get (); + lexeme += escape (); + continue; + } + case '\'': + { + // If we are in the variable mode, then treat quote as just + // another separator. + // + if (m == lexer_mode::variable) + { + done = true; + break; + } + else + { + get (); + + for (c = get (); !eos (c) && c != '\''; c = get ()) + lexeme += c; + + if (eos (c)) + fail (c) << "unterminated single-quoted sequence"; + + continue; + } + } + } + + if (done) + break; + } + switch (c) { - case ' ': - case '\t': - case '\n': - case '#': - case '{': - case '}': case '$': - case '(': - case ')': { done = true; break; } - case '\\': - { - get (); - lexeme += escape (); - break; - } - case '\'': case '\"': { - // If we are in the variable mode, then treat quotes as just + // If we are in the variable mode, then treat quote as just // another separator. // if (m == lexer_mode::variable) + { done = true; + break; + } else { get (); - if (c == '\'') - single_quote (lexeme); + if (m == lexer_mode::quoted) + mode_.pop (); else - { mode_.push (lexer_mode::quoted); - done = double_quote (lexeme); - } + + m = mode_.top (); + continue; } - break; } default: { get (); lexeme += c; - break; + continue; } } - if (done) - break; + assert (done); + break; } + if (m == lexer_mode::quoted && eos (c)) + fail (c) << "unterminated double-quoted sequence"; + // Expire variable mode at the end of the name. // if (m == lexer_mode::variable) @@ -241,56 +286,6 @@ namespace build return token (lexeme, sep, ln, cn); } - // Assuming the previous character is the opening single quote, scan - // the stream until the closing quote or eos, accumulating characters - // in between in lexeme. Fail if eos is reached before the closing - // quote. - // - void lexer:: - single_quote (string& lexeme) - { - xchar c (get ()); - - for (; !eos (c) && c != '\''; c = get ()) - lexeme += c; - - if (eos (c)) - fail (c) << "unterminated single-quoted sequence"; - } - - // Assuming the previous character is the opening double quote, scan - // the stream until the closing quote, $, or eos, accumulating - // characters in between in lexeme. Return false if we stopped - // because of the closing quote (which means the normal name - // scanning can continue) and true if we stopped at $ (meaning this - // name is done and what follows is another token). Fail if eos is - // reached before the closing quote. - // - bool lexer:: - double_quote (string& lexeme) - { - xchar c (peek ()); - - for (; !eos (c); c = peek ()) - { - if (c == '$') - return true; - - get (); - - if (c == '"') - { - mode_.pop (); // Expire quoted mode. - return false; - } - - lexeme += c; - } - - fail (c) << "unterminated double-quoted sequence"; - return false; // Never reached. - } - bool lexer:: skip_spaces () { diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx index e3543da..a3819f5 100644 --- a/tests/lexer/driver.cxx +++ b/tests/lexer/driver.cxx @@ -99,6 +99,7 @@ main () assert (lex ("f\"oo$ba\"r") == tokens ({"foo", "$", "bar", ""})); assert (lex ("\"foo bar") == tokens ({"<lexer error>"})); + assert (lex ("\"foo $") == tokens ({"foo ", "$", "<lexer error>"})); assert (lex ("\"foo $bar") == tokens ({"foo ", "$", "<lexer error>"})); // Combinations. diff --git a/tests/quote/buildfile b/tests/quote/buildfile index 931bc36..6dd22b4 100644 --- a/tests/quote/buildfile +++ b/tests/quote/buildfile @@ -15,4 +15,8 @@ print "[ $bar ]" print "[ $foo $bar ]" print "[ $foo/$bar ]" +print $foo'bar' +print $foo"$bar" +print "$foo"bar + ./: diff --git a/tests/quote/test.out b/tests/quote/test.out index 802f28f..216b1c8 100644 --- a/tests/quote/test.out +++ b/tests/quote/test.out @@ -9,3 +9,6 @@ fo o bar [ bar ] [ fo o bar ] [ fo o/ bar ] +fo obar +fo o bar +fo obar diff --git a/tests/quote/test.sh b/tests/quote/test.sh index 145ea6b..b898b3c 100755 --- a/tests/quote/test.sh +++ b/tests/quote/test.sh @@ -1,3 +1,3 @@ #!/bin/sh -valgrind -q b -q | diff test.out - +valgrind -q b -q | diff -u test.out - |