From ccca13f8eadef31f2df873cb505ebca98501c45a Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 8 Sep 2015 16:42:24 +0200 Subject: Initial take on double quote support Currently, $(foo)-style variable expansion is not supported. --- build/lexer | 13 +++++--- build/lexer.cxx | 99 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 93 insertions(+), 19 deletions(-) (limited to 'build') diff --git a/build/lexer b/build/lexer index 1e253fd..0740f14 100644 --- a/build/lexer +++ b/build/lexer @@ -28,8 +28,9 @@ namespace build // The alternnative modes must be set manually. The value and // pairs modes are automatically reset after the end of the line. // The variable mode is automatically reset after the name token. + // Quoted is an internal mode and should not be explicitly set. // - enum class lexer_mode {normal, variable, value, pairs}; + enum class lexer_mode {normal, quoted, variable, value, pairs}; class lexer: protected butl::char_scanner { @@ -71,7 +72,10 @@ namespace build name (bool separated); void - single_quote (std::string& lexeme); + single_quote (std::string&); + + bool + double_quote (std::string&); // Return true we have seen any spaces. Skipped empty lines don't // count. In other words, we are only interested in spaces that @@ -100,11 +104,12 @@ namespace build private: fail_mark fail; - // Currently, the maximum mode nesting is 3: {normal, value, variable}. + // Currently, the maximum mode nesting is 4: {normal, value, quoted, + // variable}. // struct mode_stack { - static const size_t max_size = 3; + static const size_t max_size = 4; void push (lexer_mode m) {assert (n_ != max_size); d_[n_++] = m;} void pop () {assert (n_ != 0); n_--;} diff --git a/build/lexer.cxx b/build/lexer.cxx index f4733be..9c76377 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -11,6 +11,33 @@ namespace build token lexer:: next () { + lexer_mode m (mode_.top ()); + + // If we are in the quoted mode, then this means we have seen a + // variable expansion ($) and had to "break" the quoted sequence + // into multiple "concatenated" tokens. So what we have now is + // the "tail" of that quoted sequence which we need to continue + // scanning. To make this work auto-magically (well, almost) we + // are going to use a little trick: we will "pretend" that the + // next character is the opening quote. After all, a sequence + // like "$foo bar" is semantically equivalent to "$foo"" bar". + // + if (m == lexer_mode::quoted) + { + xchar c (peek ()); + + // Detect the beginning of the "break". After that, we rely + // on the caller switching to the variable mode. + // + if (c != '$') + { + mode_.pop (); // As if we saw closing quote. + c.value = '"'; // Keep line/column information. + unget (c); + return name (false); + } + } + bool sep (skip_spaces ()); xchar c (get ()); @@ -19,8 +46,6 @@ namespace build if (eos (c)) return token (token_type::eos, sep, ln, cn); - lexer_mode m (mode_.top ()); - switch (c) { // NOTE: remember to update name() if adding new punctuations. @@ -175,8 +200,25 @@ namespace build break; } case '\'': + case '\"': { - single_quote (lexeme); + // If we are in the variable mode, then treat quotes as just + // another separator. + // + if (m == lexer_mode::variable) + done = true; + else + { + get (); + + if (c == '\'') + single_quote (lexeme); + else + { + mode_.push (lexer_mode::quoted); + done = double_quote (lexeme); + } + } break; } default: @@ -191,11 +233,6 @@ namespace build break; } - // The first character shall not be a separator (we shouldn't have - // been called if that's the case). - // - assert (c.line != ln || c.column != cn); - // Expire variable mode at the end of the name. // if (m == lexer_mode::variable) @@ -204,24 +241,56 @@ namespace build return token (lexeme, sep, ln, cn); } - // Assuming the next character is the opening single quote, scan - // the stream until the closing quote (or eos), accumulating - // characters in between in lexeme. Fail if eos is reached before - // the closing quote. + // Assuming the previous character is the opening single quote, scan + // the stream until the closing quote or eos, accumulating characters + // in between in lexeme. Fail if eos is reached before the closing + // quote. // void lexer:: single_quote (string& lexeme) { - xchar c (get ()); // Opening quote mark. - assert (c == '\''); + xchar c (get ()); - for (c = get (); !eos (c) && c != '\''; c = get ()) + for (; !eos (c) && c != '\''; c = get ()) lexeme += c; if (eos (c)) fail (c) << "unterminated single-quoted sequence"; } + // Assuming the previous character is the opening double quote, scan + // the stream until the closing quote, $, or eos, accumulating + // characters in between in lexeme. Return false if we stopped + // because of the closing quote (which means the normal name + // scanning can continue) and true if we stopped at $ (meaning this + // name is done and what follows is another token). Fail if eos is + // reached before the closing quote. + // + bool lexer:: + double_quote (string& lexeme) + { + xchar c (peek ()); + + for (; !eos (c); c = peek ()) + { + if (c == '$') + return true; + + get (); + + if (c == '"') + { + mode_.pop (); // Expire quoted mode. + return false; + } + + lexeme += c; + } + + fail (c) << "unterminated double-quoted sequence"; + return false; // Never reached. + } + bool lexer:: skip_spaces () { -- cgit v1.1