diff options
Diffstat (limited to 'libbuild2/lexer.cxx')
-rw-r--r-- | libbuild2/lexer.cxx | 177 |
1 files changed, 147 insertions, 30 deletions
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index 992e5d1..04c15be 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -42,6 +42,22 @@ namespace build2 return make_pair (make_pair (r[0], r[1]), sep_); } + pair<char, bool> lexer:: + peek_char () + { + auto p (skip_spaces ()); + assert (!p.second); + sep_ = p.first; + + char r ('\0'); + + xchar c (peek ()); + if (!eos (c)) + r = c; + + return make_pair (r, sep_); + } + void lexer:: mode (lexer_mode m, char ps, optional<const char*> esc, uintptr_t data) { @@ -144,13 +160,15 @@ namespace build2 break; } case lexer_mode::foreign: - assert (data > 1); - // Fall through. + { + assert (ps == '\0' && data > 1); + s = false; + break; + } case lexer_mode::single_quoted: case lexer_mode::double_quoted: { - assert (ps == '\0'); - s = false; + assert (false); // Can only be set manually in word(). break; } case lexer_mode::variable: @@ -162,8 +180,49 @@ namespace build2 default: assert (false); // Unhandled custom mode. } - state_.push ( - state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2}); + mode_impl (state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2}); + } + + void lexer:: + mode_impl (state&& s) + { + // If we are in the double-quoted mode then, unless the new mode is eval + // or variable, delay the state switch until the current mode is expired. + // Note that we delay by injecting the new state beneath the current + // state. + // + if (!state_.empty () && + state_.top ().mode == lexer_mode::double_quoted && + s.mode != lexer_mode::eval && + s.mode != lexer_mode::variable) + { + state qs (move (state_.top ())); // Save quoted state. + state_.top () = move (s); // Overwrite quoted state with new state. + state_.push (move (qs)); // Restore quoted state. + } + else + state_.push (move (s)); + } + + void lexer:: + expire_mode () + { + // If we are in the double-quoted mode, then delay the state expiration + // until the current mode is expired. Note that we delay by overwriting + // the being expired state with the current state. + // + assert (!state_.empty () && + (state_.top ().mode != lexer_mode::double_quoted || + state_.size () > 1)); + + if (state_.top ().mode == lexer_mode::double_quoted) + { + state qs (move (state_.top ())); // Save quoted state. + state_.pop (); // Pop quoted state. + state_.top () = move (qs); // Expire state, restoring quoted state. + } + else + state_.pop (); } token lexer:: @@ -654,9 +713,9 @@ namespace build2 } token lexer:: - word (state st, bool sep) + word (const state& rst, bool sep) { - lexer_mode m (st.mode); + lexer_mode m (rst.mode); xchar c (peek ()); assert (!eos (c)); @@ -687,22 +746,66 @@ namespace build2 lexeme += c; }; - for (; !eos (c); c = peek ()) + const state* st (&rst); + for (bool first (true); !eos (c); first = false, c = peek ()) { // First handle escape sequences. // if (c == '\\') { - // In the variable mode we treat the beginning of the escape sequence - // as a separator (think \"$foo\"). + // In the variable mode we treat immediate `\` as the escape sequence + // literal and any following as a separator (think \"$foo\"). // if (m == lexer_mode::variable) - break; + { + if (!first) + break; + + get (); + c = get (); + + if (eos (c)) + fail (c) << "unterminated escape sequence"; + + // For now we only support all the simple C/C++ escape sequences + // plus \0 (which in C/C++ is an octal escape sequence). + // + // In the future we may decide to support more elaborate sequences + // such as \xNN, \uNNNN, etc. + // + // Note: we return it in the literal form instead of translating for + // easier printing. + // + switch (c) + { + case '\'': + case '"': + case '?': + case '\\': + case '0': + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': lexeme = c; break; + default: + fail (c) << "unknown escape sequence \\" << c; + } + + state_.pop (); + return token (type::escape, + move (lexeme), + sep, + qtype, qcomp, qfirst, + ln, cn); + } get (); xchar p (peek ()); - const char* esc (st.escapes); + const char* esc (st->escapes); if (esc == nullptr || (*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr)) @@ -718,7 +821,7 @@ namespace build2 continue; } else - unget (c); // Treat as a normal character. + unget (c); // Fall through to treat as a normal character. } bool done (false); @@ -747,8 +850,8 @@ namespace build2 get (); state_.pop (); - st = state_.top (); - m = st.mode; + st = &state_.top (); + m = st->mode; continue; } } @@ -757,19 +860,17 @@ namespace build2 // else if (m == lexer_mode::variable) { - bool first (lexeme.empty ()); - // Handle special variable names, if any. // - if (first && - st.data != 0 && - strchr (reinterpret_cast<const char*> (st.data), c) != nullptr) + if (first && + st->data != 0 && + strchr (reinterpret_cast<const char*> (st->data), c) != nullptr) { get (); lexeme += c; done = true; } - else if (c != '_' && !(first ? alpha (c) : alnum (c))) + else if (c != '_' && !(lexeme.empty () ? alpha (c) : alnum (c))) { if (c != '.') done = true; @@ -789,17 +890,17 @@ namespace build2 { // First check if it's a pair separator. // - if (c == st.sep_pair) + if (c == st->sep_pair) done = true; else { // Then see if this character or character sequence is a separator. // - for (const char* p (strchr (st.sep_first, c)); + for (const char* p (strchr (st->sep_first, c)); p != nullptr; p = done ? nullptr : strchr (p + 1, c)) { - char s (st.sep_second[p - st.sep_first]); + char s (st->sep_second[p - st->sep_first]); // See if it has a second. // @@ -817,8 +918,21 @@ namespace build2 // Handle single and double quotes if enabled for this mode and unless // they were considered separators. // - if (st.quotes && !done) + if (st->quotes && !done) { + auto quoted_mode = [this] (lexer_mode m) + { + // In the double-quoted mode we only do effective escaping of the + // special `$("\` characters, line continuations, plus `)` for + // symmetry. Nothing can be escaped in single-quoted. + // + const char* esc (m == lexer_mode::double_quoted ? "$()\"\\\n" : ""); + + state_.push (state { + m, 0, nullopt, false, false, '\0', false, true, true, + esc, nullptr, nullptr}); + }; + switch (c) { case '\'': @@ -826,7 +940,7 @@ namespace build2 // Enter the single-quoted mode in case the derived lexer needs // to notice this. // - mode (lexer_mode::single_quoted); + quoted_mode (lexer_mode::single_quoted); switch (qtype) { @@ -865,9 +979,10 @@ namespace build2 { get (); - mode (lexer_mode::double_quoted); - st = state_.top (); - m = st.mode; + quoted_mode (lexer_mode::double_quoted); + + st = &state_.top (); + m = st->mode; switch (qtype) { @@ -1023,6 +1138,8 @@ namespace build2 } case '\\': { + // See if this is line continuation. + // get (); if (peek () == '\n') |