From 4b31ef06275ad423e48a75d15fb0ee21c3127e3c Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 29 Nov 2016 15:28:47 +0200 Subject: Fix escaping issue, propagate quoting for $ and ( tokens --- build2/lexer | 6 ++++-- build2/lexer.cxx | 19 +++++++++++++++---- build2/test/script/lexer | 4 +++- build2/test/script/lexer.cxx | 38 +++++++++++++++++++++++--------------- build2/token | 6 ++++++ unit-tests/lexer/quoting.test | 4 ++-- 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/build2/lexer b/build2/lexer index e2cf07c..ad00e83 100644 --- a/build2/lexer +++ b/build2/lexer @@ -82,12 +82,14 @@ namespace build2 name () const {return name_;} // Note: sets mode for the next token. The second argument can be used to - // specifythe pair separator character (if the mode supports pairs). + // specify the pair separator character (if the mode supports pairs). If + // escapes are not specified, then inherit the current mode's (though a mode + // can also override it). // virtual void mode (lexer_mode, char pair_separator = '\0', - const char* escapes = nullptr); + optional<const char*> escapes = nullopt); // Expire the current mode early. // diff --git a/build2/lexer.cxx b/build2/lexer.cxx index 3c8eb5a..8918740 100644 --- a/build2/lexer.cxx +++ b/build2/lexer.cxx @@ -30,13 +30,19 @@ namespace build2 } void lexer:: - mode (lexer_mode m, char ps, const char* esc) + mode (lexer_mode m, char ps, optional<const char*> esc) { const char* s1 (nullptr); const char* s2 (nullptr); bool s (true); bool q (true); + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + switch (m) { case lexer_mode::normal: @@ -76,7 +82,7 @@ namespace build2 default: assert (false); // Unhandled custom mode. 
} - state_.push (state {m, ps, s, q, esc, s1, s2}); + state_.push (state {m, ps, s, q, *esc, s1, s2}); } token lexer:: @@ -272,10 +278,15 @@ namespace build2 uint64_t ln (c.line), cn (c.column); + auto make_token = [ln, cn] (type t) + { + return token (t, false, quote_type::double_, ln, cn, token_printer); + }; + switch (c) { - case '$': return token (type::dollar, false, ln, cn, token_printer); - case '(': return token (type::lparen, false, ln, cn, token_printer); + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); } // Otherwise it is a word. diff --git a/build2/test/script/lexer b/build2/test/script/lexer index b812f84..a6a96ca 100644 --- a/build2/test/script/lexer +++ b/build2/test/script/lexer @@ -55,7 +55,9 @@ namespace build2 } virtual void - mode (base_mode, char = '\0', const char* = nullptr) override; + mode (base_mode, + char = '\0', + optional<const char*> = nullopt) override; // Number of quoted (double or single) tokens since last reset. // diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx index cdf726b..74aa02e 100644 --- a/build2/test/script/lexer.cxx +++ b/build2/test/script/lexer.cxx @@ -17,13 +17,19 @@ namespace build2 using type = token_type; void lexer:: - mode (base_mode m, char ps, const char* esc) + mode (base_mode m, char ps, optional<const char*> esc) { const char* s1 (nullptr); const char* s2 (nullptr); bool s (true); bool q (true); + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + switch (m) { case lexer_mode::script_line: @@ -129,7 +135,7 @@ namespace build2 } assert (ps == '\0'); - state_.push (state {m, ps, s, q, esc, s1, s2}); + state_.push (state {m, ps, s, q, *esc, s1, s2}); } token lexer:: @@ -170,16 +176,24 @@ namespace build2 xchar c (get ()); uint64_t ln (c.line), cn (c.column); - auto make_token = [&sep, ln, cn] (type t) + if (eos (c)) + return token (type::eos, sep, ln, cn, token_printer); + + state st (state_.top ()); // Make copy (see first/second_token). 
+ lexer_mode m (st.mode); + + auto make_token = [&sep, &m, ln, cn] (type t, string v = string ()) { - return token (t, sep, ln, cn, token_printer); - }; + bool q (m == lexer_mode::here_line_double); - if (eos (c)) - return make_token (type::eos); + return token (t, move (v), sep, + (q ? quote_type::double_ : quote_type::unquoted), q, + ln, cn, + token_printer); + }; auto make_token_with_modifiers = - [&sep, ln, cn, this] (type t, const char* mods, bool exc = false) + [&make_token, this] (type t, const char* mods, bool exc = false) { string v; if (mods != nullptr) @@ -199,15 +213,9 @@ namespace build2 } } - return token (t, move (v), sep, - quote_type::unquoted, false, - ln, cn, - token_printer); + return make_token (t, move (v)); }; - state st (state_.top ()); // Make copy (see first/second_token). - lexer_mode m (st.mode); - // Expire certain modes at the end of the token. Do it early in case // we push any new mode (e.g., double quote). // diff --git a/build2/token b/build2/token index 0dc914f..3d580b9 100644 --- a/build2/token +++ b/build2/token @@ -98,6 +98,12 @@ namespace build2 token (token_type t, bool s, uint64_t l, uint64_t c, printer_type* p) : token (t, string (), s, quote_type::unquoted, false, l, c, p) {} + token (token_type t, bool s, + quote_type qt, + uint64_t l, uint64_t c, + printer_type* p) + : token (t, string (), s, qt, qt != quote_type::unquoted, l, c, p) {} + token (string v, bool s, quote_type qt, bool qc, uint64_t l, uint64_t c) diff --git a/unit-tests/lexer/quoting.test b/unit-tests/lexer/quoting.test index aab02c3..efe5e05 100644 --- a/unit-tests/lexer/quoting.test +++ b/unit-tests/lexer/quoting.test @@ -48,7 +48,7 @@ EOO : $* <'"$foo"' >>EOO '' [D/P] -$ +$ [D/C] 'foo' [D/P] EOO @@ -58,7 +58,7 @@ EOO : $* <'"foo$"' >>EOO 'foo' [D/P] -$ +$ [D/C] '' [D/P] EOO -- cgit v1.1