From 757f42e7dea94f8b79b3d55074dedeafd853ddc5 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 25 Nov 2016 15:17:01 +0200 Subject: Implement literal here-document support --- build2/lexer | 10 +++--- build2/lexer.cxx | 10 +++--- build2/test/script/lexer | 10 ++++-- build2/test/script/lexer.cxx | 34 +++++++++++++++--- build2/test/script/parser | 6 ++-- build2/test/script/parser.cxx | 81 ++++++++++++++++++++++++++++--------------- 6 files changed, 105 insertions(+), 46 deletions(-) (limited to 'build2') diff --git a/build2/lexer b/build2/lexer index 59150a9..e2cf07c 100644 --- a/build2/lexer +++ b/build2/lexer @@ -85,7 +85,9 @@ namespace build2 // specify the pair separator character (if the mode supports pairs). // virtual void - mode (lexer_mode, char pair_separator = '\0'); + mode (lexer_mode, + char pair_separator = '\0', + const char* escapes = nullptr); // Expire the current mode early. // @@ -119,6 +121,8 @@ namespace build2 bool sep_space; // Are whitespaces separators (see skip_spaces())? bool quotes; // Recognize quoted fragments. + const char* escapes; // Effective escape sequences to recognize. + // Word separator characters. For two-character sequence put the first // one in sep_first and the second one in the corresponding position of // sep_second. 
If it's a single-character sequence, then put space in @@ -170,16 +174,14 @@ namespace build2 : char_scanner (is), fail ("error", &name_), name_ (n), - escapes_ (e), processor_ (p), sep_ (false) { if (sm) - mode (lexer_mode::normal, '@'); + mode (lexer_mode::normal, '@', e); } const path name_; - const char* escapes_; void (*processor_) (token&, const lexer&); std::stack state_; diff --git a/build2/lexer.cxx b/build2/lexer.cxx index b73c291..3c8eb5a 100644 --- a/build2/lexer.cxx +++ b/build2/lexer.cxx @@ -30,7 +30,7 @@ namespace build2 } void lexer:: - mode (lexer_mode m, char ps) + mode (lexer_mode m, char ps, const char* esc) { const char* s1 (nullptr); const char* s2 (nullptr); @@ -76,7 +76,7 @@ namespace build2 default: assert (false); // Unhandled custom mode. } - state_.push (state {m, ps, s, q, s1, s2}); + state_.push (state {m, ps, s, q, esc, s1, s2}); } token lexer:: @@ -329,8 +329,10 @@ namespace build2 get (); xchar p (peek ()); - if (escapes_ == nullptr || - (!eos (p) && strchr (escapes_, p) != nullptr)) + const char* esc (st.escapes); + + if (esc == nullptr || + (*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr)) { get (); diff --git a/build2/test/script/lexer b/build2/test/script/lexer index 5597e9a..b812f84 100644 --- a/build2/test/script/lexer +++ b/build2/test/script/lexer @@ -29,7 +29,8 @@ namespace build2 second_token, // Expires at the end of the token. variable_line, // Expires at the end of the line. command_line, - here_line, + here_line_single, + here_line_double, description_line // Expires at the end of the line. 
}; @@ -48,10 +49,13 @@ namespace build2 const path& name, lexer_mode m, const char* escapes = nullptr) - : base_lexer (is, name, escapes, nullptr, false) {mode (m);} + : base_lexer (is, name, nullptr, nullptr, false) + { + mode (m, '\0', escapes); + } virtual void - mode (base_mode, char = '\0') override; + mode (base_mode, char = '\0', const char* = nullptr) override; // Number of quoted (double or single) tokens since last reset. // diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx index 19e7498..72fa85b 100644 --- a/build2/test/script/lexer.cxx +++ b/build2/test/script/lexer.cxx @@ -15,7 +15,7 @@ namespace build2 using type = token_type; void lexer:: - mode (base_mode m, char ps) + mode (base_mode m, char ps, const char* esc) { const char* s1 (nullptr); const char* s2 (nullptr); @@ -76,7 +76,23 @@ namespace build2 s = false; break; } - case lexer_mode::here_line: + case lexer_mode::here_line_single: + { + // This one is like a single-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + // Note that it might be tempting to enable line continuation + // escapes. However, we will then have to also enable escaping of + // the backslash, which makes it a lot less tempting. + // + s1 = "\n"; + s2 = " "; + esc = ""; // Disable escape sequences. + s = false; + q = false; + break; + } + case lexer_mode::here_line_double: { // This one is like a double-quoted string except it treats // newlines as a separator. We also treat quotes as literals. 
@@ -105,13 +121,13 @@ namespace build2 m == lexer_mode::eval || m == lexer_mode::attribute); - base_lexer::mode (m, ps); + base_lexer::mode (m, ps, esc); return; } } assert (ps == '\0'); - state_.push (state {m, ps, s, q, s1, s2}); + state_.push (state {m, ps, s, q, esc, s1, s2}); } token lexer:: @@ -126,7 +142,8 @@ namespace build2 case lexer_mode::second_token: case lexer_mode::variable_line: case lexer_mode::command_line: - case lexer_mode::here_line: + case lexer_mode::here_line_single: + case lexer_mode::here_line_double: r = next_line (); break; case lexer_mode::description_line: @@ -184,7 +201,13 @@ namespace build2 sep = true; // Treat newline as always separated. return make_token (type::newline); } + } + } + if (m != lexer_mode::here_line_single) + { + switch (c) + { // Variable expansion, function call, and evaluation context. // case '$': return make_token (type::dollar); @@ -192,6 +215,7 @@ namespace build2 } } + if (m == lexer_mode::variable_line) { switch (c) diff --git a/build2/test/script/parser b/build2/test/script/parser index fdfbe11..da82df2 100644 --- a/build2/test/script/parser +++ b/build2/test/script/parser @@ -99,10 +99,10 @@ namespace build2 { size_t expr; // Index in command_expr. size_t pipe; // Index in command_pipe. - size_t redir; // Redirect (0 - in, 1 - out, 2 - err). - + int fd; // Redirect fd (0 - in, 1 - out, 2 - err). string end; - bool no_newline; + bool literal; // Literal (single-quote). + bool no_newline; // No final newline. }; using here_docs = vector; diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index 7655ba9..9e2018f 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -1261,11 +1261,11 @@ namespace build2 cleanup_type ct; // Pending cleanup type. here_docs hd; // Expected here-documents. - // Add the next word to either one of the pending positions or - // to program arguments by default. 
+ // Add the next word to either one of the pending positions or to + // program arguments by default. // - auto add_word = [&expr, &c, &p, &nn, &app, &ct, &hd, this] - (string&& w, const location& l) + auto add_word = + [&c, &p, &nn, &app, &ct, this] (string&& w, const location& l) { auto add_merge = [&l, this] (redirect& r, const string& w, int fd) { @@ -1290,13 +1290,6 @@ namespace build2 r.str = move (w); }; - auto add_here_end = [&expr, &hd, &nn] (size_t r, string&& w) - { - hd.push_back ( - here_doc { - expr.size () - 1, expr.back ().pipe.size (), r, move (w), nn}); - }; - auto parse_path = [&l, this] (string&& w, const char* what) -> path { try @@ -1335,10 +1328,8 @@ namespace build2 { case pending::none: c.arguments.push_back (move (w)); break; case pending::program: - { c.program = parse_path (move (w), "program path"); break; - } case pending::out_merge: add_merge (c.out, w, 2); break; case pending::err_merge: add_merge (c.err, w, 1); break; @@ -1347,21 +1338,19 @@ namespace build2 case pending::out_string: add_here_str (c.out, move (w)); break; case pending::err_string: add_here_str (c.err, move (w)); break; - case pending::in_document: add_here_end (0, move (w)); break; - case pending::out_document: add_here_end (1, move (w)); break; - case pending::err_document: add_here_end (2, move (w)); break; + // These are handled specially below. 
+ // + case pending::in_document: + case pending::out_document: + case pending::err_document: assert (false); break; case pending::in_file: add_file (c.in, 0, move (w)); break; case pending::out_file: add_file (c.out, 1, move (w)); break; case pending::err_file: add_file (c.err, 2, move (w)); break; case pending::clean: - { - c.cleanups.push_back ( - {ct, parse_path (move (w), "cleanup path")}); - - break; - } + c.cleanups.push_back ({ct, parse_path (move (w), "cleanup path")}); + break; } p = pending::none; @@ -1692,7 +1681,9 @@ namespace build2 fail (t) << "partially-quoted here-document end marker"; } - hd.push_back (here_doc {0, 0, 0, move (t.value), nn}); + hd.push_back ( + here_doc { + 0, 0, 0, move (t.value), qt == quote_type::single, nn}); break; } @@ -1774,6 +1765,40 @@ namespace build2 } default: { + // Here-document end markers are literal (we verified that above + // during pre-parsing) and we need to know whether they were + // quoted. So handle this case specially. + // + { + int fd; + switch (p) + { + case pending::in_document: fd = 0; break; + case pending::out_document: fd = 1; break; + case pending::err_document: fd = 2; break; + default: fd = -1; break; + } + + if (fd != -1) + { + hd.push_back ( + here_doc { + expr.size () - 1, + expr.back ().pipe.size (), + fd, + move (t.value), + (t.qtype == quote_type::unquoted || + t.qtype == quote_type::single), + nn}); + + p = pending::none; + nn = false; + + next (t, tt); + break; + } + } + // Parse the next chunk as simple names to get expansion, etc. // Note that we do it in the chunking mode to detect whether // anything in each chunk is quoted. @@ -2060,10 +2085,12 @@ namespace build2 // for (here_doc& h: p.second) { - // Switch to the here-line mode which is like double-quoted but - // recognized the newline as a separator. + // Switch to the here-line mode which is like single/double-quoted + // string but recognizes the newline as a separator. // - mode (lexer_mode::here_line); + mode (h.literal + ? 
lexer_mode::here_line_single + : lexer_mode::here_line_double); next (t, tt); string v (parse_here_document (t, tt, h.end, h.no_newline)); @@ -2071,7 +2098,7 @@ namespace build2 if (!pre_parse_) { command& c (p.first[h.expr].pipe[h.pipe]); - redirect& r (h.redir == 0 ? c.in : h.redir == 1 ? c.out : c.err); + redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err); r.doc.doc = move (v); r.doc.end = move (h.end); -- cgit v1.1