From 84cc0fc42c6b86eb09b06c7f59a0beb94397a38a Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 20 May 2020 23:05:10 +0300 Subject: Complete dump(ostream,script::lines) --- libbuild2/build/script/parser+line.test.testscript | 76 ++++++---- libbuild2/build/script/parser.test.cxx | 98 ++++++++++--- libbuild2/lexer+quoting.test.testscript | 2 + libbuild2/script/script.cxx | 162 ++++++++++++++++----- libbuild2/script/script.hxx | 6 + 5 files changed, 262 insertions(+), 82 deletions(-) diff --git a/libbuild2/build/script/parser+line.test.testscript b/libbuild2/build/script/parser+line.test.testscript index fe38249..6401d91 100644 --- a/libbuild2/build/script/parser+line.test.testscript +++ b/libbuild2/build/script/parser+line.test.testscript @@ -3,46 +3,70 @@ test.options += -d -#\ +: command +: +$* <>EOF + foo >| 2>- &a &?b + foo >=c 2>~/error:.*/ &!c + foo >>:/~%EOS% + %.* + abc + %xyz.*% + EOS + EOF + : if-else : -$* <| +$* <>EOF if foo bar elif fox - baz + if fix + baz + end + biz end if! foo bar elif! fox baz end - EOI - -: command -: -$* <| - foo >| 2>- &a &?b - foo >=c 2>~/error:.*/ &!c - foo >>:/~%EOF% - %.* - abc - %xyz.*% EOF - EOI : quoting : -$* <| +$* <>EOO foo 'bar' "baz" '' "" + "$foo" + "foo$" + "fo"o + "foo"\" + "foo\\" + "foo\"<" + fo\"o + fo\\o + fo\ #include +#include // line #include #include @@ -71,6 +72,7 @@ namespace build2 // // argv[0] [-l] // argv[0] -d + // argv[0] -p // // In the first form read the script from stdin and trace the script // execution to stdout using the custom print runner. @@ -78,44 +80,63 @@ namespace build2 // In the second form read the script from stdin, parse it and dump the // resulting lines to stdout. // + // In the third form read the script from stdin, parse it and print + // line tokens quoting information to stdout. + // // -l // Print the script line number for each executed expression. // // -d // Dump the parsed script to sdout. 
// + // -p + // Print the parsed script tokens quoting information to stdout. If a + // token is quoted follow its representation with its quoting + // information in the [<quoting>/<completeness>] form, where: + // + // <quoting> := 'S' | 'D' | 'M' + // <completeness> := 'C' | 'P' + // int main (int argc, char* argv[]) { tracer trace ("main"); - // Fake build system driver, default verbosity. - // - init_diag (1); - init (nullptr, argv[0]); - - // Serial execution. - // - scheduler sched (1); - global_mutexes mutexes (1); - context ctx (sched, mutexes); + enum class mode + { + run, + dump, + print + } m (mode::run); - bool line (false); - bool dump (false); + bool print_line (false); for (int i (1); i != argc; ++i) { string a (argv[i]); if (a == "-l") - line = true; + print_line = true; else if (a == "-d") - dump = true; + m = mode::dump; + else if (a == "-p") + m = mode::print; else assert (false); } - assert (!dump || !line); + assert (m == mode::run || !print_line); + + // Fake build system driver, default verbosity. + // + init_diag (1); + init (nullptr, argv[0]); + + // Serial execution. 
+ // + scheduler sched (1); + global_mutexes mutexes (1); + context ctx (sched, mutexes); try { @@ -141,14 +162,49 @@ namespace build2 path_name nm ("buildfile"); script s (p.pre_parse (cin, nm, 11 /* line */)); - if (!dump) + switch (m) { - environment e (tt); - print_runner r (line); - p.execute (s, e, r); + case mode::run: + { + environment e (tt); + print_runner r (print_line); + p.execute (s, e, r); + break; + } + case mode::dump: + { + dump (cout, "", s.lines); + break; + } + case mode::print: + { + for (const line& l: s.lines) + { + for (const replay_token& rt: l.tokens) + { + if (&rt != &l.tokens[0]) + cout << ' '; + + const token& t (rt.token); + cout << t; + + char q ('\0'); + switch (t.qtype) + { + case quote_type::single: q = 'S'; break; + case quote_type::double_: q = 'D'; break; + case quote_type::mixed: q = 'M'; break; + case quote_type::unquoted: break; + } + + if (q != '\0') + cout << " [" << q << (t.qcomp ? "/C" : "/P") << ']'; + } + } + + cout << endl; + } } - else - build2::script::dump (cout, "", s.lines); } catch (const failed&) { diff --git a/libbuild2/lexer+quoting.test.testscript b/libbuild2/lexer+quoting.test.testscript index debefc1..0143c90 100644 --- a/libbuild2/lexer+quoting.test.testscript +++ b/libbuild2/lexer+quoting.test.testscript @@ -47,8 +47,10 @@ EOO } : part +: { : quoted + : { : start : Token start already quoted diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index 2529671..a93315f 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -4,6 +4,7 @@ #include #include +#include // strchr() using namespace std; @@ -34,82 +35,173 @@ namespace build2 void dump (ostream& os, const string& ind, const lines& ls) { + // For each line print its tokens literal representation trying to + // reproduce the quoting. Consider mixed quoting as double quoting + // since the information is lost. + // + // Also additionally indent the if-branch lines. 
+ // + string if_ind; + for (const line& l: ls) { - os << ind; + // Before printing indentation, decrease it if the else or end line is + // reached. + // + switch (l.type) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + case line_type::cmd_end: + { + size_t n (if_ind.size ()); + assert (n >= 2); + if_ind.resize (n - 2); + break; + } + default: break; + } - // @@ Should be across lines? + // Print indentations. // - // We will consider mixed quoting as a double quoting since the - // information is lost and we won't be able to restore the token - // original representation. + os << ind << if_ind; + + // After printing indentation, increase it for if/else branch. // - char qseq ('\0'); // Can be used as bool. + switch (l.type) + { + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: if_ind += " "; break; + default: break; + } + + // '"' or '\'' if we are inside the quoted token sequence and '\0' + // otherwise. Thus, can be used as bool. + // + char qseq ('\0'); for (const replay_token& rt: l.tokens) { const token& t (rt.token); - // Left and right quotes (can be used as bool). + // '"' or '\'' if the token is quoted and '\0' otherwise. Thus, + // can be used as bool. + // + char qtok ('\0'); + + switch (t.qtype) + { + case quote_type::unquoted: qtok = '\0'; break; + case quote_type::single: qtok = '\''; break; + case quote_type::mixed: + case quote_type::double_: qtok = '"'; break; + } + + // If being inside a quoted token sequence we have reached a token + // quoted differently or the newline, then we probably made a + // mistake misinterpreting some previous partially quoted token, for + // example f"oo" as "foo. If that's the case, all we can do is to + // end the sequence adding the trailing quote. 
// + // Note that a token inside the quoted sequence may well be + // unquoted, so for example "$foo" is lexed as: + // + // token quoting complete notes + // '' " no + // $ " yes + // 'foo' Unquoted since lexed in variable mode. + // '' " no + // \n + // + if (qseq && + ((qtok && qtok != qseq) || t.type == token_type::newline)) + { + os << qseq; + qseq = '\0'; + } + + // Left and right token quotes (can be used as bool). // char lq ('\0'); char rq ('\0'); - if (t.qtype != quote_type::unquoted) + // If the token is quoted, then determine if/which quotes should be + // present on its sides and track the quoted token sequence. + // + if (qtok) { - auto quote = [&t] () + if (t.qcomp) // Complete token quoting. { - return t.qtype == quote_type::single ? '\'' : '"'; - }; - - if (t.qcomp) // Complete quoting. - { - // If we are inside quoted token sequence then we do noting. - // Otherwise we just quote the current token not starting a + // If we are inside a quoted token sequence then do nothing. + // Otherwise just quote the current token not starting a // sequence. // if (!qseq) { - lq = quote (); - rq = lq; + lq = qtok; + rq = qtok; } } - else // Partial quoting. + else // Partial token quoting. { // Note that we can not always reproduce the original tokens - // representation for partial quoting. For example, the - // following two tokens are lexed into the identical token - // objects: + // representation for partial quoting. For example, the two + // following tokens are lexed into the identical token objects: // // "foo // f"oo" // - if (!qseq) + // We will always assume that the partially quoted token either + // starts or ends the quoted token sequence. Sometimes this ends + // up unexpectedly, but seems there is not much we can do: + // + // f"oo" "ba"r -> "foo bar" + // + if (!qseq) // Start quoted sequence. { - lq = quote (); - qseq = lq; + lq = qtok; + qseq = qtok; } - else + else // End quoted sequence. 
+ { - rq = quote (); + rq = qtok; qseq = '\0'; } } } - // @@ Add 2 spaces indentation for if block contents. - + // Print the space character prior to the separated token, unless + // it is the first token in the line or the newline. + // if (t.separated && t.type != token_type::newline && - &rt != &l.tokens[0]) // Not first in the line. + &rt != &l.tokens[0]) os << ' '; - if (lq) os << lq; - t.printer (os, t, print_mode::raw); - if (rq) os << rq; + if (lq) os << lq; // Print the left quote, if required. + + // Escape the special characters, unless the token is not a word or + // is single-quoted. Note that the special character set depends on + // whether the word is double-quoted or unquoted. + // + if (t.type == token_type::word && qtok != '\'') + { + for (char c: t.value) + { + if (strchr (qtok ? "\\\"" : "|&<>=\\\"", c) != nullptr) + os << '\\'; + + os << c; + } + } + else + t.printer (os, t, print_mode::raw); -// prev_qcomp = t.qcomp; -// prev_qtype = t.qtype; + if (rq) os << rq; // Print the right quote, if required. } } } diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index 7d3fdd0..120cd1d 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -48,6 +48,12 @@ namespace build2 // using lines = small_vector; + // Print the script lines, trying to reproduce their original (non- + // expanded) representation. + // + // Note that the exact spacing and partial quoting may not be restored due + // to the information loss. + // LIBBUILD2_SYMEXPORT void dump (ostream&, const string& ind, const lines&); -- cgit v1.1