aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2020-05-20 23:05:10 +0300
committerBoris Kolpackov <boris@codesynthesis.com>2020-05-27 08:38:57 +0200
commit84cc0fc42c6b86eb09b06c7f59a0beb94397a38a (patch)
treec16c2bcbb6dff26101b700bd3b338eed29512027
parent7765f51c5f6429fcb45a6c195b71929c392b0a31 (diff)
Complete dump(ostream,script::lines)
-rw-r--r--libbuild2/build/script/parser+line.test.testscript76
-rw-r--r--libbuild2/build/script/parser.test.cxx98
-rw-r--r--libbuild2/lexer+quoting.test.testscript2
-rw-r--r--libbuild2/script/script.cxx162
-rw-r--r--libbuild2/script/script.hxx6
5 files changed, 262 insertions, 82 deletions
diff --git a/libbuild2/build/script/parser+line.test.testscript b/libbuild2/build/script/parser+line.test.testscript
index fe38249..6401d91 100644
--- a/libbuild2/build/script/parser+line.test.testscript
+++ b/libbuild2/build/script/parser+line.test.testscript
@@ -3,46 +3,70 @@
test.options += -d
-#\
+: command
+:
+$* <<EOF >>EOF
+ foo >| 2>- &a &?b
+ foo >=c 2>~/error:.*/ &!c
+ foo >>:/~%EOS%
+ %.*
+ abc
+ %xyz.*%
+ EOS
+ EOF
+
: if-else
:
-$* <<EOI >|
+$* <<EOF >>EOF
if foo
bar
elif fox
- baz
+ if fix
+ baz
+ end
+ biz
end
if! foo
bar
elif! fox
baz
end
- EOI
-
-: command
-:
-$* <<EOI >|
- foo >| 2>- &a &?b
- foo >=c 2>~/error:.*/ &!c
- foo >>:/~%EOF%
- %.*
- abc
- %xyz.*%
EOF
- EOI
: quoting
:
-$* <<EOI >|
+$* <<EOI >>EOO
foo 'bar' "baz" '' ""
+ "$foo"
+ "foo$"
+ "fo"o
+ "foo"\"
+ "foo\\"
+ "foo\"<"
+ fo\"o
+ fo\\o
+ fo\<o
+ "fo<o"
+ 'fo\"o'
+ f"oo" "ba"r
+ f"oo" 'ba'r
+ "fo"'o'
+ 'foo b"ar baz'
EOI
-#\
-
-#\
- libbuild2/lexer+quoting.test.testscript
-
- : tmp
- :
- $* <'f"oo" "foo$"'
-
-#\
+ foo 'bar' "baz" '' ""
+ "$foo"
+ "foo$"
+ "foo"
+ "foo\""
+ "foo\\"
+ "foo\"<"
+ fo\"o
+ fo\\o
+ fo\<o
+ "fo<o"
+ 'fo\"o'
+ "foo bar"
+ "foo" 'bar'
+ "foo"
+ 'foo b"ar baz'
+ EOO
diff --git a/libbuild2/build/script/parser.test.cxx b/libbuild2/build/script/parser.test.cxx
index ab9935d..b179884 100644
--- a/libbuild2/build/script/parser.test.cxx
+++ b/libbuild2/build/script/parser.test.cxx
@@ -11,6 +11,7 @@
#include <libbuild2/context.hxx>
#include <libbuild2/scheduler.hxx>
+#include <libbuild2/build/script/script.hxx> // line
#include <libbuild2/build/script/parser.hxx>
#include <libbuild2/build/script/runner.hxx>
@@ -71,6 +72,7 @@ namespace build2
//
// argv[0] [-l]
// argv[0] -d
+ // argv[0] -p
//
// In the first form read the script from stdin and trace the script
// execution to stdout using the custom print runner.
@@ -78,44 +80,63 @@ namespace build2
// In the second form read the script from stdin, parse it and dump the
// resulting lines to stdout.
//
+ // In the third form read the script from stdin, parse it and print
+ // line tokens quoting information to stdout.
+ //
// -l
// Print the script line number for each executed expression.
//
// -d
// Dump the parsed script to sdout.
//
+ // -p
+ // Print the parsed script tokens quoting information to stdout. If a
+ // token is quoted follow its representation with its quoting
+ // information in the [<quoting>/<completeness>] form, where:
+ //
+ // <quoting> := 'S' | 'D' | 'M'
+ // <completeness> := 'C' | 'P'
+ //
int
main (int argc, char* argv[])
{
tracer trace ("main");
- // Fake build system driver, default verbosity.
- //
- init_diag (1);
- init (nullptr, argv[0]);
-
- // Serial execution.
- //
- scheduler sched (1);
- global_mutexes mutexes (1);
- context ctx (sched, mutexes);
+ enum class mode
+ {
+ run,
+ dump,
+ print
+ } m (mode::run);
- bool line (false);
- bool dump (false);
+ bool print_line (false);
for (int i (1); i != argc; ++i)
{
string a (argv[i]);
if (a == "-l")
- line = true;
+ print_line = true;
else if (a == "-d")
- dump = true;
+ m = mode::dump;
+ else if (a == "-p")
+ m = mode::print;
else
assert (false);
}
- assert (!dump || !line);
+ assert (m == mode::run || !print_line);
+
+ // Fake build system driver, default verbosity.
+ //
+ init_diag (1);
+ init (nullptr, argv[0]);
+
+ // Serial execution.
+ //
+ scheduler sched (1);
+ global_mutexes mutexes (1);
+ context ctx (sched, mutexes);
try
{
@@ -141,14 +162,49 @@ namespace build2
path_name nm ("buildfile");
script s (p.pre_parse (cin, nm, 11 /* line */));
- if (!dump)
+ switch (m)
{
- environment e (tt);
- print_runner r (line);
- p.execute (s, e, r);
+ case mode::run:
+ {
+ environment e (tt);
+ print_runner r (print_line);
+ p.execute (s, e, r);
+ break;
+ }
+ case mode::dump:
+ {
+ dump (cout, "", s.lines);
+ break;
+ }
+ case mode::print:
+ {
+ for (const line& l: s.lines)
+ {
+ for (const replay_token& rt: l.tokens)
+ {
+ if (&rt != &l.tokens[0])
+ cout << ' ';
+
+ const token& t (rt.token);
+ cout << t;
+
+ char q ('\0');
+ switch (t.qtype)
+ {
+ case quote_type::single: q = 'S'; break;
+ case quote_type::double_: q = 'D'; break;
+ case quote_type::mixed: q = 'M'; break;
+ case quote_type::unquoted: break;
+ }
+
+ if (q != '\0')
+ cout << " [" << q << (t.qcomp ? "/C" : "/P") << ']';
+ }
+ }
+
+ cout << endl;
+ }
}
- else
- build2::script::dump (cout, "", s.lines);
}
catch (const failed&)
{
diff --git a/libbuild2/lexer+quoting.test.testscript b/libbuild2/lexer+quoting.test.testscript
index debefc1..0143c90 100644
--- a/libbuild2/lexer+quoting.test.testscript
+++ b/libbuild2/lexer+quoting.test.testscript
@@ -47,8 +47,10 @@ EOO
}
: part
+:
{
: quoted
+ :
{
: start
: Token start already quoted
diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx
index 2529671..a93315f 100644
--- a/libbuild2/script/script.cxx
+++ b/libbuild2/script/script.cxx
@@ -4,6 +4,7 @@
#include <libbuild2/script/script.hxx>
#include <sstream>
+#include <cstring> // strchr()
using namespace std;
@@ -34,82 +35,173 @@ namespace build2
void
dump (ostream& os, const string& ind, const lines& ls)
{
+ // For each line print its tokens literal representation trying to
+ // reproduce the quoting. Consider mixed quoting as double quoting
+ // since the information is lost.
+ //
+ // Also additionally indent the if-branch lines.
+ //
+ string if_ind;
+
for (const line& l: ls)
{
- os << ind;
+ // Before printing indentation, decrease it if the else or end line is
+ // reached.
+ //
+ switch (l.type)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ case line_type::cmd_end:
+ {
+ size_t n (if_ind.size ());
+ assert (n >= 2);
+ if_ind.resize (n - 2);
+ break;
+ }
+ default: break;
+ }
- // @@ Should be across lines?
+ // Print indentations.
//
- // We will consider mixed quoting as a double quoting since the
- // information is lost and we won't be able to restore the token
- // original representation.
+ os << ind << if_ind;
+
+ // After printing indentation, increase it for if/else branch.
//
- char qseq ('\0'); // Can be used as bool.
+ switch (l.type)
+ {
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else: if_ind += " "; break;
+ default: break;
+ }
+
+ // '"' or '\'' if we are inside the quoted token sequence and '\0'
+ // otherwise. Thus, can be used as bool.
+ //
+ char qseq ('\0');
for (const replay_token& rt: l.tokens)
{
const token& t (rt.token);
- // Left and right quotes (can be used as bool).
+ // '"' or '\'' if the token is quoted and '\0' otherwise. Thus,
+ // can be used as bool.
+ //
+ char qtok ('\0');
+
+ switch (t.qtype)
+ {
+ case quote_type::unquoted: qtok = '\0'; break;
+ case quote_type::single: qtok = '\''; break;
+ case quote_type::mixed:
+ case quote_type::double_: qtok = '"'; break;
+ }
+
+ // If being inside a quoted token sequence we have reached a token
+ // quoted differently or the newline, then we probably made a
+ // mistake misinterpreting some previous partially quoted token, for
+ // example f"oo" as "foo. If that's the case, all we can do is to
+ // end the sequence adding the trailing quote.
+ //
+ // Note that a token inside the quoted sequence may well be
+ // unquoted, so for example "$foo" is lexed as:
+ //
+ // token quoting complete notes
+ // '' " no
+ // $ " yes
+ // 'foo' Unquoted since lexed in variable mode.
+ // '' " no
+ // \n
+ //
+ if (qseq &&
+ ((qtok && qtok != qseq) || t.type == token_type::newline))
+ {
+ os << qseq;
+ qseq = '\0';
+ }
+
+ // Left and right token quotes (can be used as bool).
//
char lq ('\0');
char rq ('\0');
- if (t.qtype != quote_type::unquoted)
+ // If the token is quoted, then determine if/which quotes should be
+ // present on its sides and track the quoted token sequence.
+ //
+ if (qtok)
{
- auto quote = [&t] ()
+ if (t.qcomp) // Complete token quoting.
{
- return t.qtype == quote_type::single ? '\'' : '"';
- };
-
- if (t.qcomp) // Complete quoting.
- {
- // If we are inside quoted token sequence then we do noting.
- // Otherwise we just quote the current token not starting a
+ // If we are inside a quoted token sequence then do nothing.
+ // Otherwise just quote the current token not starting a
// sequence.
//
if (!qseq)
{
- lq = quote ();
- rq = lq;
+ lq = qtok;
+ rq = qtok;
}
}
- else // Partial quoting.
+ else // Partial token quoting.
{
// Note that we can not always reproduce the original tokens
- // representation for partial quoting. For example, the
- // following two tokens are lexed into the identical token
- // objects:
+ // representation for partial quoting. For example, the two
+ // following tokens are lexed into the identical token objects:
//
// "foo
// f"oo"
//
- if (!qseq)
+ // We will always assume that the partially quoted token either
+ // starts or ends the quoted token sequence. Sometimes this gives
+ // an unexpected result, but it seems there is not much we can do:
+ //
+ // f"oo" "ba"r -> "foo bar"
+ //
+ if (!qseq) // Start quoted sequence.
{
- lq = quote ();
- qseq = lq;
+ lq = qtok;
+ qseq = qtok;
}
- else
+ else // End quoted sequence.
{
- rq = quote ();
+ rq = qtok;
qseq = '\0';
}
}
}
- // @@ Add 2 spaces indentation for if block contents.
-
+ // Print the space character prior to the separated token, unless
+ // it is the first token on the line or the newline.
+ //
if (t.separated &&
t.type != token_type::newline &&
- &rt != &l.tokens[0]) // Not first in the line.
+ &rt != &l.tokens[0])
os << ' ';
- if (lq) os << lq;
- t.printer (os, t, print_mode::raw);
- if (rq) os << rq;
+ if (lq) os << lq; // Print the left quote, if required.
+
+ // Escape the special characters, unless the token is not a word or
+ // is single-quoted. Note that the special character set depends on
+ // whether the word is double-quoted or unquoted.
+ //
+ if (t.type == token_type::word && qtok != '\'')
+ {
+ for (char c: t.value)
+ {
+ if (strchr (qtok ? "\\\"" : "|&<>=\\\"", c) != nullptr)
+ os << '\\';
+
+ os << c;
+ }
+ }
+ else
+ t.printer (os, t, print_mode::raw);
-// prev_qcomp = t.qcomp;
-// prev_qtype = t.qtype;
+ if (rq) os << rq; // Print the right quote, if required.
}
}
}
diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx
index 7d3fdd0..120cd1d 100644
--- a/libbuild2/script/script.hxx
+++ b/libbuild2/script/script.hxx
@@ -48,6 +48,12 @@ namespace build2
//
using lines = small_vector<line, 1>;
+ // Print the script lines, trying to reproduce their original (non-
+ // expanded) representation.
+ //
+ // Note that the exact spacing and partial quoting may not be restored due
+ // to the information loss.
+ //
LIBBUILD2_SYMEXPORT void
dump (ostream&, const string& ind, const lines&);