aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2022-04-26 10:39:03 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2022-04-26 10:39:03 +0200
commit d92635b74d6d95e27c8b24bedfaa572019c23fb6 (patch)
tree 3cdfdce15e2107dad3f316bafa69ffb48f528f2f
parent 48707e16dd0c8806e99387b0718a078ecf092f69 (diff)
Use new cmdline type for canned command lines in {Build,Test}script (cmdline-relex)
-rw-r--r-- doc/testscript.cli 50
-rw-r--r-- libbuild2/parser.cxx 1
-rw-r--r-- libbuild2/script/parser.cxx 50
-rw-r--r-- libbuild2/variable.cxx 79
-rw-r--r-- libbuild2/variable.hxx 26
-rw-r--r-- tests/recipe/buildscript/testscript 24
6 files changed, 197 insertions, 33 deletions
diff --git a/doc/testscript.cli b/doc/testscript.cli
index 1395363..1da111b 100644
--- a/doc/testscript.cli
+++ b/doc/testscript.cli
@@ -1285,62 +1285,54 @@ here-document single-quoted here_line_single
here-document double-quoted here_line_double expansions
\
-Finally, unquoted expansions in command lines (test, setup, and teardown) are
-re-lexed in the \c{command_expansion} mode in order to recognize command line
-syntax tokens (redirects, pipes, etc). To illustrate why this re-lexing is
-necessary, consider the following example of a \"canned\" command line:
+Finally, unquoted expansions in command lines (test, setup, and teardown) of
+the special \c{cmdline} type are re-lexed in the \c{command_expansion} mode in
+order to recognize command line syntax tokens (redirects, pipes, etc). To
+illustrate this mechanism, consider the following example of a \"canned\"
+command line:
\
-x = echo >-
-$x foo
+cmd = [cmdline] echo >-
+$cmd foo
\
-The test command line token sequence will be \c{$}, \c{x}, \c{foo}. After the
-expansion we have \c{echo}, \c{>-}, \c{foo}, however, the second element
-(\c{>-}) is not (yet) recognized as a redirect. To recognize it we re-lex
-the result of the expansion.
+The test command line token sequence will be \c{$}, \c{cmd}, \c{foo}. After
+the expansion we have \c{echo}, \c{>-}, \c{foo}; however, the second element
+(\c{>-}) is not (yet) recognized as a redirect. To recognize it, the result of
+the expansion is re-lexed.
Note that besides the few command line syntax characters, re-lexing will also
\"consume\" quotes and escapes, for example:
\
-args = \"'foo'\" # 'foo'
-echo $args # echo foo
+cmd = [cmdline] echo \"'foo'\" # echo 'foo'
+$cmd # echo foo
\
To preserve quotes in this context we need to escape them:
\
-args = \"\\\\'foo\\\\'\" # \'foo\'
-echo $args # echo 'foo'
-\
-
-Alternatively, for a single value, we could quote the expansion (in order
-to suppress re-lexing; note, however, that quoting will also inhibit
-word-splitting):
-
-\
-arg = \"'foo'\" # 'foo'
-echo \"$arg\" # echo 'foo'
+cmd = [cmdline] echo \"\\\\'foo\\\\'\" # echo \'foo\'
+$cmd # echo 'foo'
\
To minimize unhelpful consumption of escape sequences (for example, in Windows
paths), re-lexing only performs the \i{effective escaping} for the \c{'\"\\}
characters. All other escape sequences are passed through uninterpreted. Note
-that this means there is no way to escape command line syntax characters. The
-recommendation is to use quoting except for passing literal quotes, for
-example:
+that this means there is no way to escape command line syntax characters in
+canned commands. The recommendation is to use quoting except for passing
+literal quotes, for example:
\
-args = \'&foo\' # '&foo'
-echo $args # echo &foo
+cmd = [cmdline] echo \'&foo\' # echo '&foo'
+$cmd # echo &foo
\
To make sure that a string is passed as is through both expansions use the
\i{doubled single-quoting} idiom, for example:
\
-filter = sed -e \''s/foo (bar|baz)/$&/'\'
+filter = [cmdline] sed -e \''s/foo (bar|baz)/$&/'\'
$* <<EOI | $filter >>EOO
...
EOI
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 99e67a7..3f876e7 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -4657,6 +4657,7 @@ namespace build2
n == "paths" ? ptr (value_traits<paths>::value_type) :
n == "dir_paths" ? ptr (value_traits<dir_paths>::value_type) :
n == "names" ? ptr (value_traits<vector<name>>::value_type) :
+ n == "cmdline" ? ptr (value_traits<cmdline>::value_type) :
nullptr;
}
diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx
index 82eb9c8..e41b2b7 100644
--- a/libbuild2/script/parser.cxx
+++ b/libbuild2/script/parser.cxx
@@ -1092,15 +1092,57 @@ namespace build2
// Process what we got.
//
- // First see if this is a value that should not be re-lexed. The
- // long term plan is to only re-lex values of a special type
- // representing a canned command line.
+ // First see if this is a value that should not be re-lexed. We
+ // only re-lex values of the special `cmdline` type that
+ // represents a canned command line.
//
// Otherwise, determine whether anything inside was quoted (note
// that the current token is "next" and is not part of this).
//
+ // @@ TMP: switch Testscript to cmdline and get rid of relex_.
+ //
+ // @@ parse_names() is currently broken: the implementation should
+ // re-lex if what it parses is cmdline otherwise there could be
+ // quoting that will not be consumed (note: mention this in
+ // function documentation). But it can also leave some names
+ // (in ns) unprocessed which we pick up here: are they already
+ // re-lexed or not? Feels like could be either way so perhaps
+ // need to return an indication. Check the implementation in
+ // Buildscript if we actually need this flexibility.
+ //
+ // @@ For Testscript we also have $* and $N where $* is assembled
+ // from $test, $test.options, $test.arguments, $test.redirects
+ // and $test.cleanups. Currently they are string/strings. Feels
+ // like we need the following changes:
+ //
+ // - $*, $test.redirects, $test.cleanups should become cmdline
+ //
+ // - the rest should stay string/strings
+ //
+ // - $N should only consider $test, $test.options, $test.arguments
+ //
+ // - we will need to effective-quote the $test, $test.options,
+ // and $test.arguments in $* (since they will be re-lexed). See
+ // the Testscript manual for details on quoting semantics. In
+ // particular, we cannot escape special chars (|<>&) so we
+ // have to rely on quoting. We can use single-quoting for
+ // everything unless the value contains a single quote.
+ // In which case we should probably just do separately-
+ // quoted regions, for example:
+ //
+ // <'>
+ //
+ // Can be quoted as:
+ //
+ // '<'"'"'>'
+ //
+ // - if all this pans out, need to document in Testscript
+ // manual which special vars are cmdline.
+ //
bool q (
- (pr.value && !relex_) ||
+ (pr.value &&
+ !relex_ &&
+ (pr.type == nullptr || !pr.type->is_a<cmdline> ())) ||
(quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
for (name& n: ns)
diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx
index 8a063f7..a224531 100644
--- a/libbuild2/variable.cxx
+++ b/libbuild2/variable.cxx
@@ -1420,6 +1420,85 @@ namespace build2
&default_empty<project_name>
};
+ // cmdline
+ //
+ cmdline value_traits<cmdline>::
+ convert (names&& ns)
+ {
+ return cmdline (value_traits<strings>::convert (move (ns)));
+ }
+
+ void
+ cmdline_assign (value& v, names&& ns, const variable* var)
+ {
+ if (!v)
+ {
+ new (&v.data_) cmdline ();
+ v.null = false;
+ }
+
+ vector_assign<string> (v, move (ns), var);
+ }
+
+ void
+ cmdline_append (value& v, names&& ns, const variable* var)
+ {
+ if (!v)
+ {
+ new (&v.data_) cmdline ();
+ v.null = false;
+ }
+
+ vector_append<string> (v, move (ns), var);
+ }
+
+ void
+ cmdline_prepend (value& v, names&& ns, const variable* var)
+ {
+ if (!v)
+ {
+ new (&v.data_) cmdline ();
+ v.null = false;
+ }
+
+ vector_prepend<string> (v, move (ns), var);
+ }
+
+ static names_view
+ cmdline_reverse (const value& v, names& s)
+ {
+ return vector_reverse<string> (v, s);
+ }
+
+ static int
+ cmdline_compare (const value& l, const value& r)
+ {
+ return vector_compare<string> (l, r);
+ }
+
+ const cmdline value_traits<cmdline>::empty_instance;
+
+ const char* const value_traits<cmdline>::type_name = "cmdline";
+
+ const value_type value_traits<cmdline>::value_type
+ {
+ type_name,
+ sizeof (cmdline),
+ &value_traits<strings>::value_type, // Base (assuming direct cast works
+ // for both).
+ &value_traits<string>::value_type,
+ &default_dtor<cmdline>,
+ &default_copy_ctor<cmdline>,
+ &default_copy_assign<cmdline>,
+ &cmdline_assign,
+ &cmdline_append,
+ &cmdline_prepend,
+ &cmdline_reverse,
+ nullptr, // No cast (cast data_ directly).
+ &cmdline_compare,
+ &default_empty<cmdline>
+ };
+
// variable_pool
//
void variable_pool::
diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx
index 2bfab05..c4639c1 100644
--- a/libbuild2/variable.hxx
+++ b/libbuild2/variable.hxx
@@ -1141,6 +1141,32 @@ namespace build2
static const map_value_type<K, V> value_type;
};
+ // Canned command line to be re-lexed (used in {Build,Test}scripts).
+ //
+ struct cmdline: strings
+ {
+ using strings::strings;
+
+ cmdline () {} // For Clang.
+ cmdline (strings&& v): strings (move (v)) {}
+ };
+
+ template <>
+ struct LIBBUILD2_SYMEXPORT value_traits<cmdline>
+ {
+ static_assert (sizeof (cmdline) <= value::size_, "insufficient space");
+
+ static cmdline convert (names&&);
+ static void assign (value&, cmdline&&);
+ static void append (value&, cmdline&&);
+ static void prepend (value&, cmdline&&);
+ static bool empty (const cmdline& x) {return x.empty ();}
+
+ static const cmdline empty_instance;
+ static const char* const type_name;
+ static const build2::value_type value_type;
+ };
+
// Explicitly pre-instantiate and export value_traits templates for
// vector/map value types used in the build2 project. Note that this is not
// merely an optimization since not doing so we may end up with multiple
diff --git a/tests/recipe/buildscript/testscript b/tests/recipe/buildscript/testscript
index 910ee67..8632280 100644
--- a/tests/recipe/buildscript/testscript
+++ b/tests/recipe/buildscript/testscript
@@ -700,6 +700,30 @@ posix = ($cxx.target.class != 'windows')
$* clean 2>-
}
+: canned-cmdline
+:
+{
+ cat <<EOI >=buildfile;
+ ./:
+ {{
+ x = echo >|
+ y = [cmdline] echo >|
+ diag update
+ $x foo
+ $y bar
+ ([cmdline] $x) baz
+ }}
+ EOI
+
+ $* >> EOO 2>>EOE
+ bar
+ baz
+ EOO
+ update
+ >| foo
+ EOE
+}
+
: timeout
:
if $posix