diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2022-04-26 10:39:03 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2022-04-26 10:39:03 +0200 |
commit | d92635b74d6d95e27c8b24bedfaa572019c23fb6 (patch) | |
tree | 3cdfdce15e2107dad3f316bafa69ffb48f528f2f | |
parent | 48707e16dd0c8806e99387b0718a078ecf092f69 (diff) |
Use new cmdline type for canned command lines in {Build,Test}script cmdline-relex
-rw-r--r-- | doc/testscript.cli | 50 | ||||
-rw-r--r-- | libbuild2/parser.cxx | 1 | ||||
-rw-r--r-- | libbuild2/script/parser.cxx | 50 | ||||
-rw-r--r-- | libbuild2/variable.cxx | 79 | ||||
-rw-r--r-- | libbuild2/variable.hxx | 26 | ||||
-rw-r--r-- | tests/recipe/buildscript/testscript | 24 |
6 files changed, 197 insertions, 33 deletions
diff --git a/doc/testscript.cli b/doc/testscript.cli index 1395363..1da111b 100644 --- a/doc/testscript.cli +++ b/doc/testscript.cli @@ -1285,62 +1285,54 @@ here-document single-quoted here_line_single here-document double-quoted here_line_double expansions \ -Finally, unquoted expansions in command lines (test, setup, and teardown) are -re-lexed in the \c{command_expansion} mode in order to recognize command line -syntax tokens (redirects, pipes, etc). To illustrate why this re-lexing is -necessary, consider the following example of a \"canned\" command line: +Finally, unquoted expansions in command lines (test, setup, and teardown) of +the special \c{cmdline} type are re-lexed in the \c{command_expansion} mode in +order to recognize command line syntax tokens (redirects, pipes, etc). To +illustrate this mechanism, consider the following example of a \"canned\" +command line: \ -x = echo >- -$x foo +cmd = [cmdline] echo >- +$cmd foo \ -The test command line token sequence will be \c{$}, \c{x}, \c{foo}. After the -expansion we have \c{echo}, \c{>-}, \c{foo}, however, the second element -(\c{>-}) is not (yet) recognized as a redirect. To recognize it we re-lex -the result of the expansion. +The test command line token sequence will be \c{$}, \c{cmd}, \c{foo}. After +the expansion we have \c{echo}, \c{>-}, \c{foo}, however, the second element +(\c{>-}) is not (yet) recognized as a redirect. To recognize it, the result of +the expansion is re-lexed. 
Note that besides the few command line syntax characters, re-lexing will also \"consume\" quotes and escapes, for example: \ -args = \"'foo'\" # 'foo' -echo $args # echo foo +cmd = [cmdline] echo \"'foo'\" # echo 'foo' +$cmd # echo foo \ To preserve quotes in this context we need to escape them: \ -args = \"\\\\'foo\\\\'\" # \'foo\' -echo $args # echo 'foo' -\ - -Alternatively, for a single value, we could quote the expansion (in order -to suppress re-lexing; note, however, that quoting will also inhibit -word-splitting): - -\ -arg = \"'foo'\" # 'foo' -echo \"$arg\" # echo 'foo' +cmd = [cmdline] echo \"\\\\'foo\\\\'\" # echo \'foo\' +$cmd # echo 'foo' \ To minimize unhelpful consumption of escape sequences (for example, in Windows paths), re-lexing only performs the \i{effective escaping} for the \c{'\"\\} characters. All other escape sequences are passed through uninterpreted. Note -that this means there is no way to escape command line syntax characters. The -recommendation is to use quoting except for passing literal quotes, for -example: +that this means there is no way to escape command line syntax characters in +canned commands. The recommendation is to use quoting except for passing +literal quotes, for example: \ -args = \'&foo\' # '&foo' -echo $args # echo &foo +cmd = [cmdline] echo \'&foo\' # echo '&foo' +$cmd # echo &foo \ To make sure that a string is passed as is through both expansions use the \i{doubled single-quoting} idiom, for example: \ -filter = sed -e \''s/foo (bar|baz)/$&/'\' +filter = [cmdline] sed -e \''s/foo (bar|baz)/$&/'\' $* <<EOI | $filter >>EOO ... EOI diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 99e67a7..3f876e7 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -4657,6 +4657,7 @@ namespace build2 n == "paths" ? ptr (value_traits<paths>::value_type) : n == "dir_paths" ? ptr (value_traits<dir_paths>::value_type) : n == "names" ? ptr (value_traits<vector<name>>::value_type) : + n == "cmdline" ? 
ptr (value_traits<cmdline>::value_type) : nullptr; } diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 82eb9c8..e41b2b7 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -1092,15 +1092,57 @@ namespace build2 // Process what we got. // - // First see if this is a value that should not be re-lexed. The - // long term plan is to only re-lex values of a special type - // representing a canned command line. + // First see if this is a value that should not be re-lexed. We + // only re-lex values of the special `cmdline` type that + // represents a canned command line. // // Otherwise, determine whether anything inside was quoted (note // that the current token is "next" and is not part of this). // + // @@ TMP: switch Testscript to cmdline and get rid of relex_. + // + // @@ parse_names() is currently broken: the implementation should + // re-lex if what it parses is cmdline otherwise there could be + // quoting that will not be consumed (note: mention this in + // function documentation). But it can also leave some names + // (in ns) unprocessed which we pick up here: are they already + // re-lexed or not? Feels like could be either way so perhaps + // need to return an indication. Check the implementation in + // Buildscript if we actually need this flexibility. + // + // @@ For Testscript we also have $* and $N where $* is assembled + // from $test, $test.options, $test.arguments, $test.redirects + // and $test.cleanups. Currently they are string/strings. Feels + // like we need the following changes: + // + // - $*, $test.redirects, $test.cleanups should become cmdline + // + // - the rest should stay string/strings + // + // - $N should only consider $test, $test.options, $test.arguments + // + // - we will need to effective-quote the $test, $test.options, + // $test.arguments in $* (since they will be re-lexed). See + // the Testscript manual for details on quoting semantics. 
In + // particular, we cannot escape special chars (|<>&) so + // have to rely on quoting. We can use single-quote for + // everything except if the value contain a single quote. + // In which case we should probably just do separately- + // quoted regions, for example: + // + // <'> + // + // Can be quoted as: + // + // '<'"'"'>' + // + // - if all this pans out, need to document in Testscript + // manual which special vars are cmdline. + // bool q ( - (pr.value && !relex_) || + (pr.value && + !relex_ && + (pr.type == nullptr || !pr.type->is_a<cmdline> ())) || (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); for (name& n: ns) diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx index 8a063f7..a224531 100644 --- a/libbuild2/variable.cxx +++ b/libbuild2/variable.cxx @@ -1420,6 +1420,85 @@ namespace build2 &default_empty<project_name> }; + // cmdline + // + cmdline value_traits<cmdline>:: + convert (names&& ns) + { + return cmdline (value_traits<strings>::convert (move (ns))); + } + + void + cmdline_assign (value& v, names&& ns, const variable* var) + { + if (!v) + { + new (&v.data_) cmdline (); + v.null = false; + } + + vector_assign<string> (v, move (ns), var); + } + + void + cmdline_append (value& v, names&& ns, const variable* var) + { + if (!v) + { + new (&v.data_) cmdline (); + v.null = false; + } + + vector_append<string> (v, move (ns), var); + } + + void + cmdline_prepend (value& v, names&& ns, const variable* var) + { + if (!v) + { + new (&v.data_) cmdline (); + v.null = false; + } + + vector_prepend<string> (v, move (ns), var); + } + + static names_view + cmdline_reverse (const value& v, names& s) + { + return vector_reverse<string> (v, s); + } + + static int + cmdline_compare (const value& l, const value& r) + { + return vector_compare<string> (l, r); + } + + const cmdline value_traits<cmdline>::empty_instance; + + const char* const value_traits<cmdline>::type_name = "cmdline"; + + const value_type value_traits<cmdline>::value_type + 
{ + type_name, + sizeof (cmdline), + &value_traits<strings>::value_type, // Base (assuming direct cast works + // for both). + &value_traits<string>::value_type, + &default_dtor<cmdline>, + &default_copy_ctor<cmdline>, + &default_copy_assign<cmdline>, + &cmdline_assign, + &cmdline_append, + &cmdline_prepend, + &cmdline_reverse, + nullptr, // No cast (cast data_ directly). + &cmdline_compare, + &default_empty<cmdline> + }; + // variable_pool // void variable_pool:: diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx index 2bfab05..c4639c1 100644 --- a/libbuild2/variable.hxx +++ b/libbuild2/variable.hxx @@ -1141,6 +1141,32 @@ namespace build2 static const map_value_type<K, V> value_type; }; + // Canned command line to be re-lexed (used in {Build,Test}scripts). + // + struct cmdline: strings + { + using strings::strings; + + cmdline () {} // For Clang. + cmdline (strings&& v): strings (move (v)) {} + }; + + template <> + struct LIBBUILD2_SYMEXPORT value_traits<cmdline> + { + static_assert (sizeof (cmdline) <= value::size_, "insufficient space"); + + static cmdline convert (names&&); + static void assign (value&, cmdline&&); + static void append (value&, cmdline&&); + static void prepend (value&, cmdline&&); + static bool empty (const cmdline& x) {return x.empty ();} + + static const cmdline empty_instance; + static const char* const type_name; + static const build2::value_type value_type; + }; + // Explicitly pre-instantiate and export value_traits templates for // vector/map value types used in the build2 project. 
Note that this is not // merely an optimization since not doing so we may end up with multiple diff --git a/tests/recipe/buildscript/testscript b/tests/recipe/buildscript/testscript index 910ee67..8632280 100644 --- a/tests/recipe/buildscript/testscript +++ b/tests/recipe/buildscript/testscript @@ -700,6 +700,30 @@ posix = ($cxx.target.class != 'windows') $* clean 2>- } +: canned-cmdline +: +{ + cat <<EOI >=buildfile; + ./: + {{ + x = echo >| + y = [cmdline] echo >| + diag update + $x foo + $y bar + ([cmdline] $x) baz + }} + EOI + + $* >> EOO 2>>EOE + bar + baz + EOO + update + >| foo + EOE +} + : timeout : if $posix |