From a473abe80f4c42a366f0573bbbc762fa440b7fe6 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 26 Apr 2022 10:39:03 +0200 Subject: Use new cmdline type for canned command lines in {Build,Test}script --- libbuild2/build/script/parser.cxx | 111 ++++++++++------- libbuild2/build/script/parser.hxx | 2 +- libbuild2/parser.cxx | 1 + libbuild2/script/parser.cxx | 47 ++++++-- libbuild2/script/parser.hxx | 19 ++- libbuild2/test/init.cxx | 4 +- .../script/parser+command-re-parse.test.testscript | 2 +- .../test/script/parser+expansion.test.testscript | 2 +- libbuild2/test/script/parser.hxx | 2 +- libbuild2/test/script/script.cxx | 111 +++++++++++++++-- libbuild2/variable.cxx | 132 +++++++++++++++++++++ libbuild2/variable.hxx | 29 +++++ 12 files changed, 389 insertions(+), 73 deletions(-) (limited to 'libbuild2') diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index a27ec41..2112b5e 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -711,6 +711,19 @@ namespace build2 return nullopt; } + // If this is a value of the special cmdline type, then only do + // certain tests below if the value is not quoted and doesn't contain + // any characters that would be consumed by re-lexing. + // + // This is somewhat of a hack but handling this properly would not + // only require unquoting but also keeping track of which special + // characters were quoted (and thus should be treated literally) and + // which were not (and thus should act as separators, etc). + // + bool qs (pr.type != nullptr && + pr.type->is_a () && + need_cmdline_relex (ns[0].value)); + // We have to handle process_path[_ex] and executable target. The // process_path[_ex] we may have to recognize syntactically because // of the loss of type, for example: @@ -744,10 +757,14 @@ namespace build2 pp_vt = pr.type; ns.clear (); } - else if (ns[0].file ()) + else if (ns[0].file () && !qs) { // Find the end of the value. 
// + // Note that here we ignore the whole cmdline issue (see above) + // for the further values assuming that they are unquoted and + // don't contain any special characters. + // auto b (ns.begin ()); auto i (value_traits::find_end (ns)); @@ -814,40 +831,43 @@ namespace build2 // else if (!ns[0].simple ()) { - if (const target* t = search_existing ( - ns[0], *scope_, ns[0].pair ? ns[1].dir : empty_dir_path)) + if (!qs) { - if (const auto* et = t->is_a ()) + if (const target* t = search_existing ( + ns[0], *scope_, ns[0].pair ? ns[1].dir : empty_dir_path)) { - if (pre_parse_) + if (const auto* et = t->is_a ()) { - if (auto* n = et->lookup_metadata ("name")) + if (pre_parse_) { - set_diag (*n, 3); - return nullopt; + if (auto* n = et->lookup_metadata ("name")) + { + set_diag (*n, 3); + return nullopt; + } + // Fall through. } - // Fall through. - } - else - { - process_path pp (et->process_path ()); + else + { + process_path pp (et->process_path ()); - if (pp.empty ()) - fail (l) << "target " << *et << " is out of date" << - info << "consider specifying it as a prerequisite of " - << environment_->target; + if (pp.empty ()) + fail (l) << "target " << *et << " is out of date" << + info << "consider specifying it as a prerequisite of " + << environment_->target; - ns.erase (ns.begin (), ns.begin () + (ns[0].pair ? 2 : 1)); - return optional (move (pp)); + ns.erase (ns.begin (), ns.begin () + (ns[0].pair ? 2 : 1)); + return optional (move (pp)); + } } - } - if (pre_parse_) - { - diag_record dr (fail (l)); - dr << "unable to deduce low-verbosity script diagnostics name " - << "from target " << *t; - suggest_diag (dr); + if (pre_parse_) + { + diag_record dr (fail (l)); + dr << "unable to deduce low-verbosity script diagnostics name " + << "from target " << *t; + suggest_diag (dr); + } } } @@ -865,26 +885,29 @@ namespace build2 { // If we are here, the name is simple and is not part of a pair. 
// - string& v (ns[0].value); + if (!qs) + { + string& v (ns[0].value); - // Try to interpret the name as a builtin. - // - const builtin_info* bi (builtins.find (v)); + // Try to interpret the name as a builtin. + // + const builtin_info* bi (builtins.find (v)); - if (bi != nullptr) - { - set_diag (move (v), bi->weight); - return nullopt; - } - // - // Try to interpret the name as a pseudo-builtin. - // - // Note that both of them has the zero weight and cannot be picked - // up as a script name. - // - else if (v == "set" || v == "exit") - { - return nullopt; + if (bi != nullptr) + { + set_diag (move (v), bi->weight); + return nullopt; + } + // + // Try to interpret the name as a pseudo-builtin. + // + // Note that both of them has the zero weight and cannot be picked + // up as a script name. + // + else if (v == "set" || v == "exit") + { + return nullopt; + } } diag_record dr (fail (l)); diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx index 1a6c39d..a02e34a 100644 --- a/libbuild2/build/script/parser.hxx +++ b/libbuild2/build/script/parser.hxx @@ -28,7 +28,7 @@ namespace build2 // Pre-parse. Issue diagnostics and throw failed in case of an error. // public: - parser (context& c): build2::script::parser (c, false /* relex */) {} + parser (context& c): build2::script::parser (c) {} // Note that the returned script object references the passed path // name. diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 8343112..cc8fd9e 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -4731,6 +4731,7 @@ namespace build2 n == "paths" ? ptr (value_traits::value_type) : n == "dir_paths" ? ptr (value_traits::value_type) : n == "names" ? ptr (value_traits>::value_type) : + n == "cmdline" ? 
ptr (value_traits::value_type) : nullptr; } diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 82eb9c8..d5cabe2 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -3,6 +3,8 @@ #include +#include // strchr() + #include #include // exit #include @@ -15,6 +17,33 @@ namespace build2 { using type = token_type; + bool parser:: + need_cmdline_relex (const string& s) + { + for (auto i (s.begin ()), e (s.end ()); i != e; ++i) + { + char c (*i); + + if (c == '\\') + { + if (++i == e) + return false; + + c = *i; + + if (c == '\\' || c == '\'' || c == '\"') + return true; + + // Fall through. + } + + if (strchr ("|<>&\"'", c) != nullptr) + return true; + } + + return false; + } + value parser:: parse_variable_line (token& t, type& tt) { @@ -1092,16 +1121,17 @@ namespace build2 // Process what we got. // - // First see if this is a value that should not be re-lexed. The - // long term plan is to only re-lex values of a special type - // representing a canned command line. + // First see if this is a value that should not be re-lexed. We + // only re-lex values of the special `cmdline` type that + // represents a canned command line. // // Otherwise, determine whether anything inside was quoted (note // that the current token is "next" and is not part of this). // - bool q ( - (pr.value && !relex_) || - (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); + bool lex ( + pr.value + ? pr.type != nullptr && pr.type->is_a () + : (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) == 0); for (name& n: ns) { @@ -1123,10 +1153,7 @@ namespace build2 // interesting characters (operators plus quotes/escapes), // then no need to re-lex. // - // NOTE: update quoting (script.cxx:to_stream_q()) if adding - // any new characters.
- // - if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) + if (!lex || !need_cmdline_relex (s)) add_word (move (s), l); else { diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx index 6e24d37..d8e5dbf 100644 --- a/libbuild2/script/parser.hxx +++ b/libbuild2/script/parser.hxx @@ -25,7 +25,7 @@ namespace build2 class parser: protected build2::parser { public: - parser (context& c, bool relex): build2::parser (c), relex_ (relex) {} + parser (context& c): build2::parser (c) {} // Helpers. // @@ -42,6 +42,15 @@ namespace build2 using build2::parser::apply_value_attributes; + // Return true if a command line element needs to be re-lexed. + // + // Specifically, it needs to be re-lexed if it contains any of the + // special characters (|<>&), quotes ("') or effective escape sequences + // (\", \', \\). + // + static bool + need_cmdline_relex (const string&); + // Commonly used parsing functions. Issue diagnostics and throw failed // in case of an error. // @@ -200,6 +209,13 @@ namespace build2 // something that requires re-lexing, for example `foo|bar`, which won't // be easy to translate but which are handled by the parser. // + // Note that the chunk could be of the special cmdline type in which + // case the names may need to be "preprocessed" (at least unquoted or + // potentially fully re-lexed) before being analyzed/consumed. Note also + // that in this case any names left unconsumed must remain of the + // cmdline type. + // + // // During the pre-parsing phase the returned process path and names // (that must still be parsed) are discarded. 
The main purpose of the // call is to allow implementations to perform static script analysis, @@ -229,7 +245,6 @@ namespace build2 size_t replay_quoted_; protected: - bool relex_; lexer* lexer_ = nullptr; }; } diff --git a/libbuild2/test/init.cxx b/libbuild2/test/init.cxx index c2fc831..c80d3f0 100644 --- a/libbuild2/test/init.cxx +++ b/libbuild2/test/init.cxx @@ -115,8 +115,8 @@ namespace build2 // These are only used in testscript. // - vp.insert ("test.redirects"); - vp.insert ("test.cleanups"); + vp.insert ("test.redirects"); + vp.insert ("test.cleanups"); // Unless already set, default test.target to build.host. Note that it // can still be overriden by the user, e.g., in root.build. diff --git a/libbuild2/test/script/parser+command-re-parse.test.testscript b/libbuild2/test/script/parser+command-re-parse.test.testscript index 84465b3..5a082eb 100644 --- a/libbuild2/test/script/parser+command-re-parse.test.testscript +++ b/libbuild2/test/script/parser+command-re-parse.test.testscript @@ -4,7 +4,7 @@ : double-quote : $* <>EOO -x = cmd \">-\" "'<-'" +x = [cmdline] cmd \">-\" "'<-'" $x EOI cmd '>-' '<-' diff --git a/libbuild2/test/script/parser+expansion.test.testscript b/libbuild2/test/script/parser+expansion.test.testscript index 77a7d6d..c31b0ad 100644 --- a/libbuild2/test/script/parser+expansion.test.testscript +++ b/libbuild2/test/script/parser+expansion.test.testscript @@ -27,7 +27,7 @@ EOE : invalid-redirect : $* <>EOE != 0 -x = "1>&a" +x = [cmdline] "1>&a" cmd $x EOI :1:4: error: stdout merge redirect file descriptor must be 2 diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx index 66160d9..0d15580 100644 --- a/libbuild2/test/script/parser.hxx +++ b/libbuild2/test/script/parser.hxx @@ -30,7 +30,7 @@ namespace build2 // Pre-parse. Issue diagnostics and throw failed in case of an error. 
// public: - parser (context& c): build2::script::parser (c, true /* relex */) {} + parser (context& c): build2::script::parser (c) {} void pre_parse (script&); diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx index 3a8ceac..be86117 100644 --- a/libbuild2/test/script/script.cxx +++ b/libbuild2/test/script/script.cxx @@ -197,12 +197,12 @@ namespace build2 test_var (var_pool.insert ("test")), options_var (var_pool.insert ("test.options")), arguments_var (var_pool.insert ("test.arguments")), - redirects_var (var_pool.insert ("test.redirects")), - cleanups_var (var_pool.insert ("test.cleanups")), + redirects_var (var_pool.insert ("test.redirects")), + cleanups_var (var_pool.insert ("test.cleanups")), wd_var (var_pool.insert ("~")), id_var (var_pool.insert ("@")), - cmd_var (var_pool.insert ("*")), + cmd_var (var_pool.insert ("*")), cmdN_var { &var_pool.insert ("0"), &var_pool.insert ("1"), @@ -410,11 +410,12 @@ namespace build2 // First assemble the $* value and save the test variable value into // the test program set. // - strings s; + cmdline s; - auto append = [&s] (const strings& v) + auto append = [&s] (const strings& vs) { - s.insert (s.end (), v.begin (), v.end ()); + for (const string& v: vs) + s.push_back (name (v)); // Simple name. }; // If the test variable can't be looked up for any reason (is NULL, @@ -423,7 +424,7 @@ namespace build2 if (auto l = lookup (root.test_var)) { const path& p (cast (l)); - s.push_back (p.representation ()); + s.push_back (name (p.representation ())); test_programs[0] = &p; @@ -441,10 +442,16 @@ namespace build2 size_t n (s.size ()); if (auto l = lookup (root.redirects_var)) - append (cast (l)); + { + const auto& v (cast (l)); + s.insert (s.end (), v.begin (), v.end ()); + } if (auto l = lookup (root.cleanups_var)) - append (cast (l)); + { + const auto& v (cast (l)); + s.insert (s.end (), v.begin (), v.end ()); + } // Set the $N values if present. 
// @@ -455,9 +462,9 @@ namespace build2 if (i < n) { if (i == 0) - v = path (s[i]); + v = path (s[i].value); else - v = s[i]; + v = s[i].value; } else v = nullptr; // Clear any old values. @@ -465,6 +472,88 @@ namespace build2 // Set $*. // + // We need to effective-quote the $test $test.options, $test.arguments + // part of it since they will be re-lexed. See the Testscript manual + // for details on quoting semantics. In particular, we cannot escape + // the special character (|<>&) so we have to rely on quoting. We can + // use single-quoting for everything except if the value contains a + // single quote. In which case we should probably just do separately- + // quoted regions (similar to shell), for example: + // + // <''> + // + // Can be quoted as: + // + // '<'"''"'>' + // + for (size_t i (0); i != n; ++i) + { + string& v (s[i].value); + + // Check if the quoting is required for this value. + // + if (!parser::need_cmdline_relex (v)) + continue; + + // If the value doesn't contain the single-quote character, then + // single-quote it. + // + size_t p (v.find ('\'')); + + if (p == string::npos) + { + v = "'" + v + "'"; + continue; + } + + // Otherwise quote the regions. + // + // Note that we double-quote the single-quote character sequences + // and single-quote all the other regions. + // + string r; + char q (p == 0 ? '"' : '\''); // Current region quoting mode. + + r += q; // Open the first region. + + for (char c: v) + { + // If we are in the double-quoting mode, then switch to the + // single-quoting mode if a non-single-quote character is + // encountered. + // + if (q == '"') + { + if (c != '\'') + { + r += q; // Close the double-quoted region. + q = '\''; // Set the single-quoting mode. + r += q; // Open the single-quoted region. + } + } + // + // If we are in the single-quoting mode, then switch to the + // double-quoting mode if the single-quote character is + // encountered. 
+ // + else + { + if (c == '\'') + { + r += q; // Close the single-quoted region. + q = '"'; // Set the double-quoting mode. + r += q; // Open the double-quoted region. + } + } + + r += c; + } + + r += q; // Close the last region. + + v = move (r); + } + assign (root.cmd_var) = move (s); } diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx index 74fad14..4bd01dc 100644 --- a/libbuild2/variable.cxx +++ b/libbuild2/variable.cxx @@ -1420,6 +1420,138 @@ namespace build2 &default_empty }; + // cmdline + // + cmdline value_traits:: + convert (names&& ns) + { + return cmdline (make_move_iterator (ns.begin ()), + make_move_iterator (ns.end ())); + } + + void value_traits:: + assign (value& v, cmdline&& x) + { + if (v) + v.as () = move (x); + else + new (&v.data_) cmdline (move (x)); + } + + void value_traits:: + append (value& v, cmdline&& x) + { + if (v) + { + cmdline& p (v.as ()); + + if (p.empty ()) + p.swap (x); + else + p.insert (p.end (), + make_move_iterator (x.begin ()), + make_move_iterator (x.end ())); + } + else + new (&v.data_) cmdline (move (x)); + } + + void value_traits:: + prepend (value& v, cmdline&& x) + { + if (v) + { + cmdline& p (v.as ()); + + if (!p.empty ()) + x.insert (x.end (), + make_move_iterator (p.begin ()), + make_move_iterator (p.end ())); + + p.swap (x); + } + else + new (&v.data_) cmdline (move (x)); + } + + void + cmdline_assign (value& v, names&& ns, const variable*) + { + if (!v) + { + new (&v.data_) cmdline (); + v.null = false; + } + + v.as ().assign (make_move_iterator (ns.begin ()), + make_move_iterator (ns.end ())); + } + + void + cmdline_append (value& v, names&& ns, const variable*) + { + if (!v) + { + new (&v.data_) cmdline (); + v.null = false; + } + + auto& x (v.as ()); + x.insert (x.end (), + make_move_iterator (ns.begin ()), + make_move_iterator (ns.end ())); + } + + void + cmdline_prepend (value& v, names&& ns, const variable*) + { + if (!v) + { + new (&v.data_) cmdline (); + v.null = false; + } + + auto& x (v.as 
()); + x.insert (x.begin (), + make_move_iterator (ns.begin ()), + make_move_iterator (ns.end ())); + } + + static names_view + cmdline_reverse (const value& v, names&) + { + const auto& x (v.as ()); + return names_view (x.data (), x.size ()); + } + + static int + cmdline_compare (const value& l, const value& r) + { + return vector_compare (l, r); + } + + const cmdline value_traits::empty_instance; + + const char* const value_traits::type_name = "cmdline"; + + const value_type value_traits::value_type + { + type_name, + sizeof (cmdline), + nullptr, // No base. + &value_traits::value_type, + &default_dtor, + &default_copy_ctor, + &default_copy_assign, + &cmdline_assign, + &cmdline_append, + &cmdline_prepend, + &cmdline_reverse, + nullptr, // No cast (cast data_ directly). + &cmdline_compare, + &default_empty + }; + // variable_pool // void variable_pool:: diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx index 6047bf3..c03838f 100644 --- a/libbuild2/variable.hxx +++ b/libbuild2/variable.hxx @@ -1147,6 +1147,35 @@ namespace build2 static const map_value_type value_type; }; + // Canned command line to be re-lexed (used in {Build,Test}scripts). + // + // Note that because the executable can be specified as a target or as + // process_path_ex, this is a list of names rather than a list of strings. + // Note also that unlike vector this type allows name pairs. + // + struct cmdline: vector + { + using vector::vector; + + cmdline () {} // For Clang.
+ }; + + template <> + struct LIBBUILD2_SYMEXPORT value_traits + { + static_assert (sizeof (cmdline) <= value::size_, "insufficient space"); + + static cmdline convert (names&&); + static void assign (value&, cmdline&&); + static void append (value&, cmdline&&); + static void prepend (value&, cmdline&&); + static bool empty (const cmdline& x) {return x.empty ();} + + static const cmdline empty_instance; + static const char* const type_name; + static const build2::value_type value_type; + }; + // Explicitly pre-instantiate and export value_traits templates for // vector/map value types used in the build2 project. Note that this is not // merely an optimization since not doing so we may end up with multiple -- cgit v1.1