From 728b075cb5e0df9c386f8377e0f6961e5ccc5143 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 16 Jun 2020 17:08:39 +0300 Subject: Add env script pseudo-builtin Also disable C++ recipe tests when cross-testing. --- libbuild2/build/script/parser+diag.test.testscript | 36 +++++ libbuild2/build/script/parser.cxx | 6 +- libbuild2/build/script/parser.hxx | 9 +- libbuild2/script/parser.cxx | 162 ++++++++++++++++++++- libbuild2/script/parser.hxx | 14 +- libbuild2/script/run.cxx | 11 +- libbuild2/script/script.cxx | 130 ++++++++++++++++- libbuild2/script/script.hxx | 8 +- libbuild2/test/script/parser+env.test.testscript | 77 ++++++++++ 9 files changed, 430 insertions(+), 23 deletions(-) create mode 100644 libbuild2/test/script/parser+env.test.testscript (limited to 'libbuild2') diff --git a/libbuild2/build/script/parser+diag.test.testscript b/libbuild2/build/script/parser+diag.test.testscript index 60683bc..5b4e64a 100644 --- a/libbuild2/build/script/parser+diag.test.testscript +++ b/libbuild2/build/script/parser+diag.test.testscript @@ -55,3 +55,39 @@ $* <>~%EOO% buildfile:12:1: info: previous call is here EOE } + +: inside-if +: +$* <>EOE != 0 + if true + diag copy >= $> + fi + EOI + buildfile:12:3: error: 'diag' call inside flow control construct + EOE + +: inside-if-cond +: +$* <>EOE != 0 + if diag copy >= $> + true + fi + EOI + buildfile:11:4: error: 'diag' call inside flow control construct + EOE + +: second-command +: +$* <>EOE != 0 + true && diag copy >= $> + EOI + buildfile:11:9: error: 'diag' call must be the only command + EOE + +: via-env +: +$* <>EOE != 0 + env -- diag copy >= $> + EOI + buildfile:11:8: error: 'diag' call via 'env' builtin + EOE diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index 2c41ac1..8f2c46d 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -330,6 +330,7 @@ namespace build2 optional parser:: parse_program (token& t, build2::script::token_type& tt, bool 
first, + bool env, names& ns) { const location l (get_location (t)); @@ -369,13 +370,16 @@ namespace build2 // Verify that the special builtin is not called inside an improper // context (flow control construct or complex expression). // - auto verify = [first, &v, &l, this] () + auto verify = [first, env, &v, &l, this] () { if (level_ != 0) fail (l) << "'" << v << "' call inside flow control construct"; if (!first) fail (l) << "'" << v << "' call must be the only command"; + + if (env) + fail (l) << "'" << v << "' call via 'env' builtin"; }; if (v == "diag") diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx index 73bcd09..5ada8be 100644 --- a/libbuild2/build/script/parser.hxx +++ b/libbuild2/build/script/parser.hxx @@ -98,13 +98,16 @@ namespace build2 // // During pre-parsing try to deduce the low-verbosity script // diagnostics name as a program/builtin name or obtain the custom - // low-verbosity diagnostics specified with the diag builtin. Note - // that the diag builtin can only appear at the beginning of the - // command line. + // low-verbosity diagnostics specified with the diag builtin. Also + // handle the depdb builtin calls. + // + // Note that the diag and depdb builtins can only appear at the + // beginning of the command line. // virtual optional parse_program (token&, build2::script::token_type&, bool first, + bool env, names&) override; protected: diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index a00651c..d5cff1a 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -97,7 +97,7 @@ namespace build2 } optional parser:: - parse_program (token& t, type& tt, bool, names& ns) + parse_program (token& t, type& tt, bool, bool, names& ns) { parse_names (t, tt, ns, @@ -1019,6 +1019,18 @@ namespace build2 } } + bool prog (p == pending::program_first || + p == pending::program_next); + + // Check if this is the env pseudo-builtin. 
+ // + bool env (false); + if (prog && tt == type::word && t.value == "env") + { + c.variables = parse_env_builtin (t, tt); + env = true; + } + // Parse the next chunk as names to get expansion, etc. Note that // we do it in the chunking mode to detect whether anything in // each chunk is quoted. If we are waiting for the command @@ -1033,10 +1045,10 @@ namespace build2 // reset_quoted (t); - if (p == pending::program_first || p == pending::program_next) + if (prog) { optional pp ( - parse_program (t, tt, p == pending::program_first, ns)); + parse_program (t, tt, p == pending::program_first, env, ns)); // During pre-parsing we are not interested in the // parse_program() call result, so just discard the potentially @@ -1088,7 +1100,7 @@ namespace build2 { diag_record dr (fail (l)); dr << "invalid string value "; - to_stream (dr.os, n, true); // Quote. + to_stream (dr.os, n, true /* quote */); } // If it is a quoted chunk, then we add the word as is. @@ -1275,6 +1287,146 @@ namespace build2 return make_pair (move (expr), move (hd)); } + environment_vars parser:: + parse_env_builtin (token& t, token_type& tt) + { + // enter: 'env' word token + // leave: first token of the program name + + next (t, tt); // Skip 'env'. + + // Note that the -u option and its value can belong to the different + // name chunks. That's why we parse the env builtin arguments in the + // chunking mode into the argument/location pair list up to the '--' + // separator and parse this list into the variable sets/unsets + // afterwards. + // + // Align the size with environment_vars (double because of -u + // which is two arguments). + // + using args = small_vector, 4>; + + args as; + names ns; // Reuse to reduce allocations. 
+ while (tt != type::word || t.value != "--") + { + location l (get_location (t)); + + if (!start_names (tt)) + fail (l) << "env: expected option, variable, or '--' separator " + << "instead of " << t; + + parse_names (t, tt, + ns, + pattern_mode::ignore, + true /* chunk */, + "env builtin argument", + nullptr); + + if (pre_parse_) + continue; + + for (name& n: ns) + { + try + { + as.emplace_back ( + value_traits::convert (move (n), nullptr), l); + } + catch (const invalid_argument&) + { + diag_record dr (fail (l)); + dr << "invalid string value "; + to_stream (dr.os, n, true /* quote */); + } + } + + ns.clear (); + } + + location l (get_location (t)); // '--' location. + next (t, tt); // Skip '--'. + + if (tt == type::newline || tt == type::eos) + fail (t) << "env: expected program name instead of " << t; + + // Parse the env builtin options and arguments. + // + environment_vars r; + + // Note: args is empty in the pre-parse mode. + // + auto i (as.begin ()), e (as.end ()); + + // Parse the variable unsets (from options). + // + for (; i != e; ++i) + { + string& o (i->first); + + // Bail out if the options and arguments separator is encountered. + // + if (o == "-") + { + ++i; + break; + } + + // Unset the variable, adding its name to the resulting variable list. + // + auto unset = [&r, &i, this] (string&& v, const char* o) + { + if (v.empty ()) + fail (i->second) << "env: empty value for option '" << o << "'"; + + if (v.find ('=') != string::npos) + fail (i->second) << "env: invalid value '" << v << "' for " + << "option '" << o << "': contains '='"; + + r.push_back (move (v)); + }; + + // If this is the --unset|-u option then add the variable unset and + // bail out to parsing the variable sets otherwise. 
+ // + if (o == "--unset" || o == "-u") + { + if (++i == e) + fail (l) << "env: missing value for option '" << o << "'"; + + unset (move (i->first), o.c_str ()); + } + else if (o.compare (0, 8, "--unset=") == 0) + unset (string (o, 8), "--unset"); + else + break; + } + + // Parse the variable sets (from arguments). + // + for (; i != e; ++i) + { + string& a (i->first); + + // Validate the variable assignment. + // + size_t p (a.find ('=')); + + if (p == string::npos) + fail (i->second) + << "env: expected variable assignment instead of '" << a << "'"; + + if (p == 0) + fail (i->second) << "env: empty variable name"; + + // Add the variable set to the resulting list. + // + r.push_back (move (a)); + } + + return r; + } + command_exit parser:: parse_command_exit (token& t, type& tt) { @@ -1310,7 +1462,7 @@ namespace build2 diag_record dr; dr << fail (l) << "expected exit status instead of "; - to_stream (dr.os, ns, true); // Quote. + to_stream (dr.os, ns, true /* quote */); dr << info << "exit status is an unsigned integer less than 256"; } diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx index bec6867..da69591 100644 --- a/libbuild2/script/parser.hxx +++ b/libbuild2/script/parser.hxx @@ -129,6 +129,15 @@ namespace build2 line_type pre_parse_line_start (token&, token_type&, lexer_mode); + // Parse the env pseudo-builtin arguments up to the program name. Return + // the list of the variables that should be unset ("name") and/or set + // ("name=value") in the command environment and the token/type that + // starts the program name. Note that the variable unsets come first, if + // present. + // + environment_vars + parse_env_builtin (token&, token_type&); + // Execute. // protected: @@ -166,7 +175,8 @@ namespace build2 // protected: // Parse the command's leading name chunk. The argument first is true if - // this is the first command in the line. + // this is the first command in the line. 
The argument env is true if + // the command is executed via the env pseudo-builtin. // // During the execution phase try to parse and translate the leading // names into the process path and return nullopt if choose not to do @@ -189,7 +199,7 @@ namespace build2 // recognize and execute certain directives, or some such. // virtual optional - parse_program (token&, token_type&, bool first, names&); + parse_program (token&, token_type&, bool first, bool env, names&); // Set lexer pointers for both the current and the base classes. // diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx index 46c061c..b90ba48 100644 --- a/libbuild2/script/run.cxx +++ b/libbuild2/script/run.cxx @@ -1619,16 +1619,19 @@ namespace build2 ? process::path_search (args[0]) : process_path ()); - // Note: the builtin-escaping character '^' is not printed. + // Note that CWD and builtin-escaping character '^' are not printed. // + process_env pe (resolve ? pp : c.program, c.variables); + if (verb >= 2) - print_process (args); + print_process (pe, args); process pr ( - resolve ? pp : c.program, + *pe.path, args.data (), {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, - env.work_dir.path->string ().c_str ()); + env.work_dir.path->string ().c_str (), + pe.vars); ifd.reset (); ofd.out.reset (); diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index d0d3304..ee238cc 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -229,22 +229,57 @@ namespace build2 } } - // Quote if empty or contains spaces or any of the special characters. - // Note that we use single quotes since double quotes still allow - // expansion. + // Quote a string unconditionally, assuming it contains some special + // characters. // - // @@ What if it contains single quotes? + // If the quote character is present in the string then it is double + // quoted rather than single quoted. 
In this case the following characters + // are escaped: + // + // \" + // + static void + to_stream_quoted (ostream& o, const char* s) + { + if (strchr (s, '\'') != nullptr) + { + o << '"'; + + for (; *s != '\0'; ++s) + { + // Escape characters special inside double quotes. + // + if (strchr ("\\\"", *s) != nullptr) + o << '\\'; + + o << *s; + } + + o << '"'; + } + else + o << '\'' << s << '\''; + } + + static inline void + to_stream_quoted (ostream& o, const string& s) + { + to_stream_quoted (o, s.c_str ()); + } + + // Quote if empty or contains spaces or any of the command line special + // characters. // static void to_stream_q (ostream& o, const string& s) { // NOTE: update dump(line) if adding any new special character. // - if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos) - o << '\'' << s << '\''; + if (s.empty () || s.find_first_of (" |&<>=\\\"'") != string::npos) + to_stream_quoted (o, s); else o << s; - }; + } void to_stream (ostream& o, const command& c, command_to_stream m) @@ -373,6 +408,87 @@ namespace build2 if ((m & command_to_stream::header) == command_to_stream::header) { + // Print the env builtin arguments, if any environment variable + // (un)sets are present. + // + if (!c.variables.empty ()) + { + o << "env"; + + auto b (c.variables.begin ()), i (b), e (c.variables.end ()); + + // Print a variable name or assignment to the stream, quoting it if + // necessary. + // + auto print = [&o] (const string& v, bool name) + { + size_t p (v.find_first_of (" \\\"'")); + + // Print the variable name/assignment as is if it doesn't contain + // any special characters. + // + if (p == string::npos) + { + o << v; + return; + } + + // If the variable name contains any special characters, then + // quote the name/assignment as a whole. + // + size_t eq; + if (name || (eq = v.find ('=')) > p) + { + to_stream_quoted (o, v); + return; + } + + // Finally, if the variable value contains any special characters, + // then we quote only the value. 
+ // + assert (eq != string::npos); + + o.write (v.c_str (), eq + 1); // Includes '='. + to_stream_quoted (o, v.c_str () + eq + 1); + }; + + // Variable unsets. + // + // Print the variable unsets as the -u options until a variable set + // is encountered (contains '=') or the end of the variable list is + // reached. In the former case, to avoid a potential ambiguity add + // the '-' separator, if there are any options. + // + // Note that we rely on the fact that unsets come first, which is + // guaranteed by parser::parse_env_builtin(). + // + for (; i != e; ++i) + { + const string& v (*i); + + if (v.find ('=') == string::npos) // Variable unset. + { + o << " -u "; print (v, true /* name*/); + } + else // Variable set. + { + if (i != b) + o << " -"; + + break; + } + } + + // Variable sets. + // + for (; i != e; ++i) + { + o << ' '; print (*i, false /* name */); + } + + o << " -- "; + } + // Program. // to_stream_q (o, c.program.recall_string ()); diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index d751169..8e1c852 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -298,6 +298,11 @@ namespace build2 // command // + // Align with butl::process_env, assuming it is not very common to (un)set + // more than two variables. 
+ // + using environment_vars = small_vector; + struct command { // We use NULL initial as an indication that the path stored in recall @@ -306,7 +311,8 @@ namespace build2 // process_path program; - strings arguments; + strings arguments; + environment_vars variables; optional in; optional out; diff --git a/libbuild2/test/script/parser+env.test.testscript b/libbuild2/test/script/parser+env.test.testscript new file mode 100644 index 0000000..b1e864c --- /dev/null +++ b/libbuild2/test/script/parser+env.test.testscript @@ -0,0 +1,77 @@ +# file : libbuild2/test/script/parser+env.test.testscript +# license : MIT; see accompanying LICENSE file + +: unset +: +{ + $* <'env -u a -- cmd' >'env -u a -- cmd' : short-opt + $* <'env --unset a -- cmd' >'env -u a -- cmd' : long-opt + $* <'env --unset=a -- cmd' >'env -u a -- cmd' : long-opt-eq + $* <'env -u a -u b -- cmd' >'env -u a -u b -- cmd' : mult-opt + $* <'env -u "a b" -- cmd' >"env -u 'a b' -- cmd" : quote + + : invalid-opt + : + $* <'env -w a -- cmd' 2>>EOE != 0 + testscript:1:5: error: env: expected variable assignment instead of '-w' + EOE + + : no-val + : + $* <'env -u -- cmd' 2>>EOE != 0 + testscript:1:8: error: env: missing value for option '-u' + EOE + + : empty-val + : + $* <'env --unset= -- cmd' 2>>EOE != 0 + testscript:1:5: error: env: empty value for option '--unset' + EOE + + : invalid-val + : + $* <'env --unset=a=b -- cmd' 2>>EOE != 0 + testscript:1:5: error: env: invalid value 'a=b' for option '--unset': contains '=' + EOE + + : no-sep + : + $* <'env -u a cmd' 2>>EOE != 0 + testscript:1:13: error: env: expected option, variable, or '--' separator instead of + EOE + + $* <'env && cmd' 2>>EOE != 0 + testscript:1:5: error: env: expected option, variable, or '--' separator instead of '&&' + EOE +} + +: set +: +{ + $* <'env a=b -- cmd' >'env a=b -- cmd' : var + $* <'env -u a b=c -- cmd' >'env -u a - b=c -- cmd' : opt-var + $* <'env a="b c" -- cmd' >"env a='b c' -- cmd" : quote + $* <'env "a b"=c -- cmd' >"env 'a 
b=c' -- cmd" : quote-name + + : double-quote + : + $* <>EOF + env a="'a\"'" -- cmd + EOF + + : expected-assign + : + $* <'env a -- cmd' 2>>EOE != 0 + testscript:1:5: error: env: expected variable assignment instead of 'a' + EOE +} + +: non-first +: +$* <'cmd1 && env -u a b=c -- cmd2' >'cmd1 && env -u a - b=c -- cmd2' + +: no-cmd +: +$* <'env -u a --' 2>>EOE != 0 + testscript:1:12: error: env: expected program name instead of + EOE -- cgit v1.1