From b808c255b6a9ddba085bf5646e7d20ec344f2e2d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 28 Apr 2020 08:48:53 +0200 Subject: Initial support for ad hoc recipes (still work in progress) --- libbuild2/script/run.cxx | 2020 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2020 insertions(+) create mode 100644 libbuild2/script/run.cxx (limited to 'libbuild2/script/run.cxx') diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx new file mode 100644 index 0000000..38436b9 --- /dev/null +++ b/libbuild2/script/run.cxx @@ -0,0 +1,2020 @@ +// file : libbuild2/script/run.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // streamsize + +#include +#include +#include // fdopen_mode, fddup() +#include // path_search() +#include + +#include +#include + +#include +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace script + { + string + diag_path (const path& d) + { + string r ("'"); + + r += stream_verb_map ().path < 1 + ? diag_relative (d) + : d.representation (); + + r += '\''; + return r; + } + + string + diag_path (const dir_name_view& dn) + { + string r; + if (dn.name != nullptr && *dn.name) + { + r += **dn.name; + r += ' '; + } + + assert (dn.path != nullptr); + + r += diag_path (*dn.path); + return r; + } + + // Return the environment temporary directory, creating it if it doesn't + // exist. + // + static inline const dir_path& + temp_dir (environment& env) + { + if (env.temp_dir.empty ()) + env.create_temp_dir (); + + return env.temp_dir; + } + + // Normalize a path. Also make the relative path absolute using the + // specified directory unless it is already absolute. + // + static path + normalize (path p, const dir_path& d, const location& l) + { + path r (p.absolute () ? move (p) : d / move (p)); + + try + { + r.normalize (); + } + catch (const invalid_path& e) + { + fail (l) << "invalid file path " << e.path; + } + + return r; + } + + // Check if a path is not empty, the referenced file exists and is not + // empty. + // + static bool + non_empty (const path& p, const location& ll) + { + if (p.empty () || !exists (p)) + return false; + + try + { + ifdstream is (p); + return is.peek () != ifdstream::traits_type::eof (); + } + catch (const io_error& e) + { + // While there can be no fault of the script command being currently + // executed let's add the location anyway to ease the + // troubleshooting. And let's stick to that principle down the road. + // + fail (ll) << "unable to read " << p << ": " << e << endf; + } + } + + // If the file exists, not empty and not larger than 4KB print it to the + // diag record. The file content goes from the new line and is not + // indented. + // + static void + print_file (diag_record& d, const path& p, const location& ll) + { + if (exists (p)) + { + try + { + ifdstream is (p, ifdstream::badbit); + + if (is.peek () != ifdstream::traits_type::eof ()) + { + char buf[4096 + 1]; // Extra byte is for terminating '\0'. + + // Note that the string is always '\0'-terminated with a maximum + // sizeof (buf) - 1 bytes read. + // + is.getline (buf, sizeof (buf), '\0'); + + // Print if the file fits 4KB-size buffer. Note that if it + // doesn't the failbit is set. + // + if (is.eof ()) + { + // Suppress the trailing newline character as the diag record + // adds it's own one when flush. + // + streamsize n (is.gcount ()); + assert (n > 0); + + // Note that if the file contains '\0' it will also be counted + // by gcount(). But even in the worst case we will stay in the + // buffer boundaries (and so not crash). + // + if (buf[n - 1] == '\n') + buf[n - 1] = '\0'; + + d << '\n' << buf; + } + } + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << p << ": " << e; + } + } + } + + // Save a string to the file. Fail if exception is thrown by underlying + // operations. + // + static void + save (const path& p, const string& s, const location& ll) + { + try + { + ofdstream os (p); + os << s; + os.close (); + } + catch (const io_error& e) + { + fail (ll) << "unable to write to " << p << ": " << e; + } + } + + // Transform string according to here-* redirect modifiers from the {/} + // set. + // + static string + transform (const string& s, + bool regex, + const string& modifiers, + environment& env) + { + if (modifiers.find ('/') == string::npos) + return s; + + // For targets other than Windows leave the string intact. + // + if (env.host.class_ != "windows") + return s; + + // Convert forward slashes to Windows path separators (escape for + // regex). + // + string r; + for (size_t p (0);;) + { + size_t sp (s.find ('/', p)); + + if (sp != string::npos) + { + r.append (s, p, sp - p); + r.append (regex ? "\\\\" : "\\"); + p = sp + 1; + } + else + { + r.append (s, p, sp); + break; + } + } + + return r; + } + + // Return true if the script temporary directory is not created yet (and + // so cannot contain any path), a path is not under the temporary + // directory or this directory will not be removed on failure. + // + static inline bool + avail_on_failure (const path& p, const environment& env) + { + return env.temp_dir.empty () || + env.temp_dir_keep || + !p.sub (env.temp_dir); + } + + // Check if the script command output matches the expected result + // (redirect value). Noop for redirect types other than none, here_*. + // + static bool + check_output (const path& pr, + const path& op, + const path& ip, + const redirect& rd, + const location& ll, + environment& env, + bool diag, + const char* what) + { + auto input_info = [&ip, &ll, &env] (diag_record& d) + { + if (non_empty (ip, ll) && avail_on_failure (ip, env)) + d << info << "stdin: " << ip; + }; + + auto output_info = [&what, &ll, &env] (diag_record& d, + const path& p, + const char* prefix = "", + const char* suffix = "") + { + if (non_empty (p, ll)) + { + if (avail_on_failure (p, env)) + d << info << prefix << what << suffix << ": " << p; + } + else + d << info << prefix << what << suffix << " is empty"; + }; + + if (rd.type == redirect_type::none) + { + // Check that there is no output produced. + // + assert (!op.empty ()); + + if (!non_empty (op, ll)) + return true; + + if (diag) + { + diag_record d (error (ll)); + d << pr << " unexpectedly writes to " << what; + + if (avail_on_failure (op, env)) + d << info << what << ": " << op; + + input_info (d); + + // Print cached output. + // + print_file (d, op, ll); + } + + // Fall through (to return false). + // + } + else if (rd.type == redirect_type::here_str_literal || + rd.type == redirect_type::here_doc_literal || + (rd.type == redirect_type::file && + rd.file.mode == redirect_fmode::compare)) + { + // The expected output is provided as a file or as a string. Save the + // string to a file in the later case. + // + assert (!op.empty ()); + + path eop; + + if (rd.type == redirect_type::file) + eop = normalize (rd.file.path, *env.work_dir.path, ll); + else + { + eop = path (op + ".orig"); + + save (eop, + transform (rd.str, false /* regex */, rd.modifiers (), env), + ll); + + env.clean_special (eop); + } + + // Use the diff utility for comparison. + // + path dp ("diff"); + process_path pp (run_search (dp, true)); + + cstrings args {pp.recall_string ()}; + + // If both files being compared won't be available on failure, then + // instruct diff not to print the file paths. It seems that the only + // way to achieve this is to abandon the output unified format in the + // favor of the minimal output, which normally is still informative + // enough for the troubleshooting (contains the difference line + // numbers, etc). + // + if (avail_on_failure (eop, env) || avail_on_failure (op, env)) + args.push_back ("-u"); + + // Ignore Windows newline fluff if that's what we are running on. + // + if (env.host.class_ == "windows") + args.push_back ("--strip-trailing-cr"); + + args.push_back (eop.string ().c_str ()); + args.push_back (op.string ().c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + try + { + // Save diff's stdout to a file for troubleshooting and for the + // optional (if not too large) printing (at the end of + // diagnostics). + // + path ep (op + ".diff"); + auto_fd efd; + + try + { + efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create); + env.clean_special (ep); + } + catch (const io_error& e) + { + fail (ll) << "unable to write to " << ep << ": " << e; + } + + // Diff utility prints the differences to stdout. But for the + // user it is a part of the script failure diagnostics so let's + // redirect stdout to stderr. + // + process p (pp, args.data (), 0, 2, efd.get ()); + efd.reset (); + + if (p.wait ()) + return true; + + assert (p.exit); + const process_exit& pe (*p.exit); + + // Note that both POSIX and GNU diff report error by exiting with + // the code > 1. + // + if (!pe.normal () || pe.code () > 1) + { + diag_record d (fail (ll)); + print_process (d, args); + d << " " << pe; + + print_file (d, ep, ll); + } + + // Output doesn't match the expected result. + // + if (diag) + { + diag_record d (error (ll)); + d << pr << " " << what << " doesn't match expected"; + + output_info (d, op); + output_info (d, eop, "expected "); + output_info (d, ep, "", " diff"); + input_info (d); + + print_file (d, ep, ll); + } + + // Fall through (to return false). + // + } + catch (const process_error& e) + { + error (ll) << "unable to execute " << pp << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + } + else if (rd.type == redirect_type::here_str_regex || + rd.type == redirect_type::here_doc_regex) + { + // The overall plan is: + // + // 1. Create regex line string. While creating it's line characters + // transform regex lines according to the redirect modifiers. + // + // 2. Create line regex using the line string. If creation fails + // then save the (transformed) regex redirect to a file for + // troubleshooting. + // + // 3. Parse the output into the literal line string. + // + // 4. Match the output line string with the line regex. + // + // 5. If match fails save the (transformed) regex redirect to a file + // for troubleshooting. + // + using namespace regex; + + assert (!op.empty ()); + + // Create regex line string. + // + line_pool pool; + line_string rls; + const regex_lines rl (rd.regex); + + // Parse regex flags. + // + // When add support for new flags don't forget to update + // parse_regex(). + // + auto parse_flags = [] (const string& f) -> char_flags + { + char_flags r (char_flags::none); + + for (char c: f) + { + switch (c) + { + case 'd': r |= char_flags::idot; break; + case 'i': r |= char_flags::icase; break; + default: assert (false); // Error so should have been checked. + } + } + + return r; + }; + + // Return original regex line with the transformation applied. + // + auto line = [&rl, &rd, &env] (const regex_line& l) -> string + { + string r; + if (l.regex) // Regex (possibly empty), + { + r += rl.intro; + r += transform (l.value, true /* regex */, rd.modifiers (), env); + r += rl.intro; + r += l.flags; + } + else if (!l.special.empty ()) // Special literal. + r += rl.intro; + else // Textual literal. + r += transform (l.value, false /* regex */, rd.modifiers (), env); + + r += l.special; + return r; + }; + + // Return regex line location. + // + // Note that we rely on the fact that the command and regex lines + // are always belong to the same file. + // + auto loc = [&ll] (uint64_t line, uint64_t column) -> location + { + location r (ll); + r.line = line; + r.column = column; + return r; + }; + + // Save the regex to file for troubleshooting, return the file path + // it have been saved to. + // + // Note that we save the regex on line regex creation failure or if + // the program output doesn't match. + // + auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path + { + path rp (op + ".regex"); + + // Encode here-document regex global flags if present as a file + // name suffix. For example if icase and idot flags are specified + // the name will look like: + // + // stdout.regex-di + // + if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ()) + rp += '-' + rl.flags; + + // Note that if would be more efficient to directly write chunks + // to file rather than to compose a string first. Hower we don't + // bother (about performance) for the sake of the code as we + // already failed. + // + string s; + for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); + i != e; ++i) + { + if (i != b) s += '\n'; + s += line (*i); + } + + save (rp, s, ll); + return rp; + }; + + // Finally create regex line string. + // + // Note that diagnostics doesn't refer to the program path as it is + // irrelevant to failures at this stage. + // + char_flags gf (parse_flags (rl.flags)); // Regex global flags. + + for (const auto& l: rl.lines) + { + if (l.regex) // Regex (with optional special characters). + { + line_char c; + + // Empty regex is a special case repesenting the blank line. + // + if (l.value.empty ()) + c = line_char ("", pool); + else + { + try + { + string s (transform (l.value, + true /* regex */, + rd.modifiers (), + env)); + + c = line_char ( + char_regex (s, gf | parse_flags (l.flags)), pool); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful. + // + diag_record d (fail (loc (l.line, l.column))); + + if (rd.type == redirect_type::here_str_regex) + d << "invalid " << what << " regex redirect" << e << + info << "regex: '" << line (l) << "'"; + else + d << "invalid char-regex in " << what << " regex redirect" + << e << + info << "regex line: '" << line (l) << "'"; + + d << endf; + } + } + + rls += c; // Append blank literal or regex line char. + } + else if (!l.special.empty ()) // Special literal. + { + // Literal can not be followed by special characters in the same + // line. + // + assert (l.value.empty ()); + } + else // Textual literal. + { + // Append literal line char. + // + rls += line_char (transform (l.value, + false /* regex */, + rd.modifiers (), + env), + pool); + } + + for (char c: l.special) + { + if (line_char::syntax (c)) + rls += line_char (c); // Append special line char. + else + fail (loc (l.line, l.column)) + << "invalid syntax character '" << c << "' in " << what + << " regex redirect" << + info << "regex line: '" << line (l) << "'"; + } + } + + // Create line regex. + // + line_regex regex; + + try + { + regex = line_regex (move (rls), move (pool)); + } + catch (const regex_error& e) + { + // Note that line regex creation can not fail for here-string + // redirect as it doesn't have syntax line chars. That in + // particular means that end_line and end_column are meaningful. + // + assert (rd.type == redirect_type::here_doc_regex); + + diag_record d (fail (loc (rd.end_line, rd.end_column))); + + // Print regex_error description if meaningful. + // + d << "invalid " << what << " regex redirect" << e; + + // It would be a waste to save the regex into the file just to + // remove it. + // + if (env.temp_dir_keep) + output_info (d, save_regex (), "", " regex"); + } + + // Parse the output into the literal line string. + // + line_string ls; + + try + { + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (getline() failed to extract any character). + // + // Note that newlines are treated as line-chars separators. That + // in particular means that the trailing newline produces a blank + // line-char (empty literal). Empty output produces the zero-length + // line-string. + // + // Also note that we strip the trailing CR characters (otherwise + // can mismatch when, for example, cross-testing). + // + ifdstream is (op, ifdstream::badbit); + is.peek (); // Sets eofbit for an empty stream. + + while (!is.eof ()) + { + string s; + getline (is, s); + + // It is safer to strip CRs in cycle, as msvcrt unexplainably + // adds too much trailing junk to the system_error descriptions, + // and so it can appear in programs output. For example: + // + // ...: Invalid data.\r\r\n + // + // Note that our custom operator<<(ostream&, const exception&) + // removes this junk. + // + while (!s.empty () && s.back () == '\r') + s.pop_back (); + + ls += line_char (move (s), regex.pool); + } + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << op << ": " << e; + } + + // Match the output with the regex. + // + if (regex_match (ls, regex)) // Doesn't throw. + return true; + + // Output doesn't match the regex. + // + // Unless the temporary directory is removed on failure, we save the + // regex to file for troubleshooting regardless of whether we print + // the diagnostics or not. We, however, register it for cleanup in the + // later case (the expression may still succeed, we can be evaluating + // the if condition, etc). + // + optional rp; + if (env.temp_dir_keep) + rp = save_regex (); + + if (diag) + { + diag_record d (error (ll)); + d << pr << " " << what << " doesn't match regex"; + + output_info (d, op); + + if (rp) + output_info (d, *rp, "", " regex"); + + input_info (d); + + // Print cached output. + // + print_file (d, op, ll); + } + else if (rp) + env.clean_special (*rp); + + // Fall through (to return false). + // + } + else // Noop. + return true; + + return false; + } + + // The exit pseudo-builtin: exit the script successfully, or print the + // diagnostics and exit the script unsuccessfully. Always throw exit + // exception. + // + // exit [] + // + [[noreturn]] static void + exit_builtin (const strings& args, const location& ll) + { + auto i (args.begin ()); + auto e (args.end ()); + + // Process arguments. + // + // If no argument is specified, then exit successfully. Otherwise, + // print the diagnostics and exit unsuccessfully. + // + if (i == e) + throw exit (true); + + const string& s (*i++); + + if (i != e) + fail (ll) << "unexpected argument '" << *i << "'"; + + error (ll) << s; + throw exit (false); + } + + // The set pseudo-builtin: set variable from the stdin input. + // + // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [] + // + static void + set_builtin (environment& env, + const strings& args, + auto_fd in, + const location& ll) + { + try + { + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (read operation failed to extract any + // character). + // + ifdstream cin (move (in), ifdstream::badbit); + + // Parse arguments. + // + cli::vector_scanner scan (args); + set_options ops (scan); + + if (ops.whitespace () && ops.newline ()) + fail (ll) << "both -n|--newline and -w|--whitespace specified"; + + if (!scan.more ()) + fail (ll) << "missing variable name"; + + string a (scan.next ()); // Either attributes or variable name. + const string* ats (!scan.more () ? nullptr : &a); + string vname (!scan.more () ? move (a) : scan.next ()); + + if (scan.more ()) + fail (ll) << "unexpected argument '" << scan.next () << "'"; + + if (ats != nullptr && ats->empty ()) + fail (ll) << "empty variable attributes"; + + if (vname.empty ()) + fail (ll) << "empty variable name"; + + // Read the input. + // + cin.peek (); // Sets eofbit for an empty stream. + + names ns; + while (!cin.eof ()) + { + // Read next element that depends on the whitespace mode being + // enabled or not. For the later case it also make sense to strip + // the trailing CRs that can appear while, for example, + // cross-testing Windows target or as a part of msvcrt junk + // production (see above). + // + string s; + if (ops.whitespace ()) + cin >> s; + else + { + getline (cin, s); + + while (!s.empty () && s.back () == '\r') + s.pop_back (); + } + + // If failbit is set then we read nothing into the string as eof is + // reached. That in particular means that the stream has trailing + // whitespaces (possibly including newlines) if the whitespace mode + // is enabled, or the trailing newline otherwise. If so then + // we append the "blank" to the variable value in the exact mode + // prior to bailing out. + // + if (cin.fail ()) + { + if (ops.exact ()) + { + if (ops.whitespace () || ops.newline ()) + ns.emplace_back (move (s)); // Reuse empty string. + else if (ns.empty ()) + ns.emplace_back ("\n"); + else + ns[0].value += '\n'; + } + + break; + } + + if (ops.whitespace () || ops.newline () || ns.empty ()) + ns.emplace_back (move (s)); + else + { + ns[0].value += '\n'; + ns[0].value += s; + } + } + + cin.close (); + + env.set_variable (move (vname), + move (ns), + ats != nullptr ? *ats : empty_string, + ll); + } + catch (const io_error& e) + { + fail (ll) << "set: " << e; + } + catch (const cli::exception& e) + { + fail (ll) << "set: " << e; + } + } + + // Sorted array of builtins that support filesystem entries cleanup. + // + static const char* cleanup_builtins[] = { + "cp", "ln", "mkdir", "mv", "touch"}; + + static inline bool + cleanup_builtin (const string& name) + { + return binary_search ( + cleanup_builtins, + cleanup_builtins + + sizeof (cleanup_builtins) / sizeof (*cleanup_builtins), + name); + } + + static bool + run_pipe (environment& env, + command_pipe::const_iterator bc, + command_pipe::const_iterator ec, + auto_fd ifd, + size_t ci, size_t li, const location& ll, + bool diag) + { + if (bc == ec) // End of the pipeline. + return true; + + // The overall plan is to run the first command in the pipe, reading + // its input from the file descriptor passed (or, for the first + // command, according to stdin redirect specification) and redirecting + // its output to the right-hand part of the pipe recursively. Fail if + // the right-hand part fails. Otherwise check the process exit code, + // match stderr (and stdout for the last command in the pipe) according + // to redirect specification(s) and fail if any of the above fails. + // + const command& c (*bc); + + // Register the command explicit cleanups. Verify that the path being + // cleaned up is a sub-path of the script working directory. Fail if + // this is not the case. + // + for (const auto& cl: c.cleanups) + { + const path& p (cl.path); + path np (normalize (p, *env.work_dir.path, ll)); + + const string& ls (np.leaf ().string ()); + bool wc (ls == "*" || ls == "**" || ls == "***"); + const path& cp (wc ? np.directory () : np); + const dir_path* sd (env.sandbox_dir.path); + + if (sd != nullptr && !cp.sub (*sd)) + fail (ll) << (wc ? "wildcard" : + p.to_directory () ? "directory" : + "file") + << " cleanup " << p << " is out of " + << diag_path (env.sandbox_dir); + + env.clean ({cl.type, move (np)}, false); + } + + bool eq (c.exit.comparison == exit_comparison::eq); + + // If stdin file descriptor is not open then this is the first pipeline + // command. + // + bool first (ifd.get () == -1); + + command_pipe::const_iterator nc (bc + 1); + bool last (nc == ec); + + const string& program (c.program.string ()); + + const redirect& in ((c.in ? *c.in : env.in).effective ()); + + const redirect* out (!last + ? nullptr // stdout is piped. + : &(c.out ? *c.out : env.out).effective ()); + + const redirect& err ((c.err ? *c.err : env.err).effective ()); + + auto process_args = [&c] () -> cstrings + { + cstrings args {c.program.string ().c_str ()}; + + for (const auto& a: c.arguments) + args.push_back (a.c_str ()); + + args.push_back (nullptr); + return args; + }; + + // Prior to opening file descriptors for command input/output + // redirects let's check if the command is the exit builtin. Being a + // builtin syntactically it differs from the regular ones in a number + // of ways. It doesn't communicate with standard streams, so + // redirecting them is meaningless. It may appear only as a single + // command in a pipeline. It doesn't return any value and stops the + // script execution, so checking its exit status is meaningless as + // well. That all means we can short-circuit here calling the builtin + // and bailing out right after that. Checking that the user didn't + // specify any redirects or exit code check sounds like a right thing + // to do. + // + if (program == "exit") + { + // In case the builtin is erroneously pipelined from the other + // command, we will close stdin gracefully (reading out the stream + // content), to make sure that the command doesn't print any + // unwanted diagnostics about IO operation failure. + // + // Note that dtor will ignore any errors (which is what we want). + // + ifdstream is (move (ifd), fdstream_mode::skip); + + if (!first || !last) + fail (ll) << "exit builtin must be the only pipe command"; + + if (c.in) + fail (ll) << "exit builtin stdin cannot be redirected"; + + if (c.out) + fail (ll) << "exit builtin stdout cannot be redirected"; + + if (c.err) + fail (ll) << "exit builtin stderr cannot be redirected"; + + // We can't make sure that there is no exit code check. Let's, at + // least, check that non-zero code is not expected. + // + if (eq != (c.exit.code == 0)) + fail (ll) << "exit builtin exit code cannot be non-zero"; + + if (verb >= 2) + print_process (process_args ()); + + exit_builtin (c.arguments, ll); // Throws exit exception. + } + + // Create a unique path for a command standard stream cache file. + // + auto std_path = [&env, &ci, &li, &ll] (const char* n) -> path + { + using std::to_string; + + path p (n); + + // 0 if belongs to a single-line script, otherwise is the command line + // number (start from one) in the script. + // + if (li > 0) + p += "-" + to_string (li); + + // 0 if belongs to a single-command expression, otherwise is the + // command number (start from one) in the expression. + // + // Note that the name like stdin-N can relate to N-th command of a + // single-line script or to N-th single-command line of multi-line + // script. These cases are mutually exclusive and so are unambiguous. + // + if (ci > 0) + p += "-" + to_string (ci); + + return normalize (move (p), temp_dir (env), ll); + }; + + // If this is the first pipeline command, then open stdin descriptor + // according to the redirect specified. + // + path isp; + + if (!first) + assert (!c.in); // No redirect expected. + else + { + // Open a file for passing to the command stdin. + // + auto open_stdin = [&isp, &ifd, &ll] () + { + assert (!isp.empty ()); + + try + { + ifd = fdopen (isp, fdopen_mode::in); + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << isp << ": " << e; + } + }; + + switch (in.type) + { + case redirect_type::pass: + { + try + { + ifd = fddup (0); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate stdin: " << e; + } + + break; + } + case redirect_type::none: + // Somehow need to make sure that the child process doesn't read + // from stdin. That is tricky to do in a portable way. Here we + // suppose that the program which (erroneously) tries to read some + // data from stdin being redirected to /dev/null fails not being + // able to read the expected data, and so the command doesn't pass + // through. + // + // @@ Obviously doesn't cover the case when the process reads + // whatever available. + // @@ Another approach could be not to redirect stdin and let the + // process to hang which can be interpreted as a command failure. + // @@ Both ways are quite ugly. Is there some better way to do + // this? + // + // Fall through. + // + case redirect_type::null: + { + ifd = open_null (); + break; + } + case redirect_type::file: + { + isp = normalize (in.file.path, *env.work_dir.path, ll); + + open_stdin (); + break; + } + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + // We could write to the command stdin directly but instead will + // cache the data for potential troubleshooting. + // + isp = std_path ("stdin"); + + save (isp, + transform (in.str, false /* regex */, in.modifiers (), env), + ll); + + env.clean_special (isp); + + open_stdin (); + break; + } + case redirect_type::trace: + case redirect_type::merge: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + case redirect_type::here_doc_ref: assert (false); break; + } + } + + assert (ifd.get () != -1); + + // Prior to opening file descriptors for command outputs redirects + // let's check if the command is the set builtin. Being a builtin + // syntactically it differs from the regular ones in a number of ways. + // It either succeeds or terminates abnormally, so redirecting stderr + // is meaningless. It also never produces any output and may appear + // only as a terminal command in a pipeline. That means we can + // short-circuit here calling the builtin and returning right after + // that. Checking that the user didn't specify any meaningless + // redirects or exit code check sounds as a right thing to do. + // + if (program == "set") + { + if (!last) + fail (ll) << "set builtin must be the last pipe command"; + + if (c.out) + fail (ll) << "set builtin stdout cannot be redirected"; + + if (c.err) + fail (ll) << "set builtin stderr cannot be redirected"; + + if (eq != (c.exit.code == 0)) + fail (ll) << "set builtin exit code cannot be non-zero"; + + if (verb >= 2) + print_process (process_args ()); + + set_builtin (env, c.arguments, move (ifd), ll); + return true; + } + + // Open a file for command output redirect if requested explicitly + // (file overwrite/append redirects) or for the purpose of the output + // validation (none, here_*, file comparison redirects), register the + // file for cleanup, return the file descriptor. Interpret trace + // redirect according to the verbosity level (as null if below 2, as + // pass otherwise). Return nullfd, standard stream descriptor duplicate + // or null-device descriptor for merge, pass or null redirects + // respectively (not opening any file). + // + auto open = [&env, &ll, &std_path] (const redirect& r, + int dfd, + path& p) -> auto_fd + { + assert (dfd == 1 || dfd == 2); + const char* what (dfd == 1 ? "stdout" : "stderr"); + + fdopen_mode m (fdopen_mode::out | fdopen_mode::create); + + redirect_type rt (r.type != redirect_type::trace + ? r.type + : verb < 2 + ? redirect_type::null + : redirect_type::pass); + switch (rt) + { + case redirect_type::pass: + { + try + { + return fddup (dfd); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate " << what << ": " << e; + } + } + + case redirect_type::null: return open_null (); + + // Duplicate the paired file descriptor later. + // + case redirect_type::merge: return nullfd; + + case redirect_type::file: + { + // For the cmp mode the user-provided path refers a content to + // match against, rather than a content to be produced (as for + // overwrite and append modes). And so for cmp mode we redirect + // the process output to a temporary file. + // + p = r.file.mode == redirect_fmode::compare + ? std_path (what) + : normalize (r.file.path, *env.work_dir.path, ll); + + m |= r.file.mode == redirect_fmode::append + ? fdopen_mode::at_end + : fdopen_mode::truncate; + + break; + } + + case redirect_type::none: + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + p = std_path (what); + m |= fdopen_mode::truncate; + break; + } + + case redirect_type::trace: + case redirect_type::here_doc_ref: assert (false); break; + } + + auto_fd fd; + + try + { + fd = fdopen (p, m); + + if ((m & fdopen_mode::at_end) != fdopen_mode::at_end) + { + if (rt == redirect_type::file) + env.clean ({cleanup_type::always, p}, true); + else + env.clean_special (p); + } + } + catch (const io_error& e) + { + fail (ll) << "unable to write to " << p << ": " << e; + } + + return fd; + }; + + path osp; + fdpipe ofd; + + // If this is the last command in the pipeline than redirect the + // command process stdout to a file. Otherwise create a pipe and + // redirect the stdout to the write-end of the pipe. The read-end will + // be passed as stdin for the next command in the pipeline. + // + // @@ Shouldn't we allow the here-* and file output redirects for a + // command with pipelined output? Say if such redirect is present + // then the process output is redirected to a file first (as it is + // when no output pipelined), and only after the process exit code + // and the output are validated the next command in the pipeline is + // executed taking the file as an input. This could be usefull for + // script failures investigation and, for example, for validation + // "tightening". + // + if (last) + ofd.out = open (*out, 1, osp); + else + { + assert (!c.out); // No redirect expected. + ofd = open_pipe (); + } + + path esp; + auto_fd efd (open (err, 2, esp)); + + // Merge standard streams. + // + bool mo (out != nullptr && out->type == redirect_type::merge); + if (mo || err.type == redirect_type::merge) + { + auto_fd& self (mo ? ofd.out : efd); + auto_fd& other (mo ? efd : ofd.out); + + try + { + assert (self.get () == -1 && other.get () != -1); + self = fddup (other.get ()); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout") + << ": " << e; + } + } + + // All descriptors should be open to the date. + // + assert (ofd.out.get () != -1 && efd.get () != -1); + + optional exit; + builtin_function* bf (builtins.find (program)); + + bool success; + + if (bf != nullptr) + { + // Execute the builtin. + // + if (verb >= 2) + print_process (process_args ()); + + // Some of the script builtins (cp, mkdir, etc) extend libbutl + // builtins (via callbacks) registering/moving cleanups for the + // filesystem entries they create/move, unless explicitly requested + // not to do so via the --no-cleanup option. + // + // Let's "wrap up" the cleanup-related flags into the single object + // to rely on "small function object" optimization. + // + struct cleanup + { + // Whether the cleanups are enabled for the builtin. Can be set to + // false by the parse_option callback if --no-cleanup is + // encountered. + // + bool enabled = true; + + // Whether to register cleanup for a filesystem entry being + // created/updated depending on its existence. Calculated by the + // create pre-hook and used by the subsequent post-hook. + // + bool add; + + // Whether to move existing cleanups for the filesystem entry + // being moved, rather than to erase them. Calculated by the move + // pre-hook and used by the subsequent post-hook. + // + bool move; + }; + + // nullopt if the builtin doesn't support cleanups. + // + optional cln; + + if (cleanup_builtin (program)) + cln = cleanup (); + + builtin_callbacks bcs { + + // create + // + // Unless cleanups are suppressed, test that the filesystem entry + // doesn't exist (pre-hook) and, if that's the case, register the + // cleanup for the newly created filesystem entry (post-hook). + // + [&env, &cln] (const path& p, bool pre) + { + // Cleanups must be supported by a filesystem entry-creating + // builtin. + // + assert (cln); + + if (cln->enabled) + { + if (pre) + cln->add = !butl::entry_exists (p); + else if (cln->add) + env.clean ({cleanup_type::always, p}, true /* implicit */); + } + }, + + // move + // + // Validate the source and destination paths (pre-hook) and, + // unless suppressed, adjust the cleanups that are sub-paths of + // the source path (post-hook). + // + [&env, &cln] (const path& from, const path& to, bool force, bool pre) + { + // Cleanups must be supported by a filesystem entry-moving + // builtin. + // + assert (cln); + + if (pre) + { + const dir_path& wd (*env.work_dir.path); + const dir_path* sd (env.sandbox_dir.path); + + auto fail = [] (const string& d) {throw runtime_error (d);}; + + if (sd != nullptr && !from.sub (*sd) && !force) + fail (diag_path (from) + " is out of " + + diag_path (env.sandbox_dir)); + + auto check_wd = [&wd, &env, fail] (const path& p) + { + if (wd.sub (path_cast (p))) + fail (diag_path (p) + " contains " + + diag_path (env.work_dir)); + }; + + check_wd (from); + check_wd (to); + + // Unless cleanups are disabled, "move" the matching cleanups + // if the destination path doesn't exist and it is a sub-path + // of the working directory and just remove them otherwise. + // + if (cln->enabled) + cln->move = !butl::entry_exists (to) && + (sd == nullptr || to.sub (*sd)); + } + else if (cln->enabled) + { + // Move or remove the matching cleanups (see above). + // + // Note that it's not enough to just change the cleanup paths. + // We also need to make sure that these cleanups happen before + // the destination directory (or any of its parents) cleanup, + // that is potentially registered. To achieve that we can just + // relocate these cleanup entries to the end of the list, + // preserving their mutual order. Remember that cleanups in + // the list are executed in the reversed order. + // + cleanups cs; + + // Remove the source path sub-path cleanups from the list, + // adjusting/caching them if required (see above). + // + for (auto i (env.cleanups.begin ()); i != env.cleanups.end (); ) + { + script::cleanup& c (*i); + path& p (c.path); + + if (p.sub (from)) + { + if (cln->move) + { + // Note that we need to preserve the cleanup path + // trailing separator which indicates the removal + // method. Also note that leaf(), in particular, does + // that. + // + p = p != from + ? to / p.leaf (path_cast (from)) + : p.to_directory () + ? path_cast (to) + : to; + + cs.push_back (move (c)); + } + + i = env.cleanups.erase (i); + } + else + ++i; + } + + // Re-insert the adjusted cleanups at the end of the list. + // + env.cleanups.insert (env.cleanups.end (), + make_move_iterator (cs.begin ()), + make_move_iterator (cs.end ())); + + } + }, + + // remove + // + // Validate the filesystem entry path (pre-hook). + // + [&env] (const path& p, bool force, bool pre) + { + if (pre) + { + const dir_path& wd (*env.work_dir.path); + const dir_path* sd (env.sandbox_dir.path); + + auto fail = [] (const string& d) {throw runtime_error (d);}; + + if (sd != nullptr && !p.sub (*sd) && !force) + fail (diag_path (p) + " is out of " + + diag_path (env.sandbox_dir)); + + if (wd.sub (path_cast (p))) + fail (diag_path (p) + " contains " + + diag_path (env.work_dir)); + } + }, + + // parse_option + // + [&cln] (const strings& args, size_t i) + { + // Parse --no-cleanup, if it is supported by the builtin. + // + if (cln && args[i] == "--no-cleanup") + { + cln->enabled = false; + return 1; + } + + return 0; + }, + + // sleep + // + // Deactivate the thread before going to sleep. + // + [&env] (const duration& d) + { + // If/when required we could probably support the precise sleep + // mode (e.g., via an option). + // + env.context.sched.sleep (d); + } + }; + + try + { + uint8_t r; // Storage. + builtin b (bf (r, + c.arguments, + move (ifd), move (ofd.out), move (efd), + *env.work_dir.path, + bcs)); + + success = run_pipe (env, + nc, + ec, + move (ofd.in), + ci + 1, li, ll, diag); + + exit = process_exit (b.wait ()); + } + catch (const system_error& e) + { + fail (ll) << "unable to execute " << c.program << " builtin: " + << e << endf; + } + } + else + { + // Execute the process. + // + cstrings args (process_args ()); + + // Resolve the relative not simple program path against the script's + // working directory. The simple one will be left for the process + // path search machinery. Also strip the potential leading `^`, + // indicating that this is an external program rather than a + // builtin. + // + path p; + + try + { + p = path (args[0]); + + if (p.relative ()) + { + auto program = [&p, &args] (path pp) + { + p = move (pp); + args[0] = p.string ().c_str (); + }; + + if (p.simple ()) + { + const string& s (p.string ()); + + // Don't end up with an empty path. + // + if (s.size () > 1 && s[0] == '^') + program (path (s, 1, s.size () - 1)); + } + else + program (*env.work_dir.path / p); + } + } + catch (const invalid_path& e) + { + fail (ll) << "invalid program path " << e.path; + } + + try + { + process_path pp (process::path_search (args[0])); + + // Note: the builtin-escaping character '^' is not printed. + // + if (verb >= 2) + print_process (args); + + process pr ( + pp, + args.data (), + {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, + env.work_dir.path->string ().c_str ()); + + ifd.reset (); + ofd.out.reset (); + efd.reset (); + + success = run_pipe (env, + nc, + ec, + move (ofd.in), + ci + 1, li, ll, diag); + + pr.wait (); + + exit = move (pr.exit); + } + catch (const process_error& e) + { + error (ll) << "unable to execute " << args[0] << ": " << e; + + if (e.child) + std::exit (1); + + throw failed (); + } + } + + assert (exit); + + // If the righ-hand side pipeline failed than the whole pipeline fails, + // and no further checks are required. + // + if (!success) + return false; + + const path& pr (c.program); + + // If there is no valid exit code available by whatever reason then we + // print the proper diagnostics, dump stderr (if cached and not too + // large) and fail the whole script. Otherwise if the exit code is not + // correct then we print diagnostics if requested and fail the + // pipeline. + // + bool valid (exit->normal ()); + + // On Windows the exit code can be out of the valid codes range being + // defined as uint16_t. + // +#ifdef _WIN32 + if (valid) + valid = exit->code () < 256; +#endif + + success = valid && eq == (exit->code () == c.exit.code); + + if (!valid || (!success && diag)) + { + // In the presense of a valid exit code we print the diagnostics and + // return false rather than throw. + // + diag_record d (valid ? error (ll) : fail (ll)); + + if (!exit->normal ()) + d << pr << " " << *exit; + else + { + uint16_t ec (exit->code ()); // Make sure is printed as integer. + + if (!valid) + d << pr << " exit code " << ec << " out of 0-255 range"; + else if (!success) + { + if (diag) + d << pr << " exit code " << ec << (eq ? " != " : " == ") + << static_cast (c.exit.code); + } + else + assert (false); + } + + if (non_empty (esp, ll) && avail_on_failure (esp, env)) + d << info << "stderr: " << esp; + + if (non_empty (osp, ll) && avail_on_failure (osp, env)) + d << info << "stdout: " << osp; + + if (non_empty (isp, ll) && avail_on_failure (isp, env)) + d << info << "stdin: " << isp; + + // Print cached stderr. + // + print_file (d, esp, ll); + } + + // If exit code is correct then check if the standard outputs match the + // expectations. Note that stdout is only redirected to file for the + // last command in the pipeline. + // + // The thinking behind matching stderr first is that if it mismatches, + // then the program probably misbehaves (executes wrong functionality, + // etc) in which case its stdout doesn't really matter. + // + if (success) + success = + check_output (pr, esp, isp, err, ll, env, diag, "stderr") && + (!last || + check_output (pr, osp, isp, *out, ll, env, diag, "stdout")); + + return success; + } + + static bool + run_expr (environment& env, + const command_expr& expr, + size_t li, const location& ll, + bool diag) + { + // Commands are numbered sequentially throughout the expression + // starting with 1. Number 0 means the command is a single one. + // + size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1 + ? 0 + : 1); + + // If there is no ORs to the right of a pipe then the pipe failure is + // fatal for the whole expression. In particular, the pipe must print + // the diagnostics on failure (if generally allowed). So we find the + // pipe that "switches on" the diagnostics potential printing. + // + command_expr::const_iterator trailing_ands; // Undefined if diag is + // disallowed. + if (diag) + { + auto i (expr.crbegin ()); + for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ; + trailing_ands = i.base (); + } + + bool r (false); + bool print (false); + + for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i) + { + if (diag && i + 1 == trailing_ands) + print = true; + + const command_pipe& p (i->pipe); + bool or_op (i->op == expr_operator::log_or); + + // Short-circuit if the pipe result must be OR-ed with true or AND-ed + // with false. + // + if (!((or_op && r) || (!or_op && !r))) + r = run_pipe ( + env, p.begin (), p.end (), auto_fd (), ci, li, ll, print); + + ci += p.size (); + } + + return r; + } + + void + run (environment& env, + const command_expr& expr, + size_t li, const location& ll) + { + // Note that we don't print the expression at any verbosity level + // assuming that the caller does this, potentially providing some + // additional information (command type, etc). + // + if (!run_expr (env, expr, li, ll, true /* diag */)) + throw failed (); // Assume diagnostics is already printed. + } + + bool + run_if (environment& env, + const command_expr& expr, + size_t li, const location& ll) + { + // Note that we don't print the expression here (see above). + // + return run_expr (env, expr, li, ll, false /* diag */); + } + + void + clean (environment& env, const location& ll) + { + context& ctx (env.context); + const dir_path& wdir (*env.work_dir.path); + + // Note that we operate with normalized paths here. + // + // Remove special files. The order is not important as we don't + // expect directories here. + // + for (const path& p: env.special_cleanups) + { + // Remove the file if exists. Fail otherwise. + // + if (rmfile (ctx, p, 3) == rmfile_status::not_exist) + fail (ll) << "registered for cleanup special file " << p + << " does not exist"; + } + + // Remove files and directories in the order opposite to the order of + // cleanup registration. + // + for (const auto& c: reverse_iterate (env.cleanups)) + { + cleanup_type t (c.type); + + // Skip whenever the path exists or not. + // + if (t == cleanup_type::never) + continue; + + const path& cp (c.path); + + // Wildcard with the last component being '***' (without trailing + // separator) matches all files and sub-directories recursively as + // well as the start directories itself. So we will recursively + // remove the directories that match the parent (for the original + // path) directory wildcard. + // + bool recursive (cp.leaf ().representation () == "***"); + const path& p (!recursive ? cp : cp.directory ()); + + // Remove files or directories using wildcard. + // + if (path_pattern (p)) + { + bool removed (false); + + auto rm = [&cp, recursive, &removed, &ll, &ctx, &wdir] + (path&& pe, const string&, bool interm) + { + if (!interm) + { + // While removing the entry we can get not_exist due to + // racing conditions, but that's ok if somebody did our job. + // Note that we still set the removed flag to true in this + // case. + // + removed = true; // Will be meaningless on failure. + + if (pe.to_directory ()) + { + dir_path d (path_cast (pe)); + + if (!recursive) + { + rmdir_status r (rmdir (ctx, d, 3)); + + if (r != rmdir_status::not_empty) + return true; + + diag_record dr (fail (ll)); + dr << "registered for cleanup directory " << d + << " is not empty"; + + print_dir (dr, d, ll); + dr << info << "wildcard: '" << cp << "'"; + } + else + { + // Don't remove the working directory (it will be removed + // by the dedicated cleanup). + // + // Cast to uint16_t to avoid ambiguity with + // libbutl::rmdir_r(). + // + rmdir_status r (rmdir_r (ctx, d, d != wdir, 3)); + + if (r != rmdir_status::not_empty) + return true; + + // The directory is unlikely to be current but let's keep + // for completeness. + // + fail (ll) << "registered for cleanup wildcard " << cp + << " matches the current directory"; + } + } + else + rmfile (ctx, pe, 3); + } + + return true; + }; + + // Note that here we rely on the fact that recursive iterating + // goes depth-first (which make sense for the cleanup). + // + try + { + // Doesn't follow symlinks. + // + path_search (p, + rm, + dir_path () /* start */, + path_match_flags::none); + } + catch (const system_error& e) + { + fail (ll) << "unable to cleanup wildcard " << cp << ": " << e; + } + + // Removal of no filesystem entries is not an error for 'maybe' + // cleanup type. + // + if (removed || t == cleanup_type::maybe) + continue; + + fail (ll) << "registered for cleanup wildcard " << cp + << " doesn't match any " + << (recursive + ? "path" + : p.to_directory () + ? "directory" + : "file"); + } + + // Remove the directory if exists and empty. Fail otherwise. + // Removal of non-existing directory is not an error for 'maybe' + // cleanup type. + // + if (p.to_directory ()) + { + dir_path d (path_cast (p)); + bool wd (d == wdir); + + // Don't remove the working directory for the recursive cleanup + // (it will be removed by the dedicated one). + // + // Note that the root working directory contains the + // .buildignore file (see above). + // + // @@ If 'd' is a file then will fail with a diagnostics having + // no location info. Probably need to add an optional location + // parameter to rmdir() function. The same problem exists for + // a file cleanup when try to rmfile() directory instead of + // file. + // + rmdir_status r (recursive + ? rmdir_r (ctx, d, !wd, static_cast (3)) + : rmdir (ctx, d, 3)); + + if (r == rmdir_status::success || + (r == rmdir_status::not_exist && t == cleanup_type::maybe)) + continue; + + diag_record dr (fail (ll)); + dr << "registered for cleanup directory " << d + << (r == rmdir_status::not_exist ? " does not exist" : + !recursive ? " is not empty" + : " is current"); + + if (r == rmdir_status::not_empty) + print_dir (dr, d, ll); + } + + // Remove the file if exists. Fail otherwise. Removal of + // non-existing file is not an error for 'maybe' cleanup type. + // + if (rmfile (ctx, p, 3) == rmfile_status::not_exist && + t == cleanup_type::always) + fail (ll) << "registered for cleanup file " << p + << " does not exist"; + } + } + + void + print_dir (diag_record& dr, const dir_path& p, const location& ll) + { + try + { + size_t n (0); + for (const dir_entry& de: dir_iterator (p, + false /* ignore_dangling */)) + { + if (n++ < 10) + dr << '\n' << (de.ltype () == entry_type::directory + ? path_cast (de.path ()) + : de.path ()); + } + + if (n > 10) + dr << "\nand " << n - 10 << " more file(s)"; + } + catch (const system_error& e) + { + fail (ll) << "unable to iterate over " << p << ": " << e; + } + } + } +} -- cgit v1.1