diff options
Diffstat (limited to 'libbuild2/script')
-rw-r--r-- | libbuild2/script/builtin-options.cxx | 341 | ||||
-rw-r--r-- | libbuild2/script/builtin-options.hxx | 84 | ||||
-rw-r--r-- | libbuild2/script/builtin-options.ixx | 57 | ||||
-rw-r--r-- | libbuild2/script/builtin.cli | 7 | ||||
-rw-r--r-- | libbuild2/script/lexer.cxx | 11 | ||||
-rw-r--r-- | libbuild2/script/lexer.hxx | 2 | ||||
-rw-r--r-- | libbuild2/script/parser.cxx | 714 | ||||
-rw-r--r-- | libbuild2/script/parser.hxx | 61 | ||||
-rw-r--r-- | libbuild2/script/regex.cxx | 18 | ||||
-rw-r--r-- | libbuild2/script/regex.hxx | 20 | ||||
-rw-r--r-- | libbuild2/script/run.cxx | 1700 | ||||
-rw-r--r-- | libbuild2/script/run.hxx | 54 | ||||
-rw-r--r-- | libbuild2/script/script.cxx | 50 | ||||
-rw-r--r-- | libbuild2/script/script.hxx | 52 |
14 files changed, 2512 insertions, 659 deletions
diff --git a/libbuild2/script/builtin-options.cxx b/libbuild2/script/builtin-options.cxx index 8e15ddd..b71b9d3 100644 --- a/libbuild2/script/builtin-options.cxx +++ b/libbuild2/script/builtin-options.cxx @@ -187,6 +187,56 @@ namespace build2 } }; + template <typename K, typename V, typename C> + struct parser<std::multimap<K, V, C> > + { + static void + parse (std::multimap<K, V, C>& m, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (s.more ()) + { + std::size_t pos (s.position ()); + std::string ov (s.next ()); + std::string::size_type p = ov.find ('='); + + K k = K (); + V v = V (); + std::string kstr (ov, 0, p); + std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ())); + + int ac (2); + char* av[] = + { + const_cast<char*> (o), + 0 + }; + + bool dummy; + if (!kstr.empty ()) + { + av[1] = const_cast<char*> (kstr.c_str ()); + argv_scanner s (0, ac, av, false, pos); + parser<K>::parse (k, dummy, s); + } + + if (!vstr.empty ()) + { + av[1] = const_cast<char*> (vstr.c_str ()); + argv_scanner s (0, ac, av, false, pos); + parser<V>::parse (v, dummy, s); + } + + m.insert (typename std::multimap<K, V, C>::value_type (k, v)); + } + else + throw missing_value (o); + + xs = true; + } + }; + template <typename X, typename T, T X::*M> void thunk (X& x, scanner& s) @@ -1076,6 +1126,297 @@ namespace build2 return r; } + + // for_options + // + + for_options:: + for_options () + : exact_ (), + newline_ (), + whitespace_ () + { + } + + for_options:: + for_options (int& argc, + char** argv, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + } + + for_options:: + for_options (int start, + int& argc, + char** argv, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + 
::build2::build::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + } + + for_options:: + for_options (int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + for_options:: + for_options (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + for_options:: + for_options (::build2::build::cli::scanner& s, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + _parse (s, opt, arg); + } + + typedef + std::map<std::string, void (*) (for_options&, ::build2::build::cli::scanner&)> + _cli_for_options_map; + + static _cli_for_options_map _cli_for_options_map_; + + struct _cli_for_options_map_init + { + _cli_for_options_map_init () + { + _cli_for_options_map_["--exact"] = + &::build2::build::cli::thunk< for_options, &for_options::exact_ >; + _cli_for_options_map_["-e"] = + &::build2::build::cli::thunk< for_options, &for_options::exact_ >; + _cli_for_options_map_["--newline"] = + &::build2::build::cli::thunk< for_options, &for_options::newline_ >; + _cli_for_options_map_["-n"] = + &::build2::build::cli::thunk< for_options, &for_options::newline_ >; + _cli_for_options_map_["--whitespace"] = + &::build2::build::cli::thunk< for_options, &for_options::whitespace_ >; + _cli_for_options_map_["-w"] = + &::build2::build::cli::thunk< for_options, &for_options::whitespace_ >; + } + }; + + static _cli_for_options_map_init _cli_for_options_map_init_; + + bool for_options:: + _parse (const 
char* o, ::build2::build::cli::scanner& s) + { + _cli_for_options_map::const_iterator i (_cli_for_options_map_.find (o)); + + if (i != _cli_for_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool for_options:: + _parse (::build2::build::cli::scanner& s, + ::build2::build::cli::unknown_mode opt_mode, + ::build2::build::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::build2::build::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + s.skip (); + r = true; + continue; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. + // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast<char*> (co.c_str ()), + const_cast<char*> (v) + }; + + ::build2::build::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::build2::build::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::build2::build::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. 
+ // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::build2::build::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::cli::unknown_mode::fail: + { + throw ::build2::build::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::build2::build::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::cli::unknown_mode::fail: + { + throw ::build2::build::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } } } diff --git a/libbuild2/script/builtin-options.hxx b/libbuild2/script/builtin-options.hxx index c7cebbc..9361d18 100644 --- a/libbuild2/script/builtin-options.hxx +++ b/libbuild2/script/builtin-options.hxx @@ -253,6 +253,90 @@ namespace build2 vector<string> clear_; bool clear_specified_; }; + + class for_options + { + public: + for_options (); + + for_options (int& argc, + char** argv, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int start, + int& argc, + char** argv, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + 
::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (::build2::build::cli::scanner&, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + // Option accessors and modifiers. + // + const bool& + exact () const; + + bool& + exact (); + + void + exact (const bool&); + + const bool& + newline () const; + + bool& + newline (); + + void + newline (const bool&); + + const bool& + whitespace () const; + + bool& + whitespace (); + + void + whitespace (const bool&); + + // Implementation details. + // + protected: + bool + _parse (const char*, ::build2::build::cli::scanner&); + + private: + bool + _parse (::build2::build::cli::scanner&, + ::build2::build::cli::unknown_mode option, + ::build2::build::cli::unknown_mode argument); + + public: + bool exact_; + bool newline_; + bool whitespace_; + }; } } diff --git a/libbuild2/script/builtin-options.ixx b/libbuild2/script/builtin-options.ixx index 8f84177..575eb95 100644 --- a/libbuild2/script/builtin-options.ixx +++ b/libbuild2/script/builtin-options.ixx @@ -153,6 +153,63 @@ namespace build2 { this->clear_specified_ = x; } + + // for_options + // + + inline const bool& for_options:: + exact () const + { + return this->exact_; + } + + inline bool& for_options:: + exact () + { + return this->exact_; + } + + inline void for_options:: + exact (const bool& x) + { + this->exact_ = x; + } + + inline const bool& for_options:: + newline () const + { + return this->newline_; + } + + inline bool& for_options:: + newline () + { + return this->newline_; + } + + inline void for_options:: + newline (const bool& x) + { + this->newline_ = x; + } + + inline const bool& for_options:: + whitespace () const + { + return this->whitespace_; + } + + inline bool& for_options:: + 
whitespace () + { + return this->whitespace_; + } + + inline void for_options:: + whitespace (const bool& x) + { + this->whitespace_ = x; + } } } diff --git a/libbuild2/script/builtin.cli b/libbuild2/script/builtin.cli index 50dd3a0..c993983 100644 --- a/libbuild2/script/builtin.cli +++ b/libbuild2/script/builtin.cli @@ -30,5 +30,12 @@ namespace build2 vector<string> --unset|-u; vector<string> --clear|-c; }; + + class for_options + { + bool --exact|-e; + bool --newline|-n; + bool --whitespace|-w; + }; } } diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx index 7577149..e13bbdb 100644 --- a/libbuild2/script/lexer.cxx +++ b/libbuild2/script/lexer.cxx @@ -24,10 +24,7 @@ namespace build2 bool q (true); // quotes if (!esc) - { - assert (!state_.empty ()); - esc = state_.top ().escapes; - } + esc = current_state ().escapes; switch (m) { @@ -84,7 +81,7 @@ namespace build2 } assert (ps == '\0'); - state_.push ( + mode_impl ( state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2}); } @@ -93,7 +90,7 @@ namespace build2 { token r; - switch (state_.top ().mode) + switch (mode ()) { case lexer_mode::command_expansion: case lexer_mode::here_line_single: @@ -119,7 +116,7 @@ namespace build2 xchar c (get ()); uint64_t ln (c.line), cn (c.column); - const state& st (state_.top ()); + const state& st (current_state ()); lexer_mode m (st.mode); auto make_token = [&sep, &m, ln, cn] (type t) diff --git a/libbuild2/script/lexer.hxx b/libbuild2/script/lexer.hxx index dbfdfcc..3cbcc03 100644 --- a/libbuild2/script/lexer.hxx +++ b/libbuild2/script/lexer.hxx @@ -112,6 +112,8 @@ namespace build2 const redirect_aliases_type& redirect_aliases; protected: + using build2::lexer::mode; // Getter. 
+ lexer (istream& is, const path_name& name, uint64_t line, const char* escapes, bool set_mode, diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index c199c0e..84d2afc 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -4,10 +4,13 @@ #include <libbuild2/script/parser.hxx> #include <cstring> // strchr() +#include <sstream> #include <libbuild2/variable.hxx> -#include <libbuild2/script/run.hxx> // exit + +#include <libbuild2/script/run.hxx> // exit, stream_reader #include <libbuild2/script/lexer.hxx> +#include <libbuild2/script/builtin-options.hxx> using namespace std; @@ -140,18 +143,20 @@ namespace build2 return nullopt; } - pair<command_expr, parser::here_docs> parser:: + parser::parse_command_expr_result parser:: parse_command_expr (token& t, type& tt, - const redirect_aliases& ra) + const redirect_aliases& ra, + optional<token>&& program) { - // enter: first token of the command line + // enter: first (or second, if program) token of the command line // leave: <newline> or unknown token command_expr expr; // OR-ed to an implied false for the first term. // - expr.push_back ({expr_operator::log_or, command_pipe ()}); + if (!pre_parse_) + expr.push_back ({expr_operator::log_or, command_pipe ()}); command c; // Command being assembled. @@ -218,8 +223,8 @@ namespace build2 // Add the next word to either one of the pending positions or to // program arguments by default. // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) + auto add_word = [&c, &p, &mod, &check_regex_mod, this] + (string&& w, const location& l) { auto add_merge = [&l, this] (optional<redirect>& r, const string& w, @@ -697,11 +702,30 @@ namespace build2 const location ll (get_location (t)); // Line location. // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, exit status comparison, etc). + // "terminators" (newline or unknown/unexpected token). 
// location l (ll); names ns; // Reuse to reduce allocations. + bool for_loop (false); + + if (program) + { + assert (program->type == type::word); + + // Note that here we skip all the parse_program() business since the + // program can only be one of the specially-recognized names. + // + if (program->value == "for") + for_loop = true; + else + assert (false); // Must be specially-recognized program. + + // Save the program name and continue parsing as a command. + // + add_word (move (program->value), get_location (*program)); + } + for (bool done (false); !done; l = get_location (t)) { tt = ra.resolve (tt); @@ -717,6 +741,9 @@ namespace build2 case type::equal: case type::not_equal: { + if (for_loop) + fail (l) << "for-loop exit code cannot be checked"; + if (!pre_parse_) check_pending (l); @@ -747,30 +774,39 @@ namespace build2 } case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Fall through. - case type::in_pass: - case type::out_pass: + case type::clean: + if (for_loop) + fail (l) << "cleanup in for-loop"; + // Fall through. - case type::in_null: + case type::out_pass: case type::out_null: - case type::out_trace: - case type::out_merge: - - case type::in_str: - case type::in_doc: case type::out_str: case type::out_doc: - - case type::in_file: case type::out_file_cmp: case type::out_file_ovr: case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. - case type::clean: + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: { if (pre_parse_) { @@ -968,6 +1004,42 @@ namespace build2 next (t, tt); break; } + case type::lsbrace: + { + // Recompose the attributes into a single command argument. 
+ // + assert (!pre_parse_); + + attributes_push (t, tt, true /* standalone */); + + attributes as (attributes_pop ()); + assert (!as.empty ()); + + ostringstream os; + names storage; + char c ('['); + for (const attribute& a: as) + { + os << c << a.name; + + if (!a.value.null) + { + os << '='; + + storage.clear (); + to_stream (os, + reverse (a.value, storage, true /* reduce */), + quote_mode::normal, + '@'); + } + + c = ','; + } + os << ']'; + + add_word (os.str (), l); + break; + } default: { // Bail out if this is one of the unknown tokens. @@ -1036,11 +1108,12 @@ namespace build2 hd.push_back ( here_doc { {rd}, - move (end), - (t.qtype == quote_type::unquoted || - t.qtype == quote_type::single), - move (mod), - r.intro, move (r.flags)}); + move (end), + (t.qtype == quote_type::unquoted || + t.qtype == quote_type::single), + move (mod), + r.intro, + move (r.flags)}); p = pending::none; mod.clear (); @@ -1053,16 +1126,34 @@ namespace build2 bool prog (p == pending::program_first || p == pending::program_next); - // Check if this is the env pseudo-builtin. + // Check if this is the env pseudo-builtin or the for-loop. // bool env (false); - if (prog && tt == type::word && t.value == "env") + if (prog && tt == type::word) { - parsed_env r (parse_env_builtin (t, tt)); - c.cwd = move (r.cwd); - c.variables = move (r.variables); - c.timeout = r.timeout; - env = true; + if (t.value == "env") + { + parsed_env r (parse_env_builtin (t, tt)); + c.cwd = move (r.cwd); + c.variables = move (r.variables); + c.timeout = r.timeout; + c.timeout_success = r.timeout_success; + env = true; + } + else if (t.value == "for") + { + if (expr.size () > 1) + fail (l) << "command expression involving for-loop"; + + for_loop = true; + + // Save 'for' as a program name and continue parsing as a + // command. + // + add_word (move (t.value), l); + next (t, tt); + continue; + } } // Parse the next chunk as names to get expansion, etc. 
Note that @@ -1243,9 +1334,16 @@ namespace build2 switch (tt) { case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: { + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Check that the previous command makes sense. // check_command (l, tt != type::pipe); @@ -1265,30 +1363,11 @@ namespace build2 break; } - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (move (t), tt, l); - break; - } - case type::clean: { + if (for_loop) + fail (l) << "cleanup in for-loop"; + parse_clean (t); break; } @@ -1299,6 +1378,27 @@ namespace build2 fail (l) << "here-document redirect in expansion"; break; } + + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. 
+ + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_file: + { + parse_redirect (move (t), tt, l); + break; + } } } @@ -1326,7 +1426,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); } - return make_pair (move (expr), move (hd)); + return parse_command_expr_result {move (expr), move (hd), for_loop}; } parser::parsed_env parser:: @@ -1502,6 +1602,10 @@ namespace build2 { r.timeout = chrono::seconds (*v); } + else if (o == "-s" || o == "--timeout-success") + { + r.timeout_success = true; + } else if (optional<dir_path> v = dir ("--cwd", "-c")) { r.cwd = move (*v); @@ -1516,6 +1620,9 @@ namespace build2 break; } + if (r.timeout_success && !r.timeout) + fail (l) << "env: -s|--timeout-success specified without -t|--timeout"; + // Parse arguments (variable sets). // for (; i != e; ++i) @@ -1575,7 +1682,7 @@ namespace build2 void parser:: parse_here_documents (token& t, type& tt, - pair<command_expr, here_docs>& p) + parse_command_expr_result& pr) { // enter: newline // leave: newline @@ -1583,7 +1690,7 @@ namespace build2 // Parse here-document fragments in the order they were mentioned on // the command line. // - for (here_doc& h: p.second) + for (here_doc& h: pr.docs) { // Switch to the here-line mode which is like single/double-quoted // string but recognized the newline as a separator. @@ -1603,7 +1710,7 @@ namespace build2 { auto i (h.redirects.cbegin ()); - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional<redirect>& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -1635,7 +1742,7 @@ namespace build2 // for (++i; i != h.redirects.cend (); ++i) { - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional<redirect>& ir (i->fd == 0 ? c.in : i->fd == 1 ? 
c.out : @@ -2061,6 +2168,8 @@ namespace build2 else if (n == "elif") r = line_type::cmd_elif; else if (n == "elif!") r = line_type::cmd_elifn; else if (n == "else") r = line_type::cmd_else; + else if (n == "while") r = line_type::cmd_while; + else if (n == "for") r = line_type::cmd_for_stream; else if (n == "end") r = line_type::cmd_end; else { @@ -2091,8 +2200,9 @@ namespace build2 exec_lines (lines::const_iterator i, lines::const_iterator e, const function<exec_set_function>& exec_set, const function<exec_cmd_function>& exec_cmd, - const function<exec_if_function>& exec_if, - size_t& li, + const function<exec_cond_function>& exec_cond, + const function<exec_for_function>& exec_for, + const iteration_index* ii, size_t& li, variable_pool* var_pool) { try @@ -2116,6 +2226,73 @@ namespace build2 next (t, tt); const location ll (get_location (t)); + // If end is true, then find the flow control construct's end ('end' + // line). Otherwise, find the flow control construct's block end + // ('end', 'else', etc). If skip is true then increment the command + // line index. + // + auto fcend = [e, &li] (lines::const_iterator j, + bool end, + bool skip) -> lines::const_iterator + { + // We need to be aware of nested flow control constructs. + // + size_t n (0); + + for (++j; j != e; ++j) + { + line_type lt (j->type); + + if (lt == line_type::cmd_if || + lt == line_type::cmd_ifn || + lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args) + ++n; + + // If we are nested then we just wait until we get back + // to the surface. + // + if (n == 0) + { + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + if (end) break; + // Fall through. + case line_type::cmd_end: return j; + default: break; + } + } + + if (lt == line_type::cmd_end) + --n; + + if (skip) + { + // Note that we don't count else, end, and 'for x: ...' as + // commands. 
+ // + switch (lt) + { + case line_type::cmd: + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_for_stream: + case line_type::cmd_while: ++li; break; + default: break; + } + } + } + + assert (false); // Missing end. + return e; + }; + switch (lt) { case line_type::var: @@ -2151,7 +2328,10 @@ namespace build2 single = true; } - exec_cmd (t, tt, li++, single, ll); + exec_cmd (t, tt, + ii, li++, single, + nullptr /* command_function */, + ll); replay_stop (); break; @@ -2167,7 +2347,7 @@ namespace build2 bool take; if (lt != line_type::cmd_else) { - take = exec_if (t, tt, li++, ll); + take = exec_cond (t, tt, ii, li++, ll); if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) take = !take; @@ -2180,97 +2360,383 @@ namespace build2 replay_stop (); - // If end is true, then find the 'end' line. Otherwise, find - // the next if-else line. If skip is true then increment the - // command line index. + // If we are taking this branch then we need to parse all the + // lines until the next if-else line and then skip all the lines + // until the end (unless we are already at the end). + // + // Otherwise, we need to skip all the lines until the next + // if-else line and then continue parsing. + // + if (take) + { + // Find block end. + // + lines::const_iterator j (fcend (i, false, false)); + + if (!exec_lines (i + 1, j, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool)) + return false; + + // Find construct end. + // + i = j->type == line_type::cmd_end ? j : fcend (j, true, true); + } + else + { + // Find block end. + // + i = fcend (i, false, true); + + if (i->type != line_type::cmd_end) + --i; // Continue with this line (e.g., elif or else). + } + + break; + } + case line_type::cmd_while: + { + // The while-loop construct end. Set on the first iteration. 
// - auto next = [e, &li] (lines::const_iterator j, - bool end, - bool skip) -> lines::const_iterator + lines::const_iterator we (e); + + size_t wli (li); + + for (iteration_index wi {1, ii};; wi.index++) + { + next (t, tt); // Skip to start of command. + + bool exec (exec_cond (t, tt, &wi, li++, ll)); + + replay_stop (); + + // If the condition evaluates to true, then we need to parse + // all the lines until the end line, prepare for the condition + // reevaluation, and re-iterate. + // + // Otherwise, we need to skip all the lines until the end + // line, bail out from the loop, and continue parsing. + // + if (exec) { - // We need to be aware of nested if-else chains. + // Find the construct end, if it is not found yet. // - size_t n (0); + if (we == e) + we = fcend (i, true, false); - for (++j; j != e; ++j) + if (!exec_lines (i + 1, we, + exec_set, exec_cmd, exec_cond, exec_for, + &wi, li, + var_pool)) + return false; + + // Prepare for the condition reevaluation. + // + replay_data (replay_tokens (ln.tokens)); + next (t, tt); + li = wli; + } + else + { + // Position to the construct end, always incrementing the + // line index (skip is true). + // + i = fcend (i, true, true); + break; // Bail out from the while-loop. + } + } + + break; + } + case line_type::cmd_for_stream: + { + // The for-loop construct end. Set on the first iteration. + // + lines::const_iterator fe (e); + + // Let's "wrap up" all the required data into the single object + // to rely on the "small function object" optimization. 
+ // + struct loop_data + { + lines::const_iterator i; + lines::const_iterator e; + const function<exec_set_function>& exec_set; + const function<exec_cmd_function>& exec_cmd; + const function<exec_cond_function>& exec_cond; + const function<exec_for_function>& exec_for; + const iteration_index* ii; + size_t& li; + variable_pool* var_pool; + decltype (fcend)& fce; + lines::const_iterator& fe; + } ld {i, e, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool, + fcend, + fe}; + + function<command_function> cf ( + [&ld, this] + (environment& env, + const strings& args, + auto_fd in, + pipe_command* pipe, + const optional<deadline>& dl, + const location& ll) + { + namespace cli = build2::build::cli; + + try { - line_type lt (j->type); + // Parse arguments. + // + cli::vector_scanner scan (args); + for_options ops (scan); + + // Note: diagnostics consistent with the set builtin. + // + if (ops.whitespace () && ops.newline ()) + fail (ll) << "for: both -n|--newline and " + << "-w|--whitespace specified"; + + if (!scan.more ()) + fail (ll) << "for: missing variable name"; - if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) - ++n; + string vname (scan.next ()); + if (vname.empty ()) + fail (ll) << "for: empty variable name"; + + // Detect patterns analogous to parse_variable_name() (so + // we diagnose `for x[string]`). + // + if (vname.find_first_of ("[*?") != string::npos) + fail (ll) << "for: expected variable name instead of " + << vname; - // If we are nested then we just wait until we get back - // to the surface. + // Let's also diagnose the `... | for x:...` misuse which + // can probably be quite common. // - if (n == 0) + if (vname.find (':') != string::npos) + fail (ll) << "for: ':' after variable name"; + + string attrs; + if (scan.more ()) { - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - if (end) break; - // Fall through. 
- case line_type::cmd_end: return j; - default: break; - } + attrs = scan.next (); + + if (attrs.empty ()) + fail (ll) << "for: empty variable attributes"; + + if (scan.more ()) + fail (ll) << "for: unexpected argument '" + << scan.next () << "'"; } - if (lt == line_type::cmd_end) - --n; + // Since the command pipe is parsed, we can stop + // replaying. Note that we should do this before calling + // exec_lines() for the loop body. Also note that we + // should increment the line index before that. + // + replay_stop (); + + size_t fli (++ld.li); + iteration_index fi {1, ld.ii}; - if (skip) + // Let's "wrap up" all the required data into the single + // object to rely on the "small function object" + // optimization. + // + struct { - // Note that we don't count else and end as commands. - // - switch (lt) + loop_data& ld; + environment& env; + const string& vname; + const string& attrs; + const location& ll; + size_t fli; + iteration_index& fi; + + } d {ld, env, vname, attrs, ll, fli, fi}; + + function<void (string&&)> f ( + [&d, this] (string&& s) { - case line_type::cmd: - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: ++li; break; - default: break; - } - } + loop_data& ld (d.ld); + + ld.li = d.fli; + + // Don't move from the variable name since it is used + // on each iteration. + // + d.env.set_variable (d.vname, + names {name (move (s))}, + d.attrs, + d.ll); + + // Find the construct end, if it is not found yet. + // + if (ld.fe == ld.e) + ld.fe = ld.fce (ld.i, true, false); + + if (!exec_lines (ld.i + 1, ld.fe, + ld.exec_set, + ld.exec_cmd, + ld.exec_cond, + ld.exec_for, + &d.fi, ld.li, + ld.var_pool)) + { + throw exit (true); + } + + d.fi.index++; + }); + + read (move (in), + !ops.newline (), ops.newline (), ops.exact (), + f, + pipe, + dl, + ll, + "for"); + } + catch (const cli::exception& e) + { + fail (ll) << "for: " << e; } + }); - assert (false); // Missing end. 
- return e; - }; + exec_cmd (t, tt, ii, li, false /* single */, cf, ll); - // If we are taking this branch then we need to parse all the - // lines until the next if-else line and then skip all the - // lines until the end (unless next is already end). + // Position to construct end. // - // Otherwise, we need to skip all the lines until the next - // if-else line and then continue parsing. + i = (fe != e ? fe : fcend (i, true, true)); + + break; + } + case line_type::cmd_for_args: + { + // Parse the variable name. // - if (take) + next (t, tt); + + assert (tt == type::word && t.qtype == quote_type::unquoted); + + string vn (move (t.value)); + + // Enter the variable into the pool if this is not done during + // the script parsing (see the var line type handling for + // details). + // + const variable* var (ln.var); + + if (var == nullptr) { - // Next if-else. - // - lines::const_iterator j (next (i, false, false)); - if (!exec_lines (i + 1, j, - exec_set, exec_cmd, exec_if, - li, - var_pool)) - return false; + assert (var_pool != nullptr); - i = j->type == line_type::cmd_end ? j : next (j, true, true); + var = &var_pool->insert (move (vn)); } - else + + // Parse the potential element attributes and skip the colon. + // + next_with_attributes (t, tt); + attributes_push (t, tt); + + assert (tt == type::colon); + + // Save element attributes so that we can inject them on each + // iteration. + // + attributes val_attrs (attributes_pop ()); + + // Parse the value with the potential attributes. + // + // Note that we don't really need to change the mode since we + // are replaying the tokens. + // + value val; + apply_value_attributes (nullptr /* variable */, + val, + parse_variable_line (t, tt), + type::assign); + + replay_stop (); + + // If the value is not NULL then iterate over its elements, + // assigning them to the for-loop variable, and parsing all the + // construct lines afterwards. Then position to the end line of + // the construct and continue parsing. 
+ + // The for-loop construct end. Set on the first iteration. + // + lines::const_iterator fe (e); + + if (val) { - i = next (i, false, true); - if (i->type != line_type::cmd_end) - --i; // Continue with this line (e.g., elif or else). + // If this value is a vector, then save its element type so + // that we can typify each element below. + // + const value_type* etype (nullptr); + + if (val.type != nullptr) + { + etype = val.type->element_type; + + // Note that here we don't want to be reducing empty simple + // values to empty lists. + // + untypify (val, false /* reduce */); + } + + size_t fli (li); + iteration_index fi {1, ii}; + names& ns (val.as<names> ()); + + for (auto ni (ns.begin ()), ne (ns.end ()); ni != ne; ++ni) + { + li = fli; + + // Set the variable value. + // + bool pair (ni->pair); + names n; + n.push_back (move (*ni)); + if (pair) n.push_back (move (*++ni)); + value v (move (n)); // Untyped. + + if (etype != nullptr) + typify (v, *etype, var); + + exec_for (*var, move (v), val_attrs, ll); + + // Find the construct end, if it is not found yet. + // + if (fe == e) + fe = fcend (i, true, false); + + if (!exec_lines (i + 1, fe, + exec_set, exec_cmd, exec_cond, exec_for, + &fi, li, + var_pool)) + return false; + + fi.index++; + } } + // Position to construct end. + // + i = (fe != e ? 
fe : fcend (i, true, true)); + break; } case line_type::cmd_end: { assert (false); + break; } } } @@ -2305,7 +2771,7 @@ namespace build2 } parser::parsed_doc:: - parsed_doc (parsed_doc&& d) + parsed_doc (parsed_doc&& d) noexcept : re (d.re), end_line (d.end_line), end_column (d.end_column) { if (re) diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx index d8e5dbf..795ce4e 100644 --- a/libbuild2/script/parser.hxx +++ b/libbuild2/script/parser.hxx @@ -97,15 +97,34 @@ namespace build2 }; using here_docs = vector<here_doc>; - pair<command_expr, here_docs> - parse_command_expr (token&, token_type&, const redirect_aliases&); + struct parse_command_expr_result + { + command_expr expr; // Single pipe for the for-loop. + here_docs docs; + bool for_loop = false; + + parse_command_expr_result () = default; + + parse_command_expr_result (command_expr&& e, + here_docs&& h, + bool f) + : expr (move (e)), docs (move (h)), for_loop (f) {} + }; + + // Pass the first special command program name (token_type::word) if it + // is already pre-parsed. + // + parse_command_expr_result + parse_command_expr (token&, token_type&, + const redirect_aliases&, + optional<token>&& program = nullopt); command_exit parse_command_exit (token&, token_type&); void parse_here_documents (token&, token_type&, - pair<command_expr, here_docs>&); + parse_command_expr_result&); struct parsed_doc { @@ -121,7 +140,7 @@ namespace build2 parsed_doc (string, uint64_t line, uint64_t column); parsed_doc (regex_lines&&, uint64_t line, uint64_t column); - parsed_doc (parsed_doc&&); // Note: move constuctible-only type. + parsed_doc (parsed_doc&&) noexcept; // Note: move constuctible-only type. ~parsed_doc (); }; @@ -135,18 +154,24 @@ namespace build2 // the first two tokens. Use the specified lexer mode to peek the second // token. // + // Always return the cmd_for_stream line type for the for-loop. Note + // that the for-loop form cannot be detected easily, based on the first + // two tokens. 
Also note that the detection can be specific for the + // script implementation (custom lexing mode, special variables, etc). + // line_type pre_parse_line_start (token&, token_type&, lexer_mode); // Parse the env pseudo-builtin arguments up to the program name. Return - // the program execution timeout, CWD, the list of the variables that - // should be unset ("name") and/or set ("name=value") in the command - // environment, and the token/type that starts the program name. Note - // that the variable unsets come first, if present. + // the program execution timeout and its success flag, CWD, the list of + // the variables that should be unset ("name") and/or set ("name=value") + // in the command environment, and the token/type that starts the + // program name. Note that the variable unsets come first, if present. // struct parsed_env { optional<duration> timeout; + bool timeout_success = false; optional<dir_path> cwd; environment_vars variables; }; @@ -159,19 +184,26 @@ namespace build2 protected: // Return false if the execution of the script should be terminated with // the success status (e.g., as a result of encountering the exit - // builtin). For unsuccessful termination the failed exception is thrown. + // builtin). For unsuccessful termination the failed exception is + // thrown. 
// using exec_set_function = void (const variable&, token&, token_type&, const location&); using exec_cmd_function = void (token&, token_type&, - size_t li, + const iteration_index*, size_t li, bool single, + const function<command_function>&, const location&); - using exec_if_function = bool (token&, token_type&, - size_t li, + using exec_cond_function = bool (token&, token_type&, + const iteration_index*, size_t li, + const location&); + + using exec_for_function = void (const variable&, + value&&, + const attributes& value_attrs, const location&); // If a parser implementation doesn't pre-enter variables into a pool @@ -183,8 +215,9 @@ namespace build2 exec_lines (lines::const_iterator b, lines::const_iterator e, const function<exec_set_function>&, const function<exec_cmd_function>&, - const function<exec_if_function>&, - size_t& li, + const function<exec_cond_function>&, + const function<exec_for_function>&, + const iteration_index*, size_t& li, variable_pool* = nullptr); // Customization hooks. 
diff --git a/libbuild2/script/regex.cxx b/libbuild2/script/regex.cxx index 3f796b6..11ff8a1 100644 --- a/libbuild2/script/regex.cxx +++ b/libbuild2/script/regex.cxx @@ -75,15 +75,29 @@ namespace build2 string::traits_type::find (ex, 4, c) != nullptr))); } + template <typename S> + static inline const char_string* + find_or_insert (line_pool& p, S&& s) + { + auto i (find (p.strings.begin (), p.strings.end (), s)); + if (i == p.strings.end ()) + { + p.strings.push_front (forward<S> (s)); + i = p.strings.begin (); + } + + return &*i; + } + line_char:: line_char (const char_string& s, line_pool& p) - : line_char (&(*p.strings.emplace (s).first)) + : line_char (find_or_insert (p, s)) { } line_char:: line_char (char_string&& s, line_pool& p) - : line_char (&(*p.strings.emplace (move (s)).first)) + : line_char (find_or_insert (p, move (s))) { } diff --git a/libbuild2/script/regex.hxx b/libbuild2/script/regex.hxx index e043c99..3c49b31 100644 --- a/libbuild2/script/regex.hxx +++ b/libbuild2/script/regex.hxx @@ -9,7 +9,6 @@ #include <locale> #include <string> // basic_string #include <type_traits> // make_unsigned, enable_if, is_* -#include <unordered_set> #include <libbuild2/types.hxx> #include <libbuild2/utility.hxx> @@ -59,7 +58,12 @@ namespace build2 // Note that we assume the pool can be moved without invalidating // pointers to any already pooled entities. // - std::unordered_set<char_string> strings; + // Note that we used to use unordered_set for strings but (1) there is + // no general expectation that we will have many identical strings and + // (2) the number of strings is not expected to be large. So that felt + // like an overkill and we now use a list with linear search. 
+ // + std::list<char_string> strings; std::list<char_regex> regexes; }; @@ -267,8 +271,8 @@ namespace build2 template <typename T> struct line_char_cmp : public std::enable_if<std::is_integral<T>::value || - (std::is_enum<T>::value && - !std::is_same<T, char_flags>::value)> {}; + (std::is_enum<T>::value && + !std::is_same<T, char_flags>::value)> {}; template <typename T, typename = typename line_char_cmp<T>::type> bool @@ -466,10 +470,10 @@ namespace std is (mask m, char_type c) const { return m == - (c.type () == line_type::special && c.special () >= 0 && - build2::digit (static_cast<char> (c.special ())) - ? digit - : 0); + (c.type () == line_type::special && c.special () >= 0 && + build2::digit (static_cast<char> (c.special ())) + ? digit + : 0); } const char_type* diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx index 8b609f1..f8f98c1 100644 --- a/libbuild2/script/run.cxx +++ b/libbuild2/script/run.cxx @@ -9,7 +9,8 @@ # include <libbutl/win32-utility.hxx> // DBG_TERMINATE_PROCESS #endif -#include <ios> // streamsize +#include <ios> // streamsize +#include <cstring> // strchr() #include <libbutl/regex.hxx> #include <libbutl/builtin.hxx> @@ -759,6 +760,31 @@ namespace build2 output_info (d, op); } + // Note that a here-document regex without ':' modifier can never + // match an empty output since it always contains the trailing empty + // line-char. This can be confusing, as for example while testing a + // program which can print some line or nothing with the following + // test: + // + // $* >>~%EOO% + // %( + // Hello, World! + // %)? + // EOO + // + // Note that the above line-regex contains 4 line-chars and will never + // match empty output. + // + // Thus, let's complete an empty output with an empty line-char for + // such a regex, so it may potentially match. 
+ // + if (ls.empty () && + rd.type == redirect_type::here_doc_regex && + rd.modifiers ().find (':') == string::npos) + { + ls += line_char (string (), regex.pool); + } + // Match the output with the regex. // // Note that we don't distinguish between the line_regex and @@ -784,7 +810,7 @@ namespace build2 // regex to file for troubleshooting regardless of whether we print // the diagnostics or not. We, however, register it for cleanup in the // later case (the expression may still succeed, we can be evaluating - // the if condition, etc). + // the flow control construct condition, etc). // optional<path> rp; if (env.temp_dir_keep) @@ -946,96 +972,660 @@ namespace build2 : path (c.program.recall_string ()); } - // Read out the stream content into a string. Throw io_error on the - // underlying OS error. + // Read the stream content into a string, optionally splitting the input + // data at whitespaces or newlines in which case return one, potentially + // incomplete, substring at a time (see the set builtin options for the + // splitting semantics). Throw io_error on the underlying OS error. // - // If the execution deadline is specified, then turn the stream into the - // non-blocking mode reading its content in chunks and with a single - // operation otherwise. If the specified deadline is reached while - // reading the stream, then bail out for the successful deadline and - // fail otherwise. Note that in the former case the result will be - // incomplete, but we leave it to the caller to handle that. + // On POSIX expects the stream to be non-blocking and its exception mask + // to have at least badbit. On Windows can also handle a blocking stream. // // Note that on Windows we can only turn pipe file descriptors into the - // non-blocking mode. Thus, we have no choice but to read from - // descriptors of other types synchronously there. That implies that we - // can potentially block indefinitely reading a file and missing the - // deadline on Windows. 
Note though, that the user can normally rewrite - // the command, for example, `set foo <<<file` with `cat file | set foo` - // to avoid this problem. + // non-blocking mode. Thus, we have no choice but to read from descriptors + // of other types synchronously there. That implies that we can + // potentially block indefinitely reading a file and missing a deadline on + // Windows. Note though, that the user can normally rewrite the command, + // for example, `set foo <<<file` with `cat file | set foo` to avoid this + // problem. // - static string - read (auto_fd in, + class stream_reader + { + public: + stream_reader (ifdstream&, bool whitespace, bool newline, bool exact); + + // Read next substring. Return true if the substring has been read or + // false if it should be called again once the stream has more data to + // read. Also return true on eof (in which case no substring is read). + // The string must be empty on the first call. Throw ios::failure on the + // underlying OS error. + // + // Note that there could still be data to read in the stream's buffer + // (as opposed to file descriptor) after this function returns true and + // you should be careful not to block on fdselect() in this case. The + // recommended usage pattern is similar to that of + // butl::getline_non_blocking(). The only difference is that + // ifdstream::eof() needs to be used instead of butl::eof() since this + // function doesn't set failbit and only sets eofbit after the last + // substring is returned. + // + bool + next (string&); + + private: + ifdstream& is_; + bool whitespace_; + bool newline_; + bool exact_; + + bool empty_ = true; // Set to false after the first character is read. 
+ }; + + stream_reader:: + stream_reader (ifdstream& is, bool ws, bool nl, bool ex) + : is_ (is), + whitespace_ (ws), + newline_ (nl), + exact_ (ex) + { + } + + bool stream_reader:: + next (string& ss) + { #ifndef _WIN32 - bool, + assert ((is_.exceptions () & ifdstream::badbit) != 0 && !is_.blocking ()); #else - bool pipe, + assert ((is_.exceptions () & ifdstream::badbit) != 0); #endif + + fdstreambuf& sb (*static_cast<fdstreambuf*> (is_.rdbuf ())); + + // Return the number of characters available in the stream buffer's get + // area, which can be: + // + // -1 -- EOF. + // 0 -- no data since blocked before encountering more data/EOF. + // >0 -- there is some data. + // + // Note that on Windows if the stream is blocking, then the lambda calls + // underflow() instead of returning 0. + // + // @@ Probably we can call underflow() only once per the next() call, + // emulating the 'no data' case. This will allow the caller to + // perform some housekeeping (reading other streams, checking for the + // deadline, etc). But let's keep it simple for now. + // + auto avail = [&sb] () -> streamsize + { + // Note that here we reasonably assume that any failure in in_avail() + // will lead to badbit and thus an exception (see showmanyc()). + // + streamsize r (sb.in_avail ()); + +#ifdef _WIN32 + if (r == 0 && sb.blocking ()) + { + if (sb.underflow () == ifdstream::traits_type::eof ()) + return -1; + + r = sb.in_avail (); + + assert (r != 0); // We wouldn't be here otherwise. + } +#endif + + return r; + }; + + // Read until blocked (0), EOF (-1) or encounter the delimiter. + // + streamsize s; + while ((s = avail ()) > 0) + { + if (empty_) + empty_ = false; + + const char* p (sb.gptr ()); + size_t n (sb.egptr () - p); + + // We move p and bump by the number of consumed characters. + // + auto bump = [&sb, &p] () {sb.gbump (static_cast<int> (p - sb.gptr ()));}; + + if (whitespace_) // The whitespace mode. + { + const char* sep (" \n\r\t"); + + // Skip the whitespaces. 
+ // + for (; n != 0 && strchr (sep, *p) != nullptr; ++p, --n) ; + + // If there are any non-whitespace characters in the get area, then + // append them to the resulting substring until a whitespace + // character is encountered. + // + if (n != 0) + { + // Append the non-whitespace characters. + // + for (char c; n != 0 && strchr (sep, c = *p) == nullptr; ++p, --n) + ss += c; + + // If a separator is encountered, then consume it, bump, and + // return the substring. + // + if (n != 0) + { + ++p; --n; // Consume the separator character. + + bump (); + return true; + } + + // Fall through. + } + + bump (); // Bump and continue reading. + } + else // The newline or no-split mode. + { + // Note that we don't collapse multiple consecutive newlines. + // + // Note also that we always sanitize CRs, so in the no-split mode we + // need to loop rather than consume the whole get area at once. + // + while (n != 0) + { + // Append the characters until the newline character or the end of + // the get area is encountered. + // + char c; + for (; n != 0 && (c = *p) != '\n'; ++p, --n) + ss += c; + + // If the newline character is encountered, then sanitize CRs and + // return the substring in the newline mode and continue + // parsing/reading otherwise. + // + if (n != 0) + { + // Strip the trailing CRs that can appear while, for example, + // cross-testing Windows target or as a part of msvcrt junk + // production (see above). + // + while (!ss.empty () && ss.back () == '\r') + ss.pop_back (); + + assert (c == '\n'); + + ++p; --n; // Consume the newline character. + + if (newline_) + { + bump (); + return true; + } + + ss += c; // Append newline to the resulting string. + + // Fall through. + } + + bump (); // Bump and continue parsing/reading. + } + } + } + + // Here s can be: + // + // -1 -- EOF. + // 0 -- blocked before encountering delimiter/EOF. + // + // Note: >0 (encountered the delimiter) case is handled in-place. 
+ // + assert (s == -1 || s == 0); + + if (s == -1) + { + // Return the last substring if it is not empty or it is the trailing + // "blank" in the exact mode. Otherwise, set eofbit for the stream + // indicating that we are done. + // + if (!ss.empty () || (exact_ && !empty_)) + { + // Also, strip the trailing newline character, if present, in the + // no-split no-exact mode. + // + if (!ss.empty () && ss.back () == '\n' && // Trailing newline. + !newline_ && !whitespace_ && !exact_) // No-split no-exact mode. + { + ss.pop_back (); + } + + exact_ = false; // Make sure we will set eofbit on the next call. + } + else + is_.setstate (ifdstream::eofbit); + } + + return s == -1; + } + + // Stack-allocated linked list of information about the running pipeline + // processes and builtins. + // + // Note: constructed incrementally. + // + struct pipe_command + { + // Initially NULL. Set to the address of the process or builtin object + // when it is created. Reset back to NULL when the respective + // process/builtin is executed and its exit status is collected (see + // complete_pipe() for details). + // + // We could probably use a union here, but let's keep it simple for now + // (at least one is NULL). + // + process* proc = nullptr; + builtin* bltn = nullptr; + + const command& cmd; + const cstrings* args = nullptr; + const optional<deadline>& dl; + + diag_buffer dbuf; + + bool terminated = false; // True if this command has been terminated. + + // True if this command has been terminated but we failed to read out + // its stdout and/or stderr streams in the reasonable timeframe (2 + // seconds) after the termination. + // + // Note that this may happen if there is a still running child process + // of the terminated command which has inherited the parent's stdout and + // stderr file descriptors. + // + bool unread_stdout = false; + bool unread_stderr = false; + + // Only for diagnostics. + // + const location& loc; + const path* isp = nullptr; // stdin cache. 
+ const path* osp = nullptr; // stdout cache. + const path* esp = nullptr; // stderr cache. + + pipe_command* prev; // NULL for the left-most command. + pipe_command* next; // Left-most command for the right-most command. + + pipe_command (context& x, + const command& c, + const optional<deadline>& d, + const location& l, + pipe_command* p, + pipe_command* f) + : cmd (c), dl (d), dbuf (x), loc (l), prev (p), next (f) {} + }; + + // Wait for a process/builtin to complete until the deadline is reached + // and return the underlying wait function result (optional<something>). + // + template<typename P> + static auto + timed_wait (P& p, const timestamp& deadline) -> decltype(p.try_wait ()) + { + timestamp now (system_clock::now ()); + return deadline > now ? p.timed_wait (deadline - now) : p.try_wait (); + } + + // Terminate the pipeline processes starting from the specified one and up + // to the leftmost one and then kill those which didn't terminate after 2 + // seconds. + // + // After that wait for the pipeline builtins completion. Since their + // standard streams should no longer be written to or read from by any + // process, that shouldn't take long. If, however, they won't be able to + // complete in 2 seconds, then some of them have probably stuck while + // communicating with a slow filesystem device or similar, and since we + // currently have no way to terminate asynchronous builtins, we have no + // choice but to abort. + // + // Issue diagnostics and fail if something goes wrong, but still try to + // terminate/kill all the pipe processes. + // + static void + term_pipe (pipe_command* pc, tracer& trace) + { + auto prog = [] (pipe_command* c) {return cmd_path (c->cmd);}; + + // Terminate processes gracefully and set the terminate flag for the + // pipe commands. 
+ // + diag_record dr; + for (pipe_command* c (pc); c != nullptr; c = c->prev) + { + if (process* p = c->proc) + try + { + l5 ([&]{trace (c->loc) << "terminating: " << c->cmd;}); + + p->term (); + } + catch (const process_error& e) + { + // If unable to terminate the process for any reason (the process is + // exiting on Windows, etc) then just ignore this, postponing the + // potential failure till the kill() call. + // + l5 ([&]{trace (c->loc) << "unable to terminate " << prog (c) + << ": " << e;}); + } + + c->terminated = true; + } + + // Wait a bit for the processes to terminate and kill the remaining + // ones. + // + timestamp dl (system_clock::now () + chrono::seconds (2)); + + for (pipe_command* c (pc); c != nullptr; c = c->prev) + { + if (process* p = c->proc) + try + { + l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;}); + + if (!timed_wait (*p, dl)) + { + l5 ([&]{trace (c->loc) << "killing: " << c->cmd;}); + + p->kill (); + p->wait (); + } + } + catch (const process_error& e) + { + dr << fail (c->loc) << "unable to wait/kill " << prog (c) << ": " + << e; + } + } + + // Wait a bit for the builtins to complete and abort if any remain + // running. 
+ // + dl = system_clock::now () + chrono::seconds (2); + + for (pipe_command* c (pc); c != nullptr; c = c->prev) + { + if (builtin* b = c->bltn) + try + { + l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;}); + + if (!timed_wait (*b, dl)) + { + error (c->loc) << prog (c) << " builtin hanged, aborting"; + terminate (false /* trace */); + } + } + catch (const system_error& e) + { + dr << fail (c->loc) << "unable to wait for " << prog (c) << ": " + << e; + } + } + } + + void + read (auto_fd&& in, + bool whitespace, bool newline, bool exact, + const function<void (string&&)>& cf, + pipe_command* pipeline, const optional<deadline>& dl, - const command& deadline_cmd, - const location& ll) + const location& ll, + const char* what) { - string r; - ifdstream cin; + tracer trace ("script::stream_read"); + // Note: stays blocking on Windows if the descriptor is not of the pipe + // type. + // #ifndef _WIN32 - if (dl) + fdstream_mode m (fdstream_mode::non_blocking); #else - if (dl && pipe) + fdstream_mode m (pipeline != nullptr + ? fdstream_mode::non_blocking + : fdstream_mode::blocking); #endif + + ifdstream is (move (in), m, ifdstream::badbit); + stream_reader sr (is, whitespace, newline, exact); + + fdselect_set fds; + for (pipe_command* c (pipeline); c != nullptr; c = c->prev) + { + diag_buffer& b (c->dbuf); + + if (b.is.is_open ()) + fds.emplace_back (b.is.fd (), c); + } + + fds.emplace_back (is.fd ()); + fdselect_state& ist (fds.back ()); + size_t unread (fds.size ()); + + optional<timestamp> dlt (dl ? dl->value : optional<timestamp> ()); + + // If there are some left-hand side processes/builtins running, then + // terminate them and, if there are unread stdout/stderr file + // descriptors, then increase the deadline by another 2 seconds and + // return true. In this case the term() should be called again upon + // reaching the timeout. Otherwise return false. If there are no + // left-hand side processes/builtins running, then fail straight away. 
+ // + // Note that in the former case the further reading will be performed + // with the adjusted timeout. We assume that this timeout is normally + // sufficient to read out the buffered data written by the already + // terminated processes. If, however, that's not the case (see + // pipe_command for the possible reasons), then term() needs to be + // called for the second time and the reading should be interrupted + // afterwards. + // + auto term = [&dlt, pipeline, &fds, &ist, &is, &unread, + &trace, &ll, what, terminated = false] () mutable -> bool { - fdselect_set fds {in.get ()}; - cin.open (move (in), fdstream_mode::non_blocking); + // Can only be called if the deadline is specified. + // + assert (dlt); - const timestamp& dlt (dl->value); + if (pipeline == nullptr) + fail (ll) << what << " terminated: execution timeout expired"; - for (char buf[4096];; ) + if (!terminated) { - timestamp now (system_clock::now ()); + // Terminate the pipeline and adjust the deadline. + // - if (dlt <= now || ifdselect (fds, dlt - now) == 0) + // Note that if we are still reading the stream and it's a builtin + // stdout, then we need to close it before terminating the pipeline. + // Not doing so can result in blocking this builtin on the write + // operation and thus aborting the build2 process (see term_pipe() + // for details). + // + // Should we do the same for all the pipeline builtins' stderr + // streams? No we don't, since the builtin diagnostics is assumed to + // always fit the pipe buffer (see libbutl/builtin.cxx for details). + // Thus, we will leave them open to fully read out the diagnostics. + // + if (ist.fd != nullfd && pipeline->bltn != nullptr) { - if (!dl->success) - fail (ll) << cmd_path (deadline_cmd) - << " terminated: execution timeout expired"; - else - break; + try + { + is.close (); + } + catch (const io_error&) + { + // Not much we can do here. 
+ } + + ist.fd = nullfd; + --unread; } - streamsize n (cin.readsome (buf, sizeof (buf))); + term_pipe (pipeline, trace); + terminated = true; - // Bail out if eos is reached. + if (unread != 0) + dlt = system_clock::now () + chrono::seconds (2); + + return unread != 0; + } + else + { + // Set the unread_{stderr,stdout} flags to true for the commands + // whose streams are not fully read yet. // - if (n == 0) - break; - r.append (buf, n); + // Can only be called after the first call of term() which would + // throw failed if pipeline is NULL. + // + assert (pipeline != nullptr); + + for (fdselect_state& s: fds) + { + if (s.fd != nullfd) + { + if (s.data != nullptr) // stderr. + { + pipe_command* c (static_cast<pipe_command*> (s.data)); + + c->unread_stderr = true; + + // Let's also close the stderr stream not to confuse + // diag_buffer::close() with a not fully read stream (eof is + // not reached, etc). + // + try + { + c->dbuf.is.close (); + } + catch (const io_error&) + { + // Not much we can do here. Anyway the diagnostics will be + // issued by complete_pipe(). + } + } + else // stdout. + pipeline->unread_stdout = true; + } + } + + return false; } - } - else + }; + + // Note that on Windows if the file descriptor is not a pipe, then + // ifdstream assumes the blocking mode for which ifdselect() would throw + // invalid_argument. Such a descriptor can, however, only appear for the + // first command in the pipeline and so fds will only contain the input + // stream's descriptor. That all means that this descriptor will be read + // out by a series of the stream_reader::next() calls which can only + // return true and thus no ifdselect() calls will ever be made. + // + string s; + while (unread != 0) { - cin.open (move (in)); - r = cin.read_text (); - } + // Read any pending data from the input stream. + // + if (ist.fd != nullfd) + { + // Prior to reading let's check that the deadline, if specified, is + // not reached. 
This way we handle the (hypothetical) case when we + // are continuously fed with the data without delays and thus can + // never get to ifdselect() which watches for the deadline. Also + // this check is the only way to bail out early on Windows for a + // blocking file descriptor. + // + if (dlt && *dlt <= system_clock::now ()) + { + if (!term ()) + break; + } - cin.close (); + if (sr.next (s)) + { + if (!is.eof ()) + { + // Consume the substring. + // + cf (move (s)); + s.clear (); + } + else + { + ist.fd = nullfd; + --unread; + } - return r; + continue; + } + } + + try + { + // Wait until the data appear in any of the streams. If a deadline + // is specified, then pass the timeout to fdselect(). + // + if (dlt) + { + timestamp now (system_clock::now ()); + + if (*dlt <= now || ifdselect (fds, *dlt - now) == 0) + { + if (term ()) + continue; + else + break; + } + } + else + ifdselect (fds); + + // Read out the pending data from the stderr streams. + // + for (fdselect_state& s: fds) + { + if (s.ready && + s.data != nullptr && + !static_cast<pipe_command*> (s.data)->dbuf.read ()) + { + s.fd = nullfd; + --unread; + } + } + } + catch (const io_error& e) + { + fail (ll) << "io error reading pipeline streams: " << e; + } + } } // The set pseudo-builtin: set variable from the stdin input. // - // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var> + // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] <var> [<attr>] // static void set_builtin (environment& env, const strings& args, auto_fd in, - bool pipe, + pipe_command* pipeline, const optional<deadline>& dl, - const command& deadline_cmd, const location& ll) { + tracer trace ("script::set_builtin"); + try { // Parse arguments. @@ -1049,105 +1639,41 @@ namespace build2 if (!scan.more ()) fail (ll) << "set: missing variable name"; - string a (scan.next ()); // Either attributes or variable name. - const string* ats (!scan.more () ? nullptr : &a); - string vname (!scan.more () ? 
move (a) : scan.next ()); - - if (scan.more ()) - fail (ll) << "set: unexpected argument '" << scan.next () << "'"; - - if (ats != nullptr && ats->empty ()) - fail (ll) << "set: empty variable attributes"; - + string vname (scan.next ()); if (vname.empty ()) fail (ll) << "set: empty variable name"; - // Read out the stream content into a string while keeping an eye on - // the deadline. + // Detect patterns analogous to parser::parse_variable_name() (so we + // diagnose `set x[string]`). // - string s (read (move (in), pipe, dl, deadline_cmd, ll)); + if (vname.find_first_of ("[*?") != string::npos) + fail (ll) << "set: expected variable name instead of " << vname; - // Parse the stream content into the variable value. - // - names ns; - - if (!s.empty ()) + string attrs; + if (scan.more ()) { - if (ops.whitespace ()) // The whitespace mode. - { - // Note that we collapse multiple consecutive whitespaces. - // - for (size_t p (0); p != string::npos; ) - { - // Skip the whitespaces. - // - const char* sep (" \n\r\t"); - size_t b (s.find_first_not_of (sep, p)); + attrs = scan.next (); - if (b != string::npos) // Word beginning. - { - size_t e (s.find_first_of (sep, b)); // Find the word end. - ns.emplace_back (string (s, b, e != string::npos ? e - b : e)); - - p = e; - } - else // Trailings whitespaces. - { - // Append the trailing "blank" after the trailing whitespaces - // in the exact mode. - // - if (ops.exact ()) - ns.emplace_back (empty_string); - - // Bail out since the end of the string is reached. - // - break; - } - } - } - else // The newline or no-split mode. - { - // Note that we don't collapse multiple consecutive newlines. - // - // Note also that we always sanitize CRs so this loop is always - // needed. - // - for (size_t p (0); p != string::npos; ) - { - size_t e (s.find ('\n', p)); - string l (s, p, e != string::npos ? 
e - p : e); + if (attrs.empty ()) + fail (ll) << "set: empty variable attributes"; - // Strip the trailing CRs that can appear while, for example, - // cross-testing Windows target or as a part of msvcrt junk - // production (see above). - // - while (!l.empty () && l.back () == '\r') - l.pop_back (); + if (scan.more ()) + fail (ll) << "set: unexpected argument '" << scan.next () << "'"; + } - // Append the line. - // - if (!l.empty () || // Non-empty. - e != string::npos || // Empty, non-trailing. - ops.exact ()) // Empty, trailing, in the exact mode. - { - if (ops.newline () || ns.empty ()) - ns.emplace_back (move (l)); - else - { - ns[0].value += '\n'; - ns[0].value += l; - } - } + // Parse the stream content into the variable value. + // + names ns; - p = e != string::npos ? e + 1 : e; - } - } - } + read (move (in), + ops.whitespace (), ops.newline (), ops.exact (), + [&ns] (string&& s) {ns.emplace_back (move (s));}, + pipeline, + dl, + ll, + "set"); - env.set_variable (move (vname), - move (ns), - ats != nullptr ? *ats : empty_string, - ll); + env.set_variable (move (vname), move (ns), attrs, ll); } catch (const io_error& e) { @@ -1174,51 +1700,16 @@ namespace build2 name); } - // Stack-allocated linked list of information about the running pipeline - // processes and builtins. - // - struct pipe_command - { - // We could probably use a union here, but let's keep it simple for now - // (one is NULL). - // - process* proc; - builtin* bltn; - - // True if this command has been terminated. - // - bool terminated = false; - - // Only for diagnostics. - // - const command& cmd; - const location& loc; - - pipe_command* prev; // NULL for the left-most command. 
- - pipe_command (process& p, - const command& c, - const location& l, - pipe_command* v) - : proc (&p), bltn (nullptr), cmd (c), loc (l), prev (v) {} - - pipe_command (builtin& b, - const command& c, - const location& l, - pipe_command* v) - : proc (nullptr), bltn (&b), cmd (c), loc (l), prev (v) {} - }; - static bool run_pipe (environment& env, command_pipe::const_iterator bc, command_pipe::const_iterator ec, auto_fd ifd, - size_t ci, size_t li, const location& ll, + const iteration_index* ii, size_t li, size_t ci, + const location& ll, bool diag, - string* output, + const function<command_function>& cf, bool last_cmd, optional<deadline> dl = nullopt, - const command* dl_cmd = nullptr, // env -t <cmd> pipe_command* prev_cmd = nullptr) { tracer trace ("script::run_pipe"); @@ -1227,8 +1718,10 @@ namespace build2 // if (bc == ec) { - if (output != nullptr) + if (cf != nullptr) { + assert (!last_cmd); // Otherwise we wouldn't be here. + // The pipeline can't be empty. // assert (ifd != nullfd && prev_cmd != nullptr); @@ -1237,15 +1730,14 @@ namespace build2 try { - *output = read (move (ifd), - true /* pipe */, - dl, - dl_cmd != nullptr ? *dl_cmd : c, - ll); + cf (env, strings () /* arguments */, + move (ifd), prev_cmd, + dl, + ll); } catch (const io_error& e) { - fail (ll) << "io error reading " << cmd_path (c) << " output: " + fail (ll) << "unable to read from " << cmd_path (c) << " stdout: " << e; } } @@ -1303,9 +1795,10 @@ namespace build2 command_pipe::const_iterator nc (bc + 1); bool last (nc == ec); - // Make sure that stdout is not redirected if meant to be read. + // Make sure that stdout is not redirected if meant to be read (last_cmd + // is false) or cannot be produced (last_cmd is true). // - if (last && output != nullptr && c.out) + if (last && c.out && cf != nullptr) fail (ll) << "stdout cannot be redirected"; // True if the process path is not pre-searched and the program path @@ -1319,7 +1812,7 @@ namespace build2 const redirect& in ((c.in ? 
*c.in : env.in).effective ()); - const redirect* out (!last || output != nullptr + const redirect* out (!last || (cf != nullptr && !last_cmd) ? nullptr // stdout is piped. : &(c.out ? *c.out : env.out).effective ()); @@ -1327,13 +1820,7 @@ namespace build2 auto process_args = [&c] () -> cstrings { - cstrings args {c.program.recall_string ()}; - - for (const auto& a: c.arguments) - args.push_back (a.c_str ()); - - args.push_back (nullptr); - return args; + return build2::process_args (c.program.recall_string (), c.arguments); }; // Prior to opening file descriptors for command input/output redirects @@ -1356,14 +1843,29 @@ namespace build2 // content), to make sure that the command doesn't print any unwanted // diagnostics about IO operation failure. // - // Note though, that doing so would be a bad idea if the deadline is - // specified, since we can block on read and miss the deadline. - // - if (!dl) + if (ifd != nullfd) { - // Note that dtor will ignore any errors (which is what we want). + // Note that we can't use ifdstream dtor in the skip mode here since + // it turns the stream into the blocking mode and we won't be able + // to read out the potentially buffered stderr for the + // pipeline. Using read() is also not ideal since it performs + // parsing and allocations needlessly. This, however, is probably ok + // for such an uncommon case. + // + //ifdstream (move (ifd), fdstream_mode::skip); + + // Let's try to minimize the allocation size splitting the input + // data at whitespaces. // - ifdstream (move (ifd), fdstream_mode::skip); + read (move (ifd), + true /* whitespace */, + false /* newline */, + false /* exact */, + [] (string&&) {}, // Just drop the string. 
+ prev_cmd, + dl, + ll, + program.c_str ()); } if (!first || !last) @@ -1387,7 +1889,7 @@ namespace build2 if (c.out) fail (ll) << program << " builtin stdout cannot be redirected"; - if (output != nullptr) + if (cf != nullptr && !last_cmd) fail (ll) << program << " builtin stdout cannot be read"; if (c.err) @@ -1419,19 +1921,28 @@ namespace build2 // Create a unique path for a command standard stream cache file. // - auto std_path = [&env, &ci, &li, &ll] (const char* n) -> path + auto std_path = [&env, ii, &li, &ci, &ll] (const char* nm) -> path { using std::to_string; - path p (n); + string s (nm); + size_t n (s.size ()); + + if (ii != nullptr) + { + // Note: reverse order (outermost to innermost). + // + for (const iteration_index* i (ii); i != nullptr; i = i->prev) + s.insert (n, "-i" + to_string (i->index)); + } // 0 if belongs to a single-line script, otherwise is the command line // number (start from one) in the script. // - if (li > 0) + if (li != 0) { - p += '-'; - p += to_string (li); + s += "-n"; + s += to_string (li); } // 0 if belongs to a single-command expression, otherwise is the @@ -1441,13 +1952,13 @@ namespace build2 // single-line script or to N-th single-command line of multi-line // script. These cases are mutually exclusive and so are unambiguous. // - if (ci > 0) + if (ci != 0) { - p += '-'; - p += to_string (ci); + s += "-c"; + s += to_string (ci); } - return normalize (move (p), temp_dir (env), ll); + return normalize (path (move (s)), temp_dir (env), ll); }; // If this is the first pipeline command, then open stdin descriptor @@ -1552,19 +2063,15 @@ namespace build2 // Calculate the process/builtin execution deadline. Note that we should // also consider the left-hand side processes deadlines, not to keep // them waiting for us and allow them to terminate not later than their - // deadlines. Thus, let's also track which command has introduced the - // deadline, so we can report it if the deadline is missed. + // deadlines. 
// dl = earlier (dl, env.effective_deadline ()); if (c.timeout) { - deadline d (system_clock::now () + *c.timeout, false /* success */); + deadline d (system_clock::now () + *c.timeout, c.timeout_success); if (!dl || d < *dl) - { dl = d; - dl_cmd = &c; - } } // Prior to opening file descriptors for command outputs redirects @@ -1585,7 +2092,7 @@ namespace build2 if (c.out) fail (ll) << "set builtin stdout cannot be redirected"; - if (output != nullptr) + if (cf != nullptr && !last_cmd) fail (ll) << "set builtin stdout cannot be read"; if (c.err) @@ -1597,14 +2104,54 @@ namespace build2 if (verb >= 2) print_process (process_args ()); - set_builtin (env, c.arguments, - move (ifd), !first, - dl, dl_cmd != nullptr ? *dl_cmd : c, - ll); + set_builtin (env, c.arguments, move (ifd), prev_cmd, dl, ll); + return true; + } + + // If this is the last command in the pipe and the command function is + // specified for it, then call it. + // + if (last && cf != nullptr && last_cmd) + { + // Must be enforced by the caller. + // + assert (!c.out && !c.err && !c.exit); + + try + { + cf (env, c.arguments, move (ifd), prev_cmd, dl, ll); + } + catch (const io_error& e) + { + diag_record dr (fail (ll)); + + dr << cmd_path (c) << ": unable to read from "; + + if (prev_cmd != nullptr) + dr << cmd_path (prev_cmd->cmd) << " output"; + else + dr << "stdin"; + + dr << ": " << e; + } return true; } + // Propagate the pointer to the left-most command. + // + pipe_command pc (env.context, + c, + dl, + ll, + prev_cmd, + prev_cmd != nullptr ? prev_cmd->next : nullptr); + + if (prev_cmd != nullptr) + prev_cmd->next = &pc; + else + pc.next = &pc; // Points to itself. 
+ // Open a file for command output redirect if requested explicitly // (file overwrite/append redirects) or for the purpose of the output // validation (none, here_*, file comparison redirects), register the @@ -1614,9 +2161,9 @@ namespace build2 // or null-device descriptor for merge, pass or null redirects // respectively (not opening any file). // - auto open = [&env, &wdir, &ll, &std_path] (const redirect& r, - int dfd, - path& p) -> auto_fd + auto open = [&env, &wdir, &ll, &std_path, &c, &pc] (const redirect& r, + int dfd, + path& p) -> auto_fd { assert (dfd == 1 || dfd == 2); const char* what (dfd == 1 ? "stdout" : "stderr"); @@ -1634,11 +2181,34 @@ namespace build2 { try { + if (dfd == 2) // stderr? + { + fdpipe p; + if (diag_buffer::pipe (env.context) == -1) // Are we buffering? + p = fdopen_pipe (); + + // Deduce the args0 argument similar to cmd_path(). + // + // Note that we must open the diag buffer regardless of the + // diag_buffer::pipe() result. + // + pc.dbuf.open ((c.program.initial == nullptr + ? c.program.recall.string ().c_str () + : c.program.recall_string ()), + move (p.in), + fdstream_mode::non_blocking); + + if (p.out != nullfd) + return move (p.out); + + // Fall through. + } + return fddup (dfd); } catch (const io_error& e) { - fail (ll) << "unable to duplicate " << what << ": " << e; + fail (ll) << "unable to redirect " << what << ": " << e; } } @@ -1767,111 +2337,386 @@ namespace build2 // assert (ofd.out != nullfd && efd != nullfd); - // Wait for a process/builtin to complete until the deadline is reached - // and return the underlying wait function result (optional<something>). - // - auto timed_wait = [] (auto& p, const timestamp& deadline) - { - timestamp now (system_clock::now ()); - return deadline > now ? 
p.timed_wait (deadline - now) : p.try_wait (); - }; + pc.isp = &isp; + pc.osp = &osp; + pc.esp = &esp; - // Terminate the pipeline processes starting from the specified one and - // up to the leftmost one and then kill those which didn't terminate - // after 2 seconds. + // Read out all the pipeline's buffered stderr streams watching for the + // deadline, if specified. If the deadline is reached, then terminate + // the whole pipeline, move the deadline by another 2 seconds, and + // continue reading. // - // After that wait for the pipeline builtins completion. Since their - // standard streams should no longer be written to or read from by any - // process, that shouldn't take long. If, however, they won't be able to - // complete in 2 seconds, then some of them have probably stuck while - // communicating with a slow filesystem device or similar, and since we - // currently have no way to terminate asynchronous builtins, we have no - // choice but to abort. + // Note that we assume that this timeout increment is normally + // sufficient to read out the buffered data written by the already + // terminated processes. If, however, that's not the case (see + // pipe_command for the possible reasons), then we just set + // unread_stderr flag to true for such commands and bail out. // - // Issue diagnostics and fail if something goes wrong, but still try to - // terminate/kill all the pipe processes. + // Also note that this is a reduced version of the above read() function. // - auto term_pipe = [&timed_wait, &trace] (pipe_command* pc) + auto read_pipe = [&pc, &ll, &trace] () { - diag_record dr; + fdselect_set fds; + for (pipe_command* c (&pc); c != nullptr; c = c->prev) + { + diag_buffer& b (c->dbuf); - auto prog = [] (pipe_command* c) {return cmd_path (c->cmd);}; + if (b.is.is_open ()) + fds.emplace_back (b.is.fd (), c); + } - // Terminate processes gracefully and set the terminate flag for the - // pipe commands. 
+ // Note that the current command deadline is the earliest (see above). // - for (pipe_command* c (pc); c != nullptr; c = c->prev) + optional<timestamp> dlt (pc.dl ? pc.dl->value : optional<timestamp> ()); + + bool terminated (false); + + for (size_t unread (fds.size ()); unread != 0;) { - if (process* p = c->proc) try { - l5 ([&]{trace (c->loc) << "terminating: " << c->cmd;}); + // If a deadline is specified, then pass the timeout to fdselect(). + // + if (dlt) + { + timestamp now (system_clock::now ()); + + if (*dlt <= now || ifdselect (fds, *dlt - now) == 0) + { + if (!terminated) + { + term_pipe (&pc, trace); + terminated = true; - p->term (); + dlt = system_clock::now () + chrono::seconds (2); + continue; + } + else + { + for (fdselect_state& s: fds) + { + if (s.fd != nullfd) + { + pipe_command* c (static_cast<pipe_command*> (s.data)); + + c->unread_stderr = true; + + // Let's also close the stderr stream not to confuse + // diag_buffer::close() (see read() for details). + // + try + { + c->dbuf.is.close (); + } + catch (const io_error&) {} + } + } + + break; + } + } + } + else + ifdselect (fds); + + for (fdselect_state& s: fds) + { + if (s.ready && + !static_cast<pipe_command*> (s.data)->dbuf.read ()) + { + s.fd = nullfd; + --unread; + } + } } - catch (const process_error& e) + catch (const io_error& e) { - // If unable to terminate the process for any reason (the process - // is exiting on Windows, etc) then just ignore this, postponing - // the potential failure till the kill() call. - // - l5 ([&]{trace (c->loc) << "unable to terminate " << prog (c) - << ": " << e;}); + fail (ll) << "io error reading pipeline streams: " << e; } - - c->terminated = true; } + }; - // Wait a bit for the processes to terminate and kill the remaining - // ones. 
- // - timestamp dl (system_clock::now () + chrono::seconds (2)); - - for (pipe_command* c (pc); c != nullptr; c = c->prev) + // Wait for the pipeline processes and builtins to complete, watching + // for their deadlines if present. If a deadline is reached for any of + // them, then terminate the whole pipeline. + // + // Note: must be called after read_pipe(). + // + auto wait_pipe = [&pc, &dl, &trace] () + { + for (pipe_command* c (&pc); c != nullptr; c = c->prev) { - if (process* p = c->proc) try { - l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;}); - - if (!timed_wait (*p, dl)) + if (process* p = c->proc) { - l5 ([&]{trace (c->loc) << "killing: " << c->cmd;}); + if (!dl) + p->wait (); + else if (!timed_wait (*p, dl->value)) + term_pipe (c, trace); + } + else + { + builtin* b (c->bltn); - p->kill (); - p->wait (); + if (!dl) + b->wait (); + else if (!timed_wait (*b, dl->value)) + term_pipe (c, trace); } } catch (const process_error& e) { - dr << fail (c->loc) << "unable to wait/kill " << prog (c) << ": " - << e; + fail (c->loc) << "unable to wait " << cmd_path (c->cmd) << ": " + << e; } } + }; - // Wait a bit for the builtins to complete and abort if any remain - // running. - // - dl = system_clock::now () + chrono::seconds (2); + // Iterate over the pipeline processes and builtins left to right, + // printing their stderr if buffered and issuing the diagnostics if the + // exit code is not available (terminated abnormally or due to a + // deadline), is unexpected, or stdout and/or stderr was not fully + // read. Throw failed at the end if the exit code for any of them is not + // available or stdout and/or stderr was not fully read. Return false if + // exit code for any of them is unexpected (the return is used, for + // example, in the if-conditions). + // + // Note: must be called after wait_pipe() and only once. 
+ // + auto complete_pipe = [&pc, &env, diag] () + { + bool r (true); + bool fail (false); - for (pipe_command* c (pc); c != nullptr; c = c->prev) + pipe_command* c (pc.next); // Left-most command. + assert (c != nullptr); // Since the lambda must be called once. + + for (pc.next = nullptr; c != nullptr; c = c->next) { - if (builtin* b = c->bltn) - try + // Collect the exit status, if present. + // + // Absent if the process/builtin misses the "unsuccessful" deadline. + // + optional<process_exit> exit; + + const char* w (c->bltn != nullptr ? "builtin" : "process"); + + if (c->bltn != nullptr) { - l5 ([&]{trace (c->loc) << "waiting: " << c->cmd;}); + // Note that this also handles ad hoc termination (without the + // call to term_pipe()) by the sleep builtin. + // + if (c->terminated) + { + if (c->dl && c->dl->success) + exit = process_exit (0); + } + else + exit = process_exit (c->bltn->wait ()); - if (!timed_wait (*b, dl)) + c->bltn = nullptr; + } + else if (c->proc != nullptr) + { + const process& pr (*c->proc); + +#ifndef _WIN32 + if (c->terminated && + !pr.exit->normal () && + pr.exit->signal () == SIGTERM) +#else + if (c->terminated && + !pr.exit->normal () && + pr.exit->status == DBG_TERMINATE_PROCESS) +#endif { - error (c->loc) << prog (c) << " builtin hanged, aborting"; - terminate (false /* trace */); + if (c->dl && c->dl->success) + exit = process_exit (0); } + else + exit = pr.exit; + + c->proc = nullptr; } - catch (const system_error& e) + else + assert (false); // The lambda can only be called once. + + const command& cmd (c->cmd); + const location& ll (c->loc); + + // Verify the exit status and issue the diagnostics on failure. + // + diag_record dr; + + path pr (cmd_path (cmd)); + + // Print the diagnostics if the command stdout and/or stderr are not + // fully read. 
+ // + auto unread_output_diag = [&dr, c, w, &pr] (bool main_error) + { + if (main_error) + dr << error (c->loc) << w << ' ' << pr << ' '; + else + dr << error; + + if (c->unread_stdout) + { + dr << "stdout "; + + if (c->unread_stderr) + dr << "and "; + } + + if (c->unread_stderr) + dr << "stderr "; + + dr << "not closed after exit"; + }; + + // Fail if the process is terminated due to reaching the deadline. + // + if (!exit) + { + dr << error (ll) << w << ' ' << pr + << " terminated: execution timeout expired"; + + if (c->unread_stdout || c->unread_stderr) + unread_output_diag (false /* main_error */); + + if (verb == 1) + { + dr << info << "command line: "; + print_process (dr, *c->args); + } + + fail = true; + } + else + { + // If there is no valid exit code available by whatever reason + // then we print the proper diagnostics, dump stderr (if cached + // and not too large) and fail the whole script. Otherwise if the + // exit code is not correct then we print diagnostics if requested + // and fail the pipeline. + // + bool valid (exit->normal ()); + + // On Windows the exit code can be out of the valid codes range + // being defined as uint16_t. + // +#ifdef _WIN32 + if (valid) + valid = exit->code () < 256; +#endif + + // In the presence of a valid exit code and given stdout and + // stderr are fully read out we print the diagnostics and return + // false rather than throw. + // + // Note that there can be a race, so that the process we have + // terminated due to reaching the deadline has in fact exited + // normally. Thus, the 'unread stderr' situation can also happen + // to a successfully terminated process. If that's the case, we + // report this problem as the main error and the secondary error + // otherwise. + // + if (!valid || c->unread_stdout || c->unread_stderr) + fail = true; + + exit_comparison cmp (cmd.exit + ? cmd.exit->comparison + : exit_comparison::eq); + + uint16_t exc (cmd.exit ? 
cmd.exit->code : 0); + + bool success (valid && + (cmp == exit_comparison::eq) == + (exc == exit->code ())); + + if (!success) + r = false; + + if (!valid || (!success && diag)) + { + dr << error (ll) << w << ' ' << pr << ' '; + + if (!exit->normal ()) + dr << *exit; + else + { + uint16_t ec (exit->code ()); // Make sure printed as integer. + + if (!valid) + { + dr << "exit code " << ec << " out of 0-255 range"; + } + else + { + if (cmd.exit) + dr << "exit code " << ec + << (cmp == exit_comparison::eq ? " != " : " == ") + << exc; + else + dr << "exited with code " << ec; + } + } + + if (c->unread_stdout || c->unread_stderr) + unread_output_diag (false /* main_error */); + + if (verb == 1) + { + dr << info << "command line: "; + print_process (dr, *c->args); + } + + if (non_empty (*c->esp, ll) && avail_on_failure (*c->esp, env)) + dr << info << "stderr: " << *c->esp; + + if (non_empty (*c->osp, ll) && avail_on_failure (*c->osp, env)) + dr << info << "stdout: " << *c->osp; + + if (non_empty (*c->isp, ll) && avail_on_failure (*c->isp, env)) + dr << info << "stdin: " << *c->isp; + + // Print cached stderr. + // + print_file (dr, *c->esp, ll); + } + else if (c->unread_stdout || c->unread_stderr) + unread_output_diag (true /* main_error */); + } + + // Now print the buffered stderr, if present, and/or flush the + // diagnostics, if issued. + // + if (c->dbuf.is_open ()) + c->dbuf.close (move (dr)); + } + + // Fail if required. + // + if (fail) + throw failed (); + + return r; + }; + + // Close all buffered pipeline stderr streams ignoring io_error + // exceptions. 
+ // + auto close_pipe = [&pc] () + { + for (pipe_command* c (&pc); c != nullptr; c = c->prev) + { + if (c->dbuf.is.is_open ()) + try { - dr << fail (c->loc) << "unable to wait for " << prog (c) << ": " - << e; + c->dbuf.is.close(); } + catch (const io_error&) {} } }; @@ -1897,9 +2742,8 @@ namespace build2 fail (ll) << "specified working directory " << cwd << " does not exist"; - // Absent if the process/builtin misses the "unsuccessful" deadline. - // - optional<process_exit> exit; + cstrings args (process_args ()); + pc.args = &args; const builtin_info* bi (resolve ? builtins.find (program) : nullptr); @@ -1909,8 +2753,11 @@ namespace build2 { // Execute the builtin. // - if (verb >= 2) - print_process (process_args ()); + // Don't print the true and false builtins, since they are normally + // used for the commands execution flow control. + // + if (verb >= 2 && program != "true" && program != "false") + print_process (args); // Some of the script builtins (cp, mkdir, etc) extend libbutl // builtins (via callbacks) registering/moving cleanups for the @@ -1951,18 +2798,6 @@ namespace build2 // We also extend the sleep builtin, deactivating the thread before // going to sleep and waking up before the deadline is reached. // - // Let's "wrap up" the sleep-related values into the single object to - // rely on "small function object" optimization. - // - struct sleep - { - optional<timestamp> deadline; - bool terminated = false; - - sleep (const optional<timestamp>& d): deadline (d) {} - }; - sleep slp (dl ? dl->value : optional<timestamp> ()); - builtin_callbacks bcs { // create @@ -2124,16 +2959,19 @@ namespace build2 // sleep // - [&env, &slp] (const duration& d) + [&env, &pc] (const duration& d) { duration t (d); - const optional<timestamp>& dl (slp.deadline); + const optional<timestamp>& dl (pc.dl + ? 
pc.dl->value + : optional<timestamp> ()); if (dl) { timestamp now (system_clock::now ()); - slp.terminated = now + t > *dl; + if (now + t > *dl) + pc.terminated = true; if (*dl <= now) return; @@ -2146,7 +2984,7 @@ namespace build2 // If/when required we could probably support the precise sleep // mode (e.g., via an option). // - env.context.sched.sleep (t); + env.context.sched->sleep (t); } }; @@ -2158,19 +2996,19 @@ namespace build2 move (ifd), move (ofd.out), move (efd), cwd, bcs)); + pc.bltn = &b; - pipe_command pc (b, c, ll, prev_cmd); - - // If the deadline is specified, then make sure we don't miss it - // waiting indefinitely in the builtin destructor on the right-hand - // side of the pipe failure. + // If the right-hand part of the pipe fails, then make sure we don't + // wait indefinitely in the process destructor if the deadlines are + // specified or just because a process is blocked on stderr. // - auto g (make_exception_guard ([&dl, &pc, &term_pipe] () + auto g (make_exception_guard ([&pc, &close_pipe, &trace] () { - if (dl) + if (pc.bltn != nullptr) try { - term_pipe (&pc); + close_pipe (); + term_pipe (&pc, trace); } catch (const failed&) { @@ -2181,28 +3019,21 @@ namespace build2 success = run_pipe (env, nc, ec, move (ofd.in), - ci + 1, li, ll, diag, - output, - dl, dl_cmd, + ii, li, ci + 1, ll, diag, + cf, last_cmd, + dl, &pc); - if (!dl) - b.wait (); - else if (!timed_wait (b, dl->value)) - term_pipe (&pc); - - // Note that this also handles ad hoc termination (without the call - // to term_pipe()) by the sleep builtin (see above). + // Complete the pipeline execution, if not done yet. // - if (pc.terminated || slp.terminated) + if (pc.bltn != nullptr) { - assert (dl); + read_pipe (); + wait_pipe (); - if (dl->success) - exit = process_exit (0); + if (!complete_pipe ()) + success = false; } - else - exit = process_exit (r); } catch (const system_error& e) { @@ -2214,8 +3045,6 @@ namespace build2 { // Execute the process. 
// - cstrings args (process_args ()); - // If the process path is not pre-searched then resolve the relative // non-simple program path against the script's working directory. The // simple one will be left for the process path search machinery. Also @@ -2273,10 +3102,16 @@ namespace build2 if (verb >= 2) print_process (pe, args); + // Note that stderr can only be a pipe if we are buffering the + // diagnostics. In this case also pass the reading end so it can be + // "probed" on Windows (see butl::process::pipe for details). + // process pr ( *pe.path, args.data (), - {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, + {ifd.get (), -1}, + process::pipe (ofd), + {pc.dbuf.is.fd (), efd.get ()}, cwd.string ().c_str (), pe.vars); @@ -2286,18 +3121,19 @@ namespace build2 ofd.out.reset (); efd.reset (); - pipe_command pc (pr, c, ll, prev_cmd); + pc.proc = &pr; - // If the deadline is specified, then make sure we don't miss it - // waiting indefinitely in the process destructor on the right-hand - // part of the pipe failure. + // If the right-hand part of the pipe fails, then make sure we don't + // wait indefinitely in the process destructor (see above for + // details). // - auto g (make_exception_guard ([&dl, &pc, &term_pipe] () + auto g (make_exception_guard ([&pc, &close_pipe, &trace] () { - if (dl) + if (pc.proc != nullptr) try { - term_pipe (&pc); + close_pipe (); + term_pipe (&pc, trace); } catch (const failed&) { @@ -2308,33 +3144,21 @@ namespace build2 success = run_pipe (env, nc, ec, move (ofd.in), - ci + 1, li, ll, diag, - output, - dl, dl_cmd, + ii, li, ci + 1, ll, diag, + cf, last_cmd, + dl, &pc); - if (!dl) - pr.wait (); - else if (!timed_wait (pr, dl->value)) - term_pipe (&pc); - -#ifndef _WIN32 - if (pc.terminated && - !pr.exit->normal () && - pr.exit->signal () == SIGTERM) -#else - if (pc.terminated && - !pr.exit->normal () && - pr.exit->status == DBG_TERMINATE_PROCESS) -#endif + // Complete the pipeline execution, if not done yet. 
+ // + if (pc.proc != nullptr) { - assert (dl); + read_pipe (); + wait_pipe (); - if (dl->success) - exit = process_exit (0); + if (!complete_pipe ()) + success = false; } - else - exit = pr.exit; } catch (const process_error& e) { @@ -2347,98 +3171,23 @@ namespace build2 } } - // If the righ-hand side pipeline failed than the whole pipeline fails, - // and no further checks are required. - // - if (!success) - return false; - - // Fail if the process is terminated due to reaching the deadline. - // - if (!exit) - fail (ll) << cmd_path (dl_cmd != nullptr ? *dl_cmd : c) - << " terminated: execution timeout expired"; - - path pr (cmd_path (c)); - - // If there is no valid exit code available by whatever reason then we - // print the proper diagnostics, dump stderr (if cached and not too - // large) and fail the whole script. Otherwise if the exit code is not - // correct then we print diagnostics if requested and fail the pipeline. - // - bool valid (exit->normal ()); - - // On Windows the exit code can be out of the valid codes range being - // defined as uint16_t. - // -#ifdef _WIN32 - if (valid) - valid = exit->code () < 256; -#endif - - exit_comparison cmp (c.exit ? c.exit->comparison : exit_comparison::eq); - uint16_t exc (c.exit ? c.exit->code : 0); - - success = valid && - (cmp == exit_comparison::eq) == (exc == exit->code ()); - - if (!valid || (!success && diag)) - { - // In the presense of a valid exit code we print the diagnostics and - // return false rather than throw. - // - diag_record d (valid ? error (ll) : fail (ll)); - - if (!exit->normal ()) - d << pr << " " << *exit; - else - { - uint16_t ec (exit->code ()); // Make sure is printed as integer. - - if (!valid) - d << pr << " exit code " << ec << " out of 0-255 range"; - else if (!success) - { - if (diag) - { - if (c.exit) - d << pr << " exit code " << ec - << (cmp == exit_comparison::eq ? 
" != " : " == ") << exc; - else - d << pr << " exited with code " << ec; - } - } - else - assert (false); - } - - if (non_empty (esp, ll) && avail_on_failure (esp, env)) - d << info << "stderr: " << esp; - - if (non_empty (osp, ll) && avail_on_failure (osp, env)) - d << info << "stdout: " << osp; - - if (non_empty (isp, ll) && avail_on_failure (isp, env)) - d << info << "stdin: " << isp; - - // Print cached stderr. - // - print_file (d, esp, ll); - } - - // If exit code is correct then check if the standard outputs match the - // expectations. Note that stdout is only redirected to file for the - // last command in the pipeline. + // If the pipeline or the righ-hand side outputs check failed, then no + // further checks are required. Otherwise, check if the standard outputs + // match the expectations. Note that stdout can only be redirected to + // file for the last command in the pipeline. // // The thinking behind matching stderr first is that if it mismatches, // then the program probably misbehaves (executes wrong functionality, // etc) in which case its stdout doesn't really matter. // if (success) - success = - check_output (pr, esp, isp, err, ll, env, diag, "stderr") && - (out == nullptr || - check_output (pr, osp, isp, *out, ll, env, diag, "stdout")); + { + path pr (cmd_path (c)); + + success = check_output (pr, esp, isp, err, ll, env, diag, "stderr") && + (out == nullptr || + check_output (pr, osp, isp, *out, ll, env, diag, "stdout")); + } return success; } @@ -2446,9 +3195,10 @@ namespace build2 static bool run_expr (environment& env, const command_expr& expr, - size_t li, const location& ll, + const iteration_index* ii, size_t li, + const location& ll, bool diag, - string* output) + const function<command_function>& cf, bool last_cmd) { // Commands are numbered sequentially throughout the expression // starting with 1. Number 0 means the command is a single one. 
@@ -2492,8 +3242,8 @@ namespace build2 r = run_pipe (env, p.begin (), p.end (), auto_fd (), - ci, li, ll, print, - output); + ii, li, ci, ll, print, + cf, last_cmd); } ci += p.size (); @@ -2505,26 +3255,37 @@ namespace build2 void run (environment& env, const command_expr& expr, - size_t li, const location& ll, - string* output) + const iteration_index* ii, size_t li, + const location& ll, + const function<command_function>& cf, + bool last_cmd) { // Note that we don't print the expression at any verbosity level // assuming that the caller does this, potentially providing some // additional information (command type, etc). // - if (!run_expr (env, expr, li, ll, true /* diag */, output)) + if (!run_expr (env, + expr, + ii, li, ll, + true /* diag */, + cf, last_cmd)) throw failed (); // Assume diagnostics is already printed. } bool - run_if (environment& env, - const command_expr& expr, - size_t li, const location& ll, - string* output) + run_cond (environment& env, + const command_expr& expr, + const iteration_index* ii, size_t li, + const location& ll, + const function<command_function>& cf, bool last_cmd) { // Note that we don't print the expression here (see above). // - return run_expr (env, expr, li, ll, false /* diag */, output); + return run_expr (env, + expr, + ii, li, ll, + false /* diag */, + cf, last_cmd); } void @@ -2773,8 +3534,7 @@ namespace build2 try { size_t n (0); - for (const dir_entry& de: dir_iterator (p, - false /* ignore_dangling */)) + for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow)) { if (n++ < 10) dr << '\n' << (de.ltype () == entry_type::directory diff --git a/libbuild2/script/run.hxx b/libbuild2/script/run.hxx index 8bc246c..c4c2aa2 100644 --- a/libbuild2/script/run.hxx +++ b/libbuild2/script/run.hxx @@ -38,22 +38,24 @@ namespace build2 // Location is the start position of this command line in the script. It // can be used in diagnostics. // - // Optionally, save the command output into the referenced variable. 
In - // this case assume that the expression contains a single pipline. + // Optionally, execute the specified function at the end of the pipe, + // either after the last command or instead of it. // void run (environment&, const command_expr&, - size_t index, + const iteration_index*, size_t index, const location&, - string* output = nullptr); + const function<command_function>& = nullptr, + bool last_cmd = true); bool - run_if (environment&, - const command_expr&, - size_t index, - const location&, - string* output = nullptr); + run_cond (environment&, + const command_expr&, + const iteration_index*, size_t index, + const location&, + const function<command_function>& = nullptr, + bool last_cmd = true); // Perform the registered special file cleanups in the direct order and // then the regular cleanups in the reverse order. @@ -80,6 +82,40 @@ namespace build2 // string diag_path (const dir_name_view&); + + // Read the stream content, optionally splitting the input data at + // whitespaces or newlines and calling the specified callback function for + // each substring (see the set builtin options for the splitting + // semantics). Throw failed on io_error. + // + // If the stream is a pipeline's output, then the pipeline argument must + // also be specified. Normally called from a custom command function (see + // command_function for details) which is provided with the pipeline + // information. + // + // Turn the stream into the non-blocking mode and, if the pipeline is + // specified, read out its buffered stderr streams while waiting for the + // input stream data. If a deadline is specified and is reached, then + // terminate the whole pipeline, if specified, and bail out. Otherwise + // issue diagnostics and fail. The thinking here is that in the former + // case the caller first needs to dump the buffered stderr streams, issue + // the appropriate diagnostics for the pipeline processes/builtins, and + // only throw failed afterwards. 
+ // + // Note that on Windows we can only turn file descriptors of the pipe type + // into the non-blocking mode. Thus, a non-pipe descriptor is read in the + // blocking manner (and the deadline is checked less accurately). This is + // fine since there are no pipeline stderr streams to read out in this + // case. + // + void + read (auto_fd&&, + bool whitespace, bool newline, bool exact, + const function<void (string&&)>&, + pipe_command* pipeline, + const optional<deadline>&, + const location&, + const char* what); } } diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index 9e6eeed..b53fc23 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -20,14 +20,17 @@ namespace build2 switch (lt) { - case line_type::var: s = "variable"; break; - case line_type::cmd: s = "command"; break; - case line_type::cmd_if: s = "'if'"; break; - case line_type::cmd_ifn: s = "'if!'"; break; - case line_type::cmd_elif: s = "'elif'"; break; - case line_type::cmd_elifn: s = "'elif!'"; break; - case line_type::cmd_else: s = "'else'"; break; - case line_type::cmd_end: s = "'end'"; break; + case line_type::var: s = "variable"; break; + case line_type::cmd: s = "command"; break; + case line_type::cmd_if: s = "'if'"; break; + case line_type::cmd_ifn: s = "'if!'"; break; + case line_type::cmd_elif: s = "'elif'"; break; + case line_type::cmd_elifn: s = "'elif!'"; break; + case line_type::cmd_else: s = "'else'"; break; + case line_type::cmd_while: s = "'while'"; break; + case line_type::cmd_for_args: s = "'for'"; break; + case line_type::cmd_for_stream: s = "'for'"; break; + case line_type::cmd_end: s = "'end'"; break; } return o << s; @@ -186,14 +189,14 @@ namespace build2 void dump (ostream& os, const string& ind, const lines& ls) { - // Additionally indent the if-branch lines. + // Additionally indent the flow control construct block lines. 
// - string if_ind; + string fc_ind; for (const line& l: ls) { - // Before printing indentation, decrease it if the else or end line is - // reached. + // Before printing indentation, decrease it if the else, end, etc line + // is reached. // switch (l.type) { @@ -202,9 +205,9 @@ namespace build2 case line_type::cmd_else: case line_type::cmd_end: { - size_t n (if_ind.size ()); + size_t n (fc_ind.size ()); assert (n >= 2); - if_ind.resize (n - 2); + fc_ind.resize (n - 2); break; } default: break; @@ -212,9 +215,10 @@ namespace build2 // Print indentations. // - os << ind << if_ind; + os << ind << fc_ind; - // After printing indentation, increase it for if/else branch. + // After printing indentation, increase it for the flow control + // construct block lines. // switch (l.type) { @@ -222,7 +226,10 @@ namespace build2 case line_type::cmd_ifn: case line_type::cmd_elif: case line_type::cmd_elifn: - case line_type::cmd_else: if_ind += " "; break; + case line_type::cmd_else: + case line_type::cmd_while: + case line_type::cmd_for_args: + case line_type::cmd_for_stream: fc_ind += " "; break; default: break; } @@ -418,9 +425,14 @@ namespace build2 // Timeout. // if (c.timeout) + { o << " -t " << chrono::duration_cast<chrono::seconds> (*c.timeout).count (); + if (c.timeout_success) + o << " -s"; + } + // CWD. // if (c.cwd) @@ -761,7 +773,9 @@ namespace build2 { using script::cleanup; - assert (!implicit || c.type == cleanup_type::always); + // Implicit never-cleanup doesn't make sense. + // + assert (!implicit || c.type != cleanup_type::never); const path& p (c.path); diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index 5a39659..f5bd69a 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -27,6 +27,9 @@ namespace build2 cmd_elif, cmd_elifn, cmd_else, + cmd_while, + cmd_for_args, // `for x: ...` + cmd_for_stream, // `... 
| for x` and `for x <...` cmd_end }; @@ -40,7 +43,7 @@ namespace build2 union { - const variable* var; // Pre-entered for line_type::var. + const variable* var; // Pre-entered for line_type::{var,cmd_for_*}. }; }; @@ -262,7 +265,7 @@ namespace build2 cleanup_type type; build2::path path; }; - using cleanups = vector<cleanup>; + using cleanups = small_vector<cleanup, 1>; // command_exit // @@ -315,6 +318,10 @@ namespace build2 add (string); }; + // @@ For better diagnostics we may want to store an individual location + // of each command in the pipeline (maybe we can share the file part + // somehow since a pipeline cannot span multiple files). + // struct command { // We use NULL initial as an indication that the path stored in recall @@ -324,9 +331,13 @@ namespace build2 process_path program; strings arguments; - optional<dir_path> cwd; // From env builtin. - environment_vars variables; // From env builtin. - optional<duration> timeout; // From env builtin. + + // These come from the env builtin. + // + optional<dir_path> cwd; + environment_vars variables; + optional<duration> timeout; + bool timeout_success = false; optional<redirect> in; optional<redirect> out; @@ -354,6 +365,10 @@ namespace build2 // command_pipe // + // Note that we cannot use small_vector here, since moving from objects of + // the command_pipe type would invalidate the command redirects of the + // reference type in this case. + // using command_pipe = vector<command>; void @@ -372,7 +387,7 @@ namespace build2 command_pipe pipe; }; - using command_expr = vector<expr_term>; + using command_expr = small_vector<expr_term, 1>; void to_stream (ostream&, const command_expr&, command_to_stream); @@ -380,6 +395,15 @@ namespace build2 ostream& operator<< (ostream&, const command_expr&); + // Stack-allocated linked list of iteration indexes of the nested loops. + // + struct iteration_index + { + size_t index; // 1-based. + + const iteration_index* prev; // NULL for the top-most loop. 
+ }; + struct timeout { duration value; @@ -536,7 +560,7 @@ namespace build2 // Set variable value with optional (non-empty) attributes. // virtual void - set_variable (string&& name, + set_variable (string name, names&&, const string& attrs, const location&) = 0; @@ -569,6 +593,20 @@ namespace build2 ~environment () = default; }; + // Custom command function that can be executed at the end of the + // pipeline. Should throw io_error on the underlying OS error. + // + // Note: the pipeline can be NULL (think of `for x <<<='foo'`). + // + struct pipe_command; + + using command_function = void (environment&, + const strings& args, + auto_fd in, + pipe_command* pipeline, + const optional<deadline>&, + const location&); + // Helpers. // // Issue diagnostics with the specified prefix and fail if the string |