From d280946474568925016359be742b59fd6c000c52 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 3 Jun 2020 16:38:23 +0300 Subject: Properly handle diag directive in build script parser --- libbuild2/build/script/parser+diag.test.testscript | 57 ++++++++ libbuild2/build/script/parser.cxx | 156 +++++++++++++++------ libbuild2/build/script/parser.hxx | 61 ++++++-- libbuild2/build/script/parser.test.cxx | 51 ++++++- libbuild2/build/script/script.hxx | 6 +- libbuild2/functions-process.cxx | 2 +- libbuild2/parser.cxx | 3 +- libbuild2/rule.cxx | 91 +++++++----- libbuild2/script/parser.cxx | 53 +++---- libbuild2/script/parser.hxx | 5 +- 10 files changed, 363 insertions(+), 122 deletions(-) create mode 100644 libbuild2/build/script/parser+diag.test.testscript (limited to 'libbuild2') diff --git a/libbuild2/build/script/parser+diag.test.testscript b/libbuild2/build/script/parser+diag.test.testscript new file mode 100644 index 0000000..bb0672e --- /dev/null +++ b/libbuild2/build/script/parser+diag.test.testscript @@ -0,0 +1,57 @@ +# file : libbuild2/build/script/parser+diag.test.testscript +# license : MIT; see accompanying LICENSE file + +test.options += -g + +: name +: +$* test <>EOO + echo abc + EOI + name: test + EOO + +: name-deduce +: +$* <>EOO + echo abc + EOI + name: echo + EOO + +: diag +: +$* <>~%EOO% + echo abc + cat abc + diag abc '==>' $> + cp abc xyz + EOI + %diag: abc ==> .+file\{driver\.\}% + EOO + +: ambiguity +: +{ +: name +: + $* test <>EOE != 0 + echo abc + diag xyz + EOI + buildfile:12:1: error: both low-verbosity script diagnostics name and 'diag' builtin call + buildfile:10: info: script name specified here + EOE + + : diag + : + $* <>EOE != 0 + echo abc + diag abc + cat abc + diag xyz + EOI + buildfile:14:1: error: multiple 'diag' builtin calls + buildfile:12:1: info: previous call is here + EOE +} diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index c4c4b03..72c99ad 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -28,7 +28,7 @@ namespace build2 script parser:: pre_parse (const target& tg, istream& is, const path_name& pn, uint64_t line, - optional diag_name, const location& diag_loc) + optional diag, const location& diag_loc) { path_ = &pn; @@ -50,9 +50,9 @@ namespace build2 runner_ = nullptr; environment_ = nullptr; - if (diag_name) + if (diag) { - diag = make_pair (move (*diag_name), diag_loc); + diag_name = make_pair (move (*diag), diag_loc); diag_weight = 4; } @@ -69,17 +69,21 @@ namespace build2 { diag_record dr; - if (!diag) + if (!diag_name && !diag_line) { dr << fail (s.start_loc) << "unable to deduce low-verbosity script diagnostics name"; } - else if (diag2) + else if (diag_name2) { + assert (diag_name); + dr << fail (s.start_loc) << "low-verbosity script diagnostics name is ambiguous" << - info (diag->second) << "could be '" << diag->first << "'" << - info (diag2->second) << "could be '" << diag2->first << "'"; + info (diag_name->second) << "could be '" << diag_name->first + << "'" << + info (diag_name2->second) << "could be '" << diag_name2->first + << "'"; } if (!dr.empty ()) @@ -92,7 +96,12 @@ namespace build2 } } - s.diag = move (diag->first); + assert (diag_name.has_value () != diag_line.has_value ()); + + if (diag_name) + s.diag_name = move (diag_name->first); + else + s.diag_line = move (diag_line->first); return s; } @@ -141,6 +150,8 @@ namespace build2 line_type lt ( pre_parse_line_start (t, tt, lexer_mode::second_token)); + save_line_ = nullptr; + line ln; switch (lt) { @@ -198,26 +209,20 @@ namespace build2 assert (tt == type::newline); - //@@ TODO: we need to make sure special builtin is the first command. + ln.type = lt; + ln.tokens = replay_data (); - // Save the script line, unless this is a special builtin (indicated - // by the replay::stop mode). - // - if (replay_ == replay::save) - { - ln.type = lt; - ln.tokens = replay_data (); + if (save_line_ != nullptr) + *save_line_ = move (ln); + else script_->lines.push_back (move (ln)); - if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) - { - tt = peek (lexer_mode::first_token); + if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) + { + tt = peek (lexer_mode::first_token); - pre_parse_if_else (t, tt); - } + pre_parse_if_else (t, tt); } - else - assert (replay_ == replay::stop && lt == line_type::cmd); } void parser:: @@ -303,7 +308,9 @@ namespace build2 // optional parser:: - parse_program (token& t, build2::script::token_type& tt, names& ns) + parse_program (token& t, build2::script::token_type& tt, + bool first, + names& ns) { const location l (get_location (t)); @@ -318,37 +325,57 @@ namespace build2 { if (diag_weight < w) { - diag = make_pair (move (d), l); + diag_name = make_pair (move (d), l); diag_weight = w; - diag2 = nullopt; + diag_name2 = nullopt; } - else if (w != 0 && w == diag_weight && d != diag->first && !diag2) - diag2 = make_pair (move (d), l); + else if (w != 0 && + w == diag_weight && + d != diag_name->first && + !diag_name2) + diag_name2 = make_pair (move (d), l); }; // Handle special builtins. // - if (pre_parse_) + if (pre_parse_ && first && tt == type::word) { - if (tt == type::word && t.value == "diag") + if (t.value == "diag") { - // @@ Redo the diag directive handling (save line separately and - // execute later, before script execution). + // Check for ambiguity. // if (diag_weight == 4) { - fail (script_->start_loc) - << "low-verbosity script diagnostics name is ambiguous" << - info (diag->second) << "could be '" << diag->first << "'" << - info (l) << "could be ''"; + if (diag_name) // Script name. + { + fail (l) << "both low-verbosity script diagnostics name " + << "and 'diag' builtin call" << + info (diag_name->second) << "script name specified here"; + } + else // Custom diagnostics. + { + assert (diag_line); + + fail (l) << "multiple 'diag' builtin calls" << + info (diag_line->second) << "previous call is here"; + } } - set_diag ("", 4); + // Instruct the parser to save the diag builtin line separately + // from the script lines, when it is fully parsed. Note that it + // will be executed prior to the script execution to obtain the + // custom diagnostics. + // + diag_line = make_pair (line (), l); + save_line_ = &diag_line->first; + diag_weight = 4; - build2::script::parser::parse_program (t, tt, ns); - replay_stop (); + diag_name = nullopt; + diag_name2 = nullopt; - return nullopt; + // Parse the leading chunk and bail out. + // + return build2::script::parser::parse_program (t, tt, first, ns); } } @@ -719,6 +746,55 @@ namespace build2 runner_->leave (*environment_, s.end_loc); } + names parser:: + execute_special (const scope& rs, const scope& bs, + environment& e, + const line& ln, + bool omit_builtin) + { + path_ = nullptr; // Set by replays. + + pre_parse_ = false; + + set_lexer (nullptr); + + // The script shouldn't be able to modify the scopes. + // + // Note that for now we don't set target_ since it's not clear what + // it could be used for (we need scope_ for calling functions such as + // $target.path()). + // + root_ = const_cast (&rs); + scope_ = const_cast (&bs); + pbase_ = scope_->src_path_; + + script_ = nullptr; + runner_ = nullptr; + environment_ = &e; + + // Copy the tokens and start playing. + // + replay_data (replay_tokens (ln.tokens)); + + token t; + build2::script::token_type tt; + next (t, tt); + + if (omit_builtin) + { + assert (tt != type::newline && tt != type::eos); + + next (t, tt); + } + + names r (tt != type::newline && tt != type::eos + ? parse_names (t, tt, pattern_mode::expand) + : names ()); + + replay_stop (); + return r; + } + // When add a special variable don't forget to update lexer::word(). // bool parser:: diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx index 15d4ede..4b98cbc 100644 --- a/libbuild2/build/script/parser.hxx +++ b/libbuild2/build/script/parser.hxx @@ -36,7 +36,7 @@ namespace build2 script pre_parse (const target&, istream&, const path_name&, uint64_t line, - optional diag, const location& diag_loc); + optional diag_name, const location& diag_loc); // Recursive descent parser. // @@ -67,6 +67,16 @@ namespace build2 execute (const scope& root, const scope& base, environment&, const script&, runner&); + // Parse a special builtin line into names, performing the variable + // and pattern expansions. If omit_builtin is true, then omit the + // builtin name from the result. + // + names + execute_special (const scope& root, const scope& base, + environment&, + const line&, + bool omit_builtin = true); + protected: void exec_script (); @@ -87,24 +97,28 @@ namespace build2 // leaving the rest for the base parser to handle. // // During pre-parsing try to deduce the low-verbosity script - // diagnostics name. + // diagnostics name as a program/builtin name or obtain the custom + // low-verbosity diagnostics specified with the diag builtin. Note + // that the diag builtin can only appear at the beginning of the + // command line. // virtual optional - parse_program (token&, build2::script::token_type&, names&) override; - - void - parse_program_diag (token&, build2::script::token_type&, names&); + parse_program (token&, build2::script::token_type&, + bool first, + names&) override; protected: script* script_; - // Current low-verbosity script diagnostics name and weight. + // Current low-verbosity script diagnostics and its weight. // // During pre-parsing each command leading names are translated into a - // potential script name, unless it is set manually (with the diag - // directive or via the constructor). The potential script name has a - // weight associated with it, so script names with greater weights - // override names with lesser weights. The possible weights are: + // potential low-verbosity script diagnostics name, unless the + // diagnostics is set manually (script name via the constructor or + // custom diagnostics via the diag builtin). The potential script + // name has a weight associated with it, so script names with greater + // weights override names with lesser weights. The possible weights + // are: // // 0 - builtins that do not add to the script semantics (exit, // true, etc) and are never picked up as a script name @@ -119,8 +133,20 @@ namespace build2 // then this ambiguity is reported unless a higher-weighted name is // encountered later. // - optional> diag; - optional> diag2; + // If the diag builtin is encountered, then its whole line is saved + // (including the leading 'diag' word) for later execution and the + // diagnostics weight is set to 4. + // + // Any attempt to manually set the custom diagnostics twice (the diag + // builtin after the script name or after another diag builtin) is + // reported as ambiguity. + // + // At the end of pre-parsing either diag_name or diag_line (but not + // both) are present. + // + optional> diag_name; + optional> diag_name2; // Ambiguous script name. + optional> diag_line; uint8_t diag_weight = 0; // True during pre-parsing when the pre-parse mode is temporarily @@ -128,6 +154,15 @@ namespace build2 // bool pre_parse_suspended_ = false; + // The alternative location where the next line should be saved. + // + // It is set to NULL before the script line get parsed, indicating + // that the line should by default be appended to the script. However, + // parse_program() can point it to a different location where the line + // should be saved instead (e.g., diag_line, etc). + // + line* save_line_; + // Execute state. // runner* runner_; diff --git a/libbuild2/build/script/parser.test.cxx b/libbuild2/build/script/parser.test.cxx index de3e839..7f2840d 100644 --- a/libbuild2/build/script/parser.test.cxx +++ b/libbuild2/build/script/parser.test.cxx @@ -73,6 +73,7 @@ namespace build2 // argv[0] [-l] // argv[0] -d // argv[0] -p + // argv[0] -g [] // // In the first form read the script from stdin and trace the script // execution to stdout using the custom print runner. @@ -83,6 +84,11 @@ namespace build2 // In the third form read the script from stdin, parse it and print // line tokens quoting information to stdout. // + // In the forth form read the script from stdin, parse it and print the + // low-verbosity script diagnostics name or custom low-verbosity + // diagnostics to stdout. If the script doesn't deduce any of them, then + // print the diagnostics and exit with non-zero code. + // // -l // Print the script line number for each executed expression. // @@ -97,6 +103,10 @@ namespace build2 // := 'S' | 'D' | 'M' // := 'C' | 'P' // + // -g + // Dump the low-verbosity script diagnostics name or custom + // low-verbosity diagnostics to stdout. + // int main (int argc, char* argv[]) { @@ -106,10 +116,12 @@ namespace build2 { run, dump, - print + print, + diag } m (mode::run); bool print_line (false); + optional diag_name; for (int i (1); i != argc; ++i) { @@ -121,11 +133,22 @@ namespace build2 m = mode::dump; else if (a == "-p") m = mode::print; + else if (a == "-g") + m = mode::diag; else + { + if (m == mode::diag) + { + diag_name = move (a); + break; + } + assert (false); + } } - assert (m == mode::run || !print_line); + assert (!print_line || m == mode::run); + assert (!diag_name || m == mode::diag); // Fake build system driver, default verbosity. // @@ -164,7 +187,9 @@ namespace build2 script s (p.pre_parse (tt, cin, nm, 11 /* line */, - string ("test"), + (m != mode::diag + ? optional ("test") + : move (diag_name)), location (nm, 10))); switch (m) @@ -176,6 +201,26 @@ namespace build2 p.execute (ctx.global_scope, ctx.global_scope, e, s, r); break; } + case mode::diag: + { + if (s.diag_name) + { + cout << "name: " << *s.diag_name << endl; + } + else + { + assert (s.diag_line); + + environment e (perform_update_id, tt, false /* temp_dir */); + + cout << "diag: " << p.execute_special (ctx.global_scope, + ctx.global_scope, + e, + *s.diag_line) << endl; + } + + break; + } case mode::dump: { dump (cout, "", s.lines); diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx index de759de..5fd8561 100644 --- a/libbuild2/build/script/script.hxx +++ b/libbuild2/build/script/script.hxx @@ -58,9 +58,11 @@ namespace build2 // bool temp_dir = false; - // Command name for low-verbosity diagnostics. + // Command name for low-verbosity diagnostics and custom low-verbosity + // diagnostics line. Note: cannot be both. // - optional diag; + optional diag_name; + optional diag_line; location start_loc; location end_loc; diff --git a/libbuild2/functions-process.cxx b/libbuild2/functions-process.cxx index de06f27..5e704e5 100644 --- a/libbuild2/functions-process.cxx +++ b/libbuild2/functions-process.cxx @@ -214,7 +214,7 @@ namespace build2 try { - size_t erase; + size_t erase (0); // This can be a process_path (pair), process_path_ex (process_path // followed by the name@ and checksum@ pairs), or just a path. diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 756a45b..d8baf06 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -1146,7 +1146,8 @@ namespace build2 } catch (const invalid_argument& e) { - fail (nloc) << "invalid c++ recipe version value: " << e; + fail (nloc) << "invalid c++ recipe version value: " << e + << endf; } ar.reset (new adhoc_cxx_rule (loc, st.value.size (), v)); diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx index a5d51ca..7ea6e68 100644 --- a/libbuild2/rule.cxx +++ b/libbuild2/rule.cxx @@ -413,15 +413,15 @@ namespace build2 // @@ TODO: for now we dump it as an attribute whether it was specified or // derived from the script. Maybe that's ok? // - if (script.diag) + if (script.diag_name) { os << ind << '%'; - if (script.diag) + if (script.diag_name) { os << " ["; os << "diag="; - to_stream (os, name (*script.diag), true /* quote */, '@'); + to_stream (os, name (*script.diag_name), true /* quote */, '@'); os << ']'; } @@ -691,34 +691,43 @@ namespace build2 if (!update) return *ps; - if (verb == 1) - { - // @@ TODO (and below): - // - // - derive diag if absent (should probably do in match?) - // - // - we are printing target, not source (like in most other places) - // - // - printing of ad hoc target group (the {hxx cxx}{foo} idea) - // - // - if we are printing prerequisites, should we print all of them - // (including tools)? - // - - text << (script.diag ? script.diag->c_str () : "adhoc") << ' ' << t; - } - - if (!ctx.dry_run || verb >= 2) + if (!ctx.dry_run || verb != 0) { const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); build::script::environment e (a, t, script.temp_dir); build::script::parser p (ctx); - build::script::default_runner r; - p.execute (*bs.root_scope (), bs, e, script, r); - if (!ctx.dry_run) - dd.check_mtime (tp); + if (verb == 1) + { + if (script.diag_line) + { + text << p.execute_special (rs, bs, e, *script.diag_line); + } + else + { + // @@ TODO (and below): + // + // - we are printing target, not source (like in most other places) + // + // - printing of ad hoc target group (the {hxx cxx}{foo} idea) + // + // - if we are printing prerequisites, should we print all of them + // (including tools)? + // + text << *script.diag_name << ' ' << t; + } + } + + if (!ctx.dry_run || verb >= 2) + { + build::script::default_runner r; + p.execute (rs, bs, e, script, r); + + if (!ctx.dry_run) + dd.check_mtime (tp); + } } t.mtime (system_clock::now ()); @@ -734,21 +743,33 @@ namespace build2 execute_prerequisites (a, t); - if (verb == 1) - { - // @@ TODO: as above - - text << (script.diag ? script.diag->c_str () : "adhoc") << ' ' << t; - } - - if (!ctx.dry_run || verb >= 2) + if (!ctx.dry_run || verb != 0) { const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); build::script::environment e (a, t, script.temp_dir); build::script::parser p (ctx); - build::script::default_runner r; - p.execute (*bs.root_scope (), bs, e, script, r); + + if (verb == 1) + { + if (script.diag_line) + { + text << p.execute_special (rs, bs, e, *script.diag_line); + } + else + { + // @@ TODO: as above + // + text << *script.diag_name << ' ' << t; + } + } + + if (!ctx.dry_run || verb >= 2) + { + build::script::default_runner r; + p.execute (rs, bs, e, script, r); + } } return target_state::changed; diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 33dc273..fd9399d 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -97,7 +97,7 @@ namespace build2 } optional parser:: - parse_program (token& t, type& tt, names& ns) + parse_program (token& t, type& tt, bool, names& ns) { parse_names (t, tt, ns, @@ -153,8 +153,7 @@ namespace build2 // Handles empty regex properly. // if (mod.find ('/') != string::npos && re[0] == '/') - fail (l) << "portable path modifier and '/' introducer in " - << what; + fail (l) << "portable path modifier and '/' introducer in " << what; }; // Pending positions where the next word should go. @@ -162,7 +161,8 @@ namespace build2 enum class pending { none, - program, + program_first, + program_next, in_string, in_document, in_file, @@ -180,7 +180,7 @@ namespace build2 err_file, clean }; - pending p (pending::program); + pending p (pending::program_first); string mod; // Modifiers for pending in_* and out_* positions. here_docs hd; // Expected here-documents. @@ -291,7 +291,8 @@ namespace build2 switch (p) { case pending::none: c.arguments.push_back (move (w)); break; - case pending::program: + case pending::program_first: + case pending::program_next: c.program = process_path (nullptr /* initial */, parse_path (move (w), "program path"), path () /* effect */); @@ -355,20 +356,21 @@ namespace build2 switch (p) { - case pending::none: break; - case pending::program: what = "program"; break; - case pending::in_string: what = "stdin here-string"; break; - case pending::in_document: what = "stdin here-document end"; break; - case pending::in_file: what = "stdin file"; break; - case pending::out_merge: what = "stdout file descriptor"; break; - case pending::out_string: what = "stdout here-string"; break; - case pending::out_document: what = "stdout here-document end"; break; - case pending::out_file: what = "stdout file"; break; - case pending::err_merge: what = "stderr file descriptor"; break; - case pending::err_string: what = "stderr here-string"; break; - case pending::err_document: what = "stderr here-document end"; break; - case pending::err_file: what = "stderr file"; break; - case pending::clean: what = "cleanup path"; break; + case pending::none: break; + case pending::program_first: + case pending::program_next: what = "program"; break; + case pending::in_string: what = "stdin here-string"; break; + case pending::in_document: what = "stdin here-document end"; break; + case pending::in_file: what = "stdin file"; break; + case pending::out_merge: what = "stdout file descriptor"; break; + case pending::out_string: what = "stdout here-string"; break; + case pending::out_document: what = "stdout here-document end"; break; + case pending::out_file: what = "stdout file"; break; + case pending::err_merge: what = "stderr file descriptor"; break; + case pending::err_string: what = "stderr here-string"; break; + case pending::err_document: what = "stderr here-document end"; break; + case pending::err_file: what = "stderr file"; break; + case pending::clean: what = "cleanup path"; break; case pending::out_str_regex: { @@ -752,7 +754,7 @@ namespace build2 case type::pipe: case type::log_or: case type::log_and: - p = pending::program; + p = pending::program_next; break; case type::in_doc: @@ -886,7 +888,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); c = command (); - p = pending::program; + p = pending::program_next; if (tt != type::pipe) { @@ -1031,9 +1033,10 @@ namespace build2 // reset_quoted (t); - if (p == pending::program) + if (p == pending::program_first || p == pending::program_next) { - optional pp (parse_program (t, tt, ns)); + optional pp ( + parse_program (t, tt, p == pending::program_first, ns)); // During pre-parsing we are not interested in the // parse_program() call result, so just discard the potentially @@ -1195,7 +1198,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); c = command (); - p = pending::program; + p = pending::program_next; if (tt != type::pipe) { diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx index b15f632..bec6867 100644 --- a/libbuild2/script/parser.hxx +++ b/libbuild2/script/parser.hxx @@ -165,7 +165,8 @@ namespace build2 // Customization hooks. // protected: - // Parse the command's leading name chunk. + // Parse the command's leading name chunk. The argument first is true if + // this is the first command in the line. // // During the execution phase try to parse and translate the leading // names into the process path and return nullopt if choose not to do @@ -188,7 +189,7 @@ namespace build2 // recognize and execute certain directives, or some such. // virtual optional - parse_program (token&, token_type&, names&); + parse_program (token&, token_type&, bool first, names&); // Set lexer pointers for both the current and the base classes. // -- cgit v1.1