diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2020-06-04 23:01:58 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-06-05 17:35:24 +0300 |
commit | fb56fc798110c8ee9685bec156b21f1f87aca121 (patch) | |
tree | c92a0d7d764794b1af63227bb8b9e89d036dbb72 | |
parent | e4a9ccadf751b88f5508ce9f890484bae33d1aaf (diff) |
Add depdb buildscript builtin
-rw-r--r-- | libbuild2/build/script/parser+diag.test.testscript | 6 | ||||
-rw-r--r-- | libbuild2/build/script/parser+line.test.testscript | 6 | ||||
-rw-r--r-- | libbuild2/build/script/parser.cxx | 156 | ||||
-rw-r--r-- | libbuild2/build/script/parser.hxx | 34 | ||||
-rw-r--r-- | libbuild2/build/script/script.hxx | 14 | ||||
-rw-r--r-- | libbuild2/parser.cxx | 2 | ||||
-rw-r--r-- | libbuild2/rule.cxx | 297 | ||||
-rw-r--r-- | libbuild2/script/script.cxx | 283 | ||||
-rw-r--r-- | libbuild2/script/script.hxx | 6 | ||||
-rw-r--r-- | tests/dependency/recipe/testscript | 1 |
10 files changed, 521 insertions, 284 deletions
diff --git a/libbuild2/build/script/parser+diag.test.testscript b/libbuild2/build/script/parser+diag.test.testscript index bb0672e..60683bc 100644 --- a/libbuild2/build/script/parser+diag.test.testscript +++ b/libbuild2/build/script/parser+diag.test.testscript @@ -24,10 +24,10 @@ $* <<EOI >>EOO $* <<EOI >>~%EOO% echo abc cat abc - diag abc '==>' $> - cp abc xyz + diag copy >= $> + cp <- $> EOI - %diag: abc ==> .+file\{driver\.\}% + %diag: copy >= .+file\{driver\.\}% EOO : ambiguity diff --git a/libbuild2/build/script/parser+line.test.testscript b/libbuild2/build/script/parser+line.test.testscript index 45b07b7..1b39265 100644 --- a/libbuild2/build/script/parser+line.test.testscript +++ b/libbuild2/build/script/parser+line.test.testscript @@ -70,3 +70,9 @@ $* <<EOI >>EOO "foo" 'foo b"ar baz' EOO + +: no-newline +: +$* <:'foo' 2>>EOE != 0 + buildfile:11:4: error: expected newline instead of <end of file> + EOE diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index 274faf0..c698448 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -52,8 +52,8 @@ namespace build2 if (diag) { - diag_name = make_pair (move (*diag), diag_loc); - diag_weight = 4; + diag_name_ = make_pair (move (*diag), diag_loc); + diag_weight_ = 4; } s.start_loc = location (*path_, line, 1); @@ -69,20 +69,20 @@ namespace build2 { diag_record dr; - if (!diag_name && !diag_line) + if (!diag_name_ && !diag_line_) { dr << fail (s.start_loc) << "unable to deduce low-verbosity script diagnostics name"; } - else if (diag_name2) + else if (diag_name2_) { - assert (diag_name); + assert (diag_name_); dr << fail (s.start_loc) << "low-verbosity script diagnostics name is ambiguous" << - info (diag_name->second) << "could be '" << diag_name->first + info (diag_name_->second) << "could be '" << diag_name_->first << "'" << - info (diag_name2->second) << "could be '" << diag_name2->first + info (diag_name2_->second) << "could be '" << 
diag_name2_->first << "'"; } @@ -96,12 +96,19 @@ namespace build2 } } - assert (diag_name.has_value () != diag_line.has_value ()); + // Save the script name or custom diagnostics line. + // + assert (diag_name_.has_value () != diag_line_.has_value ()); - if (diag_name) - s.diag_name = move (diag_name->first); + if (diag_name_) + s.diag_name = move (diag_name_->first); else - s.diag_line = move (diag_line->first); + s.diag_line = move (diag_line_->first); + + // Save the custom dependency change tracking lines, if present. + // + s.depdb_clear = depdb_clear_.has_value (); + s.depdb_lines = move (depdb_lines_); return s; } @@ -150,9 +157,13 @@ namespace build2 line_type lt ( pre_parse_line_start (t, tt, lexer_mode::second_token)); - save_line_ = nullptr; - line ln; + + // Indicates that the parsed line should by default be appended to the + // script. + // + save_line_ = &ln; + switch (lt) { case line_type::var: @@ -213,9 +224,12 @@ namespace build2 ln.tokens = replay_data (); if (save_line_ != nullptr) - *save_line_ = move (ln); - else - script_->lines.push_back (move (ln)); + { + if (save_line_ == &ln) + script_->lines.push_back (move (ln)); + else + *save_line_ = move (ln); + } if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) { @@ -323,41 +337,46 @@ namespace build2 // auto set_diag = [&l, this] (string d, uint8_t w) { - if (diag_weight < w) + if (diag_weight_ < w) { - diag_name = make_pair (move (d), l); - diag_weight = w; - diag_name2 = nullopt; + diag_name_ = make_pair (move (d), l); + diag_weight_ = w; + diag_name2_ = nullopt; } - else if (w != 0 && - w == diag_weight && - d != diag_name->first && - !diag_name2) - diag_name2 = make_pair (move (d), l); + else if (w != 0 && + w == diag_weight_ && + d != diag_name_->first && + !diag_name2_) + diag_name2_ = make_pair (move (d), l); }; // Handle special builtins. // + // NOTE: update line dumping (script.cxx:dump()) if adding a special + // builtin. 
+ // if (pre_parse_ && first && tt == type::word) { - if (t.value == "diag") + const string& v (t.value); + + if (v == "diag") { // Check for ambiguity. // - if (diag_weight == 4) + if (diag_weight_ == 4) { - if (diag_name) // Script name. + if (diag_name_) // Script name. { fail (l) << "both low-verbosity script diagnostics name " << "and 'diag' builtin call" << - info (diag_name->second) << "script name specified here"; + info (diag_name_->second) << "script name specified here"; } else // Custom diagnostics. { - assert (diag_line); + assert (diag_line_); fail (l) << "multiple 'diag' builtin calls" << - info (diag_line->second) << "previous call is here"; + info (diag_line_->second) << "previous call is here"; } } @@ -366,16 +385,73 @@ namespace build2 // will be executed prior to the script execution to obtain the // custom diagnostics. // - diag_line = make_pair (line (), l); - save_line_ = &diag_line->first; - diag_weight = 4; + diag_line_ = make_pair (line (), l); + save_line_ = &diag_line_->first; + diag_weight_ = 4; + + diag_name_ = nullopt; + diag_name2_ = nullopt; + + // Note that the rest of the line contains the builtin argument to + // be printed, thus we parse it in the value lexer mode. + // + mode (lexer_mode::value); + parse_names (t, tt, pattern_mode::ignore); + return nullopt; + } + else if (v == "depdb") + { + // Note that the rest of the line contains the builtin command + // name, potentially followed by the arguments to be + // hashed/saved. Thus, we parse it in the value lexer mode. + // + mode (lexer_mode::value); - diag_name = nullopt; - diag_name2 = nullopt; + // Obtain and validate the depdb builtin command name. + // + next (t, tt); - // Parse the leading chunk and bail out. + if (tt != type::word || + (v != "clear" && v != "hash" && v != "string")) + { + fail (get_location (t)) + << "expected 'depdb' builtin command instead of " << t; + } + + if (v == "clear") + { + // Make sure the clear depdb command comes first. 
+ // + if (depdb_clear_) + fail (l) << "multiple 'depdb clear' builtin calls" << + info (*depdb_clear_) << "previous call is here"; + + if (!depdb_lines_.empty ()) + fail (l) << "'depdb clear' should be the first 'depdb' " + << "builtin call" << + info (depdb_lines_[0].tokens[0].location ()) + << "first 'depdb' call is here"; + + // Save the builtin location and cancel the line saving. + // + depdb_clear_ = l; + save_line_ = nullptr; + } + else + { + // Instruct the parser to save the depdb builtin line separately + // from the script lines, when it is fully parsed. Note that the + // builtin command arguments will be validated during execution, + // when expanded. + // + depdb_lines_.push_back (line ()); + save_line_ = &depdb_lines_.back (); + } + + // Parse the rest of the line and bail out. // - return build2::script::parser::parse_program (t, tt, first, ns); + parse_names (t, tt, pattern_mode::ignore); + return nullopt; } } @@ -414,7 +490,7 @@ namespace build2 // // This is also the reason why we add a diag frame. // - if (pre_parse_ && diag_weight != 4) + if (pre_parse_ && diag_weight_ != 4) { pre_parse_ = false; // Make parse_names() perform expansions. pre_parse_suspended_ = true; @@ -445,7 +521,7 @@ namespace build2 pre_parse_ = true; } - if (pre_parse_ && diag_weight == 4) + if (pre_parse_ && diag_weight_ == 4) return nullopt; } diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx index 4b98cbc..a652cf4 100644 --- a/libbuild2/build/script/parser.hxx +++ b/libbuild2/build/script/parser.hxx @@ -141,13 +141,29 @@ namespace build2 // builtin after the script name or after another diag builtin) is // reported as ambiguity. // - // At the end of pre-parsing either diag_name or diag_line (but not + // At the end of pre-parsing either diag_name_ or diag_line_ (but not // both) are present. // - optional<pair<string, location>> diag_name; - optional<pair<string, location>> diag_name2; // Ambiguous script name. 
- optional<pair<line, location>> diag_line; - uint8_t diag_weight = 0; + optional<pair<string, location>> diag_name_; + optional<pair<string, location>> diag_name2_; // Ambiguous script name. + optional<pair<line, location>> diag_line_; + uint8_t diag_weight_ = 0; + + // Custom dependency change tracking. + // + // The depdb builtin can be used to change the default dependency + // change tracking: + // + // depdb clear - Cancels the default variables, targets, and + // prerequisites change tracking. Can only be + // the first depdb builtin call. + // + // depdb hash <args> - Track the argument list change as a hash. + // + // depdb string <arg> - Track the argument (single) change as string. + // + optional<location> depdb_clear_; // 'depdb clear' location if any. + lines depdb_lines_; // Note: excludes 'depdb clear'. // True during pre-parsing when the pre-parse mode is temporarily // suspended to perform expansion. @@ -156,10 +172,12 @@ namespace build2 // The alternative location where the next line should be saved. // - // It is set to NULL before the script line get parsed, indicating - // that the line should by default be appended to the script. However, + // Before the script line gets parsed, it is set to a temporary value + // that will by default be appended to the script. However, // parse_program() can point it to a different location where the line - // should be saved instead (e.g., diag_line, etc). + // should be saved instead (e.g., diag_line_, etc) or set it to NULL + // if the line is handled in an ad-hoc way and should be dropped + // (e.g., depdb_clear_, etc). 
// line* save_line_; diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx index 5fd8561..fafc87e 100644 --- a/libbuild2/build/script/script.hxx +++ b/libbuild2/build/script/script.hxx @@ -20,6 +20,7 @@ namespace build2 namespace script { using build2::script::line; + using build2::script::lines; using build2::script::line_type; using build2::script::redirect; using build2::script::redirect_type; @@ -37,10 +38,12 @@ namespace build2 class script { public: + using lines_type = build::script::lines; + // Note that the variables are not pre-entered into a pool during the // parsing phase, so the line variable pointers are NULL. // - build2::script::lines lines; + lines_type lines; // Referenced ordinary (non-special) variables. // @@ -59,11 +62,18 @@ namespace build2 bool temp_dir = false; // Command name for low-verbosity diagnostics and custom low-verbosity - // diagnostics line. Note: cannot be both. + // diagnostics line. Note: cannot be both (see the script parser for + // details). // optional<string> diag_name; optional<line> diag_line; + // The script's custom dependency change tracking lines (see the + // script parser for details). + // + bool depdb_clear; + lines_type depdb_lines; + location start_loc; location end_loc; }; diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 4b958e8..f22f13a 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -1264,7 +1264,7 @@ namespace build2 // Calling operation_pre() to translate doesn't feel // appropriate here. 
// - fail (l) << "default operation in recipe action"; + fail (l) << "default operation in recipe action" << endf; } else oi = ctx.operation_table.find (o.name); diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx index d9c07cb..fb769fd 100644 --- a/libbuild2/rule.cxx +++ b/libbuild2/rule.cxx @@ -430,6 +430,17 @@ namespace build2 { os << ind << string (braces, '{') << endl; ind += " "; + + if (script.depdb_clear) + os << ind << "depdb clear" << endl; + + script::dump (os, ind, script.depdb_lines); + + if (script.diag_line) + { + os << ind; script::dump (os, *script.diag_line, true /* newline */); + } + script::dump (os, ind, script.lines); ind.resize (ind.size () - 2); os << ind << string (braces, '}'); @@ -504,7 +515,7 @@ namespace build2 target_state adhoc_script_rule:: perform_update_file (action a, const target& xt) const { - tracer trace ("adhoc_rule::perform_update_file"); + tracer trace ("adhoc_script_rule::perform_update_file"); context& ctx (xt.ctx); @@ -539,18 +550,22 @@ namespace build2 // executable prerequisite target that has it. We do it before executing // in order to include ad hoc prerequisites (which feels like the right // thing to do; the user may mark tools as ad hoc in order to omit them - // from $<). + // from $<). Note, however, that this is only required if the script + // doesn't track the dependency changes itself. // sha256 prog_cs; - for (const target* pt: t.prerequisite_targets[a]) + if (!script.depdb_clear) { - if (pt != nullptr) + for (const target* pt: t.prerequisite_targets[a]) { - if (auto* e = pt->is_a<exe> ()) + if (pt != nullptr) { - if (auto* c = e->lookup_metadata<string> ("checksum")) + if (auto* e = pt->is_a<exe> ()) { - prog_cs.append (*c); + if (auto* c = e->lookup_metadata<string> ("checksum")) + { + prog_cs.append (*c); + } } } } @@ -568,117 +583,194 @@ namespace build2 // names, tools, etc. // depdb dd (tp + ".d"); - { - // First should come the rule name/version. 
- // - if (dd.expect ("adhoc 1") != nullptr) - l4 ([&]{trace << "rule mismatch forcing update of " << t;}); - // Then the script checksum. - // - // Ideally, to detect changes to the script semantics, we would hash the - // text with all the variables expanded but without executing any - // commands. In practice, this is easier said than done (think the set - // builtin that receives output of a command that modifies the - // filesystem). - // - // So as the next best thing we are going to hash the unexpanded text as - // well as values of all the variables expanded in it (which we get as a - // side effect of pre-parsing the script). This approach has a number of - // drawbacks: - // - // - We can't handle computed variable names (e.g., $($x ? X : Y)). - // - // - We may "overhash" by including variables that are actually - // script-local. - // - // - There are functions like $install.resolve() with result based on - // external (to the script) information. - // - if (dd.expect (checksum) != nullptr) - l4 ([&]{trace << "recipe text change forcing update of " << t;}); + // First should come the rule name/version. + // + if (dd.expect ("adhoc 1") != nullptr) + l4 ([&]{trace << "rule mismatch forcing update of " << t;}); - // For each variable hash its name, undefined/null/non-null indicator, - // and the value if non-null. - // - // Note that this excludes the special $< and $> variables which we - // handle below. - // - { - sha256 cs; - names storage; + // Then the script checksum. + // + // Ideally, to detect changes to the script semantics, we would hash the + // text with all the variables expanded but without executing any + // commands. In practice, this is easier said than done (think the set + // builtin that receives output of a command that modifies the + // filesystem). 
+ // + // So as the next best thing we are going to hash the unexpanded text as + // well as values of all the variables expanded in it (which we get as a + // side effect of pre-parsing the script). This approach has a number of + // drawbacks: + // + // - We can't handle computed variable names (e.g., $($x ? X : Y)). + // + // - We may "overhash" by including variables that are actually + // script-local. + // + // - There are functions like $install.resolve() with result based on + // external (to the script) information. + // + if (dd.expect (checksum) != nullptr) + l4 ([&]{trace << "recipe text change forcing update of " << t;}); - for (const string& n: script.vars) - { - cs.append (n); + // Track the variables, targets, and prerequisites changes, unless the + // script tracks the dependency changes itself (depdb clear). + // - lookup l; + // For each variable hash its name, undefined/null/non-null indicator, + // and the value if non-null. + // + // Note that this excludes the special $< and $> variables which we + // handle below. + // + if (!script.depdb_clear) + { + sha256 cs; + names storage; - if (const variable* var = ctx.var_pool.find (n)) - l = t[var]; + for (const string& n: script.vars) + { + cs.append (n); - cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + lookup l; - if (l) - { - storage.clear (); - names_view ns (reverse (*l, storage)); + if (const variable* var = ctx.var_pool.find (n)) + l = t[var]; - for (const name& n: ns) - to_checksum (cs, n); - } - } + cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + + if (l) + { + storage.clear (); + names_view ns (reverse (*l, storage)); - if (dd.expect (cs.string ()) != nullptr) - l4 ([&]{trace << "recipe variable change forcing update of " << t;}); + for (const name& n: ns) + to_checksum (cs, n); + } } - // Target and prerequisite sets ($> and $<). - // - // How should we hash them? 
We could hash them as target names (i.e., - // the same as the $>/< content) or as paths (only for path-based - // targets). While names feel more general, they are also more expensive - // to compute. And for path-based targets, path is generally a good - // proxy for the target name. Since the bulk of the ad hoc recipes will - // presumably be operating exclusively on path-based targets, let's do - // it both ways. - // + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "recipe variable change forcing update of " << t;}); + } + + // Target and prerequisite sets ($> and $<). + // + // How should we hash them? We could hash them as target names (i.e., the + // same as the $>/< content) or as paths (only for path-based targets). + // While names feel more general, they are also more expensive to compute. + // And for path-based targets, path is generally a good proxy for the + // target name. Since the bulk of the ad hoc recipes will presumably be + // operating exclusively on path-based targets, let's do it both ways. 
+ // + if (!script.depdb_clear) + { + auto hash = [ns = names ()] (sha256& cs, const target& t) mutable { - auto hash = [ns = names ()] (sha256& cs, const target& t) mutable + if (const path_target* pt = t.is_a<path_target> ()) + cs.append (pt->path ().string ()); + else { - if (const path_target* pt = t.is_a<path_target> ()) - cs.append (pt->path ().string ()); - else - { - ns.clear (); - t.as_name (ns); - for (const name& n: ns) - to_checksum (cs, n); - } - }; + ns.clear (); + t.as_name (ns); + for (const name& n: ns) + to_checksum (cs, n); + } + }; - sha256 tcs; - for (const target* m (&t); m != nullptr; m = m->adhoc_member) - hash (tcs, *m); + sha256 tcs; + for (const target* m (&t); m != nullptr; m = m->adhoc_member) + hash (tcs, *m); - if (dd.expect (tcs.string ()) != nullptr) - l4 ([&]{trace << "target set change forcing update of " << t;}); + if (dd.expect (tcs.string ()) != nullptr) + l4 ([&]{trace << "target set change forcing update of " << t;}); - sha256 pcs; - for (const target* pt: t.prerequisite_targets[a]) - if (pt != nullptr) - hash (pcs, *pt); + sha256 pcs; + for (const target* pt: t.prerequisite_targets[a]) + if (pt != nullptr) + hash (pcs, *pt); - if (dd.expect (pcs.string ()) != nullptr) - l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); - } + if (dd.expect (pcs.string ()) != nullptr) + l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); + } - // Finally the programs checksum. - // + // Finally the programs checksum. + // + if (!script.depdb_clear) + { if (dd.expect (prog_cs.string ()) != nullptr) l4 ([&]{trace << "program checksum change forcing update of " << t;}); } + const scope* bs (nullptr); + const scope* rs (nullptr); + + // Execute the custom dependency change tracking commands, if present. 
+ // + if (!script.depdb_lines.empty ()) + { + bs = &t.base_scope (); + rs = bs->root_scope (); + + // While it would have been nice to reuse the environment for both + // dependency tracking and execution, there are complications (creating + // temporary directory, etc). + // + build::script::environment e (a, t, false /* temp_dir */); + build::script::parser p (ctx); + + for (const script::line& l: script.depdb_lines) + { + names ns (p.execute_special (*rs, *bs, e, l)); + + // These should have been enforced during pre-parsing. + // + assert (!ns.empty ()); // <cmd> ... <newline> + assert (l.tokens.size () > 2); // 'depdb' <cmd> ... <newline> + + const string& cmd (ns[0].value); + + location loc (l.tokens[0].location ()); + + if (cmd == "hash") + { + sha256 cs; + for (auto i (ns.begin () + 1); i != ns.end (); ++i) // Skip <cmd>. + to_checksum (cs, *i); + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&] { + diag_record dr (trace); + dr << "'depdb hash' argument change forcing update of " << t << + info (loc); script::dump (dr.os, l); + }); + } + else if (cmd == "string") + { + string s; + try + { + s = convert<string> (names (move_iterator (ns.begin () + 1), + move_iterator (ns.end ()))); + } + catch (const invalid_argument& e) + { + fail (l.tokens[2].location ()) + << "invalid 'depdb string' argument: " << e; + } + + if (dd.expect (s) != nullptr) + l4 ([&] { + diag_record dr (trace); + dr << "'depdb string' argument change forcing update of " + << t << + info (loc); script::dump (dr.os, l); + }); + } + else + assert (false); + } + } + // Update if depdb mismatch. 
// if (dd.writing () || dd.mtime > mt) @@ -693,8 +785,11 @@ namespace build2 if (!ctx.dry_run || verb != 0) { - const scope& bs (t.base_scope ()); - const scope& rs (*bs.root_scope ()); + if (bs == nullptr) + { + bs = &t.base_scope (); + rs = bs->root_scope (); + } build::script::environment e (a, t, script.temp_dir); build::script::parser p (ctx); @@ -703,7 +798,7 @@ namespace build2 { if (script.diag_line) { - text << p.execute_special (rs, bs, e, *script.diag_line); + text << p.execute_special (*rs, *bs, e, *script.diag_line); } else { @@ -723,7 +818,7 @@ namespace build2 if (!ctx.dry_run || verb >= 2) { build::script::default_runner r; - p.execute (rs, bs, e, script, r); + p.execute (*rs, *bs, e, script, r); if (!ctx.dry_run) dd.check_mtime (tp); @@ -737,7 +832,7 @@ namespace build2 target_state adhoc_script_rule:: default_action (action a, const target& t) const { - tracer trace ("adhoc_rule::default_action"); + tracer trace ("adhoc_script_rule::default_action"); context& ctx (t.ctx); diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index 7722b47..eb63e53 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -33,13 +33,159 @@ namespace build2 } void - dump (ostream& os, const string& ind, const lines& ls) + dump (ostream& os, const line& ln, bool newline) { - // For each line print its tokens literal representation trying to - // reproduce the quoting. Consider mixed quoting as double quoting - // since the information is lost. + // Print the line's tokens literal representation trying to reproduce + // the quoting. Consider mixed quoting as double quoting since the + // information is lost. + // + const replay_tokens& rts (ln.tokens); + + assert (!rts.empty ()); // ... <newline> + const token& ft (rts[0].token); + + // If true, this is a special builtin line. + // + // Note that special characters set differs for such lines since they + // are parsed in the value lexer mode. 
// - // Also additionally indent the if-branch lines. + bool builtin (ln.type == line_type::cmd && + ft.type == token_type::word && + (ft.value == "diag" || ft.value == "depdb")); + + // '"' or '\'' if we are inside the quoted token sequence and '\0' + // otherwise. Thus, can be used as bool. + // + char qseq ('\0'); + + optional<token_type> prev_tt; + for (const replay_token& rt: rts) + { + const token& t (rt.token); + + // '"' or '\'' if the token is quoted and '\0' otherwise. Thus, can be + // used as bool. + // + char qtok ('\0'); + + switch (t.qtype) + { + case quote_type::unquoted: qtok = '\0'; break; + case quote_type::single: qtok = '\''; break; + case quote_type::mixed: + case quote_type::double_: qtok = '"'; break; + } + + // If being inside a quoted token sequence we have reached a token + // quoted differently or the newline, then we probably made a mistake + // misinterpreting some previous partially quoted token, for example + // f"oo" as "foo. If that's the case, all we can do is to end the + // sequence adding the trailing quote. + // + // Note that a token inside the quoted sequence may well be unquoted, + // so for example "$foo" is lexed as: + // + // token quoting complete notes + // '' " no + // $ " yes + // 'foo' Unquoted since lexed in variable mode. + // '' " no + // \n + // + if (qseq && + ((qtok && qtok != qseq) || t.type == token_type::newline)) + { + os << qseq; + qseq = '\0'; + } + + // Left and right token quotes (can be used as bool). + // + char lq ('\0'); + char rq ('\0'); + + // If the token is quoted, then determine if/which quotes should be + // present on its sides and track the quoted token sequence. + // + if (qtok) + { + if (t.qcomp) // Complete token quoting. + { + // If we are inside a quoted token sequence then do nothing. + // Otherwise just quote the current token not starting a sequence. + // + if (!qseq) + { + lq = qtok; + rq = qtok; + } + } + else // Partial token quoting. 
+ { + // Note that we can not always reproduce the original tokens + // representation for partial quoting. For example, the two + // following tokens are lexed into the identical token objects: + // + // "foo + // f"oo" + // + // We will always assume that the partially quoted token either + // starts or ends the quoted token sequence. Sometimes this ends + // up unexpectedly, but seems there is not much we can do: + // + // f"oo" "ba"r -> "foo bar" + // + if (!qseq) // Start quoted sequence. + { + lq = qtok; + qseq = qtok; + } + else // End quoted sequence. + { + rq = qtok; + qseq = '\0'; + } + } + } + + // Print the space character prior to the separated token, unless it + // is the first line token or the newline. + // + if (t.separated && t.type != token_type::newline && &rt != &rts[0]) + os << ' '; + + if (lq) os << lq; // Print the left quote, if required. + + // Escape the special characters, unless the token is not a word, is a + // variable name, or is single-quoted. Note that the special + // characters set depends on whether the word is double-quoted or + // unquoted and whether this is a special builtin line or not. + // + if (t.type == token_type::word && + qtok != '\'' && + prev_tt != token_type::dollar) + { + for (char c: t.value) + { + if (strchr (qtok || builtin ? "\\\"" : "|&<>=\\\"", c) != nullptr) + os << '\\'; + + os << c; + } + } + else if (t.type != token_type::newline || newline) + t.printer (os, t, print_mode::raw); + + if (rq) os << rq; // Print the right quote, if required. + + prev_tt = t.type; + } + } + + void + dump (ostream& os, const string& ind, const lines& ls) + { + // Additionally indent the if-branch lines. // string if_ind; @@ -79,130 +225,7 @@ namespace build2 default: break; } - // '"' or '\'' if we are inside the quoted token sequence and '\0' - // otherwise. Thus, can be used as bool. 
- // - char qseq ('\0'); - - for (const replay_token& rt: l.tokens) - { - const token& t (rt.token); - - // '"' or '\'' if the token is quoted and '\0' otherwise. Thus, - // can be used as bool. - // - char qtok ('\0'); - - switch (t.qtype) - { - case quote_type::unquoted: qtok = '\0'; break; - case quote_type::single: qtok = '\''; break; - case quote_type::mixed: - case quote_type::double_: qtok = '"'; break; - } - - // If being inside a quoted token sequence we have reached a token - // quoted differently or the newline, then we probably made a - // mistake misinterpreting some previous partially quoted token, for - // example f"oo" as "foo. If that's the case, all we can do is to - // end the sequence adding the trailing quote. - // - // Note that a token inside the quoted sequence may well be - // unquoted, so for example "$foo" is lexed as: - // - // token quoting complete notes - // '' " no - // $ " yes - // 'foo' Unquoted since lexed in variable mode. - // '' " no - // \n - // - if (qseq && - ((qtok && qtok != qseq) || t.type == token_type::newline)) - { - os << qseq; - qseq = '\0'; - } - - // Left and right token quotes (can be used as bool). - // - char lq ('\0'); - char rq ('\0'); - - // If the token is quoted, then determine if/which quotes should be - // present on its sides and track the quoted token sequence. - // - if (qtok) - { - if (t.qcomp) // Complete token quoting. - { - // If we are inside a quoted token sequence then do noting. - // Otherwise just quote the current token not starting a - // sequence. - // - if (!qseq) - { - lq = qtok; - rq = qtok; - } - } - else // Partial token quoting. - { - // Note that we can not always reproduce the original tokens - // representation for partial quoting. For example, the two - // following tokens are lexed into the identical token objects: - // - // "foo - // f"oo" - // - // We will always assume that the partially quoted token either - // starts or ends the quoted token sequence. 
Sometimes this ends - // up unexpectedly, but seems there is not much we can do: - // - // f"oo" "ba"r -> "foo bar" - // - if (!qseq) // Start quoted sequence. - { - lq = qtok; - qseq = qtok; - } - else // End quoted sequence. - { - rq = qtok; - qseq = '\0'; - } - } - } - - // Print the space character prior to the separated token, unless - // it is a first like token or the newline. - // - if (t.separated && - t.type != token_type::newline && - &rt != &l.tokens[0]) - os << ' '; - - if (lq) os << lq; // Print the left quote, if required. - - // Escape the special characters, unless the token in not a word or - // is single-quoted. Note that the special character set depends on - // whether the word is double-quoted or unquoted. - // - if (t.type == token_type::word && qtok != '\'') - { - for (char c: t.value) - { - if (strchr (qtok ? "\\\"" : "|&<>=\\\"", c) != nullptr) - os << '\\'; - - os << c; - } - } - else - t.printer (os, t, print_mode::raw); - - if (rq) os << rq; // Print the right quote, if required. - } + dump (os, l, true /* newline */); } } @@ -215,6 +238,8 @@ namespace build2 static void to_stream_q (ostream& o, const string& s) { + // NOTE: update dump(line) if adding any new special character. + // if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos) o << '\'' << s << '\''; else diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index 891b2f6..31527a0 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -57,6 +57,12 @@ namespace build2 void dump (ostream&, const string& ind, const lines&); + // As above but print a single line and without the trailing newline token + // by default. + // + void + dump (ostream&, const line&, bool newline = false); + // Parse object model. 
// diff --git a/tests/dependency/recipe/testscript b/tests/dependency/recipe/testscript index 64947dd..ee598b7 100644 --- a/tests/dependency/recipe/testscript +++ b/tests/dependency/recipe/testscript @@ -629,6 +629,7 @@ EOE % .+alias\{x\}:% % perform(update) {{ + diag bar %.{4} EOE } |