Add depdb buildscript builtin

author: Karen Arutyunov <karen@codesynthesis.com> 2020-06-04 23:01:58 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2020-06-05 17:35:24 +0300
commit: fb56fc798110c8ee9685bec156b21f1f87aca121 (patch)
tree: c92a0d7d764794b1af63227bb8b9e89d036dbb72 /libbuild2
parent: e4a9ccadf751b88f5508ce9f890484bae33d1aaf (diff)
9 files changed, 520 insertions, 284 deletions
diff --git a/libbuild2/build/script/parser+diag.test.testscript b/libbuild2/build/script/parser+diag.test.testscript
index bb0672e..60683bc 100644
--- a/libbuild2/build/script/parser+diag.test.testscript
+++ b/libbuild2/build/script/parser+diag.test.testscript
@@ -24,10 +24,10 @@ $* <<EOI >>EOO
 $* <<EOI >>~%EOO%
   echo abc
   cat abc
-  diag abc '==>' $>
-  cp abc xyz
+  diag copy >= $>
+  cp <- $>
   EOI
-  %diag: abc ==> .+file\{driver\.\}%
+  %diag: copy >= .+file\{driver\.\}%
   EOO
 
 : ambiguity
diff --git a/libbuild2/build/script/parser+line.test.testscript b/libbuild2/build/script/parser+line.test.testscript
index 45b07b7..1b39265 100644
--- a/libbuild2/build/script/parser+line.test.testscript
+++ b/libbuild2/build/script/parser+line.test.testscript
@@ -70,3 +70,9 @@ $* <<EOI >>EOO
   "foo"
   'foo b"ar baz'
   EOO
+
+: no-newline
+:
+$* <:'foo' 2>>EOE != 0
+  buildfile:11:4: error: expected newline instead of <end of file>
+  EOE
diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx
index 274faf0..c698448 100644
--- a/libbuild2/build/script/parser.cxx
+++ b/libbuild2/build/script/parser.cxx
@@ -52,8 +52,8 @@ namespace build2
 
         if (diag)
         {
-          diag_name   = make_pair (move (*diag), diag_loc);
-          diag_weight = 4;
+          diag_name_   = make_pair (move (*diag), diag_loc);
+          diag_weight_ = 4;
         }
 
         s.start_loc = location (*path_, line, 1);
@@ -69,20 +69,20 @@ namespace build2
         {
           diag_record dr;
 
-          if (!diag_name && !diag_line)
+          if (!diag_name_ && !diag_line_)
           {
             dr << fail (s.start_loc)
                << "unable to deduce low-verbosity script diagnostics name";
           }
-          else if (diag_name2)
+          else if (diag_name2_)
           {
-            assert (diag_name);
+            assert (diag_name_);
 
             dr << fail (s.start_loc)
                << "low-verbosity script diagnostics name is ambiguous" <<
-              info (diag_name->second) << "could be '" << diag_name->first
+              info (diag_name_->second) << "could be '" << diag_name_->first
                << "'" <<
-              info (diag_name2->second) << "could be '" << diag_name2->first
+              info (diag_name2_->second) << "could be '" << diag_name2_->first
                << "'";
           }
 
@@ -96,12 +96,19 @@ namespace build2
           }
         }
 
-        assert (diag_name.has_value () != diag_line.has_value ());
+        // Save the script name or custom diagnostics line.
+        //
+        assert (diag_name_.has_value () != diag_line_.has_value ());
 
-        if (diag_name)
-          s.diag_name = move (diag_name->first);
+        if (diag_name_)
+          s.diag_name = move (diag_name_->first);
         else
-          s.diag_line = move (diag_line->first);
+          s.diag_line = move (diag_line_->first);
+
+        // Save the custom dependency change tracking lines, if present.
+        //
+        s.depdb_clear = depdb_clear_.has_value ();
+        s.depdb_lines = move (depdb_lines_);
 
         return s;
       }
@@ -150,9 +157,13 @@ namespace build2
         line_type lt (
           pre_parse_line_start (t, tt, lexer_mode::second_token));
 
-        save_line_ = nullptr;
-
         line ln;
+
+        // Indicates that the parsed line should by default be appended to the
+        // script.
+        //
+        save_line_ = &ln;
+
         switch (lt)
         {
         case line_type::var:
@@ -213,9 +224,12 @@ namespace build2
         ln.tokens = replay_data ();
 
         if (save_line_ != nullptr)
-          *save_line_ = move (ln);
-        else
-          script_->lines.push_back (move (ln));
+        {
+          if (save_line_ == &ln)
+            script_->lines.push_back (move (ln));
+          else
+            *save_line_ = move (ln);
+        }
 
         if (lt == line_type::cmd_if || lt == line_type::cmd_ifn)
         {
@@ -323,41 +337,46 @@ namespace build2
         //
         auto set_diag = [&l, this] (string d, uint8_t w)
         {
-          if (diag_weight < w)
+          if (diag_weight_ < w)
           {
-            diag_name   = make_pair (move (d), l);
-            diag_weight = w;
-            diag_name2  = nullopt;
+            diag_name_   = make_pair (move (d), l);
+            diag_weight_ = w;
+            diag_name2_  = nullopt;
           }
-          else if (w != 0                &&
-                   w == diag_weight      &&
-                   d != diag_name->first &&
-                   !diag_name2)
-            diag_name2 = make_pair (move (d), l);
+          else if (w != 0                 &&
+                   w == diag_weight_      &&
+                   d != diag_name_->first &&
+                   !diag_name2_)
+            diag_name2_ = make_pair (move (d), l);
         };
 
         // Handle special builtins.
         //
+        // NOTE: update line dumping (script.cxx:dump()) if adding a special
+        // builtin.
+        //
         if (pre_parse_ && first && tt == type::word)
         {
-          if (t.value == "diag")
+          const string& v (t.value);
+
+          if (v == "diag")
           {
             // Check for ambiguity.
             //
-            if (diag_weight == 4)
+            if (diag_weight_ == 4)
             {
-              if (diag_name) // Script name.
+              if (diag_name_) // Script name.
               {
                 fail (l) << "both low-verbosity script diagnostics name "
                          << "and 'diag' builtin call" <<
-                  info (diag_name->second) << "script name specified here";
+                  info (diag_name_->second) << "script name specified here";
               }
               else           // Custom diagnostics.
               {
-                assert (diag_line);
+                assert (diag_line_);
 
                 fail (l) << "multiple 'diag' builtin calls" <<
-                  info (diag_line->second) << "previous call is here";
+                  info (diag_line_->second) << "previous call is here";
               }
             }
 
@@ -366,16 +385,73 @@ namespace build2
             // will be executed prior to the script execution to obtain the
             // custom diagnostics.
             //
-            diag_line  = make_pair (line (), l);
-            save_line_ = &diag_line->first;
-            diag_weight  = 4;
+            diag_line_   = make_pair (line (), l);
+            save_line_   = &diag_line_->first;
+            diag_weight_ = 4;
+
+            diag_name_  = nullopt;
+            diag_name2_ = nullopt;
+
+            // Note that the rest of the line contains the builtin argument to
+            // be printed, thus we parse it in the value lexer mode.
+            //
+            mode (lexer_mode::value);
+            parse_names (t, tt, pattern_mode::ignore);
+            return nullopt;
+          }
+          else if (v == "depdb")
+          {
+            // Note that the rest of the line contains the builtin command
+            // name, potentially followed by the arguments to be
+            // hashed/saved. Thus, we parse it in the value lexer mode.
+            //
+            mode (lexer_mode::value);
 
-            diag_name  = nullopt;
-            diag_name2 = nullopt;
+            // Obtain and validate the depdb builtin command name.
+            //
+            next (t, tt);
 
-            // Parse the leading chunk and bail out.
+            if (tt != type::word ||
+                (v != "clear" && v != "hash" && v != "string"))
+            {
+              fail (get_location (t))
+                << "expected 'depdb' builtin command instead of " << t;
+            }
+
+            if (v == "clear")
+            {
+              // Make sure the clear depdb command comes first.
+              //
+              if (depdb_clear_)
+                fail (l) << "multiple 'depdb clear' builtin calls" <<
+                  info (*depdb_clear_) << "previous call is here";
+
+              if (!depdb_lines_.empty ())
+                fail (l) << "'depdb clear' should be the first 'depdb' "
+                         << "builtin call" <<
+                  info (depdb_lines_[0].tokens[0].location ())
+                         << "first 'depdb' call is here";
+
+              // Save the builtin location and cancel the line saving.
+              //
+              depdb_clear_ = l;
+              save_line_   = nullptr;
+            }
+            else
+            {
+              // Instruct the parser to save the depdb builtin line separately
+              // from the script lines, when it is fully parsed. Note that the
+              // builtin command arguments will be validated during execution,
+              // when expanded.
+              //
+              depdb_lines_.push_back (line ());
+              save_line_ = &depdb_lines_.back ();
+            }
+
+            // Parse the rest of the line and bail out.
             //
-            return build2::script::parser::parse_program (t, tt, first, ns);
+            parse_names (t, tt, pattern_mode::ignore);
+            return nullopt;
           }
         }
 
@@ -414,7 +490,7 @@ namespace build2
           //
           // This is also the reason why we add a diag frame.
           //
-          if (pre_parse_ && diag_weight != 4)
+          if (pre_parse_ && diag_weight_ != 4)
           {
             pre_parse_ = false; // Make parse_names() perform expansions.
             pre_parse_suspended_ = true;
@@ -445,7 +521,7 @@ namespace build2
             pre_parse_ = true;
           }
 
-          if (pre_parse_ && diag_weight == 4)
+          if (pre_parse_ && diag_weight_ == 4)
             return nullopt;
         }
 
diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx
index 4b98cbc..a652cf4 100644
--- a/libbuild2/build/script/parser.hxx
+++ b/libbuild2/build/script/parser.hxx
@@ -141,13 +141,29 @@ namespace build2
         // builtin after the script name or after another diag builtin) is
         // reported as ambiguity.
         //
-        // At the end of pre-parsing either diag_name or diag_line (but not
+        // At the end of pre-parsing either diag_name_ or diag_line_ (but not
         // both) are present.
         //
-        optional<pair<string, location>> diag_name;
-        optional<pair<string, location>> diag_name2; // Ambiguous script name.
-        optional<pair<line, location>>   diag_line;
-        uint8_t                          diag_weight = 0;
+        optional<pair<string, location>> diag_name_;
+        optional<pair<string, location>> diag_name2_; // Ambiguous script name.
+        optional<pair<line, location>>   diag_line_;
+        uint8_t                          diag_weight_ = 0;
+
+        // Custom dependency change tracking.
+        //
+        // The depdb builtin can be used to change the default dependency
+        // change tracking:
+        //
+        // depdb clear         - Cancels the default variables, targets, and
+        //                       prerequisites change tracking. Can only be
+        //                       the first depdb builtin call.
+        //
+        // depdb hash <args>  - Track the argument list change as a hash.
+        //
+        // depdb string <arg> - Track the single argument change as a string.
+        //
+        optional<location> depdb_clear_; // 'depdb clear' location if any.
+        lines              depdb_lines_; // Note: excludes 'depdb clear'.
 
         // True during pre-parsing when the pre-parse mode is temporarily
         // suspended to perform expansion.
@@ -156,10 +172,12 @@ namespace build2
 
         // The alternative location where the next line should be saved.
         //
-        // It is set to NULL before the script line get parsed, indicating
-        // that the line should by default be appended to the script. However,
+        // Before the script line gets parsed, it is set to a temporary value
+        // that will by default be appended to the script. However,
         // parse_program() can point it to a different location where the line
-        // should be saved instead (e.g., diag_line, etc).
+        // should be saved instead (e.g., diag_line_, etc) or set it to NULL
+        // if the line is handled in an ad-hoc way and should be dropped
+        // (e.g., depdb_clear_, etc).
         //
         line* save_line_;
 
diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx
index 5fd8561..fafc87e 100644
--- a/libbuild2/build/script/script.hxx
+++ b/libbuild2/build/script/script.hxx
@@ -20,6 +20,7 @@ namespace build2
     namespace script
     {
       using build2::script::line;
+      using build2::script::lines;
       using build2::script::line_type;
       using build2::script::redirect;
       using build2::script::redirect_type;
@@ -37,10 +38,12 @@ namespace build2
       class script
       {
       public:
+        using lines_type = build::script::lines;
+
         // Note that the variables are not pre-entered into a pool during the
         // parsing phase, so the line variable pointers are NULL.
         //
-        build2::script::lines lines;
+        lines_type lines;
 
         // Referenced ordinary (non-special) variables.
         //
@@ -59,11 +62,18 @@ namespace build2
         bool temp_dir = false;
 
         // Command name for low-verbosity diagnostics and custom low-verbosity
-        // diagnostics line. Note: cannot be both.
+        // diagnostics line. Note: cannot be both (see the script parser for
+        // details).
         //
         optional<string> diag_name;
         optional<line>   diag_line;
 
+        // The script's custom dependency change tracking lines (see the
+        // script parser for details).
+        //
+        bool       depdb_clear;
+        lines_type depdb_lines;
+
         location start_loc;
         location end_loc;
       };
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 4b958e8..f22f13a 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -1264,7 +1264,7 @@ namespace build2
                   // Calling operation_pre() to translate doesn't feel
                   // appropriate here.
                   //
-                  fail (l) << "default operation in recipe action";
+                  fail (l) << "default operation in recipe action" << endf;
               }
               else
                 oi = ctx.operation_table.find (o.name);
diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx
index d9c07cb..fb769fd 100644
--- a/libbuild2/rule.cxx
+++ b/libbuild2/rule.cxx
@@ -430,6 +430,17 @@ namespace build2
   {
     os << ind << string (braces, '{') << endl;
     ind += "  ";
+
+    if (script.depdb_clear)
+      os << ind << "depdb clear" << endl;
+
+    script::dump (os, ind, script.depdb_lines);
+
+    if (script.diag_line)
+    {
+      os << ind; script::dump (os, *script.diag_line, true /* newline */);
+    }
+
     script::dump (os, ind, script.lines);
     ind.resize (ind.size () - 2);
     os << ind << string (braces, '}');
@@ -504,7 +515,7 @@ namespace build2
   target_state adhoc_script_rule::
   perform_update_file (action a, const target& xt) const
   {
-    tracer trace ("adhoc_rule::perform_update_file");
+    tracer trace ("adhoc_script_rule::perform_update_file");
 
     context& ctx (xt.ctx);
 
@@ -539,18 +550,22 @@ namespace build2
     // executable prerequisite target that has it. We do it before executing
     // in order to include ad hoc prerequisites (which feels like the right
     // thing to do; the user may mark tools as ad hoc in order to omit them
-    // from $<).
+    // from $<). Note, however, that this is only required if the script
+    // doesn't track the dependency changes itself.
     //
     sha256 prog_cs;
-    for (const target* pt: t.prerequisite_targets[a])
+    if (!script.depdb_clear)
     {
-      if (pt != nullptr)
+      for (const target* pt: t.prerequisite_targets[a])
       {
-        if (auto* e = pt->is_a<exe> ())
+        if (pt != nullptr)
         {
-          if (auto* c = e->lookup_metadata<string> ("checksum"))
+          if (auto* e = pt->is_a<exe> ())
           {
-            prog_cs.append (*c);
+            if (auto* c = e->lookup_metadata<string> ("checksum"))
+            {
+              prog_cs.append (*c);
+            }
           }
         }
       }
@@ -568,117 +583,194 @@ namespace build2
     // names, tools, etc.
     //
     depdb dd (tp + ".d");
-    {
-      // First should come the rule name/version.
-      //
-      if (dd.expect ("adhoc 1") != nullptr)
-        l4 ([&]{trace << "rule mismatch forcing update of " << t;});
 
-      // Then the script checksum.
-      //
-      // Ideally, to detect changes to the script semantics, we would hash the
-      // text with all the variables expanded but without executing any
-      // commands. In practice, this is easier said than done (think the set
-      // builtin that receives output of a command that modifies the
-      // filesystem).
-      //
-      // So as the next best thing we are going to hash the unexpanded text as
-      // well as values of all the variables expanded in it (which we get as a
-      // side effect of pre-parsing the script). This approach has a number of
-      // drawbacks:
-      //
-      // - We can't handle computed variable names (e.g., $($x ? X : Y)).
-      //
-      // - We may "overhash" by including variables that are actually
-      //   script-local.
-      //
-      // - There are functions like $install.resolve() with result based on
-      //   external (to the script) information.
-      //
-      if (dd.expect (checksum) != nullptr)
-        l4 ([&]{trace << "recipe text change forcing update of " << t;});
+    // First should come the rule name/version.
+    //
+    if (dd.expect ("adhoc 1") != nullptr)
+      l4 ([&]{trace << "rule mismatch forcing update of " << t;});
 
-      // For each variable hash its name, undefined/null/non-null indicator,
-      // and the value if non-null.
-      //
-      // Note that this excludes the special $< and $> variables which we
-      // handle below.
-      //
-      {
-        sha256 cs;
-        names storage;
+    // Then the script checksum.
+    //
+    // Ideally, to detect changes to the script semantics, we would hash the
+    // text with all the variables expanded but without executing any
+    // commands. In practice, this is easier said than done (think the set
+    // builtin that receives output of a command that modifies the
+    // filesystem).
+    //
+    // So as the next best thing we are going to hash the unexpanded text as
+    // well as values of all the variables expanded in it (which we get as a
+    // side effect of pre-parsing the script). This approach has a number of
+    // drawbacks:
+    //
+    // - We can't handle computed variable names (e.g., $($x ? X : Y)).
+    //
+    // - We may "overhash" by including variables that are actually
+    //   script-local.
+    //
+    // - There are functions like $install.resolve() with result based on
+    //   external (to the script) information.
+    //
+    if (dd.expect (checksum) != nullptr)
+      l4 ([&]{trace << "recipe text change forcing update of " << t;});
 
-        for (const string& n: script.vars)
-        {
-          cs.append (n);
+    // Track the variables, targets, and prerequisites changes, unless the
+    // script tracks the dependency changes itself.
+    //
 
-          lookup l;
+    // For each variable hash its name, undefined/null/non-null indicator,
+    // and the value if non-null.
+    //
+    // Note that this excludes the special $< and $> variables which we
+    // handle below.
+    //
+    if (!script.depdb_clear)
+    {
+      sha256 cs;
+      names storage;
 
-          if (const variable* var = ctx.var_pool.find (n))
-            l = t[var];
+      for (const string& n: script.vars)
+      {
+        cs.append (n);
 
-          cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3');
+        lookup l;
 
-          if (l)
-          {
-            storage.clear ();
-            names_view ns (reverse (*l, storage));
+        if (const variable* var = ctx.var_pool.find (n))
+          l = t[var];
 
-            for (const name& n: ns)
-              to_checksum (cs, n);
-          }
-        }
+        cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3');
+
+        if (l)
+        {
+          storage.clear ();
+          names_view ns (reverse (*l, storage));
 
-        if (dd.expect (cs.string ()) != nullptr)
-          l4 ([&]{trace << "recipe variable change forcing update of " << t;});
+          for (const name& n: ns)
+            to_checksum (cs, n);
+        }
       }
 
-      // Target and prerequisite sets ($> and $<).
-      //
-      // How should we hash them? We could hash them as target names (i.e.,
-      // the same as the $>/< content) or as paths (only for path-based
-      // targets). While names feel more general, they are also more expensive
-      // to compute. And for path-based targets, path is generally a good
-      // proxy for the target name. Since the bulk of the ad hoc recipes will
-      // presumably be operating exclusively on path-based targets, let's do
-      // it both ways.
-      //
+      if (dd.expect (cs.string ()) != nullptr)
+        l4 ([&]{trace << "recipe variable change forcing update of " << t;});
+    }
+
+    // Target and prerequisite sets ($> and $<).
+    //
+    // How should we hash them? We could hash them as target names (i.e., the
+    // same as the $>/< content) or as paths (only for path-based targets).
+    // While names feel more general, they are also more expensive to compute.
+    // And for path-based targets, path is generally a good proxy for the
+    // target name. Since the bulk of the ad hoc recipes will presumably be
+    // operating exclusively on path-based targets, let's do it both ways.
+    //
+    if (!script.depdb_clear)
+    {
+      auto hash = [ns = names ()] (sha256& cs, const target& t) mutable
       {
-        auto hash = [ns = names ()] (sha256& cs, const target& t) mutable
+        if (const path_target* pt = t.is_a<path_target> ())
+          cs.append (pt->path ().string ());
+        else
         {
-          if (const path_target* pt = t.is_a<path_target> ())
-            cs.append (pt->path ().string ());
-          else
-          {
-            ns.clear ();
-            t.as_name (ns);
-            for (const name& n: ns)
-              to_checksum (cs, n);
-          }
-        };
+          ns.clear ();
+          t.as_name (ns);
+          for (const name& n: ns)
+            to_checksum (cs, n);
+        }
+      };
 
-        sha256 tcs;
-        for (const target* m (&t); m != nullptr; m = m->adhoc_member)
-          hash (tcs, *m);
+      sha256 tcs;
+      for (const target* m (&t); m != nullptr; m = m->adhoc_member)
+        hash (tcs, *m);
 
-        if (dd.expect (tcs.string ()) != nullptr)
-          l4 ([&]{trace << "target set change forcing update of " << t;});
+      if (dd.expect (tcs.string ()) != nullptr)
+        l4 ([&]{trace << "target set change forcing update of " << t;});
 
-        sha256 pcs;
-        for (const target* pt: t.prerequisite_targets[a])
-          if (pt != nullptr)
-            hash (pcs, *pt);
+      sha256 pcs;
+      for (const target* pt: t.prerequisite_targets[a])
+        if (pt != nullptr)
+          hash (pcs, *pt);
 
-        if (dd.expect (pcs.string ()) != nullptr)
-          l4 ([&]{trace << "prerequisite set change forcing update of " << t;});
-      }
+      if (dd.expect (pcs.string ()) != nullptr)
+        l4 ([&]{trace << "prerequisite set change forcing update of " << t;});
+    }
 
-      // Finally the programs checksum.
-      //
+    // Finally the programs checksum.
+    //
+    if (!script.depdb_clear)
+    {
       if (dd.expect (prog_cs.string ()) != nullptr)
         l4 ([&]{trace << "program checksum change forcing update of " << t;});
     }
 
+    const scope* bs (nullptr);
+    const scope* rs (nullptr);
+
+    // Execute the custom dependency change tracking commands, if present.
+    //
+    if (!script.depdb_lines.empty ())
+    {
+      bs = &t.base_scope ();
+      rs = bs->root_scope ();
+
+      // While it would have been nice to reuse the environment for both
+      // dependency tracking and execution, there are complications (creating
+      // temporary directory, etc).
+      //
+      build::script::environment e (a, t, false /* temp_dir */);
+      build::script::parser p (ctx);
+
+      for (const script::line& l: script.depdb_lines)
+      {
+        names ns (p.execute_special (*rs, *bs, e, l));
+
+        // These should have been enforced during pre-parsing.
+        //
+        assert (!ns.empty ());         //         <cmd> ... <newline>
+        assert (l.tokens.size () > 2); // 'depdb' <cmd> ... <newline>
+
+        const string& cmd (ns[0].value);
+
+        location loc (l.tokens[0].location ());
+
+        if (cmd == "hash")
+        {
+          sha256 cs;
+          for (auto i (ns.begin () + 1); i != ns.end (); ++i) // Skip <cmd>.
+            to_checksum (cs, *i);
+
+          if (dd.expect (cs.string ()) != nullptr)
+            l4 ([&] {
+                diag_record dr (trace);
+                dr << "'depdb hash' argument change forcing update of " << t <<
+                  info (loc); script::dump (dr.os, l);
+              });
+        }
+        else if (cmd == "string")
+        {
+          string s;
+          try
+          {
+            s = convert<string> (names (move_iterator (ns.begin () + 1),
+                                        move_iterator (ns.end ())));
+          }
+          catch (const invalid_argument& e)
+          {
+            fail (l.tokens[2].location ())
+              << "invalid 'depdb string' argument: " << e;
+          }
+
+          if (dd.expect (s) != nullptr)
+            l4 ([&] {
+                diag_record dr (trace);
+                dr << "'depdb string' argument change forcing update of "
+                   << t <<
+                  info (loc); script::dump (dr.os, l);
+              });
+        }
+        else
+          assert (false);
+      }
+    }
+
     // Update if depdb mismatch.
     //
     if (dd.writing () || dd.mtime > mt)
@@ -693,8 +785,11 @@ namespace build2
 
     if (!ctx.dry_run || verb != 0)
     {
-      const scope& bs (t.base_scope ());
-      const scope& rs (*bs.root_scope ());
+      if (bs == nullptr)
+      {
+        bs = &t.base_scope ();
+        rs = bs->root_scope ();
+      }
 
       build::script::environment e (a, t, script.temp_dir);
       build::script::parser p (ctx);
@@ -703,7 +798,7 @@ namespace build2
       {
         if (script.diag_line)
         {
-          text << p.execute_special (rs, bs, e, *script.diag_line);
+          text << p.execute_special (*rs, *bs, e, *script.diag_line);
         }
         else
         {
@@ -723,7 +818,7 @@ namespace build2
       if (!ctx.dry_run || verb >= 2)
       {
         build::script::default_runner r;
-        p.execute (rs, bs, e, script, r);
+        p.execute (*rs, *bs, e, script, r);
 
         if (!ctx.dry_run)
           dd.check_mtime (tp);
@@ -737,7 +832,7 @@ namespace build2
   target_state adhoc_script_rule::
   default_action (action a, const target& t) const
   {
-    tracer trace ("adhoc_rule::default_action");
+    tracer trace ("adhoc_script_rule::default_action");
 
     context& ctx (t.ctx);
 
diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx
index 7722b47..eb63e53 100644
--- a/libbuild2/script/script.cxx
+++ b/libbuild2/script/script.cxx
@@ -33,13 +33,159 @@ namespace build2
     }
 
     void
-    dump (ostream& os, const string& ind, const lines& ls)
+    dump (ostream& os, const line& ln, bool newline)
     {
-      // For each line print its tokens literal representation trying to
-      // reproduce the quoting. Consider mixed quoting as double quoting
-      // since the information is lost.
+      // Print the line's tokens literal representation trying to reproduce
+      // the quoting. Consider mixed quoting as double quoting since the
+      // information is lost.
+      //
+      const replay_tokens& rts (ln.tokens);
+
+      assert (!rts.empty ());         // ... <newline>
+      const token& ft (rts[0].token);
+
+      // If true, this is a special builtin line.
+      //
+      // Note that special characters set differs for such lines since they
+      // are parsed in the value lexer mode.
       //
-      // Also additionally indent the if-branch lines.
+      bool builtin (ln.type == line_type::cmd   &&
+                    ft.type == token_type::word &&
+                    (ft.value == "diag" || ft.value == "depdb"));
+
+      // '"' or '\'' if we are inside the quoted token sequence and '\0'
+      // otherwise. Thus, can be used as bool.
+      //
+      char qseq ('\0');
+
+      optional<token_type> prev_tt;
+      for (const replay_token& rt: rts)
+      {
+        const token& t (rt.token);
+
+        // '"' or '\'' if the token is quoted and '\0' otherwise. Thus, can be
+        // used as bool.
+        //
+        char qtok ('\0');
+
+        switch (t.qtype)
+        {
+        case quote_type::unquoted: qtok = '\0'; break;
+        case quote_type::single:   qtok = '\''; break;
+        case quote_type::mixed:
+        case quote_type::double_:  qtok = '"';  break;
+        }
+
+        // If being inside a quoted token sequence we have reached a token
+        // quoted differently or the newline, then we probably made a mistake
+        // misinterpreting some previous partially quoted token, for example
+        // f"oo" as "foo. If that's the case, all we can do is to end the
+        // sequence adding the trailing quote.
+        //
+        // Note that a token inside the quoted sequence may well be unquoted,
+        // so for example "$foo" is lexed as:
+        //
+        //   token  quoting  complete  notes
+        //   ''     "        no
+        //   $      "        yes
+        //   'foo'                     Unquoted since lexed in variable mode.
+        //   ''     "        no
+        //   \n
+        //
+        if (qseq &&
+            ((qtok && qtok != qseq) || t.type == token_type::newline))
+        {
+          os << qseq;
+          qseq = '\0';
+        }
+
+        // Left and right token quotes (can be used as bool).
+        //
+        char lq ('\0');
+        char rq ('\0');
+
+        // If the token is quoted, then determine if/which quotes should be
+        // present on its sides and track the quoted token sequence.
+        //
+        if (qtok)
+        {
+          if (t.qcomp) // Complete token quoting.
+          {
+            // If we are inside a quoted token sequence then do nothing.
+            // Otherwise just quote the current token not starting a sequence.
+            //
+            if (!qseq)
+            {
+              lq = qtok;
+              rq = qtok;
+            }
+          }
+          else         // Partial token quoting.
+          {
+            // Note that we can not always reproduce the original tokens
+            // representation for partial quoting. For example, the two
+            // following tokens are lexed into the identical token objects:
+            //
+            // "foo
+            // f"oo"
+            //
+            // We will always assume that the partially quoted token either
+            // starts or ends the quoted token sequence. Sometimes this ends
+            // up unexpectedly, but seems there is not much we can do:
+            //
+            // f"oo" "ba"r  ->  "foo bar"
+            //
+            if (!qseq)     // Start quoted sequence.
+            {
+              lq = qtok;
+              qseq = qtok;
+            }
+            else           // End quoted sequence.
+            {
+              rq = qtok;
+              qseq = '\0';
+            }
+          }
+        }
+
+        // Print the space character prior to the separated token, unless it
+        // is the first line token or the newline.
+        //
+        if (t.separated && t.type != token_type::newline && &rt != &rts[0])
+          os << ' ';
+
+        if (lq) os << lq; // Print the left quote, if required.
+
+        // Escape the special characters, unless the token is not a word, is a
+        // variable name, or is single-quoted. Note that the special
+        // characters set depends on whether the word is double-quoted or
+        // unquoted and whether this is a special builtin line or not.
+        //
+        if (t.type == token_type::word &&
+            qtok != '\''               &&
+            prev_tt != token_type::dollar)
+        {
+          for (char c: t.value)
+          {
+            if (strchr (qtok || builtin ? "\\\"" : "|&<>=\\\"", c) != nullptr)
+              os << '\\';
+
+            os << c;
+          }
+        }
+        else if (t.type != token_type::newline || newline)
+          t.printer (os, t, print_mode::raw);
+
+        if (rq) os << rq; // Print the right quote, if required.
+
+        prev_tt = t.type;
+      }
+    }
+
+    void
+    dump (ostream& os, const string& ind, const lines& ls)
+    {
+      // Additionally indent the if-branch lines.
       //
       string if_ind;
 
@@ -79,130 +225,7 @@ namespace build2
         default: break;
         }
 
-        // '"' or '\'' if we are inside the quoted token sequence and '\0'
-        // otherwise. Thus, can be used as bool.
-        //
-        char qseq ('\0');
-
-        for (const replay_token& rt: l.tokens)
-        {
-          const token& t (rt.token);
-
-          // '"' or '\'' if the token is quoted and '\0' otherwise. Thus,
-          // can be used as bool.
-          //
-          char qtok ('\0');
-
-          switch (t.qtype)
-          {
-          case quote_type::unquoted: qtok = '\0'; break;
-          case quote_type::single:   qtok = '\''; break;
-          case quote_type::mixed:
-          case quote_type::double_:  qtok = '"';  break;
-          }
-
-          // If being inside a quoted token sequence we have reached a token
-          // quoted differently or the newline, then we probably made a
-          // mistake misinterpreting some previous partially quoted token, for
-          // example f"oo" as "foo. If that's the case, all we can do is to
-          // end the sequence adding the trailing quote.
-          //
-          // Note that a token inside the quoted sequence may well be
-          // unquoted, so for example "$foo" is lexed as:
-          //
-          //   token  quoting  complete  notes
-          //   ''     "        no
-          //   $      "        yes
-          //   'foo'                     Unquoted since lexed in variable mode.
-          //   ''     "        no
-          //   \n
-          //
-          if (qseq &&
-              ((qtok && qtok != qseq) || t.type == token_type::newline))
-          {
-            os << qseq;
-            qseq = '\0';
-          }
-
-          // Left and right token quotes (can be used as bool).
-          //
-          char lq ('\0');
-          char rq ('\0');
-
-          // If the token is quoted, then determine if/which quotes should be
-          // present on its sides and track the quoted token sequence.
-          //
-          if (qtok)
-          {
-            if (t.qcomp) // Complete token quoting.
-            {
-              // If we are inside a quoted token sequence then do noting.
-              // Otherwise just quote the current token not starting a
-              // sequence.
-              //
-              if (!qseq)
-              {
-                lq = qtok;
-                rq = qtok;
-              }
-            }
-            else         // Partial token quoting.
-            {
-              // Note that we can not always reproduce the original tokens
-              // representation for partial quoting. For example, the two
-              // following tokens are lexed into the identical token objects:
-              //
-              // "foo
-              // f"oo"
-              //
-              // We will always assume that the partially quoted token either
-              // starts or ends the quoted token sequence. Sometimes this ends
-              // up unexpectedly, but seems there is not much we can do:
-              //
-              // f"oo" "ba"r  ->  "foo bar"
-              //
-              if (!qseq)     // Start quoted sequence.
-              {
-                lq = qtok;
-                qseq = qtok;
-              }
-              else           // End quoted sequence.
-              {
-                rq = qtok;
-                qseq = '\0';
-              }
-            }
-          }
-
-          // Print the space character prior to the separated token, unless
-          // it is a first like token or the newline.
-          //
-          if (t.separated                   &&
-              t.type != token_type::newline &&
-              &rt != &l.tokens[0])
-            os << ' ';
-
-          if (lq) os << lq; // Print the left quote, if required.
-
-          // Escape the special characters, unless the token in not a word or
-          // is single-quoted. Note that the special character set depends on
-          // whether the word is double-quoted or unquoted.
-          //
-          if (t.type == token_type::word && qtok != '\'')
-          {
-            for (char c: t.value)
-            {
-              if (strchr (qtok ? "\\\"" : "|&<>=\\\"", c) != nullptr)
-                os << '\\';
-
-              os << c;
-            }
-          }
-          else
-            t.printer (os, t, print_mode::raw);
-
-          if (rq) os << rq; // Print the right quote, if required.
-        }
+        dump (os, l, true /* newline */);
       }
     }
 
@@ -215,6 +238,8 @@ namespace build2
     static void
     to_stream_q (ostream& o, const string& s)
     {
+      // NOTE: update dump(line) if adding any new special character.
+      //
       if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos)
         o << '\'' << s << '\'';
       else
diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx
index 891b2f6..31527a0 100644
--- a/libbuild2/script/script.hxx
+++ b/libbuild2/script/script.hxx
@@ -57,6 +57,12 @@ namespace build2
     void
     dump (ostream&, const string& ind, const lines&);
 
+    // As above but print a single line and without the trailing newline token
+    // by default.
+    //
+    void
+    dump (ostream&, const line&, bool newline = false);
+
     // Parse object model.
     //
author	Karen Arutyunov <karen@codesynthesis.com>	2020-06-04 23:01:58 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2020-06-05 17:35:24 +0300
commit	fb56fc798110c8ee9685bec156b21f1f87aca121 (patch)
tree	c92a0d7d764794b1af63227bb8b9e89d036dbb72 /libbuild2
parent	e4a9ccadf751b88f5508ce9f890484bae33d1aaf (diff)