Implement detection of ignorable changes (whitespaces, comments)

author: Boris Kolpackov <boris@codesynthesis.com> 2017-07-22 17:43:09 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2017-07-22 17:43:09 +0200
commit: 8b98a6b3fae40487ac529a7118865df6a71159ee (patch)
tree: dcdc0a13ccec81c087ed05a9ebfe5cac3d56b955
parent: ade763571a19b5e222ac626a6b3bc10685e542a1 (diff)
12 files changed, 359 insertions, 182 deletions
diff --git a/build2/algorithm.cxx b/build2/algorithm.cxx
index cbe27b6..fe71d54 100644
--- a/build2/algorithm.cxx
+++ b/build2/algorithm.cxx
@@ -1192,7 +1192,7 @@ namespace build2
 
       // Should we compare the timestamp to this target's?
       //
-      if (!e && (!pf || pf (*pt)))
+      if (!e && (!pf || pf (*pt, i)))
       {
         // If this is an mtime-based target, then compare timestamps.
         //
diff --git a/build2/algorithm.hxx b/build2/algorithm.hxx
index 5b52069..aef72cb 100644
--- a/build2/algorithm.hxx
+++ b/build2/algorithm.hxx
@@ -332,7 +332,7 @@ namespace build2
   // Note that because we use mtime, this function should normally only be
   // used in the perform_update action (which is straight).
   //
-  using prerequisite_filter = function<bool (const target&)>;
+  using prerequisite_filter = function<bool (const target&, size_t pos)>;
 
   optional<target_state>
   execute_prerequisites (action, const target&,
diff --git a/build2/cc/compile.cxx b/build2/cc/compile.cxx
index 76be8e6..2153205 100644
--- a/build2/cc/compile.cxx
+++ b/build2/cc/compile.cxx
@@ -131,14 +131,16 @@ namespace build2
       preprocessed pp = preprocessed::none;
       prerequisite_member src;
       auto_rmfile psrc;                      // Preprocessed source, if any.
+      path dd;                               // Dependency database path.
       timestamp mt = timestamp_unknown;      // Target timestamp.
+      bool touch = false;                    // Target needs to be touched.
       module_positions mods = {0, 0, 0};
     };
 
     compile::
     compile (data&& d)
         : common (move (d)),
-          rule_id (string (x) += ".compile 3")
+          rule_id (string (x) += ".compile 4")
     {
       static_assert (sizeof (compile::match_data) <= target::data_size,
                      "insufficient space");
@@ -679,7 +681,8 @@ namespace build2
           fsdir_rule::perform_update_direct (act, t);
         }
 
-        depdb dd (tp + ".d");
+        md.dd = tp + ".d";
+        depdb dd (md.dd);
 
         // First should come the rule name/version.
         //
@@ -698,43 +701,45 @@ namespace build2
         // The idea is to keep them exactly as they are passed to the compiler
         // since the order may be significant.
         //
-        sha256 cs;
-
-        // These flags affect how we compile the source and/or the format of
-        // depdb so factor them in.
-        //
-        cs.append (&md.pp, sizeof (md.pp));
-        cs.append (&symexport, sizeof (symexport));
-
-        if (md.pp != preprocessed::all)
         {
-          hash_options (cs, t, c_poptions);
-          hash_options (cs, t, x_poptions);
+          sha256 cs;
 
-          // Hash *.export.poptions from prerequisite libraries.
+          // These flags affect how we compile the source and/or the format of
+          // depdb so factor them in.
           //
-          hash_lib_options (bs, cs, t, act, lo);
+          cs.append (&md.pp, sizeof (md.pp));
+          cs.append (&symexport, sizeof (symexport));
 
-          // Extra system header dirs (last).
-          //
-          for (const dir_path& d: sys_inc_dirs)
-            cs.append (d.string ());
-        }
+          if (md.pp != preprocessed::all)
+          {
+            hash_options (cs, t, c_poptions);
+            hash_options (cs, t, x_poptions);
 
-        hash_options (cs, t, c_coptions);
-        hash_options (cs, t, x_coptions);
-        hash_options (cs, tstd);
+            // Hash *.export.poptions from prerequisite libraries.
+            //
+            hash_lib_options (bs, cs, t, act, lo);
 
-        if (ct == otype::s)
-        {
-          // On Darwin, Win32 -fPIC is the default.
-          //
-          if (tclass == "linux" || tclass == "bsd")
-            cs.append ("-fPIC");
-        }
+            // Extra system header dirs (last).
+            //
+            for (const dir_path& d: sys_inc_dirs)
+              cs.append (d.string ());
+          }
+
+          hash_options (cs, t, c_coptions);
+          hash_options (cs, t, x_coptions);
+          hash_options (cs, tstd);
+
+          if (ct == otype::s)
+          {
+            // On Darwin, Win32 -fPIC is the default.
+            //
+            if (tclass == "linux" || tclass == "bsd")
+              cs.append ("-fPIC");
+          }
 
-        if (dd.expect (cs.string ()) != nullptr)
-          l4 ([&]{trace << "options mismatch forcing update of " << t;});
+          if (dd.expect (cs.string ()) != nullptr)
+            l4 ([&]{trace << "options mismatch forcing update of " << t;});
+        }
 
         // Finally the source file.
         //
@@ -743,7 +748,7 @@ namespace build2
 
         // If any of the above checks resulted in a mismatch (different
         // compiler, options, or source file) or if the depdb is newer than
-        // the target, then do unconditional update.
+        // the target (interrupted update), then do unconditional update.
         //
         timestamp mt;
         bool u (dd.writing () || dd.mtime () > (mt = file_mtime (tp)));
@@ -787,7 +792,7 @@ namespace build2
         //
         pair<auto_rmfile, bool> psrc (auto_rmfile (), false);
         if (md.pp < preprocessed::includes)
-          psrc = extract_headers (act, t, lo, src, md, dd, u);
+          psrc = extract_headers (act, t, lo, src, md, dd, u, mt);
 
         // Next we "obtain" the translation unit information. What exactly
         // "obtain" entails is tricky: If things changed, then we re-parse the
@@ -795,53 +800,80 @@ namespace build2
         // depdb. We, however, have to do it here and now in case the database
         // is invalid and we still have to fallback to re-parse.
         //
-        translation_unit tu;
-        for (bool f (true);; f = false)
+        // Store a translation unit's checksum to detect ignorable changes
+        // (whitespaces, comments, etc).
+        //
         {
-          if (u)
-            tu = parse_unit (act, t, lo, src, psrc.first, md);
+          string cs;
+          if (string* l = dd.read ())
+            cs = move (*l);
+          else
+            u = true; // Database is invalid, force re-parse.
 
-          if (modules)
+          translation_unit tu;
+          for (bool f (true);; f = false)
           {
             if (u)
             {
-              string s (to_string (tu.mod));
+              auto p (parse_unit (act, t, lo, src, psrc.first, md));
 
-              if (f)
-                dd.expect (s);
-              else
-                dd.write (s);
+              if (cs != p.second)
+              {
+                assert (f); // Unchanged TU has a different checksum?
+                dd.write (p.second);
+              }
+              else if (f) // Don't clear if it was forced.
+              {
+                // Clear the update flag and set the touch flag. See also
+                // the md.mt logic below.
+                //
+                u = false;
+                md.touch = true;
+              }
+
+              tu = move (p.first);
             }
-            else
+
+            if (modules)
             {
-              if (string* l = dd.read ())
-                tu.mod = to_module_info (*l);
+              if (u || !f)
+              {
+                string s (to_string (tu.mod));
+
+                if (f)
+                  dd.expect (s);
+                else
+                  dd.write (s);
+              }
               else
               {
-                // Database is invalid, re-parse.
-                //
-                u = true;
-                continue;
+                if (string* l = dd.read ())
+                  tu.mod = to_module_info (*l);
+                else
+                {
+                  u = true; // Database is invalid, force re-parse.
+                  continue;
+                }
               }
             }
-          }
 
-          break;
-        }
+            break;
+          }
 
-        md.type = tu.type ();
+          md.type = tu.type ();
 
-        // Extract the module dependency information in addition to header
-        // dependencies.
-        //
-        // NOTE: assumes that no further targets will be added into
-        //       t.prerequisite_targets!
-        //
-        extract_modules (act, t, lo, tt, src, md, move (tu.mod), dd, u);
+          // Extract the module dependency information in addition to header
+          // dependencies.
+          //
+          // NOTE: assumes that no further targets will be added into
+          //       t.prerequisite_targets!
+          //
+          extract_modules (act, t, lo, tt, src, md, move (tu.mod), dd, u);
+        }
 
         // If anything got updated, then we didn't rely on the cache. However,
         // the cached data could actually have been valid and the compiler run
-        // in extract_headers() merely validated it.
+        // in extract_headers() as well as the code above merely validated it.
         //
         // We do need to update the database timestamp, however. Failed that,
         // we will keep re-validating the cached data over and over again.
@@ -867,16 +899,27 @@ namespace build2
           // compiling the original source would break distributed
           // compilation.
           //
-          // Note also that the long term trend will be for modularized
-          // projects to get rid of #include's which means the need for
-          // producing this partially preprocessed output will hopefully
-          // gradually disappear.
+          // Note also that the long term trend will (hopefully) be for
+          // modularized projects to get rid of #include's which means the
+          // need for producing this partially preprocessed output will
+          // (hopefully) gradually disappear.
           //
           if (modules)
             md.psrc.active = false; // Keep.
         }
 
-        md.mt = u ? timestamp_nonexistent : mt;
+        // Above we may have ignored changes to the translation unit. The
+        // problem is, unless we also update the target's timestamp, we will
+        // keep re-checking this on subsequent runs and it is not cheap.
+        // Updating the target's timestamp is not without problems either: it
+        // will cause a re-link on a subsequent run. So, essentially, we
+        // somehow need to remember two timestamps: one for checking
+        // "preprocessor prerequisites" above and one for checking other
+        // prerequisites (like modules) below. So what we are going to do is
+        // store the first in the target file (so we do touch it) and the
+        // second in depdb (which is never newer than the target).
+        //
+        md.mt = u ? timestamp_nonexistent : dd.mtime ();
       }
 
       switch (act)
@@ -1227,7 +1270,8 @@ namespace build2
                      const file& src,
                      const match_data& md,
                      depdb& dd,
-                     bool& updating) const
+                     bool& updating,
+                     timestamp mt) const
     {
       tracer trace (x, "compile::extract_headers");
 
@@ -1656,8 +1700,10 @@ namespace build2
       // from the depdb cache or from the compiler run. Return whether the
       // extraction process should be restarted.
       //
-      auto add = [&trace, &pm, act, &t, lo, &dd, &updating, &bs, &rels, this]
-        (path f, bool cache) -> bool
+      auto add = [&trace, &pm,
+                  act, &t, lo,
+                  &dd, &updating, mt,
+                  &bs, &rels, this] (path f, bool cache) -> bool
       {
         // Find or maybe insert the target.
         //
@@ -1859,12 +1905,11 @@ namespace build2
         // Update.
         //
         // If this header came from the depdb, make sure it is no older than
-        // the db itself (if it has changed since the db was written, then
-        // chances are the cached data is stale).
+        // the target (if it has changed since the target was updated, then
+        // the cached data is stale).
         //
         bool restart (
-          update (
-            trace, act, *pt, cache ? dd.mtime () : timestamp_unknown));
+          update (trace, act, *pt, cache ? mt : timestamp_unknown));
 
         updating = updating || restart;
 
@@ -2273,7 +2318,7 @@ namespace build2
       return make_pair (move (psrc), puse);
     }
 
-    translation_unit compile::
+    pair<translation_unit, string> compile::
     parse_unit (action act,
                 file& t,
                 lorder lo,
@@ -2433,7 +2478,8 @@ namespace build2
 
           // Use binary mode to obtain consistent positions.
           //
-          ifdstream is (move (pr.in_ofd), fdstream_mode::skip);
+          ifdstream is (move (pr.in_ofd),
+                        fdstream_mode::binary | fdstream_mode::skip);
 
           parser p;
           translation_unit tu (p.parse (is, rels));
@@ -2480,7 +2526,7 @@ namespace build2
                 tu.mod.iface = true;
             }
 
-            return tu;
+            return pair<translation_unit, string> (move (tu), p.checksum);
           }
 
           // Fall through.
@@ -3223,6 +3269,7 @@ namespace build2
     perform_update (action act, const target& xt) const
     {
       const file& t (xt.as<file> ());
+      const path& tp (t.path ());
 
       match_data md (move (t.data<match_data> ()));
       bool mod (md.type == translation_type::module_iface);
@@ -3235,15 +3282,26 @@ namespace build2
         execute_prerequisites<file> (
           (mod ? *x_mod : x_src),
           act, t,
-          md.mt, nullptr,
+          md.mt,
+          [s = md.mods.start] (const target&, size_t i)
+          {
+            return s != 0 && i >= s; // Only compare timestamps for modules.
+          },
           md.mods.copied)); // See search_modules() for details.
 
       if (pr.first)
       {
+        if (md.touch)
+          touch (tp, false, 2);
+
         t.mtime (md.mt);
         return *pr.first;
       }
 
+      // Make sure depdb is no older than any of our prerequisites.
+      //
+      touch (md.dd, false, verb_never);
+
       const file& s (pr.second);
 
       const scope& bs (t.base_scope ());
@@ -3266,11 +3324,11 @@ namespace build2
       path relo, relm;
       if (mod)
       {
-        relm = relative (t.path ());
+        relm = relative (tp);
         relo = relative (t.member->is_a<file> ()->path ());
       }
       else
-        relo = relative (t.path ());
+        relo = relative (tp);
 
       // Build the command line.
       //
diff --git a/build2/cc/compile.hxx b/build2/cc/compile.hxx
index 58b6e13..7bf182e 100644
--- a/build2/cc/compile.hxx
+++ b/build2/cc/compile.hxx
@@ -102,9 +102,9 @@ namespace build2
       pair<auto_rmfile, bool>
       extract_headers (action, file&, lorder,
                        const file&, const match_data&,
-                       depdb&, bool&) const;
+                       depdb&, bool&, timestamp) const;
 
-      translation_unit
+      pair<translation_unit, string>
       parse_unit (action, file&, lorder,
                   const file&, auto_rmfile&, const match_data&) const;
 
diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx
index ba6ea18..8cabffd 100644
--- a/build2/cc/lexer.cxx
+++ b/build2/cc/lexer.cxx
@@ -25,34 +25,6 @@ namespace build2
 {
   namespace cc
   {
-    inline void lexer::
-    get (const xchar& c)
-    {
-      // Increment the logical line similar to how base will increment the
-      // physical (the column counts are the same).
-      //
-      if (log_line_ && c == '\n' && !unget_ && !unpeek_)
-        ++*log_line_;
-
-      base::get (c);
-    }
-
-    inline auto lexer::
-    get (bool e) -> xchar
-    {
-      if (unget_)
-      {
-        unget_ = false;
-        return ungetc_;
-      }
-      else
-      {
-        xchar c (peek (e));
-        get (c);
-        return c;
-      }
-    }
-
     auto lexer::
     peek (bool e) -> xchar
     {
@@ -85,6 +57,49 @@ namespace build2
       return c;
     }
 
+    inline auto lexer::
+    get (bool e) -> xchar
+    {
+      if (unget_)
+      {
+        unget_ = false;
+        return ungetc_;
+      }
+      else
+      {
+        xchar c (peek (e));
+        get (c);
+        return c;
+      }
+    }
+
+    inline void lexer::
+    get (const xchar& c)
+    {
+      // Increment the logical line similar to how base will increment the
+      // physical (the column counts are the same).
+      //
+      if (log_line_ && c == '\n' && !unget_ && !unpeek_)
+        ++*log_line_;
+
+      base::get (c);
+    }
+
+    inline auto lexer::
+    geth (bool e) -> xchar
+    {
+      xchar c (get (e));
+      cs_.append (c);
+      return c;
+    }
+
+    inline void lexer::
+    geth (const xchar& c)
+    {
+      get (c);
+      cs_.append (c);
+    }
+
     using type = token_type;
 
     void lexer::
@@ -93,7 +108,7 @@ namespace build2
       for (;; c = skip_spaces ())
       {
         t.file = log_file_;
-        t.line = log_line_ ? * log_line_ : c.line;
+        t.line = log_line_ ? *log_line_ : c.line;
         t.column = c.column;
 
         if (eos (c))
@@ -104,6 +119,23 @@ namespace build2
 
         const location l (&name_, c.line, c.column);
 
+        // Hash the token's line. The reason is debug info. In fact, doing
+        // this will make quite a few "noop" changes (like adding a newline
+        // anywhere in the source) cause the checksum to change. But there
+        // doesn't seem to be any way around it: the case where we benefit
+        // from the precise change detection the most (development) is also
+        // where we will most likely have debug info enabled.
+        //
+        // Note that in order not to make this completely useless we don't
+        // hash the column. Even if it is part of the debug info, having it a
+        // bit off shouldn't cause any significant mis-positioning. We also
+        // don't hash the file path for each token instead only hashing it
+        // when changed with the #line directive (as well as in the
+        // constructor for the initial path).
+        //
+        cs_.append (t.line);
+        cs_.append (c);
+
         switch (c)
         {
           // Preprocessor lines.
@@ -112,12 +144,14 @@ namespace build2
           {
             // It is tempting to simply scan until the newline ignoring
             // anything in between. However, these lines can start a
-            // multi-line C-style comment. So we have to tokenize them.
+            // multi-line C-style comment. So we have to tokenize them (and
+            // hash the data for each token).
             //
             // Note that this may not work for things like #error that can
             // contain pretty much anything. Also note that lines that start
             // with '#' can contain '#' further down. In this case we need to
-            // be careful not to recurse (and consume multiple newlines).
+            // be careful not to recurse (and consume multiple newlines). Thus
+            // the ignore_pp flag.
             //
             // Finally, to support diagnostics properly we need to recognize
             // #line directives.
@@ -206,7 +240,7 @@ namespace build2
 
             if (p == '*')
             {
-              get (p);
+              geth (p);
               t.type = type::punctuation;
               return;
             }
@@ -218,10 +252,13 @@ namespace build2
             else if (p == '.')
             {
               get (p);
+
               xchar q (peek ());
               if (q == '.')
               {
-                get (q);
+                cs_.append (p);
+
+                geth (q);
                 t.type = type::punctuation;
                 return;
               }
@@ -242,7 +279,7 @@ namespace build2
             xchar p (peek ());
 
             if (p == '=')
-              get (p);
+              geth (p);
 
             t.type = type::punctuation;
             return;
@@ -254,12 +291,12 @@ namespace build2
 
             if (p == c)
             {
-              get (p);
+              geth (p);
               if ((p = peek ()) == '=')
-                get (p);
+                geth (p);
             }
             else if (p == '=')
-              get (p);
+              geth (p);
 
             t.type = type::punctuation;
             return;
@@ -269,15 +306,13 @@ namespace build2
           {
             xchar p (peek ());
 
-            if (p == c)
-              get (p);
-            else if (p == '=')
-              get (p);
+            if (p == c || p == '=')
+              geth (p);
             else if (c == '-' && p == '>')
             {
-              get (p);
+              geth (p);
               if ((p = peek ()) == '*')
-                get (p);
+                geth (p);
             }
 
             t.type = type::punctuation;
@@ -288,10 +323,8 @@ namespace build2
           {
             xchar p (peek ());
 
-            if (p == c)
-              get (p);
-            else if (p == '=')
-              get (p);
+            if (p == c || p == '=')
+              geth (p);
 
             t.type = type::punctuation;
             return;
@@ -301,7 +334,7 @@ namespace build2
             xchar p (peek ());
 
             if (p == ':')
-              get (p);
+              geth (p);
 
             t.type = type::punctuation;
             return;
@@ -340,7 +373,7 @@ namespace build2
               string& id (t.value);
               id.clear ();
 
-              for (id += c; (c = peek ()) == '_' || alnum (c); get (c))
+              for (id += c; (c = peek ()) == '_' || alnum (c); geth (c))
                 id += c;
 
               // If the following character is a quote, see if the identifier
@@ -382,7 +415,7 @@ namespace build2
 
                 if (i == n) // All characters "consumed".
                 {
-                  get (c);
+                  geth (c);
                   id.clear ();
                 }
               }
@@ -423,6 +456,8 @@ namespace build2
     void lexer::
     number_literal (token& t, xchar c)
     {
+      // note: c is hashed
+
       // A number (integer or floating point literal) can:
       //
       // 1. Start with a dot (which must be followed by a digit, e.g., .123).
@@ -500,10 +535,10 @@ namespace build2
         case 'p':
         case 'P':
           {
-            get (c);
+            geth (c);
             c = peek ();
             if (c == '+' || c == '-')
-              get (c);
+              geth (c);
             continue;
           }
 
@@ -512,7 +547,7 @@ namespace build2
         case '\'':
         default: // Digits and letters.
           {
-            get (c);
+            geth (c);
             continue;
           }
         }
@@ -526,11 +561,13 @@ namespace build2
     void lexer::
     char_literal (token& t, xchar c)
     {
+      // note: c is hashed
+
       const location l (&name_, c.line, c.column);
 
       for (char p (c);;) // Previous character (see below).
       {
-        c = get ();
+        c = geth ();
 
         if (eos (c) || c == '\n')
           fail (l) << "unterminated character literal";
@@ -555,11 +592,13 @@ namespace build2
     void lexer::
     string_literal (token& t, xchar c)
     {
+      // note: c is hashed
+
       const location l (&name_, c.line, c.column);
 
       for (char p (c);;) // Previous character (see below).
       {
-        c = get ();
+        c = geth ();
 
         if (eos (c) || c == '\n')
           fail (l) << "unterminated string literal";
@@ -584,6 +623,8 @@ namespace build2
     void lexer::
     raw_string_literal (token& t, xchar c)
     {
+      // note: c is hashed
+
       // The overall form is:
       //
       // R"<delimiter>(<raw_characters>)<delimiter>"
@@ -603,7 +644,7 @@ namespace build2
 
       for (;;)
       {
-        c = get ();
+        c = geth ();
 
         if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
           fail (l) << "invalid raw string literal";
@@ -621,7 +662,7 @@ namespace build2
       //
       for (size_t i (0);;) // Position to match in d.
       {
-        c = get (false); // No newline escaping.
+        c = geth (false); // No newline escaping.
 
         if (eos (c)) // Note: newline is ok.
           fail (l) << "invalid raw string literal";
@@ -647,9 +688,11 @@ namespace build2
     void lexer::
     literal_suffix (xchar c)
     {
+      // note: c is unhashed
+
       // Parse a user-defined literal suffix identifier.
       //
-      for (get (c); (c = peek ()) == '_' || alnum (c); get (c)) ;
+      for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ;
     }
 
     void lexer::
@@ -657,12 +700,17 @@ namespace build2
     {
       // enter: first digit of the line number
       // leave: last character of the line number or file string
+      // note:  c is unhashed
 
       // If our number and string tokens contained the literal values, then we
       // could have used that. However, we ignore the value (along with escape
       // processing, etc), for performance. Let's keep it that way and instead
       // handle it ourselves.
       //
+      // Note also that we are not hashing these at the character level
+      // instead hashing the switch to a new file path below and leaving the
+      // line number to the token line hashing.
+      //
       {
         string& s (t.value);
 
@@ -726,6 +774,65 @@ namespace build2
         }
 
         log_file_ = path (move (s)); // Move back in.
+
+        // If the path is relative, then prefix it with the current working
+        // directory. Failing that, we will end up with different checksums for
+        // invocations from different directories.
+        //
+        // While this should work fine for normal cross-compilation, it's an
+        // entirely different story for the emulated case (e.g., msvc-linux
+        // where the preprocessed output contains absolute Windows paths). So
+        // we try to sense if things look fishy and leave the path alone.
+        //
+        // Also detect special names like <built-in> and <command-line>. Plus
+        // GCC sometimes adds what looks like a working directory (has trailing
+        // slash). So ignore that as well.
+        //
+        if (!log_file_.to_directory ())
+        {
+          using tr = path::traits;
+          const string& f (log_file_.string ());
+
+          if (f.find (':') != string::npos            ||
+              (f.front () == '<' && f.back () == '>') ||
+              log_file_.absolute ())
+            cs_.append (f);
+          else
+          {
+            // This gets complicated and slow: the path may contain '..' and
+            // '.'  so strictly speaking we would need to normalize it.
+            // Instead, we are going to handle leading '..'s ourselves (the
+            // sane case) and ignore everything else (so if you have '..'  or
+            // '.' somewhere in the middle, then things might not work
+            // optimally for you).
+            //
+            const string& d (work.string ());
+
+            // Iterate over leading '..' in f "popping" the corresponding
+            // number of trailing components from d.
+            //
+            size_t fp (0);
+            size_t dp (d.size () - 1);
+
+            for (size_t p;; )
+            {
+              // Note that in file we recognize any directory separator, not
+              // just of this platform (see note about emulation above).
+              //
+              if (f.compare (fp, 2, "..") != 0  ||
+                  (f[fp + 2] != '/' && f[fp + 2] != '\\') || // Could be '\0'.
+                  (p = tr::rfind_separator (d, dp)) == string::npos)
+                break;
+
+              fp += 3;
+              dp = p - 1;
+            }
+
+            cs_.append (d.c_str (), dp + 1);
+            cs_.append (tr::directory_separator); // Canonical in work.
+            cs_.append (f.c_str () + fp);
+          }
+        }
       }
       else
         unget (c);
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
index 65e9012..1869344 100644
--- a/build2/cc/lexer.hxx
+++ b/build2/cc/lexer.hxx
@@ -5,6 +5,7 @@
 #ifndef BUILD2_CC_LEXER_HXX
 #define BUILD2_CC_LEXER_HXX
 
+#include <libbutl/sha256.hxx>
 #include <libbutl/char-scanner.hxx>
 
 #include <build2/types.hxx>
@@ -27,6 +28,10 @@ namespace build2
     // saved from literals. The #line directive (and its shorthand notation)
     // is recognized to provide the logical token location.
     //
+    // While at it we also calculate the checksum of the input ignoring
+    // comments, whitespaces, etc. This is used to detect changes that do not
+    // alter the resulting token stream.
+    //
     enum class token_type
     {
       // NOTE: remember to update operator<<() if changing anything here!
@@ -82,6 +87,9 @@ namespace build2
       const path&
       name () const {return name_;}
 
+      string
+      checksum () const {return cs_.string ();}
+
       // Note that it is ok to call next() again after getting eos.
       //
       token
@@ -137,13 +145,21 @@ namespace build2
       using base = char_scanner;
 
       xchar
+      peek (bool escape = true);
+
+      xchar
       get (bool escape = true);
 
       void
       get (const xchar& peeked);
 
+      // Hashing versions.
+      //
       xchar
-      peek (bool escape = true);
+      geth (bool escape = true);
+
+      void
+      geth (const xchar& peeked);
 
     private:
       const path name_;
@@ -154,6 +170,8 @@ namespace build2
       //
       path               log_file_;
       optional<uint64_t> log_line_;
+
+      sha256 cs_;
     };
 
     // Diagnostics plumbing.
diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx
index df2e257..a97a98b 100644
--- a/build2/cc/parser.cxx
+++ b/build2/cc/parser.cxx
@@ -129,6 +129,7 @@ namespace build2
       if (bb != 0)
         /*warn*/ fail (t) << "{}-imbalance detected";
 
+      checksum = l.checksum ();
       return u;
     }
 
diff --git a/build2/cc/parser.hxx b/build2/cc/parser.hxx
index d542d57..9142001 100644
--- a/build2/cc/parser.hxx
+++ b/build2/cc/parser.hxx
@@ -38,6 +38,7 @@ namespace build2
       parse_module_name (token&);
 
     public:
+      string checksum;     // Translation unit checksum.
       uint64_t export_pos; // Temporary hack, see parse_unit().
 
     private:
diff --git a/build2/diagnostics.hxx b/build2/diagnostics.hxx
index 1e76099..1b8f370 100644
--- a/build2/diagnostics.hxx
+++ b/build2/diagnostics.hxx
@@ -60,6 +60,7 @@ namespace build2
   // While uint8 is more than enough, use uint16 for the ease of printing.
   //
   extern uint16_t verb;
+  const  uint16_t verb_never = 7;
 
   template <typename F> inline void l1 (const F& f) {if (verb >= 1) f ();}
   template <typename F> inline void l2 (const F& f) {if (verb >= 2) f ();}
diff --git a/build2/filesystem.cxx b/build2/filesystem.cxx
index 9d9b3b6..eefaf99 100644
--- a/build2/filesystem.cxx
+++ b/build2/filesystem.cxx
@@ -11,6 +11,22 @@ using namespace butl;
 
 namespace build2
 {
+  bool
+  touch (const path& p, bool create, uint16_t v)
+  {
+    if (verb >= v)
+      text << "touch " << p;
+
+    try
+    {
+      return touch_file (p, create);
+    }
+    catch (const system_error& e)
+    {
+      fail << "unable to touch file " << p << ": " << e << endf;
+    }
+  }
+
   fs_status<mkdir_status>
   mkdir (const dir_path& d, uint16_t v)
   {
diff --git a/build2/filesystem.hxx b/build2/filesystem.hxx
index 79633af..4ef4caf 100644
--- a/build2/filesystem.hxx
+++ b/build2/filesystem.hxx
@@ -30,6 +30,14 @@ namespace build2
     explicit operator bool () const {return v == T::success;}
   };
 
+  // Set the file access and modification times to the current time printing
+  // the standard diagnostics starting from the specified verbosity level. If
+  // the file does not exist, create it if create is true and fail otherwise.
+  // Return true if the file was created and false otherwise.
+  //
+  bool
+  touch (const path&, bool create, uint16_t verbosity = 1);
+
   // Create the directory and print the standard diagnostics starting from
   // the specified verbosity level.
   //
diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx
index 2a8150d..ca47ad2 100644
--- a/build2/test/script/builtin.cxx
+++ b/build2/test/script/builtin.cxx
@@ -4,12 +4,6 @@
 
 #include <build2/test/script/builtin.hxx>
 
-#ifndef _WIN32
-#  include <utime.h>
-#else
-#  include <sys/utime.h>
-#endif
-
 #include <locale>
 #include <ostream>
 #include <sstream>
@@ -1505,38 +1499,11 @@ namespace build2
 
             try
             {
-              if (file_exists (p))
-              {
-                // Set the file access and modification times to the current
-                // time. Note that we don't register (implicit) cleanup for an
-                // existing path.
-                //
-#ifndef _WIN32
-                if (utime  (p.string ().c_str (), nullptr) == -1)
-#else
-                if (_utime (p.string ().c_str (), nullptr) == -1)
-#endif
-                  throw_generic_error (errno);
-              }
-              else if (!entry_exists (p))
-              {
-                // Create the file. Assume the file access and modification
-                // times are set to the current time automatically.
-                //
-                try
-                {
-                  fdopen (p, fdopen_mode::out | fdopen_mode::create);
-                }
-                catch (const io_error& e)
-                {
-                  error () << "cannot create file '" << p << "': " << e;
-                }
-
-                if (cleanup)
-                  sp.clean ({cleanup_type::always, p}, true);
-              }
-              else
-                error () << "'" << p << "' exists and is not a file";
+              // Note that we don't register (implicit) cleanup for an
+              // existing path.
+              //
+              if (touch_file (p) && cleanup)
+                sp.clean ({cleanup_type::always, p}, true);
             }
             catch (const system_error& e)
             {
author	Boris Kolpackov <boris@codesynthesis.com>	2017-07-22 17:43:09 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2017-07-22 17:43:09 +0200
commit	8b98a6b3fae40487ac529a7118865df6a71159ee (patch)
tree	dcdc0a13ccec81c087ed05a9ebfe5cac3d56b955
parent	ade763571a19b5e222ac626a6b3bc10685e542a1 (diff)