aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-07-22 17:43:09 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-07-22 17:43:09 +0200
commit 8b98a6b3fae40487ac529a7118865df6a71159ee (patch)
tree dcdc0a13ccec81c087ed05a9ebfe5cac3d56b955
parent ade763571a19b5e222ac626a6b3bc10685e542a1 (diff)
Implement detection of ignorable changes (whitespaces, comments)
-rw-r--r-- build2/algorithm.cxx 2
-rw-r--r-- build2/algorithm.hxx 2
-rw-r--r-- build2/cc/compile.cxx 220
-rw-r--r-- build2/cc/compile.hxx 4
-rw-r--r-- build2/cc/lexer.cxx 223
-rw-r--r-- build2/cc/lexer.hxx 20
-rw-r--r-- build2/cc/parser.cxx 1
-rw-r--r-- build2/cc/parser.hxx 1
-rw-r--r-- build2/diagnostics.hxx 1
-rw-r--r-- build2/filesystem.cxx 16
-rw-r--r-- build2/filesystem.hxx 8
-rw-r--r-- build2/test/script/builtin.cxx 43
12 files changed, 359 insertions, 182 deletions
diff --git a/build2/algorithm.cxx b/build2/algorithm.cxx
index cbe27b6..fe71d54 100644
--- a/build2/algorithm.cxx
+++ b/build2/algorithm.cxx
@@ -1192,7 +1192,7 @@ namespace build2
// Should we compare the timestamp to this target's?
//
- if (!e && (!pf || pf (*pt)))
+ if (!e && (!pf || pf (*pt, i)))
{
// If this is an mtime-based target, then compare timestamps.
//
diff --git a/build2/algorithm.hxx b/build2/algorithm.hxx
index 5b52069..aef72cb 100644
--- a/build2/algorithm.hxx
+++ b/build2/algorithm.hxx
@@ -332,7 +332,7 @@ namespace build2
// Note that because we use mtime, this function should normally only be
// used in the perform_update action (which is straight).
//
- using prerequisite_filter = function<bool (const target&)>;
+ using prerequisite_filter = function<bool (const target&, size_t pos)>;
optional<target_state>
execute_prerequisites (action, const target&,
diff --git a/build2/cc/compile.cxx b/build2/cc/compile.cxx
index 76be8e6..2153205 100644
--- a/build2/cc/compile.cxx
+++ b/build2/cc/compile.cxx
@@ -131,14 +131,16 @@ namespace build2
preprocessed pp = preprocessed::none;
prerequisite_member src;
auto_rmfile psrc; // Preprocessed source, if any.
+ path dd; // Dependency database path.
timestamp mt = timestamp_unknown; // Target timestamp.
+ bool touch = false; // Target needs to be touched.
module_positions mods = {0, 0, 0};
};
compile::
compile (data&& d)
: common (move (d)),
- rule_id (string (x) += ".compile 3")
+ rule_id (string (x) += ".compile 4")
{
static_assert (sizeof (compile::match_data) <= target::data_size,
"insufficient space");
@@ -679,7 +681,8 @@ namespace build2
fsdir_rule::perform_update_direct (act, t);
}
- depdb dd (tp + ".d");
+ md.dd = tp + ".d";
+ depdb dd (md.dd);
// First should come the rule name/version.
//
@@ -698,43 +701,45 @@ namespace build2
// The idea is to keep them exactly as they are passed to the compiler
// since the order may be significant.
//
- sha256 cs;
-
- // These flags affect how we compile the source and/or the format of
- // depdb so factor them in.
- //
- cs.append (&md.pp, sizeof (md.pp));
- cs.append (&symexport, sizeof (symexport));
-
- if (md.pp != preprocessed::all)
{
- hash_options (cs, t, c_poptions);
- hash_options (cs, t, x_poptions);
+ sha256 cs;
- // Hash *.export.poptions from prerequisite libraries.
+ // These flags affect how we compile the source and/or the format of
+ // depdb so factor them in.
//
- hash_lib_options (bs, cs, t, act, lo);
+ cs.append (&md.pp, sizeof (md.pp));
+ cs.append (&symexport, sizeof (symexport));
- // Extra system header dirs (last).
- //
- for (const dir_path& d: sys_inc_dirs)
- cs.append (d.string ());
- }
+ if (md.pp != preprocessed::all)
+ {
+ hash_options (cs, t, c_poptions);
+ hash_options (cs, t, x_poptions);
- hash_options (cs, t, c_coptions);
- hash_options (cs, t, x_coptions);
- hash_options (cs, tstd);
+ // Hash *.export.poptions from prerequisite libraries.
+ //
+ hash_lib_options (bs, cs, t, act, lo);
- if (ct == otype::s)
- {
- // On Darwin, Win32 -fPIC is the default.
- //
- if (tclass == "linux" || tclass == "bsd")
- cs.append ("-fPIC");
- }
+ // Extra system header dirs (last).
+ //
+ for (const dir_path& d: sys_inc_dirs)
+ cs.append (d.string ());
+ }
+
+ hash_options (cs, t, c_coptions);
+ hash_options (cs, t, x_coptions);
+ hash_options (cs, tstd);
+
+ if (ct == otype::s)
+ {
+ // On Darwin, Win32 -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ cs.append ("-fPIC");
+ }
- if (dd.expect (cs.string ()) != nullptr)
- l4 ([&]{trace << "options mismatch forcing update of " << t;});
+ if (dd.expect (cs.string ()) != nullptr)
+ l4 ([&]{trace << "options mismatch forcing update of " << t;});
+ }
// Finally the source file.
//
@@ -743,7 +748,7 @@ namespace build2
// If any of the above checks resulted in a mismatch (different
// compiler, options, or source file) or if the depdb is newer than
- // the target, then do unconditional update.
+ // the target (interrupted update), then do unconditional update.
//
timestamp mt;
bool u (dd.writing () || dd.mtime () > (mt = file_mtime (tp)));
@@ -787,7 +792,7 @@ namespace build2
//
pair<auto_rmfile, bool> psrc (auto_rmfile (), false);
if (md.pp < preprocessed::includes)
- psrc = extract_headers (act, t, lo, src, md, dd, u);
+ psrc = extract_headers (act, t, lo, src, md, dd, u, mt);
// Next we "obtain" the translation unit information. What exactly
// "obtain" entails is tricky: If things changed, then we re-parse the
@@ -795,53 +800,80 @@ namespace build2
// depdb. We, however, have to do it here and now in case the database
// is invalid and we still have to fallback to re-parse.
//
- translation_unit tu;
- for (bool f (true);; f = false)
+ // Store a translation unit's checksum to detect ignorable changes
+ // (whitespaces, comments, etc).
+ //
{
- if (u)
- tu = parse_unit (act, t, lo, src, psrc.first, md);
+ string cs;
+ if (string* l = dd.read ())
+ cs = move (*l);
+ else
+ u = true; // Database is invalid, force re-parse.
- if (modules)
+ translation_unit tu;
+ for (bool f (true);; f = false)
{
if (u)
{
- string s (to_string (tu.mod));
+ auto p (parse_unit (act, t, lo, src, psrc.first, md));
- if (f)
- dd.expect (s);
- else
- dd.write (s);
+ if (cs != p.second)
+ {
+ assert (f); // Unchanged TU has a different checksum?
+ dd.write (p.second);
+ }
+ else if (f) // Don't clear if it was forced.
+ {
+ // Clear the update flag and set the touch flag. See also
+ // the md.mt logic below.
+ //
+ u = false;
+ md.touch = true;
+ }
+
+ tu = move (p.first);
}
- else
+
+ if (modules)
{
- if (string* l = dd.read ())
- tu.mod = to_module_info (*l);
+ if (u || !f)
+ {
+ string s (to_string (tu.mod));
+
+ if (f)
+ dd.expect (s);
+ else
+ dd.write (s);
+ }
else
{
- // Database is invalid, re-parse.
- //
- u = true;
- continue;
+ if (string* l = dd.read ())
+ tu.mod = to_module_info (*l);
+ else
+ {
+ u = true; // Database is invalid, force re-parse.
+ continue;
+ }
}
}
- }
- break;
- }
+ break;
+ }
- md.type = tu.type ();
+ md.type = tu.type ();
- // Extract the module dependency information in addition to header
- // dependencies.
- //
- // NOTE: assumes that no further targets will be added into
- // t.prerequisite_targets!
- //
- extract_modules (act, t, lo, tt, src, md, move (tu.mod), dd, u);
+ // Extract the module dependency information in addition to header
+ // dependencies.
+ //
+ // NOTE: assumes that no further targets will be added into
+ // t.prerequisite_targets!
+ //
+ extract_modules (act, t, lo, tt, src, md, move (tu.mod), dd, u);
+ }
// If anything got updated, then we didn't rely on the cache. However,
// the cached data could actually have been valid and the compiler run
- // in extract_headers() merely validated it.
+ // in extract_headers() as well as the code above merely validated it.
//
// We do need to update the database timestamp, however. Failed that,
// we will keep re-validating the cached data over and over again.
@@ -867,16 +899,27 @@ namespace build2
// compiling the original source would break distributed
// compilation.
//
- // Note also that the long term trend will be for modularized
- // projects to get rid of #include's which means the need for
- // producing this partially preprocessed output will hopefully
- // gradually disappear.
+ // Note also that the long term trend will (hopefully) be for
+ // modularized projects to get rid of #include's which means the
+ // need for producing this partially preprocessed output will
+ // (hopefully) gradually disappear.
//
if (modules)
md.psrc.active = false; // Keep.
}
- md.mt = u ? timestamp_nonexistent : mt;
+ // Above we may have ignored changes to the translation unit. The
+ // problem is, unless we also update the target's timestamp, we will
+ // keep re-checking this on subsequent runs and it is not cheap.
+ // Updating the target's timestamp is not without problems either: it
+ // will cause a re-link on a subsequent run. So, essentially, we
+ // somehow need to remember two timestamps: one for checking
+ // "preprocessor prerequisites" above and one for checking other
+ // prerequisites (like modules) below. So what we are going to do is
+ // store the first in the target file (so we do touch it) and the
+ // second in depdb (which is never newer than the target).
+ //
+ md.mt = u ? timestamp_nonexistent : dd.mtime ();
}
switch (act)
@@ -1227,7 +1270,8 @@ namespace build2
const file& src,
const match_data& md,
depdb& dd,
- bool& updating) const
+ bool& updating,
+ timestamp mt) const
{
tracer trace (x, "compile::extract_headers");
@@ -1656,8 +1700,10 @@ namespace build2
// from the depdb cache or from the compiler run. Return whether the
// extraction process should be restarted.
//
- auto add = [&trace, &pm, act, &t, lo, &dd, &updating, &bs, &rels, this]
- (path f, bool cache) -> bool
+ auto add = [&trace, &pm,
+ act, &t, lo,
+ &dd, &updating, mt,
+ &bs, &rels, this] (path f, bool cache) -> bool
{
// Find or maybe insert the target.
//
@@ -1859,12 +1905,11 @@ namespace build2
// Update.
//
// If this header came from the depdb, make sure it is no older than
- // the db itself (if it has changed since the db was written, then
- // chances are the cached data is stale).
+ // the target (if it has changed since the target was updated, then
+ // the cached data is stale).
//
bool restart (
- update (
- trace, act, *pt, cache ? dd.mtime () : timestamp_unknown));
+ update (trace, act, *pt, cache ? mt : timestamp_unknown));
updating = updating || restart;
@@ -2273,7 +2318,7 @@ namespace build2
return make_pair (move (psrc), puse);
}
- translation_unit compile::
+ pair<translation_unit, string> compile::
parse_unit (action act,
file& t,
lorder lo,
@@ -2433,7 +2478,8 @@ namespace build2
// Use binary mode to obtain consistent positions.
//
- ifdstream is (move (pr.in_ofd), fdstream_mode::skip);
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::binary | fdstream_mode::skip);
parser p;
translation_unit tu (p.parse (is, rels));
@@ -2480,7 +2526,7 @@ namespace build2
tu.mod.iface = true;
}
- return tu;
+ return pair<translation_unit, string> (move (tu), p.checksum);
}
// Fall through.
@@ -3223,6 +3269,7 @@ namespace build2
perform_update (action act, const target& xt) const
{
const file& t (xt.as<file> ());
+ const path& tp (t.path ());
match_data md (move (t.data<match_data> ()));
bool mod (md.type == translation_type::module_iface);
@@ -3235,15 +3282,26 @@ namespace build2
execute_prerequisites<file> (
(mod ? *x_mod : x_src),
act, t,
- md.mt, nullptr,
+ md.mt,
+ [s = md.mods.start] (const target&, size_t i)
+ {
+ return s != 0 && i >= s; // Only compare timestamps for modules.
+ },
md.mods.copied)); // See search_modules() for details.
if (pr.first)
{
+ if (md.touch)
+ touch (tp, false, 2);
+
t.mtime (md.mt);
return *pr.first;
}
+ // Make sure depdb is no older than any of our prerequisites.
+ //
+ touch (md.dd, false, verb_never);
+
const file& s (pr.second);
const scope& bs (t.base_scope ());
@@ -3266,11 +3324,11 @@ namespace build2
path relo, relm;
if (mod)
{
- relm = relative (t.path ());
+ relm = relative (tp);
relo = relative (t.member->is_a<file> ()->path ());
}
else
- relo = relative (t.path ());
+ relo = relative (tp);
// Build the command line.
//
diff --git a/build2/cc/compile.hxx b/build2/cc/compile.hxx
index 58b6e13..7bf182e 100644
--- a/build2/cc/compile.hxx
+++ b/build2/cc/compile.hxx
@@ -102,9 +102,9 @@ namespace build2
pair<auto_rmfile, bool>
extract_headers (action, file&, lorder,
const file&, const match_data&,
- depdb&, bool&) const;
+ depdb&, bool&, timestamp) const;
- translation_unit
+ pair<translation_unit, string>
parse_unit (action, file&, lorder,
const file&, auto_rmfile&, const match_data&) const;
diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx
index ba6ea18..8cabffd 100644
--- a/build2/cc/lexer.cxx
+++ b/build2/cc/lexer.cxx
@@ -25,34 +25,6 @@ namespace build2
{
namespace cc
{
- inline void lexer::
- get (const xchar& c)
- {
- // Increment the logical line similar to how base will increment the
- // physical (the column counts are the same).
- //
- if (log_line_ && c == '\n' && !unget_ && !unpeek_)
- ++*log_line_;
-
- base::get (c);
- }
-
- inline auto lexer::
- get (bool e) -> xchar
- {
- if (unget_)
- {
- unget_ = false;
- return ungetc_;
- }
- else
- {
- xchar c (peek (e));
- get (c);
- return c;
- }
- }
-
auto lexer::
peek (bool e) -> xchar
{
@@ -85,6 +57,49 @@ namespace build2
return c;
}
+ inline auto lexer::
+ get (bool e) -> xchar
+ {
+ if (unget_)
+ {
+ unget_ = false;
+ return ungetc_;
+ }
+ else
+ {
+ xchar c (peek (e));
+ get (c);
+ return c;
+ }
+ }
+
+ inline void lexer::
+ get (const xchar& c)
+ {
+ // Increment the logical line similar to how base will increment the
+ // physical (the column counts are the same).
+ //
+ if (log_line_ && c == '\n' && !unget_ && !unpeek_)
+ ++*log_line_;
+
+ base::get (c);
+ }
+
+ inline auto lexer::
+ geth (bool e) -> xchar
+ {
+ xchar c (get (e));
+ cs_.append (c);
+ return c;
+ }
+
+ inline void lexer::
+ geth (const xchar& c)
+ {
+ get (c);
+ cs_.append (c);
+ }
+
using type = token_type;
void lexer::
@@ -93,7 +108,7 @@ namespace build2
for (;; c = skip_spaces ())
{
t.file = log_file_;
- t.line = log_line_ ? * log_line_ : c.line;
+ t.line = log_line_ ? *log_line_ : c.line;
t.column = c.column;
if (eos (c))
@@ -104,6 +119,23 @@ namespace build2
const location l (&name_, c.line, c.column);
+ // Hash the token's line. The reason is debug info. In fact, doing
+ // this will make quite a few "noop" changes (like adding a newline
+ // anywhere in the source) cause the checksum to change. But there
+ // doesn't seem to be any way around it: the case where we benefit
+ // from the precise change detection the most (development) is also
+ // where we will most likely have debug info enabled.
+ //
+ // Note that in order not to make this completely useless we don't
+ // hash the column. Even if it is part of the debug info, having it a
+ // bit off shouldn't cause any significant mis-positioning. We also
+ // don't hash the file path for each token, instead only hashing it
+ // when it is changed with the #line directive (as well as in the
+ // constructor for the initial path).
+ //
+ cs_.append (t.line);
+ cs_.append (c);
+
switch (c)
{
// Preprocessor lines.
@@ -112,12 +144,14 @@ namespace build2
{
// It is tempting to simply scan until the newline ignoring
// anything in between. However, these lines can start a
- // multi-line C-style comment. So we have to tokenize them.
+ // multi-line C-style comment. So we have to tokenize them (and
+ // hash the data for each token).
//
// Note that this may not work for things like #error that can
// contain pretty much anything. Also note that lines that start
// with '#' can contain '#' further down. In this case we need to
- // be careful not to recurse (and consume multiple newlines).
+ // be careful not to recurse (and consume multiple newlines). Thus
+ // the ignore_pp flag.
//
// Finally, to support diagnostics properly we need to recognize
// #line directives.
@@ -206,7 +240,7 @@ namespace build2
if (p == '*')
{
- get (p);
+ geth (p);
t.type = type::punctuation;
return;
}
@@ -218,10 +252,13 @@ namespace build2
else if (p == '.')
{
get (p);
+
xchar q (peek ());
if (q == '.')
{
- get (q);
+ cs_.append (p);
+
+ geth (q);
t.type = type::punctuation;
return;
}
@@ -242,7 +279,7 @@ namespace build2
xchar p (peek ());
if (p == '=')
- get (p);
+ geth (p);
t.type = type::punctuation;
return;
@@ -254,12 +291,12 @@ namespace build2
if (p == c)
{
- get (p);
+ geth (p);
if ((p = peek ()) == '=')
- get (p);
+ geth (p);
}
else if (p == '=')
- get (p);
+ geth (p);
t.type = type::punctuation;
return;
@@ -269,15 +306,13 @@ namespace build2
{
xchar p (peek ());
- if (p == c)
- get (p);
- else if (p == '=')
- get (p);
+ if (p == c || p == '=')
+ geth (p);
else if (c == '-' && p == '>')
{
- get (p);
+ geth (p);
if ((p = peek ()) == '*')
- get (p);
+ geth (p);
}
t.type = type::punctuation;
@@ -288,10 +323,8 @@ namespace build2
{
xchar p (peek ());
- if (p == c)
- get (p);
- else if (p == '=')
- get (p);
+ if (p == c || p == '=')
+ geth (p);
t.type = type::punctuation;
return;
@@ -301,7 +334,7 @@ namespace build2
xchar p (peek ());
if (p == ':')
- get (p);
+ geth (p);
t.type = type::punctuation;
return;
@@ -340,7 +373,7 @@ namespace build2
string& id (t.value);
id.clear ();
- for (id += c; (c = peek ()) == '_' || alnum (c); get (c))
+ for (id += c; (c = peek ()) == '_' || alnum (c); geth (c))
id += c;
// If the following character is a quote, see if the identifier
@@ -382,7 +415,7 @@ namespace build2
if (i == n) // All characters "consumed".
{
- get (c);
+ geth (c);
id.clear ();
}
}
@@ -423,6 +456,8 @@ namespace build2
void lexer::
number_literal (token& t, xchar c)
{
+ // note: c is hashed
+
// A number (integer or floating point literal) can:
//
// 1. Start with a dot (which must be followed by a digit, e.g., .123).
@@ -500,10 +535,10 @@ namespace build2
case 'p':
case 'P':
{
- get (c);
+ geth (c);
c = peek ();
if (c == '+' || c == '-')
- get (c);
+ geth (c);
continue;
}
@@ -512,7 +547,7 @@ namespace build2
case '\'':
default: // Digits and letters.
{
- get (c);
+ geth (c);
continue;
}
}
@@ -526,11 +561,13 @@ namespace build2
void lexer::
char_literal (token& t, xchar c)
{
+ // note: c is hashed
+
const location l (&name_, c.line, c.column);
for (char p (c);;) // Previous character (see below).
{
- c = get ();
+ c = geth ();
if (eos (c) || c == '\n')
fail (l) << "unterminated character literal";
@@ -555,11 +592,13 @@ namespace build2
void lexer::
string_literal (token& t, xchar c)
{
+ // note: c is hashed
+
const location l (&name_, c.line, c.column);
for (char p (c);;) // Previous character (see below).
{
- c = get ();
+ c = geth ();
if (eos (c) || c == '\n')
fail (l) << "unterminated string literal";
@@ -584,6 +623,8 @@ namespace build2
void lexer::
raw_string_literal (token& t, xchar c)
{
+ // note: c is hashed
+
// The overall form is:
//
// R"<delimiter>(<raw_characters>)<delimiter>"
@@ -603,7 +644,7 @@ namespace build2
for (;;)
{
- c = get ();
+ c = geth ();
if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
fail (l) << "invalid raw string literal";
@@ -621,7 +662,7 @@ namespace build2
//
for (size_t i (0);;) // Position to match in d.
{
- c = get (false); // No newline escaping.
+ c = geth (false); // No newline escaping.
if (eos (c)) // Note: newline is ok.
fail (l) << "invalid raw string literal";
@@ -647,9 +688,11 @@ namespace build2
void lexer::
literal_suffix (xchar c)
{
+ // note: c is unhashed
+
// Parse a user-defined literal suffix identifier.
//
- for (get (c); (c = peek ()) == '_' || alnum (c); get (c)) ;
+ for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ;
}
void lexer::
@@ -657,12 +700,17 @@ namespace build2
{
// enter: first digit of the line number
// leave: last character of the line number or file string
+ // note: c is unhashed
// If our number and string tokens contained the literal values, then we
// could have used that. However, we ignore the value (along with escape
// processing, etc), for performance. Let's keep it that way and instead
// handle it ourselves.
//
+ // Note also that we are not hashing these at the character level,
+ // instead hashing the switch to a new file path below and leaving the
+ // line number to the token line hashing.
+ //
{
string& s (t.value);
@@ -726,6 +774,65 @@ namespace build2
}
log_file_ = path (move (s)); // Move back in.
+
+ // If the path is relative, then prefix it with the current working
+ // directory. Failing that, we will end up with different checksums for
+ // invocations from different directories.
+ //
+ // While this should work fine for normal cross-compilation, it's an
+ // entirely different story for the emulated case (e.g., msvc-linux
+ // where the preprocessed output contains absolute Windows paths). So
+ // we try to sense if things look fishy and leave the path alone.
+ //
+ // Also detect special names like <built-in> and <command-line>. Plus
+ // GCC sometimes adds what looks like the working directory (has a
+ // trailing slash). So ignore that as well.
+ //
+ if (!log_file_.to_directory ())
+ {
+ using tr = path::traits;
+ const string& f (log_file_.string ());
+
+ if (f.find (':') != string::npos ||
+ (f.front () == '<' && f.back () == '>') ||
+ log_file_.absolute ())
+ cs_.append (f);
+ else
+ {
+ // This gets complicated and slow: the path may contain '..' and
+ // '.' so strictly speaking we would need to normalize it.
+ // Instead, we are going to handle leading '..'s ourselves (the
+ // sane case) and ignore everything else (so if you have '..' or
+ // '.' somewhere in the middle, then things might not work
+ // optimally for you).
+ //
+ const string& d (work.string ());
+
+ // Iterate over leading '..' in f "popping" the corresponding
+ // number of trailing components from d.
+ //
+ size_t fp (0);
+ size_t dp (d.size () - 1);
+
+ for (size_t p;; )
+ {
+ // Note that in file we recognize any directory separator, not
+ // just of this platform (see note about emulation above).
+ //
+ if (f.compare (fp, 2, "..") != 0 ||
+ (f[fp + 2] != '/' && f[fp + 2] != '\\') || // Could be '\0'.
+ (p = tr::rfind_separator (d, dp)) == string::npos)
+ break;
+
+ fp += 3;
+ dp = p - 1;
+ }
+
+ cs_.append (d.c_str (), dp + 1);
+ cs_.append (tr::directory_separator); // Canonical in work.
+ cs_.append (f.c_str () + fp);
+ }
+ }
}
else
unget (c);
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
index 65e9012..1869344 100644
--- a/build2/cc/lexer.hxx
+++ b/build2/cc/lexer.hxx
@@ -5,6 +5,7 @@
#ifndef BUILD2_CC_LEXER_HXX
#define BUILD2_CC_LEXER_HXX
+#include <libbutl/sha256.hxx>
#include <libbutl/char-scanner.hxx>
#include <build2/types.hxx>
@@ -27,6 +28,10 @@ namespace build2
// saved from literals. The #line directive (and its shorthand notation)
// is recognized to provide the logical token location.
//
+ // While at it we also calculate the checksum of the input ignoring
+ // comments, whitespaces, etc. This is used to detect changes that do not
+ // alter the resulting token stream.
+ //
enum class token_type
{
// NOTE: remember to update operator<<() if changing anything here!
@@ -82,6 +87,9 @@ namespace build2
const path&
name () const {return name_;}
+ string
+ checksum () const {return cs_.string ();}
+
// Note that it is ok to call next() again after getting eos.
//
token
@@ -137,13 +145,21 @@ namespace build2
using base = char_scanner;
xchar
+ peek (bool escape = true);
+
+ xchar
get (bool escape = true);
void
get (const xchar& peeked);
+ // Hashing versions.
+ //
xchar
- peek (bool escape = true);
+ geth (bool escape = true);
+
+ void
+ geth (const xchar& peeked);
private:
const path name_;
@@ -154,6 +170,8 @@ namespace build2
//
path log_file_;
optional<uint64_t> log_line_;
+
+ sha256 cs_;
};
// Diagnostics plumbing.
diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx
index df2e257..a97a98b 100644
--- a/build2/cc/parser.cxx
+++ b/build2/cc/parser.cxx
@@ -129,6 +129,7 @@ namespace build2
if (bb != 0)
/*warn*/ fail (t) << "{}-imbalance detected";
+ checksum = l.checksum ();
return u;
}
diff --git a/build2/cc/parser.hxx b/build2/cc/parser.hxx
index d542d57..9142001 100644
--- a/build2/cc/parser.hxx
+++ b/build2/cc/parser.hxx
@@ -38,6 +38,7 @@ namespace build2
parse_module_name (token&);
public:
+ string checksum; // Translation unit checksum.
uint64_t export_pos; // Temporary hack, see parse_unit().
private:
diff --git a/build2/diagnostics.hxx b/build2/diagnostics.hxx
index 1e76099..1b8f370 100644
--- a/build2/diagnostics.hxx
+++ b/build2/diagnostics.hxx
@@ -60,6 +60,7 @@ namespace build2
// While uint8 is more than enough, use uint16 for the ease of printing.
//
extern uint16_t verb;
+ const uint16_t verb_never = 7;
template <typename F> inline void l1 (const F& f) {if (verb >= 1) f ();}
template <typename F> inline void l2 (const F& f) {if (verb >= 2) f ();}
diff --git a/build2/filesystem.cxx b/build2/filesystem.cxx
index 9d9b3b6..eefaf99 100644
--- a/build2/filesystem.cxx
+++ b/build2/filesystem.cxx
@@ -11,6 +11,22 @@ using namespace butl;
namespace build2
{
+ bool
+ touch (const path& p, bool create, uint16_t v)
+ {
+ if (verb >= v)
+ text << "touch " << p;
+
+ try
+ {
+ return touch_file (p, create);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to touch file " << p << ": " << e << endf;
+ }
+ }
+
fs_status<mkdir_status>
mkdir (const dir_path& d, uint16_t v)
{
diff --git a/build2/filesystem.hxx b/build2/filesystem.hxx
index 79633af..4ef4caf 100644
--- a/build2/filesystem.hxx
+++ b/build2/filesystem.hxx
@@ -30,6 +30,14 @@ namespace build2
explicit operator bool () const {return v == T::success;}
};
+ // Set the file access and modification times to the current time printing
+ // the standard diagnostics starting from the specified verbosity level. If
+ // the file does not exist, then create it if create is true and fail
+ // otherwise. Return true if the file was created and false otherwise.
+ //
+ bool
+ touch (const path&, bool create, uint16_t verbosity = 1);
+
// Create the directory and print the standard diagnostics starting from
// the specified verbosity level.
//
diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx
index 2a8150d..ca47ad2 100644
--- a/build2/test/script/builtin.cxx
+++ b/build2/test/script/builtin.cxx
@@ -4,12 +4,6 @@
#include <build2/test/script/builtin.hxx>
-#ifndef _WIN32
-# include <utime.h>
-#else
-# include <sys/utime.h>
-#endif
-
#include <locale>
#include <ostream>
#include <sstream>
@@ -1505,38 +1499,11 @@ namespace build2
try
{
- if (file_exists (p))
- {
- // Set the file access and modification times to the current
- // time. Note that we don't register (implicit) cleanup for an
- // existing path.
- //
-#ifndef _WIN32
- if (utime (p.string ().c_str (), nullptr) == -1)
-#else
- if (_utime (p.string ().c_str (), nullptr) == -1)
-#endif
- throw_generic_error (errno);
- }
- else if (!entry_exists (p))
- {
- // Create the file. Assume the file access and modification
- // times are set to the current time automatically.
- //
- try
- {
- fdopen (p, fdopen_mode::out | fdopen_mode::create);
- }
- catch (const io_error& e)
- {
- error () << "cannot create file '" << p << "': " << e;
- }
-
- if (cleanup)
- sp.clean ({cleanup_type::always, p}, true);
- }
- else
- error () << "'" << p << "' exists and is not a file";
+ // Note that we don't register (implicit) cleanup for an
+ // existing path.
+ //
+ if (touch_file (p) && cleanup)
+ sp.clean ({cleanup_type::always, p}, true);
}
catch (const system_error& e)
{