From e222cbabb84b5a6357242e47196f7e0eeb69a44f Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 20 May 2020 15:00:09 +0200 Subject: Implement build script variable hashing --- libbuild2/build/script/parser.cxx | 40 ++++++++++++++++++++-- libbuild2/build/script/script.hxx | 14 +++++++- libbuild2/name.hxx | 12 +++++++ libbuild2/parser.cxx | 2 ++ libbuild2/rule.cxx | 71 +++++++++++++++++++++++++++++++-------- libbuild2/rule.hxx | 4 ++- libbuild2/script/script.cxx | 2 -- 7 files changed, 124 insertions(+), 21 deletions(-) diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index 6649777..86019ba 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -16,6 +16,12 @@ namespace build2 { using type = token_type; + static inline bool + special_variable (const string& name) + { + return name == ">" || name == "<"; + } + // // Pre-parse. // @@ -96,9 +102,9 @@ namespace build2 case line_type::var: { // Check if we are trying to modify any of the special variables - // ($>). + // ($>, $<). // - if (t.value == ">") + if (special_variable (t.value)) fail (t) << "attempt to set '" << t.value << "' variable"; // We don't pre-enter variables. @@ -327,13 +333,41 @@ namespace build2 lookup parser:: lookup_variable (name&& qual, string&& name, const location& loc) { + // In the pre-parse mode collect the referenced variable names for the + // script semantics change tracking. + // if (pre_parse_) + { + // Add the variable name skipping special variables and suppressing + // duplicates. + // + if (!name.empty () && !special_variable (name)) + { + auto& vars (script_->vars); + + if (find (vars.begin (), vars.end (), name) == vars.end ()) + vars.push_back (move (name)); + } + return lookup (); + } if (!qual.empty ()) fail (loc) << "qualified variable name"; - return environment_->lookup (name); + lookup r (environment_->lookup (name)); + + // Fail if non-script-local variable with an untracked name. + // + if (r.defined () && !r.belongs (*environment_)) + { + const auto& vars (script_->vars); + + if (find (vars.begin (), vars.end (), name) == vars.end ()) + fail (loc) << "use of untracked variable '" << name << "'"; + } + + return r; } } } diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx index 1540ed8..29d62aa 100644 --- a/libbuild2/build/script/script.hxx +++ b/libbuild2/build/script/script.hxx @@ -40,7 +40,19 @@ namespace build2 // Note that the variables are not pre-entered into a pool during the // parsing phase, so the line variable pointers are NULL. // - build2::script::lines lines; + build2::script::lines lines; + + // Referenced ordinary (non-special) variables. + // + // Used for the script semantics change tracking. The variable list is + // filled during the pre-parsing phase and is checked against during + // the execution phase. If during execution some non-script-local + // variable is not found in the list (may happen for a computed name), + // then the execution fails since the script semantics may not be + // properly tracked (the variable value change will not trigger the + // target rebuild). + // + small_vector vars; location start_loc; location end_loc; diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx index d0e8d85..39d2396 100644 --- a/libbuild2/name.hxx +++ b/libbuild2/name.hxx @@ -113,6 +113,18 @@ namespace build2 LIBBUILD2_SYMEXPORT string to_string (const name&); + template + inline void + to_checksum (T& cs, const name& n) + { + if (n.proj) + cs.append (n.proj->string ()); + cs.append (n.dir.string ()); + cs.append (n.type); + cs.append (n.value); + cs.append (n.pair); + } + // Store a string in a name in a reversible way. If the string ends with a // trailing directory separator then it is stored as a directory, otherwise // as a simple name. diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index ff32340..7a4efcd 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -1131,6 +1131,8 @@ namespace build2 auto* asr (new adhoc_script_rule (move (diag), loc, st.value.size ())); ar.reset (asr); + asr->checksum = sha256 (t.value).string (); + istringstream is (move (t.value)); build::script::parser p (ctx); asr->script = p.pre_parse (is, asr->loc.file, loc.line + 1); diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx index 5e28805..b9441e0 100644 --- a/libbuild2/rule.cxx +++ b/libbuild2/rule.cxx @@ -490,26 +490,69 @@ namespace build2 if (dd.expect ("adhoc 1") != nullptr) l4 ([&]{trace << "rule mismatch forcing update of " << t;}); - // Then the tool checksums. + // Then the script checksum. // - // @@ TODO: obtain checksums of all the targets used as commands in - // the script. + // Ideally, to detect changes to the script semantics, we would hash the + // text with all the variables expanded but without executing any + // commands. In practice, this is easier said than done (think the set + // builtin that receives output of a command that modifies the + // filesystem). // - //if (dd.expect (csum) != nullptr) - // l4 ([&]{trace << "compiler mismatch forcing update of " << t;}); + // So as the next best thing we are going to hash the unexpanded text as + // well as values of all the variables expanded in it (which we get as a + // side effect of pre-parsing the script). This approach has a number of + // drawbacks: + // + // - We can't handle computed variable names (e.g., $($x ? X : Y)). + // + // - We may "overhash" by including variables that are actually + // script-local. + // + if (dd.expect (checksum) != nullptr) + l4 ([&]{trace << "recipe text change forcing update of " << t;}); - // Then the script checksum. + // For each variable hash its name, undefined/null/non-null indicator, + // and the value if non-null. // - // @@ TODO: for now we hash the unexpanded text but it should be - // expanded. This will take care of all the relevant input - // file name changes as well as any other variables the - // script may reference. + { + sha256 cs; + names storage; + + for (const string& n: script.vars) + { + cs.append (n); + + lookup l; + + if (const variable* var = ctx.var_pool.find (n)) + l = t[var]; + + cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + + if (l) + { + storage.clear (); + names_view ns (reverse (*l, storage)); + + for (const name& n: ns) + to_checksum (cs, n); + } + } + + //@@ TODO (later): hash special variables values (targets, + // prerequisites). + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "recipe variable change forcing update of " << t;}); + } + + // Then the tools checksums. // - // It feels like we need a special execute mode that instead - // of executing hashes the commands. + // @@ TODO: obtain checksums of all the targets used as commands in + // the script. // - //if (dd.expect (sha256 (code).string ()) != nullptr) - // l4 ([&]{trace << "recipe change forcing update of " << t;}); + //if (dd.expect (csum) != nullptr) + // l4 ([&]{trace << "compiler mismatch forcing update of " << t;}); } // Update if depdb mismatch. diff --git a/libbuild2/rule.hxx b/libbuild2/rule.hxx index 50ed126..a674b05 100644 --- a/libbuild2/rule.hxx +++ b/libbuild2/rule.hxx @@ -188,8 +188,10 @@ namespace build2 : adhoc_rule (l, b), diag (move (d)) {} public: - const optional diag; // Command name for low-verbosity diag. + const optional diag; // Command name for low-verbosity diag. + string checksum; // Script text hashsum. script_type script; + }; // Ad hoc C++ rule. diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index 04727a5..2529671 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -5,8 +5,6 @@ #include -#include // find_if() - using namespace std; namespace build2 -- cgit v1.1