From 19f8e6bd7b301da3ce6e7483f4d06dc2eca3e402 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 19 May 2020 14:27:07 +0200 Subject: Additional ad hoc C++ recipe change detection --- libbuild2/rule.cxx | 483 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 281 insertions(+), 202 deletions(-) diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx index 8b5017d..d5d6661 100644 --- a/libbuild2/rule.cxx +++ b/libbuild2/rule.cxx @@ -651,28 +651,10 @@ namespace build2 // // Since the recipe can be shared among multiple targets, several threads // can all be trying to do this in parallel. - - // The only way to guarantee that the name of our module matches its - // implementation is to based the name on the implementation hash. - // - // Unfortunately, this means we will be creating a new project (and - // leaving behind the old one as garbage) for every change to the - // recipe. On the other hand, if the recipe is moved around unchanged, we - // will reuse the same project. In fact, two different recipes (e.g., in - // different buildfiles) with the same text will share the project. // - // @@ Shouldn't we also include buildfile path and line seeing that we - // add them as #line? Or can we do something clever for this case - // (i.e., if update is successful, then this information is no longer - // necessary, unless update is caused by something external, like - // change of compiler). Also location in comment. Why not just - // overwrite the source file every time we compile it, to KISS? - // - string id (sha256 (code).abbreviated_string (12)); - - // We use the relaxed memory order because any change must go through the - // serial load phase. In other words, all we need here is atomicity with - // ordering/visibility provided by the phase mutex. + // We use the relaxed memory order here because any change must go through + // the serial load phase. In other words, all we need here is atomicity + // with ordering/visibility provided by the phase mutex. // cxx_rule* impl (this->impl.load (memory_order_relaxed)); @@ -688,241 +670,298 @@ namespace build2 using create_function = cxx_rule* (const location&); using load_function = create_function* (); + // The only way to guarantee that the name of our module matches its + // implementation is to based the name on the implementation hash (plus + // the language, in case we support other compiled implementations in + // the future). + // + // Unfortunately, this means we will be creating a new project (and + // leaving behind the old one as garbage) for every change to the + // recipe. On the other hand, if the recipe is moved around unchanged, + // we will reuse the same project. In fact, two different recipes (e.g., + // in different buildfiles) with the same text will share the project. + // + // The fact that we don't incorporate the recipe location into the hash + // but include it in the source (in the form of the #line directive; see + // below) has its own problems. If we do nothing extra here, then if a + // "moved" but otherwise unchanged recipe is updated (for example, + // because of changes in the build system core), then we may end up with + // bogus location in the diagnostics. + // + // The straightforward solution would be to just update the location in + // the source code if it has changed. This, however, will lead to + // unnecessary and probably surprising recompilations since any line + // count change before the recipe will trigger this update. One key + // observation here is that we need accurate location information only + // if we are going to recompile the recipe but the change to location + // itself does not render the recipe out of date. So what we going to do + // is factor the location information into its own small header and then + // keep it up-to-date without changing its modification time. + // + // This works well if the project is not shared by multiple recipes. + // However, if we have recipes in several buildfiles with identical + // text, then the location information may end up yo-yo'ing depending on + // which recipe got here first. + // + // There doesn't seem to be much we can do about it without incurring + // other drawbacks/overheads. So the answer is for the user to use an ad + // hoc rule with the common implementation instead of a bunch of + // duplicate recipes. + // + string id; + { + sha256 cs; + cs.append ("c++"); + cs.append (code); + id = cs.abbreviated_string (12); + } + dir_path pd (rs.out_path () / rs.root_extra->build_dir / recipes_build_dir /= id); + path bf (pd / std_buildfile_file); + string sym ("load_" + id); - // Update the project is necessary. + // Check whether the file exists and its last line matches the specified + // signature. // - path of; - try + // Note: we use the last instead of the first line for extra protection + // against incomplete writes. + // + auto check_sig = [] (const path& f, const string& s) -> bool { - const uint16_t verbosity (3); + try + { + if (!file_exists (f)) + return false; - ofdstream ofs; + ifdstream ifs (f); - optional altn (false); // Standard naming scheme. - if (!is_src_root (pd, altn)) + string l; + while (ifs.peek () != ifdstream::traits_type::eof ()) + getline (ifs, l); + + return l == s; + } + catch (const io_error& e) + { + fail << "unable to read " << f << ": " << e << endf; + } + catch (const system_error& e) { + fail << "unable to access " << f << ": " << e << endf; + } + }; - // Write ad hoc config.build that loads the ~build2 configuration. - // This way the configuration will be always in sync with ~build2 - // and we can update the recipe manually (e.g., for debugging). - // - create_project ( - pd, - dir_path (), /* amalgamation */ - {}, /* boot_modules */ - "cxx.std = latest", /* root_pre */ - {"cxx."}, /* root_modules */ - "", /* root_post */ - string ("config"), /* config_module */ - string ("config.config.load = ~build2"), /* config_file */ - false, /* buildfile */ - "build2 core", /* who */ - verbosity); /* verbosity */ - - - // Write the rule source file. - // - of = path (pd / "rule.cxx"); + bool nested (ctx.module_context == &ctx); - if (verb >= verbosity) - text << (verb >= 2 ? "cat >" : "save ") << of; + // Create the build context if necessary. + // + if (ctx.module_context == nullptr) + { + if (!ctx.module_context_storage) + fail (loc) << "unable to update ad hoc recipe for target " << t << + info << "building of ad hoc recipes is disabled"; - ofs.open (of); + create_module_context (ctx, loc); + } - ofs << "#include \"location.hxx\"" << '\n' - << '\n'; + // "Switch" to the module context. + // + context& ctx (*t.ctx.module_context); - // Include every header that can plausibly be needed by a rule. - // - ofs << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << '\n'; - - // Normally the recipe code will have one level of indentation so - // let's not indent the namespace level to match. - // - ofs << "namespace build2" << '\n' - << "{" << '\n' - << '\n'; - - // If we want the user to be able to supply a custom constuctor, - // then we have to give the class a predictable name (i.e., we - // cannot use id as part of its name) and put it into an unnamed - // namespace. One clever idea is to call the class `constructor` but - // the name could also be used for a custom destructor (still could - // work) or for name qualification (would definitely look bizarre). - // - // In this light the most natural name is probable `rule`. The issue - // is we already have this name in the build2 namespace (and its our - // indirect base). In fact, any name that we choose could in the - // future conflict with something in that namespace so maybe it - // makes sense to bite the bullet and pick a name that is least - // likely to be used by the user directly (can always use cxx_rule - // instead). - // - ofs << "namespace" << '\n' - << "{" << '\n' - << "class rule: public cxx_rule" << '\n' - << "{" << '\n' - << "public:" << '\n' - << '\n'; - - // Inherit base constructor. This way the user may provide their own - // but don't have to. - // - ofs << " using cxx_rule::cxx_rule;" << '\n' - << '\n'; + const uint16_t verbosity (3); // Project creation command verbosity. - // An extern "C" function cannot throw which can happen in case of a - // user-defined constructor. So we need an extra level of - // indirection. We incorporate id to make sure it doesn't conflict - // with anything user-defined. - // - ofs << " static cxx_rule*" << '\n' - << " create_" << id << " (const location& l)" << '\n' - << " {" << '\n' - << " return new rule (l);" << '\n' - << " }" << '\n' - << '\n'; - - // Use the #line directive to point diagnostics to the code in the - // buildfile. Note that there is no easy way to restore things to - // point back to the source file (other than another #line with a - // line and a file). Seeing that we don't have much after, let's not - // bother for now. - // - ofs << "#line RECIPE_LINE RECIPE_FILE" << '\n'; + // Project and location signatures. + // + // Specifically, we update the project version when changing anything + // which would make the already existing projects unusable. + // + const string& lf (!loc.file.path.empty () + ? loc.file.path.string () + : loc.file.name ? *loc.file.name : string ()); - // Note that the code always includes trailing newline. - // - ofs << code - << "};" << '\n' - << '\n'; + const string psig ("# c++ 1"); + const string lsig ("// " + lf + ':' + to_string (loc.line)); - // Add an alias that we can use unambiguously in the load function. - // - ofs << "using rule_" << id << " = rule;" << '\n' - << "}" << '\n' - << '\n'; + // Check whether we need to (re)create the project. + // + optional altn (false); // Standard naming scheme. + bool create (!is_src_root (pd, altn)); - // Entry point. - // - ofs << "extern \"C\"" << '\n' - << "#ifdef _WIN32" << '\n' - << "__declspec(dllexport)" << '\n' - << "#endif" << '\n' - << "cxx_rule* (*" << sym << " ()) (const location&)" << '\n' - << "{" << '\n' - << " return &rule_" << id << "::create_" << id << ";" << '\n' - << "}" << '\n' - << '\n'; - - ofs << "}" << '\n'; + if (!create && (create = !check_sig (bf, psig))) + rmdir_r (ctx, pd, false, verbosity); // Never dry-run. - ofs.close (); + path of; + ofdstream ofs; + if (create) + try + { + // Write ad hoc config.build that loads the ~build2 configuration. + // This way the configuration will be always in sync with ~build2 + // and we can update the recipe manually (e.g., for debugging). + // + create_project ( + pd, + dir_path (), /* amalgamation */ + {}, /* boot_modules */ + "cxx.std = latest", /* root_pre */ + {"cxx."}, /* root_modules */ + "", /* root_post */ + string ("config"), /* config_module */ + string ("config.config.load = ~build2"), /* config_file */ + false, /* buildfile */ + "build2 core", /* who */ + verbosity); /* verbosity */ + + + // Write the rule source file. + // + of = path (pd / "rule.cxx"); - // Write buildfile. - // - of = path (pd / std_buildfile_file); + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; - if (verb >= verbosity) - text << (verb >= 2 ? "cat >" : "save ") << of; + ofs.open (of); - ofs.open (of); + ofs << "#include \"location.hxx\"" << '\n' + << '\n'; - ofs << "import imp_libs += build2%lib{build2}" << '\n' - << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs"<< '\n'; + // Include every header that can plausibly be needed by a rule. + // + ofs << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << '\n'; + + // Normally the recipe code will have one level of indentation so + // let's not indent the namespace level to match. + // + ofs << "namespace build2" << '\n' + << "{" << '\n' + << '\n'; + + // If we want the user to be able to supply a custom constuctor, then + // we have to give the class a predictable name (i.e., we cannot use + // id as part of its name) and put it into an unnamed namespace. One + // clever idea is to call the class `constructor` but the name could + // also be used for a custom destructor (still could work) or for name + // qualification (would definitely look bizarre). + // + // In this light the most natural name is probable `rule`. The issue + // is we already have this name in the build2 namespace (and its our + // indirect base). In fact, any name that we choose could in the + // future conflict with something in that namespace so maybe it makes + // sense to bite the bullet and pick a name that is least likely to be + // used by the user directly (can always use cxx_rule instead). + // + ofs << "namespace" << '\n' + << "{" << '\n' + << "class rule: public cxx_rule" << '\n' + << "{" << '\n' + << "public:" << '\n' + << '\n'; + + // Inherit base constructor. This way the user may provide their own + // but don't have to. + // + ofs << " using cxx_rule::cxx_rule;" << '\n' + << '\n'; - ofs.close (); - } + // An extern "C" function cannot throw which can happen in case of a + // user-defined constructor. So we need an extra level of indirection. + // We incorporate id to make sure it doesn't conflict with anything + // user-defined. + // + ofs << " static cxx_rule*" << '\n' + << " create_" << id << " (const location& l)" << '\n' + << " {" << '\n' + << " return new rule (l);" << '\n' + << " }" << '\n' + << '\n'; + + // Use the #line directive to point diagnostics to the code in the + // buildfile. Note that there is no easy way to restore things to + // point back to the source file (other than another #line with a line + // and a file). Seeing that we don't have much after, let's not bother + // for now. + // + ofs << "#line RECIPE_LINE RECIPE_FILE" << '\n'; - // Create/update the recipe location header. + // Note that the code always includes trailing newline. // - // For update, preserve the file timestamp in order not to render the - // recipe out of date. + ofs << code + << "};" << '\n' + << '\n'; + + // Add an alias that we can use unambiguously in the load function. // - of = path (pd / "location.hxx"); + ofs << "using rule_" << id << " = rule;" << '\n' + << "}" << '\n' + << '\n'; + + // Entry point. + // + ofs << "extern \"C\"" << '\n' + << "#ifdef _WIN32" << '\n' + << "__declspec(dllexport)" << '\n' + << "#endif" << '\n' + << "cxx_rule* (*" << sym << " ()) (const location&)" << '\n' + << "{" << '\n' + << " return &rule_" << id << "::create_" << id << ";" << '\n' + << "}" << '\n' + << '\n'; + + ofs << "}" << '\n'; + + ofs.close (); - entry_time et (file_time (of)); + + // Write buildfile. + // + of = bf; if (verb >= verbosity) text << (verb >= 2 ? "cat >" : "save ") << of; ofs.open (of); - // Recipe file and line for the #line directive above. Note that the - // code starts from the next line thus +1. We also need to escape - // backslashes (Windows paths). - // - { - const string& f (!loc.file.path.empty () - ? loc.file.path.string () - : loc.file.name ? *loc.file.name : string ()); - - ofs << "#define RECIPE_FILE \"" << sanitize_strlit (f) << '"' << '\n' - << "#define RECIPE_LINE " << loc.line + 1 << '\n'; - } + ofs << "import imp_libs += build2%lib{build2}" << '\n' + << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs" << '\n' + << '\n' + << psig << '\n'; ofs.close (); - - if (et.modification != timestamp_nonexistent) - file_time (of, et); } catch (const io_error& e) { fail << "unable to write to " << of << ": " << e; } - catch (const system_error& e) - { - fail << "unable to get/set timestamp for " << of << ": " << e; - } // Update the library target in the module context. // const target* l (nullptr); do // Breakout loop. { - bool nested (ctx.module_context == &ctx); - - // Create the build context if necessary. - // - if (ctx.module_context == nullptr) - { - if (!ctx.module_context_storage) - fail (loc) << "unable to update ad hoc recipe for target " << t << - info << "building of ad hoc recipes is disabled"; - - create_module_context (ctx, loc); - } - - // "Switch" to the module context. - // - context& ctx (*t.ctx.module_context); - // Load the project in the module context. // // Note that it's possible it has already been loaded (see above about // the id calculation). // - path bf (pd / std_buildfile_file); scope& rs (load_project (ctx, pd, pd, false /* forwarded */)); auto find_target = [&ctx, &rs, &pd, &id] () @@ -939,7 +978,8 @@ namespace build2 // If the project has already been loaded then, as an optimization, // check if the target has already been updated (this will make a - // difference we if we have identical recipes in several buildfiles). + // difference we if we have identical recipes in several buildfiles, + // especially to the location update that comes next). // if (!source_once (rs, rs, bf)) { @@ -949,6 +989,45 @@ namespace build2 break; } + // Create/update the recipe location header. + // + // For update, preserve the file timestamp in order not to render the + // recipe out of date. + // + of = path (pd / "location.hxx"); + if (!check_sig (of, lsig)) + try + { + entry_time et (file_time (of)); + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; + + ofs.open (of); + + // Recipe file and line for the #line directive above. Note that the + // code starts from the next line thus +1. We also need to escape + // backslashes (Windows paths). + // + ofs << "#define RECIPE_FILE \"" << sanitize_strlit (lf) << '"'<< '\n' + << "#define RECIPE_LINE " << loc.line + 1 << '\n' + << '\n' + << lsig << '\n'; + + ofs.close (); + + if (et.modification != timestamp_nonexistent) + file_time (of, et); + } + catch (const io_error& e) + { + fail << "unable to write to " << of << ": " << e; + } + catch (const system_error& e) + { + fail << "unable to get/set timestamp for " << of << ": " << e; + } + if (nested) { // This means there is a perform update action already in progress -- cgit v1.1