From 19f8e6bd7b301da3ce6e7483f4d06dc2eca3e402 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Tue, 19 May 2020 14:27:07 +0200
Subject: Additional ad hoc C++ recipe change detection

---
 libbuild2/rule.cxx | 483 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 281 insertions(+), 202 deletions(-)

diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx
index 8b5017d..d5d6661 100644
--- a/libbuild2/rule.cxx
+++ b/libbuild2/rule.cxx
@@ -651,28 +651,10 @@ namespace build2
     //
     // Since the recipe can be shared among multiple targets, several threads
     // can all be trying to do this in parallel.
-
-    // The only way to guarantee that the name of our module matches its
-    // implementation is to based the name on the implementation hash.
-    //
-    // Unfortunately, this means we will be creating a new project (and
-    // leaving behind the old one as garbage) for every change to the
-    // recipe. On the other hand, if the recipe is moved around unchanged, we
-    // will reuse the same project. In fact, two different recipes (e.g., in
-    // different buildfiles) with the same text will share the project.
     //
-    // @@ Shouldn't we also include buildfile path and line seeing that we
-    //    add them as #line? Or can we do something clever for this case
-    //    (i.e., if update is successful, then this information is no longer
-    //    necessary, unless update is caused by something external, like
-    //    change of compiler). Also location in comment. Why not just
-    //    overwrite the source file every time we compile it, to KISS?
-    //
-    string id (sha256 (code).abbreviated_string (12));
-
-    // We use the relaxed memory order because any change must go through the
-    // serial load phase. In other words, all we need here is atomicity with
-    // ordering/visibility provided by the phase mutex.
+    // We use the relaxed memory order here because any change must go through
+    // the serial load phase. In other words, all we need here is atomicity
+    // with ordering/visibility provided by the phase mutex.
     //
     cxx_rule* impl (this->impl.load (memory_order_relaxed));
 
@@ -688,241 +670,298 @@ namespace build2
       using create_function = cxx_rule* (const location&);
       using load_function = create_function* ();
 
+      // The only way to guarantee that the name of our module matches its
+      // implementation is to based the name on the implementation hash (plus
+      // the language, in case we support other compiled implementations in
+      // the future).
+      //
+      // Unfortunately, this means we will be creating a new project (and
+      // leaving behind the old one as garbage) for every change to the
+      // recipe. On the other hand, if the recipe is moved around unchanged,
+      // we will reuse the same project. In fact, two different recipes (e.g.,
+      // in different buildfiles) with the same text will share the project.
+      //
+      // The fact that we don't incorporate the recipe location into the hash
+      // but include it in the source (in the form of the #line directive; see
+      // below) has its own problems. If we do nothing extra here, then if a
+      // "moved" but otherwise unchanged recipe is updated (for example,
+      // because of changes in the build system core), then we may end up with
+      // bogus location in the diagnostics.
+      //
+      // The straightforward solution would be to just update the location in
+      // the source code if it has changed. This, however, will lead to
+      // unnecessary and probably surprising recompilations since any line
+      // count change before the recipe will trigger this update. One key
+      // observation here is that we need accurate location information only
+      // if we are going to recompile the recipe but the change to location
+      // itself does not render the recipe out of date. So what we going to do
+      // is factor the location information into its own small header and then
+      // keep it up-to-date without changing its modification time.
+      //
+      // This works well if the project is not shared by multiple recipes.
+      // However, if we have recipes in several buildfiles with identical
+      // text, then the location information may end up yo-yo'ing depending on
+      // which recipe got here first.
+      //
+      // There doesn't seem to be much we can do about it without incurring
+      // other drawbacks/overheads. So the answer is for the user to use an ad
+      // hoc rule with the common implementation instead of a bunch of
+      // duplicate recipes.
+      //
+      string id;
+      {
+        sha256 cs;
+        cs.append ("c++");
+        cs.append (code);
+        id = cs.abbreviated_string (12);
+      }
+
       dir_path pd (rs.out_path () /
                    rs.root_extra->build_dir /
                    recipes_build_dir /= id);
 
+      path bf (pd / std_buildfile_file);
+
       string sym ("load_" + id);
 
-      // Update the project is necessary.
+      // Check whether the file exists and its last line matches the specified
+      // signature.
       //
-      path of;
-      try
+      // Note: we use the last instead of the first line for extra protection
+      // against incomplete writes.
+      //
+      auto check_sig = [] (const path& f, const string& s) -> bool
       {
-        const uint16_t verbosity (3);
+        try
+        {
+          if (!file_exists (f))
+            return false;
 
-        ofdstream ofs;
+          ifdstream ifs (f);
 
-        optional<bool> altn (false); // Standard naming scheme.
-        if (!is_src_root (pd, altn))
+          string l;
+          while (ifs.peek () != ifdstream::traits_type::eof ())
+            getline (ifs, l);
+
+          return l == s;
+        }
+        catch (const io_error& e)
+        {
+          fail << "unable to read " << f << ": " << e << endf;
+        }
+        catch (const system_error& e)
         {
+          fail << "unable to access " << f << ": " << e << endf;
+        }
+      };
 
-          // Write ad hoc config.build that loads the ~build2 configuration.
-          // This way the configuration will be always in sync with ~build2
-          // and we can update the recipe manually (e.g., for debugging).
-          //
-          create_project (
-            pd,
-            dir_path (),                             /* amalgamation */
-            {},                                      /* boot_modules */
-            "cxx.std = latest",                      /* root_pre */
-            {"cxx."},                                /* root_modules */
-            "",                                      /* root_post */
-            string ("config"),                       /* config_module */
-            string ("config.config.load = ~build2"), /* config_file */
-            false,                                   /* buildfile */
-            "build2 core",                           /* who */
-            verbosity);                              /* verbosity */
-
-
-          // Write the rule source file.
-          //
-          of = path (pd / "rule.cxx");
+      bool nested (ctx.module_context == &ctx);
 
-          if (verb >= verbosity)
-            text << (verb >= 2 ? "cat >" : "save ") << of;
+      // Create the build context if necessary.
+      //
+      if (ctx.module_context == nullptr)
+      {
+        if (!ctx.module_context_storage)
+          fail (loc) << "unable to update ad hoc recipe for target " << t <<
+            info << "building of ad hoc recipes is disabled";
 
-          ofs.open (of);
+        create_module_context (ctx, loc);
+      }
 
-          ofs << "#include \"location.hxx\""                            << '\n'
-              << '\n';
+      // "Switch" to the module context.
+      //
+      context& ctx (*t.ctx.module_context);
 
-          // Include every header that can plausibly be needed by a rule.
-          //
-          ofs << "#include <libbuild2/types.hxx>"                       << '\n'
-              << "#include <libbuild2/forward.hxx>"                     << '\n'
-              << "#include <libbuild2/utility.hxx>"                     << '\n'
-              << '\n'
-              << "#include <libbuild2/file.hxx>"                        << '\n'
-              << "#include <libbuild2/rule.hxx>"                        << '\n'
-              << "#include <libbuild2/depdb.hxx>"                       << '\n'
-              << "#include <libbuild2/scope.hxx>"                       << '\n'
-              << "#include <libbuild2/target.hxx>"                      << '\n'
-              << "#include <libbuild2/context.hxx>"                     << '\n'
-              << "#include <libbuild2/variable.hxx>"                    << '\n'
-              << "#include <libbuild2/algorithm.hxx>"                   << '\n'
-              << "#include <libbuild2/filesystem.hxx>"                  << '\n'
-              << "#include <libbuild2/diagnostics.hxx>"                 << '\n'
-              << '\n';
-
-          // Normally the recipe code will have one level of indentation so
-          // let's not indent the namespace level to match.
-          //
-          ofs << "namespace build2"                                     << '\n'
-              << "{"                                                    << '\n'
-              << '\n';
-
-          // If we want the user to be able to supply a custom constuctor,
-          // then we have to give the class a predictable name (i.e., we
-          // cannot use id as part of its name) and put it into an unnamed
-          // namespace. One clever idea is to call the class `constructor` but
-          // the name could also be used for a custom destructor (still could
-          // work) or for name qualification (would definitely look bizarre).
-          //
-          // In this light the most natural name is probable `rule`. The issue
-          // is we already have this name in the build2 namespace (and its our
-          // indirect base). In fact, any name that we choose could in the
-          // future conflict with something in that namespace so maybe it
-          // makes sense to bite the bullet and pick a name that is least
-          // likely to be used by the user directly (can always use cxx_rule
-          // instead).
-          //
-          ofs << "namespace"                                            << '\n'
-              << "{"                                                    << '\n'
-              << "class rule: public cxx_rule"                          << '\n'
-              << "{"                                                    << '\n'
-              << "public:"                                              << '\n'
-              << '\n';
-
-          // Inherit base constructor. This way the user may provide their own
-          // but don't have to.
-          //
-          ofs << "  using cxx_rule::cxx_rule;"                          << '\n'
-              << '\n';
+      const uint16_t verbosity (3); // Project creation command verbosity.
 
-          // An extern "C" function cannot throw which can happen in case of a
-          // user-defined constructor. So we need an extra level of
-          // indirection. We incorporate id to make sure it doesn't conflict
-          // with anything user-defined.
-          //
-          ofs << "  static cxx_rule*"                                   << '\n'
-              << "  create_" << id << " (const location& l)"            << '\n'
-              << "  {"                                                  << '\n'
-              << "    return new rule (l);"                             << '\n'
-              << "  }"                                                  << '\n'
-              << '\n';
-
-          // Use the #line directive to point diagnostics to the code in the
-          // buildfile. Note that there is no easy way to restore things to
-          // point back to the source file (other than another #line with a
-          // line and a file). Seeing that we don't have much after, let's not
-          // bother for now.
-          //
-          ofs << "#line RECIPE_LINE RECIPE_FILE"                        << '\n';
+      // Project and location signatures.
+      //
+      // Specifically, we update the project version when changing anything
+      // which would make the already existing projects unusable.
+      //
+      const string& lf (!loc.file.path.empty ()
+                        ? loc.file.path.string ()
+                        : loc.file.name ? *loc.file.name : string ());
 
-          // Note that the code always includes trailing newline.
-          //
-          ofs << code
-              << "};"                                                   << '\n'
-              << '\n';
+      const string psig ("# c++ 1");
+      const string lsig ("// " + lf + ':' + to_string (loc.line));
 
-          // Add an alias that we can use unambiguously in the load function.
-          //
-          ofs << "using rule_" << id << " = rule;"                      << '\n'
-              << "}"                                                    << '\n'
-              << '\n';
+      // Check whether we need to (re)create the project.
+      //
+      optional<bool> altn (false); // Standard naming scheme.
+      bool create (!is_src_root (pd, altn));
 
-          // Entry point.
-          //
-          ofs << "extern \"C\""                                         << '\n'
-              << "#ifdef _WIN32"                                        << '\n'
-              << "__declspec(dllexport)"                                << '\n'
-              << "#endif"                                               << '\n'
-              << "cxx_rule* (*" << sym << " ()) (const location&)"      << '\n'
-              << "{"                                                    << '\n'
-              << "  return &rule_" << id << "::create_" << id << ";"    << '\n'
-              << "}"                                                    << '\n'
-              << '\n';
-
-          ofs << "}"                                                    << '\n';
+      if (!create && (create = !check_sig (bf, psig)))
+        rmdir_r (ctx, pd, false, verbosity); // Never dry-run.
 
-          ofs.close ();
+      path of;
+      ofdstream ofs;
 
+      if (create)
+      try
+      {
+        // Write ad hoc config.build that loads the ~build2 configuration.
+        // This way the configuration will be always in sync with ~build2
+        // and we can update the recipe manually (e.g., for debugging).
+        //
+        create_project (
+          pd,
+          dir_path (),                             /* amalgamation */
+          {},                                      /* boot_modules */
+          "cxx.std = latest",                      /* root_pre */
+          {"cxx."},                                /* root_modules */
+          "",                                      /* root_post */
+          string ("config"),                       /* config_module */
+          string ("config.config.load = ~build2"), /* config_file */
+          false,                                   /* buildfile */
+          "build2 core",                           /* who */
+          verbosity);                              /* verbosity */
+
+
+        // Write the rule source file.
+        //
+        of = path (pd / "rule.cxx");
 
-          // Write buildfile.
-          //
-          of = path (pd / std_buildfile_file);
+        if (verb >= verbosity)
+          text << (verb >= 2 ? "cat >" : "save ") << of;
 
-          if (verb >= verbosity)
-            text << (verb >= 2 ? "cat >" : "save ") << of;
+        ofs.open (of);
 
-          ofs.open (of);
+        ofs << "#include \"location.hxx\""                              << '\n'
+            << '\n';
 
-          ofs << "import imp_libs += build2%lib{build2}"                << '\n'
-              << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs"<< '\n';
+        // Include every header that can plausibly be needed by a rule.
+        //
+        ofs << "#include <libbuild2/types.hxx>"                         << '\n'
+            << "#include <libbuild2/forward.hxx>"                       << '\n'
+            << "#include <libbuild2/utility.hxx>"                       << '\n'
+            << '\n'
+            << "#include <libbuild2/file.hxx>"                          << '\n'
+            << "#include <libbuild2/rule.hxx>"                          << '\n'
+            << "#include <libbuild2/depdb.hxx>"                         << '\n'
+            << "#include <libbuild2/scope.hxx>"                         << '\n'
+            << "#include <libbuild2/target.hxx>"                        << '\n'
+            << "#include <libbuild2/context.hxx>"                       << '\n'
+            << "#include <libbuild2/variable.hxx>"                      << '\n'
+            << "#include <libbuild2/algorithm.hxx>"                     << '\n'
+            << "#include <libbuild2/filesystem.hxx>"                    << '\n'
+            << "#include <libbuild2/diagnostics.hxx>"                   << '\n'
+            << '\n';
+
+        // Normally the recipe code will have one level of indentation so
+        // let's not indent the namespace level to match.
+        //
+        ofs << "namespace build2"                                       << '\n'
+            << "{"                                                      << '\n'
+            << '\n';
+
+        // If we want the user to be able to supply a custom constuctor, then
+        // we have to give the class a predictable name (i.e., we cannot use
+        // id as part of its name) and put it into an unnamed namespace. One
+        // clever idea is to call the class `constructor` but the name could
+        // also be used for a custom destructor (still could work) or for name
+        // qualification (would definitely look bizarre).
+        //
+        // In this light the most natural name is probable `rule`. The issue
+        // is we already have this name in the build2 namespace (and its our
+        // indirect base). In fact, any name that we choose could in the
+        // future conflict with something in that namespace so maybe it makes
+        // sense to bite the bullet and pick a name that is least likely to be
+        // used by the user directly (can always use cxx_rule instead).
+        //
+        ofs << "namespace"                                              << '\n'
+            << "{"                                                      << '\n'
+            << "class rule: public cxx_rule"                            << '\n'
+            << "{"                                                      << '\n'
+            << "public:"                                                << '\n'
+            << '\n';
+
+        // Inherit base constructor. This way the user may provide their own
+        // but don't have to.
+        //
+        ofs << "  using cxx_rule::cxx_rule;"                            << '\n'
+            << '\n';
 
-          ofs.close ();
-        }
+        // An extern "C" function cannot throw which can happen in case of a
+        // user-defined constructor. So we need an extra level of indirection.
+        // We incorporate id to make sure it doesn't conflict with anything
+        // user-defined.
+        //
+        ofs << "  static cxx_rule*"                                     << '\n'
+            << "  create_" << id << " (const location& l)"              << '\n'
+            << "  {"                                                    << '\n'
+            << "    return new rule (l);"                               << '\n'
+            << "  }"                                                    << '\n'
+            << '\n';
+
+        // Use the #line directive to point diagnostics to the code in the
+        // buildfile. Note that there is no easy way to restore things to
+        // point back to the source file (other than another #line with a line
+        // and a file). Seeing that we don't have much after, let's not bother
+        // for now.
+        //
+        ofs << "#line RECIPE_LINE RECIPE_FILE"                          << '\n';
 
-        // Create/update the recipe location header.
+        // Note that the code always includes trailing newline.
         //
-        // For update, preserve the file timestamp in order not to render the
-        // recipe out of date.
+        ofs << code
+            << "};"                                                     << '\n'
+            << '\n';
+
+        // Add an alias that we can use unambiguously in the load function.
         //
-        of = path (pd / "location.hxx");
+        ofs << "using rule_" << id << " = rule;"                        << '\n'
+            << "}"                                                      << '\n'
+            << '\n';
+
+        // Entry point.
+        //
+        ofs << "extern \"C\""                                           << '\n'
+            << "#ifdef _WIN32"                                          << '\n'
+            << "__declspec(dllexport)"                                  << '\n'
+            << "#endif"                                                 << '\n'
+            << "cxx_rule* (*" << sym << " ()) (const location&)"        << '\n'
+            << "{"                                                      << '\n'
+            << "  return &rule_" << id << "::create_" << id << ";"      << '\n'
+            << "}"                                                      << '\n'
+            << '\n';
+
+        ofs << "}"                                                      << '\n';
+
+        ofs.close ();
 
-        entry_time et (file_time (of));
+
+        // Write buildfile.
+        //
+        of = bf;
 
         if (verb >= verbosity)
           text << (verb >= 2 ? "cat >" : "save ") << of;
 
         ofs.open (of);
 
-        // Recipe file and line for the #line directive above. Note that the
-        // code starts from the next line thus +1. We also need to escape
-        // backslashes (Windows paths).
-        //
-        {
-          const string& f (!loc.file.path.empty ()
-                           ? loc.file.path.string ()
-                           : loc.file.name ? *loc.file.name : string ());
-
-          ofs << "#define RECIPE_FILE \"" << sanitize_strlit (f) << '"' << '\n'
-              << "#define RECIPE_LINE "   << loc.line + 1               << '\n';
-        }
+        ofs << "import imp_libs += build2%lib{build2}"                  << '\n'
+            << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs"  << '\n'
+            << '\n'
+            << psig                                                     << '\n';
 
         ofs.close ();
-
-        if (et.modification != timestamp_nonexistent)
-          file_time (of, et);
       }
       catch (const io_error& e)
       {
         fail << "unable to write to " << of << ": " << e;
       }
-      catch (const system_error& e)
-      {
-        fail << "unable to get/set timestamp for " << of << ": " << e;
-      }
 
       // Update the library target in the module context.
       //
       const target* l (nullptr);
       do // Breakout loop.
       {
-        bool nested (ctx.module_context == &ctx);
-
-        // Create the build context if necessary.
-        //
-        if (ctx.module_context == nullptr)
-        {
-          if (!ctx.module_context_storage)
-            fail (loc) << "unable to update ad hoc recipe for target " << t <<
-              info << "building of ad hoc recipes is disabled";
-
-          create_module_context (ctx, loc);
-        }
-
-        // "Switch" to the module context.
-        //
-        context& ctx (*t.ctx.module_context);
-
         // Load the project in the module context.
         //
         // Note that it's possible it has already been loaded (see above about
         // the id calculation).
         //
-        path bf (pd / std_buildfile_file);
         scope& rs (load_project (ctx, pd, pd, false /* forwarded */));
 
         auto find_target = [&ctx, &rs, &pd, &id] ()
@@ -939,7 +978,8 @@ namespace build2
 
         // If the project has already been loaded then, as an optimization,
         // check if the target has already been updated (this will make a
-        // difference we if we have identical recipes in several buildfiles).
+        // difference we if we have identical recipes in several buildfiles,
+        // especially to the location update that comes next).
         //
         if (!source_once (rs, rs, bf))
         {
@@ -949,6 +989,45 @@ namespace build2
             break;
         }
 
+        // Create/update the recipe location header.
+        //
+        // For update, preserve the file timestamp in order not to render the
+        // recipe out of date.
+        //
+        of = path (pd / "location.hxx");
+        if (!check_sig (of, lsig))
+        try
+        {
+          entry_time et (file_time (of));
+
+          if (verb >= verbosity)
+            text << (verb >= 2 ? "cat >" : "save ") << of;
+
+          ofs.open (of);
+
+          // Recipe file and line for the #line directive above. Note that the
+          // code starts from the next line thus +1. We also need to escape
+          // backslashes (Windows paths).
+          //
+          ofs << "#define RECIPE_FILE \"" << sanitize_strlit (lf) << '"'<< '\n'
+              << "#define RECIPE_LINE "   << loc.line + 1               << '\n'
+              << '\n'
+              << lsig                                                   << '\n';
+
+          ofs.close ();
+
+          if (et.modification != timestamp_nonexistent)
+            file_time (of, et);
+        }
+        catch (const io_error& e)
+        {
+          fail << "unable to write to " << of << ": " << e;
+        }
+        catch (const system_error& e)
+        {
+          fail << "unable to get/set timestamp for " << of << ": " << e;
+        }
+
         if (nested)
         {
           // This means there is a perform update action already in progress
-- 
cgit v1.1