author    Boris Kolpackov <boris@codesynthesis.com> 2018-01-20 13:46:11 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2018-02-03 14:35:45 +0200
commit    934f2a9a90c5cad3cdc8a66b50c17827a3ddbcee (patch)
tree      f35f106e5369e98350327c79080c571195234c0b /build2/cc/compile-rule.cxx
parent    280f4a5bf787587227ca193cd59c6bd74091db70 (diff)
Get rid of action rule override semantics
Instead we now have two more or less separate match states for outer and inner parts of an action.
Diffstat (limited to 'build2/cc/compile-rule.cxx')
-rw-r--r-- build2/cc/compile-rule.cxx | 4631
1 file changed, 4631 insertions, 0 deletions
diff --git a/build2/cc/compile-rule.cxx b/build2/cc/compile-rule.cxx
new file mode 100644
index 0000000..df84547
--- /dev/null
+++ b/build2/cc/compile-rule.cxx
@@ -0,0 +1,4631 @@
+// file : build2/cc/compile-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/cc/compile-rule.hxx>
+
+#include <cstdlib> // exit()
+#include <cstring> // strlen()
+
+#include <build2/file.hxx>
+#include <build2/depdb.hxx>
+#include <build2/scope.hxx>
+#include <build2/context.hxx>
+#include <build2/variable.hxx>
+#include <build2/algorithm.hxx>
+#include <build2/diagnostics.hxx>
+
+#include <build2/bin/target.hxx>
+#include <build2/config/utility.hxx> // create_project()
+
+#include <build2/cc/parser.hxx>
+#include <build2/cc/target.hxx> // h
+#include <build2/cc/module.hxx>
+#include <build2/cc/utility.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ // module_info string serialization.
+ //
+ // The string representation is a space-separated list of module names
+ // with the following rules:
+ //
+    // 1. If this is a module unit, then the first name is the module name
+    //    itself followed by either '!' for an interface unit or by '+' for
+    //    an implementation unit.
+ //
+ // 2. If an imported module is re-exported, then the module name is
+ // followed by '*'.
+ //
+ // For example:
+ //
+ // foo! foo.core* foo.base* foo.impl
+ // foo.base+ foo.impl
+ // foo.base foo.impl
+ //
+ static string
+ to_string (const module_info& m)
+ {
+ string s;
+
+ if (!m.name.empty ())
+ {
+ s += m.name;
+ s += m.iface ? '!' : '+';
+ }
+
+ for (const module_import& i: m.imports)
+ {
+ if (!s.empty ())
+ s += ' ';
+
+ s += i.name;
+
+ if (i.exported)
+ s += '*';
+ }
+
+ return s;
+ }
+
+ static module_info
+ to_module_info (const string& s)
+ {
+ module_info m;
+
+ for (size_t b (0), e (0), n; (n = next_word (s, b, e, ' ')) != 0; )
+ {
+ char c (s[e - 1]);
+ switch (c)
+ {
+ case '!':
+ case '+':
+ case '*': break;
+ default: c = '\0';
+ }
+
+ string w (s, b, n - (c == '\0' ? 0 : 1));
+
+ if (c == '!' || c == '+')
+ {
+ m.name = move (w);
+ m.iface = (c == '!');
+ }
+ else
+ m.imports.push_back (module_import {move (w), c == '*', 0});
+ }
+
+ return m;
+ }
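+
+    // To illustrate the round-trip (a hypothetical sketch, not called
+    // anywhere in this file):
+    //
+    //   module_info m (to_module_info ("foo! foo.core* foo.impl"));
+    //
+    //   assert (m.name == "foo" && m.iface);
+    //   assert (m.imports[0].name == "foo.core" && m.imports[0].exported);
+    //   assert (m.imports[1].name == "foo.impl" && !m.imports[1].exported);
+    //   assert (to_string (m) == "foo! foo.core* foo.impl");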
+
+ // preprocessed
+ //
+ template <typename T>
+ inline bool
+ operator< (preprocessed l, T r) // Template because of VC14 bug.
+ {
+ return static_cast<uint8_t> (l) < static_cast<uint8_t> (r);
+ }
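+
+    // This allows ordered comparisons of preprocessing levels, for
+    // example, md.pp < preprocessed::includes as done in apply() below.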
+
+ preprocessed
+ to_preprocessed (const string& s)
+ {
+ if (s == "none") return preprocessed::none;
+ if (s == "includes") return preprocessed::includes;
+ if (s == "modules") return preprocessed::modules;
+ if (s == "all") return preprocessed::all;
+ throw invalid_argument ("invalid preprocessed value '" + s + "'");
+ }
+
+ struct compile_rule::match_data
+ {
+ explicit
+ match_data (translation_type t, const prerequisite_member& s)
+ : type (t), src (s) {}
+
+ translation_type type;
+ preprocessed pp = preprocessed::none;
+ bool symexport = false; // Target uses __symexport.
+ bool touch = false; // Target needs to be touched.
+ timestamp mt = timestamp_unknown; // Target timestamp.
+ prerequisite_member src;
+ auto_rmfile psrc; // Preprocessed source, if any.
+ path dd; // Dependency database path.
+ module_positions mods = {0, 0, 0};
+ };
+
+ compile_rule::
+ compile_rule (data&& d)
+ : common (move (d)),
+ rule_id (string (x) += ".compile 4")
+ {
+ static_assert (sizeof (match_data) <= target::data_size,
+ "insufficient space");
+ }
+
+ const char* compile_rule::
+ langopt (const match_data& md) const
+ {
+ bool m (md.type == translation_type::module_iface);
+ //preprocessed p (md.pp);
+
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ // Ignore the preprocessed value since for GCC it is handled via
+ // -fpreprocessed -fdirectives-only.
+ //
+ switch (x_lang)
+ {
+ case lang::c: return "c";
+ case lang::cxx: return "c++";
+ }
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ // Clang has *-cpp-output (but not c++-module-cpp-output) and they
+ // handle comments and line continuations. However, currently this
+ // is only by accident since these modes are essentially equivalent
+ // to their cpp-output-less versions.
+ //
+ switch (x_lang)
+ {
+ case lang::c: return "c";
+ case lang::cxx: return m ? "c++-module" : "c++";
+ }
+ }
+ case compiler_id::msvc:
+ {
+ switch (x_lang)
+ {
+ case lang::c: return "/TC";
+ case lang::cxx: return "/TP";
+ }
+ }
+ case compiler_id::icc:
+ {
+ switch (x_lang)
+ {
+ case lang::c: return "c";
+ case lang::cxx: return "c++";
+ }
+ }
+ }
+
+ return nullptr;
+ }
+
+ inline void compile_rule::
+ append_symexport_options (cstrings& args, const target& t) const
+ {
+ // With VC if a BMI is compiled with dllexport, then when such BMI is
+ // imported, it is auto-magically treated as dllimport. Let's hope
+ // other compilers follow suit.
+ //
+ args.push_back (t.is_a<bmis> () && tclass == "windows"
+ ? "-D__symexport=__declspec(dllexport)"
+ : "-D__symexport=");
+ }
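+
+    // To illustrate, a module interface unit could then declare (a
+    // hypothetical user-code sketch):
+    //
+    //   __symexport void
+    //   say_hello (const string&);
+    //
+    // Which expands to __declspec(dllexport) when compiling a Windows
+    // bmis{} and to nothing otherwise.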
+
+ bool compile_rule::
+ match (action a, target& t, const string&) const
+ {
+ tracer trace (x, "compile_rule::match");
+
+ bool mod (t.is_a<bmie> () || t.is_a<bmia> () || t.is_a<bmis> ());
+
+ // Link-up to our group (this is the obj/bmi{} target group protocol
+ // which means this can be done whether we match or not).
+ //
+ if (t.group == nullptr)
+ t.group = &search (t,
+ mod ? bmi::static_type : obj::static_type,
+ t.dir, t.out, t.name);
+
+ // See if we have a source file. Iterate in reverse so that a source
+ // file specified for a member overrides the one specified for the
+ // group. Also "see through" groups.
+ //
+ for (prerequisite_member p: reverse_group_prerequisite_members (a, t))
+ {
+ if (p.is_a (mod ? *x_mod : x_src))
+ {
+ // Save in the target's auxiliary storage. Translation type will
+ // be refined in apply().
+ //
+ t.data (match_data (mod
+ ? translation_type::module_iface
+ : translation_type::plain,
+ p));
+ return true;
+ }
+ }
+
+ l4 ([&]{trace << "no " << x_lang << " source file for target " << t;});
+ return false;
+ }
+
+ // Append or hash library options from a pair of *.export.* variables
+ // (first one is cc.export.*) recursively, prerequisite libraries first.
+ //
+ void compile_rule::
+ append_lib_options (const scope& bs,
+ cstrings& args,
+ action a,
+ const target& t,
+ linfo li) const
+ {
+ // See through utility libraries.
+ //
+ auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+ auto opt = [&args, this] (
+ const file& l, const string& t, bool com, bool exp)
+ {
+ // Note that in our model *.export.poptions are always "interface",
+ // even if set on liba{}/libs{}, unlike loptions.
+ //
+ if (!exp) // Ignore libux.
+ return;
+
+ const variable& var (
+ com
+ ? c_export_poptions
+ : (t == x ? x_export_poptions : var_pool[t + ".export.poptions"]));
+
+ append_options (args, l, var);
+ };
+
+ // In case we don't have the "small function object" optimization.
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+        // Should already be searched and matched for libraries.
+ //
+ if (const target* pt = p.load ())
+ {
+ if (const libx* l = pt->is_a<libx> ())
+ pt = &link_member (*l, a, li);
+
+ bool la;
+ if (!((la = pt->is_a<liba> ()) ||
+ (la = pt->is_a<libux> ()) ||
+ pt->is_a<libs> ()))
+ continue;
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ pt->as<file> (), la, 0, // Hack: lflags unused.
+ impf, nullptr, optf);
+ }
+ }
+ }
+
+ void compile_rule::
+ hash_lib_options (const scope& bs,
+ sha256& cs,
+ action a,
+ const target& t,
+ linfo li) const
+ {
+ auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+ auto opt = [&cs, this] (
+ const file& l, const string& t, bool com, bool exp)
+ {
+ if (!exp)
+ return;
+
+ const variable& var (
+ com
+ ? c_export_poptions
+ : (t == x ? x_export_poptions : var_pool[t + ".export.poptions"]));
+
+ hash_options (cs, l, var);
+ };
+
+ // The same logic as in append_lib_options().
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ if (const target* pt = p.load ())
+ {
+ if (const libx* l = pt->is_a<libx> ())
+ pt = &link_member (*l, a, li);
+
+ bool la;
+ if (!((la = pt->is_a<liba> ()) ||
+ (la = pt->is_a<libux> ()) ||
+ pt->is_a<libs> ()))
+ continue;
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ pt->as<file> (), la, 0, // Hack: lflags unused.
+ impf, nullptr, optf);
+ }
+ }
+ }
+
+ // Append library prefixes based on the *.export.poptions variables
+ // recursively, prerequisite libraries first.
+ //
+ void compile_rule::
+ append_lib_prefixes (const scope& bs,
+ prefix_map& m,
+ action a,
+ target& t,
+ linfo li) const
+ {
+ auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+ auto opt = [&m, this] (
+ const file& l, const string& t, bool com, bool exp)
+ {
+ if (!exp)
+ return;
+
+ const variable& var (
+ com
+ ? c_export_poptions
+ : (t == x ? x_export_poptions : var_pool[t + ".export.poptions"]));
+
+ append_prefixes (m, l, var);
+ };
+
+ // The same logic as in append_lib_options().
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ if (const target* pt = p.load ())
+ {
+ if (const libx* l = pt->is_a<libx> ())
+ pt = &link_member (*l, a, li);
+
+ bool la;
+ if (!((la = pt->is_a<liba> ()) ||
+ (la = pt->is_a<libux> ()) ||
+ pt->is_a<libs> ()))
+ continue;
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ pt->as<file> (), la, 0, // Hack: lflags unused.
+ impf, nullptr, optf);
+ }
+ }
+ }
+
+ // Update the target during the match phase. Return true if it has changed
+ // or if the passed timestamp is not timestamp_unknown and is older than
+ // the target.
+ //
+ // This function is used to make sure header dependencies are up to date.
+ //
+ // There would normally be a lot of headers for every source file (think
+ // all the system headers) and just calling execute_direct() on all of
+ // them can get expensive. At the same time, most of these headers are
+ // existing files that we will never be updating (again, system headers,
+ // for example) and the rule that will match them is the fallback
+ // file_rule. That rule has an optimization: it returns noop_recipe (which
+ // causes the target state to be automatically set to unchanged) if the
+ // file is known to be up to date. So we do the update "smartly".
+ //
+ static bool
+ update (tracer& trace, action a, const target& t, timestamp ts)
+ {
+ const path_target* pt (t.is_a<path_target> ());
+
+ if (pt == nullptr)
+ ts = timestamp_unknown;
+
+ target_state os (t.matched_state (a));
+
+ if (os == target_state::unchanged)
+ {
+ if (ts == timestamp_unknown)
+ return false;
+ else
+ {
+ // We expect the timestamp to be known (i.e., existing file).
+ //
+ timestamp mt (pt->mtime ());
+ assert (mt != timestamp_unknown);
+ return mt > ts;
+ }
+ }
+ else
+ {
+ // We only want to return true if our call to execute() actually
+ // caused an update. In particular, the target could already have been
+ // in target_state::changed because of a dependency extraction run for
+ // some other source file.
+ //
+ // @@ MT perf: so we are going to switch the phase and execute for
+ // any generated header.
+ //
+ phase_switch ps (run_phase::execute);
+ target_state ns (execute_direct (a, t));
+
+ if (ns != os && ns != target_state::unchanged)
+ {
+ l6 ([&]{trace << "updated " << t
+ << "; old state " << os
+ << "; new state " << ns;});
+ return true;
+ }
+ else
+ return ts != timestamp_unknown ? pt->newer (ts) : false;
+ }
+ }
+
+ recipe compile_rule::
+ apply (action a, target& xt) const
+ {
+ tracer trace (x, "compile_rule::apply");
+
+ file& t (xt.as<file> ()); // Either obj*{} or bmi*{}.
+
+ match_data& md (t.data<match_data> ());
+ bool mod (md.type == translation_type::module_iface);
+
+ const scope& bs (t.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ otype ot (compile_type (t, mod));
+ linfo li (link_info (bs, ot)); // Link info for selecting libraries.
+ compile_target_types tt (compile_types (ot));
+
+ // Derive file name from target name.
+ //
+ string e; // Primary target extension (module or object).
+ {
+ const char* o ("o"); // Object extension (.o or .obj).
+
+ if (tsys == "win32-msvc")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "exe."; break;
+ case otype::a: e = "lib."; break;
+ case otype::s: e = "dll."; break;
+ }
+ o = "obj";
+ }
+ else if (tsys == "mingw32")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "exe."; break;
+ case otype::a: e = "a."; break;
+ case otype::s: e = "dll."; break;
+ }
+ }
+ else if (tsys == "darwin")
+ {
+ switch (ot)
+ {
+ case otype::e: e = ""; break;
+ case otype::a: e = "a."; break;
+ case otype::s: e = "dylib."; break;
+ }
+ }
+ else
+ {
+ switch (ot)
+ {
+ case otype::e: e = ""; break;
+ case otype::a: e = "a."; break;
+ case otype::s: e = "so."; break;
+ }
+ }
+
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ e += mod ? "nms" : o;
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ e += mod ? "pcm" : o;
+ break;
+ }
+ case compiler_id::msvc:
+ {
+ e += mod ? "ifc" : o;
+ break;
+ }
+ case compiler_id::icc:
+ {
+ assert (!mod);
+ e += o;
+ }
+ }
+
+ // If we are compiling a module, then the obj*{} is an ad hoc member
+ // of bmi*{}.
+ //
+ if (mod)
+ {
+ // The module interface unit can be the same as an implementation
+ // (e.g., foo.mxx and foo.cxx) which means obj*{} targets could
+ // collide. So we add the module extension to the target name.
+ //
+ target_lock obj (add_adhoc_member (a, t, tt.obj, e.c_str ()));
+ obj.target->as<file> ().derive_path (o);
+ match_recipe (obj, group_recipe); // Set recipe and unlock.
+ }
+ }
+
+ const path& tp (t.derive_path (e.c_str ()));
+
+ // Inject dependency on the output directory.
+ //
+ const fsdir* dir (inject_fsdir (a, t));
+
+ // Match all the existing prerequisites. The injection code takes care
+ // of the ones it is adding.
+ //
+ // When cleaning, ignore prerequisites that are not in the same or a
+ // subdirectory of our project root.
+ //
+ auto& pts (t.prerequisite_targets[a]);
+ optional<dir_paths> usr_lib_dirs; // Extract lazily.
+
+ // Start asynchronous matching of prerequisites. Wait with unlocked
+ // phase to allow phase switching.
+ //
+ wait_guard wg (target::count_busy (), t[a].task_count, true);
+
+ size_t start (pts.size ()); // Index of the first to be added.
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target* pt (nullptr);
+
+ // A dependency on a library is there so that we can get its
+ // *.export.poptions, modules, etc. This is the "library
+ // meta-information protocol". See also append_lib_options().
+ //
+ if (p.is_a<libx> () ||
+ p.is_a<liba> () ||
+ p.is_a<libs> () ||
+ p.is_a<libux> ())
+ {
+ if (a.operation () == update_id)
+ {
+ // Handle (phase two) imported libraries. We know that for such
+ // libraries we don't need to do match() in order to get options
+ // (if any, they would be set by search_library()).
+ //
+ if (p.proj ())
+ {
+ if (search_library (a,
+ sys_lib_dirs,
+ usr_lib_dirs,
+ p.prerequisite) != nullptr)
+ continue;
+ }
+
+ pt = &p.search (t);
+
+ if (const libx* l = pt->is_a<libx> ())
+ pt = &link_member (*l, a, li);
+ }
+ else
+ continue;
+ }
+ //
+        // For modules we pick only what we import, which is done below, so
+        // skip them here. One corner case is clean: we assume that someone
+        // else (normally the library/executable) also depends on them and
+        // will clean them up.
+ //
+ else if (p.is_a<bmi> () || p.is_a (tt.bmi))
+ continue;
+ else
+ {
+ pt = &p.search (t);
+
+ if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ()))
+ continue;
+ }
+
+ match_async (a, *pt, target::count_busy (), t[a].task_count);
+ pts.push_back (pt);
+ }
+
+ wg.wait ();
+
+ // Finish matching all the targets that we have started.
+ //
+ for (size_t i (start), n (pts.size ()); i != n; ++i)
+ {
+ const target*& pt (pts[i]);
+
+ // Making sure a library is updated before us will only restrict
+ // parallelism. But we do need to match it in order to get its imports
+ // resolved and prerequisite_targets populated. So we match it but
+ // then unmatch if it is safe. And thanks to the two-pass prerequisite
+ // match in link::apply() it will be safe unless someone is building
+        // an obj?{} target directly.
+ //
+ if (build2::match (
+ a,
+ *pt,
+ pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ()
+ ? unmatch::safe
+ : unmatch::none))
+ pt = nullptr; // Ignore in execute.
+ }
+
+ // Inject additional prerequisites. We only do it when performing update
+ // since chances are we will have to update some of our prerequisites in
+ // the process (auto-generated source code).
+ //
+ if (a == perform_update_id)
+ {
+ // The cached prerequisite target should be the same as what is in
+ // t.prerequisite_targets since we used standard search() and match()
+ // above.
+ //
+ const file& src (*md.src.search (t).is_a<file> ());
+
+ // Figure out if __symexport is used. While normally it is specified
+ // on the project root (which we cached), it can be overridden with
+ // a target-specific value for installed modules (which we sidebuild
+ // as part of our project).
+ //
+ if (modules && src.is_a (*x_mod))
+ {
+ lookup l (src.vars[x_symexport]);
+ md.symexport = l ? cast<bool> (l) : symexport;
+ }
+
+ // Make sure the output directory exists.
+ //
+ // Is this the right thing to do? It does smell a bit, but then we do
+ // worse things in inject_prerequisites() below. There is also no way
+ // to postpone this until update since we need to extract and inject
+ // header dependencies now (we don't want to be calling search() and
+ // match() in update), which means we need to cache them now as well.
+ // So the only alternative, it seems, is to cache the updates to the
+        // database until later which will surely complicate (and slow down)
+ // things.
+ //
+ if (dir != nullptr)
+ {
+ // We can do it properly by using execute_direct(). But this means
+ // we will be switching to the execute phase with all the associated
+ // overheads. At the same time, in case of update, creation of a
+ // directory is not going to change the external state in any way
+ // that would affect any parallel efforts in building the internal
+ // state. So we are just going to create the directory directly.
+ // Note, however, that we cannot modify the fsdir{} target since
+ // this can very well be happening in parallel. But that's not a
+ // problem since fsdir{}'s update is idempotent.
+ //
+ fsdir_rule::perform_update_direct (a, t);
+ }
+
+ // Note: the leading '@' is reserved for the module map prefix (see
+ // extract_modules()) and no other line must start with it.
+ //
+ md.dd = tp + ".d";
+ depdb dd (md.dd);
+
+ // First should come the rule name/version.
+ //
+ if (dd.expect (rule_id) != nullptr)
+ l4 ([&]{trace << "rule mismatch forcing update of " << t;});
+
+ // Then the compiler checksum. Note that here we assume it
+ // incorporates the (default) target so that if the compiler changes
+ // but only in what it targets, then the checksum will still change.
+ //
+ if (dd.expect (cast<string> (rs[x_checksum])) != nullptr)
+ l4 ([&]{trace << "compiler mismatch forcing update of " << t;});
+
+ // Then the options checksum.
+ //
+ // The idea is to keep them exactly as they are passed to the compiler
+ // since the order may be significant.
+ //
+ {
+ sha256 cs;
+
+ // These flags affect how we compile the source and/or the format of
+ // depdb so factor them in.
+ //
+ cs.append (&md.pp, sizeof (md.pp));
+ cs.append (&md.symexport, sizeof (md.symexport));
+
+ if (md.pp != preprocessed::all)
+ {
+ hash_options (cs, t, c_poptions);
+ hash_options (cs, t, x_poptions);
+
+ // Hash *.export.poptions from prerequisite libraries.
+ //
+ hash_lib_options (bs, cs, a, t, li);
+
+ // Extra system header dirs (last).
+ //
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ hash_option_values (
+ cs, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ();});
+ }
+
+ hash_options (cs, t, c_coptions);
+ hash_options (cs, t, x_coptions);
+ hash_options (cs, tstd);
+
+ if (ot == otype::s)
+ {
+            // On Darwin and Win32, -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ cs.append ("-fPIC");
+ }
+
+ if (dd.expect (cs.string ()) != nullptr)
+ l4 ([&]{trace << "options mismatch forcing update of " << t;});
+ }
+
+ // Finally the source file.
+ //
+ if (dd.expect (src.path ()) != nullptr)
+ l4 ([&]{trace << "source file mismatch forcing update of " << t;});
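+
+        // To illustrate, at this point the database starts along these
+        // lines (a sketch, assuming x is "cxx"; checksums abbreviated and
+        // depdb's own format header, if any, omitted):
+        //
+        //   cxx.compile 4
+        //   746aa1...  (compiler checksum)
+        //   1db0a2...  (options checksum)
+        //   /tmp/hello/hello.cxx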
+
+ // If any of the above checks resulted in a mismatch (different
+ // compiler, options, or source file) or if the depdb is newer than
+ // the target (interrupted update), then do unconditional update.
+ //
+ timestamp mt;
+ bool u (dd.writing () || dd.mtime () > (mt = file_mtime (tp)));
+ if (u)
+ mt = timestamp_nonexistent; // Treat as if it doesn't exist.
+
+ // Update prerequisite targets (normally just the source file).
+ //
+ // This is an unusual place and time to do it. But we have to do it
+ // before extracting dependencies. The reasoning for source file is
+ // pretty clear. What other prerequisites could we have? While
+ // normally they will be some other sources (as in, static content
+ // from src_root), it's possible they are some auto-generated stuff.
+ // And it's possible they affect the preprocessor result. Say some ad
+ // hoc/out-of-band compiler input file that is passed via the command
+ // line. So, to be safe, we make sure everything is up to date.
+ //
+ for (const target* pt: pts)
+ {
+ if (pt == nullptr || pt == dir)
+ continue;
+
+ u = update (trace, a, *pt, u ? timestamp_unknown : mt) || u;
+ }
+
+ // Check if the source is already preprocessed to a certain degree.
+ // This determines which of the following steps we perform and on
+ // what source (original or preprocessed).
+ //
+ // Note: must be set on the src target.
+ //
+ if (const string* v = cast_null<string> (src[x_preprocessed]))
+ try
+ {
+ md.pp = to_preprocessed (*v);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "invalid " << x_preprocessed.name << " variable value "
+ << "for target " << src << ": " << e;
+ }
+
+ // If we have no #include directives, then skip header dependency
+ // extraction.
+ //
+ pair<auto_rmfile, bool> psrc (auto_rmfile (), false);
+ if (md.pp < preprocessed::includes)
+ psrc = extract_headers (a, bs, t, li, src, md, dd, u, mt);
+
+ // Next we "obtain" the translation unit information. What exactly
+        // "obtain" entails is tricky: if things changed, then we re-parse the
+        // translation unit. Otherwise, we re-create this information from
+        // depdb. We, however, have to do it here and now in case the database
+        // is invalid and we still have to fall back to re-parsing.
+        //
+        // Store a translation unit's checksum to detect ignorable changes
+        // (whitespace, comments, etc.).
+ //
+ {
+ string cs;
+ if (string* l = dd.read ())
+ cs = move (*l);
+ else
+ u = true; // Database is invalid, force re-parse.
+
+ translation_unit tu;
+ for (bool f (true);; f = false)
+ {
+ if (u)
+ {
+ auto p (parse_unit (a, t, li, src, psrc.first, md));
+
+ if (cs != p.second)
+ {
+ assert (f); // Unchanged TU has a different checksum?
+ dd.write (p.second);
+ }
+ else if (f) // Don't clear if it was forced.
+ {
+ // Clear the update flag and set the touch flag. Unless there
+ // is no object file, of course. See also the md.mt logic
+ // below.
+ //
+ if (mt != timestamp_nonexistent)
+ {
+ u = false;
+ md.touch = true;
+ }
+ }
+
+ tu = move (p.first);
+ }
+
+ if (modules)
+ {
+ if (u || !f)
+ {
+ string s (to_string (tu.mod));
+
+ if (f)
+ dd.expect (s);
+ else
+ dd.write (s);
+ }
+ else
+ {
+ if (string* l = dd.read ())
+ tu.mod = to_module_info (*l);
+ else
+ {
+ u = true; // Database is invalid, force re-parse.
+ continue;
+ }
+ }
+ }
+
+ break;
+ }
+
+ // Make sure the translation unit type matches the resulting target
+ // type.
+ //
+ switch (tu.type ())
+ {
+ case translation_type::plain:
+ case translation_type::module_impl:
+ {
+ if (mod)
+ fail << "translation unit " << src << " is not a module interface" <<
+ info << "consider using " << x_src.name << "{} instead";
+ break;
+ }
+ case translation_type::module_iface:
+ {
+ if (!mod)
+ fail << "translation unit " << src << " is a module interface" <<
+ info << "consider using " << x_mod->name << "{} instead";
+ break;
+ }
+ }
+
+ md.type = tu.type ();
+
+ // Extract the module dependency information in addition to header
+ // dependencies.
+ //
+ // NOTE: assumes that no further targets will be added into
+ // t.prerequisite_targets!
+ //
+ extract_modules (a, bs, t, li, tt, src, md, move (tu.mod), dd, u);
+ }
+
+ // If anything got updated, then we didn't rely on the cache. However,
+ // the cached data could actually have been valid and the compiler run
+ // in extract_headers() as well as the code above merely validated it.
+ //
+        // We do need to update the database timestamp, however. Failing that,
+ // we will keep re-validating the cached data over and over again.
+ //
+ if (u && dd.reading ())
+ dd.touch ();
+
+ dd.close ();
+
+ // If the preprocessed output is suitable for compilation and is not
+ // disabled, then pass it along.
+ //
+ if (psrc.second && !cast_false<bool> (t[c_reprocess]))
+ {
+ md.psrc = move (psrc.first);
+
+ // Without modules keeping the (partially) preprocessed output
+ // around doesn't buy us much: if the source/headers haven't changed
+ // then neither will the object file. Modules make things more
+ // interesting: now we may have to recompile an otherwise unchanged
+ // translation unit because a BMI it depends on has changed. In this
+ // case re-processing the translation unit would be a waste and
+ // compiling the original source would break distributed
+ // compilation.
+ //
+ // Note also that the long term trend will (hopefully) be for
+ // modularized projects to get rid of #include's which means the
+ // need for producing this partially preprocessed output will
+ // (hopefully) gradually disappear.
+ //
+ if (modules)
+ md.psrc.active = false; // Keep.
+ }
+
+ // Above we may have ignored changes to the translation unit. The
+ // problem is, unless we also update the target's timestamp, we will
+ // keep re-checking this on subsequent runs and it is not cheap.
+ // Updating the target's timestamp is not without problems either: it
+ // will cause a re-link on a subsequent run. So, essentially, we
+ // somehow need to remember two timestamps: one for checking
+ // "preprocessor prerequisites" above and one for checking other
+ // prerequisites (like modules) below. So what we are going to do is
+ // store the first in the target file (so we do touch it) and the
+        // second in depdb (which is never newer than the target).
+ //
+ md.mt = u ? timestamp_nonexistent : dd.mtime ();
+ }
+
+ switch (a)
+ {
+ case perform_update_id: return [this] (action a, const target& t)
+ {
+ return perform_update (a, t);
+ };
+ case perform_clean_id: return [this] (action a, const target& t)
+ {
+ return perform_clean (a, t);
+ };
+ default: return noop_recipe; // Configure update.
+ }
+ }
+
+ // Reverse-lookup target type from extension.
+ //
+ const target_type* compile_rule::
+ map_extension (const scope& s, const string& n, const string& e) const
+ {
+ // We will just have to try all of the possible ones, in the "most
+ // likely to match" order.
+ //
+ auto test = [&s, &n, &e] (const target_type& tt) -> bool
+ {
+ // Call the extension derivation function. Here we know that it will
+ // only use the target type and name from the target key so we can
+ // pass bogus values for the rest.
+ //
+ target_key tk {&tt, nullptr, nullptr, &n, nullopt};
+
+ // This is like prerequisite search.
+ //
+ if (optional<string> de = tt.default_extension (tk, s, true))
+ if (*de == e)
+ return true;
+
+ return false;
+ };
+
+ for (const target_type* const* p (x_inc); *p != nullptr; ++p)
+ if (test (**p)) return *p;
+
+ return nullptr;
+ }
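+
+    // For example (assuming hxx{} is among x_inc), the name foo with
+    // extension hxx would be mapped to the hxx{} target type since that
+    // type's default extension matches.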
+
+ void compile_rule::
+ append_prefixes (prefix_map& m, const target& t, const variable& var) const
+ {
+ tracer trace (x, "compile_rule::append_prefixes");
+
+      // If this target does not belong to any project (e.g., an "imported as
+ // installed" library), then it can't possibly generate any headers for
+ // us.
+ //
+ const scope& bs (t.base_scope ());
+ const scope* rs (bs.root_scope ());
+ if (rs == nullptr)
+ return;
+
+ const dir_path& out_base (t.dir);
+ const dir_path& out_root (rs->out_path ());
+
+ if (auto l = t[var])
+ {
+ const auto& v (cast<strings> (l));
+
+ for (auto i (v.begin ()), e (v.end ()); i != e; ++i)
+ {
+ // -I can either be in the "-Ifoo" or "-I foo" form. For VC it can
+ // also be /I.
+ //
+ const string& o (*i);
+
+ if (o.size () < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I')
+ continue;
+
+ dir_path d;
+ if (o.size () == 2)
+ {
+ if (++i == e)
+ break; // Let the compiler complain.
+
+ d = dir_path (*i);
+ }
+ else
+ d = dir_path (*i, 2, string::npos);
+
+ l6 ([&]{trace << "-I " << d;});
+
+ if (d.relative ())
+ fail << "relative -I directory " << d
+ << " in variable " << var.name
+ << " for target " << t;
+
+ // If we are not inside our project root, then ignore.
+ //
+ if (!d.sub (out_root))
+ continue;
+
+ // If the target directory is a sub-directory of the include
+ // directory, then the prefix is the difference between the
+ // two. Otherwise, leave it empty.
+ //
+ // The idea here is to make this "canonical" setup work auto-
+ // magically:
+ //
+ // 1. We include all files with a prefix, e.g., <foo/bar>.
+ // 2. The library target is in the foo/ sub-directory, e.g.,
+ // /tmp/foo/.
+ // 3. The poptions variable contains -I/tmp.
+ //
+ dir_path p (out_base.sub (d) ? out_base.leaf (d) : dir_path ());
+
+ // We use the target's directory as out_base but that doesn't work
+ // well for targets that are stashed in subdirectories. So as a
+          // heuristic we are going to also enter the outer directories of
+          // the original prefix. It is, however, possible that another -I
+          // option after this one will produce one of these outer prefixes
+          // as its original prefix, in which case we should override it.
+ //
+ // So we are going to assign the original prefix priority value 0
+ // (highest) and then increment it for each outer prefix.
+ //
+ auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio)
+ {
+ auto j (m.find (p));
+
+ if (j != m.end ())
+ {
+ prefix_value& v (j->second);
+
+ // We used to reject duplicates but it seems this can be
+ // reasonably expected to work according to the order of the
+ // -I options.
+ //
+              // Seeing that we normally have more "specific" -I paths first
+              // (so that we don't pick up installed headers, etc.), we
+              // ignore it.
+ //
+ if (v.directory == d)
+ {
+ if (v.priority > prio)
+ v.priority = prio;
+ }
+ else if (v.priority <= prio)
+ {
+ if (verb >= 4)
+ trace << "ignoring dependency prefix " << p << '\n'
+ << " existing mapping to " << v.directory
+ << " priority " << v.priority << '\n'
+ << " another mapping to " << d
+ << " priority " << prio;
+ }
+ else
+ {
+ if (verb >= 4)
+ trace << "overriding dependency prefix " << p << '\n'
+ << " existing mapping to " << v.directory
+ << " priority " << v.priority << '\n'
+ << " new mapping to " << d
+ << " priority " << prio;
+
+ v.directory = move (d);
+ v.priority = prio;
+ }
+ }
+ else
+ {
+ l6 ([&]{trace << p << " -> " << d << " priority " << prio;});
+ m.emplace (move (p), prefix_value {move (d), prio});
+ }
+ };
+
+ size_t prio (0);
+ for (bool e (false); !e; ++prio)
+ {
+ dir_path n (p.directory ());
+ e = n.empty ();
+ enter ((e ? move (p) : p), (e ? move (d) : d), prio);
+ p = move (n);
+ }
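+
+          // To illustrate, with -I/tmp and out_base /tmp/foo/bar/ the
+          // above loop enters the prefixes foo/bar (priority 0) and foo
+          // (priority 1), both mapping to /tmp/.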
+ }
+ }
+ }
+
+ auto compile_rule::
+ build_prefix_map (const scope& bs,
+ action a,
+ target& t,
+ linfo li) const -> prefix_map
+ {
+ prefix_map m;
+
+ // First process our own.
+ //
+ append_prefixes (m, t, c_poptions);
+ append_prefixes (m, t, x_poptions);
+
+ // Then process the include directories from prerequisite libraries.
+ //
+ append_lib_prefixes (bs, m, a, t, li);
+
+ return m;
+ }
+
+ // Return the next make prerequisite starting from the specified
+ // position and update position to point to the start of the
+ // following prerequisite or l.size() if there are none left.
+ //
+ static string
+ next_make (const string& l, size_t& p)
+ {
+ size_t n (l.size ());
+
+ // Skip leading spaces.
+ //
+ for (; p != n && l[p] == ' '; p++) ;
+
+ // Lines containing multiple prerequisites are 80 characters max.
+ //
+ string r;
+ r.reserve (n);
+
+ // Scan the next prerequisite while watching out for escape sequences.
+ //
+ for (; p != n && l[p] != ' '; p++)
+ {
+ char c (l[p]);
+
+ if (p + 1 != n)
+ {
+ if (c == '$')
+ {
+ // Got to be another (escaped) '$'.
+ //
+ if (l[p + 1] == '$')
+ ++p;
+ }
+ else if (c == '\\')
+ {
+ // This may or may not be an escape sequence depending on whether
+ // what follows is "escapable".
+ //
+ switch (c = l[++p])
+ {
+ case '\\': break;
+ case ' ': break;
+ default: c = '\\'; --p; // Restore.
+ }
+ }
+ }
+
+ r += c;
+ }
+
+ // Skip trailing spaces.
+ //
+ for (; p != n && l[p] == ' '; p++) ;
+
+ // Skip final '\'.
+ //
+ if (p == n - 1 && l[p] == '\\')
+ p++;
+
+ return r;
+ }
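+
+    // To illustrate (a hypothetical input), for the line:
+    //
+    //   foo.cxx foo\ bar.h baz$$.h \
+    //
+    // successive calls return "foo.cxx", "foo bar.h", and "baz$.h", after
+    // which p equals l.size ().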
+
+ // VC /showIncludes output. The first line is the file being compiled
+ // (handled by our caller). Then we have the list of headers, one per
+ // line, in this form (text can presumably be translated):
+ //
+ // Note: including file: C:\Program Files (x86)\[...]\iostream
+ //
+ // Finally, if we hit a non-existent header, then we end with an error
+ // line in this form:
+ //
+ // x.cpp(3): fatal error C1083: Cannot open include file: 'd/h.hpp':
+ // No such file or directory
+ //
+ // Distinguishing between the include note and the include error is
+    // easy: we can just check for C1083. Distinguishing between the note and
+ // other errors/warnings is harder: an error could very well end with
+ // what looks like a path so we cannot look for the note but rather have
+ // to look for an error. Here we assume that a line containing ' CNNNN:'
+ // is an error. Should be robust enough in the face of language
+ // translation, etc.
+ //
+ // It turns out C1083 is also used when we are unable to open the main
+ // source file and the error line looks like this:
+ //
+ // c1xx: fatal error C1083: Cannot open source file: 's.cpp': No such
+ // file or directory
+
+ // Sense whether this is an include note (return npos) or a diagnostics
+    // line (return position of the NNNN code in CNNNN).
+ //
+ static inline size_t
+ next_show_sense (const string& l)
+ {
+ size_t p (l.find (':'));
+
+ for (size_t n (l.size ());
+ p != string::npos;
+ p = ++p != n ? l.find (':', p) : string::npos)
+ {
+ auto isnum = [](char c) {return c >= '0' && c <= '9';};
+
+ if (p > 5 &&
+ l[p - 6] == ' ' &&
+ l[p - 5] == 'C' &&
+ isnum (l[p - 4]) &&
+ isnum (l[p - 3]) &&
+ isnum (l[p - 2]) &&
+ isnum (l[p - 1]))
+ {
+ p -= 4; // Start of the error code.
+ break;
+ }
+ }
+
+ return p;
+ }
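+
+    // To illustrate, for the include error line from the example above:
+    //
+    //   x.cpp(3): fatal error C1083: Cannot open include file: ...
+    //
+    // it returns the position of "1083" while for an include note line it
+    // returns npos.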
+
+ // Extract the include path from the VC /showIncludes output line. Return
+ // empty string if the line is not an include note or include error. Set
+ // the good_error flag if it is an include error (which means the process
+ // will terminate with the error status that needs to be ignored).
+ //
+ static string
+ next_show (const string& l, bool& good_error)
+ {
+ // The include error should be the last line that we handle.
+ //
+ assert (!good_error);
+
+ size_t p (next_show_sense (l));
+ if (p == string::npos)
+ {
+ // Include note. We assume the path is always at the end but need to
+ // handle both absolute Windows and POSIX ones.
+ //
+ // Note that VC appears to always write the absolute path to the
+ // included file even if it is ""-included and the source path is
+ // relative. Aren't we lucky today?
+ //
+ p = l.rfind (':');
+
+ if (p != string::npos)
+ {
+ // See if this one is part of the Windows drive letter.
+ //
+ if (p > 1 && p + 1 < l.size () && // 2 chars before, 1 after.
+ l[p - 2] == ' ' &&
+ alpha (l[p - 1]) &&
+ path::traits::is_separator (l[p + 1]))
+ p = l.rfind (':', p - 2);
+ }
+
+ if (p != string::npos)
+ {
+ // VC uses indentation to indicate the include nesting so there
+ // could be any number of spaces after ':'. Skip them.
+ //
+ p = l.find_first_not_of (' ', p + 1);
+ }
+
+ if (p == string::npos)
+ fail << "unable to parse /showIncludes include note line";
+
+ return string (l, p);
+ }
+ else if (l.compare (p, 4, "1083") == 0 &&
+ l.compare (0, 5, "c1xx:") != 0 /* Not the main source file. */ )
+ {
+ // Include error. The path is conveniently quoted with ''.
+ //
+ size_t p2 (l.rfind ('\''));
+
+ if (p2 != string::npos && p2 != 0)
+ {
+ size_t p1 (l.rfind ('\'', p2 - 1));
+
+ if (p1 != string::npos)
+ {
+ good_error = true;
+ return string (l, p1 + 1 , p2 - p1 - 1);
+ }
+ }
+
+ fail << "unable to parse /showIncludes include error line" << endf;
+ }
+ else
+ {
+ // Some other error.
+ //
+ return string ();
+ }
+ }
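+
+    // To illustrate (hypothetical values):
+    //
+    //   bool ge (false);
+    //   next_show ("Note: including file:  C:\\tmp\\foo.hpp", ge);
+    //
+    // Returns "C:\tmp\foo.hpp" leaving ge false while the C1083 include
+    // error line from the example above would return d/h.hpp and set ge
+    // to true.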
+
+ // Extract and inject header dependencies. Return the preprocessed source
+ // file as well as an indication if it is usable for compilation (see
+ // below for details).
+ //
+ pair<auto_rmfile, bool> compile_rule::
+ extract_headers (action a,
+ const scope& bs,
+ file& t,
+ linfo li,
+ const file& src,
+ const match_data& md,
+ depdb& dd,
+ bool& updating,
+ timestamp mt) const
+ {
+ tracer trace (x, "compile_rule::extract_headers");
+
+ l5 ([&]{trace << "target: " << t;});
+
+ auto_rmfile psrc;
+ bool puse (true);
+
+ // If things go wrong (and they often do in this area), give the user a
+      // bit of extra context.
+ //
+ auto df = make_diag_frame (
+ [&src](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while extracting header dependencies from " << src;
+ });
+
+ const scope& rs (*bs.root_scope ());
+
+ // Preprocess mode that preserves as much information as possible while
+ // still performing inclusions. Also serves as a flag indicating whether
+ // this compiler uses the separate preprocess and compile setup.
+ //
+ const char* pp (nullptr);
+
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ // -fdirectives-only is available since GCC 4.3.0.
+ //
+ if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
+ pp = "-fdirectives-only";
+
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ // -frewrite-includes is available since vanilla Clang 3.2.0.
+ //
+ // Apple Clang 5.0 is based on LLVM 3.3svn so it should have this
+          // option (4.2 is based on 3.2svn so it may or may not have it and,
+ // no, we are not going to try to find out).
+ //
+ if (cid == compiler_id::clang_apple
+ ? (cmaj >= 5)
+ : (cmaj > 3 || (cmaj == 3 && cmin >= 2)))
+ pp = "-frewrite-includes";
+
+ break;
+ }
+ case compiler_id::msvc:
+ {
+ pp = "/C";
+ break;
+ }
+ case compiler_id::icc:
+ break;
+ }
+
+ // Initialize lazily, only if required.
+ //
+ environment env;
+ cstrings args;
+ string out; // Storage.
+
+ // Some compilers in certain modes (e.g., when also producing the
+      // preprocessed output) are incapable of writing the dependency
+ // information to stdout. In this case we use a temporary file.
+ //
+ auto_rmfile drm;
+
+      // Here is the problem: neither GCC nor Clang allows -MG (treat missing
+      // header as generated) when we produce any kind of other output (-MD).
+      // And that's probably for the best since otherwise the semantics get
+      // pretty hairy (e.g., what would be the exit code and the state of the
+      // output?).
+ //
+ // One thing to note about generated headers: if we detect one, then,
+ // after generating it, we re-run the compiler since we need to get
+ // this header's dependencies.
+ //
+ // So this is how we are going to work around this problem: we first run
+ // with -E but without -MG. If there are any errors (maybe because of
+      // generated headers, maybe not), we restart with -MG and without -E.
+      // If this fixes the error (so it was a generated header after all),
+      // then we have to restart, at which point we go back to -E and no -MG.
+      // And we keep yo-yoing like this. Missing generated headers will
+      // probably be a fairly rare occurrence so this shouldn't be too expensive.
+ //
+ // Actually, there is another error case we would like to handle: an
+ // outdated generated header that is now causing an error (e.g., because
+ // of a check that is now triggering #error or some such). So there are
+ // actually three error cases: outdated generated header, missing
+ // generated header, and some other error. To handle the outdated case
+ // we need the compiler to produce the dependency information even in
+ // case of an error. Clang does it, for VC we parse diagnostics
+ // ourselves, but GCC does not (but a patch has been submitted).
+ //
+ // So the final plan is then as follows:
+ //
+      // 1. Start without -MG and with suppressed diagnostics.
+ // 2. If error but we've updated a header, then repeat step 1.
+ // 3. Otherwise, restart with -MG and diagnostics.
+ //
+ // Note that below we don't even check if the compiler supports the
+ // dependency info on error. We just try to use it and if it's not
+ // there we ignore the io error since the compiler has failed.
+ //
+ bool args_gen; // Current state of args.
+ size_t args_i; // Start of the -M/-MD "tail".
+
+ // Ok, all good then? Not so fast, the rabbit hole is deeper than it
+      // seems: when we run with -E we have to discard diagnostics. This is
+ // not a problem for errors since they will be shown on the re-run but
+ // it is for (preprocessor) warnings.
+ //
+ // Clang's -frewrite-includes is nice in that it preserves the warnings
+ // so they will be shown during the compilation of the preprocessed
+ // source. They are also shown during -E but that we discard. And unlike
+ // GCC, in Clang -M does not imply -w (disable warnings) so it would
+ // have been shown in -M -MG re-runs but we suppress that with explicit
+ // -w. All is good in the Clang land then (even -Werror works nicely).
+ //
+      // GCC's -fdirectives-only, on the other hand, processes all the
+      // directives so they are gone from the preprocessed source. Here is
+      // what we are going to do to work around this: we will detect if any
+      // diagnostics have been written to stderr on the -E run. If that's the
+ // case (but the compiler indicated success) then we assume they are
+ // warnings and disable the use of the preprocessed output for
+ // compilation. This in turn will result in compilation from source
+ // which will display the warnings. Note that we may still use the
+ // preprocessed output for other things (e.g., C++ module dependency
+ // discovery). BTW, another option would be to collect all the
+ // diagnostics and then dump it if the run is successful, similar to
+ // the VC semantics (and drawbacks) described below.
+ //
+ // Finally, for VC, things are completely different: there is no -MG
+ // equivalent and we handle generated headers by analyzing the
+ // diagnostics. This means that unlike in the above two cases, the
+ // preprocessor warnings are shown during dependency extraction, not
+ // compilation. Not ideal but that's the best we can do. Or is it -- we
+ // could implement ad hoc diagnostics sensing... It appears warnings are
+ // in the C4000-C4999 code range though there can also be note lines
+ // which don't have any C-code.
+ //
+ // BTW, triggering a warning in the VC preprocessor is not easy; there
+ // is no #warning and pragmas are passed through to the compiler. One
+ // way to do it is to redefine a macro, for example:
+ //
+ // hello.cxx(4): warning C4005: 'FOO': macro redefinition
+ // hello.cxx(3): note: see previous definition of 'FOO'
+ //
+ // So seeing that it is hard to trigger a legitimate VC preprocessor
+ // warning, for now, we will just treat them as errors by adding /WX.
+ //
+ // Note: diagnostics sensing is currently only supported if dependency
+ // info is written to a file (see above).
+ //
+ bool sense_diag (false);
+
+ // And here is another problem: if we have an already generated header
+ // in src and the one in out does not yet exist, then the compiler will
+ // pick the one in src and we won't even notice. Note that this is not
+ // only an issue with mixing in- and out-of-tree builds (which does feel
+ // wrong but is oh so convenient): this is also a problem with
+ // pre-generated headers, a technique we use to make installing the
+      // generator optional for end-users by shipping pre-generated headers.
+ //
+ // This is a nasty problem that doesn't seem to have a perfect solution
+ // (except, perhaps, C++ modules). So what we are going to do is try to
+ // rectify the situation by detecting and automatically remapping such
+ // mis-inclusions. It works as follows.
+ //
+ // First we will build a map of src/out pairs that were specified with
+ // -I. Here, for performance and simplicity, we will assume that they
+ // always come in pairs with out first and src second. We build this
+ // map lazily only if we are running the preprocessor and reuse it
+ // between restarts.
+ //
+ // With the map in hand we can then check each included header for
+ // potentially having a doppelganger in the out tree. If this is the
+ // case, then we calculate a corresponding header in the out tree and,
+ // (this is the most important part), check if there is a target for
+ // this header in the out tree. This should be fairly accurate and not
+ // require anything explicit from the user except perhaps for a case
+ // where the header is generated out of nothing (so there is no need to
+ // explicitly mention its target in the buildfile). But this probably
+ // won't be very common.
+ //
+ // One tricky area in this setup are target groups: if the generated
+ // sources are mentioned in the buildfile as a group, then there might
+ // be no header target (yet). The way we solve this is by requiring code
+ // generator rules to cooperate and create at least the header target as
+ // part of the group creation. While not all members of the group may be
+ // generated depending on the options (e.g., inline files might be
+ // suppressed), headers are usually non-optional.
+ //
+ // Note that we use path_map instead of dir_path_map to allow searching
+ // using path (file path).
+ //
+ using srcout_map = path_map<dir_path>;
+ srcout_map so_map;
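+
+      // To illustrate, given -I/tmp/out -I/tmp/src the map would contain
+      // the /tmp/src/ -> /tmp/out/ entry, which allows remapping a
+      // mis-included /tmp/src/foo/gen.hxx to /tmp/out/foo/gen.hxx provided
+      // there is a target for the latter in the out tree.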
+
+ // The gen argument to init_args() is in/out. The caller signals whether
+ // to force the generated header support and on return it signals
+ // whether this support is enabled. The first call to init_args is
+ // expected to have gen false.
+ //
+ // Return NULL if the dependency information goes to stdout and a
+ // pointer to the temporary file path otherwise.
+ //
+ auto init_args = [&t, a, li,
+ &src, &md, &psrc, &sense_diag,
+ &rs, &bs,
+ pp, &env, &args, &args_gen, &args_i, &out, &drm,
+ &so_map, this]
+ (bool& gen) -> const path*
+ {
+ const path* r (nullptr);
+
+ if (args.empty ()) // First call.
+ {
+ assert (!gen);
+
+ // We use absolute/relative paths in the dependency output to
+ // distinguish existing headers from (missing) generated. Which
+ // means we have to (a) use absolute paths in -I and (b) pass
+ // absolute source path (for ""-includes). That (b) is a problem:
+ // if we use an absolute path, then all the #line directives will be
+ // absolute and all the diagnostics will have long, noisy paths
+ // (actually, we will still have long paths for diagnostics in
+ // headers).
+ //
+ // To work around this we used to pass a relative path to the source
+ // file and then check every relative path in the dependency output
+ // for existence in the source file's directory. This is not without
+ // issues: it is theoretically possible for a generated header that
+ // is <>-included and found via -I to exist in the source file's
+ // directory. Note, however, that this is a lot more likely to
+ // happen with prefix-less inclusion (e.g., <foo>) and in this case
+ // we assume the file is in the project anyway. And if there is a
+ // conflict with a prefixed include (e.g., <bar/foo>), then, well,
+ // we will just have to get rid of quoted includes (which are
+ // generally a bad idea, anyway).
+ //
+ // But then this approach (relative path) fell apart further when we
+          // tried to implement precise change detection: the preprocessed
+          // output would change depending on where it was compiled because
+ // of #line (which we could work around) and __FILE__/assert()
+ // (which we can't really do anything about). So it looks like using
+ // the absolute path is the lesser of all the evils (and there are
+ // many).
+ //
+ // Note that we detect and diagnose relative -I directories lazily
+ // when building the include prefix map.
+ //
+ args.push_back (cpath.recall_string ());
+
+ // Add *.export.poptions from prerequisite libraries.
+ //
+ append_lib_options (bs, args, a, t, li);
+
+ append_options (args, t, c_poptions);
+ append_options (args, t, x_poptions);
+
+ // Populate the src-out with the -I$out_base -I$src_base pairs.
+ //
+ {
+ // Try to be fast and efficient by reusing buffers as much as
+ // possible.
+ //
+ string ds;
+
+            // Innermost scope whose out tree contains the previous -I
+            // path, plus the difference between that path and the scope's
+            // out_path (normally empty).
+ //
+ const scope* s (nullptr);
+ dir_path p;
+
+ for (auto i (args.begin ()), e (args.end ()); i != e; ++i)
+ {
+ // -I can either be in the "-Ifoo" or "-I foo" form. For VC it
+ // can also be /I.
+ //
+ const char* o (*i);
+ size_t n (strlen (o));
+
+ if (n < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I')
+ {
+ s = nullptr;
+ continue;
+ }
+
+ if (n == 2)
+ {
+ if (++i == e)
+ break; // Let the compiler complain.
+
+ ds = *i;
+ }
+ else
+ ds.assign (o + 2, n - 2);
+
+ if (!ds.empty ())
+ {
+ // Note that we don't normalize the paths since it would be
+                // quite expensive and normally the pairs we are interested in
+ // are already normalized (since they are usually specified as
+ // -I$src/out_*). We just need to add a trailing directory
+ // separator if it's not already there.
+ //
+ if (!dir_path::traits::is_separator (ds.back ()))
+ ds += dir_path::traits::directory_separator;
+
+ dir_path d (move (ds), dir_path::exact); // Move the buffer in.
+
+ // Ignore invalid paths (buffer is not moved).
+ //
+ if (!d.empty ())
+ {
+ // Ignore any paths containing '.', '..' components. Allow
+                  // any directory separators though (think -I$src_root/foo
+ // on Windows).
+ //
+ if (d.absolute () && d.normalized (false))
+ {
+ // If we have a candidate out_base, see if this is its
+ // src_base.
+ //
+ if (s != nullptr)
+ {
+ const dir_path& bp (s->src_path ());
+
+ if (d.sub (bp))
+ {
+ if (p.empty () || d.leaf (bp) == p)
+ {
+ // We've got a pair.
+ //
+ so_map.emplace (move (d), s->out_path () / p);
+ s = nullptr; // Taken.
+ continue;
+ }
+ }
+
+ // Not a pair. Fall through to consider as out_base.
+ //
+ s = nullptr;
+ }
+
+ // See if this path is inside a project with an out-of-
+ // tree build and is in the out directory tree.
+ //
+ const scope& bs (scopes.find (d));
+ if (bs.root_scope () != nullptr)
+ {
+ const dir_path& bp (bs.out_path ());
+ if (bp != bs.src_path ())
+ {
+ bool e;
+ if ((e = (d == bp)) || d.sub (bp))
+ {
+ s = &bs;
+ if (e)
+ p.clear ();
+ else
+ p = d.leaf (bp);
+ }
+ }
+ }
+ }
+ else
+ s = nullptr;
+
+ ds = move (d).string (); // Move the buffer out.
+ }
+ else
+ s = nullptr;
+ }
+ else
+ s = nullptr;
+ }
+ }
+
+ // Extra system header dirs (last).
+ //
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ append_option_values (
+ args, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ if (md.symexport)
+ append_symexport_options (args, t);
+
+ // Some compile options (e.g., -std, -m) affect the preprocessor.
+ //
+ // Currently Clang supports importing "header modules" even when in
+ // the TS mode. And "header modules" support macros which means
+ // imports have to be resolved during preprocessing. Which poses a
+ // bit of a chicken and egg problem for us. For now, the workaround
+ // is to remove the -fmodules-ts option when preprocessing. Hopefully
+ // there will be a "pure modules" mode at some point.
+ //
+
+ // Don't treat warnings as errors.
+ //
+ const char* werror (nullptr);
+ switch (cclass)
+ {
+ case compiler_class::gcc: werror = "-Werror"; break;
+ case compiler_class::msvc: werror = "/WX"; break;
+ }
+
+ bool clang (cid == compiler_id::clang ||
+ cid == compiler_id::clang_apple);
+
+ append_options (args, t, c_coptions, werror);
+ append_options (args, t, x_coptions, werror);
+ append_options (args, tstd,
+ tstd.size () - (modules && clang ? 1 : 0));
+
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ assert (pp != nullptr);
+
+ args.push_back ("/nologo");
+
+ // See perform_update() for details on overriding the default
+ // exceptions and runtime.
+ //
+ if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+ args.push_back ("/EHsc");
+
+ if (!find_option_prefixes ({"/MD", "/MT"}, args))
+ args.push_back ("/MD");
+
+ args.push_back ("/P"); // Preprocess to file.
+ args.push_back ("/showIncludes"); // Goes to stdout (with diag).
+ args.push_back (pp); // /C (preserve comments).
+ args.push_back ("/WX"); // Warning as error (see above).
+
+ psrc = auto_rmfile (t.path () + x_pext);
+
+ if (cast<uint64_t> (rs[x_version_major]) >= 18)
+ {
+ args.push_back ("/Fi:");
+ args.push_back (psrc.path.string ().c_str ());
+ }
+ else
+ {
+ out = "/Fi" + psrc.path.string ();
+ args.push_back (out.c_str ());
+ }
+
+ args.push_back (langopt (md)); // Compile as.
+ gen = args_gen = true;
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ if (t.is_a<objs> ())
+ {
+                // On Darwin and Win32, -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ args.push_back ("-fPIC");
+ }
+
+ // Depending on the compiler, decide whether (and how) we can
+ // produce preprocessed output as a side effect of dependency
+ // extraction.
+ //
+ // Note: -MM -MG skips missing <>-included.
+
+ // Clang's -M does not imply -w (disable warnings). We also
+ // don't need them in the -MD case (see above) so disable for
+ // both.
+ //
+ if (clang)
+ args.push_back ("-w");
+
+ // Previously we used '*' as a target name but it gets expanded
+ // to the current directory file names by GCC (4.9) that comes
+ // with MSYS2 (2.4). Yes, this is the (bizarre) behavior of GCC
+ // being executed in the shell with -MQ '*' option and not just
+ // -MQ *.
+ //
+ args.push_back ("-MQ"); // Quoted target name.
+ args.push_back ("^"); // Old versions can't do empty target.
+
+ args.push_back ("-x");
+ args.push_back (langopt (md));
+
+ if (pp != nullptr)
+ {
+ // Note that the options are carefully laid out to be easy to
+ // override (see below).
+ //
+ args_i = args.size ();
+
+ args.push_back ("-MD");
+ args.push_back ("-E");
+ args.push_back (pp);
+
+ // Dependency output.
+ //
+ args.push_back ("-MF");
+
+ // GCC is not capable of writing the dependency info to
+ // stdout. We also need to sense the diagnostics on the -E
+ // runs.
+ //
+ if (cid == compiler_id::gcc)
+ {
+ // Use the .t extension (for "temporary"; .d is taken).
+ //
+ r = &(drm = auto_rmfile (t.path () + ".t")).path;
+ args.push_back (r->string ().c_str ());
+
+ sense_diag = true;
+ }
+ else
+ args.push_back ("-");
+
+ // Preprocessor output.
+ //
+ psrc = auto_rmfile (t.path () + x_pext);
+ args.push_back ("-o");
+ args.push_back (psrc.path.string ().c_str ());
+ }
+ else
+ {
+ args.push_back ("-M");
+ args.push_back ("-MG"); // Treat missing headers as generated.
+ }
+
+ gen = args_gen = (pp == nullptr);
+ break;
+ }
+ }
+
+ args.push_back (src.path ().string ().c_str ());
+ args.push_back (nullptr);
+
+ // Note: only doing it here.
+ //
+ if (!env.empty ())
+ env.push_back (nullptr);
+ }
+ else
+ {
+ assert (gen != args_gen);
+
+ size_t i (args_i);
+
+ if (gen)
+ {
+ // Overwrite.
+ //
+ args[i++] = "-M";
+ args[i++] = "-MG";
+ args[i++] = src.path ().string ().c_str ();
+ args[i] = nullptr;
+
+ if (cid == compiler_id::gcc)
+ {
+ sense_diag = false;
+ }
+ }
+ else
+ {
+ // Restore.
+ //
+ args[i++] = "-MD";
+ args[i++] = "-E";
+ args[i++] = pp;
+ args[i] = "-MF";
+
+ if (cid == compiler_id::gcc)
+ {
+ r = &drm.path;
+ sense_diag = true;
+ }
+ }
+
+ args_gen = gen;
+ }
+
+ return r;
+ };
+
+ // Build the prefix map lazily only if we have non-existent files.
+ // Also reuse it over restarts since it doesn't change.
+ //
+ optional<prefix_map> pfx_map;
+
+ // If any prerequisites that we have extracted changed, then we have to
+ // redo the whole thing. The reason for this is auto-generated headers:
+ // the updated header may now include a yet-non-existent header. Unless
+ // we discover this and generate it (which, BTW, will trigger another
+ // restart since that header, in turn, can also include auto-generated
+ // headers), we will end up with an error during compilation proper.
+ //
+ // One complication with this restart logic is that we will see a
+ // "prefix" of prerequisites that we have already processed (i.e., they
+ // are already in our prerequisite_targets list) and we don't want to
+ // keep redoing this over and over again. One thing to note, however, is
+ // that the prefix that we have seen on the previous run must appear
+ // exactly the same in the subsequent run. The reason for this is that
+ // none of the files that it can possibly be based on have changed and
+ // thus it should be exactly the same. To put it another way, the
+ // presence or absence of a file in the dependency output can only
+ // depend on the previous files (assuming the compiler outputs them as
+ // it encounters them and it is hard to think of a reason why
+ // someone would do otherwise). And we have already made sure that all those
+ // files are up to date. And here is the way we are going to exploit
+ // this: we are going to keep track of how many prerequisites we have
+ // processed so far and on restart skip right to the next one.
+ //
+ // And one more thing: most of the time this list of headers would stay
+ // unchanged and extracting them by running the compiler every time is a
+ // bit wasteful. So we are going to cache them in the depdb. If the db
+ // hasn't been invalidated yet (e.g., because the compiler options have
+ // changed), then we start by reading from it. If anything is out of
+ // date then we use the same restart and skip logic to switch to the
+ // compiler run.
+ //
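+ // To illustrate, the depdb fragment we maintain here is a list of
+ // header paths terminated with a blank line, along these lines
+ // (illustrative):
+ //
+ // /usr/include/stdio.h
+ // /tmp/proj/gen/config.hxx
+ //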
+ size_t skip_count (0);
+
+ // Update and add a header file to the list of prerequisite targets.
+ // Depending on the cache flag, the file is assumed to either have come
+ // from the depdb cache or from the compiler run. Return whether the
+ // extraction process should be restarted.
+ //
+ auto add = [&trace, &pfx_map, &so_map,
+ a, &t, li,
+ &dd, &updating, &skip_count,
+ &bs, this]
+ (path f, bool cache, timestamp mt) -> bool
+ {
+ // Find or maybe insert the target. The directory is only moved
+ // from if insert is true.
+ //
+ auto find = [&trace, &t, this]
+ (dir_path&& d, path&& f, bool insert) -> const path_target*
+ {
+ // Split the file into its name part and extension. Here we can
+ // assume the name part is a valid filesystem name.
+ //
+ // Note that if the file has no extension, we record an empty
+ // extension rather than NULL (which would signify that the default
+ // extension should be added).
+ //
+ string e (f.extension ());
+ string n (move (f).string ());
+
+ if (!e.empty ())
+ n.resize (n.size () - e.size () - 1); // One for the dot.
+
+ // Determine the target type.
+ //
+ const target_type* tt (nullptr);
+
+ // See if this directory is part of any project out_root hierarchy.
+ // Note that this will miss all the headers that come from src_root
+ // (so they will be treated as generic C headers below). Generally,
+ // we don't have the ability to determine that some file belongs to
+ // src_root of some project. But that's not a problem for our
+ // purposes: it is only important for us to accurately determine
+ // target types for headers that could be auto-generated.
+ //
+ // While at it, also try to determine if this target is from the src
+ // or out tree of said project.
+ //
+ dir_path out;
+
+ const scope& bs (scopes.find (d));
+ if (const scope* rs = bs.root_scope ())
+ {
+ tt = map_extension (bs, n, e);
+
+ if (bs.out_path () != bs.src_path () && d.sub (bs.src_path ()))
+ out = out_src (d, *rs);
+ }
+
+ // If it is outside any project, or the project doesn't have such an
+ // extension, assume it is a plain old C header.
+ //
+ if (tt == nullptr)
+ {
+ // If the project doesn't "know" this extension then we won't
+ // possibly find an explicit target of this type.
+ //
+ if (!insert)
+ return nullptr;
+
+ tt = &h::static_type;
+ }
+
+ // Find or insert target.
+ //
+ // @@ OPT: move d, out, n
+ //
+ const target* r;
+ if (insert)
+ r = &search (t, *tt, d, out, n, &e, nullptr);
+ else
+ {
+ // Note that we skip any target type-specific searches (like for
+ // an existing file) and go straight for the target object since
+ // we need to find the target explicitly spelled out.
+ //
+ r = targets.find (*tt, d, out, n, e, trace);
+ }
+
+ return static_cast<const path_target*> (r);
+ };
+
+ // If it's not absolute then it either does not (yet) exist or is
+ // a relative ""-include (see init_args() for details). Reduce the
+ // second case to absolute.
+ //
+ // Note: we now always use absolute path to the translation unit so
+ // this no longer applies.
+ //
+#if 0
+ if (f.relative () && rels.relative ())
+ {
+ // If the relative source path has a directory component, make sure
+ // it matches since ""-include will always start with that (none of
+ // the compilers we support try to normalize this path). Failing that
+ // we may end up searching for a generated header in a random
+ // (working) directory.
+ //
+ const string& fs (f.string ());
+ const string& ss (rels.string ());
+
+ size_t p (path::traits::rfind_separator (ss));
+
+ if (p == string::npos || // No directory.
+ (fs.size () > p + 1 &&
+ path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0))
+ {
+ path t (work / f); // The rels path is relative to work.
+
+ if (exists (t))
+ f = move (t);
+ }
+ }
+#endif
+
+ const path_target* pt (nullptr);
+
+ // If still relative then it does not exist.
+ //
+ if (f.relative ())
+ {
+ f.normalize ();
+
+ // This is probably as often an error as an auto-generated file, so
+ // trace at level 4.
+ //
+ l4 ([&]{trace << "non-existent header '" << f << "'";});
+
+ if (!pfx_map)
+ pfx_map = build_prefix_map (bs, a, t, li);
+
+ // First try the whole file. Then just the directory.
+ //
+ // @@ Has to be a separate map since the prefix can be the same as
+ // the file name.
+ //
+ // auto i (pfx_map->find (f));
+
+ // Find the most qualified prefix of which we are a sub-path.
+ //
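+ // For example (illustrative), a non-existent foo/bar.hxx with a
+ // prefix map entry foo -> /out/proj/gen would be mapped to the
+ // (potentially auto-generated) /out/proj/gen/foo/bar.hxx.
+ //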
+ if (!pfx_map->empty ())
+ {
+ dir_path d (f.directory ());
+ auto i (pfx_map->find_sup (d));
+
+ if (i != pfx_map->end ())
+ {
+ const dir_path& pd (i->second.directory);
+
+ // If this is a prefixless mapping, then only use it if we can
+ // resolve it to an existing target (i.e., it is explicitly
+ // spelled out in a buildfile).
+ //
+ // Note that at some point we will probably have a list of
+ // directories.
+ //
+ pt = find (pd / d, f.leaf (), !i->first.empty ());
+ if (pt != nullptr)
+ {
+ f = pd / f;
+ l4 ([&]{trace << "mapped as auto-generated " << f;});
+ }
+ }
+ }
+
+ if (pt == nullptr)
+ {
+ diag_record dr (fail);
+ dr << "header '" << f << "' not found and cannot be generated";
+ //for (const auto& p: pm)
+ // dr << info << p.first.string () << " -> " << p.second.string ();
+ }
+ }
+ else
+ {
+ // We used to just normalize the path but that could result in an
+ // invalid path (e.g., on CentOS 7 with Clang 3.4) because of the
+ // symlinks. So now we realize (i.e., realpath(3)) it instead.
+ // Unless it comes from the depdb, in which case we've already done
+ // that. This is also where we handle src-out remap (again, not
+ // needed if cached).
+ //
+ if (!cache)
+ {
+ // While we can reasonably expect this path to exist, things do
+ // go south from time to time (like compiling under wine with
+ // file wlantypes.h included as WlanTypes.h).
+ //
+ try
+ {
+ f.realize ();
+ }
+ catch (const invalid_path&)
+ {
+ fail << "invalid header path '" << f << "'";
+ }
+ catch (const system_error& e)
+ {
+ fail << "invalid header path '" << f << "': " << e;
+ }
+
+ if (!so_map.empty ())
+ {
+ // Find the most qualified prefix of which we are a sub-path.
+ //
+ auto i (so_map.find_sup (f));
+ if (i != so_map.end ())
+ {
+ // Ok, there is an out tree for this header. Remap to a path
+ // from the out tree and see if there is a target for it.
+ //
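+ // For example (illustrative), /src/proj/foo/bar.hxx with an
+ // so_map entry /src/proj -> /out/proj would be remapped to
+ // /out/proj/foo/bar.hxx.
+ //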
+ dir_path d (i->second);
+ d /= f.leaf (i->first).directory ();
+ pt = find (move (d), f.leaf (), false); // d is not moved from.
+
+ if (pt != nullptr)
+ {
+ path p (d / f.leaf ());
+ l4 ([&]{trace << "remapping " << f << " to " << p;});
+ f = move (p);
+ }
+ }
+ }
+ }
+
+ if (pt == nullptr)
+ {
+ l6 ([&]{trace << "injecting " << f;});
+ pt = find (f.directory (), f.leaf (), true);
+ }
+ }
+
+ // Cache the path.
+ //
+ const path& pp (pt->path (move (f)));
+
+ // Match to a rule.
+ //
+ // If we are reading the cache, then it is possible the file has since
+ // been removed (think of a header in /usr/local/include that has been
+ // uninstalled and now we need to use one from /usr/include). This
+ // will lead to the match failure which we translate to a restart.
+ //
+ if (!cache)
+ build2::match (a, *pt);
+ else if (!build2::try_match (a, *pt).first)
+ {
+ dd.write (); // Invalidate this line.
+ updating = true;
+ return true;
+ }
+
+ // Update.
+ //
+ bool restart (update (trace, a, *pt, mt));
+
+ // Verify/add it to the dependency database. We do it after update in
+ // order not to add bogus files (non-existent and without a way to
+ // update).
+ //
+ if (!cache)
+ dd.expect (pp);
+
+ // Add to our prerequisite target list.
+ //
+ t.prerequisite_targets[a].push_back (pt);
+ skip_count++;
+
+ updating = updating || restart;
+ return restart;
+ };
+
+ // If nothing so far has invalidated the dependency database, then try
+ // the cached data before running the compiler.
+ //
+ bool cache (!updating);
+
+ // See init_args() above for details on generated header support.
+ //
+ bool gen (false);
+ optional<bool> force_gen;
+ optional<size_t> force_gen_skip; // Skip count at last force_gen run.
+
+ const path* drmp (nullptr); // Points to drm.path () if active.
+
+ for (bool restart (true); restart; cache = false)
+ {
+ restart = false;
+
+ if (cache)
+ {
+ // If any, this is always the first run.
+ //
+ assert (skip_count == 0);
+
+ // We should always end with a blank line.
+ //
+ for (;;)
+ {
+ string* l (dd.read ());
+
+ // If the line is invalid, run the compiler.
+ //
+ if (l == nullptr)
+ {
+ restart = true;
+ break;
+ }
+
+ if (l->empty ()) // Done, nothing changed.
+ {
+ // If modules are enabled, then we keep the preprocessed output
+ // around (see apply() for details).
+ //
+ return modules
+ ? make_pair (auto_rmfile (t.path () + x_pext, false), true)
+ : make_pair (auto_rmfile (), false);
+ }
+
+ // If this header came from the depdb, make sure it is no older
+ // than the target (if it has changed since the target was
+ // updated, then the cached data is stale).
+ //
+ restart = add (path (move (*l)), true, mt);
+
+ if (restart)
+ {
+ l6 ([&]{trace << "restarting (cache)";});
+ break;
+ }
+ }
+ }
+ else
+ {
+ try
+ {
+ if (force_gen)
+ gen = *force_gen;
+
+ if (args.empty () || gen != args_gen)
+ drmp = init_args (gen);
+
+ if (verb >= 3)
+ print_process (args.data ()); // Disable pipe mode.
+
+ process pr;
+
+ try
+ {
+ // Assume the preprocessed output (if produced) is usable
+ // until proven otherwise.
+ //
+ puse = true;
+
+ // Save the timestamp just before we start preprocessing. If
+ // we depend on any header that has been updated since, then
+ // we should assume we've "seen" the old copy and re-process.
+ //
+ timestamp pmt (system_clock::now ());
+
+ // If we have no generated header support, then suppress all
+ // diagnostics (if things go badly we will restart with this
+ // support).
+ //
+ if (drmp == nullptr)
+ {
+ // Dependency info goes to stdout.
+ //
+ assert (!sense_diag);
+
+ // For VC with /P the dependency info and diagnostics all go
+ // to stderr so redirect it to stdout.
+ //
+ pr = process (
+ cpath,
+ args.data (),
+ 0,
+ -1,
+ cclass == compiler_class::msvc ? 1 : gen ? 2 : -2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+ }
+ else
+ {
+ // Dependency info goes to a temporary file.
+ //
+ pr = process (cpath,
+ args.data (),
+ 0,
+ 2, // Send stdout to stderr.
+ gen ? 2 : sense_diag ? -1 : -2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+
+ // If requested, monitor for diagnostics and if detected, mark
+ // the preprocessed output as unusable for compilation.
+ //
+ if (sense_diag)
+ {
+ ifdstream is (move (pr.in_efd), fdstream_mode::skip);
+ puse = puse && (is.peek () == ifdstream::traits_type::eof ());
+ is.close ();
+ }
+
+ // The idea is to reduce it to the stdout case.
+ //
+ pr.wait ();
+ pr.in_ofd = fdopen (*drmp, fdopen_mode::in);
+ }
+
+ // We may not read all the output (e.g., due to a restart).
+ // Before we used to just close the file descriptor to signal to
+ // the other end that we are not interested in the rest. This
+ // works fine with GCC but Clang (3.7.0) finds this impolite and
+ // complains, loudly (broken pipe). So now we are going to skip
+ // until the end.
+ //
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::text | fdstream_mode::skip,
+ ifdstream::badbit);
+
+ // In some cases we may need to ignore the error return status.
+ // The good_error flag keeps track of that. Similarly we
+ // sometimes expect the error return status based on the output
+ // we see. The bad_error flag is for that.
+ //
+ bool good_error (false), bad_error (false);
+
+ size_t skip (skip_count);
+ string l; // Reuse.
+ for (bool first (true), second (false); !restart; )
+ {
+ if (eof (getline (is, l)))
+ break;
+
+ l6 ([&]{trace << "header dependency line '" << l << "'";});
+
+ // Parse different dependency output formats.
+ //
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ if (first)
+ {
+ // The first line should be the file we are compiling.
+ // If it is not, then something went wrong even before
+ // we could compile anything (e.g., file does not
+ // exist). In this case the first line (and everything
+ // after it) is presumably diagnostics.
+ //
+ if (l != src.path ().leaf ().string ())
+ {
+ text << l;
+ bad_error = true;
+ break;
+ }
+
+ first = false;
+ continue;
+ }
+
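+ // The remaining lines are /showIncludes notes along these
+ // lines (illustrative, English locale):
+ //
+ // Note: including file: C:\tmp\proj\foo.hxx
+ //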
+ string f (next_show (l, good_error));
+
+ if (f.empty ()) // Some other diagnostics.
+ {
+ text << l;
+ bad_error = true;
+ break;
+ }
+
+ // Skip until where we left off.
+ //
+ if (skip != 0)
+ {
+ // We can't be skipping over a non-existent header.
+ //
+ assert (!good_error);
+ skip--;
+ }
+ else
+ {
+ restart = add (path (move (f)), false, pmt);
+
+ // If the header does not exist (good_error), then
+ // restart must be true. Except that it is possible that
+ // someone running in parallel has already updated it.
+ // In this case we must force a restart since we haven't
+ // yet seen what's after this at-that-time-non-existent
+ // header.
+ //
+ // We also need to force the target update (normally
+ // done by add()).
+ //
+ if (good_error)
+ restart = updating = true;
+ //
+ // And if we have updated the header (restart is true),
+ // then we may end up in this situation: an old header
+ // got included which caused the preprocessor to fail
+ // down the line. So if we are restarting, set the good
+ // error flag in case the process fails because of
+ // something like this (and if it is for a valid reason,
+ // then we will pick it up on the next round).
+ //
+ else if (restart)
+ good_error = true;
+
+ if (restart)
+ l6 ([&]{trace << "restarting";});
+ }
+
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ // Make dependency declaration.
+ //
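+ // The output is in the make dependency format with '^' as the
+ // target (see -MQ in init_args() above), along these lines
+ // (illustrative):
+ //
+ // ^: foo.cxx /usr/include/stdio.h \
+ //  foo.hxx
+ //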
+ size_t pos (0);
+
+ if (first)
+ {
+ // Empty/invalid output should mean the wait() call
+ // below will return false.
+ //
+ if (l.empty () ||
+ l[0] != '^' || l[1] != ':' || l[2] != ' ')
+ {
+ if (!l.empty ())
+ text << l;
+
+ bad_error = true;
+ break;
+ }
+
+ first = false;
+ second = true;
+
+ // While normally we would have the source file on the
+ // first line, if too long, it will be moved to the next
+ // line and all we will have on this line is "^: \".
+ //
+ if (l.size () == 4 && l[3] == '\\')
+ continue;
+ else
+ pos = 3; // Skip "^: ".
+
+ // Fall through to the 'second' block.
+ }
+
+ if (second)
+ {
+ second = false;
+ next_make (l, pos); // Skip the source file.
+ }
+
+ while (pos != l.size ())
+ {
+ string f (next_make (l, pos));
+
+ // Skip until where we left off.
+ //
+ if (skip != 0)
+ {
+ skip--;
+ continue;
+ }
+
+ restart = add (path (move (f)), false, pmt);
+
+ if (restart)
+ {
+ // The same "preprocessor may fail down the line"
+ // logic as above.
+ //
+ good_error = true;
+
+ l6 ([&]{trace << "restarting";});
+ break;
+ }
+ }
+
+ break;
+ }
+ }
+
+ if (bad_error)
+ break;
+ }
+
+ // In case of VC, we are parsing stderr and if things go south,
+ // we need to copy the diagnostics for the user to see.
+ //
+ if (bad_error && cclass == compiler_class::msvc)
+ {
+ // We used to just dump the whole rdbuf but it turns out VC
+ // may continue writing include notes interleaved with the
+ // diagnostics. So we have to filter them out.
+ //
+ for (; !eof (getline (is, l)); )
+ {
+ size_t p (next_show_sense (l));
+ if (p != string::npos && l.compare (p, 4, "1083") != 0)
+ diag_stream_lock () << l << endl;
+ }
+ }
+
+ is.close ();
+
+ // This is tricky: it is possible that in parallel someone has
+ // generated all our missing headers and we wouldn't restart
+ // normally.
+ //
+ // In this case we also need to force the target update
+ // (normally done by add()).
+ //
+ if (force_gen && *force_gen)
+ {
+ restart = updating = true;
+ force_gen = false;
+ }
+
+ if (pr.wait ())
+ {
+ if (!bad_error)
+ continue;
+
+ fail << "expected error exist status from " << x_lang
+ << " compiler";
+ }
+ else if (pr.exit->normal ())
+ {
+ if (good_error) // Ignore expected errors (restart).
+ continue;
+ }
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read " << x_lang << " compiler header "
+ << "dependency output";
+
+ // Fall through.
+ }
+
+ assert (pr.exit && !*pr.exit);
+ const process_exit& e (*pr.exit);
+
+ // For normal exit we assume the child process issued some
+ // diagnostics.
+ //
+ if (e.normal ())
+ {
+ // If this run was with the generated header support then we
+ // have issued diagnostics and it's time to give up.
+ //
+ if (gen)
+ throw failed ();
+
+ // Just to recap, being here means something is wrong with the
+ // source: it can be a missing generated header, it can be an
+ // outdated generated header (e.g., some check triggered #error
+ // which will go away if only we updated the generated header),
+ // or it can be a real error that is not going away.
+ //
+ // So this is what we are going to do here: if anything got
+ // updated on this run (i.e., the compiler has produced valid
+ // dependency information even though there were errors and we
+ // managed to find and update a header based on this
+ // information), then we restart in the same mode hoping that
+ // this fixes things. Otherwise, we force the generated header
+ // support which will either uncover a missing generated header
+ // or will issue diagnostics.
+ //
+ if (restart)
+ l6 ([&]{trace << "trying again without generated headers";});
+ else
+ {
+ // In some pathological situations (e.g., we are out of disk
+ // space) we may end up switching back and forth indefinitely
+ // without making any headway. So we use skip_count to track
+ // our progress.
+ //
+ if (force_gen_skip && *force_gen_skip == skip_count)
+ {
+ diag_record dr (fail);
+
+ dr << "inconsistent " << x_lang << " compiler behavior";
+
+ // Show the yo-yo'ing command lines.
+ //
+ dr << info;
+ print_process (dr, args.data ()); // No pipes.
+
+ init_args ((gen = true));
+ dr << info << "";
+ print_process (dr, args.data ()); // No pipes.
+
+ dr << info << "perhaps you are running out of disk space?";
+ }
+
+ restart = true;
+ force_gen = true;
+ force_gen_skip = skip_count;
+ l6 ([&]{trace << "restarting with forced generated headers";});
+ }
+ continue;
+ }
+ else
+ run_finish (args, pr); // Throws.
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ // In a multi-threaded program that fork()'ed but did not exec(),
+ // it is unwise to try to do any kind of cleanup (like unwinding
+ // the stack and running destructors).
+ //
+ if (e.child)
+ {
+ drm.cancel ();
+ exit (1);
+ }
+
+ throw failed ();
+ }
+ }
+ }
+
+ // Add the terminating blank line (we are updating the depdb).
+ //
+ dd.expect ("");
+
+ puse = puse && !psrc.path.empty ();
+ return make_pair (move (psrc), puse);
+ }
+
+ pair<translation_unit, string> compile_rule::
+ parse_unit (action a,
+ file& t,
+ linfo lo,
+ const file& src,
+ auto_rmfile& psrc,
+ const match_data& md) const
+ {
+ tracer trace (x, "compile_rule::parse_unit");
+
+ // If things go wrong give the user a bit extra context.
+ //
+ auto df = make_diag_frame (
+ [&src](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while parsing " << src;
+ });
+
+ // For some compilers (GCC, Clang) the preprocessed output is only
+ // partially preprocessed. For others (VC), it is already fully
+ // preprocessed (well, almost: it still has comments but we can handle
+ // that). Plus, the source file might already be (sufficiently)
+ // preprocessed.
+ //
+ // So the plan is to start the compiler process that writes the fully
+ // preprocessed output to stdout and reduce the already preprocessed
+ // case to it.
+ //
+ environment env;
+ cstrings args;
+ const path* sp; // Source path.
+
+ bool ps; // True if extracting from psrc.
+ if (md.pp < preprocessed::modules)
+ {
+ ps = !psrc.path.empty ();
+ sp = &(ps ? psrc.path : src.path ());
+
+ // VC's preprocessed output, if present, is fully preprocessed.
+ //
+ if (cclass != compiler_class::msvc || !ps)
+ {
+ // This should match how we set up preprocessing and is pretty
+ // similar to init_args() from extract_headers().
+ //
+ args.push_back (cpath.recall_string ());
+
+ append_lib_options (t.base_scope (), args, a, t, lo);
+
+ append_options (args, t, c_poptions);
+ append_options (args, t, x_poptions);
+
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ append_option_values (
+ args, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ if (md.symexport)
+ append_symexport_options (args, t);
+
+ // Make sure we don't fail because of warnings.
+ //
+ // @@ Can be both -WX and /WX.
+ //
+ const char* werror (nullptr);
+ switch (cclass)
+ {
+ case compiler_class::gcc: werror = "-Werror"; break;
+ case compiler_class::msvc: werror = "/WX"; break;
+ }
+
+ bool clang (cid == compiler_id::clang ||
+ cid == compiler_id::clang_apple);
+
+ append_options (args, t, c_coptions, werror);
+ append_options (args, t, x_coptions, werror);
+ append_options (args, tstd,
+ tstd.size () - (modules && clang ? 1 : 0));
+
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ args.push_back ("/nologo");
+
+ if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+ args.push_back ("/EHsc");
+
+ if (!find_option_prefixes ({"/MD", "/MT"}, args))
+ args.push_back ("/MD");
+
+ args.push_back ("/E");
+ args.push_back ("/C");
+ args.push_back (langopt (md)); // Compile as.
+
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ if (t.is_a<objs> ())
+ {
+ if (tclass == "linux" || tclass == "bsd")
+ args.push_back ("-fPIC");
+ }
+
+ // Options that trigger preprocessing of partially preprocessed
+ // output are a bit of compiler-specific voodoo.
+ //
+ args.push_back ("-E");
+
+ if (ps)
+ {
+ args.push_back ("-x");
+ args.push_back (langopt (md));
+
+ if (cid == compiler_id::gcc)
+ {
+ args.push_back ("-fpreprocessed");
+ args.push_back ("-fdirectives-only");
+ }
+ }
+
+ break;
+ }
+ }
+
+ args.push_back (sp->string ().c_str ());
+ args.push_back (nullptr);
+ }
+
+ if (!env.empty ())
+ env.push_back (nullptr);
+ }
+ else
+ {
+ // Extracting directly from source.
+ //
+ ps = false;
+ sp = &src.path ();
+ }
+
+ // Preprocess and parse.
+ //
+ for (;;) // Breakout loop.
+ try
+ {
+ // Disarm the removal of the preprocessed file in case of an error.
+ // We re-arm it below.
+ //
+ if (ps)
+ psrc.active = false;
+
+ process pr;
+
+ try
+ {
+ if (args.empty ())
+ {
+ pr = process (process_exit (0)); // Successfully exited.
+ pr.in_ofd = fdopen (*sp, fdopen_mode::in);
+ }
+ else
+ {
+ if (verb >= 3)
+ print_process (args);
+
+ // We don't want to see warnings multiple times so ignore all
+ // diagnostics.
+ //
+ pr = process (cpath,
+ args.data (),
+ 0, -1, -2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+ }
+
+ // Use binary mode to obtain consistent positions.
+ //
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::binary | fdstream_mode::skip);
+
+ parser p;
+ translation_unit tu (p.parse (is, *sp));
+
+ is.close ();
+
+ if (pr.wait ())
+ {
+ if (ps)
+ psrc.active = true; // Re-arm.
+
+ // Prior to 15u5 VC was not using the 'export module' syntax so we
+ // use the prerequisite type to distinguish between interface and
+ // implementation units.
+ //
+ if (cid == compiler_id::msvc &&
+ cmaj == 19 && cmin <= 11 &&
+ x_mod != nullptr && src.is_a (*x_mod))
+ {
+ // It's quite painful to guard the export with an #if/#endif so
+ // if it is present, "fixup" the (temporary) preprocessed output
+ // by getting rid of the keyword.
+ //
+ // Note: when removing this also remember to remove the test.
+ //
+ if (tu.mod.iface)
+ {
+ // We can only fixup a temporary file.
+ //
+ if (!ps)
+ fail (relative (src)) << "fixup requires preprocessor";
+
+ // Stomp out the export keyword with spaces. We are using
+ // std::fstream since our fdstream does not yet support
+ // seeking.
+ //
+ fstream os (psrc.path.string (), fstream::out | fstream::in);
+ auto pos (static_cast<fstream::pos_type> (p.export_pos));
+
+ if (!os.is_open () ||
+ !os.seekp (pos) ||
+ !os.write (" ", 6))
+ fail << "unable to overwrite preprocessor output";
+ }
+ else
+ tu.mod.iface = true;
+ }
+
+ return pair<translation_unit, string> (move (tu), p.checksum);
+ }
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read " << x_lang << " preprocessor output";
+
+ // Fall through.
+ }
+
+ assert (pr.exit && !*pr.exit);
+ const process_exit& e (*pr.exit);
+
+ // What should we do with a normal error exit? Remember we suppressed
+ // the compiler's diagnostics. We used to issue a warning and continue
+ // with the assumption that the compilation step will fail with
+ // diagnostics. The problem with this approach is that we may fail
+ // before that because the information we return (e.g., module name)
+ // is bogus. So it looks like failing is the only option.
+ //
+ if (e.normal ())
+ {
+ fail << "unable to preprocess " << src <<
+ info << "re-run with -s -V to display failing command" <<
+ info << "then run failing command to display compiler diagnostics";
+ }
+ else
+ run_finish (args, pr); // Throws.
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+ }
+
+ throw failed ();
+ }
+
+ // Extract and inject module dependencies.
+ //
+ void compile_rule::
+ extract_modules (action a,
+ const scope& bs,
+ file& t,
+ linfo li,
+ const compile_target_types& tt,
+ const file& src,
+ match_data& md,
+ module_info&& mi,
+ depdb& dd,
+ bool& updating) const
+ {
+ tracer trace (x, "compile_rule::extract_modules");
+ l5 ([&]{trace << "target: " << t;});
+
+ // If things go wrong, give the user a bit extra context.
+ //
+ auto df = make_diag_frame (
+ [&src](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while extracting module dependencies from " << src;
+ });
+
+ if (!modules)
+ {
+ if (!mi.name.empty () || !mi.imports.empty ())
+ fail (relative (src)) << "modules support not enabled/available";
+
+ return;
+ }
+
+ // Sanity checks.
+ //
+ // If we are compiling a module interface unit, make sure it has the
+ // necessary declarations.
+ //
+ if (src.is_a (*x_mod) && (mi.name.empty () || !mi.iface))
+ fail << src << " is not a module interface unit";
+
+ // Search and match all the modules we depend on. If this is a module
+ // implementation unit, then treat the module itself as if it was
+ // imported (we insert it first since for some compilers we have to
+ // differentiate between this special module and real imports). Note:
+ // move.
+ //
+ if (!mi.iface && !mi.name.empty ())
+ mi.imports.insert (mi.imports.begin (),
+ module_import {move (mi.name), false, 0});
+
+ // The change to the set of imports would have required a change to
+ // source code (or options). Changes to the bmi{}s themselves will be
+ // detected via the normal prerequisite machinery. However, the same set
+ // of imports could be resolved to a different set of bmi{}s (in a sense
+ // similar to changing the source file). To detect this we calculate and
+ // store a hash of all (not just direct) bmi{}'s paths.
+ //
+ sha256 cs;
+
+ if (!mi.imports.empty ())
+ md.mods = search_modules (a, bs, t, li, tt.bmi, src, mi.imports, cs);
+
+ if (dd.expect (cs.string ()) != nullptr)
+ updating = true;
+
+#if 0
+ // Save the module map for compilers that use it.
+ //
+ if (md.mods.start != 0)
+ {
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ // We don't need to redo this if the above hash hasn't changed and
+ // the database is valid.
+ //
+ if (dd.writing () || !dd.skip ())
+ {
+ const auto& pts (t.prerequisite_targets);
+
+ for (size_t i (md.mods.start); i != pts.size (); ++i)
+ {
+ if (const target* m = pts[i])
+ {
+ // Save a variable lookup by getting the module name from
+ // the import list (see search_modules()).
+ //
+ dd.write ('@', false);
+ dd.write (mi.imports[i - md.mods.start].name, false);
+ dd.write ('=', false);
+ dd.write (m->as<file> ().path ());
+ }
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+#endif
+
+ // Set the cc.module_name variable if this is an interface unit. Note
+ // that it may seem like a good idea to set it on the bmi{} group to
+ // avoid duplication. We, however, cannot do it MT-safely since we don't
+ // match the group.
+ //
+ if (mi.iface)
+ {
+ if (value& v = t.vars.assign (c_module_name))
+ assert (cast<string> (v) == mi.name);
+ else
+ v = move (mi.name); // Note: move.
+ }
+ }
+
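+ // Return true if the module name is, or is a submodule of, std
+ // (e.g., std or std.core but not stdio).
+ //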
+ inline bool
+ std_module (const string& m)
+ {
+ size_t n (m.size ());
+ return (n >= 3 &&
+ m[0] == 's' && m[1] == 't' && m[2] == 'd' &&
+ (n == 3 || m[3] == '.'));
+ }
+
+ // Resolve imported modules to bmi*{} targets.
+ //
+ module_positions compile_rule::
+ search_modules (action a,
+ const scope& bs,
+ file& t,
+ linfo li,
+ const target_type& mtt,
+ const file& src,
+ module_imports& imports,
+ sha256& cs) const
+ {
+ tracer trace (x, "compile_rule::search_modules");
+
+ // So we have a list of imports and a list of "potential" module
+ // prerequisites. They are potential in the sense that they may or may
+ // not be required by this translation unit. In other words, they are
+ // the pool where we can resolve actual imports.
+ //
+ // Because we may not need all of these prerequisites, we cannot just go
+ // ahead and match all of them (and they can even have cycles; see rule
+ // synthesis). This poses a bit of a problem: the only way to discover
+ // the module's actual name (see cc.module_name) is by matching it.
+ //
+ // One way to solve this would be to make the user specify the module
+ // name for each mxx{} explicitly. This will be a major pain, however.
+ // Another would be to require encoding of the module name in the
+ // interface unit file name. For example, hello.core -> hello-core.mxx.
+ // This is better but still too restrictive: some will want to call it
+ // hello_core.mxx or HelloCore.mxx (because that's their file naming
+ // convention) or place it in a subdirectory, say, hello/core.mxx.
+ //
+ // In the above examples one common theme about all the file names is
+ // that they contain, in one form or another, the "tail" of the module
+ // name ('core'). So what we are going to do is require that the
+ // interface file names contain enough of the module name tail to
+ // unambiguously resolve all the module imports. On our side we are
+ // going to implement a "fuzzy" module name to file name match. This
+ // should be reliable enough since we will always verify our guesses
+ // once we match the target and extract the actual module name. Plus,
+ // the user will always have the option of resolving any impasses by
+ // specifying the module name explicitly.
+ //
+ // So, the fuzzy match: the idea is that each match gets a score, the
+ // number of characters in the module name that got matched. A match
+ // with the highest score is used. And we use the (length + 1) for a
+ // match against an actual module name.
+ //
+ // For std.* modules we only accept non-fuzzy matches (think std.core vs
+ // some core.mxx). And if such a module is unresolved, then we assume it
+ // is pre-built and will be found by some other means (e.g., VC's
+ // IFCPATH).
+ //
+ auto match = [] (const string& f, const string& m) -> size_t
+ {
+ size_t fi (f.size ());
+ size_t mi (m.size ());
+
+ // Scan backwards for as long as we match. Keep track of the previous
+ // character for case change detection.
+ //
+ for (char fc, mc, fp ('\0'), mp ('\0');
+ fi != 0 && mi != 0;
+ fp = fc, mp = mc, --fi, --mi)
+ {
+ fc = f[fi - 1];
+ mc = m[mi - 1];
+
+ if (casecmp (fc, mc) == 0)
+ continue;
+
+ // We consider all separators equal and a character case change to
+ // be a separator. Some examples of the latter:
+ //
+ // foo.bar
+ // fooBAR
+ // FOObar
+ //
+ bool fs (fc == '_' || fc == '-' || fc == '.' ||
+ path::traits::is_separator (fc));
+ bool ms (mc == '_' || mc == '.');
+
+ if (fs && ms)
+ continue;
+
+ // Only if one is a real separator do we consider case change.
+ //
+ if (fs || ms)
+ {
+ auto cc = [] (char c1, char c2) -> bool
+ {
+ return (alpha (c1) &&
+ alpha (c2) &&
+ (ucase (c1) == c1) != (ucase (c2) == c2));
+ };
+
+ bool fa (false), ma (false);
+ if ((fs || (fa = cc (fp, fc))) && (ms || (ma = cc (mp, mc))))
+ {
+ // Stay on this character if imaginary punctuation (note: cannot
+ // be both true).
+ //
+ if (fa) ++fi;
+ if (ma) ++mi;
+ continue;
+ }
+ }
+
+ break; // No match.
+ }
+
+ // Return the number of characters matched in the module name and not
+ // in the file (this may not be the same because of the imaginary
+ // separators).
+ //
+ return m.size () - mi;
+ };
+
+ auto& pts (t.prerequisite_targets[a]);
+ size_t start (pts.size ()); // Index of the first to be added.
+
+ // We have two parallel vectors: module names/scores in imports and
+ // targets in prerequisite_targets (offset with start). Pre-allocate
+ // NULL entries in the latter.
+ //
+ size_t n (imports.size ());
+ pts.resize (start + n, nullptr);
+
+ // Oh, yes, there is one "minor" complication. It's the last one, I
+ // promise. It has to do with module re-exporting (export import M;).
+ // In this case (currently) all implementations simply treat it as a
+ // shallow (from the BMI's point of view) reference to the module (or an
+ // implicit import, if you will). Do you see where it's going? Nowhere
+ // good, that's right. This shallow reference means that the compiler
+ // should be able to find BMIs for all the re-exported modules,
+ // recursively. The good news is we are actually in pretty good shape to
+ // handle this: after match, all our prerequisite BMIs will have their
+ // prerequisite BMIs known, recursively. The only bit that is missing is
+ // the re-export flag of sorts, as well as deciding where to handle
+ // it: here or in append_modules(). After some meditation it became
+ // clear that handling it here will be simpler: we need to weed out
+ // duplicates for which we can re-use the imports vector. And we may
+ // also need to save this "flattened" list of modules in depdb.
+ //
+ // Ok, so, here is the plan:
+ //
+ // 1. There is no good place in prerequisite_targets to store the
+ // exported flag (no, using the marking facility across match/execute
+ // is a bad idea). So what we are going to do is put re-exported
+ // bmi{}s at the back and store (in the target's data pad) the start
+ // position. One bad aspect about this part is that we assume those
+ // bmi{}s have been matched by the same rule. But let's not kid
+ // ourselves, there will be no other rule that matches bmi{}s.
+ //
+ // 2. Once we have matched all the bmi{}s we are importing directly
+ // (with all the re-exported by us at the back), we will go over them
+ // and copy all of their re-exported bmi{}s (using the position we
+ // saved on step #1). The end result will be a recursively-explored
+ // list of imported bmi{}s that append_modules() can simply convert
+ // to the list of options.
+ //
+ // One issue with this approach is that these copied targets will be
+ // executed which means we need to adjust their dependent counts
+ // (which is normally done by match). While this seems conceptually
+ // correct (especially if you view re-exports as implicit imports),
+ // it's just extra overhead (we know they will be updated). So what
+ // we are going to do is save another position, that of the start of
+ // these copied-over targets, and will only execute up to this point.
+ //
+ // And after implementing this came the reality check: all the current
+ // implementations require access to all the imported BMIs, not only
+ // re-exported. Some (like Clang) store references to imported BMI files
+ // so we actually don't need to pass any extra options (unless things
+ // get moved) but they still need access to the BMIs (and things will
+ // most likely have to be done differently for distributed compilation).
+ //
+ // So the revised plan: on the off chance that some implementation will
+ // do it differently, we will continue maintaining the
+ // imported/re-exported split and how much to copy over can be made
+ // compiler-specific.
+ //
+ // As a first sub-step of step #1, move all the re-exported imports to
+ // the end of the vector. This will make sure they end up at the end
+ // of prerequisite_targets. Note: the special first import, if any,
+ // should be unaffected.
+ //
+ sort (imports.begin (), imports.end (),
+ [] (const module_import& x, const module_import& y)
+ {
+ return !x.exported && y.exported;
+ });
+
+ // Go over the prerequisites once.
+ //
+ // For (direct) library prerequisites, check their prerequisite bmi{}s
+ // (which should be searched and matched with module names discovered;
+ // see the library meta-information protocol for details).
+ //
+ // For our own bmi{} prerequisites, check if each (better) matches
+ // any of the imports.
+
+ // For a fuzzy match, check if a file name (better) resolves any of our
+ // imports and, if so, make it the new selection. For an exact match,
+ // the name is the actual module name and it can only resolve one
+ // import (there are no duplicates).
+ //
+ // Set done to true if all the imports have now been resolved to actual
+ // module names (which means we can stop searching). This will happen
+ // if all the modules come from libraries. Which will be fairly common
+ // (think of all the tests) so it's worth optimizing for.
+ //
+ bool done (false);
+
+ auto check_fuzzy = [&trace, &imports, &pts, &match, start, n]
+ (const target* pt, const string& name)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ module_import& m (imports[i]);
+
+ if (std_module (m.name)) // No fuzzy std.* matches.
+ continue;
+
+ size_t n (m.name.size ());
+
+ if (m.score > n) // Resolved to module name.
+ continue;
+
+ size_t s (match (name, m.name));
+
+ l5 ([&]{trace << name << " ~ " << m.name << ": " << s;});
+
+ if (s > m.score)
+ {
+ pts[start + i] = pt;
+ m.score = s;
+ }
+ }
+ };
+
+ // If resolved, return the "slot" in pts (we don't want to create a
+ // side build until we know we match; see below for details).
+ //
+ auto check_exact = [&trace, &imports, &pts, start, n, &done]
+ (const string& name) -> const target**
+ {
+ const target** r (nullptr);
+ done = true;
+
+ for (size_t i (0); i != n; ++i)
+ {
+ module_import& m (imports[i]);
+
+ size_t n (m.name.size ());
+
+ if (m.score > n) // Resolved to module name (no effect on done).
+ continue;
+
+ if (r == nullptr)
+ {
+ size_t s (name == m.name ? n + 1 : 0);
+
+ l5 ([&]{trace << name << " ~ " << m.name << ": " << s;});
+
+ if (s > m.score)
+ {
+ r = &pts[start + i].target;
+ m.score = s;
+ continue; // Scan the rest to detect if all done.
+ }
+ }
+
+ done = false;
+ }
+
+ return r;
+ };
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target* pt (p.load ()); // Should be cached for libraries.
+
+ if (pt != nullptr)
+ {
+ const target* lt (nullptr);
+
+ if (const libx* l = pt->is_a<libx> ())
+ lt = &link_member (*l, a, li);
+ else if (pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ())
+ lt = pt;
+
+ // If this is a library, check its bmi{}s and mxx{}s.
+ //
+ if (lt != nullptr)
+ {
+ for (const target* bt: lt->prerequisite_targets[a])
+ {
+ if (bt == nullptr)
+ continue;
+
+ // Note that here we (try) to use whatever flavor of bmi*{} is
+ // available.
+ //
+ // @@ MOD: BMI compatibility check.
+ // @@ UTL: we need to (recursively) see through libux{} (and
+ // also in pkgconfig_save()).
+ //
+ if (bt->is_a<bmis> () ||
+ bt->is_a<bmia> () ||
+ bt->is_a<bmie> ())
+ {
+ const string& n (cast<string> (bt->vars[c_module_name]));
+
+ if (const target** p = check_exact (n))
+ *p = bt;
+ }
+ else if (bt->is_a (*x_mod))
+ {
+ // This is an installed library with a list of module sources
+ // (the sources are specified as prerequisites but the fallback
+ // file rule puts them into prerequisite_targets for us).
+ //
+ // The module names should be specified but if not assume
+ // something else is going on and ignore.
+ //
+ const string* n (cast_null<string> (bt->vars[c_module_name]));
+ if (n == nullptr)
+ continue;
+
+ if (const target** p = check_exact (*n))
+ *p = &make_module_sidebuild (a, bs, *lt, *bt, *n);
+ }
+ else
+ continue;
+
+ if (done)
+ break;
+ }
+
+ if (done)
+ break;
+
+ continue;
+ }
+
+ // Fall through.
+ }
+
+ // While it would have been even better not to search for a target, we
+ // need to get hold of the corresponding mxx{} (unlikely but possible
+ // for bmi{} to have a different name).
+ //
+ if (p.is_a<bmi> ())
+ pt = &search (t, mtt, p.key ()); // Same logic as in picking obj*{}.
+ else if (p.is_a (mtt))
+ {
+ if (pt == nullptr)
+ pt = &p.search (t);
+ }
+ else
+ continue;
+
+ // Find the mxx{} prerequisite and extract its "file name" for the
+ // fuzzy match unless the user specified the module name explicitly.
+ //
+ for (prerequisite_member p: group_prerequisite_members (a, *pt))
+ {
+ if (p.is_a (*x_mod))
+ {
+ // Check for an explicit module name. Only look for an existing
+ // target (which means the name can only be specified on the
+ // target itself, no target type/pattern-spec).
+ //
+ const target* t (p.search_existing ());
+ const string* n (t != nullptr
+ ? cast_null<string> (t->vars[c_module_name])
+ : nullptr);
+ if (n != nullptr)
+ {
+ if (const target** p = check_exact (*n))
+ *p = pt;
+ }
+ else
+ {
+ // Fuzzy match.
+ //
+ string f;
+
+ // Add the directory part if it is relative. The idea is to
+ // include it into the module match, say hello.core vs
+ // hello/mxx{core}.
+ //
+ // @@ MOD: Why not for absolute? Good question. What if it
+ // contains special components, say, ../mxx{core}?
+ //
+ const dir_path& d (p.dir ());
+
+ if (!d.empty () && d.relative ())
+ f = d.representation (); // Includes trailing slash.
+
+ f += p.name ();
+ check_fuzzy (pt, f);
+ }
+ break;
+ }
+ }
+
+ if (done)
+ break;
+ }
+
+ // Diagnose unresolved modules.
+ //
+ if (!done)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ if (pts[start + i] == nullptr && !std_module (imports[i].name))
+ {
+ // It would have been nice to print the location of the import
+ // declaration. And we could save it during parsing at the expense
+ // of a few paths (that can be pooled). The question is what to do
+ // when we re-create this information from depdb? We could have
+ // saved the location information there but the relative paths
+ // (e.g., from the #line directives) could end up being wrong if
+ // we re-run from a different working directory.
+ //
+ // It seems the only workable approach is to extract full location
+ // info during parse, not save it in depdb, and, when re-creating,
+ // fall back to just the src path without any line/column information.
+ // This will probably cover the majority of cases (most of the time
+ // it will be a misspelled module name, not a removal of module
+ // from buildfile).
+ //
+ // But at this stage this doesn't seem worth the trouble.
+ //
+ fail (relative (src)) << "unable to resolve module "
+ << imports[i].name;
+ }
+ }
+ }
+
+ // Match in parallel and wait for completion.
+ //
+ match_members (a, t, pts, start);
+
+ // Post-process the list of our (direct) imports. While at it, calculate
+ // the checksum of all (direct and indirect) bmi{} paths.
+ //
+ size_t exported (n);
+ size_t copied (pts.size ());
+
+ for (size_t i (0); i != n; ++i)
+ {
+ const module_import& m (imports[i]);
+
+ // Determine the position of the first re-exported bmi{}.
+ //
+ if (m.exported && exported == n)
+ exported = i;
+
+ const target* bt (pts[start + i]);
+
+ if (bt == nullptr)
+ continue; // Unresolved (std.*).
+
+ // Verify our guesses against extracted module names but don't waste
+ // time if it was a match against the actual module name.
+ //
+ const string& in (m.name);
+
+ if (m.score <= in.size ())
+ {
+ const string& mn (cast<string> (bt->vars[c_module_name]));
+
+ if (in != mn)
+ {
+ for (prerequisite_member p: group_prerequisite_members (a, *bt))
+ {
+ if (p.is_a (*x_mod)) // Got to be there.
+ {
+ fail (relative (src))
+ << "failed to correctly guess module name from " << p <<
+ info << "guessed: " << in <<
+ info << "actual: " << mn <<
+ info << "consider adjusting module interface file names or" <<
+ info << "consider specifying module name with " << x
+ << ".module_name";
+ }
+ }
+ }
+ }
+
+ // Hash (we know it's a file).
+ //
+ cs.append (static_cast<const file&> (*bt).path ().string ());
+
+ // Copy over bmi{}s from our prerequisites weeding out duplicates.
+ //
+ if (size_t j = bt->data<match_data> ().mods.start)
+ {
+ // Hard to say whether we should reserve or not. We will probably
+ // get quite a bit of duplication.
+ //
+ auto& bpts (bt->prerequisite_targets[a]);
+ for (size_t m (bpts.size ()); j != m; ++j)
+ {
+ const target* et (bpts[j]);
+
+ if (et == nullptr)
+ continue; // Unresolved (std.*).
+
+ const string& mn (cast<string> (et->vars[c_module_name]));
+
+ if (find_if (imports.begin (), imports.end (),
+ [&mn] (const module_import& i)
+ {
+ return i.name == mn;
+ }) == imports.end ())
+ {
+ pts.push_back (et);
+ cs.append (static_cast<const file&> (*et).path ().string ());
+
+ // Add to the list of imports for further duplicate suppression.
+ // We could have probably stored a reference to the name (e.g., in
+ // score) but it's probably not worth it if we have a small
+ // string optimization.
+ //
+ imports.push_back (module_import {mn, true, 0});
+ }
+ }
+ }
+ }
+
+ if (copied == pts.size ()) // No copied tail.
+ copied = 0;
+
+ if (exported == n) // No (own) re-exported imports.
+ exported = copied;
+ else
+ exported += start; // Rebase.
+
+ return module_positions {start, exported, copied};
+ }
+
+ // Synthesize a dependency for building a module binary interface on
+ // the side.
+ //
+ const target& compile_rule::
+ make_module_sidebuild (action a,
+ const scope& bs,
+ const target& lt,
+ const target& mt,
+ const string& mn) const
+ {
+ tracer trace (x, "compile_rule::make_module_sidebuild");
+
+ // First figure out where we are going to build. We want to avoid
+ // multiple sidebuilds so the outermost scope that has loaded the
+ // cc.config module and that is within our amalgamation seems like a
+ // good place.
+ //
+ const scope& rs (*bs.root_scope ());
+ const scope* as (&rs);
+ {
+ const scope* ws (as->weak_scope ());
+ if (as != ws)
+ {
+ const scope* s (as);
+ do
+ {
+ s = s->parent_scope ()->root_scope ();
+
+ // Use cc.core.vars as a proxy for {c,cxx}.config (a bit smelly).
+ //
+ // This is also the module that registers the scope operation
+ // callback that cleans up the subproject.
+ //
+ if (cast_false<bool> ((*s)["cc.core.vars.loaded"]))
+ as = s;
+
+ } while (s != ws);
+ }
+ }
+
+ // We build modules in a subproject (since there might be no full
+ // language support module loaded in the amalgamation, only *.config).
+ // So the first step is to check if the project has already been created
+ // and/or loaded and if not, then to go ahead and do so.
+ //
+ dir_path pd (as->out_path () / modules_sidebuild_dir /= x);
+ {
+ const scope* ps (&scopes.find (pd));
+
+ if (ps->out_path () != pd)
+ {
+ // Switch the phase to load then create and load the subproject.
+ //
+ phase_switch phs (run_phase::load);
+
+ // Re-test again now that we are in exclusive phase (another thread
+ // could have already created and loaded the subproject).
+ //
+ ps = &scopes.find (pd);
+
+ if (ps->out_path () != pd)
+ {
+ // The project might already be created in which case we just need
+ // to load it.
+ //
+ if (!is_src_root (pd))
+ {
+ // Copy our standard and force modules.
+ //
+ string extra;
+
+ if (const string* std = cast_null<string> (rs[x_std]))
+ extra += string (x) + ".std = " + *std + '\n';
+
+ extra += string (x) + ".features.modules = true";
+
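+ // For C++, the resulting root_pre fragment would be along these
+ // lines (illustrative):
+ //
+ // cxx.std = latest
+ // cxx.features.modules = true
+ //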
+ config::create_project (
+ pd,
+ as->out_path ().relative (pd), /* amalgamation */
+ {}, /* boot_modules */
+ extra, /* root_pre */
+ {string (x) + '.'}, /* root_modules */
+ "", /* root_post */
+ false, /* config */
+ false, /* buildfile */
+ "the cc module",
+ 2); /* verbosity */
+ }
+
+ ps = &load_project (as->rw () /* lock */, pd, pd);
+ }
+ }
+
+ // Some sanity checks.
+ //
+#ifndef NDEBUG
+ assert (ps->root ());
+ const module* m (ps->modules.lookup<module> (x));
+ assert (m != nullptr && m->modules);
+#endif
+ }
+
+ // Next we need to come up with a file/target name that will be unique
+ // enough not to conflict with other modules. If we assume that within
+ // an amalgamation there is only one "version" of each module, then the
+ // module name itself seems like a good fit. We just replace '.' with
+ // '-'.
+ //
+ string mf;
+ transform (mn.begin (), mn.end (),
+ back_inserter (mf),
+ [] (char c) {return c == '.' ? '-' : c;});
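+ // So, for example, hello.core becomes hello-core.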
+
+ // It seems natural to build a BMI type that corresponds to the library
+ // type. After all, this is where the object file part of the BMI is
+ // going to come from (though things will probably be different for
+ // module-only libraries).
+ //
+ const target_type* tt (nullptr);
+ switch (link_type (lt).type)
+ {
+ case otype::a: tt = &bmia::static_type; break;
+ case otype::s: tt = &bmis::static_type; break;
+ case otype::e: assert (false);
+ }
+
+ // Store the BMI target in the subproject root. If the target already
+ // exists then we assume all this is already done (otherwise why would
+ // someone have created such a target).
+ //
+ if (const target* bt = targets.find (
+ *tt,
+ pd,
+ dir_path (), // Always in the out tree.
+ mf,
+ nullopt, // Use default extension.
+ trace))
+ return *bt;
+
+ prerequisites ps;
+ ps.push_back (prerequisite (mt));
+
+ // We've added the mxx{} but it may import other modules from this
+ // library. Or from (direct) dependencies of this library. We add them
+ // all as prerequisites so that the standard module search logic can
+ // sort things out. This is pretty similar to what we do in link when
+ // synthesizing dependencies for bmi{}'s.
+ //
+ ps.push_back (prerequisite (lt));
+ for (prerequisite_member p: group_prerequisite_members (a, lt))
+ {
+ // @@ TODO: will probably need revision if using sidebuild for
+ // non-installed libraries (e.g., direct BMI dependencies
+ // will probably have to be translated to mxx{} or some such).
+ //
+ if (p.is_a<libx> () ||
+ p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ())
+ {
+ ps.push_back (p.as_prerequisite ());
+ }
+ }
+
+ auto p (targets.insert_locked (*tt,
+ move (pd),
+ dir_path (), // Always in the out tree.
+ move (mf),
+ nullopt, // Use default extension.
+ true, // Implied.
+ trace));
+ const target& bt (p.first);
+
+ // Note that this is racy and someone might have created this target
+ // while we were preparing the prerequisite list.
+ //
+ if (p.second.owns_lock ())
+ bt.prerequisites (move (ps));
+
+ return bt;
+ }
+
+ // Filter cl.exe noise (msvc.cxx).
+ //
+ void
+ msvc_filter_cl (ifdstream&, const path& src);
+
+ void compile_rule::
+ append_modules (environment& env,
+ cstrings& args,
+ strings& stor,
+ action a,
+ const file& t,
+ const match_data& md) const
+ {
+ const module_positions& ms (md.mods);
+ assert (ms.start != 0);
+
+ dir_path stdifc; // See the VC case below.
+
+ auto& pts (t.prerequisite_targets[a]);
+
+#if 0
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ // Use the module map stored in depdb.
+ //
+ string s (relative (md.dd).string ());
+ s.insert (0, "-fmodule-file-map=@=");
+ stor.push_back (move (s));
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ // In Clang the module implementation's unit .pcm is special and
+ // must be "loaded".
+ //
+ if (md.type == translation_type::module_impl)
+ {
+ const file& f (pts[ms.start]->as<file> ());
+ string s (relative (f.path ()).string ());
+ s.insert (0, "-fmodule-file=");
+ stor.push_back (move (s));
+ }
+
+ // Use the module map stored in depdb for others.
+ //
+ string s (relative (md.dd).string ());
+ s.insert (0, "-fmodule-file-map=@=");
+ stor.push_back (move (s));
+ break;
+ }
+ case compiler_id::msvc:
+ {
+ for (size_t i (ms.start), n (pts.size ());
+ i != n;
+ ++i)
+ {
+ const target* pt (pts[i]);
+
+ if (pt == nullptr)
+ continue;
+
+ // Here we use whatever bmi type has been added. And we know all
+ // of these are bmi's.
+ //
+ const file& f (pt->as<file> ());
+
+ // In VC std.* modules can only come from a single directory
+ // specified with the IFCPATH environment variable or the
+ // /module:stdIfcDir option.
+ //
+ if (std_module (cast<string> (f.vars[c_module_name])))
+ {
+ dir_path d (f.path ().directory ());
+
+ if (stdifc.empty ())
+ {
+ // Go one directory up since /module:stdIfcDir will look in
+ // either Release or Debug subdirectories. Keeping the result
+ // absolute feels right.
+ //
+ stor.push_back ("/module:stdIfcDir");
+ stor.push_back (d.directory ().string ());
+ stdifc = move (d);
+ }
+ else if (d != stdifc) // Absolute and normalized.
+ fail << "multiple std.* modules in different directories";
+ }
+ else
+ {
+ stor.push_back ("/module:reference");
+ stor.push_back (relative (f.path ()).string ());
+ }
+ }
+ break;
+ }
+ case compiler_id::icc:
+ assert (false);
+ }
+#else
+ size_t n (pts.size ());
+
+ // Clang embeds module file references so we only need to specify
+ // our direct imports.
+ //
+ // If/when we get the ability to specify the mapping in a file, we
+ // should probably pass the whole list.
+ //
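+ // The end result is a set of options along these lines (illustrative;
+ // the BMI file names and extensions are compiler-specific):
+ //
+ // GCC/Clang: -fmodule-file=hello.core=/out/hello-core.pcm
+ // VC:        /module:stdIfcDir C:\...\ifc
+ //            /module:reference /out/hello-core.ifc
+ //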
+ switch (cid)
+ {
+ case compiler_id::gcc: break; // All of them.
+ case compiler_id::clang_apple:
+ case compiler_id::clang: n = ms.copied != 0 ? ms.copied : n; break;
+ case compiler_id::msvc: break; // All of them.
+ case compiler_id::icc: assert (false);
+ }
+
+ for (size_t i (ms.start); i != n; ++i)
+ {
+ const target* pt (pts[i]);
+
+ if (pt == nullptr)
+ continue;
+
+ // Here we use whatever bmi type has been added. And we know all of
+ // these are bmi's.
+ //
+ const file& f (pt->as<file> ());
+ string s (relative (f.path ()).string ());
+
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ s.insert (0, 1, '=');
+ s.insert (0, cast<string> (f.vars[c_module_name]));
+ s.insert (0, "-fmodule-file=");
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+            // In Clang the module implementation unit's .pcm is special and
+ // must be "loaded".
+ //
+ if (md.type == translation_type::module_impl && i == ms.start)
+ s.insert (0, "-fmodule-file=");
+ else
+ {
+ s.insert (0, 1, '=');
+ s.insert (0, cast<string> (f.vars[c_module_name]));
+ s.insert (0, "-fmodule-file=");
+ }
+ break;
+ }
+ case compiler_id::msvc:
+ {
+ // In VC std.* modules can only come from a single directory
+ // specified with the IFCPATH environment variable or the
+ // /module:stdIfcDir option.
+ //
+ if (std_module (cast<string> (f.vars[c_module_name])))
+ {
+ dir_path d (f.path ().directory ());
+
+ if (stdifc.empty ())
+ {
+ // Go one directory up since /module:stdIfcDir will look in
+ // either Release or Debug subdirectories. Keeping the result
+ // absolute feels right.
+ //
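+                // For example (hypothetical layout), std.core.ifc in
+                //
+                //   .../ifc/x64/Release/
+                //
+                // results in /module:stdIfcDir .../ifc/x64.
+                //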
+ s = d.directory ().string ();
+ stor.push_back ("/module:stdIfcDir");
+ stdifc = move (d);
+ }
+ else
+ {
+ if (d != stdifc) // Absolute and normalized.
+ fail << "multiple std.* modules in different directories";
+
+ continue; // Skip.
+ }
+ }
+ else
+ stor.push_back ("/module:reference");
+
+ break;
+ }
+ case compiler_id::icc:
+ assert (false);
+ }
+
+ stor.push_back (move (s));
+ }
+#endif
+
+      // Shallow-copy storage to args. Why not do it as we go along pushing
+      // into storage? Because a reallocation in storage would invalidate
+      // the c_str() pointers already in args.
+ //
+ for (const string& a: stor)
+ args.push_back (a.c_str ());
+
+ // VC's IFCPATH takes precedence over /module:stdIfcDir so unset it
+ // if we are using our own std modules.
+ //
+ if (!stdifc.empty ())
+ env.push_back ("IFCPATH");
+ }
+
+ target_state compile_rule::
+ perform_update (action a, const target& xt) const
+ {
+ const file& t (xt.as<file> ());
+ const path& tp (t.path ());
+
+ match_data md (move (t.data<match_data> ()));
+ bool mod (md.type == translation_type::module_iface);
+
+ // While all our prerequisites are already up-to-date, we still have to
+ // execute them to keep the dependency counts straight. Actually, no, we
+ // may also have to update the modules.
+ //
+ auto pr (
+ execute_prerequisites<file> (
+ (mod ? *x_mod : x_src),
+ a, t,
+ md.mt,
+ [s = md.mods.start] (const target&, size_t i)
+ {
+ return s != 0 && i >= s; // Only compare timestamps for modules.
+ },
+ md.mods.copied)); // See search_modules() for details.
+
+ const file& s (pr.second);
+ const path* sp (&s.path ());
+
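+      // If first is present then we don't need to recompile (its value is
+      // the up-to-date target state that we should return).
+      //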
+ if (pr.first)
+ {
+ if (md.touch)
+ {
+ touch (tp, false, 2);
+ skip_count.fetch_add (1, memory_order_relaxed);
+ }
+
+ t.mtime (md.mt);
+ return *pr.first;
+ }
+
+ // Make sure depdb is no older than any of our prerequisites.
+ //
+ touch (md.dd, false, verb_never);
+
+ const scope& bs (t.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ otype ot (compile_type (t, mod));
+ linfo li (link_info (bs, ot));
+
+ environment env;
+ cstrings args {cpath.recall_string ()};
+
+ // If we are building a module, then the target is bmi*{} and its ad hoc
+ // member is obj*{}.
+ //
+ path relo, relm;
+ if (mod)
+ {
+ relm = relative (tp);
+ relo = relative (t.member->is_a<file> ()->path ());
+ }
+ else
+ relo = relative (tp);
+
+ // Build the command line.
+ //
+ if (md.pp != preprocessed::all)
+ {
+ append_options (args, t, c_poptions);
+ append_options (args, t, x_poptions);
+
+ // Add *.export.poptions from prerequisite libraries.
+ //
+ append_lib_options (bs, args, a, t, li);
+
+ // Extra system header dirs (last).
+ //
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ append_option_values (
+ args, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ if (md.symexport)
+ append_symexport_options (args, t);
+ }
+
+ append_options (args, t, c_coptions);
+ append_options (args, t, x_coptions);
+ append_options (args, tstd);
+
+ string out, out1; // Output options storage.
+ strings mods; // Module options storage.
+ size_t out_i (0); // Index of the -o option.
+
+ if (cclass == compiler_class::msvc)
+ {
+ // The /F*: option variants with separate names only became available
+ // in VS2013/12.0. Why do we bother? Because the command line suddenly
+ // becomes readable.
+ //
+ uint64_t ver (cast<uint64_t> (rs[x_version_major]));
+
+ args.push_back ("/nologo");
+
+ // While we want to keep the low-level build as "pure" as possible,
+ // the two misguided defaults, exceptions and runtime, just have to be
+ // fixed. Otherwise the default build is pretty much unusable. But we
+ // also make sure that the user can easily disable our defaults: if we
+ // see any relevant options explicitly specified, we take our hands
+ // off.
+ //
+        // For C it looks like no /EH* (exceptions supported but no C++
+        // objects destroyed) is a reasonable default.
+ //
+ if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+ args.push_back ("/EHsc");
+
+ // The runtime is a bit more interesting. At first it may seem like a
+ // good idea to be a bit clever and use the static runtime if we are
+ // building obja{}. And for obje{} we could decide which runtime to
+ // use based on the library link order: if it is static-only, then we
+ // could assume the static runtime. But it is indeed too clever: when
+ // building liba{} we have no idea who is going to use it. It could be
+ // an exe{} that links both static and shared libraries (and is
+ // therefore built with the shared runtime). And to safely use the
+ // static runtime, everything must be built with /MT and there should
+ // be no DLLs in the picture. So we are going to play it safe and
+ // always default to the shared runtime.
+ //
+ // In a similar vein, it would seem reasonable to use the debug runtime
+ // if we are compiling with debug. But, again, there will be fireworks
+ // if we have some projects built with debug and some without and then
+ // we try to link them together (which is not an unreasonable thing to
+ // do). So by default we will always use the release runtime.
+ //
+ if (!find_option_prefixes ({"/MD", "/MT"}, args))
+ args.push_back ("/MD");
+
+ if (md.mods.start != 0)
+ append_modules (env, args, mods, a, t, md);
+
+ // The presence of /Zi or /ZI causes the compiler to write debug info
+ // to the .pdb file. By default it is a shared file called vcNN.pdb
+ // (where NN is the VC version) created (wait for it) in the current
+ // working directory (and not the directory of the .obj file). Also,
+ // because it is shared, there is a special Windows service that
+ // serializes access. We, of course, want none of that so we will
+ // create a .pdb per object file.
+ //
+ // Note that this also changes the name of the .idb file (used for
+        // minimal rebuild and incremental compilation): cl.exe takes the /Fd
+ // value and replaces the .pdb extension with .idb.
+ //
+ // Note also that what we are doing here appears to be incompatible
+ // with PCH (/Y* options) and /Gm (minimal rebuild).
+ //
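+        // For example (hypothetical name), for hello.obj we will end up
+        // passing /Fd: hello.obj.pdb (or /Fdhello.obj.pdb pre-VS2013).
+        //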
+ if (find_options ({"/Zi", "/ZI"}, args))
+ {
+ if (ver >= 18)
+ args.push_back ("/Fd:");
+ else
+ out1 = "/Fd";
+
+ out1 += relo.string ();
+ out1 += ".pdb";
+
+ args.push_back (out1.c_str ());
+ }
+
+ if (ver >= 18)
+ {
+ args.push_back ("/Fo:");
+ args.push_back (relo.string ().c_str ());
+ }
+ else
+ {
+ out = "/Fo" + relo.string ();
+ args.push_back (out.c_str ());
+ }
+
+ if (mod)
+ {
+ args.push_back ("/module:interface");
+ args.push_back ("/module:output");
+ args.push_back (relm.string ().c_str ());
+ }
+
+        // Note: no way to indicate that the source is already preprocessed.
+
+ args.push_back ("/c"); // Compile only.
+ args.push_back (langopt (md)); // Compile as.
+ args.push_back (sp->string ().c_str ()); // Note: relied on being last.
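+
+        // The end result will be along these lines (hypothetical names):
+        //
+        //   cl.exe /nologo /EHsc /MD /Fo: hello.obj /c /TP hello.cxx
+        //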
+ }
+ else
+ {
+ if (ot == otype::s)
+ {
+          // On Darwin and Win32 -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ args.push_back ("-fPIC");
+ }
+
+ if (md.mods.start != 0)
+ append_modules (env, args, mods, a, t, md);
+
+ // Note: the order of the following options is relied upon below.
+ //
+ out_i = args.size (); // Index of the -o option.
+
+ if (mod)
+ {
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ args.push_back ("-o");
+ args.push_back (relo.string ().c_str ());
+
+ out = "-fmodule-output=";
+ out += relm.string ();
+ args.push_back (out.c_str ());
+
+ args.push_back ("-c");
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ args.push_back ("-o");
+ args.push_back (relm.string ().c_str ());
+ args.push_back ("--precompile");
+
+ // Without this option Clang's .pcm will reference source files.
+ // In our case this file may be transient (.ii). Plus, it won't
+ // play nice with distributed compilation.
+ //
+ args.push_back ("-Xclang");
+ args.push_back ("-fmodules-embed-all-files");
+
+ break;
+ }
+ case compiler_id::msvc:
+ case compiler_id::icc:
+ assert (false);
+ }
+ }
+ else
+ {
+ args.push_back ("-o");
+ args.push_back (relo.string ().c_str ());
+ args.push_back ("-c");
+ }
+
+ args.push_back ("-x");
+ args.push_back (langopt (md));
+
+ if (md.pp == preprocessed::all)
+ {
+ // Note that the mode we select must still handle comments and line
+ // continuations. So some more compiler-specific voodoo.
+ //
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ // -fdirectives-only is available since GCC 4.3.0.
+ //
+ if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
+ {
+ args.push_back ("-fpreprocessed");
+ args.push_back ("-fdirectives-only");
+ }
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ // Clang handles comments and line continuations in the
+ // preprocessed source (it does not have -fpreprocessed).
+ //
+ break;
+ }
+ case compiler_id::icc:
+ break; // Compile as normal source for now.
+ case compiler_id::msvc:
+ assert (false);
+ }
+ }
+
+ args.push_back (sp->string ().c_str ());
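+
+        // The end result will be along these lines (hypothetical names):
+        //
+        //   g++ ... -o hello.o -c -x c++ hello.cxx
+        //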
+ }
+
+ args.push_back (nullptr);
+
+ if (!env.empty ())
+ env.push_back (nullptr);
+
+ // With verbosity level 2 print the command line as if we are compiling
+ // the source file, not its preprocessed version (so that it's easy to
+ // copy and re-run, etc). Only at level 3 and above print the real deal.
+ //
+ if (verb == 1)
+ text << x_name << ' ' << s;
+ else if (verb == 2)
+ print_process (args);
+
+ // If we have the (partially) preprocessed output, switch to that.
+ //
+ bool psrc (!md.psrc.path.empty ());
+ bool pact (md.psrc.active);
+ if (psrc)
+ {
+ args.pop_back (); // nullptr
+ args.pop_back (); // sp
+
+ sp = &md.psrc.path;
+
+        // This should match how we set up preprocessing.
+ //
+ switch (cid)
+ {
+ case compiler_id::gcc:
+ {
+ // The -fpreprocessed is implied by .i/.ii.
+ //
+ args.pop_back (); // lang()
+ args.pop_back (); // -x
+ args.push_back ("-fdirectives-only");
+ break;
+ }
+ case compiler_id::clang:
+ case compiler_id::clang_apple:
+ {
+ // Note that without -x Clang will treat .i/.ii as fully
+ // preprocessed.
+ //
+ break;
+ }
+ case compiler_id::msvc:
+ {
+ // Nothing to do (/TP or /TC already there).
+ //
+ break;
+ }
+ case compiler_id::icc:
+ assert (false);
+ }
+
+ args.push_back (sp->string ().c_str ());
+ args.push_back (nullptr);
+
+ // Let's keep the preprocessed file in case of an error but only at
+ // verbosity level 3 and up (when one actually sees it mentioned on
+ // the command line). We also have to re-arm on success (see below).
+ //
+ if (pact && verb >= 3)
+ md.psrc.active = false;
+ }
+
+ if (verb >= 3)
+ print_process (args);
+
+ try
+ {
+ // VC cl.exe sends diagnostics to stdout. It also prints the file name
+ // being compiled as the first line. So for cl.exe we redirect stdout
+ // to a pipe, filter that noise out, and send the rest to stderr.
+ //
+ // For other compilers redirect stdout to stderr, in case any of them
+ // tries to pull off something similar. For sane compilers this should
+ // be harmless.
+ //
+ bool filter (cid == compiler_id::msvc);
+
+ process pr (cpath,
+ args.data (),
+ 0, (filter ? -1 : 2), 2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+
+ if (filter)
+ {
+ try
+ {
+ ifdstream is (
+ move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit);
+
+ msvc_filter_cl (is, *sp);
+
+ // If anything remains in the stream, send it all to stderr. Note
+ // that the eof check is important: if the stream is at eof, this
+ // and all subsequent writes to the diagnostics stream will fail
+ // (and you won't see a thing).
+ //
+ if (is.peek () != ifdstream::traits_type::eof ())
+ diag_stream_lock () << is.rdbuf ();
+
+ is.close ();
+ }
+ catch (const io_error&) {} // Assume exits with error.
+ }
+
+ run_finish (args, pr);
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+
+ if (pact && verb >= 3)
+ md.psrc.active = true;
+
+ // Clang's module compilation requires two separate compiler
+ // invocations.
+ //
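+      // Schematically (hypothetical names):
+      //
+      //   clang++ ... --precompile -o foo.pcm foo.mxx
+      //   clang++ ... -o foo.o -c foo.pcm
+      //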
+ if (mod && (cid == compiler_id::clang ||
+ cid == compiler_id::clang_apple))
+ {
+ // Remove the target file if this fails. If we don't do that, we will
+ // end up with a broken build that is up-to-date.
+ //
+ auto_rmfile rm (relm);
+
+ // Adjust the command line. First discard everything after -o then
+ // build the new "tail".
+ //
+ args.resize (out_i + 1);
+ args.push_back (relo.string ().c_str ()); // Produce .o.
+ args.push_back ("-c"); // By compiling .pcm.
+ args.push_back ("-Wno-unused-command-line-argument");
+ args.push_back (relm.string ().c_str ());
+ args.push_back (nullptr);
+
+ if (verb >= 2)
+ print_process (args);
+
+ try
+ {
+ process pr (cpath,
+ args.data (),
+ 0, 2, 2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+
+ run_finish (args, pr);
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+
+ rm.cancel ();
+ }
+
+ // Should we go to the filesystem and get the new mtime? We know the
+ // file has been modified, so instead just use the current clock time.
+      // It has the advantage of subsecond precision.
+ //
+ t.mtime (system_clock::now ());
+ return target_state::changed;
+ }
+
+ target_state compile_rule::
+ perform_clean (action a, const target& xt) const
+ {
+ const file& t (xt.as<file> ());
+
+ using id = compiler_id;
+
+ switch (cid)
+ {
+ case id::gcc: return clean_extra (a, t, {".d", x_pext, ".t"});
+ case id::clang_apple:
+ case id::clang: return clean_extra (a, t, {".d", x_pext});
+ case id::msvc: return clean_extra (a, t, {".d", x_pext, ".idb", ".pdb"});
+ case id::icc: return clean_extra (a, t, {".d"});
+ }
+
+ assert (false);
+ return target_state::unchanged;
+ }
+ }
+}