1 files changed, 1740 insertions, 848 deletions
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx
index 77d01c6..2e4775e 100644
--- a/libbuild2/cc/compile-rule.cxx
+++ b/libbuild2/cc/compile-rule.cxx
@@ -3,6 +3,7 @@
 
 #include <libbuild2/cc/compile-rule.hxx>
 
+#include <cerrno>
 #include <cstdlib>  // exit()
 #include <cstring>  // strlen(), strchr(), strncmp()
 
@@ -175,7 +176,7 @@ namespace build2
       if (s == "includes") return preprocessed::includes;
       if (s == "modules")  return preprocessed::modules;
       if (s == "all")      return preprocessed::all;
-      throw invalid_argument ("invalid preprocessed value '" + s + "'");
+      throw invalid_argument ("invalid preprocessed value '" + s + '\'');
     }
 
     // Return true if the compiler supports -isystem (GCC class) or
@@ -229,10 +230,17 @@ namespace build2
       return nullopt;
     }
 
+    // Note that we don't really need this for clean (where we only need
+    // unrefined unit type) so we could make this update-only. But let's keep
+    // it simple for now. Note that now we do need the source prerequisite
+    // type in clean to deal with Objective-X.
+    //
     struct compile_rule::match_data
     {
-      match_data (unit_type t, const prerequisite_member& s)
-          : type (t), src (s) {}
+      match_data (const compile_rule& r,
+                  unit_type t,
+                  const prerequisite_member& s)
+          : type (t), src (s), rule (r) {}
 
       unit_type type;
       preprocessed pp = preprocessed::none;
@@ -245,39 +253,67 @@ namespace build2
       path dd;                              // Dependency database path.
       size_t header_units = 0;              // Number of imported header units.
       module_positions modules = {0, 0, 0}; // Positions of imported modules.
+
+      const compile_rule& rule;
+
+      target_state
+      operator() (action a, const target& t)
+      {
+        return rule.perform_update (a, t, *this);
+      }
     };
 
     compile_rule::
-    compile_rule (data&& d)
+    compile_rule (data&& d, const scope& rs)
         : common (move (d)),
           rule_id (string (x) += ".compile 6")
     {
-      static_assert (sizeof (match_data) <= target::data_size,
-                     "insufficient space");
+      // Locate the header cache (see enter_header() for details).
+      //
+      {
+        string mn (string (x) + ".config");
+
+        header_cache_ = rs.find_module<config_module> (mn); // Must be there.
+
+        const scope* ws (rs.weak_scope ());
+        if (ws != &rs)
+        {
+          const scope* s (&rs);
+          do
+          {
+            s = s->parent_scope ()->root_scope ();
+
+            if (const auto* m = s->find_module<config_module> (mn))
+              header_cache_ = m;
+
+          } while (s != ws);
+        }
+      }
     }
 
     template <typename T>
     void compile_rule::
     append_sys_hdr_options (T& args) const
     {
-      assert (sys_hdr_dirs_extra <= sys_hdr_dirs.size ());
+      assert (sys_hdr_dirs_mode + sys_hdr_dirs_extra <= sys_hdr_dirs.size ());
 
       // Note that the mode options are added as part of cmode.
       //
       auto b (sys_hdr_dirs.begin () + sys_hdr_dirs_mode);
-      auto m (sys_hdr_dirs.begin () + sys_hdr_dirs_extra);
-      auto e (sys_hdr_dirs.end ());
+      auto x (b + sys_hdr_dirs_extra);
 
+      // Add extras.
+      //
       // Note: starting from 16.10, MSVC gained /external:I option though it
       // doesn't seem to affect the order, only "system-ness".
       //
       append_option_values (
         args,
-        cclass == compiler_class::gcc  ? "-idirafter" :
+        cclass == compiler_class::gcc  ? "-isystem" :
         cclass == compiler_class::msvc ? (isystem (*this)
                                           ? "/external:I"
                                           : "/I") : "-I",
-        m, e,
+        b, x,
         [] (const dir_path& d) {return d.string ().c_str ();});
 
       // For MSVC if we have no INCLUDE environment variable set, then we
@@ -293,7 +329,7 @@ namespace build2
         {
           append_option_values (
             args, "/I",
-            b, m,
+            x, sys_hdr_dirs.end (),
             [] (const dir_path& d) {return d.string ().c_str ();});
         }
       }
@@ -318,6 +354,35 @@ namespace build2
           case lang::c:   o1 = "/TC"; break;
           case lang::cxx: o1 = "/TP"; break;
           }
+
+          // Note: /interface and /internalPartition are in addition to /TP.
+          //
+          switch (md.type)
+          {
+          case unit_type::non_modular:
+          case unit_type::module_impl:
+            {
+              break;
+            }
+          case unit_type::module_intf:
+          case unit_type::module_intf_part:
+            {
+              o2 = "/interface";
+              break;
+            }
+          case unit_type::module_impl_part:
+            {
+              o2 = "/internalPartition";
+              break;
+            }
+          case unit_type::module_header:
+            {
+              //@@ MODHDR TODO: /exportHeader
+              assert (false);
+              break;
+            }
+          }
+
           break;
         }
       case compiler_class::gcc:
@@ -336,11 +401,20 @@ namespace build2
           case unit_type::module_impl:
             {
               o1 = "-x";
-              switch (x_lang)
+
+              if (x_assembler_cpp (md.src))
+                o2 = "assembler-with-cpp";
+              else
               {
-              case lang::c:   o2 = "c";   break;
-              case lang::cxx: o2 = "c++"; break;
+                bool obj (x_objective (md.src));
+
+                switch (x_lang)
+                {
+                case lang::c:   o2 = obj ? "objective-c"   : "c";   break;
+                case lang::cxx: o2 = obj ? "objective-c++" : "c++"; break;
+                }
               }
+
               break;
             }
           case unit_type::module_intf:
@@ -380,9 +454,11 @@ namespace build2
               default:
                   assert (false);
               }
+
               break;
             }
           }
+
           break;
         }
       }
@@ -406,7 +482,7 @@ namespace build2
     }
 
     bool compile_rule::
-    match (action a, target& t, const string&) const
+    match (action a, target& t) const
     {
       tracer trace (x, "compile_rule::match");
 
@@ -439,13 +515,15 @@ namespace build2
 
         // For a header unit we check the "real header" plus the C header.
         //
-        if (ut == unit_type::module_header ? p.is_a (**x_hdr) || p.is_a<h> () :
-            ut == unit_type::module_intf   ? p.is_a (*x_mod)                  :
-            p.is_a (x_src))
+        if (ut == unit_type::module_header ? p.is_a (**x_hdrs) || p.is_a<h> () :
+            ut == unit_type::module_intf   ? p.is_a (*x_mod)                   :
+            p.is_a (x_src)                        ||
+            (x_asp != nullptr && p.is_a (*x_asp)) ||
+            (x_obj != nullptr && p.is_a (*x_obj)))
         {
           // Save in the target's auxiliary storage.
           //
-          t.data (match_data (ut, p));
+          t.data (a, match_data (*this, ut, p));
           return true;
         }
       }
@@ -456,13 +534,16 @@ namespace build2
 
     // Append or hash library options from a pair of *.export.* variables
     // (first is x.* then cc.*) recursively, prerequisite libraries first.
+    // If common is true, then only append common options from the lib{}
+    // groups.
     //
     template <typename T>
     void compile_rule::
     append_library_options (appended_libraries& ls, T& args,
                             const scope& bs,
                             const scope* is, // Internal scope.
-                            action a, const file& l, bool la, linfo li,
+                            action a, const file& l, bool la,
+                            linfo li, bool common,
                             library_cache* lib_cache) const
     {
       struct data
@@ -476,7 +557,7 @@ namespace build2
       //
       auto imp = [] (const target& l, bool la) {return la && l.is_a<libux> ();};
 
-      auto opt = [&d, this] (const target& lt,
+      auto opt = [&d, this] (const target& l, // Note: could be lib{}
                              const string& t, bool com, bool exp)
       {
         // Note that in our model *.export.poptions are always "interface",
@@ -485,8 +566,6 @@ namespace build2
         if (!exp) // Ignore libux.
           return true;
 
-        const file& l (lt.as<file> ());
-
         // Suppress duplicates.
         //
         // Compilation is the simple case: we can add the options on the first
@@ -496,6 +575,8 @@ namespace build2
         if (find (d.ls.begin (), d.ls.end (), &l) != d.ls.end ())
           return false;
 
+        // Note: go straight for the public variable pool.
+        //
         const variable& var (
           com
           ? c_export_poptions
@@ -645,16 +726,24 @@ namespace build2
 
       process_libraries (a, bs, li, sys_lib_dirs,
                          l, la, 0, // lflags unused.
-                         imp, nullptr, opt, false /* self */, lib_cache);
+                         imp, nullptr, opt,
+                         false /* self */,
+                         common /* proc_opt_group */,
+                         lib_cache);
     }
 
     void compile_rule::
     append_library_options (appended_libraries& ls, strings& args,
                             const scope& bs,
-                            action a, const file& l, bool la, linfo li) const
+                            action a, const file& l, bool la,
+                            linfo li,
+                            bool common,
+                            bool original) const
     {
-      const scope* is (isystem (*this) ? effective_iscope (bs) : nullptr);
-      append_library_options (ls, args, bs, is, a, l, la, li, nullptr);
+      const scope* is (!original && isystem (*this)
+                       ? effective_iscope (bs)
+                       : nullptr);
+      append_library_options (ls, args, bs, is, a, l, la, li, common, nullptr);
     }
 
     template <typename T>
@@ -695,7 +784,9 @@ namespace build2
             append_library_options (ls,
                                     args,
                                     bs, iscope (),
-                                    a, *f, la, li,
+                                    a, *f, la,
+                                    li,
+                                    false /* common */,
                                     &lc);
           }
         }
@@ -737,6 +828,8 @@ namespace build2
         //
         if (const scope* rs = l.base_scope ().root_scope ())
         {
+          // Note: go straight for the public variable pool.
+          //
           const variable& var (
             com
             ? c_export_poptions
@@ -777,7 +870,9 @@ namespace build2
 
           process_libraries (a, bs, li, sys_lib_dirs,
                              pt->as<file> (), la, 0, // lflags unused.
-                             impf, nullptr, optf, false /* self */,
+                             impf, nullptr, optf,
+                             false /* self */,
+                             false /* proc_opt_group */,
                              &lib_cache);
         }
       }
@@ -790,7 +885,7 @@ namespace build2
 
       file& t (xt.as<file> ()); // Either obj*{} or bmi*{}.
 
-      match_data& md (t.data<match_data> ());
+      match_data& md (t.data<match_data> (a));
 
       context& ctx (t.ctx);
 
@@ -883,7 +978,9 @@ namespace build2
         //
         // Note: ut is still unrefined.
         //
-        if (ut == unit_type::module_intf && cast_true<bool> (t[b_binless]))
+        if ((ut == unit_type::module_intf      ||
+             ut == unit_type::module_intf_part ||
+             ut == unit_type::module_impl_part) && cast_true<bool> (t[b_binless]))
         {
           // The module interface unit can be the same as an implementation
           // (e.g., foo.mxx and foo.cxx) which means obj*{} targets could
@@ -944,6 +1041,12 @@ namespace build2
             // to match it if we may need its modules or importable headers
             // (see search_modules(), make_header_sidebuild() for details).
             //
+            // Well, that was the case until we've added support for immediate
+            // importation of libraries, which happens during the load phase
+            // and natually leaves the library unmatched. While we could have
+            // returned from search_library() an indication of whether the
+            // library has been matched, this doesn't seem worth the trouble.
+            //
             if (p.proj ())
             {
               pt = search_library (a,
@@ -951,8 +1054,10 @@ namespace build2
                                    usr_lib_dirs,
                                    p.prerequisite);
 
+#if 0
               if (pt != nullptr && !modules)
                 continue;
+#endif
             }
 
             if (pt == nullptr)
@@ -980,7 +1085,8 @@ namespace build2
         {
           pt = &p.search (t);
 
-          if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ()))
+          if (pt == dir ||
+              (a.operation () == clean_id && !pt->dir.sub (rs.out_path ())))
             continue;
         }
 
@@ -1010,10 +1116,10 @@ namespace build2
         // @@ If for some reason unmatch fails, this messes up the for_install
         //    logic because we will update this library during match. Perhaps
         //    we should postpone updating them until execute if we failed to
-        //    unmatch.
+        //    unmatch. See how we do this in ad hoc rule.
         //
         pair<bool, target_state> mr (
-          build2::match (
+          match_complete (
             a,
             *pt,
             pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ()
@@ -1072,12 +1178,14 @@ namespace build2
           // this can very well be happening in parallel. But that's not a
           // problem since fsdir{}'s update is idempotent.
           //
-          fsdir_rule::perform_update_direct (a, t);
+          fsdir_rule::perform_update_direct (a, *dir);
         }
 
         // Note: the leading '@' is reserved for the module map prefix (see
         // extract_modules()) and no other line must start with it.
         //
+        // NOTE: see also the predefs rule if changing anything here.
+        //
         depdb dd (tp + ".d");
 
         // First should come the rule name/version.
@@ -1242,7 +1350,7 @@ namespace build2
           //
           l5 ([&]{trace << "extracting headers from " << src;});
           auto& is (tu.module_info.imports);
-          psrc = extract_headers (a, bs, t, li, src, md, dd, u, mt, is);
+          extract_headers (a, bs, t, li, src, md, dd, u, mt, is, psrc);
           is.clear (); // No longer needed.
         }
 
@@ -1297,6 +1405,10 @@ namespace build2
                 //
                 if (mt != timestamp_nonexistent)
                 {
+                  // Appended to by to_module_info() below.
+                  //
+                  tu.module_info.imports.clear ();
+
                   u = false;
                   md.touch = true;
                 }
@@ -1381,24 +1493,6 @@ namespace build2
             extract_modules (a, bs, t, li,
                              tts, src,
                              md, move (tu.module_info), dd, u);
-
-            // Currently in VC module interface units must be compiled from
-            // the original source (something to do with having to detect and
-            // store header boundaries in the .ifc files).
-            //
-            // @@ MODHDR MSVC: should we do the same for header units? I guess
-            //    we will figure it out when MSVC supports header units.
-            //
-            // @@ TMP: probably outdated. Probably the same for partitions.
-            //
-            // @@ See also similar check in extract_headers(), existing entry
-            //    case.
-            //
-            if (ctype == compiler_type::msvc)
-            {
-              if (ut == unit_type::module_intf)
-                psrc.second = false;
-            }
           }
         }
 
@@ -1417,7 +1511,7 @@ namespace build2
         // to keep re-validating the file on every subsequent dry-run as well
         // on the real run).
         //
-        if (u && dd.reading () && !ctx.dry_run)
+        if (u && dd.reading () && !ctx.dry_run_option)
           dd.touch = timestamp_unknown;
 
         dd.close (false /* mtime_check */);
@@ -1478,14 +1572,14 @@ namespace build2
 
       switch (a)
       {
-      case perform_update_id: return [this] (action a, const target& t)
-        {
-          return perform_update (a, t);
-        };
-      case perform_clean_id: return [this] (action a, const target& t)
+      case perform_update_id: return move (md);
+      case perform_clean_id:
         {
-          return perform_clean (a, t);
-        };
+          return [this, srct = &md.src.type ()] (action a, const target& t)
+          {
+            return perform_clean (a, t, *srct);
+          };
+        }
       default: return noop_recipe; // Configure update.
       }
     }
@@ -1582,72 +1676,6 @@ namespace build2
       return pm;
     }
 
-    // @@ TMP
-    //
-#if 0
-    // Return the next make prerequisite starting from the specified
-    // position and update position to point to the start of the
-    // following prerequisite or l.size() if there are none left.
-    //
-    static string
-    next_make (const string& l, size_t& p)
-    {
-      size_t n (l.size ());
-
-      // Skip leading spaces.
-      //
-      for (; p != n && l[p] == ' '; p++) ;
-
-      // Lines containing multiple prerequisites are 80 characters max.
-      //
-      string r;
-      r.reserve (n);
-
-      // Scan the next prerequisite while watching out for escape sequences.
-      //
-      for (; p != n && l[p] != ' '; p++)
-      {
-        char c (l[p]);
-
-        if (p + 1 != n)
-        {
-          if (c == '$')
-          {
-            // Got to be another (escaped) '$'.
-            //
-            if (l[p + 1] == '$')
-              ++p;
-          }
-          else if (c == '\\')
-          {
-            // This may or may not be an escape sequence depending on whether
-            // what follows is "escapable".
-            //
-            switch (c = l[++p])
-            {
-            case '\\': break;
-            case ' ': break;
-            default: c = '\\'; --p; // Restore.
-            }
-          }
-        }
-
-        r += c;
-      }
-
-      // Skip trailing spaces.
-      //
-      for (; p != n && l[p] == ' '; p++) ;
-
-      // Skip final '\'.
-      //
-      if (p == n - 1 && l[p] == '\\')
-        p++;
-
-      return r;
-    }
-#endif
-
     // VC /showIncludes output. The first line is the file being compiled
     // (unless clang-cl; handled by our caller). Then we have the list of
     // headers, one per line, in this form (text can presumably be
@@ -1843,7 +1871,7 @@ namespace build2
     // Any unhandled io_error is handled by the caller as a generic module
     // mapper io error. Returning false terminates the communication.
     //
-    struct compile_rule::module_mapper_state //@@ gcc_module_mapper_state
+    struct compile_rule::gcc_module_mapper_state
     {
       size_t skip;              // Number of depdb entries to skip.
       size_t header_units = 0;  // Number of header units imported.
@@ -1854,15 +1882,20 @@ namespace build2
       optional<const build2::cc::translatable_headers*> translatable_headers;
 
       small_vector<string, 2> batch; // Reuse buffers.
+      size_t                  batch_n = 0;
 
-      module_mapper_state (size_t s, module_imports& i)
+      gcc_module_mapper_state (size_t s, module_imports& i)
           : skip (s), imports (i) {}
     };
 
-    bool compile_rule::
-    gcc_module_mapper (module_mapper_state& st,
+    // The module mapper is called on one line of input at a time. It should
+    // return nullopt if another line is expected (batch), false if the mapper
+    // interaction should be terminated, and true if it should be continued.
+    //
+    optional<bool> compile_rule::
+    gcc_module_mapper (gcc_module_mapper_state& st,
                        action a, const scope& bs, file& t, linfo li,
-                       ifdstream& is,
+                       const string& l,
                        ofdstream& os,
                        depdb& dd, bool& update, bool& bad_error,
                        optional<prefix_map>& pfx_map, srcout_map& so_map) const
@@ -1878,35 +1911,40 @@ namespace build2
 
       // Read in the entire batch trying hard to reuse the buffers.
       //
-      auto& batch (st.batch);
-      size_t batch_n (0);
+      small_vector<string, 2>& batch (st.batch);
+      size_t& batch_n (st.batch_n);
 
-      for (;;)
+      // Add the next line.
+      //
       {
         if (batch.size () == batch_n)
-          batch.push_back (string ());
-
-        string& r (batch[batch_n]);
-
-        if (eof (getline (is, r)))
-          break;
+          batch.push_back (l);
+        else
+          batch[batch_n] = l;
 
         batch_n++;
+      }
 
-        if (r.back () != ';')
-          break;
+      // Check if more is expected in this batch.
+      //
+      {
+        string& r (batch[batch_n - 1]);
 
-        // Strip the trailing `;` word.
-        //
-        r.pop_back ();
-        r.pop_back ();
-      }
+        if (r.back () == ';')
+        {
+          // Strip the trailing `;` word.
+          //
+          r.pop_back ();
+          r.pop_back ();
 
-      if (batch_n == 0) // EOF
-        return false;
+          return nullopt;
+        }
+      }
 
       if (verb >= 3)
       {
+        // It doesn't feel like buffering this would be useful.
+        //
         // Note that we show `;` in requests/responses so that the result
         // could be replayed.
         //
@@ -1928,23 +1966,211 @@ namespace build2
       for (size_t i (0); i != batch_n; ++i)
       {
         string& r (batch[i]);
+        size_t rn (r.size ());
+
+        // The protocol uses a peculiar quoting/escaping scheme that can be
+        // summarized as follows (see the libcody documentation for details):
+        //
+        // - Words are seperated with spaces and/or tabs.
+        //
+        // - Words need not be quoted if they only containing characters from
+        //   the [-+_/%.A-Za-z0-9] set.
+        //
+        // - Otherwise words need to be single-quoted.
+        //
+        // - Inside single-quoted words, the \n \t \' and \\ escape sequences
+        //   are recognized.
+        //
+        // Note that we currently don't treat abutted quotes (as in a' 'b) as
+        // a single word (it doesn't seem plausible that we will ever receive
+        // something like this).
+        //
+        size_t b (0), e (0), n; bool q; // Next word.
+
+        auto next = [&r, rn, &b, &e, &n, &q] () -> size_t
+        {
+          if (b != e)
+            b = e;
+
+          // Skip leading whitespaces.
+          //
+          for (; b != rn && (r[b] == ' ' || r[b] == '\t'); ++b) ;
+
+          if (b != rn)
+          {
+            q = (r[b] == '\'');
+
+            // Find first trailing whitespace or closing quote.
+            //
+            for (e = b + 1; e != rn; ++e)
+            {
+              // Note that we deal with invalid quoting/escaping in unquote().
+              //
+              switch (r[e])
+              {
+              case ' ':
+              case '\t':
+                if (q)
+                  continue;
+                else
+                  break;
+              case '\'':
+                if (q)
+                {
+                  ++e; // Include closing quote (hopefully).
+                  break;
+                }
+                else
+                {
+                  assert (false); // Abutted quote.
+                  break;
+                }
+              case '\\':
+                if (++e != rn) // Skip next character (hopefully).
+                  continue;
+                else
+                  break;
+              default:
+                continue;
+              }
+
+              break;
+            }
 
-        // @@ TODO: quoting and escaping.
+            n = e - b;
+          }
+          else
+          {
+            q = false;
+            e = rn;
+            n = 0;
+          }
+
+          return n;
+        };
+
+        // Unquote into tmp the current word returning false if malformed.
         //
-        size_t b (0), e (0), n; // Next word.
+        auto unquote = [&r, &b, &n, &q, &tmp] (bool clear = true) -> bool
+        {
+          if (q && n > 1)
+          {
+            size_t e (b + n - 1);
+
+            if (r[b] == '\'' && r[e] == '\'')
+            {
+              if (clear)
+                tmp.clear ();
+
+              size_t i (b + 1);
+              for (; i != e; ++i)
+              {
+                char c (r[i]);
+                if (c == '\\')
+                {
+                  if (++i == e)
+                  {
+                    i = 0;
+                    break;
+                  }
+
+                  c = r[i];
+                  if      (c == 'n') c = '\n';
+                  else if (c == 't') c = '\t';
+                }
+                tmp += c;
+              }
+
+              if (i == e)
+                return true;
+            }
+          }
+
+          return false;
+        };
+
+#if 0
+#define UNQUOTE(x, y)                     \
+        r = x; rn = r.size (); b = e = 0; \
+        assert (next () && unquote () && tmp == y)
+
+        UNQUOTE ("'foo bar'", "foo bar");
+        UNQUOTE (" 'foo bar' ", "foo bar");
+        UNQUOTE ("'foo\\\\bar'", "foo\\bar");
+        UNQUOTE ("'\\'foo bar'", "'foo bar");
+        UNQUOTE ("'foo bar\\''", "foo bar'");
+        UNQUOTE ("'\\'foo\\\\bar\\''", "'foo\\bar'");
+
+        fail << "all good";
+#endif
+
+        // Escape if necessary the specified string and append to r.
+        //
+        auto escape = [&r] (const string& s)
+        {
+          size_t b (0), e, n (s.size ());
+          while (b != n && (e = s.find_first_of ("\\'\n\t", b)) != string::npos)
+          {
+            r.append (s, b, e - b); // Preceding chunk.
+
+            char c (s[e]);
+            r += '\\';
+            r += (c == '\n' ? 'n' : c == '\t' ? 't' : c);
+            b = e + 1;
+          }
+
+          if (b != n)
+            r.append (s, b, e); // Final chunk.
+        };
 
-        auto next = [&r, &b, &e, &n] () -> size_t
+        // Quote and escape if necessary the specified string and append to r.
+        //
+        auto quote = [&r, &escape] (const string& s)
         {
-          return (n = next_word (r, b, e, ' ', '\t'));
+          if (find_if (s.begin (), s.end (),
+                       [] (char c)
+                       {
+                         return !((c >= 'a' && c <= 'z') ||
+                                  (c >= '0' && c <= '9') ||
+                                  (c >= 'A' && c <= 'Z') ||
+                                  c == '-' || c == '_' || c == '/' ||
+                                  c == '.' || c == '+' || c == '%');
+                       }) == s.end ())
+          {
+            r += s;
+          }
+          else
+          {
+            r += '\'';
+            escape (s);
+            r += '\'';
+          }
         };
 
+#if 0
+#define QUOTE(x, y)            \
+        r.clear (); quote (x); \
+        assert (r == y)
+
+        QUOTE ("foo/Bar-7.h", "foo/Bar-7.h");
+
+        QUOTE ("foo bar", "'foo bar'");
+        QUOTE ("foo\\bar", "'foo\\\\bar'");
+        QUOTE ("'foo bar", "'\\'foo bar'");
+        QUOTE ("foo bar'", "'foo bar\\''");
+        QUOTE ("'foo\\bar'", "'\\'foo\\\\bar\\''");
+
+        fail << "all good";
+#endif
+
         next (); // Request name.
 
-        auto name = [&r, b, n] (const char* c) -> bool
+        auto name = [&r, b, n, q] (const char* c) -> bool
         {
           // We can reasonably assume a command will never be quoted.
           //
-          return (r.compare (b, n, c) == 0 &&
+          return (!q                       &&
+                  r.compare (b, n, c) == 0 &&
                   (r[n] == ' '  || r[n] == '\t' || r[n] == '\0'));
         };
 
@@ -1993,7 +2219,17 @@ namespace build2
 
           if (next ())
           {
-            path f (r, b, n);
+            path f;
+            if (!q)
+              f = path (r, b, n);
+            else if (unquote ())
+              f = path (tmp);
+            else
+            {
+              r = "ERROR 'malformed quoting/escaping in request'";
+              continue;
+            }
+
             bool exists (true);
 
             // The TU path we pass to the compiler is always absolute so any
@@ -2004,8 +2240,9 @@ namespace build2
             //
             if (exists && f.relative ())
             {
-              tmp.assign (r, b, n);
-              r = "ERROR relative header path '"; r += tmp; r += '\'';
+              r = "ERROR 'relative header path ";
+              escape (f.string ());
+              r += '\'';
               continue;
             }
 
@@ -2044,7 +2281,7 @@ namespace build2
               pair<const file*, bool> er (
                 enter_header (
                   a, bs, t, li,
-                  f, false /* cache */, false /* normalized */,
+                  move (f), false /* cache */, false /* normalized */,
                   pfx_map, so_map));
 
               ht = er.first;
@@ -2052,7 +2289,7 @@ namespace build2
 
               if (remapped)
               {
-                r = "ERROR remapping of headers not supported";
+                r = "ERROR 'remapping of headers not supported'";
                 continue;
               }
 
@@ -2062,7 +2299,7 @@ namespace build2
               // diagnostics won't really add anything to the compiler's. So
               // let's only print it at -V or higher.
               //
-              if (ht == nullptr)
+              if (ht == nullptr) // f is still valid.
               {
                 assert (!exists); // Sanity check.
 
@@ -2110,8 +2347,10 @@ namespace build2
               // messy, let's keep both (it would have been nicer to print
               // ours after the compiler's but that isn't easy).
               //
-              r = "ERROR unable to update header '";
-              r += (ht != nullptr ? ht->path () : f).string ();
+              // Note: if ht is NULL, f is still valid.
+              //
+              r = "ERROR 'unable to update header ";
+              escape ((ht != nullptr ? ht->path () : f).string ());
               r += '\'';
               continue;
             }
@@ -2246,17 +2485,27 @@ namespace build2
                   // original (which we may need to normalize when we read
                   // this mapping in extract_headers()).
                   //
-                  tmp = "@ "; tmp.append (r, b, n); tmp += ' '; tmp += bp;
+                  // @@ This still breaks if the header path contains spaces.
+                  //    GCC bug 110153.
+                  //
+                  tmp = "@ ";
+                  if (!q) tmp.append (r, b, n);
+                  else    unquote (false /* clear */); // Can't fail.
+                  tmp += ' ';
+                  tmp += bp;
+
                   dd.expect (tmp);
                   st.header_units++;
                 }
 
-                r = "PATHNAME "; r += bp;
+                r = "PATHNAME ";
+                quote (bp);
               }
               catch (const failed&)
               {
                 r = "ERROR 'unable to update header unit for ";
-                r += hs; r += '\'';
+                escape (hs);
+                r += '\'';
                 continue;
               }
             }
@@ -2282,7 +2531,7 @@ namespace build2
         // Truncate the response batch and terminate the communication (see
         // also libcody issue #22).
         //
-        tmp.assign (r, b, n);
+        tmp.assign (r, b, n); // Request name (unquoted).
         r = "ERROR '"; r += w; r += ' '; r += tmp; r += '\'';
         batch_n = i + 1;
         term = true;
@@ -2298,6 +2547,9 @@ namespace build2
 
       // Write the response batch.
       //
+      // @@ It's theoretically possible that we get blocked writing the
+      //    response while the compiler gets blocked writing the diagnostics.
+      //
       for (size_t i (0);; )
       {
         string& r (batch[i]);
@@ -2318,6 +2570,8 @@ namespace build2
 
       os.flush ();
 
+      batch_n = 0; // Start a new batch.
+
       return !term;
     }
 
@@ -2573,7 +2827,7 @@ namespace build2
               pair<const file*, bool> r (
                 enter_header (
                   a, bs, t, li,
-                  f, false /* cache */, false /* normalized */,
+                  move (f), false /* cache */, false /* normalized */,
                   pfx_map, so_map));
 
               if (!r.second) // Shouldn't be remapped.
@@ -2582,7 +2836,7 @@ namespace build2
 
             if (ht != pts.back ())
             {
-              ht = static_cast<const file*> (pts.back ().target);
+              ht = &pts.back ().target->as<file> ();
               rs = "ERROR expected header '" + ht->path ().string () +
                 "' to be found instead";
               bad_error = true; // We expect an error from the compiler.
@@ -2601,7 +2855,7 @@ namespace build2
               pair<const file*, bool> er (
                 enter_header (
                   a, bs, t, li,
-                  f, false /* cache */, false /* normalized */,
+                  move (f), false /* cache */, false /* normalized */,
                   pfx_map, so_map));
 
               ht = er.first;
@@ -2620,7 +2874,7 @@ namespace build2
               // diagnostics won't really add anything to the compiler's. So
               // let's only print it at -V or higher.
               //
-              if (ht == nullptr)
+              if (ht == nullptr) // f is still valid.
               {
                 assert (!exists); // Sanity check.
 
@@ -2667,10 +2921,12 @@ namespace build2
               // messy, let's keep both (it would have been nicer to print
               // ours after the compiler's but that isn't easy).
               //
+              // Note: if ht is NULL, f is still valid.
+              //
               rs = !exists
                 ? string ("INCLUDE")
                 : ("ERROR unable to update header '" +
-                   (ht != nullptr ? ht->path () : f).string () + "'");
+                   (ht != nullptr ? ht->path () : f).string () + '\'');
 
               bad_error = true;
               break;
@@ -2748,7 +3004,7 @@ namespace build2
             }
             catch (const failed&)
             {
-              rs = "ERROR unable to update header unit '" + hp + "'";
+              rs = "ERROR unable to update header unit '" + hp + '\'';
               bad_error = true;
               break;
             }
@@ -2790,17 +3046,123 @@ namespace build2
     }
 #endif
 
+    //atomic_count cache_hit {0};
+    //atomic_count cache_mis {0};
+    //atomic_count cache_cls {0};
+
+    // The fp path is only moved from on success.
+    //
     // Note: this used to be a lambda inside extract_headers() so refer to the
     // body of that function for the overall picture.
     //
     pair<const file*, bool> compile_rule::
     enter_header (action a, const scope& bs, file& t, linfo li,
-                  path& fp, bool cache, bool norm,
+                  path&& fp, bool cache, bool norm,
                   optional<prefix_map>& pfx_map,
                   const srcout_map& so_map) const
     {
       tracer trace (x, "compile_rule::enter_header");
 
+      // It's reasonable to expect the same header to be included by multiple
+      // translation units, which means we will be re-doing this work over and
+      // over again. And it's not exactly cheap, taking up to 50% of an
+      // up-to-date check time on some projects. So we are going to cache the
+      // header path to target mapping.
+      //
+      // While we pass quite a bit of specific "context" (target, base scope)
+      // to enter_file(), here is the analysis why the result will not depend
+      // on this context for the non-absent header (fp is absolute):
+      //
+      // 1. Let's start with the base scope (bs). Firstly, the base scope
+      //    passed to map_extension() is the scope of the header (i.e., it is
+      //    the scope of fp.directory()). Other than that, the target base
+      //    scope is only passed to build_prefix_map() which is only called
+      //    for the absent header (linfo is also only used here).
+      //
+      // 2. Next is the target (t). It is passed to build_prefix_map() but
+      //    that doesn't matter for the same reason as in (1). Other than
+      //    that, it is only passed to build2::search() which in turn passes
+      //    it to target type-specific prerequisite search callback (see
+      //    target_type::search) if one is not NULL. The target type in
+      //    question here is one of the headers and we know all of them use
+      //    the standard file_search() which ignores the passed target.
+      //
+      // 3. Finally, so_map could be used for an absolute fp. While we could
+      //    simply not cache the result if it was used (second half of the
+      //    result pair is true), there doesn't seem to be any harm in caching
+      //    the remapped path->target mapping. In fact, if to think about it,
+      //    there is no harm in caching the generated file mapping since it
+      //    will be immediately generated and any subsequent inclusions we
+      //    will "see" with an absolute path, which we can resolve from the
+      //    cache.
+      //
+      // To put it another way, all we need to do is make sure that if we were
+      // to not return an existing cache entry, the call to enter_file() would
+      // have returned exactly the same path/target.
+      //
+      // @@ Could it be that the header is re-mapped in one config but not the
+      //    other (e.g., when we do both in src and in out builds and we pick
+      //    the generated header in src)? If so, that would lead to a
+      //    divergence. I.e., we would cache the no-remap case first and then
+      //    return it even though the re-map is necessary? Why can't we just
+      //    check for re-mapping ourselves? A: the remapping logic in
+      //    enter_file() is not exactly trivial.
+      //
+      //    But on the other hand, I think we can assume that different
+      //    configurations will end up with different caches. In other words,
+      //    we can assume that for the same "cc amalgamation" we use only a
+      //    single "version" of a header. Seems reasonable.
+      //
+      // Note also that while it would have been nice to have a unified cc
+      // cache, the map_extension() call is passed x_incs which is module-
+      // specific. In other words, we may end up mapping the same header to
+      // two different targets depending on whether it is included from, say,
+      // C or C++ translation unit. We could have used a unified cache for
+      // headers that were mapped using the fallback target type, which would
+      // cover the installed headers. Maybe, one day (it's also possible that
+      // separate caches reduce contention).
+      //
+      // Another related question is where we want to keep the cache: project,
+      // strong amalgamation, or weak amalgamation (like module sidebuilds).
+      // Some experimentation showed that weak has the best performance (which
+      // suggest that a unified cache will probably be a win).
+      //
+      // Note also that we don't need to clear this cache since we never clear
+      // the targets set. In other words, the only time targets are
+      // invalidated is when we destroy the build context, which also destroys
+      // the cache.
+      //
+      const config_module& hc (*header_cache_);
+
+      // First check the cache.
+      //
+      config_module::header_key hk;
+
+      bool e (fp.absolute ());
+      if (e)
+      {
+        if (!norm)
+        {
+          normalize_external (fp, "header");
+          norm = true;
+        }
+
+        hk.file = move (fp);
+        hk.hash = hash<path> () (hk.file);
+
+        slock l (hc.header_map_mutex);
+        auto i (hc.header_map.find (hk));
+        if (i != hc.header_map.end ())
+        {
+          //cache_hit.fetch_add (1, memory_order_relaxed);
+          return make_pair (i->second, false);
+        }
+
+        fp = move (hk.file);
+
+        //cache_mis.fetch_add (1, memory_order_relaxed);
+      }
+
       struct data
       {
         linfo li;
@@ -2810,24 +3172,52 @@ namespace build2
       // If it is outside any project, or the project doesn't have such an
       // extension, assume it is a plain old C header.
       //
-      return enter_file (
-        trace, "header",
-        a, bs, t,
-        fp, cache, norm,
-        [this] (const scope& bs, const string& n, const string& e)
+      auto r (enter_file (
+                trace, "header",
+                a, bs, t,
+                fp, cache, norm,
+                [this] (const scope& bs, const string& n, const string& e)
+                {
+                  return map_extension (bs, n, e, x_incs);
+                },
+                h::static_type,
+                [this, &d] (action a, const scope& bs, const target& t)
+                  -> const prefix_map&
+                {
+                  if (!d.pfx_map)
+                    d.pfx_map = build_prefix_map (bs, a, t, d.li);
+
+                  return *d.pfx_map;
+                },
+                so_map));
+
+      // Cache.
+      //
+      if (r.first != nullptr)
+      {
+        hk.file = move (fp);
+
+        // Calculate the hash if we haven't yet and re-calculate it if the
+        // path has changed (header has been remapped).
+        //
+        if (!e || r.second)
+          hk.hash = hash<path> () (hk.file);
+
+        const file* f;
         {
-          return map_extension (bs, n, e, x_inc);
-        },
-        h::static_type,
-        [this, &d] (action a, const scope& bs, const target& t)
-          -> const prefix_map&
+          ulock l (hc.header_map_mutex);
+          auto p (hc.header_map.emplace (move (hk), r.first));
+          f = p.second ? nullptr : p.first->second;
+        }
+
+        if (f != nullptr)
         {
-          if (!d.pfx_map)
-            d.pfx_map = build_prefix_map (bs, a, t, d.li);
+          //cache_cls.fetch_add (1, memory_order_relaxed);
+          assert (r.first == f);
+        }
+      }
 
-          return *d.pfx_map;
-        },
-        so_map);
+      return r;
     }
 
     // Note: this used to be a lambda inside extract_headers() so refer to the
@@ -2842,16 +3232,18 @@ namespace build2
       return inject_file (trace, "header", a, t, pt, mt, fail);
     }
 
-    // Extract and inject header dependencies. Return the preprocessed source
-    // file as well as an indication if it is usable for compilation (see
-    // below for details).
+    // Extract and inject header dependencies. Return (in result) the
+    // preprocessed source file as well as an indication if it is usable for
+    // compilation (see below for details). Note that result is expected to
+    // be initialized to {entry (), false}. Not using return type due to
+    // GCC bug #107555.
     //
     // This is also the place where we handle header units which are a lot
     // more like auto-generated headers than modules. In particular, if a
     // header unit BMI is out-of-date, then we have to re-preprocess this
     // translation unit.
     //
-    pair<file_cache::entry, bool> compile_rule::
+    void compile_rule::
     extract_headers (action a,
                      const scope& bs,
                      file& t,
@@ -2861,7 +3253,8 @@ namespace build2
                      depdb& dd,
                      bool& update,
                      timestamp mt,
-                     module_imports& imports) const
+                     module_imports& imports,
+                     pair<file_cache::entry, bool>& result) const
     {
       tracer trace (x, "compile_rule::extract_headers");
 
@@ -2874,9 +3267,16 @@ namespace build2
       file_cache::entry psrc;
       bool puse (true);
 
+      // Preprocessed file extension.
+      //
+      const char* pext (x_assembler_cpp (src) ? ".Si"      :
+                        x_objective (src)     ? x_obj_pext :
+                        x_pext);
+
       // Preprocesor mode that preserves as much information as possible while
       // still performing inclusions. Also serves as a flag indicating whether
-      // this compiler uses the separate preprocess and compile setup.
+      // this (non-MSVC) compiler uses the separate preprocess and compile
+      // setup.
       //
       const char* pp (nullptr);
 
@@ -2887,7 +3287,16 @@ namespace build2
           // -fdirectives-only is available since GCC 4.3.0.
           //
           if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
-            pp = "-fdirectives-only";
+          {
+            // Note that for assembler-with-cpp GCC currently forces full
+            // preprocessing in (what appears to be) an attempt to paper over
+            // a deeper issue (see GCC bug 109534). If/when that bug gets
+            // fixed, we can enable this on our side. Note that Clang's
+            // -frewrite-includes also has issues (see below).
+            //
+            if (!x_assembler_cpp (src))
+              pp = "-fdirectives-only";
+          }
 
           break;
         }
@@ -2896,7 +3305,16 @@ namespace build2
           // -frewrite-includes is available since Clang 3.2.0.
           //
           if (cmaj > 3 || (cmaj == 3 && cmin >= 2))
-            pp = "-frewrite-includes";
+          {
+            // While Clang's -frewrite-includes appears to work, there are
+            // some issues with correctly tracking location information
+            // (manifests itself as wrong line numbers in debug info, for
+            // example). The result also appears to reference the .Si file
+            // instead of the original source file for some reason.
+            //
+            if (!x_assembler_cpp (src))
+              pp = "-frewrite-includes";
+          }
 
           break;
         }
@@ -2977,7 +3395,7 @@ namespace build2
       //
       // GCC's -fdirective-only, on the other hand, processes all the
       // directives so they are gone from the preprocessed source. Here is
-      // what we are going to do to work around this: we will detect if any
+      // what we are going to do to work around this: we will sense if any
       // diagnostics has been written to stderr on the -E run. If that's the
       // case (but the compiler indicated success) then we assume they are
       // warnings and disable the use of the preprocessed output for
@@ -3015,7 +3433,9 @@ namespace build2
       // not found, and there is no problem with outdated generated headers
       // since we update/remap them before the compiler has a chance to read
       // them. Overall, this "dependency mapper" approach is how it should
-      // have been done from the beginning.
+      // have been done from the beginning. Note: that's the ideal world,
+      // the reality is that the required mapper extensions are not (yet)
+      // in libcody/GCC.
 
       // Note: diagnostics sensing is currently only supported if dependency
       // info is written to a file (see above).
@@ -3025,7 +3445,7 @@ namespace build2
       // And here is another problem: if we have an already generated header
       // in src and the one in out does not yet exist, then the compiler will
       // pick the one in src and we won't even notice. Note that this is not
-      // only an issue with mixing in- and out-of-tree builds (which does feel
+      // only an issue with mixing in and out of source builds (which does feel
       // wrong but is oh so convenient): this is also a problem with
       // pre-generated headers, a technique we use to make installing the
       // generator by end-users optional by shipping pre-generated headers.
@@ -3064,13 +3484,13 @@ namespace build2
 
       // The gen argument to init_args() is in/out. The caller signals whether
       // to force the generated header support and on return it signals
-      // whether this support is enabled. The first call to init_args is
-      // expected to have gen false.
+      // whether this support is enabled. If gen is false, then stderr is
+      // expected to be either discarded or merged with sdtout.
       //
       // Return NULL if the dependency information goes to stdout and a
       // pointer to the temporary file path otherwise.
       //
-      auto init_args = [a, &t, ot, li, reprocess,
+      auto init_args = [a, &t, ot, li, reprocess, pext,
                         &src, &md, &psrc, &sense_diag, &mod_mapper, &bs,
                         pp, &env, &args, &args_gen, &args_i, &out, &drm,
                         &so_map, this]
@@ -3216,16 +3636,6 @@ namespace build2
 
           // Some compile options (e.g., -std, -m) affect the preprocessor.
           //
-          // Currently Clang supports importing "header modules" even when in
-          // the TS mode. And "header modules" support macros which means
-          // imports have to be resolved during preprocessing. Which poses a
-          // bit of a chicken and egg problem for us. For now, the workaround
-          // is to remove the -fmodules-ts option when preprocessing. Hopefully
-          // there will be a "pure modules" mode at some point.
-          //
-          // @@ MODHDR Clang: should be solved with the dynamic module mapper
-          //    if/when Clang supports it?
-          //
 
           // Don't treat warnings as errors.
           //
@@ -3254,11 +3664,18 @@ namespace build2
               append_options (args, cmode);
               append_sys_hdr_options (args); // Extra system header dirs (last).
 
+              // Note that for MSVC stderr is merged with stdout and is then
+              // parsed, so no append_diag_color_options() call.
+
               // See perform_update() for details on the choice of options.
               //
+              // NOTE: see also the predefs rule if adding anything here.
+              //
               {
-                bool sc (find_option_prefix ("/source-charset:", args));
-                bool ec (find_option_prefix ("/execution-charset:", args));
+                bool sc (find_option_prefixes (
+                           {"/source-charset:", "-source-charset:"}, args));
+                bool ec (find_option_prefixes (
+                           {"/execution-charset:", "-execution-charset:"}, args));
 
                 if (!sc && !ec)
                   args.push_back ("/utf-8");
@@ -3274,15 +3691,18 @@ namespace build2
 
               if (cvariant != "clang" && isystem (*this))
               {
-                if (find_option_prefix ("/external:I", args) &&
-                    !find_option_prefix ("/external:W", args))
+                if (find_option_prefixes ({"/external:I", "-external:I"}, args) &&
+                    !find_option_prefixes ({"/external:W", "-external:W"}, args))
                   args.push_back ("/external:W0");
               }
 
-              if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+              if (x_lang == lang::cxx &&
+                  !find_option_prefixes ({"/EH", "-EH"}, args))
                 args.push_back ("/EHsc");
 
-              if (!find_option_prefixes ({"/MD", "/MT"}, args))
+              // NOTE: see similar code in search_modules().
+              //
+              if (!find_option_prefixes ({"/MD", "/MT", "-MD", "-MT"}, args))
                 args.push_back ("/MD");
 
               args.push_back ("/P");            // Preprocess to file.
@@ -3293,7 +3713,7 @@ namespace build2
 
               msvc_sanitize_cl (args);
 
-              psrc = ctx.fcache.create (t.path () + x_pext, !modules);
+              psrc = ctx.fcache->create (t.path () + pext, !modules);
 
               if (fc)
               {
@@ -3312,12 +3732,18 @@ namespace build2
             }
           case compiler_class::gcc:
             {
-              append_options (args, cmode,
-                              cmode.size () - (modules && clang ? 1 : 0));
+              append_options (args, cmode);
               append_sys_hdr_options (args); // Extra system header dirs (last).
 
+              // If not gen, then stderr is discarded.
+              //
+              if (gen)
+                append_diag_color_options (args);
+
               // See perform_update() for details on the choice of options.
               //
+              // NOTE: see also the predefs rule if adding anything here.
+              //
               if (!find_option_prefix ("-finput-charset=", args))
                 args.push_back ("-finput-charset=UTF-8");
 
@@ -3329,8 +3755,7 @@ namespace build2
 
               if (ctype == compiler_type::clang && tsys == "win32-msvc")
               {
-                initializer_list<const char*> os {"-nostdlib", "-nostartfiles"};
-                if (!find_options (os, cmode) && !find_options (os, args))
+                if (!find_options ({"-nostdlib", "-nostartfiles"}, args))
                 {
                   args.push_back ("-D_MT");
                   args.push_back ("-D_DLL");
@@ -3440,7 +3865,7 @@ namespace build2
 
                 // Preprocessor output.
                 //
-                psrc = ctx.fcache.create (t.path () + x_pext, !modules);
+                psrc = ctx.fcache->create (t.path () + pext, !modules);
                 args.push_back ("-o");
                 args.push_back (psrc.path ().string ().c_str ());
               }
@@ -3588,7 +4013,7 @@ namespace build2
 
         if (const file* ht = enter_header (
               a, bs, t, li,
-              hp, cache, cache /* normalized */,
+              move (hp), cache, cache /* normalized */,
               pfx_map, so_map).first)
         {
           // If we are reading the cache, then it is possible the file has
@@ -3603,7 +4028,7 @@ namespace build2
             // Verify/add it to the dependency database.
             //
             if (!cache)
-              dd.expect (ht->path ()); // @@ Use hp (or verify match)?
+              dd.expect (ht->path ());
 
             skip_count++;
             return *u;
@@ -3617,7 +4042,7 @@ namespace build2
             return fail (*ht);
         }
         else
-          return fail (hp);
+          return fail (hp); // hp is still valid.
       };
 
       // As above but for a header unit. Note that currently it is only used
@@ -3634,10 +4059,10 @@ namespace build2
 
         const file* ht (
           enter_header (a, bs, t, li,
-                        hp, true /* cache */, false /* normalized */,
+                        move (hp), true /* cache */, false /* normalized */,
                         pfx_map, so_map).first);
 
-        if (ht == nullptr)
+        if (ht == nullptr) // hp is still valid.
         {
           diag_record dr;
           dr << error << "header " << hp << " not found and no rule to "
@@ -3730,13 +4155,13 @@ namespace build2
               // If modules are enabled, then we keep the preprocessed output
               // around (see apply() for details).
               //
-              // See apply() for details on the extra MSVC check.
-              //
-              return modules && (ctype != compiler_type::msvc ||
-                                 md.type != unit_type::module_intf)
-                ? make_pair (ctx.fcache.create_existing (t.path () + x_pext),
-                             true)
-                : make_pair (file_cache::entry (), false);
+              if (modules)
+              {
+                result.first = ctx.fcache->create_existing (t.path () + pext);
+                result.second = true;
+              }
+
+              return;
             }
 
             // This can be a header or a header unit (mapping).
@@ -3789,7 +4214,7 @@ namespace build2
 
               // Bail out early if we have deferred a failure.
               //
-              return make_pair (file_cache::entry (), false);
+              return;
             }
           }
         }
@@ -3815,6 +4240,12 @@ namespace build2
 
             process pr;
 
+            // We use the fdstream_mode::skip mode on stdout (cannot be used
+            // on both) and so dbuf must be destroyed (closed) first.
+            //
+            ifdstream is (ifdstream::badbit);
+            diag_buffer dbuf (ctx);
+
             try
             {
               // Assume the preprocessed output (if produced) is usable
@@ -3835,217 +4266,229 @@ namespace build2
               //
               bool good_error (false), bad_error (false);
 
-              // If we have no generated header support, then suppress all
-              // diagnostics (if things go badly we will restart with this
-              // support).
-              //
-              if (drmp == nullptr) // Dependency info goes to stdout.
+              if (mod_mapper) // Dependency info is implied by mapper requests.
               {
-                assert (!sense_diag); // Note: could support with fdselect().
+                assert (gen && !sense_diag); // Not used in this mode.
 
-                // For VC with /P the dependency info and diagnostics all go
-                // to stderr so redirect it to stdout.
+                // Note that here we use the skip mode on the diagnostics
+                // stream which means we have to use own instance of stdout
+                // stream for the correct destruction order (see below).
                 //
-                pr = process (
-                  cpath,
-                  args.data (),
-                  0,
-                  -1,
-                  cclass == compiler_class::msvc ? 1 : gen ? 2 : -2,
-                  nullptr, // CWD
-                  env.empty () ? nullptr : env.data ());
-              }
-              else // Dependency info goes to a temporary file.
-              {
                 pr = process (cpath,
-                              args.data (),
-                              mod_mapper ? -1 : 0,
-                              mod_mapper ? -1 : 2, // Send stdout to stderr.
-                              gen ? 2 : sense_diag ? -1 : -2,
+                              args,
+                              -1,
+                              -1,
+                              diag_buffer::pipe (ctx),
                               nullptr, // CWD
                               env.empty () ? nullptr : env.data ());
 
-                // Monitor for module mapper requests and/or diagnostics. If
-                // diagnostics is detected, mark the preprocessed output as
-                // unusable for compilation.
-                //
-                if (mod_mapper || sense_diag)
+                dbuf.open (args[0],
+                           move (pr.in_efd),
+                           fdstream_mode::non_blocking |
+                           fdstream_mode::skip);
+                try
                 {
-                  module_mapper_state mm_state (skip_count, imports);
+                  gcc_module_mapper_state mm_state (skip_count, imports);
+
+                  // Note that while we read both streams until eof in normal
+                  // circumstances, we cannot use fdstream_mode::skip for the
+                  // exception case on both of them: we may end up being
+                  // blocked trying to read one stream while the process may
+                  // be blocked writing to the other. So in case of an
+                  // exception we only skip the diagnostics and close the
+                  // mapper stream hard. The latter (together with closing of
+                  // the stdin stream) should happen first so the order of
+                  // the following variable is important.
+                  //
+                  // Note also that we open the stdin stream in the blocking
+                  // mode.
+                  //
+                  ifdstream is (move (pr.in_ofd),
+                                fdstream_mode::non_blocking,
+                                ifdstream::badbit); // stdout
+                  ofdstream os (move (pr.out_fd));  // stdin (badbit|failbit)
 
-                  const char* w (nullptr);
-                  try
+                  // Read until we reach EOF on all streams.
+                  //
+                  // Note that if dbuf is not opened, then we automatically
+                  // get an inactive nullfd entry.
+                  //
+                  fdselect_set fds {is.fd (), dbuf.is.fd ()};
+                  fdselect_state& ist (fds[0]);
+                  fdselect_state& dst (fds[1]);
+
+                  bool more (false);
+                  for (string l; ist.fd != nullfd || dst.fd != nullfd; )
                   {
-                    // For now we don't need to do both so let's use a simpler
-                    // blocking implementation. Note that the module mapper
-                    // also needs to be adjusted when switching to the
-                    // non-blocking version.
+                    // @@ Currently we will accept a (potentially truncated)
+                    //    line that ends with EOF rather than newline.
                     //
-#if 1
-                    assert (mod_mapper != sense_diag);
-
-                    if (mod_mapper)
+                    if (ist.fd != nullfd && getline_non_blocking (is, l))
                     {
-                      w = "module mapper request";
-
-                      // Note: the order is important (see the non-blocking
-                      // verison for details).
-                      //
-                      ifdstream is (move (pr.in_ofd),
-                                    fdstream_mode::skip,
-                                    ifdstream::badbit);
-                      ofdstream os (move (pr.out_fd));
-
-                      do
+                      if (eof (is))
                       {
-                        if (!gcc_module_mapper (mm_state,
-                                                a, bs, t, li,
-                                                is, os,
-                                                dd, update, bad_error,
-                                                pfx_map, so_map))
-                          break;
-
-                      } while (!is.eof ());
+                        os.close ();
+                        is.close ();
 
-                      os.close ();
-                      is.close ();
-                    }
-
-                    if (sense_diag)
-                    {
-                      w = "diagnostics";
-                      ifdstream is (move (pr.in_efd), fdstream_mode::skip);
-                      puse = puse && (is.peek () == ifdstream::traits_type::eof ());
-                      is.close ();
-                    }
-#else
-                    fdselect_set fds;
-                    auto add = [&fds] (const auto_fd& afd) -> fdselect_state*
-                    {
-                      int fd (afd.get ());
-                      fdmode (fd, fdstream_mode::non_blocking);
-                      fds.push_back (fd);
-                      return &fds.back ();
-                    };
-
-                    // Note that while we read both streams until eof in
-                    // normal circumstances, we cannot use fdstream_mode::skip
-                    // for the exception case on both of them: we may end up
-                    // being blocked trying to read one stream while the
-                    // process may be blocked writing to the other. So in case
-                    // of an exception we only skip the diagnostics and close
-                    // the mapper stream hard. The latter should happen first
-                    // so the order of the following variable is important.
-                    //
-                    ifdstream es;
-                    ofdstream os;
-                    ifdstream is;
+                        if (more)
+                          throw_generic_ios_failure (EIO, "unexpected EOF");
 
-                    fdselect_state* ds (nullptr);
-                    if (sense_diag)
-                    {
-                      w = "diagnostics";
-                      ds = add (pr.in_efd);
-                      es.open (move (pr.in_efd), fdstream_mode::skip);
-                    }
-
-                    fdselect_state* ms (nullptr);
-                    if (mod_mapper)
-                    {
-                      w = "module mapper request";
-                      ms = add (pr.in_ofd);
-                      is.open (move (pr.in_ofd));
-                      os.open (move (pr.out_fd)); // Note: blocking.
-                    }
-
-                    // Set each state pointer to NULL when the respective
-                    // stream reaches eof.
-                    //
-                    while (ds != nullptr || ms != nullptr)
-                    {
-                      w = "output";
-                      ifdselect (fds);
-
-                      // First read out the diagnostics in case the mapper
-                      // interaction produces more. To make sure we don't get
-                      // blocked by full stderr, the mapper should only handle
-                      // one request at a time.
-                      //
-                      if (ds != nullptr && ds->ready)
+                        ist.fd = nullfd;
+                      }
+                      else
                       {
-                        w = "diagnostics";
-
-                        for (char buf[4096];;)
-                        {
-                          streamsize c (sizeof (buf));
-                          streamsize n (es.readsome (buf, c));
+                        optional<bool> r (
+                          gcc_module_mapper (mm_state,
+                                             a, bs, t, li,
+                                             l, os,
+                                             dd, update, bad_error,
+                                             pfx_map, so_map));
 
-                          if (puse && n > 0)
-                            puse = false;
+                        more = !r.has_value ();
 
-                          if (n < c)
-                            break;
-                        }
-
-                        if (es.eof ())
-                        {
-                          es.close ();
-                          ds->fd = nullfd;
-                          ds = nullptr;
-                        }
-                      }
-
-                      if (ms != nullptr && ms->ready)
-                      {
-                        w = "module mapper request";
-
-                        gcc_module_mapper (mm_state,
-                                           a, bs, t, li,
-                                           is, os,
-                                           dd, update, bad_error,
-                                           pfx_map, so_map);
-                        if (is.eof ())
+                        if (more || *r)
+                          l.clear ();
+                        else
                         {
                           os.close ();
                           is.close ();
-                          ms->fd = nullfd;
-                          ms = nullptr;
+                          ist.fd = nullfd;
                         }
                       }
+
+                      continue;
                     }
-#endif
-                  }
-                  catch (const io_error& e)
-                  {
-                    if (pr.wait ())
-                      fail << "io error handling " << x_lang << " compiler "
-                           << w << ": " << e;
 
-                    // Fall through.
+                    ifdselect (fds);
+
+                    if (dst.ready)
+                    {
+                      if (!dbuf.read ())
+                        dst.fd = nullfd;
+                    }
                   }
 
-                  if (mod_mapper)
-                    md.header_units += mm_state.header_units;
+                  md.header_units += mm_state.header_units;
+                }
+                catch (const io_error& e)
+                {
+                  // Note that diag_buffer handles its own io errors so this
+                  // is about mapper stdin/stdout.
+                  //
+                  if (pr.wait ())
+                    fail << "io error handling " << x_lang << " compiler "
+                         << "module mapper request: " << e;
+
+                  // Fall through.
                 }
 
                 // The idea is to reduce this to the stdout case.
                 //
-                pr.wait ();
-
-                // With -MG we want to read dependency info even if there is
-                // an error (in case an outdated header file caused it). But
-                // with the GCC module mapper an error is non-negotiable, so
-                // to speak, and so we want to skip all of that. In fact, we
-                // now write directly to depdb without generating and then
+                // We now write directly to depdb without generating and then
                 // parsing an intermadiate dependency makefile.
                 //
-                pr.in_ofd = (ctype == compiler_type::gcc && mod_mapper)
-                  ? auto_fd (nullfd)
-                  : fdopen (*drmp, fdopen_mode::in);
+                pr.wait ();
+                pr.in_ofd = nullfd;
+              }
+              else
+              {
+                // If we have no generated header support, then suppress all
+                // diagnostics (if things go badly we will restart with this
+                // support).
+                //
+                if (drmp == nullptr) // Dependency info goes to stdout.
+                {
+                  assert (!sense_diag); // Note: could support if necessary.
+
+                  // For VC with /P the dependency info and diagnostics all go
+                  // to stderr so redirect it to stdout.
+                  //
+                  int err (
+                    cclass == compiler_class::msvc ?  1 : // stdout
+                    !gen                           ? -2 : // /dev/null
+                    diag_buffer::pipe (ctx, sense_diag /* force */));
+
+                  pr = process (
+                    cpath,
+                    args,
+                    0,
+                    -1,
+                    err,
+                    nullptr, // CWD
+                    env.empty () ? nullptr : env.data ());
+
+                  if (cclass != compiler_class::msvc && gen)
+                  {
+                    dbuf.open (args[0],
+                               move (pr.in_efd),
+                               fdstream_mode::non_blocking); // Skip on stdout.
+                  }
+                }
+                else // Dependency info goes to temporary file.
+                {
+                  // Since we only need to read from one stream (dbuf) let's
+                  // use the simpler blocking setup.
+                  //
+                  int err (
+                    !gen && !sense_diag ? -2 : // /dev/null
+                    diag_buffer::pipe (ctx, sense_diag /* force */));
+
+                  pr = process (cpath,
+                                args,
+                                0,
+                                2, // Send stdout to stderr.
+                                err,
+                                nullptr, // CWD
+                                env.empty () ? nullptr : env.data ());
+
+                  if (gen || sense_diag)
+                  {
+                    dbuf.open (args[0], move (pr.in_efd));
+                    dbuf.read (sense_diag /* force */);
+                  }
+
+                  if (sense_diag)
+                  {
+                    if (!dbuf.buf.empty ())
+                    {
+                      puse = false;
+                      dbuf.buf.clear (); // Discard.
+                    }
+                  }
+
+                  // The idea is to reduce this to the stdout case.
+                  //
+                  // Note that with -MG we want to read dependency info even
+                  // if there is an error (in case an outdated header file
+                  // caused it).
+                  //
+                  pr.wait ();
+                  pr.in_ofd = fdopen (*drmp, fdopen_mode::in);
+                }
               }
 
+              // Read and process dependency information, if any.
+              //
               if (pr.in_ofd != nullfd)
               {
+                // We have two cases here: reading from stdout and potentially
+                // stderr (dbuf) or reading from file (see the process startup
+                // code above for details). If we have to read from two
+                // streams, then we have to use the non-blocking setup. But we
+                // cannot use the non-blocking setup uniformly because on
+                // Windows it's only suppored for pipes. So things are going
+                // to get a bit hairy.
+                //
+                // And there is another twist to this: for MSVC we redirect
+                // stderr to stdout since the header dependency information is
+                // part of the diagnostics. If, however, there is some real
+                // diagnostics, we need to pass it through, potentially with
+                // buffering. The way we achieve this is by later opening dbuf
+                // in the EOF state and using it to buffer or stream the
+                // diagnostics.
+                //
+                bool nb (dbuf.is.is_open ());
+
                 // We may not read all the output (e.g., due to a restart).
                 // Before we used to just close the file descriptor to signal
                 // to the other end that we are not interested in the rest.
@@ -4053,20 +4496,69 @@ namespace build2
                 // impolite and complains, loudly (broken pipe). So now we are
                 // going to skip until the end.
                 //
-                ifdstream is (move (pr.in_ofd),
-                              fdstream_mode::text | fdstream_mode::skip,
-                              ifdstream::badbit);
+                // Note that this means we are not using skip on dbuf (see
+                // above for the destruction order details).
+                //
+                {
+                  fdstream_mode m (fdstream_mode::text |
+                                   fdstream_mode::skip);
+
+                  if (nb)
+                    m |= fdstream_mode::non_blocking;
+
+                  is.open (move (pr.in_ofd), m);
+                }
+
+                fdselect_set fds;
+                if (nb)
+                  fds = {is.fd (), dbuf.is.fd ()};
 
                 size_t skip (skip_count);
                 string l, l2; // Reuse.
                 for (bool first (true), second (false); !restart; )
                 {
-                  if (eof (getline (is, l)))
+                  if (nb)
                   {
-                    if (bad_error && !l2.empty ())
-                      text << l2;
+                    fdselect_state& ist (fds[0]);
+                    fdselect_state& dst (fds[1]);
 
-                    break;
+                    // We read until we reach EOF on both streams.
+                    //
+                    if (ist.fd == nullfd && dst.fd == nullfd)
+                      break;
+
+                    if (ist.fd != nullfd && getline_non_blocking (is, l))
+                    {
+                      if (eof (is))
+                      {
+                        ist.fd = nullfd;
+                        continue;
+                      }
+
+                      // Fall through to parse (and clear) the line.
+                    }
+                    else
+                    {
+                      ifdselect (fds);
+
+                      if (dst.ready)
+                      {
+                        if (!dbuf.read ())
+                          dst.fd = nullfd;
+                      }
+
+                      continue;
+                    }
+                  }
+                  else
+                  {
+                    if (eof (getline (is, l)))
+                    {
+                      if (bad_error && !l2.empty ()) // MSVC only (see below).
+                        dbuf.write (l2, true /* newline */);
+
+                      break;
+                    }
                   }
 
                   l6 ([&]{trace << "header dependency line '" << l << "'";});
@@ -4117,9 +4609,15 @@ namespace build2
                             else
                             {
                               l2 = l;
-                              bad_error = true;
+
+                              if (!bad_error)
+                              {
+                                dbuf.open_eof (args[0]);
+                                bad_error = true;
+                              }
                             }
 
+                            l.clear ();
                             continue;
                           }
 
@@ -4129,6 +4627,7 @@ namespace build2
                         }
 
                         first = false;
+                        l.clear ();
                         continue;
                       }
 
@@ -4136,8 +4635,13 @@ namespace build2
 
                       if (f.empty ()) // Some other diagnostics.
                       {
-                        text << l;
-                        bad_error = true;
+                        if (!bad_error)
+                        {
+                          dbuf.open_eof (args[0]);
+                          bad_error = true;
+                        }
+
+                        dbuf.write (l, true /* newline */);
                         break;
                       }
 
@@ -4231,12 +4735,9 @@ namespace build2
                         if (l.empty ()  ||
                             l[0] != '^' || l[1] != ':' || l[2] != ' ')
                         {
-                          // @@ Hm, we don't seem to redirect stderr to stdout
-                          //    for this class of compilers so I wonder why
-                          //    we are doing this?
-                          //
                           if (!l.empty ())
-                            text << l;
+                            l5 ([&]{trace << "invalid header dependency line '"
+                                          << l << "'";});
 
                           bad_error = true;
                           break;
@@ -4251,7 +4752,10 @@ namespace build2
                         // "^: \".
                         //
                         if (l.size () == 4 && l[3] == '\\')
+                        {
+                          l.clear ();
                           continue;
+                        }
                         else
                           pos = 3; // Skip "^: ".
 
@@ -4266,10 +4770,8 @@ namespace build2
 
                         if (pos != l.size () && l[pos] == ':')
                         {
-                          // @@ Hm, the same as above.
-                          //
-                          text << l;
-
+                          l5 ([&]{trace << "invalid header dependency line '"
+                                        << l << "'";});
                           bad_error = true;
                           break;
                         }
@@ -4324,19 +4826,56 @@ namespace build2
                   }
 
                   if (bad_error || md.deferred_failure)
+                  {
+                    // Note that it may be tempting to finish reading out the
+                    // diagnostics before bailing out. But that may end up in
+                    // a deadlock if the process gets blocked trying to write
+                    // to stdout.
+                    //
                     break;
+                  }
+
+                  l.clear ();
+                }
+
+                // We may bail out early from the above loop in case of a
+                // restart or error. Which means the stderr stream (dbuf) may
+                // still be open and we need to close it before closing the
+                // stdout stream (which may try to skip).
+                //
+                // In this case we may also end up with incomplete diagnostics
+                // so discard it.
+                //
+                // Generally, it may be tempting to start thinking if we
+                // should discard buffered diagnostics in other cases, such as
+                // restart. But remember that during serial execution it will
+                // go straight to stderr so for consistency (and simplicity)
+                // we should just print it unless there are good reasons not
+                // to (also remember that in the restartable modes we normally
+                // redirect stderr to /dev/null; see the process startup code
+                // for details).
+                //
+                if (dbuf.is.is_open ())
+                {
+                  dbuf.is.close ();
+                  dbuf.buf.clear ();
                 }
 
                 // Bail out early if we have deferred a failure.
                 //
+                // Let's ignore any buffered diagnostics in this case since
+                // it would appear after the deferred failure note.
+                //
                 if (md.deferred_failure)
                 {
                   is.close ();
-                  return make_pair (file_cache::entry (), false);
+                  return;
                 }
 
-                // In case of VC, we are parsing stderr and if things go
-                // south, we need to copy the diagnostics for the user to see.
+                // In case of VC, we are parsing redirected stderr and if
+                // things go south, we need to copy the diagnostics for the
+                // user to see. Note that we should have already opened dbuf
+                // at EOF above.
                 //
                 if (bad_error && cclass == compiler_class::msvc)
                 {
@@ -4351,7 +4890,7 @@ namespace build2
                         l.compare (p.first, 4, "1083") != 0 &&
                         msvc_header_c1083 (l, p))
                     {
-                      diag_stream_lock () << l << endl;
+                      dbuf.write (l, true /* newline */);
                     }
                   }
                 }
@@ -4374,27 +4913,42 @@ namespace build2
 
               if (pr.wait ())
               {
-                if (!bad_error) // Ignore expected successes (we are done).
                 {
-                  if (!restart && psrc)
-                    psrcw.close ();
+                  diag_record dr;
 
-                  continue;
+                  if (bad_error)
+                    dr << fail << "expected error exit status from "
+                       << x_lang << " compiler";
+
+                  if (dbuf.is_open ())
+                    dbuf.close (move (dr)); // Throws if error.
                 }
 
-                fail << "expected error exit status from " << x_lang
-                     << " compiler";
+                // Ignore expected successes (we are done).
+                //
+                if (!restart && psrc)
+                  psrcw.close ();
+
+                continue;
               }
               else if (pr.exit->normal ())
               {
                 if (good_error) // Ignore expected errors (restart).
+                {
+                  if (dbuf.is_open ())
+                    dbuf.close ();
+
                   continue;
+                }
               }
 
               // Fall through.
             }
             catch (const io_error& e)
             {
+              // Ignore buffered diagnostics (since reading it could be the
+              // cause of this failure).
+              //
               if (pr.wait ())
                 fail << "unable to read " << x_lang << " compiler header "
                      << "dependency output: " << e;
@@ -4403,18 +4957,23 @@ namespace build2
             }
 
             assert (pr.exit && !*pr.exit);
-            const process_exit& e (*pr.exit);
+            const process_exit& pe (*pr.exit);
 
             // For normal exit we assume the child process issued some
             // diagnostics.
             //
-            if (e.normal ())
+            if (pe.normal ())
             {
-              // If this run was with the generated header support then we
-              // have issued diagnostics and it's time to give up.
+              // If this run was with the generated header support then it's
+              // time to give up.
               //
               if (gen)
+              {
+                if (dbuf.is_open ())
+                  dbuf.close (args, pe, 2 /* verbosity */);
+
                 throw failed ();
+              }
 
               // Just to recap, being here means something is wrong with the
               // source: it can be a missing generated header, it can be an
@@ -4432,7 +4991,12 @@ namespace build2
               // or will issue diagnostics.
               //
               if (restart)
+              {
+                if (dbuf.is_open ())
+                  dbuf.close ();
+
                 l6 ([&]{trace << "trying again without generated headers";});
+              }
               else
               {
                 // In some pathological situations we may end up switching
@@ -4457,19 +5021,24 @@ namespace build2
                 // example, because we have removed all the partially
                 // preprocessed source files).
                 //
-                if (force_gen_skip && *force_gen_skip == skip_count)
                 {
-                  diag_record dr (fail);
+                  diag_record dr;
+                  if (force_gen_skip && *force_gen_skip == skip_count)
+                  {
+                    dr <<
+                      fail << "inconsistent " << x_lang << " compiler behavior" <<
+                      info << "run the following two commands to investigate";
 
-                  dr << "inconsistent " << x_lang << " compiler behavior" <<
-                    info << "run the following two commands to investigate";
+                    dr << info;
+                    print_process (dr, args.data ()); // No pipes.
 
-                  dr << info;
-                  print_process (dr, args.data ()); // No pipes.
+                    init_args ((gen = true));
+                    dr << info << "";
+                    print_process (dr, args.data ()); // No pipes.
+                  }
 
-                  init_args ((gen = true));
-                  dr << info << "";
-                  print_process (dr, args.data ()); // No pipes.
+                  if (dbuf.is_open ())
+                    dbuf.close (move (dr)); // Throws if error.
                 }
 
                 restart = true;
@@ -4480,7 +5049,15 @@ namespace build2
               continue;
             }
             else
-              run_finish (args, pr); // Throws.
+            {
+              if (dbuf.is_open ())
+              {
+                dbuf.close (args, pe, 2 /* verbosity */);
+                throw failed ();
+              }
+              else
+                run_finish (args, pr, 2 /* verbosity */);
+            }
           }
           catch (const process_error& e)
           {
@@ -4506,7 +5083,9 @@ namespace build2
       dd.expect ("");
 
       puse = puse && !reprocess && psrc;
-      return make_pair (move (psrc), puse);
+
+      result.first = move (psrc);
+      result.second = puse;
     }
 
     // Return the translation unit information (last argument) and its
@@ -4525,6 +5104,18 @@ namespace build2
     {
       tracer trace (x, "compile_rule::parse_unit");
 
+      // Scanning .S files with our parser is hazardous since such files
+      // sometimes use `#`-style comments. Presumably real compilers just
+      // ignore them in some way, but it doesn't seem worth it to bother in
+      // our case. Also, the checksum calculation over assembler tokens feels
+      // iffy.
+      //
+      if (x_assembler_cpp (src))
+      {
+        tu.type = unit_type::non_modular;
+        return "";
+      }
+
       otype ot (li.type);
 
       // If things go wrong give the user a bit extra context. Let's call it
@@ -4603,8 +5194,6 @@ namespace build2
           case compiler_class::msvc: werror = "/WX";     break;
           }
 
-          bool clang (ctype == compiler_type::clang);
-
           append_options (args, t, c_coptions, werror);
           append_options (args, t, x_coptions, werror);
 
@@ -4619,11 +5208,16 @@ namespace build2
               append_options (args, cmode);
               append_sys_hdr_options (args);
 
+              // Note: no append_diag_color_options() call since the
+              // diagnostics is discarded.
+
               // See perform_update() for details on the choice of options.
               //
               {
-                bool sc (find_option_prefix ("/source-charset:", args));
-                bool ec (find_option_prefix ("/execution-charset:", args));
+                bool sc (find_option_prefixes (
+                           {"/source-charset:", "-source-charset:"}, args));
+                bool ec (find_option_prefixes (
+                           {"/execution-charset:", "-execution-charset:"}, args));
 
                 if (!sc && !ec)
                   args.push_back ("/utf-8");
@@ -4639,15 +5233,16 @@ namespace build2
 
               if (cvariant != "clang" && isystem (*this))
               {
-                if (find_option_prefix ("/external:I", args) &&
-                    !find_option_prefix ("/external:W", args))
+                if (find_option_prefixes ({"/external:I", "-external:I"}, args) &&
+                    !find_option_prefixes ({"/external:W", "-external:W"}, args))
                   args.push_back ("/external:W0");
               }
 
-              if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+              if (x_lang == lang::cxx &&
+                  !find_option_prefixes ({"/EH", "-EH"}, args))
                 args.push_back ("/EHsc");
 
-              if (!find_option_prefixes ({"/MD", "/MT"}, args))
+              if (!find_option_prefixes ({"/MD", "/MT", "-MD", "-MT"}, args))
                 args.push_back ("/MD");
 
               args.push_back ("/E");
@@ -4661,10 +5256,12 @@ namespace build2
             }
           case compiler_class::gcc:
             {
-              append_options (args, cmode,
-                              cmode.size () - (modules && clang ? 1 : 0));
+              append_options (args, cmode);
               append_sys_hdr_options (args);
 
+              // Note: no append_diag_color_options() call since the
+              // diagnostics is discarded.
+
               // See perform_update() for details on the choice of options.
               //
               if (!find_option_prefix ("-finput-charset=", args))
@@ -4678,8 +5275,7 @@ namespace build2
 
               if (ctype == compiler_type::clang && tsys == "win32-msvc")
               {
-                initializer_list<const char*> os {"-nostdlib", "-nostartfiles"};
-                if (!find_options (os, cmode) && !find_options (os, args))
+                if (!find_options ({"-nostdlib", "-nostartfiles"}, args))
                 {
                   args.push_back ("-D_MT");
                   args.push_back ("-D_DLL");
@@ -4706,12 +5302,36 @@ namespace build2
               //
               if (ps)
               {
-                if (ctype == compiler_type::gcc)
+                switch (ctype)
                 {
-                  // Note that only these two *plus* -x do the trick.
-                  //
-                  args.push_back ("-fpreprocessed");
-                  args.push_back ("-fdirectives-only");
+                case compiler_type::gcc:
+                  {
+                    // Note that only these two *plus* -x do the trick.
+                    //
+                    args.push_back ("-fpreprocessed");
+                    args.push_back ("-fdirectives-only");
+                    break;
+                  }
+                case compiler_type::clang:
+                  {
+                    // See below for details.
+                    //
+                    if (ctype == compiler_type::clang &&
+                        cmaj >= (cvariant != "apple" ? 15 : 16))
+                    {
+                      if (find_options ({"-pedantic",  "-pedantic-errors",
+                                         "-Wpedantic", "-Werror=pedantic"},
+                                        args))
+                      {
+                        args.push_back ("-Wno-gnu-line-marker");
+                      }
+                    }
+
+                    break;
+                  }
+                case compiler_type::msvc:
+                case compiler_type::icc:
+                  assert (false);
                 }
               }
 
@@ -4765,10 +5385,10 @@ namespace build2
               print_process (args);
 
             // We don't want to see warnings multiple times so ignore all
-            // diagnostics.
+            // diagnostics (thus no need for diag_buffer).
             //
             pr = process (cpath,
-                          args.data (),
+                          args,
                           0, -1, -2,
                           nullptr, // CWD
                           env.empty () ? nullptr : env.data ());
@@ -4780,7 +5400,7 @@ namespace build2
                         fdstream_mode::binary | fdstream_mode::skip);
 
           parser p;
-          p.parse (is, path_name (*sp), tu);
+          p.parse (is, path_name (*sp), tu, cid);
 
           is.close ();
 
@@ -4795,7 +5415,9 @@ namespace build2
             if (!modules)
             {
               if (ut != unit_type::non_modular || !mi.imports.empty ())
-                fail << "modules support required by " << src;
+                fail << "modules support required by " << src <<
+                  info << "consider enabling modules with "
+                       << x << ".features.modules=true in root.build";
             }
             else
             {
@@ -4820,25 +5442,21 @@ namespace build2
                 ut = md.type;
                 mi.name = src.path ().string ();
               }
-
-              // Prior to 15.5 (19.12) VC was not using the 'export module M;'
-              // syntax so we use the preprequisite type to distinguish
-              // between interface and implementation units.
-              //
-              // @@ TMP: probably outdated.
-              //
-              if (ctype == compiler_type::msvc && cmaj == 19 && cmin <= 11)
-              {
-                if (ut == unit_type::module_impl && src.is_a (*x_mod))
-                  ut = unit_type::module_intf;
-              }
             }
 
             // If we were forced to reprocess, assume the checksum is not
             // accurate (parts of the translation unit could have been
             // #ifdef'ed out; see __build2_preprocess).
             //
-            return reprocess ? string () : move (p.checksum);
+            // Also, don't use the checksum for header units since it ignores
+            // preprocessor directives and may therefore cause us to ignore a
+            // change to an exported macro. @@ TODO: maybe we should add a
+            // flag to the parser not to waste time calculating the checksum
+            // in these cases.
+            //
+            return reprocess || ut == unit_type::module_header
+              ? string ()
+              : move (p.checksum);
           }
 
           // Fall through.
@@ -4869,7 +5487,7 @@ namespace build2
             info << "then run failing command to display compiler diagnostics";
         }
         else
-          run_finish (args, pr); // Throws.
+          run_finish (args, pr, 2 /* verbosity */); // Throws.
       }
       catch (const process_error& e)
       {
@@ -5038,6 +5656,9 @@ namespace build2
     {
       tracer trace (x, "compile_rule::search_modules");
 
+      context& ctx (bs.ctx);
+      const scope& rs (*bs.root_scope ());
+
       // NOTE: currently we don't see header unit imports (they are handled by
       //       extract_headers() and are not in imports).
 
@@ -5073,7 +5694,7 @@ namespace build2
       // So, the fuzzy match: the idea is that each match gets a score, the
       // number of characters in the module name that got matched. A match
       // with the highest score is used. And we use the (length + 1) for a
-      // match against an actual module name.
+      // match against an actual (extracted) module name.
       //
       // Actually, the scoring system is a bit more elaborate than that.
       // Consider module name core.window and two files, window.mxx and
@@ -5101,10 +5722,10 @@ namespace build2
       // module (or partition) component. Failed that, we will match `format`
       // to `print` because the last character (`t`) is the same.
       //
-      // For std.* modules we only accept non-fuzzy matches (think std.core vs
-      // some core.mxx). And if such a module is unresolved, then we assume it
-      // is pre-built and will be found by some other means (e.g., VC's
-      // IFCPATH).
+      // For std.* modules we only accept non-fuzzy matches (think std.compat
+      // vs some compat.mxx). And if such a module is unresolved, then we
+      // assume it is pre-built and will be found by some other means (e.g.,
+      // VC's IFCPATH).
       //
       // Note also that we handle module partitions the same as submodules. In
       // other words, for matching, `.` and `:` are treated the same.
@@ -5117,7 +5738,7 @@ namespace build2
         // PPPPABBBB
         //
         // Where PPPP is the primary score, A is the A) score, and BBBB is
-        // the B) scope described above. Zero signifies no match.
+        // the B) score described above. Zero signifies no match.
         //
         // We use decimal instead of binary packing to make it easier for the
         // human to separate fields in the trace messages, during debugging,
@@ -5223,6 +5844,31 @@ namespace build2
         if (!match)
           return 0;
 
+        // Here is another corner case, the module is async_simple:IOExecutor
+        // and the file names are:
+        //
+        // IOExecutor.mxx
+        // SimpleIOExecutor.mxx
+        //
+        // The above implementation treats the latter as better because
+        // `Simple` in SimpleIOExecutor matches `simple` in async_simple. It's
+        // unclear what we can do about it without potentially breaking other
+        // legitimate cases (think Boost_Simple:IOExecutor). Maybe we could
+        // boost the exact partition name match score, similar to the exact
+        // module match, as some sort of a heuristics? Let's try.
+        //
+        if (fi == 0 && mi != 0 && m[mi - 1] == ':')
+        {
+          // Pretend we matched one short of the next module component. This
+          // way AsyncSimpleIOExecutor.mxx would still be a better match.
+          //
+          while (--mi != 0 && m[mi - 1] != '.')
+            ;
+
+          msep = (mi != 0); // For uncount logic below.
+          mi++; // One short.
+        }
+
         // "Uncount" real separators.
         //
         if (fsep) fi++;
@@ -5251,6 +5897,20 @@ namespace build2
         return ps * 100000 + as * 10000 + bs;
       };
 
+#if 0
+      assert (match ("IOExecutor",       "async_simple:IOExecutor") >
+              match ("SimpleIOExecutor", "async_simple:IOExecutor"));
+
+      assert (match ("IOExecutor",            "async_simple:IOExecutor") <
+              match ("AsyncSimpleIOExecutor", "async_simple:IOExecutor"));
+
+      assert (match ("IOExecutor",       "x.async_simple:IOExecutor") >
+              match ("SimpleIOExecutor", "x.async_simple:IOExecutor"));
+
+      assert (match ("IOExecutor",            "x.async_simple:IOExecutor") <
+              match ("AsyncSimpleIOExecutor", "x.async_simple:IOExecutor"));
+#endif
+
       auto& pts (t.prerequisite_targets[a]);
       size_t start (pts.size ()); // Index of the first to be added.
 
@@ -5265,7 +5925,7 @@ namespace build2
       // promise. It has to do with module re-exporting (export import M;).
       // In this case (currently) all implementations simply treat it as a
       // shallow (from the BMI's point of view) reference to the module (or an
-      // implicit import, if you will). Do you see where it's going? Nowever
+      // implicit import, if you will). Do you see where it's going? Nowhere
       // good, that's right. This shallow reference means that the compiler
       // should be able to find BMIs for all the re-exported modules,
       // recursively. The good news is we are actually in a pretty good shape
@@ -5282,10 +5942,11 @@ namespace build2
       // 1. There is no good place in prerequisite_targets to store the
       //    exported flag (no, using the marking facility across match/execute
       //    is a bad idea). So what we are going to do is put re-exported
-      //    bmi{}s at the back and store (in the target's data pad) the start
-      //    position. One bad aspect about this part is that we assume those
-      //    bmi{}s have been matched by the same rule. But let's not kid
-      //    ourselves, there will be no other rule that matches bmi{}s.
+      //    bmi{}s at the back and store (in the target's auxiliary data
+      //    storage) the start position. One bad aspect about this part is
+      //    that we assume those bmi{}s have been matched by the same
+      //    rule. But let's not kid ourselves, there will be no other rule
+      //    that matches bmi{}s.
       //
       //    @@ I think now we could use prerequisite_targets::data for this?
       //
@@ -5310,6 +5971,7 @@ namespace build2
       // so we actually don't need to pass any extra options (unless things
       // get moved) but they still need access to the BMIs (and things will
       // most likely have to be done differenly for distributed compilation).
+      // @@ Note: no longer the case for Clang either.
       //
       // So the revised plan: on the off chance that some implementation will
       // do it differently we will continue maintaing the imported/re-exported
@@ -5403,6 +6065,8 @@ namespace build2
               continue; // Scan the rest to detect if all done.
             }
           }
+          else
+            assert (name != m.name); // No duplicates.
 
           done = false;
         }
@@ -5430,10 +6094,18 @@ namespace build2
           //
           if (pt->is_a<bmix> ())
           {
-            const string& n (cast<string> (pt->state[a].vars[c_module_name]));
-
-            if (const target** p = check_exact (n))
-              *p = pt;
+            // If the extraction of the module information for this BMI failed
+            // and we have deferred failure to compiler diagnostics, then
+            // there will be no module name assigned. It would have been
+            // better to make sure that's the cause, but that won't be easy.
+            //
+            const string* n (cast_null<string> (
+                               pt->state[a].vars[c_module_name]));
+            if (n != nullptr)
+            {
+              if (const target** p = check_exact (*n))
+                *p = pt;
+            }
           }
           else if (pt->is_a (*x_mod))
           {
@@ -5442,7 +6114,8 @@ namespace build2
             // rule puts them into prerequisite_targets for us).
             //
             // The module names should be specified but if not assume
-            // something else is going on and ignore.
+            // something else is going on (like a deferred failure) and
+            // ignore.
             //
             // Note also that besides modules, prerequisite_targets may
             // contain libraries which are interface dependencies of this
@@ -5455,7 +6128,15 @@ namespace build2
               continue;
 
             if (const target** p = check_exact (*n))
-              *p = &this->make_module_sidebuild (a, bs, l, *pt, *n); // GCC 4.9
+            {
+              // It seems natural to build a BMI type that corresponds to the
+              // library type. After all, this is where the object file part
+              // of the BMI is going to come from (unless it's a module
+              // interface-only library).
+              //
+              *p = &this->make_module_sidebuild (
+                a, bs, &l, link_type (l).type, *pt, *n).first; // GCC 4.9
+            }
           }
           // Note that in prerequisite targets we will have the libux{}
           // members, not the group.
@@ -5470,112 +6151,295 @@ namespace build2
         }
       };
 
-      for (prerequisite_member p: group_prerequisite_members (a, t))
+      // Pre-resolve std modules in an ad hoc way for certain compilers.
+      //
+      // @@ TODO: cache x_stdlib value.
+      //
+      if ((ctype == compiler_type::msvc)          ||
+          (ctype == compiler_type::clang &&
+           cmaj >= 17                    &&
+           cast<string> (rs[x_stdlib]) == "libc++"))
       {
-        if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
-          continue;
-
-        const target* pt (p.load ()); // Should be cached for libraries.
+        // Similar logic to check_exact() above.
+        //
+        done = true;
 
-        if (pt != nullptr)
+        for (size_t i (0); i != n; ++i)
         {
-          const file* lt (nullptr);
-
-          if (const libx* l = pt->is_a<libx> ())
-            lt = link_member (*l, a, li);
-          else if (pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ())
-            lt = &pt->as<file> ();
+          module_import& m (imports[i]);
 
-          // If this is a library, check its bmi{}s and mxx{}s.
-          //
-          if (lt != nullptr)
+          if (m.name == "std" || m.name == "std.compat")
           {
-            find (*lt, find);
+            otype ot (otype::e);
+            const target* mt (nullptr);
 
-            if (done)
-              break;
+            switch (ctype)
+            {
+            case compiler_type::clang:
+              {
+                if (m.name != "std")
+                  fail << "module " << m.name << " not yet provided by libc++";
 
-            continue;
-          }
+                // Find or insert std.cppm (similar code to pkgconfig.cxx).
+                //
+                // Note: build_install_data is absolute and normalized.
+                //
+                mt = &ctx.targets.insert_locked (
+                  *x_mod,
+                  (dir_path (build_install_data) /= "libbuild2") /= "cc",
+                  dir_path (),
+                  "std",
+                  string ("cppm"), // For C++14 during bootstrap.
+                  target_decl::implied,
+                  trace).first;
+
+                // Which output type should we use, static or shared? The
+                // correct way would be to detect whether static or shared
+                // version of libc++ is to be linked and use the corresponding
+                // type. And we could do that by looking for -static-libstdc++
+                // in loption (and no, it's not -static-libc++).
+                //
+                // But, looking at the object file produced from std.cppm, it
+                // only contains one symbol, the static object initializer.
+                // And this is unlikely to change since all other non-inline
+                // or template symbols should be in libc++. So feels like it's
+                // not worth the trouble and one variant should be good enough
+                // for both cases. Let's use the shared one for less
+                // surprising diagnostics (as in, "why are you linking obje{}
+                // to a shared library?")
+                //
+                // (Of course, theoretically, std.cppm could detect via a
+                // macro whether it's being compiled with -fPIC or not and do
+                // things differently, but this seems far-fetched).
+                //
+                ot = otype::s;
 
-          // Fall through.
-        }
+                break;
+              }
+            case compiler_type::msvc:
+              {
+                // For MSVC, the source files std.ixx and std.compat.ixx are
+                // found in the modules/ subdirectory which is a sibling of
+                // include/ in the MSVC toolset (and "that is a contract with
+                // customers" to quote one of the developers).
+                //
+                // The problem of course is that there are multiple system
+                // header search directories (for example, as specified in the
+                // INCLUDE environment variable) and which one of them is for
+                // the MSVC toolset is not specified. So what we are going to
+                // do is search for one of the well-known standard C++ headers
+                // and assume that the directory where we found it is the one
+                // we are looking for. Or we could look for something
+                // MSVC-specific like vcruntime.h.
+                //
+                dir_path modules;
+                if (optional<path> p = find_system_header (path ("vcruntime.h")))
+                {
+                  p->make_directory (); // Strip vcruntime.h.
+                  if (p->leaf () == path ("include")) // Sanity check.
+                  {
+                    modules = path_cast<dir_path> (move (p->make_directory ()));
+                    modules /= "modules";
+                  }
+                }
 
-        // While it would have been even better not to search for a target, we
-        // need to get hold of the corresponding mxx{} (unlikely but possible
-        // for bmi{} to have a different name).
-        //
-        // While we want to use group_prerequisite_members() below, we cannot
-        // call resolve_group() since we will be doing it "speculatively" for
-        // modules that we may use but also for modules that may use us. This
-        // quickly leads to deadlocks. So instead we are going to perform an
-        // ad hoc group resolution.
-        //
-        const target* pg;
-        if (p.is_a<bmi> ())
-        {
-          pg = pt != nullptr ? pt : &p.search (t);
-          pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}.
-        }
-        else if (p.is_a (btt))
-        {
-          pg = &search (t, bmi::static_type, p.key ());
-          if (pt == nullptr) pt = &p.search (t);
+                if (modules.empty ())
+                  fail << "unable to locate MSVC standard modules directory";
+
+                mt = &ctx.targets.insert_locked (
+                  *x_mod,
+                  move (modules),
+                  dir_path (),
+                  m.name,
+                  string ("ixx"), // For C++14 during bootstrap.
+                  target_decl::implied,
+                  trace).first;
+
+                // For MSVC it's easier to detect the runtime being used since
+                // it's specified with the compile options (/MT[d], /MD[d]).
+                //
+                // Similar semantics as in extract_headers() except here we
+                // use options visible from the root scope. Note that
+                // find_option_prefixes() looks in reverse, so look in the
+                // cmode, x_coptions, c_coptions order.
+                //
+                initializer_list<const char*> os {"/MD", "/MT", "-MD", "-MT"};
+
+                const string* o;
+                if ((o = find_option_prefixes (os, cmode))          != nullptr ||
+                    (o = find_option_prefixes (os, rs, x_coptions)) != nullptr ||
+                    (o = find_option_prefixes (os, rs, c_coptions)) != nullptr)
+                {
+                  ot = (*o)[2] == 'D' ? otype::s : otype::a;
+                }
+                else
+                  ot = otype::s; // The default is /MD.
+
+                break;
+              }
+            case compiler_type::gcc:
+            case compiler_type::icc:
+              assert (false);
+            };
+
+            pair<target&, ulock> tl (
+              this->make_module_sidebuild ( // GCC 4.9
+                a, bs, nullptr, ot, *mt, m.name));
+
+            if (tl.second.owns_lock ())
+            {
+              // Special compile options for the std modules.
+              //
+              if (ctype == compiler_type::clang)
+              {
+                value& v (tl.first.append_locked (x_coptions));
+
+                if (v.null)
+                  v = strings {};
+
+                strings& cops (v.as<strings> ());
+
+                switch (ctype)
+                {
+                case compiler_type::clang:
+                  {
+                    cops.push_back ("-Wno-reserved-module-identifier");
+                    break;
+                  }
+                case compiler_type::msvc:
+                  // It appears nothing special is needed to compile MSVC
+                  // standard modules.
+                case compiler_type::gcc:
+                case compiler_type::icc:
+                  assert (false);
+                };
+              }
+
+              tl.second.unlock ();
+            }
+
+            pts[start + i].target = &tl.first;
+            m.score = match_max (m.name) + 1;
+            continue; // Scan the rest to detect if all done.
+          }
+
+          done = false;
         }
-        else
-          continue;
+      }
 
-        // Find the mxx{} prerequisite and extract its "file name" for the
-        // fuzzy match unless the user specified the module name explicitly.
-        //
-        for (prerequisite_member p:
-               prerequisite_members (a, t, group_prerequisites (*pt, pg)))
+      // Go over prerequisites and try to resolve imported modules with them.
+      //
+      if (!done)
+      {
+        for (prerequisite_member p: group_prerequisite_members (a, t))
         {
           if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
             continue;
 
-          if (p.is_a (*x_mod))
+          const target* pt (p.load ()); // Should be cached for libraries.
+
+          if (pt != nullptr)
           {
-            // Check for an explicit module name. Only look for an existing
-            // target (which means the name can only be specified on the
-            // target itself, not target type/pattern-spec).
+            const file* lt (nullptr);
+
+            if (const libx* l = pt->is_a<libx> ())
+              lt = link_member (*l, a, li);
+            else if (pt->is_a<liba> () ||
+                     pt->is_a<libs> () ||
+                     pt->is_a<libux> ())
+              lt = &pt->as<file> ();
+
+            // If this is a library, check its bmi{}s and mxx{}s.
             //
-            const target* t (p.search_existing ());
-            const string* n (t != nullptr
-                             ? cast_null<string> (t->vars[c_module_name])
-                             : nullptr);
-            if (n != nullptr)
+            if (lt != nullptr)
             {
-              if (const target** p = check_exact (*n))
-                *p = pt;
+              find (*lt, find);
+
+              if (done)
+                break;
+
+              continue;
             }
-            else
+
+            // Fall through.
+          }
+
+          // While it would have been even better not to search for a target,
+          // we need to get hold of the corresponding mxx{} (unlikely but
+          // possible for bmi{} to have a different name).
+          //
+          // While we want to use group_prerequisite_members() below, we
+          // cannot call resolve_group() since we will be doing it
+          // "speculatively" for modules that we may use but also for modules
+          // that may use us. This quickly leads to deadlocks. So instead we
+          // are going to perform an ad hoc group resolution.
+          //
+          const target* pg;
+          if (p.is_a<bmi> ())
+          {
+            pg = pt != nullptr ? pt : &p.search (t);
+            pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}.
+          }
+          else if (p.is_a (btt))
+          {
+            pg = &search (t, bmi::static_type, p.key ());
+            if (pt == nullptr) pt = &p.search (t);
+          }
+          else
+            continue;
+
+          // Find the mxx{} prerequisite and extract its "file name" for the
+          // fuzzy match unless the user specified the module name explicitly.
+          //
+          for (prerequisite_member p:
+                 prerequisite_members (a, t, group_prerequisites (*pt, pg)))
+          {
+            if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+              continue;
+
+            if (p.is_a (*x_mod))
             {
-              // Fuzzy match.
+              // Check for an explicit module name. Only look for an existing
+              // target (which means the name can only be specified on the
+              // target itself, not target type/pattern-spec).
               //
-              string f;
+              const target* mt (p.search_existing ());
+              const string* n (mt != nullptr
+                               ? cast_null<string> (mt->vars[c_module_name])
+                               : nullptr);
+              if (n != nullptr)
+              {
+                if (const target** p = check_exact (*n))
+                  *p = pt;
+              }
+              else
+              {
+                // Fuzzy match.
+                //
+                string f;
 
-              // Add the directory part if it is relative. The idea is to
-              // include it into the module match, say hello.core vs
-              // hello/mxx{core}.
-              //
-              // @@ MOD: Why not for absolute? Good question. What if it
-              // contains special components, say, ../mxx{core}?
-              //
-              const dir_path& d (p.dir ());
+                // Add the directory part if it is relative. The idea is to
+                // include it into the module match, say hello.core vs
+                // hello/mxx{core}.
+                //
+                // @@ MOD: Why not for absolute? Good question. What if it
+                // contains special components, say, ../mxx{core}?
+                //
+                const dir_path& d (p.dir ());
 
-              if (!d.empty () && d.relative ())
-                f = d.representation (); // Includes trailing slash.
+                if (!d.empty () && d.relative ())
+                  f = d.representation (); // Includes trailing slash.
 
-              f += p.name ();
-              check_fuzzy (pt, f);
+                f += p.name ();
+                check_fuzzy (pt, f);
+              }
+              break;
             }
-            break;
           }
-        }
 
-        if (done)
-          break;
+          if (done)
+            break;
+        }
       }
 
       // Diagnose unresolved modules.
@@ -5645,9 +6509,12 @@ namespace build2
 
         if (m.score <= match_max (in))
         {
-          const string& mn (cast<string> (bt->state[a].vars[c_module_name]));
+          // As above (deffered failure).
+          //
+          const string* mn (
+            cast_null<string> (bt->state[a].vars[c_module_name]));
 
-          if (in != mn)
+          if (mn != nullptr && in != *mn)
           {
             // Note: matched, so the group should be resolved.
             //
@@ -5661,7 +6528,7 @@ namespace build2
                 fail (relative (src))
                   << "failed to correctly guess module name from " << p <<
                   info << "guessed: " << in <<
-                  info << "actual:  " << mn <<
+                  info << "actual:  " << *mn <<
                   info << "consider adjusting module interface file names or" <<
                   info << "consider specifying module name with " << x
                   << ".module_name";
@@ -5672,11 +6539,11 @@ namespace build2
 
         // Hash (we know it's a file).
         //
-        cs.append (static_cast<const file&> (*bt).path ().string ());
+        cs.append (bt->as<file> ().path ().string ());
 
         // Copy over bmi{}s from our prerequisites weeding out duplicates.
         //
-        if (size_t j = bt->data<match_data> ().modules.start)
+        if (size_t j = bt->data<match_data> (a).modules.start)
         {
           // Hard to say whether we should reserve or not. We will probably
           // get quite a bit of duplications.
@@ -5689,26 +6556,29 @@ namespace build2
             if (et == nullptr)
               continue; // Unresolved (std.*).
 
-            const string& mn (cast<string> (et->state[a].vars[c_module_name]));
+            // As above (deferred failure).
+            //
+            const string* mn (cast_null<string> (et->state[a].vars[c_module_name]));
 
-            if (find_if (imports.begin (), imports.end (),
-                         [&mn] (const module_import& i)
+            if (mn != nullptr &&
+                find_if (imports.begin (), imports.end (),
+                         [mn] (const module_import& i)
                          {
-                           return i.name == mn;
+                           return i.name == *mn;
                          }) == imports.end ())
             {
               pts.push_back (et);
-              cs.append (static_cast<const file&> (*et).path ().string ());
+              cs.append (et->as<file> ().path ().string ());
 
               // Add to the list of imports for further duplicate suppression.
               // We could have stored reference to the name (e.g., in score)
               // but it's probably not worth it if we have a small string
               // optimization.
               //
-              import_type t (mn.find (':') != string::npos
+              import_type t (mn->find (':') != string::npos
                              ? import_type::module_part
                              : import_type::module_intf);
-              imports.push_back (module_import {t, mn, true, 0});
+              imports.push_back (module_import {t, *mn, true, 0});
             }
           }
         }
@@ -5728,6 +6598,10 @@ namespace build2
     // Find or create a modules sidebuild subproject returning its root
     // directory.
     //
+    // @@ Could we omit creating a subproject if the sidebuild scope is the
+    //    project scope itself? This would speed up simple examples (and
+    //    potentially direct compilation that we may support).
+    //
     pair<dir_path, const scope&> compile_rule::
     find_modules_sidebuild (const scope& rs) const
     {
@@ -5738,6 +6612,9 @@ namespace build2
       // cc.config module and that is within our amalgmantion seems like a
       // good place.
       //
+      // @@ TODO: maybe we should cache this in compile_rule ctor like we
+      //          do for the header cache?
+      //
       const scope* as (&rs);
       {
         const scope* ws (as->weak_scope ());
@@ -5753,7 +6630,7 @@ namespace build2
             // This is also the module that registers the scope operation
             // callback that cleans up the subproject.
             //
-            if (cast_false<bool> ((*s)["cc.core.vars.loaded"]))
+            if (cast_false<bool> (s->vars["cc.core.vars.loaded"]))
               as = s;
 
           } while (s != ws);
@@ -5829,13 +6706,18 @@ namespace build2
       return pair<dir_path, const scope&> (move (pd), *as);
     }
 
-    // Synthesize a dependency for building a module binary interface on
-    // the side.
+    // Synthesize a dependency for building a module binary interface of a
+    // library on the side. If library is missing, then assume it's some
+    // ad hoc/system library case (in which case we assume it's binless,
+    // for now).
     //
-    const file& compile_rule::
+    // The return value semantics is as in target_set::insert_locked().
+    //
+    pair<target&, ulock> compile_rule::
     make_module_sidebuild (action a,
                            const scope& bs,
-                           const file& lt,
+                           const file* lt,
+                           otype ot,
                            const target& mt,
                            const string& mn) const
     {
@@ -5856,24 +6738,20 @@ namespace build2
                  back_inserter (mf),
                  [] (char c) {return c == '.' ? '-' : c == ':' ? '+' : c;});
 
-      // It seems natural to build a BMI type that corresponds to the library
-      // type. After all, this is where the object file part of the BMI is
-      // going to come from (unless it's a module interface-only library).
-      //
-      const target_type& tt (compile_types (link_type (lt).type).bmi);
+      const target_type& tt (compile_types (ot).bmi);
 
       // Store the BMI target in the subproject root. If the target already
       // exists then we assume all this is already done (otherwise why would
       // someone have created such a target).
       //
-      if (const file* bt = bs.ctx.targets.find<file> (
+      if (const target* bt = bs.ctx.targets.find (
             tt,
             pd,
             dir_path (), // Always in the out tree.
             mf,
             nullopt,     // Use default extension.
             trace))
-        return *bt;
+        return pair<target&, ulock> (const_cast<target&> (*bt), ulock ());
 
       prerequisites ps;
       ps.push_back (prerequisite (mt));
@@ -5886,16 +6764,22 @@ namespace build2
       //
       // Note: lt is matched and so the group is resolved.
       //
-      ps.push_back (prerequisite (lt));
-      for (prerequisite_member p: group_prerequisite_members (a, lt))
+      if (lt != nullptr)
       {
-        if (include (a, lt, p) != include_type::normal) // Excluded/ad hoc.
-          continue;
-
-        if (p.is_a<libx> () ||
-            p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ())
+        ps.push_back (prerequisite (*lt));
+        for (prerequisite_member p: group_prerequisite_members (a, *lt))
         {
-          ps.push_back (p.as_prerequisite ());
+          // Ignore update=match.
+          //
+          lookup l;
+          if (include (a, *lt, p, &l) != include_type::normal) // Excluded/ad hoc.
+            continue;
+
+          if (p.is_a<libx> () ||
+              p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ())
+          {
+            ps.push_back (p.as_prerequisite ());
+          }
         }
       }
 
@@ -5906,23 +6790,24 @@ namespace build2
                 move (mf),
                 nullopt,     // Use default extension.
                 target_decl::implied,
-                trace));
-      file& bt (static_cast<file&> (p.first));
+                trace,
+                true /* skip_find */));
 
       // Note that this is racy and someone might have created this target
       // while we were preparing the prerequisite list.
       //
       if (p.second)
       {
-        bt.prerequisites (move (ps));
+        p.first.prerequisites (move (ps));
 
         // Unless this is a binless library, we don't need the object file
         // (see config_data::b_binless for details).
         //
-        bt.vars.assign (b_binless) = (lt.mtime () == timestamp_unreal);
+        p.first.vars.assign (b_binless) = (lt == nullptr ||
+                                           lt->mtime () == timestamp_unreal);
       }
 
-      return bt;
+      return p;
     }
 
     // Synthesize a dependency for building a header unit binary interface on
@@ -6038,7 +6923,9 @@ namespace build2
               //
               process_libraries (a, bs, nullopt, sys_lib_dirs,
                                  *f, la, 0, // lflags unused.
-                                 imp, lib, nullptr, true /* self */,
+                                 imp, lib, nullptr,
+                                 true /* self */,
+                                 false /* proc_opt_group */,
                                  &lib_cache);
 
               if (lt != nullptr)
@@ -6123,7 +7010,10 @@ namespace build2
         //
         for (prerequisite_member p: group_prerequisite_members (a, t))
         {
-          if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+          // Ignore update=match.
+          //
+          lookup l;
+          if (include (a, t, p, &l) != include_type::normal) // Excluded/ad hoc.
             continue;
 
           if (p.is_a<libx> () ||
@@ -6141,8 +7031,9 @@ namespace build2
                 move (mf),
                 nullopt,     // Use default extension.
                 target_decl::implied,
-                trace));
-      file& bt (static_cast<file&> (p.first));
+                trace,
+                true /* skip_find */));
+      file& bt (p.first.as<file> ());
 
       // Note that this is racy and someone might have created this target
       // while we were preparing the prerequisite list.
@@ -6180,7 +7071,7 @@ namespace build2
     // Filter cl.exe noise (msvc.cxx).
     //
     void
-    msvc_filter_cl (ifdstream&, const path& src);
+    msvc_filter_cl (diag_buffer&, const path& src);
 
     // Append header unit-related options.
     //
@@ -6231,7 +7122,7 @@ namespace build2
     // options).
     //
     void compile_rule::
-    append_module_options (environment& env,
+    append_module_options (environment&,
                            cstrings& args,
                            small_vector<string, 2>& stor,
                            action a,
@@ -6242,8 +7133,6 @@ namespace build2
       unit_type ut (md.type);
       const module_positions& ms (md.modules);
 
-      dir_path stdifc; // See the VC case below.
-
       switch (ctype)
       {
       case compiler_type::gcc:
@@ -6272,15 +7161,12 @@ namespace build2
           if (ms.start == 0)
             return;
 
-          // Clang embeds module file references so we only need to specify
-          // our direct imports.
-          //
-          // If/when we get the ability to specify the mapping in a file, we
-          // will pass the whole list.
+          // If/when we get the ability to specify the mapping in a file.
           //
 #if 0
           // In Clang the module implementation's unit .pcm is special and
-          // must be "loaded".
+          // must be "loaded". Note: not anymore, not from Clang 16 and is
+          // deprecated in 17.
           //
           if (ut == unit_type::module_impl)
           {
@@ -6297,10 +7183,7 @@ namespace build2
           stor.push_back (move (s));
 #else
           auto& pts (t.prerequisite_targets[a]);
-          for (size_t i (ms.start),
-                 n (ms.copied != 0 ? ms.copied : pts.size ());
-               i != n;
-               ++i)
+          for (size_t i (ms.start), n (pts.size ()); i != n; ++i)
           {
             const target* pt (pts[i]);
 
@@ -6313,17 +7196,9 @@ namespace build2
             const file& f (pt->as<file> ());
             string s (relative (f.path ()).string ());
 
-            // In Clang the module implementation's unit .pcm is special and
-            // must be "loaded".
-            //
-            if (ut == unit_type::module_impl && i == ms.start)
-              s.insert (0, "-fmodule-file=");
-            else
-            {
-              s.insert (0, 1, '=');
-              s.insert (0, cast<string> (f.state[a].vars[c_module_name]));
-              s.insert (0, "-fmodule-file=");
-            }
+            s.insert (0, 1, '=');
+            s.insert (0, cast<string> (f.state[a].vars[c_module_name]));
+            s.insert (0, "-fmodule-file=");
 
             stor.push_back (move (s));
           }
@@ -6335,10 +7210,11 @@ namespace build2
           if (ms.start == 0)
             return;
 
+          // MSVC requires a transitive set of interfaces, including
+          // implementation partitions.
+          //
           auto& pts (t.prerequisite_targets[a]);
-          for (size_t i (ms.start), n (pts.size ());
-               i != n;
-               ++i)
+          for (size_t i (ms.start), n (pts.size ()); i != n; ++i)
           {
             const target* pt (pts[i]);
 
@@ -6349,34 +7225,14 @@ namespace build2
             // of these are bmi's.
             //
             const file& f (pt->as<file> ());
+            string s (relative (f.path ()).string ());
 
-            // In VC std.* modules can only come from a single directory
-            // specified with the IFCPATH environment variable or the
-            // /module:stdIfcDir option.
-            //
-            if (std_module (cast<string> (f.state[a].vars[c_module_name])))
-            {
-              dir_path d (f.path ().directory ());
+            s.insert (0, 1, '=');
+            s.insert (0, cast<string> (f.state[a].vars[c_module_name]));
 
-              if (stdifc.empty ())
-              {
-                // Go one directory up since /module:stdIfcDir will look in
-                // either Release or Debug subdirectories. Keeping the result
-                // absolute feels right.
-                //
-                stor.push_back ("/module:stdIfcDir");
-                stor.push_back (d.directory ().string ());
-                stdifc = move (d);
-              }
-              else if (d != stdifc) // Absolute and normalized.
-                fail << "multiple std.* modules in different directories";
-            }
-            else
-            {
-              stor.push_back ("/module:reference");
-              stor.push_back (relative (f.path ()).string ());
-            }
+            stor.push_back (move (s));
           }
+
           break;
         }
       case compiler_type::icc:
@@ -6387,35 +7243,20 @@ namespace build2
       // into storage? Because of potential reallocations.
       //
       for (const string& a: stor)
-        args.push_back (a.c_str ());
-
-      if (getenv ("IFCPATH"))
-      {
-        // VC's IFCPATH takes precedence over /module:stdIfcDir so unset it if
-        // we are using our own std modules. Note: IFCPATH saved in guess.cxx.
-        //
-        if (!stdifc.empty ())
-          env.push_back ("IFCPATH");
-      }
-      else if (stdifc.empty ())
       {
-        // Add the VC's default directory (should be only one).
-        //
-        if (sys_mod_dirs != nullptr && !sys_mod_dirs->empty ())
-        {
-          args.push_back ("/module:stdIfcDir");
-          args.push_back (sys_mod_dirs->front ().string ().c_str ());
-        }
+        if (ctype == compiler_type::msvc)
+          args.push_back ("/reference");
+
+        args.push_back (a.c_str ());
       }
     }
 
     target_state compile_rule::
-    perform_update (action a, const target& xt) const
+    perform_update (action a, const target& xt, match_data& md) const
     {
       const file& t (xt.as<file> ());
       const path& tp (t.path ());
 
-      match_data md (move (t.data<match_data> ()));
       unit_type ut (md.type);
 
       context& ctx (t.ctx);
@@ -6479,7 +7320,8 @@ namespace build2
       // If we are building a module interface or partition, then the target
       // is bmi*{} and it may have an ad hoc obj*{} member. For header units
       // there is no obj*{} (see the corresponding add_adhoc_member() call in
-      // apply()).
+      // apply()). For named modules there may be no obj*{} if this is a
+      // sidebuild (obj*{} is already in the library binary).
       //
       path relm;
       path relo;
@@ -6527,9 +7369,6 @@ namespace build2
       small_vector<string, 2> header_args; // Header unit options storage.
       small_vector<string, 2> module_args; // Module options storage.
 
-      size_t out_i (0);  // Index of the -o option.
-      size_t lang_n (0); // Number of lang options.
-
       switch (cclass)
       {
       case compiler_class::msvc:
@@ -6549,14 +7388,20 @@ namespace build2
           if (md.pp != preprocessed::all)
             append_sys_hdr_options (args); // Extra system header dirs (last).
 
+          // Note: could be overridden in mode.
+          //
+          append_diag_color_options (args);
+
           // Set source/execution charsets to UTF-8 unless a custom charset
           // is specified.
           //
           // Note that clang-cl supports /utf-8 and /*-charset.
           //
           {
-            bool sc (find_option_prefix ("/source-charset:", args));
-            bool ec (find_option_prefix ("/execution-charset:", args));
+            bool sc (find_option_prefixes (
+                       {"/source-charset:", "-source-charset:"}, args));
+            bool ec (find_option_prefixes (
+                       {"/execution-charset:", "-execution-charset:"}, args));
 
             if (!sc && !ec)
               args.push_back ("/utf-8");
@@ -6575,8 +7420,8 @@ namespace build2
           //
           if (cvariant != "clang" && isystem (*this))
           {
-            if (find_option_prefix ("/external:I", args) &&
-                !find_option_prefix ("/external:W", args))
+            if (find_option_prefixes ({"/external:I", "-external:I"}, args) &&
+                !find_option_prefixes ({"/external:W", "-external:W"}, args))
               args.push_back ("/external:W0");
           }
 
@@ -6590,7 +7435,9 @@ namespace build2
           // For C looks like no /EH* (exceptions supported but no C++ objects
           // destroyed) is a reasonable default.
           //
-          if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+
+          if (x_lang == lang::cxx &&
+              !find_option_prefixes ({"/EH", "-EH"}, args))
             args.push_back ("/EHsc");
 
           // The runtime is a bit more interesting. At first it may seem like
@@ -6612,7 +7459,7 @@ namespace build2
           // unreasonable thing to do). So by default we will always use the
           // release runtime.
           //
-          if (!find_option_prefixes ({"/MD", "/MT"}, args))
+          if (!find_option_prefixes ({"/MD", "/MT", "-MD", "-MT"}, args))
             args.push_back ("/MD");
 
           msvc_sanitize_cl (args);
@@ -6635,9 +7482,8 @@ namespace build2
           // Note also that what we are doing here appears to be incompatible
           // with PCH (/Y* options) and /Gm (minimal rebuild).
           //
-          // @@ MOD: TODO deal with absent relo.
-          //
-          if (find_options ({"/Zi", "/ZI"}, args))
+          if (!relo.empty () &&
+              find_options ({"/Zi", "/ZI", "-Zi", "-ZI"}, args))
           {
             if (fc)
               args.push_back ("/Fd:");
@@ -6650,27 +7496,38 @@ namespace build2
             args.push_back (out1.c_str ());
           }
 
-          if (fc)
-          {
-            args.push_back ("/Fo:");
-            args.push_back (relo.string ().c_str ());
-          }
-          else
+          if (ut == unit_type::module_intf      ||
+              ut == unit_type::module_intf_part ||
+              ut == unit_type::module_impl_part ||
+              ut == unit_type::module_header)
           {
-            out = "/Fo" + relo.string ();
-            args.push_back (out.c_str ());
-          }
+            assert (ut != unit_type::module_header); // @@ MODHDR
 
-          // @@ MODHDR MSVC
-          // @@ MODPART MSVC
-          //
-          if (ut == unit_type::module_intf)
-          {
             relm = relative (tp);
 
-            args.push_back ("/module:interface");
-            args.push_back ("/module:output");
+            args.push_back ("/ifcOutput");
             args.push_back (relm.string ().c_str ());
+
+            if (relo.empty ())
+              args.push_back ("/ifcOnly");
+            else
+            {
+              args.push_back ("/Fo:");
+              args.push_back (relo.string ().c_str ());
+            }
+          }
+          else
+          {
+            if (fc)
+            {
+              args.push_back ("/Fo:");
+              args.push_back (relo.string ().c_str ());
+            }
+            else
+            {
+              out = "/Fo" + relo.string ();
+              args.push_back (out.c_str ());
+            }
           }
 
           // Note: no way to indicate that the source if already preprocessed.
@@ -6685,9 +7542,53 @@ namespace build2
         {
           append_options (args, cmode);
 
+          // Clang 15 introduced the unqualified-std-cast-call warning which
+          // warns about unqualified calls to std::move() and std::forward()
+          // (because they can be "hijacked" via ADL). Surprisingly, this
+          // warning is enabled by default, as opposed to with -Wextra or at
+          // least -Wall. It has also proven to be quite disruptive, causing a
+          // large number of warnings in a large number of packages. So we are
+          // going to "remap" it to -Wextra for now and in the future may
+          // "relax" it to -Wall and potentially to being enabled by default.
+          // See GitHub issue #259 for background and details.
+          //
+          if (x_lang == lang::cxx           &&
+              ctype == compiler_type::clang &&
+              cmaj >= 15)
+          {
+            bool w (false);       // Seen -W[no-]unqualified-std-cast-call
+            optional<bool> extra; // Seen -W[no-]extra
+
+            for (const char* s: reverse_iterate (args))
+            {
+              if (s != nullptr)
+              {
+                if (strcmp (s, "-Wunqualified-std-cast-call") == 0 ||
+                    strcmp (s, "-Wno-unqualified-std-cast-call") == 0)
+                {
+                  w = true;
+                  break;
+                }
+
+                if (!extra) // Last seen option wins.
+                {
+                  if      (strcmp (s, "-Wextra") == 0) extra = true;
+                  else if (strcmp (s, "-Wno-extra") == 0) extra = false;
+                }
+              }
+            }
+
+            if (!w && (!extra || !*extra))
+              args.push_back ("-Wno-unqualified-std-cast-call");
+          }
+
           if (md.pp != preprocessed::all)
             append_sys_hdr_options (args); // Extra system header dirs (last).
 
+          // Note: could be overridden in mode.
+          //
+          append_diag_color_options (args);
+
           // Set the input charset to UTF-8 unless a custom one is specified.
           //
           // Note that the execution charset (-fexec-charset) is UTF-8 by
@@ -6741,8 +7642,7 @@ namespace build2
                 // either -nostdlib or -nostartfiles is specified. Let's do
                 // the same.
                 //
-                initializer_list<const char*> os {"-nostdlib", "-nostartfiles"};
-                if (!find_options (os, cmode) && !find_options (os, args))
+                if (!find_options ({"-nostdlib", "-nostartfiles"}, args))
                 {
                   args.push_back ("-D_MT");
                   args.push_back ("-D_DLL");
@@ -6804,10 +7704,6 @@ namespace build2
           append_header_options (env, args, header_args, a, t, md, md.dd);
           append_module_options (env, args, module_args, a, t, md, md.dd);
 
-          // Note: the order of the following options is relied upon below.
-          //
-          out_i = args.size (); // Index of the -o option.
-
           if (ut == unit_type::module_intf      ||
               ut == unit_type::module_intf_part ||
               ut == unit_type::module_impl_part ||
@@ -6846,21 +7742,35 @@ namespace build2
               }
             case compiler_type::clang:
               {
-                // @@ MOD TODO: deal with absent relo.
+                assert (ut != unit_type::module_header); // @@ MODHDR
 
                 relm = relative (tp);
 
-                args.push_back ("-o");
-                args.push_back (relm.string ().c_str ());
-                args.push_back ("--precompile");
-
                 // Without this option Clang's .pcm will reference source
-                // files.  In our case this file may be transient (.ii). Plus,
+                // files. In our case this file may be transient (.ii). Plus,
                 // it won't play nice with distributed compilation.
                 //
+                // Note that this sort of appears to be the default from Clang
+                // 17, but not quite, see llvm-project issued #72383.
+                //
                 args.push_back ("-Xclang");
                 args.push_back ("-fmodules-embed-all-files");
 
+                if (relo.empty ())
+                {
+                  args.push_back ("-o");
+                  args.push_back (relm.string ().c_str ());
+                  args.push_back ("--precompile");
+                }
+                else
+                {
+                  out1 = "-fmodule-output=" + relm.string ();
+                  args.push_back (out1.c_str ());
+                  args.push_back ("-o");
+                  args.push_back (relo.string ().c_str ());
+                  args.push_back ("-c");
+                }
+
                 break;
               }
             case compiler_type::msvc:
@@ -6875,7 +7785,7 @@ namespace build2
             args.push_back ("-c");
           }
 
-          lang_n = append_lang_options (args, md);
+          append_lang_options (args, md);
 
           if (md.pp == preprocessed::all)
           {
@@ -6920,23 +7830,44 @@ namespace build2
       if (!env.empty ())
         env.push_back (nullptr);
 
+      // We have no choice but to serialize early if we want the command line
+      // printed shortly before actually executing the compiler. Failed that,
+      // it may look like we are still executing in parallel.
+      //
+      scheduler::alloc_guard jobs_ag;
+      if (!ctx.dry_run && cast_false<bool> (t[c_serialize]))
+        jobs_ag = scheduler::alloc_guard (*ctx.sched, phase_unlock (nullptr));
+
       // With verbosity level 2 print the command line as if we are compiling
       // the source file, not its preprocessed version (so that it's easy to
       // copy and re-run, etc). Only at level 3 and above print the real deal.
       //
+      // @@ TODO: why don't we print env (here and/or below)? Also link rule.
+      //
       if (verb == 1)
-        text << x_name << ' ' << s;
+      {
+        const char* name (x_assembler_cpp (s) ? "as-cpp"   :
+                          x_objective (s)     ? x_obj_name :
+                          x_name);
+
+        print_diag (name, s, t);
+      }
       else if (verb == 2)
         print_process (args);
 
       // If we have the (partially) preprocessed output, switch to that.
       //
-      bool psrc (md.psrc);
+      // But we remember the original source/position to restore later.
+      //
+      bool psrc (md.psrc); // Note: false if cc.reprocess.
       bool ptmp (psrc && md.psrc.temporary);
+      pair<size_t, const char*> osrc;
       if (psrc)
       {
         args.pop_back (); // nullptr
+        osrc.second = args.back ();
         args.pop_back (); // sp
+        osrc.first = args.size ();
 
         sp = &md.psrc.path ();
 
@@ -6946,25 +7877,40 @@ namespace build2
         {
         case compiler_type::gcc:
           {
-            // The -fpreprocessed is implied by .i/.ii. But not when compiling
-            // a header unit (there is no .hi/.hii).
+            // -fpreprocessed is implied by .i/.ii unless compiling a header
+            // unit (there is no .hi/.hii). Also, we would need to pop -x
+            // since it takes precedence over the extension, which would mess
+            // up our osrc logic. So in the end it feels like always passing
+            // explicit -fpreprocessed is the way to go.
             //
-            if (ut == unit_type::module_header)
-              args.push_back ("-fpreprocessed");
-            else
-              // Pop -x since it takes precedence over the extension.
-              //
-              // @@ I wonder why bother and not just add -fpreprocessed? Are
-              //    we trying to save an option or does something break?
-              //
-              for (; lang_n != 0; --lang_n)
-                args.pop_back ();
-
+            // Also note that similarly there is no .Si for .S files.
+            //
+            args.push_back ("-fpreprocessed");
             args.push_back ("-fdirectives-only");
             break;
           }
         case compiler_type::clang:
           {
+            // Clang 15 and later with -pedantic warns about GNU-style line
+            // markers that it wrote itself in the -frewrite-includes output
+            // (llvm-project issue 63284). So we suppress this warning unless
+            // compiling from source.
+            //
+            // In Apple Clang this warning/option are absent in 14.0.3 (which
+            // is said to be based on vanilla Clang 15.0.5) for some reason
+            // (let's hope it's because they patched it out rather than due to
+            // a misleading _LIBCPP_VERSION value).
+            //
+            if (ctype == compiler_type::clang &&
+                cmaj >= (cvariant != "apple" ? 15 : 16))
+            {
+              if (find_options ({"-pedantic",  "-pedantic-errors",
+                                 "-Wpedantic", "-Werror=pedantic"}, args))
+              {
+                args.push_back ("-Wno-gnu-line-marker");
+              }
+            }
+
             // Note that without -x Clang will treat .i/.ii as fully
             // preprocessed.
             //
@@ -7013,45 +7959,38 @@ namespace build2
           file_cache::read psrcr (psrc ? md.psrc.open () : file_cache::read ());
 
           // VC cl.exe sends diagnostics to stdout. It also prints the file
-          // name being compiled as the first line. So for cl.exe we redirect
-          // stdout to a pipe, filter that noise out, and send the rest to
-          // stderr.
+          // name being compiled as the first line. So for cl.exe we filter
+          // that noise out.
           //
-          // For other compilers redirect stdout to stderr, in case any of
-          // them tries to pull off something similar. For sane compilers this
-          // should be harmless.
+          // For other compilers also redirect stdout to stderr, in case any
+          // of them tries to pull off something similar. For sane compilers
+          // this should be harmless.
           //
           bool filter (ctype == compiler_type::msvc);
 
           process pr (cpath,
-                      args.data (),
-                      0, (filter ? -1 : 2), 2,
+                      args,
+                      0, 2, diag_buffer::pipe (ctx, filter /* force */),
                       nullptr, // CWD
                       env.empty () ? nullptr : env.data ());
 
-          if (filter)
-          {
-            try
-            {
-              ifdstream is (
-                move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit);
+          diag_buffer dbuf (ctx, args[0], pr);
 
-              msvc_filter_cl (is, *sp);
+          if (filter)
+            msvc_filter_cl (dbuf, *sp);
 
-              // If anything remains in the stream, send it all to stderr.
-              // Note that the eof check is important: if the stream is at
-              // eof, this and all subsequent writes to the diagnostics stream
-              // will fail (and you won't see a thing).
-              //
-              if (is.peek () != ifdstream::traits_type::eof ())
-                diag_stream_lock () << is.rdbuf ();
+          dbuf.read ();
 
-              is.close ();
-            }
-            catch (const io_error&) {} // Assume exits with error.
+          // Restore the original source if we switched to preprocessed.
+          //
+          if (psrc)
+          {
+            args.resize (osrc.first);
+            args.push_back (osrc.second);
+            args.push_back (nullptr);
           }
 
-          run_finish (args, pr);
+          run_finish (dbuf, args, pr, 1 /* verbosity */);
         }
         catch (const process_error& e)
         {
@@ -7063,6 +8002,8 @@ namespace build2
           throw failed ();
         }
 
+        jobs_ag.deallocate ();
+
         if (md.deferred_failure)
           fail << "expected error exit status from " << x_lang << " compiler";
       }
@@ -7072,57 +8013,6 @@ namespace build2
       if (ptmp && verb >= 3)
         md.psrc.temporary = true;
 
-      // Clang's module compilation requires two separate compiler
-      // invocations.
-      //
-      // @@ MODPART: Clang (all of this is probably outdated).
-      //
-      if (ctype == compiler_type::clang && ut == unit_type::module_intf)
-      {
-        // Adjust the command line. First discard everything after -o then
-        // build the new "tail".
-        //
-        args.resize (out_i + 1);
-        args.push_back (relo.string ().c_str ()); // Produce .o.
-        args.push_back ("-c");                    // By compiling .pcm.
-        args.push_back ("-Wno-unused-command-line-argument");
-        args.push_back (relm.string ().c_str ());
-        args.push_back (nullptr);
-
-        if (verb >= 2)
-          print_process (args);
-
-        if (!ctx.dry_run)
-        {
-          // Remove the target file if this fails. If we don't do that, we
-          // will end up with a broken build that is up-to-date.
-          //
-          auto_rmfile rm (relm);
-
-          try
-          {
-            process pr (cpath,
-                        args.data (),
-                        0, 2, 2,
-                        nullptr, // CWD
-                        env.empty () ? nullptr : env.data ());
-
-            run_finish (args, pr);
-          }
-          catch (const process_error& e)
-          {
-            error << "unable to execute " << args[0] << ": " << e;
-
-            if (e.child)
-              exit (1);
-
-            throw failed ();
-          }
-
-          rm.cancel ();
-        }
-      }
-
       timestamp now (system_clock::now ());
 
       if (!ctx.dry_run)
@@ -7138,25 +8028,27 @@ namespace build2
     }
 
     target_state compile_rule::
-    perform_clean (action a, const target& xt) const
+    perform_clean (action a, const target& xt, const target_type& srct) const
     {
       const file& t (xt.as<file> ());
 
+      // Preprocessed file extension.
+      //
+      const char* pext (x_assembler_cpp (srct) ? ".Si"      :
+                        x_objective (srct)     ? x_obj_pext :
+                        x_pext);
+
       // Compressed preprocessed file extension.
       //
-      auto cpext = [this, &t, s = string ()] () mutable -> const char*
-      {
-        return (s = t.ctx.fcache.compressed_extension (x_pext)).c_str ();
-      };
+      string cpext (t.ctx.fcache->compressed_extension (pext));
 
       clean_extras extras;
-
       switch (ctype)
       {
-      case compiler_type::gcc:   extras = {".d", x_pext, cpext (), ".t"};          break;
-      case compiler_type::clang: extras = {".d", x_pext, cpext ()};                break;
-      case compiler_type::msvc:  extras = {".d", x_pext, cpext (), ".idb", ".pdb"};break;
-      case compiler_type::icc:   extras = {".d"};                                  break;
+      case compiler_type::gcc:   extras = {".d", pext, cpext.c_str (), ".t"};           break;
+      case compiler_type::clang: extras = {".d", pext, cpext.c_str ()};                 break;
+      case compiler_type::msvc:  extras = {".d", pext, cpext.c_str (), ".idb", ".pdb"}; break;
+      case compiler_type::icc:   extras = {".d"};                                       break;
       }
 
       return perform_clean_extra (a, t, extras);