Add support for JSON compilation database generation and maintenance

See the "Compilation Database" section in the "cc Module" chapter of the manual for details.
author: Boris Kolpackov <boris@codesynthesis.com> 2024-08-28 09:36:16 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2024-10-09 10:06:21 +0200
commit: eeb155ebc35c5947234f731c333e2bd71ea88974 (patch)
tree: d2784e072b1770b3d30587f97eb4b72b7ef3e765 /libbuild2
parent: 8384a087afc7e29e900a3ce96d55ab2f5c2a74c2 (diff)
13 files changed, 1964 insertions, 55 deletions
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx
index 99c3b90..29a26b5 100644
--- a/libbuild2/cc/compile-rule.cxx
+++ b/libbuild2/cc/compile-rule.cxx
@@ -25,6 +25,7 @@
 #include <libbuild2/cc/target.hxx>  // h
 #include <libbuild2/cc/module.hxx>
 #include <libbuild2/cc/utility.hxx>
+#include <libbuild2/cc/compiledb.hxx>
 
 using std::exit;
 using std::strlen;
@@ -1181,6 +1182,11 @@ namespace build2
           fsdir_rule::perform_update_direct (a, *dir);
         }
 
+        // Use the subset of the depdb checks to detect changes to the
+        // compilation database entry.
+        //
+        bool compiledb_changed (false);
+
         // Note: the leading '@' is reserved for the module map prefix (see
         // extract_modules()) and no other line must start with it.
         //
@@ -1198,8 +1204,14 @@ namespace build2
         // but only in what it targets, then the checksum will still change.
         //
         if (dd.expect (cast<string> (rs[x_checksum])) != nullptr)
+        {
           l4 ([&]{trace << "compiler mismatch forcing update of " << t;});
 
+          // The checksum includes the absolute compiler path.
+          //
+          compiledb_changed = true;
+        }
+
         // Then the compiler environment checksum.
         //
         if (dd.expect (env_checksum) != nullptr)
@@ -1263,7 +1275,17 @@ namespace build2
             append_sys_hdr_options (cs); // Extra system header dirs (last).
 
           if (dd.expect (cs.string ()) != nullptr)
+          {
             l4 ([&]{trace << "options mismatch forcing update of " << t;});
+
+            // Note that this doesn't include any of the "plumbing" options
+            // like -x, -c, -o, etc. In the unlikely event that there are
+            // changes in this area that also affect the semantics of the
+            // compilation database (options reordering doesn't, for example),
+            // then we can resort to incrementing the rule version.
+            //
+            compiledb_changed = true;
+          }
         }
 
         // Finally the source file.
@@ -1273,7 +1295,10 @@ namespace build2
           assert (!p.empty ()); // Sanity check.
 
           if (dd.expect (p) != nullptr)
+          {
             l4 ([&]{trace << "source file mismatch forcing update of " << t;});
+            compiledb_changed = true;
+          }
         }
 
         // If any of the above checks resulted in a mismatch (different
@@ -1296,6 +1321,14 @@ namespace build2
           u = dd.mtime > mt;
         }
 
+        // Confirm the entry in the compilation database, if any.
+        //
+        if (compiledb::match (bs, t, tp, src, compiledb_changed) && !u)
+        {
+          l4 ([&]{trace << "compilation database forcing update of " << t;});
+          u = true;
+        }
+
         // If updating for any of the above reasons, treat it as if doesn't
         // exist.
         //
@@ -7354,8 +7387,11 @@ namespace build2
       // apply()). For named modules there may be no obj*{} if this is a
       // sidebuild (obj*{} is already in the library binary).
       //
-      path relm;
+      const path* abso (nullptr);
+      const path* absm (nullptr);
       path relo;
+      path relm;
+
       switch (ut)
       {
       case unit_type::module_header:
@@ -7365,12 +7401,18 @@ namespace build2
       case unit_type::module_impl_part:
         {
           if (const file* o = find_adhoc_member<file> (t, tts.obj))
-            relo = relative (o->path ());
+          {
+            abso = &o->path ();
+            relo = relative (*abso);
+          }
 
           break;
         }
       default:
-        relo = relative (tp);
+        {
+          abso = &tp;
+          relo = relative (tp);
+        }
       }
 
       // Build the command line.
@@ -7400,6 +7442,9 @@ namespace build2
       small_vector<string, 2> header_args; // Header unit options storage.
       small_vector<string, 2> module_args; // Module options storage.
 
+      // NOTE: see a note in apply() on the compilation database implications
+      // if changing anything below.
+      //
       switch (cclass)
       {
       case compiler_class::msvc:
@@ -7534,6 +7579,7 @@ namespace build2
           {
             assert (ut != unit_type::module_header); // @@ MODHDR
 
+            absm = &tp;
             relm = relative (tp);
 
             args.push_back ("/ifcOutput");
@@ -7747,6 +7793,9 @@ namespace build2
                 // Output module file is specified in the mapping file, the
                 // same as input.
                 //
+                // We set neither relm nor absm since they are not on the
+                // command line.
+                //
                 if (ut == unit_type::module_header) // No obj, -c implied.
                   break;
 
@@ -7775,6 +7824,7 @@ namespace build2
               {
                 assert (ut != unit_type::module_header); // @@ MODHDR
 
+                absm = &tp;
                 relm = relative (tp);
 
                 // Without this option Clang's .pcm will reference source
@@ -7886,6 +7936,15 @@ namespace build2
       else if (verb == 2)
         print_process (args);
 
+      // Insert or update the entry in the compilation database, if any.
+      //
+      compiledb::execute (
+        bs,
+        t, tp, s, *sp,
+        cpath, args,
+        relo, abso != nullptr ? *abso : empty_path,
+        relm, absm != nullptr ? *absm : empty_path);
+
       // If we have the (partially) preprocessed output, switch to that.
       //
       // But we remember the original source/position to restore later.
diff --git a/libbuild2/cc/compiledb.cxx b/libbuild2/cc/compiledb.cxx
new file mode 100644
index 0000000..7414eb2
--- /dev/null
+++ b/libbuild2/cc/compiledb.cxx
@@ -0,0 +1,1099 @@
+// file      : libbuild2/cc/compiledb.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/compiledb.hxx>
+
+#include <cstring>  // strlen()
+#include <iostream> // cout
+
+#ifndef BUILD2_BOOTSTRAP
+#  include <libbutl/json/parser.hxx>
+#endif
+
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/module.hxx>
+
+#include <libbuild2/cc/target.hxx>
+#include <libbuild2/bin/target.hxx>
+
+using namespace std;
+
+namespace build2
+{
+  namespace cc
+  {
+    compiledb_set compiledbs; // The databases iterated by the functions below.
+
+    // compiledb
+    //
+    compiledb::
+    ~compiledb () // Defined out of line; the body is empty.
+    {
+    }
+
+    // Return true if this entry (ot compiled from it) passes the name and the
+    // output/input target type filters for the database with this name.
+    //
+    static bool
+    filter (const scope& rs,
+            const core_module& m,
+            const string& name,
+            const file& ot, const file& it)
+    {
+      tracer trace ("cc::compiledb_filter");
+
+      bool r (true);
+      const char* w (nullptr); // Why r is false.
+
+      // First check if writing to this database is enabled.
+      //
+      // No filter means not enabled.
+      //
+      if (m.cdb_filter_ == nullptr)
+      {
+        r = false;
+        w = "no database name filter";
+      }
+      else
+      {
+        // Iterate in reverse (so that later values override earlier) and take
+        // the first name match.
+        //
+        r = false;
+        for (const pair<optional<string>, bool>& p:
+               reverse_iterate (*m.cdb_filter_))
+        {
+          if (!p.first || *p.first == name) // Absent name matches any database.
+          {
+            r = p.second;
+            break;
+          }
+        }
+
+        if (!r)
+          w = "no match in database name filter";
+      }
+
+      // Verify the name is known in this amalgamation. Note that without
+      // this check we may end up writing to unrelated databases in other
+      // amalgamations (think linked configurations).
+      //
+      if (r)
+      {
+        r = false;
+        for (const core_module* pm (&m);
+             pm != nullptr;
+             pm = pm->outer_module_)
+        {
+          const strings& ns (pm->cdb_names_);
+
+          if (find (ns.begin (), ns.end (), name) != ns.end ())
+          {
+            r = true;
+            break;
+          }
+        }
+
+        if (!r)
+          w = "database name unknown in amalgamation";
+      }
+
+      // Filter based on the output target.
+      //
+      // If there is no filter specified, then accept all targets.
+      //
+      if (r && m.cdb_filter_output_ != nullptr)
+      {
+        // If the filter is empty, then there is no match.
+        //
+        if (m.cdb_filter_output_->empty ())
+        {
+          r = false;
+          w = "empty output target type filter";
+        }
+        else
+        {
+          const target_type& ott (ot.type ());
+
+          // Iterate in reverse (so that later values override earlier) and
+          // take the first name match.
+          //
+          r = false;
+          for (const pair<optional<string>, string>& p:
+                 reverse_iterate (*m.cdb_filter_output_))
+          {
+            if (p.first && *p.first != name) // Value for another database.
+              continue;
+
+            using namespace bin;
+
+            const string& n (p.second);
+
+            if (ott.name == n || n == "target")
+            {
+              r = true;
+            }
+            //
+            // Handle obj/bmi/hbmi{} groups ad hoc.
+            //
+            else if (n == "obj")
+            {
+              r = ott.is_a<obje> () || ott.is_a<objs> () || ott.is_a<obja> ();
+            }
+            else if (n == "bmi")
+            {
+              r = ott.is_a<bmie> () || ott.is_a<bmis> () || ott.is_a<bmia> ();
+            }
+            else if (n == "hbmi")
+            {
+              r = ott.is_a<hbmie> () || ott.is_a<hbmis> () || ott.is_a<hbmia> ();
+            }
+            else
+            {
+              // Handle the commonly-used, well-known targets directly (see
+              // note in core_config_init() for why we cannot pre-lookup
+              // them).
+              //
+              const target_type* tt (
+                n == "obje"   ? &obje::static_type :
+                n == "objs"   ? &objs::static_type :
+                n == "obja"   ? &obja::static_type :
+                n == "bmie"   ? &bmie::static_type :
+                n == "bmis"   ? &bmis::static_type :
+                n == "bmia"   ? &bmia::static_type :
+                n == "hbmie"  ? &hbmie::static_type :
+                n == "hbmis"  ? &hbmis::static_type :
+                n == "hbmia"  ? &hbmia::static_type :
+                rs.find_target_type (n));
+
+              if (tt == nullptr)
+                fail << "unknown target type '" << n << "' in "
+                     << "config.cc.compiledb.filter.output value";
+
+              r = ott.is_a (*tt);
+            }
+
+            if (r)
+              break;
+          }
+
+          if (!r)
+            w = "no match in output target type filter";
+        }
+      }
+
+      // Filter based on the input target.
+      //
+      // If there is no filter specified, then accept all targets.
+      //
+      if (r && m.cdb_filter_input_ != nullptr)
+      {
+        // If the filter is empty, then there is no match.
+        //
+        if (m.cdb_filter_input_->empty ())
+        {
+          r = false;
+          w = "empty input target type filter";
+        }
+        else
+        {
+          const target_type& itt (it.type ());
+
+          // Iterate in reverse (so that later values override earlier) and
+          // take the first name match.
+          //
+          r = false;
+          for (const pair<optional<string>, string>& p:
+                 reverse_iterate (*m.cdb_filter_input_))
+          {
+            if (p.first && *p.first != name) // Value for another database.
+              continue;
+
+            const string& n (p.second);
+
+            if (itt.name == n || n == "target")
+              r = true;
+            else
+            {
+              // The same optimization as above. Note: cxx{}, etc., are in the
+              // cxx module so we have to look them up.
+              //
+              const target_type* tt (
+                n == "c" ? &c::static_type :
+                n == "m" ? &m::static_type :
+                n == "S" ? &S::static_type :
+                rs.find_target_type (n));
+
+              if (tt == nullptr)
+                fail << "unknown target type '" << n << "' in "
+                     << "config.cc.compiledb.filter.input value";
+
+              r = itt.is_a (*tt);
+            }
+
+            if (r)
+              break;
+          }
+
+          if (!r)
+            w = "no match in input target type filter";
+        }
+      }
+
+      l6 ([&]
+          {
+            if (r)
+              trace << "keep " << ot << " in " << name;
+            else
+              trace << "omit " << ot << " from " << name << ": " << w;
+          });
+
+      return r;
+    }
+
+    bool compiledb::
+    match (const scope& bs,
+           const file& ot, const path_type& op,
+           const file& it,
+           bool changed)
+    {
+      if (compiledbs.empty ()) // No databases, nothing to confirm.
+        return false;
+
+      const scope& rs (*bs.root_scope ());
+      const auto* m (rs.find_module<core_module> (core_module::name));
+
+      assert (m != nullptr);
+
+      bool u (false); // True if any database's match() returned true.
+
+      for (const unique_ptr<compiledb>& db: compiledbs)
+      {
+        if (filter (rs, *m, db->name, ot, it))
+          u = db->match (ot, op, changed) || u; // Always calls db->match().
+      }
+
+      return u;
+    }
+
+    void compiledb::
+    execute (const scope& bs,
+             const file& ot, const path_type& op,
+             const file& it, const path_type& ip,
+             const process_path& cpath, const cstrings& args,
+             const path_type& relo, const path_type& abso,
+             const path_type& relm, const path_type& absm)
+    {
+      if (compiledbs.empty ()) // No databases, nothing to insert or update.
+        return;
+
+      const scope& rs (*bs.root_scope ());
+      const auto* m (rs.find_module<core_module> (core_module::name));
+
+      assert (m != nullptr);
+
+      assert (relo.empty () == abso.empty () && // Each rel/abs pair must be
+              relm.empty () == absm.empty ());  // both empty or both set.
+
+      for (const unique_ptr<compiledb>& db: compiledbs)
+      {
+        if (filter (rs, *m, db->name, ot, it)) // Same filtering as in match().
+          db->execute (ot, op, it, ip, cpath, args, relo, abso, relm, absm);
+      }
+    }
+
+    void
+    compiledb_pre (context& ctx, action a, const action_targets&)
+    {
+      // Note: won't be registered if compiledbs is empty.
+
+      // Note: may be called directly with empty action_targets.
+
+      assert (a.inner_action () == perform_update_id);
+
+      tracer trace ("cc::compiledb_pre");
+
+      bool mctx (ctx.module_context == &ctx); // Is ctx the module context?
+
+      l6 ([&]{trace << (mctx ? "module" : "normal") << " context " << &ctx;});
+
+      for (const unique_ptr<compiledb>& db: compiledbs)
+        db->pre (ctx); // Forward to every database.
+    }
+
+    void
+    compiledb_post (context& ctx,
+                    action a,
+                    const action_targets& ts,
+                    bool failed)
+    {
+      // Note: won't be registered if compiledbs is empty.
+
+      assert (a.inner_action () == perform_update_id);
+
+      tracer trace ("cc::compiledb_post");
+
+      bool mctx (ctx.module_context == &ctx); // Is ctx the module context?
+
+      l6 ([&]{trace << (mctx ? "module" : "normal") << " context " << &ctx
+                    << ", failed: " << failed;});
+
+      for (const unique_ptr<compiledb>& db: compiledbs)
+        db->post (ctx, ts, failed); // Forward to every database.
+    }
+
+#ifndef BUILD2_BOOTSTRAP
+
+    namespace json = butl::json;
+
+    // compiledb_stdout
+    //
+    compiledb_stdout::
+    compiledb_stdout (string n)
+        : compiledb (move (n), path_type ()), // Empty path.
+          state_ (state::init),               // Array not yet started.
+          nesting_ (0),                       // No pre() calls in progress.
+          js_ (cout, 0 /* indentation */, "" /* multi_value_separator */)
+    {
+    }
+
+    void compiledb_stdout::
+    pre (context&)
+    {
+      // If the previous operation batch failed, then we shouldn't be here.
+      //
+      assert (state_ != state::failed);
+
+      // The module context (used to build build system modules) poses a
+      // problem: we can receive its callbacks before the main context's or
+      // nested in the pre/post calls of the main context (or both, in
+      // fact). Plus there may be multiple pre/post sequences corresponding to
+      // the module context of both kinds. The three distinct cases are:
+      //
+      // 1. Module is loaded as part of the initial buildfile load (e.g., from
+      //    root.build) -- in this case we will observe module pre/post before
+      //    the main context's pre/post.
+      //
+      //    In fact, to be precise, we will only observe them if cc is loaded
+      //    before such a module.
+      //
+      // 2. Module is loaded via the interrupting load (e.g., from a directory
+      //    buildfile that is loaded implicitly during match) -- in this case
+      //    we will observe pre/post calls nested into the main context's
+      //    pre/post.
+      //
+      // 3. The module context is used to build an ad hoc C++ recipe -- in
+      //    this case we also get nested calls like in (2) since this happens
+      //    during the recipe's match().
+      //
+      // One thing to keep in mind (and which we rely upon quite a bit below)
+      // is that the main context's post will always be last (within any given
+      // operation; there could be another for the subsequent operation in a
+      // batch).
+      //
+      // Handling the nested case is relatively straightforward: we can keep
+      // track and ignore all the nested calls.
+      //
+      // The before case is where things get complicated. We could "take" the
+      // first module pre call and then wait until the main post, unless we
+      // see a module post call with failed=true, in which case there will be
+      // no further pre/post calls. There is, however, a nuance: the module is
+      // loaded and built for any operation, not just update, which means that
+      // if the main operation is not update (say, it's clean), we won't see
+      // any of the main context's pre/post calls.
+      //
+      // The way we are going to resolve this problem is different for the
+      // stdout and file implementations:
+      //
+      // For stdout we will just say that it should only be used with the
+      // update operation. There is really no good reason to use it with
+      // anything else anyway. See compiledb_stdout::post() for additional
+      // details.
+      //
+      // For file we will rely on its persistence and simply close and reopen
+      // the database for each pre/post sequence, the same way as if they were
+      // separate operations in a batch.
+      //
+      if (nesting_++ != 0) // Nested pre() call.
+        return;
+
+      if (state_ == state::init) // First pre() call.
+      {
+        state_ = state::empty;
+        cout << "[\n"; // Begin the top-level JSON array.
+      }
+    }
+
+    bool compiledb_stdout::
+    match (const file&, const path_type&, bool)
+    {
+      return true; // Unconditional: there is no stored entry to compare to.
+    }
+
+    static inline const char*
+    rel_to_abs (const char* a,
+                const string& rs, const string& as,
+                string& buf)
+    {
+      if (size_t rn = rs.size ()) // An empty rs never matches.
+      {
+        size_t an (strlen (a));
+
+        if (an >= rn && rs.compare (0, rn, a, rn) == 0) // a starts with rs.
+        {
+          if (an == rn) // Exact match, buf is not needed.
+            return as.c_str ();
+
+          buf = as; // Replace the rs prefix with as, keeping the rest of a.
+          buf.append (a + rn, an - rn);
+
+          return buf.c_str ();
+        }
+      }
+
+      return nullptr; // No match, nothing was translated.
+    }
+
+    void compiledb_stdout::
+    execute (const file&, const path_type& op,
+             const file&, const path_type& ip,
+             const process_path& cpath, const cstrings& args,
+             const path_type& relo, const path_type& abso,
+             const path_type& relm, const path_type& absm)
+    {
+      const string& ro (relo.string ());
+      const string& ao (abso.string ());
+
+      const string& rm (relm.string ());
+      const string& am (absm.string ());
+
+      mlock l (mutex_); // Held while accessing state_, cout, and js_.
+
+      switch (state_)
+      {
+      case state::full:
+        {
+          cout << ",\n"; // Separate from the previously written entry.
+          break;
+        }
+      case state::empty:
+        {
+          state_ = state::full; // First entry, no separator needed.
+          break;
+        }
+      case state::failed:
+        return;
+      case state::init:
+        assert (false);
+        return;
+      }
+
+      try
+      {
+        // Duplicate what we have in the file implementation (instead of
+        // factoring it out to something common) in case here we need to
+        // adjust things (change order, omit some values; for example to
+        // accommodate broken consumers). We have this freedom here but not
+        // there.
+        //
+        js_.begin_object ();
+        {
+          js_.member ("output", op.string ());
+          js_.member ("file", ip.string ());
+
+          js_.member_begin_array ("arguments");
+          {
+            string buf; // Reuse.
+            for (auto b (args.begin ()), i (b), e (args.end ());
+                 i != e && *i != nullptr;
+                 ++i)
+            {
+              const char* r;
+
+              if (i == b) // The first argument comes from cpath instead.
+                r = cpath.effect_string ();
+              else
+              {
+                // Untranslate relative paths back to absolute.
+                //
+                const char* a (*i);
+
+                if ((r = rel_to_abs (a, ro, ao, buf)) == nullptr &&
+                    (r = rel_to_abs (a, rm, am, buf)) == nullptr)
+                  r = a; // Neither prefix matched, use as is.
+              }
+
+              js_.value (r);
+            }
+          }
+          js_.end_array ();
+
+          js_.member ("directory", work.string ());
+        }
+        js_.end_object ();
+      }
+      catch (const json::invalid_json_output& e)
+      {
+        // There is no way (nor reason; the output will most likely be invalid
+        // anyway) to reuse the failed json serializer so make sure we ignore
+        // all the subsequent callbacks.
+        //
+        state_ = state::failed;
+
+        l.unlock ();
+
+        fail << "invalid compilation database json output: " << e;
+      }
+    }
+
+    void compiledb_stdout::
+    post (context& ctx, const action_targets&, bool failed)
+    {
+      assert (nesting_ != 0);
+      if (--nesting_ != 0) // Nested post() call.
+        return;
+
+      bool mctx (ctx.module_context == &ctx); // Is ctx the module context?
+
+      switch (state_)
+      {
+      case state::empty:
+      case state::full:
+        {
+          // If this is a module context's post, wait for the main context's
+          // post (last) unless the module load failed (in which case there
+          // will be no main pre/post).
+          //
+          // Note that there is no easy way to diagnose the case where we
+          // won't get the main pre/post calls. Instead, we will just produce
+          // invalid JSON (array won't be closed). In a somewhat hackish way,
+          // this actually makes the `b [-n] clean update` sequence work: we
+          // will take the pre() call from clean and the main post() from
+          // update.
+          //
+          if (mctx && !failed)
+            return;
+
+          if (state_ == state::full)
+            cout << '\n'; // Terminate the line of the last entry.
+
+          cout << "]\n"; // End the top-level JSON array.
+          break;
+        }
+      case state::failed:
+        return;
+      case state::init:
+        assert (false);
+      }
+
+      state_ = state::init; // The next pre() call begins a new array.
+    }
+
+    // compiledb_file
+    //
+    compiledb_file::
+    compiledb_file (string n, path_type p)
+        : compiledb (move (n), move (p)),
+          state_ (state::closed), // Loaded by the first pre() call.
+          nesting_ (0)            // No pre() calls in progress.
+    {
+    }
+
+    void compiledb_file::
+    pre (context&)
+    {
+      // If the previous operation batch failed, then we shouldn't be here.
+      //
+      assert (state_ != state::failed);
+
+      // See compiledb_stdout::pre() for background on dealing with the module
+      // context. Here are some file-specific nuances:
+      //
+      // We are going to load the database on the first pre call and flush
+      // (but not close) it on the matching post. Flushing means that we will
+      // update the file but still keep the in-memory state, in case there is
+      // another pre/post session coming. This is both a performance
+      // optimization but also the way we handle pruning no longer present
+      // entries, which gets tricky across multiple pre/post sessions (see
+      // post() for details).
+      //
+      if (nesting_++ != 0) // Nested pre() call.
+        return;
+
+      if (state_ == state::closed) // First pre() call.
+      {
+        // Load the contents of the file if it exists, marking all the entries
+        // as (presumed) absent.
+        //
+        if (exists (path))
+        {
+          uint64_t line (1); // Current line, for diagnostics.
+          try
+          {
+            ifdstream ifs (path, ifdstream::badbit);
+
+            // Parse the top-level array manually (see post() for the expected
+            // format).
+            //
+            auto throw_invalid_input = [] (const string& d)
+            {
+              throw json::invalid_json_input ("", 0, 1, 0, d);
+            };
+
+            enum {first, second, next, last, end} s (first);
+
+            for (string l; !eof (getline (ifs, l)); line++)
+            {
+              switch (s)
+              {
+              case first:
+                {
+                  if (l != "[")
+                    throw_invalid_input ("beginning of array expected");
+
+                  s = second;
+                  continue;
+                }
+              case second:
+                {
+                  if (l == "]") // Empty array.
+                  {
+                    s = end;
+                    continue;
+                  }
+
+                  s = next;
+                }
+                // Fall through.
+              case next:
+                {
+                  if (!l.empty () && l.back () == ',')
+                    l.pop_back ();
+                  else
+                    s = last; // No trailing comma means the last entry.
+
+                  break;
+                }
+              case last:
+                {
+                  if (l != "]")
+                    throw_invalid_input ("end of array expected");
+
+                  s = end;
+                  continue;
+                }
+              case end:
+                {
+                  throw_invalid_input ("junk after end of array");
+                }
+              }
+
+              // Parse just the output target path, which must come first.
+              //
+              json::parser jp (l, "" /* name */);
+
+              jp.next_expect (json::event::begin_object);
+              string op (move (jp.next_expect_member_string ("output")));
+
+              auto r (db_.emplace (move (op), entry {entry_status::absent, l}));
+              if (!r.second)
+                throw_invalid_input (
+                  "duplicate output value '" + r.first->first + '\'');
+            }
+
+            if (s != end)
+              throw_invalid_input ("corrupt input text");
+          }
+          catch (const json::invalid_json_input& e)
+          {
+            state_ = state::failed; // Must precede fail, which doesn't return.
+            location l (path, line, e.column);
+            fail (l) << "invalid compilation database json input: " << e;
+          }
+          catch (const io_error& e)
+          {
+            state_ = state::failed; // Must precede fail, which doesn't return.
+            fail << "unable to read " << path << ": " << e;
+          }
+        }
+
+        absent_ = db_.size (); // Every loaded entry starts out as absent.
+        changed_ = false;
+
+        state_ = state::open;
+      }
+    }
+
+    bool compiledb_file::
+    match (const file&, const path_type& op, bool changed)
+    {
+      mlock l (mutex_); // Held while accessing state_ and db_.
+
+      switch (state_)
+      {
+      case state::open:
+        break;
+      case state::failed:
+        return false;
+      case state::closed:
+        assert (false);
+        return false;
+      }
+
+      // Mark an existing entry as present or changed. And if one does not
+      // exist, then (for now) as missing.
+      //
+      auto i (db_.find (op.string ()));
+
+      if (i != db_.end ())
+      {
+        entry& e (i->second);
+
+        // Note: we can end up with present entries via the module context
+        // (see post() below). And we can see changed entries in a subsequent
+        // nested module context.
+        //
+        switch (e.status)
+        {
+        case entry_status::present:
+        case entry_status::changed:
+          assert (!changed);
+          break;
+        case entry_status::absent:
+          {
+            e.status = changed ? entry_status::changed : entry_status::present;
+
+            absent_--; // One fewer entry presumed absent.
+            changed_ = changed_ || (e.status == entry_status::changed);
+            break;
+          }
+        case entry_status::missing:
+          assert (false);
+        }
+
+        return false; // Existing entry.
+      }
+      else
+      {
+        db_.emplace (op.string (), entry {entry_status::missing, string ()});
+
+        changed_ = true;
+
+        return true; // New entry, its json is still empty.
+      }
+    }
+
+    void compiledb_file::
+    execute (const file&, const path_type& op,
+             const file&, const path_type& ip,
+             const process_path& cpath, const cstrings& args,
+             const path_type& relo, const path_type& abso,
+             const path_type& relm, const path_type& absm)
+    {
+      const string& ro (relo.string ());
+      const string& ao (abso.string ());
+
+      const string& rm (relm.string ());
+      const string& am (absm.string ());
+
+      mlock l (mutex_); // Held while accessing state_ and db_.
+
+      switch (state_)
+      {
+      case state::open:
+        break;
+      case state::failed:
+        return;
+      case state::closed:
+        assert (false);
+        return;
+      }
+
+      auto i (db_.find (op.string ()));
+
+      // We should have had the match() call before execute().
+      //
+      assert (i != db_.end () && i->second.status != entry_status::absent);
+
+      entry& e (i->second);
+
+      if (e.status == entry_status::present) // Present and unchanged.
+        return;
+
+      // The entry is either missing or changed.
+      //
+      try
+      {
+        e.json.clear (); // Re-serialize the entry from scratch.
+        json::buffer_serializer js (e.json, 0 /* indentation */);
+
+        js.begin_object ();
+        {
+          js.member ("output", op.string ()); // Note: must come first.
+          js.member ("file", ip.string ());
+
+          js.member_begin_array ("arguments");
+          {
+            string buf; // Reuse.
+            for (auto b (args.begin ()), i (b), e (args.end ());
+                 i != e && *i != nullptr;
+                 ++i)
+            {
+              const char* r;
+
+              if (i == b) // The first argument comes from cpath instead.
+                r = cpath.effect_string ();
+              else
+              {
+                // Untranslate relative paths back to absolute.
+                //
+                const char* a (*i);
+
+                if ((r = rel_to_abs (a, ro, ao, buf)) == nullptr &&
+                    (r = rel_to_abs (a, rm, am, buf)) == nullptr)
+                  r = a; // Neither prefix matched, use as is.
+              }
+
+              js.value (r);
+            }
+          }
+          js.end_array ();
+
+          js.member ("directory", work.string ());
+        }
+        js.end_object ();
+      }
+      catch (const json::invalid_json_output& e)
+      {
+        // There is no way (nor reason; the output will most likely be invalid
+        // anyway) to reuse the failed json serializer so make sure we ignore
+        // all the subsequent callbacks.
+        //
+        state_ = state::failed;
+
+        l.unlock ();
+
+        fail << "invalid compilation database json output: " << e;
+      }
+
+      e.status = entry_status::changed; // Also for a previously missing entry.
+    }
+
+    void compiledb_file::
+    post (context& ctx, const action_targets& ts, bool failed)
+    {
+      assert (nesting_ != 0);
+      if (--nesting_ != 0) // Nested post() call.
+        return;
+
+      switch (state_)
+      {
+      case state::open:
+        break;
+      case state::failed:
+        return;
+      case state::closed:
+        assert (false);
+        return;
+      }
+
+      bool mctx (ctx.module_context == &ctx);
+
+      tracer trace ("cc::compiledb_file::post");
+
+      // See if we need to update the file.
+      //
+      if (changed_)
+        l6 ([&]{trace << "updating due to missing/changed entries: " << path;});
+
+      // Don't prune the stale entries if the operation failed since we may
+      // not have gotten to execute some of them.
+      //
+      // And if this is a module context's post, then also don't prune the
+      // stale entries, instead waiting for the main context's post (if there
+      // will be one; this means we will only prune on update).
+      //
+      // Actually, this pruning business is even trickier than that: if we
+      // are not updating the entire project (say, rather only a subdirectory
+      // or even a specific target), then we will naturally not get any
+      // match/execute calls for targets of this project that don't get pulled
+      // into this build. Which means that we cannot just prune entries that
+      // we did not match/execute. It feels the correct semantics is to only
+      // prune the entries if they are in a subdirectory of the dir{} targets
+      // which we are building.
+      //
+      // What do we do about the module context, where we always update a
+      // specific libs{}? We could use its directory instead but that may lead
+      // to undesirable results. For example, if there are unit tests in the
+      // same directory, we will end up dropping their entries. It feels like
+      // the correct approach is to just ignore module context's entries
+      // entirely. If someone wants to prune the compilation database of a
+      // module, they will just need to update it directly (i.e., via the main
+      // context). Note that we cannot apply the same "simplification" to the
+      // changed entries since we will only observe the change once.
+      //
+      bool absent (false);
+
+      if (!failed && !mctx && absent_ != 0)
+      {
+        // Pre-scan the entries and drop the appropriate absent ones.
+        //
+        for (auto i (db_.begin ()); i != db_.end (); )
+        {
+          const entry& e (i->second);
+
+          if (e.status == entry_status::absent)
+          {
+            // Absent entries should be rare enough during the normal
+            // development that we don't need to bother with caching the
+            // directories.
+            //
+            bool a (false);
+            for (const action_target& at: ts)
+            {
+              const target& t (at.as<target> ());
+              if (t.is_a<dir> ())
+              {
+                const string& p (i->first);
+                const string& d (t.out_dir ().string ());
+
+                if (path_traits::sub (p.c_str (), p.size (),
+                                      d.c_str (), d.size ()))
+                {
+                  // Remove this entry from the in-memory state so that it
+                  // matches the file state.
+                  //
+                  i = db_.erase (i);
+                  --absent_;
+                  a = absent = true;
+                  break;
+                }
+              }
+            }
+
+            if (a)
+              continue;
+          }
+
+          ++i;
+        }
+      }
+
+      if (absent)
+        l6 ([&]{trace << "updating due to absent entries: " << path;});
+
+      try
+      {
+        auto_rmfile rm;
+        ofdstream ofs;
+
+        bool u (changed_ || absent); // Update the file.
+
+        if (u)
+        {
+          rm = auto_rmfile (path);
+          ofs.open (path);
+
+          // We parse the top-level array manually (see pre() above) and the
+          // expected format is as follows:
+          //
+          // [
+          // {"output":...},
+          // ...
+          // {"output":...}
+          // ]
+          //
+          ofs.write ("[\n", 2);
+        }
+
+        // Iterate over the entries resetting their status and writing them to
+        // the file if necessary.
+        //
+        bool first (true);
+        for (auto& p: db_)
+        {
+          entry& e (p.second);
+
+          // First sort out the status also skipping appropriate entries.
+          //
+          switch (e.status)
+          {
+          case entry_status::absent:
+            {
+              // This is an absent entry that we should keep (see pre-scan
+              // above).
+              //
+              break;
+            }
+          case entry_status::missing:
+            {
+              // This should only happen if this operation has failed (see
+              // also below) or we are in the match-only mode.
+              //
+              assert (failed || ctx.match_only);
+              continue;
+            }
+          case entry_status::present:
+          case entry_status::changed:
+            {
+              // This is tricky: if this is a module context, then we don't
+              // want to mark the entries as absent since they will then get
+              // dropped by the main operation context.
+              //
+              if (mctx)
+                e.status = entry_status::present;
+              else
+              {
+                // Note: this is necessary for things to work across multiple
+                // operations in a batch.
+                //
+                e.status = entry_status::absent;
+                absent_++;
+              }
+            }
+          }
+
+          if (u)
+          {
+            if (first)
+              first = false;
+            else
+              ofs.write (",\n", 2);
+
+            ofs.write (e.json.c_str (), e.json.size ());
+          }
+        }
+
+        if (u)
+        {
+          ofs.write (first ? "]\n" : "\n]\n", first ? 2 : 3);
+
+          ofs.close ();
+          rm.cancel ();
+        }
+      }
+      catch (const io_error& e)
+      {
+        state_ = state::failed; // Note: must come before fail (it throws).
+
+        fail << "unable to write to " << path << ": " << e;
+      }
+
+      // If this operation has failed, then our state may not be accurate
+      // (e.g., entries with missing status) but we also don't expect any
+      // further pre calls. Let's change our state to failed as a sanity
+      // check.
+      //
+      if (failed)
+        state_ = state::failed;
+      else
+        changed_ = false;
+
+      // Note: keep in the open state (see pre() for details).
+    }
+
+#endif // BUILD2_BOOTSTRAP
+  }
+}
diff --git a/libbuild2/cc/compiledb.hxx b/libbuild2/cc/compiledb.hxx
new file mode 100644
index 0000000..edfd1ee
--- /dev/null
+++ b/libbuild2/cc/compiledb.hxx
@@ -0,0 +1,226 @@
+// file      : libbuild2/cc/compiledb.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_COMPILEDB_HXX
+#define LIBBUILD2_CC_COMPILEDB_HXX
+
+#include <unordered_map>
+
+#ifndef BUILD2_BOOTSTRAP
+#  include <libbutl/json/serializer.hxx>
+#endif
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/action.hxx>
+#include <libbuild2/context.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    using compiledb_name_filter = vector<pair<optional<string>, bool>>;
+    using compiledb_type_filter = vector<pair<optional<string>, string>>;
+
+    class compiledb
+    {
+    public:
+      // Match callback where we confirm an entry in the database and also
+      // signal whether it has changes (based on change tracking in depdb).
+      // Return true to force compilation of this target and thus make sure
+      // the below execute() is called (unless something before that failed).
+      //
+      // Besides noticing changes, this callback is also necessary to notice
+      // and delete entries that should no longer be in the database (e.g., a
+      // source file was removed from the project).
+      //
+      // Note that output is either obj*{}, bmi*{}, or hbmi*{}.
+      //
+      static bool
+      match (const scope& bs,
+             const file& output, const path& output_path,
+             const file& input,
+             bool changed);
+
+      // Execute callback where we insert or update an entry in the database.
+      //
+      // The {relo, abso}, and {relm, absm} pairs are used to "untranslate"
+      // relative paths to absolute. Specifically, any argument that has rel?
+      // as a prefix has this prefix replaced with the corresponding abs?.
+      // Note that this means we won't be able to handle old MSVC and
+      // clang-cl, which don't support the `/F?: <path>` form, only
+      // `/F?<path>`. Oh, well. Note also that either relo or relm (but not
+      // both) could be empty if unused.
+      //
+      // Note also that we assume the source file is always absolute and is
+      // the last argument.
+      //
+      static void
+      execute (const scope& bs,
+               const file& output, const path& output_path,
+               const file& input, const path& input_path,
+               const process_path& cpath, const cstrings& args,
+               const path& relo, const path& abso,
+               const path& relm, const path& absm);
+
+    public:
+      using path_type = build2::path;
+
+      string    name;
+      path_type path;
+
+      // The path is expected to be absolute and normalized or empty if the
+      // name is `-` (stdout).
+      //
+      compiledb (string n, path_type p)
+          : name (move (n)), path (move (p))
+      {
+      }
+
+      virtual void
+      pre (context&) = 0;
+
+      virtual bool
+      match (const file& output, const path_type& output_path,
+             bool changed) = 0;
+
+      virtual void
+      execute (const file& output, const path_type& output_path,
+               const file& input, const path_type& input_path,
+               const process_path& cpath, const cstrings& args,
+               const path_type& relo, const path_type& abso,
+               const path_type& relm, const path_type& absm) = 0;
+
+      virtual void
+      post (context&, const action_targets&, bool failed) = 0;
+
+      virtual
+      ~compiledb ();
+    };
+
+    using compiledb_set = vector<unique_ptr<compiledb>>;
+
+    // Populated by core_config_init() during serial load.
+    //
+    extern compiledb_set compiledbs;
+
+    // Context operation callbacks.
+    //
+    void
+    compiledb_pre (context&, action, const action_targets&);
+
+    void
+    compiledb_post (context&, action, const action_targets&, bool failed);
+
+#ifndef BUILD2_BOOTSTRAP
+
+    // Implementation that writes to stdout.
+    //
+    // Note that this implementation forces compilation of all the targets for
+    // which it is called to make sure their entries are in the database. So
+    // it is typically used in the dry run mode.
+    //
+    class compiledb_stdout: public compiledb
+    {
+    public:
+      // The path is expected to be empty.
+      //
+      explicit
+      compiledb_stdout (string name);
+
+      virtual void
+      pre (context&) override;
+
+      virtual bool
+      match (const file& output, const path_type& output_path,
+             bool changed) override;
+
+      virtual void
+      execute (const file& output, const path_type& output_path,
+               const file& input, const path_type& input_path,
+               const process_path& cpath, const cstrings& args,
+               const path_type& relo, const path_type& abso,
+               const path_type& relm, const path_type& absm) override;
+
+      virtual void
+      post (context&, const action_targets&, bool failed) override;
+
+    private:
+      mutex mutex_;
+      enum class state {init, empty, full, failed} state_;
+      size_t nesting_;
+      butl::json::stream_serializer js_;
+    };
+
+    // Implementation that maintains a file.
+    //
+    class compiledb_file: public compiledb
+    {
+    public:
+      compiledb_file (string name, path_type path);
+
+      virtual void
+      pre (context&) override;
+
+      virtual bool
+      match (const file& output, const path_type& output_path,
+             bool changed) override;
+
+      virtual void
+      execute (const file& output, const path_type& output_path,
+               const file& input, const path_type& input_path,
+               const process_path& cpath, const cstrings& args,
+               const path_type& relo, const path_type& abso,
+               const path_type& relm, const path_type& absm) override;
+
+      virtual void
+      post (context&, const action_targets&, bool failed) override;
+
+    private:
+      mutex mutex_;    // Taken by execute().
+      enum class state {closed, open, failed} state_;
+      size_t nesting_; // Nesting depth; only the outermost post() does work.
+
+      // We want to optimize the performance for the incremental update case
+      // where only a few files will be recompiled and most of the time there
+      // will be no change in the command line, which means we won't need to
+      // rewrite the file.
+      //
+      // As a result, our in-memory representation is a hashmap (we could have
+      // thousands of entries) of absolute and normalized output file paths
+      // (stored as strings for lookup efficiency) to their serialized JSON
+      // text lines plus the status: absent, present, changed, or missing
+      // (entry should be there but is not). This way we don't waste
+      // (completely) parsing (and re-serializing) each line knowing that we
+      // won't need to touch most of them.
+      //
+      // In fact, we could have gone even further and used a sorted vector
+      // since insertions will be rare in this case. But we will need to
+      // lookup every entry on each update, so it's unclear this is a win.
+      //
+      enum class entry_status {absent, present, changed, missing};
+
+      struct entry
+      {
+        entry_status status;
+        string json;
+      };
+
+      using map_type = std::unordered_map<string, entry>;
+      map_type db_;
+
+      // Number/presence of various entries in the database (used to determine
+      // whether we need to update the file without iterating over all the
+      // entries).
+      //
+      size_t absent_; // Number of absent entries.
+      bool changed_;  // Presence of changed or missing entries.
+    };
+
+#endif // BUILD2_BOOTSTRAP
+  }
+}
+
+#endif // LIBBUILD2_CC_COMPILEDB_HXX
diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx
index e124450..1ddeca8 100644
--- a/libbuild2/cc/init.cxx
+++ b/libbuild2/cc/init.cxx
@@ -10,8 +10,10 @@
 
 #include <libbuild2/config/utility.hxx>
 
+#include <libbuild2/cc/module.hxx>
 #include <libbuild2/cc/target.hxx>
 #include <libbuild2/cc/utility.hxx>
+#include <libbuild2/cc/compiledb.hxx>
 
 using namespace std;
 using namespace butl;
@@ -23,7 +25,7 @@ namespace build2
     // Scope operation callback that cleans up module sidebuilds.
     //
     static target_state
-    clean_module_sidebuilds (action, const scope& rs, const dir&)
+    clean_module_sidebuilds (const scope& rs)
     {
       context& ctx (rs.ctx);
 
@@ -67,6 +69,81 @@ namespace build2
       return target_state::unchanged;
     }
 
+    // Scope operation callback that cleans up compilation databases.
+    //
+    static target_state
+    clean_compiledb (const scope& rs)
+    {
+      context& ctx (rs.ctx);
+      target_state result (target_state::unchanged);
+
+      for (const auto& cdb: compiledbs)
+      {
+        const path& f (cdb->path);
+
+        // Only remove databases that have a file (the path is empty for
+        // stdout) located in a directory that belongs to this root scope.
+        //
+        if (!f.empty () &&
+            ctx.scopes.find_out (f.directory ()).root_scope () == &rs &&
+            rmfile (ctx, f))
+          result = target_state::changed;
+      }
+
+      return result;
+    }
+
+    // Scope operation callback for cleaning module sidebuilds and compilation
+    // databases.
+    //
+    static target_state
+    clean_callback (action, const scope& rs, const dir&)
+    {
+      // Sidebuilds are always cleaned; databases only if any are configured.
+      //
+      target_state ts (clean_module_sidebuilds (rs));
+      if (compiledbs.empty ())
+        return ts;
+      return ts |= clean_compiledb (rs);
+    }
+
+    // Custom save function that completes relative paths in the
+    // config.cc.compiledb and config.cc.compiledb.name values.
+    //
+    static pair<names_view, const char*>
+    save_compiledb_name (const scope&,
+                         const value& v,
+                         const value*,
+                         names& storage)
+    {
+      const names& orig (v.as<names> ()); // Value is untyped.
+
+      // Without any <name>@<path> pairs there are no paths to complete.
+      //
+      auto paired = [] (const name& n) -> bool {return n.pair;};
+      if (none_of (orig.begin (), orig.end (), paired))
+        return make_pair (names_view (orig), "=");
+
+      storage = orig;
+      for (auto i (storage.begin ()), e (storage.end ()); i != e; ++i)
+      {
+        if (!i->pair)
+          continue;
+
+        name& p (*++i); // The <path> half of the pair.
+
+        if (!p.directory ())
+          p.canonicalize ();
+
+        if (p.dir.relative ())
+          p.dir.complete ();
+
+        p.dir.normalize ();
+      }
+
+      return make_pair (names_view (storage), "=");
+    }
+
     bool
     core_vars_init (scope& rs,
                     scope&,
@@ -107,6 +184,22 @@ namespace build2
 
       vp.insert<abs_dir_path> ("config.cc.pkgconfig.sysroot");
 
+      // Compilation database.
+      //
+      // See the manual for the semantics.
+      //
+      // config.cc.compiledb                --  <name>[@<path>]    (untyped)
+      // config.cc.compiledb.name           --  <name>[@<path>]... (untyped)
+      // config.cc.compiledb.filter         --  [<name>@]<bool>...
+      // config.cc.compiledb.filter.input   --  [<name>@]<target-type>...
+      // config.cc.compiledb.filter.output  --  [<name>@]<target-type>...
+      //
+      vp.insert                        ("config.cc.compiledb");
+      vp.insert                        ("config.cc.compiledb.name");
+      vp.insert<compiledb_name_filter> ("config.cc.compiledb.filter");
+      vp.insert<compiledb_type_filter> ("config.cc.compiledb.filter.input");
+      vp.insert<compiledb_type_filter> ("config.cc.compiledb.filter.output");
+
       vp.insert<strings> ("cc.poptions");
       vp.insert<strings> ("cc.coptions");
       vp.insert<strings> ("cc.loptions");
@@ -192,16 +285,6 @@ namespace build2
       //
       vp.insert<bool> ("cc.serialize");
 
-      // Register scope operation callback.
-      //
-      // It feels natural to clean up sidebuilds as a post operation but that
-      // prevents the (otherwise-empty) out root directory to be cleaned up
-      // (via the standard fsdir{} chain).
-      //
-      rs.operation_callbacks.emplace (
-        perform_clean_id,
-        scope::operation_callback {&clean_module_sidebuilds, nullptr /*post*/});
-
       return true;
     }
 
@@ -292,6 +375,8 @@ namespace build2
 
       assert (first);
 
+      context& ctx (rs.ctx);
+
       // Load cc.core.guess.
       //
       load_module (rs, rs, "cc.core.guess", loc);
@@ -312,7 +397,6 @@ namespace build2
       //
       // @@ Same nonsense as in module.
       //
-      //
       rs.assign ("cc.poptions") += cast_null<strings> (
         lookup_config (rs, "config.cc.poptions", nullptr));
 
@@ -363,21 +447,16 @@ namespace build2
       if (!cast_false<bool> (rs["bin.config.loaded"]))
       {
         // Prepare configuration hints (pretend it belongs to root scope).
-        // They are only used on the first load of bin.config so we only
-        // populate them on our first load.
         //
         variable_map h (rs);
 
-        if (first)
-        {
-          // Note that all these variables have already been registered.
-          //
-          h.assign ("config.bin.target") =
-            cast<target_triplet> (rs["cc.target"]).representation ();
+        // Note that all these variables have already been registered.
+        //
+        h.assign ("config.bin.target") =
+          cast<target_triplet> (rs["cc.target"]).representation ();
 
-          if (auto l = extra.hints["config.bin.pattern"])
-            h.assign ("config.bin.pattern") = cast<string> (l);
-        }
+        if (auto l = extra.hints["config.bin.pattern"])
+          h.assign ("config.bin.pattern") = cast<string> (l);
 
         init_module (rs, rs, "bin.config", loc, false /* optional */, h);
       }
@@ -386,7 +465,6 @@ namespace build2
       // ourselves since the target can come from the configuration and not
       // our hint).
       //
-      if (first)
       {
         const auto& ct (cast<target_triplet> (rs["cc.target"]));
         const auto& bt (cast<target_triplet> (rs["bin.target"]));
@@ -416,6 +494,399 @@ namespace build2
       if (tsys == "mingw32")
         load_module (rs, rs, "bin.rc.config", loc);
 
+      // Find the innermost outer core_module, if any.
+      //
+      const core_module* om (nullptr);
+      for (const scope* s (&rs);
+           (s = s->parent_scope ()->root_scope ()) != nullptr; )
+      {
+        if ((om = s->find_module<core_module> (core_module::name)) != nullptr)
+          break;
+      }
+
+      auto& m (extra.set_module (new core_module (om)));
+
+      // config.cc.compiledb.*
+      //
+      {
+        // For config.cc.compiledb and config.cc.compiledb.name we only
+        // consider a value in this root scope (if it's inherited from the
+        // outer scope, then that's where it will be handled). One special
+        // case is when it's specified on a scope that doesn't load the cc
+        // module (including, ultimately, the global scope for a global
+        // override). We handle it by assuming the value belongs to the
+        // outermost amalgamation that loads the cc module.
+        //
+        // Note: cache the result.
+        //
+        auto find_outermost =
+          [&rs, o = optional<pair<scope*, core_module*>> ()] () mutable
+        {
+          if (!o)
+          {
+            o = pair<scope*, core_module*> (&rs, nullptr);
+            for (scope* s (&rs);
+                 (s = s->parent_scope ()->root_scope ()) != nullptr; )
+            {
+              if (auto* m = s->find_module<core_module> (core_module::name))
+              {
+                o->first = s;
+                o->second = m;
+              }
+            }
+          }
+
+          return *o;
+        };
+
+        auto belongs = [&rs, &find_outermost] (const lookup& l)
+        {
+          return l.belongs (rs) || find_outermost ().first == &rs;
+        };
+
+        // Add compilation databases specified in ns as <name>[@<path>] pairs,
+        // appending their names to cdb_names. If <path> is absent, then place
+        // the database into the base directory. Return the last added name.
+        //
+        auto add_cdbs = [&ctx,
+                         &loc,
+                         &trace] (strings& cdb_names,
+                                  const names& ns,
+                                  const dir_path& base) -> const string&
+        {
+          // Check that names and paths match. Return false if this entry
+          // already exists.
+          //
+          // Note that before we also checked that the same paths are not used
+          // across contexts. But, actually, there doesn't seem to be anything
+          // wrong with that and this can actually be useful, for example,
+          // when developing build system modules.
+          //
+          auto check = [&loc] (const string& n, const path& p)
+          {
+            for (const unique_ptr<compiledb>& db: compiledbs)
+            {
+              bool nm (db->name == n);
+              bool pm (db->path == p);
+
+              if (nm != pm)
+                fail (loc) << "inconsistent compilation database names/paths" <<
+                  info << p << " is called " << n <<
+                  info << db->path << " is called " << db->name;
+
+              if (nm)
+                return false;
+            }
+
+            return true;
+          };
+
+          const string* r (&empty_string);
+
+          bool reg (false);
+          size_t j (compiledbs.size ()); // First newly added database.
+          for (auto i (ns.begin ()); i != ns.end (); ++i)
+          {
+            // Each element has the <name>[@<path>] form.
+            //
+            // The special `-` <name> signifies stdout.
+            //
+            // If <path> is absent, then the file is called <name>.json and
+            // placed into the output directory of the amalgamation or project
+            // root scope (passed as the base argument).
+            //
+            // If <path> is (syntactically) a directory, then the file path is
+            // <path>/<name>.json.
+            //
+            if (!i->simple () || i->empty ())
+              fail (loc) << "invalid compilation database name '" << *i << "'";
+
+            string n (i->value);
+
+            path p;
+            if (i->pair)
+            {
+              ++i;
+
+              if (n == "-")
+                fail (loc) << "compilation database path specified for stdout "
+                           << "name";
+              try
+              {
+                if (i->directory ())
+                  p = i->dir / n + ".json";
+                else if (i->file ())
+                {
+                  if (i->dir.empty ())
+                    p = path (i->value);
+                  else
+                    p = i->dir / i->value;
+                }
+                else
+                  throw invalid_path ("");
+
+                if (p.relative ())
+                  p.complete ();
+
+                p.normalize ();
+              }
+              catch (const invalid_path&)
+              {
+                fail (loc) << "invalid compilation database path '" << *i
+                           << "'";
+              }
+            }
+            else if (n != "-")
+            {
+              p = base / n + ".json";
+            }
+
+            if (check (n, p))
+            {
+              reg = reg || compiledbs.empty (); // First time (keep if set).
+
+#ifdef BUILD2_BOOTSTRAP
+              fail (loc) << "compilation database requested during bootstrap";
+#else
+              if (n == "-")
+                compiledbs.push_back (
+                  unique_ptr<compiledb> (
+                    new compiledb_stdout (n)));
+              else
+                compiledbs.push_back (
+                  unique_ptr<compiledb> (
+                    new compiledb_file (n, move (p))));
+#endif
+            }
+
+            // We may end up with duplicates via the config.cc.compiledb
+            // logic.
+            //
+            auto k (find (cdb_names.begin (), cdb_names.end (), n));
+
+            if (k == cdb_names.end ())
+            {
+              cdb_names.push_back (move (n));
+              r = &cdb_names.back ();
+            }
+            else
+              r = &*k;
+          }
+
+          // Register context operation callback for compiledb generation.
+          //
+          // We have two complications here:
+          //
+          // 1. We could be performing all this from the load phase that
+          //    interrupted the match phase, which means the point where the
+          //    pre callback would have been called is already gone (but the
+          //    post callback will still be called). This will happen if we,
+          //    say, import a project that has a compilation database from a
+          //    project that doesn't.
+          //
+          //    (Note that if you think that this can be solved by simply
+          //    always registering the callbacks, regardless of whether we
+          //    have any databases or not, consider a slightly different
+          //    scenario where we import a project that loads the cc module
+          //    from a project that does not).
+          //
+          //    What we are going to do in this case is simply call the pre
+          //    callback manually.
+          //
+          // 2. We could again be performing all this from the load phase that
+          //    interrupted the match phase, but this time the pre callback
+          //    has already been called, which means there will be no pre()
+          //    call for the newly added database(s). This will happen if we,
+          //    say, import a project that has a compilation database from a
+          //    project that also has one.
+          //
+          //    Again, what we are going to do in this case is simply call the
+          //    pre callback for the new database(s) manually.
+          //
+          if (reg)
+            ctx.operation_callbacks.emplace (
+              perform_update_id,
+              context::operation_callback {&compiledb_pre, &compiledb_post});
+
+          if (ctx.load_generation > 1)
+          {
+            action a (ctx.current_action ());
+
+            if (a.inner_action () == perform_update_id)
+            {
+              if (reg) // Case #1.
+              {
+                l6 ([&]{trace << "direct compiledb_pre for context " << &ctx;});
+                compiledb_pre (ctx, a, action_targets {});
+              }
+              else     // Case #2.
+              {
+                size_t n (compiledbs.size ());
+
+                if (j != n)
+                {
+                  l6 ([&]{trace << "additional compiledb for context " << &ctx;});
+
+                  for (; j != n; ++j)
+                    compiledbs[j]->pre (ctx);
+                }
+              }
+            }
+          }
+
+          return *r;
+        };
+
+        lookup l;
+
+        // config.cc.compiledb
+        //
+        // The semantics of this value is as follows:
+        //
+        // Location:    outermost amalgamation that loads the cc module.
+        // Name filter: enable from this scope unless specified explicitly.
+        // Type filter: enable from this scope unless specified explicitly.
+        //
+        // Note: save omitted.
+        //
+        optional<string> enable_filter;
+
+        l = lookup_config (rs, "config.cc.compiledb", 0, &save_compiledb_name);
+        if (l && belongs (l))
+        {
+          l6 ([&]{trace << "config.cc.compiledb specified on " << rs;});
+
+          const names& ns (cast<names> (l));
+
+          // Make sure it's one name/path.
+          //
+          if (ns.empty () || ns.size () != (ns.front ().pair ? 2 : 1))
+            fail (loc) << "invalid compilation database name '" << ns << "'";
+
+          // We inject the database directly into the outer amalgamation's
+          // module, as-if config.cc.compiledb.name was specified in its
+          // scope. Unless there isn't one, in which case it's us.
+          //
+          pair<scope*, core_module*> p (find_outermost ());
+
+          // Save the name for the name filter below.
+          //
+          enable_filter = add_cdbs (
+            (p.second != nullptr ? *p.second : m).cdb_names_,
+            ns,
+            p.first->out_path ());
+        }
+
+        // config.cc.compiledb.name
+        //
+        // Note: save omitted.
+        //
+        l = lookup_config (rs,
+                           "config.cc.compiledb.name",
+                           0,
+                           &save_compiledb_name);
+        if (l && belongs (l))
+        {
+          l6 ([&]{trace << "config.cc.compiledb.name specified on " << rs;});
+
+          add_cdbs (m.cdb_names_, cast<names> (l), rs.out_path ());
+        }
+
+        // config.cc.compiledb.filter
+        //
+        // Note: save omitted.
+        //
+        l = lookup_config (rs, "config.cc.compiledb.filter");
+        if (l && belongs (l)) // Custom.
+        {
+          m.cdb_filter_ = &cast<compiledb_name_filter> (l);
+        }
+        else if (enable_filter) // Override.
+        {
+          // Inherit outer filter.
+          //
+          if (om != nullptr && om->cdb_filter_ != nullptr)
+            m.cdb_filter_storage_ = *om->cdb_filter_;
+
+          m.cdb_filter_storage_.emplace_back (*enable_filter, true);
+          m.cdb_filter_ = &m.cdb_filter_storage_;
+        }
+        else if (om != nullptr) // Inherit.
+        {
+          m.cdb_filter_ = om->cdb_filter_;
+        }
+
+        // config.cc.compiledb.filter.input
+        // config.cc.compiledb.filter.output
+        //
+        // Note that filtering happens before we take into account the change
+        // status, which means for larger projects there would be a lot of
+        // targets to filter even during the incremental update. So it feels
+        // it would have been better to pre-lookup the target types. However,
+        // the targets that would normally be used are registered by other
+        // modules (bin, c/cxx), which haven't been loaded yet. So instead
+        // we try to optimize the lookup for the commonly used targets.
+        //
+        // Note: save omitted.
+        //
+        l = lookup_config (rs, "config.cc.compiledb.filter.input");
+        if (l && belongs (l)) // Custom.
+        {
+          m.cdb_filter_input_ = &cast<compiledb_type_filter> (l);
+        }
+        else if (enable_filter) // Override.
+        {
+          // Inherit outer filter.
+          //
+          if (om != nullptr && om->cdb_filter_input_ != nullptr)
+          {
+            m.cdb_filter_input_storage_ = *om->cdb_filter_input_;
+            m.cdb_filter_input_storage_.emplace_back (*enable_filter, "target");
+            m.cdb_filter_input_ = &m.cdb_filter_input_storage_;
+          }
+          else
+            m.cdb_filter_input_ = nullptr; // Enable all.
+        }
+        else if (om != nullptr) // Inherit.
+        {
+          m.cdb_filter_input_ = om->cdb_filter_input_;
+        }
+
+        l = lookup_config (rs, "config.cc.compiledb.filter.output");
+        if (l && belongs (l)) // Custom.
+        {
+          m.cdb_filter_output_ = &cast<compiledb_type_filter> (l);
+        }
+        else if (enable_filter) // Override.
+        {
+          // Inherit outer filter.
+          //
+          if (om != nullptr && om->cdb_filter_output_ != nullptr)
+          {
+            m.cdb_filter_output_storage_ = *om->cdb_filter_output_;
+            m.cdb_filter_output_storage_.emplace_back (*enable_filter, "target");
+            m.cdb_filter_output_ = &m.cdb_filter_output_storage_;
+          }
+          else
+            m.cdb_filter_output_ = nullptr; // Enable all.
+        }
+        else if (om != nullptr) // Inherit.
+        {
+          m.cdb_filter_output_ = om->cdb_filter_output_;
+        }
+      }
+
+      // Register scope operation callback for cleaning module sidebuilds and
+      // compilation databases.
+      //
+      // It feels natural to clean this stuff up as a post operation but that
+      // prevents the (otherwise-empty) out root directory from being cleaned
+      // up (via the standard fsdir{} chain).
+      //
+      rs.operation_callbacks.emplace (
+        perform_clean_id,
+        scope::operation_callback {&clean_callback, nullptr /*post*/});
+
       return true;
     }
 
diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx
index cf6c6e4..a3c64d9 100644
--- a/libbuild2/cc/module.cxx
+++ b/libbuild2/cc/module.cxx
@@ -22,6 +22,12 @@ namespace build2
 {
   namespace cc
   {
+    // cc.core_module
+    //
+    const string core_module::name ("cc.core.config");
+
+    // x.config_module
+    //
     void config_module::
     guess (scope& rs, const location& loc, const variable_map&)
     {
@@ -891,6 +897,9 @@ namespace build2
       config::save_environment (rs, xi.platform_environment);
     }
 
+    // x module
+    //
+
     // Global cache of ad hoc importable headers.
     //
     // The key is a hash of the system header search directories
diff --git a/libbuild2/cc/module.hxx b/libbuild2/cc/module.hxx
index 2ef07d6..d4e9a67 100644
--- a/libbuild2/cc/module.hxx
+++ b/libbuild2/cc/module.hxx
@@ -14,6 +14,8 @@
 
 #include <libbuild2/cc/common.hxx>
 
+#include <libbuild2/cc/compiledb.hxx>
+
 #include <libbuild2/cc/compile-rule.hxx>
 #include <libbuild2/cc/link-rule.hxx>
 #include <libbuild2/cc/install-rule.hxx>
@@ -27,6 +29,35 @@ namespace build2
   {
     struct compiler_info;
 
+    // cc.core module: compilation database names and filters.
+    //
+    class core_module: public build2::module
+    {
+    public:
+      static const string name; // Defined as "cc.core.config".
+
+      explicit
+      core_module (const core_module* om)
+          : outer_module_ (om)
+      {
+      }
+
+    public:
+      const core_module* outer_module_; // Constructor's om, stored as is.
+
+      strings cdb_names_; // Passed to add_cdbs() for config.cc.compiledb[.name].
+      // Effective filters: custom value, storage below, or the outer module's.
+      const compiledb_name_filter* cdb_filter_ = nullptr;
+      const compiledb_type_filter* cdb_filter_input_ = nullptr;  // NULL: all.
+      const compiledb_type_filter* cdb_filter_output_ = nullptr; // NULL: all.
+      // Storage for filters built from the outer ones plus an enable entry.
+      compiledb_name_filter cdb_filter_storage_;
+      compiledb_type_filter cdb_filter_input_storage_;
+      compiledb_type_filter cdb_filter_output_storage_;
+    };
+
+    // x.config module
+    //
     class LIBBUILD2_CC_SYMEXPORT config_module: public build2::module,
                                                 public config_data
     {
@@ -153,6 +184,8 @@ namespace build2
       msvc_library_search_dirs (const compiler_info&, scope&) const;
     };
 
+    // x module
+    //
     class LIBBUILD2_CC_SYMEXPORT module: public build2::module,
                                          public virtual common,
                                          public link_rule,
@@ -162,7 +195,6 @@ namespace build2
                                          public predefs_rule
     {
     public:
-      explicit
       module (data&& d, const scope& rs)
           : common (move (d)),
             link_rule (move (d)),
diff --git a/libbuild2/cc/pkgconfig.cxx b/libbuild2/cc/pkgconfig.cxx
index ecef61c..79a38ea 100644
--- a/libbuild2/cc/pkgconfig.cxx
+++ b/libbuild2/cc/pkgconfig.cxx
@@ -1,6 +1,8 @@
 // file      : libbuild2/cc/pkgconfig.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
+#include <libbuild2/cc/pkgconfig.hxx>
+
 #include <libbuild2/scope.hxx>
 #include <libbuild2/target.hxx>
 #include <libbuild2/context.hxx>
@@ -18,7 +20,6 @@
 #include <libbuild2/cc/utility.hxx>
 
 #include <libbuild2/cc/common.hxx>
-#include <libbuild2/cc/pkgconfig.hxx>
 #include <libbuild2/cc/compile-rule.hxx>
 #include <libbuild2/cc/link-rule.hxx>
 
diff --git a/libbuild2/context.hxx b/libbuild2/context.hxx
index db126bc..81ac970 100644
--- a/libbuild2/context.hxx
+++ b/libbuild2/context.hxx
@@ -178,34 +178,35 @@ namespace build2
     // match    - search prerequisites and match rules
     // execute  - execute the matched rule
     //
-    // The build system starts with a "serial load" phase and then continues
-    // with parallel match and execute. Match, however, can be interrupted
-    // both with load and execute.
+    // The build system starts with a serial "initial load" phase and then
+    // continues with parallel match and execute. Match, however, can be
+    // interrupted both with load and execute.
     //
-    // Match can be interrupted with "exclusive load" in order to load
-    // additional buildfiles. Similarly, it can be interrupted with (parallel)
-    // execute in order to build targetd required to complete the match (for
-    // example, generated source code or source code generators themselves).
+    // Match can be interrupted with a (serial) "interrupting load" in order
+    // to load additional buildfiles. Similarly, it can be interrupted with
+    // (parallel) execute in order to build targets required to complete the
+    // match (for example, generated source code or source code generators
+    // themselves).
     //
     // Such interruptions are performed by phase change that is protected by
     // phase_mutex (which is also used to synchronize the state changes
     // between phases).
     //
-    // Serial load can perform arbitrary changes to the build state. Exclusive
-    // load, however, can only perform "island appends". That is, it can
-    // create new "nodes" (variables, scopes, etc) but not (semantically)
-    // change already existing nodes or invalidate any references to such (the
-    // idea here is that one should be able to load additional buildfiles as
-    // long as they don't interfere with the existing build state). The
-    // "islands" are identified by the load_generation number (1 for the
-    // initial/serial load). It is incremented in case of a phase switch and
-    // can be stored in various "nodes" to verify modifications are only done
-    // "within the islands". Another example of invalidation would be
-    // insertion of a new scope "under" an existing target thus changing its
-    // scope hierarchy (and potentially even its base scope). This would be
-    // bad because we may have made decisions based on the original hierarchy,
-    // for example, we may have queried a variable which in the new hierarchy
-    // would "see" a new value from the newly inserted scope.
+    // Initial load can perform arbitrary changes to the build state.
+    // Interrupting load, however, can only perform what we call "island
+    // appends". That is, it can create new "nodes" (variables, scopes, etc)
+    // but not (semantically) change already existing nodes or invalidate any
+    // references to such (the idea here is that one should be able to load
+    // additional buildfiles as long as they don't interfere with the existing
+    // build state). The "islands" are identified by the load_generation
+    // number (1 for the initial load). It is incremented in case of a phase
+    // switch and can be stored in various "nodes" to verify modifications are
+    // only done "within the islands". Another example of invalidation would
+    // be insertion of a new scope "under" an existing target thus changing
+    // its scope hierarchy (and potentially even its base scope). This would
+    // be bad because we may have made decisions based on the original
+    // hierarchy, for example, we may have queried a variable which in the new
+    // hierarchy would "see" a new value from the newly inserted scope.
     //
     // The special load_generation value 0 indicates initialization before
     // anything has been loaded. Currently, it is changed to 1 at the end
@@ -370,6 +371,10 @@ namespace build2
     // system module or an ad hoc C++ recipe. See create_module_context() for
     // details.
     //
+    // Note also that if the callbacks are registered from a module load
+    // function, then there are nuances with interrupted load phases. See the
+    // compilation database handling in the cc module for details.
+    //
     // See also scope::operation_callback.
     //
     struct operation_callback
diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx
index 9b7f5b1..9fcfca8 100644
--- a/libbuild2/dump.cxx
+++ b/libbuild2/dump.cxx
@@ -242,7 +242,8 @@ namespace build2
           h_pair = true;
         }
         else if (t.is_a<map<optional<string>, string>>          () ||
-                 t.is_a<vector<pair<optional<string>, string>>> ())
+                 t.is_a<vector<pair<optional<string>, string>>> () ||
+                 t.is_a<vector<pair<optional<string>, bool>>>   ())
         {
           h_array = true;
           h_pair = false;
diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx
index 520b993..36a7ce5 100644
--- a/libbuild2/module.cxx
+++ b/libbuild2/module.cxx
@@ -480,7 +480,7 @@ namespace build2
       //
       if (nested)
       {
-        // This could be initial or exclusive load.
+        // This could be initial or interrupting load.
         //
         // @@ TODO: see the ad hoc recipe case as a reference.
         //
diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx
index f5cb2c5..c6aac45 100644
--- a/libbuild2/name.hxx
+++ b/libbuild2/name.hxx
@@ -136,7 +136,7 @@ namespace build2
     // value to dir. Throw invalid_argument if value would become empty. May
     // also throw invalid_path.
     //
-    void
+    LIBBUILD2_SYMEXPORT void
     canonicalize ();
   };
 
diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx
index 0ec23d3..0abc360 100644
--- a/libbuild2/variable.cxx
+++ b/libbuild2/variable.cxx
@@ -3320,10 +3320,13 @@ namespace build2
   value_traits<vector<pair<string, optional<string>>>>;
 
   template struct LIBBUILD2_DEFEXPORT
+  value_traits<vector<pair<string, optional<bool>>>>;
+
+  template struct LIBBUILD2_DEFEXPORT
   value_traits<vector<pair<optional<string>, string>>>;
 
   template struct LIBBUILD2_DEFEXPORT
-  value_traits<vector<pair<string, optional<bool>>>>;
+  value_traits<vector<pair<optional<string>, bool>>>;
 
   template struct LIBBUILD2_DEFEXPORT value_traits<set<string>>;
   template struct LIBBUILD2_DEFEXPORT value_traits<set<json_value>>;
diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx
index a14c52b..e55a121 100644
--- a/libbuild2/variable.hxx
+++ b/libbuild2/variable.hxx
@@ -1380,6 +1380,9 @@ namespace build2
   extern template struct LIBBUILD2_DECEXPORT
   value_traits<vector<pair<string, optional<bool>>>>;
 
+  extern template struct LIBBUILD2_DECEXPORT
+  value_traits<vector<pair<optional<string>, bool>>>;
+
   extern template struct LIBBUILD2_DECEXPORT value_traits<set<string>>;
   extern template struct LIBBUILD2_DECEXPORT value_traits<set<json_value>>;
author	Boris Kolpackov <boris@codesynthesis.com>	2024-08-28 09:36:16 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2024-10-09 10:06:21 +0200
commit	eeb155ebc35c5947234f731c333e2bd71ea88974 (patch)
tree	d2784e072b1770b3d30587f97eb4b72b7ef3e765 /libbuild2
parent	8384a087afc7e29e900a3ce96d55ab2f5c2a74c2 (diff)