From b37f1aa6398065be806e6605a023189685669885 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 15 Feb 2017 03:55:15 +0200 Subject: Implement parallel match --- build2/algorithm | 150 ++++-- build2/algorithm.cxx | 859 +++++++++++++++++++++---------- build2/algorithm.ixx | 277 +++++++--- build2/b-options | 8 + build2/b-options.cxx | 26 +- build2/b-options.ixx | 12 + build2/b.cli | 18 +- build2/b.cxx | 21 +- build2/bin/rule | 8 +- build2/bin/rule.cxx | 55 +- build2/bin/target | 32 +- build2/bin/target.cxx | 158 +++--- build2/buildfile | 2 +- build2/cc/common | 53 +- build2/cc/common.cxx | 198 +++---- build2/cc/compile | 17 +- build2/cc/compile.cxx | 339 +++++++----- build2/cc/install | 8 +- build2/cc/install.cxx | 27 +- build2/cc/link | 19 +- build2/cc/link.cxx | 534 +++++++++++-------- build2/cc/msvc.cxx | 75 ++- build2/cc/pkgconfig.cxx | 8 +- build2/cc/utility | 8 +- build2/cc/utility.cxx | 46 +- build2/cc/windows-manifest.cxx | 2 +- build2/cc/windows-rpath.cxx | 17 +- build2/cli/rule | 4 +- build2/cli/rule.cxx | 93 ++-- build2/cli/target | 17 +- build2/cli/target.cxx | 18 +- build2/config/operation.cxx | 17 +- build2/context | 309 ++++++----- build2/context.cxx | 153 +++++- build2/context.ixx | 115 +++++ build2/diagnostics | 78 ++- build2/diagnostics.cxx | 11 +- build2/dist/operation.cxx | 87 ++-- build2/dist/rule | 4 +- build2/dist/rule.cxx | 10 +- build2/dump | 4 +- build2/dump.cxx | 43 +- build2/file | 2 +- build2/file.cxx | 6 +- build2/file.ixx | 8 +- build2/install/rule | 12 +- build2/install/rule.cxx | 32 +- build2/operation | 26 +- build2/operation.cxx | 127 +++-- build2/parser.cxx | 12 +- build2/prerequisite | 42 +- build2/prerequisite.cxx | 4 +- build2/rule | 46 +- build2/rule-map | 8 +- build2/rule.cxx | 86 +++- build2/scheduler | 60 ++- build2/scheduler.cxx | 202 +++++--- build2/scheduler.txx | 6 +- build2/scope | 16 +- build2/search | 6 +- build2/search.cxx | 24 +- build2/target | 601 ++++++++++++--------- build2/target-type | 2 +- build2/target.cxx | 232 +++++---- build2/target.ixx | 218 +++++++- build2/target.txx | 2 +- build2/test/rule | 6 +- build2/test/rule.cxx | 50 +- build2/test/script/parser.cxx | 37 +- build2/test/script/script.cxx | 10 +- build2/types | 39 +- build2/utility.cxx | 10 + build2/variable | 8 +- build2/variable.ixx | 2 - tests/common.test | 2 +- tests/search/dir/testscript | 1 - tests/test/common.test | 2 +- tests/test/config-test/testscript | 2 +- tests/test/script/common.test | 2 +- tests/test/script/runner/redirect.test | 2 +- unit-tests/test/script/parser/driver.cxx | 6 +- 81 files changed, 3761 insertions(+), 2138 deletions(-) create mode 100644 build2/context.ixx diff --git a/build2/algorithm b/build2/algorithm index d360fad..2fa9fe4 100644 --- a/build2/algorithm +++ b/build2/algorithm @@ -21,48 +21,53 @@ namespace build2 // target-type-specific search function. If that doesn't yeld anything, // it creates a new target. // - target& - search (prerequisite&); + const target& + search (const prerequisite&); // As above but specify the prerequisite to search as a key. // - target& + const target& search (const prerequisite_key&); + // Uniform search interface for prerequisite/prerequisite_member. + // + inline const target& + search (const prerequisite_member& p) {return p.search ();} + // As above but override the target type. Useful for searching for // target group members where we need to search for a different // target type. 
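To illustrate the find-or-create semantics that search() implements (look the key up, and only create a new target if the lookup yields nothing), here is a minimal, self-contained sketch. It is an illustration only, not build2's implementation: the names find_or_create and target::name and the plain std::map are stand-ins, and the real search() additionally consults the target-type-specific search function and hands project-qualified prerequisites to import().

    #include <map>
    #include <memory>
    #include <string>

    struct target
    {
      std::string name;
      // ...
    };

    // Return an existing target for the key or create one if the lookup
    // yields nothing (illustrative stand-in for search()).
    //
    const target&
    find_or_create (std::map<std::string, std::unique_ptr<target>>& set,
                    const std::string& key)
    {
      auto i (set.find (key));

      if (i == set.end ())
        i = set.emplace (key, std::make_unique<target> (target {key})).first;

      return *i->second;
    }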
// - target& + const target& search (const target_type&, const prerequisite_key&); // As above but specify the prerequisite to search as individual key // components. Scope can be NULL if the directory is absolute. // - target& + const target& search (const target_type& type, const dir_path& dir, const dir_path& out, const string& name, - const string* ext, // NULL means unspecified. - const scope*, + const string* ext = nullptr, // NULL means unspecified. + const scope* = nullptr, // NULL means dir is absolute. const optional& proj = nullopt); // As above but specify the target type as template argument. // template - T& + const T& search (const dir_path& dir, const dir_path& out, const string& name, - const string* ext, - const scope*); + const string* ext = nullptr, + const scope* = nullptr); // Search for a target identified by the name. The semantics is "as if" we // first created a prerequisite based on this name in exactly the same way // as the parser would and then searched based on this prerequisite. // - target& + const target& search (name, const scope&); // As above but only search for an already existing target. Unlike the @@ -76,24 +81,84 @@ namespace build2 const scope&, const dir_path& out = dir_path ()); + // Target match lock: a non-const target reference as well as the + // target::offset_* state that has already been "achieved". + // + struct target_lock + { + using target_type = build2::target; + + target_type* target = nullptr; + size_t offset = 0; + + explicit operator bool () const {return target != nullptr;} + + void unlock (); + target_type* release (); + + target_lock () = default; + + target_lock (target_lock&&); + target_lock& operator= (target_lock&&); + + // Implementation details. + // + target_lock (const target_lock&) = delete; + target_lock& operator= (const target_lock&) = delete; + + target_lock (target_type* t, size_t o): target (t), offset (o) {} + ~target_lock (); + }; + + // If the target is already applied (for this action ) or executed, then no + // lock is acquired. Otherwise, the target must not yet be matched for this + // action. + // + // @@ MT fuzzy: what if it is already in the desired state, why assert? + // Currently we only use it with match_recipe(). + // + target_lock + lock (action, const target&); + // Match and apply a rule to the action/target with ambiguity detection. // Increment the target's dependents count, which means that you should call - // this function with the intent to also call execute(). + // this function with the intent to also call execute(). Return the target + // state translating target_state::failed to the failed exception unless + // instructed otherwise. // - // In case of optimizations that would avoid calling execute(), call - // unmatch() to indicate this. This is only allowed in the following - // cases: + // The unmatch argument allows optimizations that avoid calling execute(). + // If it is unmatch::unchanged then only unmatch the target if it is known + // to be unchanged after match. If it is unmatch::safe, then unmatch the + // target if it is safe (this includes unchanged or if we know that someone + // else will execute this target). Return true if unmatch succeeded. Always + // throw if failed. // - // - target::unchanged() returns true - // - match() returns true + enum class unmatch {none, unchanged, safe}; + + target_state + match (action, const target&, bool fail = true); + + bool + match (action, const target&, unmatch); + + // Start asynchronous match. 
Return target_state::postponed if the + // asynchrounous operation has been started and target_state::busy if the + // target has already been busy. Regardless of the result, match() must be + // called in order to complete the operation (except target_state::failed). // - // Note that match() does not check unchanged(). + // If fail is false, then return target_state::failed if the target match + // failed. Otherwise, throw the failed exception if keep_going is false and + // return target_state::failed otherwise. // - bool - match (slock&, action, target&); + target_state + match_async (action, const target&, + size_t start_count, atomic_count& task_count, + bool fail = true); + // Match by specifying the recipe directly. The target must be locked. + // void - unmatch (action, target&); + match_recipe (target_lock&, recipe); // Match a "delegate rule" from withing another rules' apply() function // avoiding recursive matches (thus the third argument). Return recipe and @@ -102,7 +167,7 @@ namespace build2 // execute_delegate(). // pair - match_delegate (slock&, action, target&, const rule&); + match_delegate (action, target&, const rule&); // The standard prerequisite search and match implementations. They call // search() and then match() for each prerequisite in a loop omitting out of @@ -110,22 +175,40 @@ namespace build2 // of a group, then they first do this to the group's prerequisites. // void - search_and_match_prerequisites (slock&, action, target&); + match_prerequisites (action, target&); // If we are cleaning, this function doesn't go into group members, // as an optimization (the group should clean everything up). // void - search_and_match_prerequisite_members (slock&, action, target&); + match_prerequisite_members (action, target&); // As above but omit prerequisites that are not in the specified scope. // void - search_and_match_prerequisites (slock&, action, target&, const scope&); + match_prerequisites (action, target&, const scope&); + + void + match_prerequisite_members (action, target&, const scope&); + // Match (already searched) members of a group or similar prerequisite-like + // dependencies. Similar in semantics to match_prerequisites(). + // void - search_and_match_prerequisite_members ( - slock&, action, target&, const scope&); + match_members (action, target&, const target*[], size_t); + + template + inline void + match_members (action a, target& t, const target* (&ts)[N]) + { + match_members (a, t, ts, N); + } + + inline void + match_members (action a, target& t, vector& ts, size_t start) + { + match_members (a, t, ts.data () + start, ts.size () - start); + } // Unless already available, match, and, if necessary, execute the group // in order to obtain its members list. Note that even after that the @@ -133,7 +216,7 @@ namespace build2 // fallback rule matched). // group_view - resolve_group_members (slock&, action, target&); + resolve_group_members (action, const target&); // Inject dependency on the target's directory fsdir{}, unless it is in the // src tree or is outside of any project (say, for example, an installation @@ -142,8 +225,8 @@ namespace build2 // the injected target or NULL. Normally this function is called from the // rule's apply() function. // - fsdir* - inject_fsdir (slock&, action, target&, bool parent = true); + const fsdir* + inject_fsdir (action, target&, bool parent = true); // Execute the action on target, assuming a rule has been matched and the // recipe for this action has been set. 
This is the synchrounous executor @@ -159,11 +242,14 @@ namespace build2 // if the asynchrounous execution has been started and target_state::busy if // the target has already been busy. // - // Note: does not translate target_state::failed to the failed exception. + // If fail is false, then return target_state::failed if the target match + // failed. Otherwise, throw the failed exception if keep_going is false and + // return target_state::failed otherwise. // target_state execute_async (action, const target&, - size_t start_count, atomic_count& task_count); + size_t start_count, atomic_count& task_count, + bool fail = true); // Execute the recipe obtained with match_delegate(). Note that the target's // state is neither checked nor updated by this function. In other words, diff --git a/build2/algorithm.cxx b/build2/algorithm.cxx index 7239f3e..728fb14 100644 --- a/build2/algorithm.cxx +++ b/build2/algorithm.cxx @@ -10,7 +10,6 @@ #include // import() #include #include -#include #include #include #include @@ -20,10 +19,10 @@ using namespace butl; namespace build2 { - target& + const target& search (const prerequisite_key& pk) { - assert (phase == run_phase::search_match); + assert (phase == run_phase::match); // If this is a project-qualified prerequisite, then this is import's // business. @@ -31,16 +30,16 @@ namespace build2 if (pk.proj) return import (pk); - if (target* t = pk.tk.type->search (pk)) + if (const target* t = pk.tk.type->search (pk)) return *t; return create_new_target (pk); } - target& + const target& search (name n, const scope& s) { - assert (phase == run_phase::search_match); + assert (phase == run_phase::match); optional ext; const target_type* tt (s.find_target_type (n, ext)); @@ -66,7 +65,7 @@ namespace build2 const target* search_existing (const name& cn, const scope& s, const dir_path& out) { - assert (phase == run_phase::search_match || phase == run_phase::execute); + assert (phase == run_phase::match || phase == run_phase::execute); // We don't handle this for now. // @@ -93,11 +92,162 @@ namespace build2 prerequisite_key {n.proj, {tt, &n.dir, &out, &n.value, ext}, &s}); } - pair - match_impl (slock& ml, action a, target& t, bool apply, const rule* skip) + // If the work_queue is not present, then we don't wait. + // + target_lock + lock_impl (action a, const target& ct, optional wq) + { + assert (phase == run_phase::match); + + // Most likely the target's state is (count_touched - 1), that is, 0 or + // previously executed, so let's start with that. + // + size_t b (target::count_base ()); + size_t e (b + target::offset_touched - 1); + + size_t exec (b + target::offset_executed); + size_t lock (b + target::offset_locked); + size_t busy (b + target::offset_busy); + + for (;;) + { + // First try to grab the spin lock which we later may upgrade to busy. + // + if (ct.task_count.compare_exchange_strong ( + e, + lock, + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. + { + break; + } + + while (e == lock || e >= busy) + { + // Wait for the count to drop below busy if someone is already working + // on this target. + // + // We also unlock the phase for the duration of the wait. Why? Consider + // this scenario: we are trying to match a dir{} target whose buildfile + // still needs to be loaded. Let's say someone else started the match + // before us. So we wait for their completion and they wait to switch + // the phase to load. Which would result in a deadlock unless we release + // the phase. 
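The deadlock avoidance described above (release the phase for the duration of the wait) is the classic rule of dropping an outer lock before blocking on another thread's progress. A generic sketch of that shape with standard primitives, assuming a plain mutex as a stand-in for the phase lock (build2's actual phase_unlock/phase_lock machinery is more involved):

    #include <mutex>
    #include <condition_variable>

    std::mutex m;
    std::condition_variable cv;
    bool target_done = false; // Set by the thread working on the target.

    void
    wait_for_target (std::unique_lock<std::mutex>& phase) // Held on entry.
    {
      // Release the phase for the duration of the wait so that the thread
      // working on the target can switch phases (say, to load a buildfile)
      // without deadlocking against us.
      //
      phase.unlock ();

      {
        std::unique_lock<std::mutex> l (m);
        cv.wait (l, [] {return target_done;});
      }

      phase.lock (); // Reacquire before continuing with the match.
    }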
+ // + if (e >= busy) + { + if (!wq) + return target_lock {nullptr, e - b}; + + phase_unlock ul; + e = sched.wait (busy - 1, ct.task_count, *wq); + } + + // Spin if it is locked. + // + for (; e == lock; e = ct.task_count.load (memory_order_acquire)) + this_thread::yield (); + } + + // We don't lock already executed targets. + // + if (e == exec) + { + // Sanity check: we better not be overriding a recipe for an already + // executed target. + // + assert (ct.action == a); + + return target_lock {nullptr, target::offset_executed}; + } + } + + // We now have the sping lock. Analyze the old value and decide what to + // do. + // + target& t (const_cast (ct)); + + size_t offset; + if (e <= b) + { + // First lock for this operation. + // + t.action = a; + t.dependents.store (0, memory_order_release); + offset = target::offset_touched; + } + else + { + offset = e - b; + + switch (offset) + { + case target::offset_touched: + case target::offset_matched: + case target::offset_applied: + { + if (a > t.action) + { + // Only noop_recipe can be overridden. + // + if (offset >= target::offset_applied) + { + recipe_function** f (t.recipe_.target ()); + assert (f != nullptr && *f == &noop_action); + } + + t.action = a; + offset = target::offset_touched; // Back to just touched. + } + else + { + assert (t.action > a || a == t.action); + + // Release the lock if already applied for this action. This is + // necessary no to confuse execute since otherwise it might see + // that the target is busy and assume that someone is already + // executing it. Note that we cannot handle this case in the loop + // above since we need to lock target::action. + // + if (offset == target::offset_applied || t.action > a) + { + // Release the spin lock. + // + t.task_count.store (e, memory_order_release); + return target_lock {nullptr, offset}; + } + } + + break; + } + default: + assert (false); + } + } + + // We are keeping it so upgrade to busy. + // + t.task_count.store (busy, memory_order_release); + return target_lock (&t, offset); + } + + void + unlock_impl (target& t, size_t offset) { - pair r (nullptr, false); + assert (phase == run_phase::match); + + // Set the task count and wake up any threads that might be waiting for + // this target. + // + t.task_count.store (offset + target::count_base (), memory_order_release); + sched.resume (t.task_count); + } + // Return the matching rule and the recipe action. + // + pair>&, action> + match_impl (action a, target& t, const rule* skip) + { // Clear the resolved targets list before calling match(). The rule is // free to modify this list in match() (provided that it matches) in order // to, for example, prepare it for apply(). @@ -178,26 +328,29 @@ namespace build2 for (auto i (rs.first); i != rs.second; ++i) { - const string& n (i->first); - const rule& ru (i->second); + const auto& r (*i); + const string& n (r.first); + const rule& ru (r.second); if (&ru == skip) continue; match_result m (false); { - auto g ( - make_exception_guard ( - [ra, &t, &n]() - { - info << "while matching rule " << n << " to " - << diag_do (ra, t); - })); - - if (!(m = ru.match (ml, ra, t, hint))) + auto df = make_diag_frame ( + [ra, &t, &n](const diag_record& dr) + { + if (verb != 0) + dr << info << "while matching rule " << n << " to " + << diag_do (ra, t); + }); + + if (!(m = ru.match (ra, t, hint))) continue; - if (!m.recipe_action.valid ()) + if (m.recipe_action.valid ()) + assert (m.recipe_action > ra); + else m.recipe_action = ra; // Default, if not set. 
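Stepping back from the rule matching above: the locking protocol implemented by lock_impl() earlier boils down to a small state machine stored in the target's atomic task_count. Values at or below "applied" can be claimed with a compare-and-swap, a transient "locked" value is spun on, and values at or above "busy" are waited on through the scheduler. Here is a stripped-down sketch of just the claim/spin logic, with illustrative names and only three states (the real code also encodes the touched/matched/applied offsets and falls back to sched.wait() for busy targets):

    #include <atomic>
    #include <cstddef>
    #include <thread>

    enum : std::size_t {applied = 1, locked = 2, busy = 3};

    std::atomic<std::size_t> task_count {applied};

    // Try to claim the target for matching. Return true if we now own it,
    // false if another thread is already busy with it.
    //
    bool
    try_lock ()
    {
      std::size_t e (applied);

      for (;;)
      {
        if (e >= busy)
          return false; // Caller would wait on the scheduler here.

        // Attempt to upgrade applied -> locked.
        //
        if (e == applied &&
            task_count.compare_exchange_strong (e,
                                                locked,
                                                std::memory_order_acq_rel,
                                                std::memory_order_acquire))
          return true; // Claimed.

        // CAS failure or the (brief) lock is held: back off and re-read.
        //
        std::this_thread::yield ();
        e = task_count.load (std::memory_order_acquire);
      }
    }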
} @@ -206,22 +359,25 @@ namespace build2 bool ambig (false); diag_record dr; - for (++i; i != rs.second; ++i) { const string& n1 (i->first); const rule& ru1 (i->second); { - auto g ( - make_exception_guard ( - [ra, &t, &n1]() - { - info << "while matching rule " << n1 << " to " - << diag_do (ra, t); - })); - - if (!ru1.match (ml, ra, t, hint)) + auto df = make_diag_frame ( + [ra, &t, &n1](const diag_record& dr) + { + if (verb != 0) + dr << info << "while matching rule " << n1 << " to " + << diag_do (ra, t); + }); + + // @@ TODO: this makes target state in match() undetermined + // so need to fortify rules that modify anything in match + // to clear things. + // + if (!ru1.match (ra, t, hint)) continue; } @@ -237,32 +393,9 @@ namespace build2 } if (!ambig) - { - ra = m.recipe_action; // Use custom, if set. - - if (apply) - { - auto g ( - make_exception_guard ( - [ra, &t, &n]() - { - info << "while applying rule " << n << " to " - << diag_do (ra, t); - })); - - // @@ We could also allow the rule to change the recipe - // action in apply(). Could be useful with delegates. - // - t.recipe (ra, ru.apply (ml, ra, t)); - } - else - { - r.first = &ru; - r.second = move (m); - } - - return r; - } + return pair< + const pair>&, + action> {r, m.recipe_action}; else dr << info << "use rule hint to disambiguate this match"; } @@ -280,80 +413,283 @@ namespace build2 dr << endf; } - group_view - resolve_group_members_impl (slock& ml, action a, target& g) + recipe + apply_impl (target& t, + const pair>& r, + action a) { - group_view r; + auto df = make_diag_frame ( + [a, &t, &r](const diag_record& dr) + { + if (verb != 0) + dr << info << "while applying rule " << r.first << " to " + << diag_do (a, t); + }); - // Unless we already have a recipe, try matching the target to - // the rule. + // @@ We could also allow the rule to change the recipe action in + // apply(). Could be useful with delegates. // - if (!g.recipe (a)) + return r.second.get ().apply (a, t); + } + + // If step is true then perform only one step of the match/apply sequence. + // + static target_state + match_impl (action a, target_lock& l, bool step = false) noexcept + { + assert (l.target != nullptr); + target& t (*l.target); + + try { - auto rp (match_impl (ml, a, g, false)); + // Continue from where the target has been left off. + // + switch (l.offset) + { + case target::offset_touched: + { + // Match. + // + auto mr (match_impl (a, t, nullptr)); + t.rule = &mr.first; + t.action = mr.second; // In case overriden. + l.offset = target::offset_matched; - r = g.group_members (a); - if (r.members != nullptr) - return r; + if (step) + return target_state::unknown; // t.state_ not set yet. - // That didn't help, so apply the rule and go to the building - // phase. + // Fall through. + } + case target::offset_matched: + { + // Apply. + // + t.recipe (apply_impl (t, *t.rule, t.action)); + l.offset = target::offset_applied; + break; + } + default: + assert (false); + } + } + catch (const failed&) + { + // As a sanity measure clear the target data since it can be incomplete + // or invalid (mark()/unmark() should give you some for ideas). // - const match_result& mr (rp.second); - g.recipe (mr.recipe_action, - rp.first->apply (ml, mr.recipe_action, g)); + t.clear_data (); + t.prerequisite_targets.clear (); + + t.state_ = target_state::failed; + l.offset = target::offset_applied; } - // Note that we use execute_direct() rather than execute() here to - // sidestep the dependents count logic. 
In this context, this is by - // definition the first attempt to execute this rule (otherwise we - // would have already known the members list) and we really do need - // to execute it now. + return t.state_; + } + + target_state + match (action a, + const target& ct, + size_t start_count, + atomic_count* task_count) + { + // If we are blocking then work our own queue one task at a time. The + // logic here is that we may have already queued other tasks before this + // one and there is nothing bad (except a potentially deep stack trace) + // about working through them while we wait. On the other hand, we want + // to continue as soon as the lock is available in order not to nest + // things unnecessarily. // - execute_direct (a, g); + target_lock l ( + lock_impl (a, + ct, + task_count == nullptr + ? optional (scheduler::work_one) + : nullopt)); + + if (l.target == nullptr) + { + // Already matched, executed, or busy. + // + if (l.offset >= target::offset_busy) + return target_state::busy; + + // Fall through. + } + else if (l.offset != target::offset_applied) // Nothing to do if applied. + { + if (task_count == nullptr) + return match_impl (a, l); + + // Pass "disassembled" lock since the scheduler queue doesn't support + // task destruction. Also pass our diagnostics stack (this is safe since + // we expect the caller to wait for completion before unwinding its diag + // stack). + // + if (sched.async (start_count, + *task_count, + [a] (target& t, size_t offset, const diag_frame* ds) + { + diag_frame df (ds); + phase_lock pl (run_phase::match); + { + target_lock l {&t, offset}; // Reassemble. + match_impl (a, l); + // Unlock withing the match phase. + } + }, + ref (*l.release ()), + l.offset, + diag_frame::stack)) + return target_state::postponed; // Queued. - r = g.group_members (a); - return r; // Might still be unresolved. + // Matched synchronously, fall through. + } + + return ct.matched_state (a, false); } - void - search_and_match_prerequisites (slock& ml, - action a, - target& t, - const scope* s) + group_view + resolve_group_members_impl (action a, const target& g, target_lock l) { - for (prerequisite& p: group_prerequisites (t)) + // Note that we will be unlocked if the target is already applied. + // + group_view r; + + // Continue from where the target has been left off. + // + switch (l.offset) { - target& pt (search (p)); + case target::offset_touched: + { + // Match (locked). + // + if (match_impl (a, l, true) == target_state::failed) + throw failed (); + + if ((r = g.group_members (a)).members != nullptr) + break; - if (s == nullptr || pt.in (*s)) + // Fall through to apply. + } + case target::offset_matched: { - match (ml, a, pt); - t.prerequisite_targets.push_back (&pt); + // Apply (locked). + // + if (match_impl (a, l, true) == target_state::failed) + throw failed (); + + if ((r = g.group_members (a)).members != nullptr) + break; + + // Unlock and fall through to execute. + // + l.unlock (); } + case target::offset_applied: + { + // Execute (unlocked). + // + // Note that we use execute_direct() rather than execute() here to + // sidestep the dependents count logic. In this context, this is by + // definition the first attempt to execute this rule (otherwise we + // would have already known the members list) and we really do need + // to execute it now. 
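The block below runs the group's recipe under a temporary switch to the execute phase. As a rough illustration of the RAII shape involved (build2's real phase_switch also synchronizes with other threads before changing the global phase; this sketch, with a hypothetical phase_guard name, only saves and restores the value):

    enum class run_phase {load, match, execute};

    run_phase phase = run_phase::match; // Global build phase (illustrative).

    struct phase_guard
    {
      run_phase prev;

      explicit
      phase_guard (run_phase p): prev (phase) {phase = p;}

      ~phase_guard () {phase = prev;}
    };

    void
    execute_group_directly ()
    {
      phase_guard ps (run_phase::execute);

      // ... execute the group's recipe here ...
    } // Phase restored to match on scope exit.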
+ // + { + phase_switch ps (run_phase::execute); + execute_direct (a, g); + } + + r = g.group_members (a); + break; + } + } + + return r; + } + + template + static void + match_prerequisite_range (action a, target& t, R&& r, const scope* s) + { + auto& pts (t.prerequisite_targets); + + // Start asynchronous matching of prerequisites. Wait with unlocked phase + // to allow phase switching. + // + wait_guard wg (target::count_busy (), t.task_count, true); + + size_t i (pts.size ()); // Index of the first to be added. + for (auto&& p: forward (r)) + { + const target& pt (search (p)); + + if (s != nullptr && !pt.in (*s)) + continue; + + match_async (a, pt, target::count_busy (), t.task_count); + pts.push_back (&pt); } + + wg.wait (); + + // Finish matching all the targets that we have started. + // + for (size_t n (pts.size ()); i != n; ++i) + { + const target& pt (*pts[i]); + match (a, pt); + } + } + + void + match_prerequisites (action a, target& t, const scope* s) + { + match_prerequisite_range (a, t, group_prerequisites (t), s); } void - search_and_match_prerequisite_members (slock& ml, - action a, - target& t, - const scope* s) + match_prerequisite_members (action a, target& t, const scope* s) { - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + match_prerequisite_range (a, t, group_prerequisite_members (a, t), s); + } + + void + match_members (action a, target& t, const target* ts[], size_t n) + { + // Pretty much identical to match_prerequisite_range() except we don't + // search. + // + wait_guard wg (target::count_busy (), t.task_count, true); + + for (size_t i (0); i != n; ++i) { - target& pt (p.search ()); + const target* m (ts[i]); - if (s == nullptr || pt.in (*s)) - { - match (ml, a, pt); - t.prerequisite_targets.push_back (&pt); - } + if (m == nullptr) + continue; + + match_async (a, *m, target::count_busy (), t.task_count); + } + + wg.wait (); + + // Finish matching all the targets that we have started. + // + for (size_t i (0); i != n; ++i) + { + const target* m (ts[i]); + + if (m == nullptr) + continue; + + match (a, *m); } } - fsdir* - inject_fsdir (slock& ml, action a, target& t, bool parent) + const fsdir* + inject_fsdir (action a, target& t, bool parent) { tracer trace ("inject_fsdir"); @@ -381,24 +717,31 @@ namespace build2 // Target is in the out tree, so out directory is empty. // - fsdir* r (&search (d, dir_path (), string (), nullptr, nullptr)); - match (ml, a, *r); + const fsdir* r ( + &search (d, dir_path (), string (), nullptr, nullptr)); + match (a, *r); t.prerequisite_targets.emplace_back (r); return r; } - target_state + static target_state execute_impl (action a, target& t) noexcept { - // Task count should be count_executing. - // - assert (t.state_ == target_state::unknown); + assert (t.task_count.load (memory_order_consume) == target::count_busy () + && t.state_ == target_state::unknown); target_state ts; try { - ts = t.recipe (a) (a, t); + auto df = make_diag_frame ( + [a, &t](const diag_record& dr) + { + if (verb != 0) + dr << info << "while " << diag_doing (a, t); + }); + + ts = t.recipe_ (a, t); // See the recipe documentation for details on what's going on here. // Note that if the result is group, then the group's state can be @@ -421,21 +764,16 @@ namespace build2 } catch (const failed&) { - // The "how we got here" stack trace is useful but only during serial - // execution in the "stop going" mode. Otherwise, you not only get - // things interleaved, you may also get a whole bunch of such stacks. 
- // - if (verb != 0 && sched.serial () && !keep_going) - info << "while " << diag_doing (a, t); - ts = t.state_ = target_state::failed; } // Decrement the task count (to count_executed) and wake up any threads // that might be waiting for this target. // - size_t tc (t.task_count--); - assert (tc == target::count_executing); + size_t tc (t.task_count.fetch_sub ( + target::offset_busy - target::offset_executed, + memory_order_release)); + assert (tc == target::count_busy ()); sched.resume (t.task_count); return ts; @@ -449,17 +787,11 @@ namespace build2 { target& t (const_cast (ct)); // MT-aware. - // text << "E " << t << ": " << t.dependents << " " << dependency_count; - // Update dependency counts and make sure they are not skew. // - size_t td (t.dependents--); -#ifndef NDEBUG - size_t gd (dependency_count--); + size_t td (t.dependents.fetch_sub (1, memory_order_release)); + size_t gd (dependency_count.fetch_sub (1, memory_order_release)); assert (td != 0 && gd != 0); -#else - dependency_count.fetch_sub (1, std::memory_order_release); -#endif td--; // Handle the "last" execution mode. @@ -486,39 +818,52 @@ namespace build2 if (current_mode == execution_mode::last && td != 0) return target_state::postponed; - // Try to atomically change unexecuted to executing. + // Try to atomically change applied to busy. Note that we are in the + // execution phase so the target shall not be spin-locked. // - size_t tc (target::count_unexecuted); - if (t.task_count.compare_exchange_strong (tc, target::count_executing)) + size_t tc (target::count_applied ()); + if (t.task_count.compare_exchange_strong ( + tc, + target::count_busy (), + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. { - if (task_count == nullptr) - return execute_impl (a, t); - - //text << this_thread::get_id () << " async " << t; - - if (sched.async (start_count, - *task_count, - [a] (target& t) - { - //text << this_thread::get_id () << " deque " << t; - execute_impl (a, t); // Note: noexcept. - }, - ref (t))) - return target_state::unknown; // Queued. + if (t.state_ != target_state::unchanged) // Noop recipe. + { + if (task_count == nullptr) + return execute_impl (a, t); - // Executed synchronously, fall through. + // Pass our diagnostics stack (this is safe since we expect the caller + // to wait for completion before unwinding its diag stack). + // + if (sched.async (start_count, + *task_count, + [a] (target& t, const diag_frame* ds) + { + diag_frame df (ds); + execute_impl (a, t); // Note: noexcept. + }, + ref (t), + diag_frame::stack)) + return target_state::unknown; // Queued. + + // Executed synchronously, fall through. + } + else + { + t.task_count.store (target::count_executed (), memory_order_release); + sched.resume (t.task_count); + } } else { - switch (tc) - { - case target::count_unexecuted: assert (false); - case target::count_executed: break; - default: return target_state::busy; - } + // Should be either busy or already executed. + // + if (tc >= target::count_busy ()) return target_state::busy; + else assert (tc == target::count_executed ()); } - return t.synchronized_state (false); + return t.executed_state (false); } target_state @@ -526,70 +871,54 @@ namespace build2 { target& t (const_cast (ct)); // MT-aware. 
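The diag_frame handoff used by execute() above (passing diag_frame::stack into the scheduler task and re-establishing it with diag_frame df (ds)) follows a general pattern: capture a thread-local linked list of context frames in the submitting thread and hand it to the worker, which is safe as long as the submitter waits for the task to complete before unwinding its own stack. A self-contained sketch of the idea (simplified: the frame carries a plain string rather than a formatting callback):

    #include <iostream>
    #include <string>
    #include <thread>

    struct frame
    {
      std::string info;
      const frame* prev;

      // Chain of frames established on the current thread.
      //
      static thread_local const frame* stack;

      explicit
      frame (std::string i): info (std::move (i)), prev (stack) {stack = this;}

      ~frame () {stack = prev;}
    };

    thread_local const frame* frame::stack = nullptr;

    int
    main ()
    {
      frame f ("while updating target exe{hello}");

      // Hand the submitting thread's stack to the worker so that
      // diagnostics issued there can print the same context.
      //
      std::thread worker ([ds = frame::stack] ()
                          {
                            for (const frame* d (ds); d != nullptr; d = d->prev)
                              std::cerr << "info: " << d->info << std::endl;
                          });
      worker.join (); // The frame outlives the worker.
    }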
- size_t tc (target::count_unexecuted); - if (t.task_count.compare_exchange_strong (tc, target::count_executing)) + size_t busy (target::count_busy ()); + size_t exec (target::count_executed ()); + + size_t tc (target::count_applied ()); + if (t.task_count.compare_exchange_strong ( + tc, + busy, + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. { - execute_impl (a, t); + if (t.state_ != target_state::unchanged) // Noop recipe. + execute_impl (a, t); + else + { + t.task_count.store (exec, memory_order_release); + sched.resume (t.task_count); + } } else { // If the target is busy, wait for it. // - switch (tc) - { - case target::count_unexecuted: assert (false); - case target::count_executed: break; - default: sched.wait (target::count_executed, t.task_count, false); - } + if (tc >= busy) + sched.wait (exec, t.task_count, scheduler::work_none); + else + assert (tc == exec); } - return t.synchronized_state (); + return t.executed_state (); } - // We use the last bit of a pointer to target in prerequisite_targets as a - // flag to indicate whether the target was already busy. Probably not worth - // it (we are saving an atomic load), but what the hell. + // We use the target pointer mark mechanism to indicate whether the target + // was already busy. Probably not worth it (we are saving an atomic load), + // but what the hell. Note that this means we have to always "harvest" all + // the targets to clear the mark. // - // VC15 doesn't like if we use (abstract) target here. - // - static_assert (alignof (file) % 2 == 0, "unexpected target alignment"); - - static inline void - set_busy (const target*& p) - { - uintptr_t i (reinterpret_cast (p)); - i |= 0x01; - p = reinterpret_cast (i); - } - - static inline bool - get_busy (const target*& p) - { - uintptr_t i (reinterpret_cast (p)); - - if ((i & 0x01) != 0) - { - i &= ~uintptr_t (0x01); - p = reinterpret_cast (i); - return true; - } - - return false; - } - target_state straight_execute_members (action a, const target& t, - const target* ts[], - size_t n) + const target* ts[], size_t n) { target_state r (target_state::unchanged); - // Start asynchronous execution of prerequisites keeping track of how many - // we have handled. + // Start asynchronous execution of prerequisites. // - size_t i (0); - for (; i != n; ++i) + wait_guard wg (target::count_busy (), t.task_count); + + for (size_t i (0); i != n; ++i) { const target*& mt (ts[i]); @@ -598,7 +927,7 @@ namespace build2 target_state s ( execute_async ( - a, *mt, target::count_executing, t.task_count)); + a, *mt, target::count_busy (), t.task_count)); if (s == target_state::postponed) { @@ -606,55 +935,45 @@ namespace build2 mt = nullptr; } else if (s == target_state::busy) - set_busy (mt); - // - // Bail out if the target has failed and we weren't instructed to - // keep going. - // - else if (s == target_state::failed && !keep_going) - { - ++i; - break; - } + mark (mt); } - sched.wait (target::count_executing, t.task_count); + + wg.wait (); // Now all the targets in prerequisite_targets must be executed and // synchronized (and we have blanked out all the postponed ones). // - for (size_t j (0); j != i; ++j) + for (size_t i (0); i != n; ++i) { - const target*& mt (ts[j]); + const target*& mt (ts[i]); if (mt == nullptr) continue; // If the target was already busy, wait for its completion. 
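The mark()/unmark() mechanism referred to above is the same low-bit pointer tagging that the removed set_busy()/get_busy() helpers implemented: target objects are at least 2-byte aligned, so the least significant bit of a stored target pointer is free to carry a "was busy" flag. A sketch modeled directly on those helpers (the stand-in target type is only here to make the example self-contained):

    #include <cstdint>

    struct target {int dummy;}; // Stand-in; real targets are suitably aligned.

    static_assert (alignof (target) % 2 == 0, "unexpected target alignment");

    // Set the flag on the stored pointer.
    //
    inline void
    mark (const target*& p)
    {
      p = reinterpret_cast<const target*> (
        reinterpret_cast<std::uintptr_t> (p) | 0x01);
    }

    // Clear the flag and return whether it was set.
    //
    inline bool
    unmark (const target*& p)
    {
      std::uintptr_t i (reinterpret_cast<std::uintptr_t> (p));

      if ((i & 0x01) == 0)
        return false;

      p = reinterpret_cast<const target*> (i & ~std::uintptr_t (0x01));
      return true;
    }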
// - if (get_busy (mt)) - sched.wait (target::count_executed, mt->task_count, false); + if (unmark (mt)) + sched.wait ( + target::count_executed (), mt->task_count, scheduler::work_none); - r |= mt->synchronized_state (false); + r |= mt->executed_state (); } - if (r == target_state::failed) - throw failed (); - return r; } target_state reverse_execute_members (action a, const target& t, - const target* ts[], - size_t n) + const target* ts[], size_t n) { // Pretty much as straight_execute_members() but in reverse order. // target_state r (target_state::unchanged); - size_t i (n); - for (; i != 0; --i) + wait_guard wg (target::count_busy (), t.task_count); + + for (size_t i (n); i != 0; --i) { const target*& mt (ts[i - 1]); @@ -663,7 +982,7 @@ namespace build2 target_state s ( execute_async ( - a, *mt, target::count_executing, t.task_count)); + a, *mt, target::count_busy (), t.task_count)); if (s == target_state::postponed) { @@ -671,31 +990,25 @@ namespace build2 mt = nullptr; } else if (s == target_state::busy) - set_busy (mt); - else if (s == target_state::failed && !keep_going) - { - --i; - break; - } + mark (mt); } - sched.wait (target::count_executing, t.task_count); - for (size_t j (n); j != i; --j) + wg.wait (); + + for (size_t i (n); i != 0; --i) { - const target*& mt (ts[j - 1]); + const target*& mt (ts[i - 1]); if (mt == nullptr) continue; - if (get_busy (mt)) - sched.wait (target::count_executed, mt->task_count, false); + if (unmark (mt)) + sched.wait ( + target::count_executed (), mt->task_count, scheduler::work_none); - r |= mt->synchronized_state (false); + r |= mt->executed_state (); } - if (r == target_state::failed) - throw failed (); - return r; } @@ -706,21 +1019,22 @@ namespace build2 { assert (current_mode == execution_mode::first); + auto& pts (const_cast (t).prerequisite_targets); // MT-aware. + // Pretty much as straight_execute_members() but hairier. // target_state rs (target_state::unchanged); - size_t i (0); - for (size_t n (t.prerequisite_targets.size ()); i != n; ++i) - { - const target*& pt (t.prerequisite_targets[i]); + wait_guard wg (target::count_busy (), t.task_count); + for (const target*& pt : pts) + { if (pt == nullptr) // Skipped. continue; target_state s ( execute_async ( - a, *pt, target::count_executing, t.task_count)); + a, *pt, target::count_busy (), t.task_count)); if (s == target_state::postponed) { @@ -728,38 +1042,28 @@ namespace build2 pt = nullptr; } else if (s == target_state::busy) - set_busy (pt); - else if (s == target_state::failed && !keep_going) - { - ++i; - break; - } + mark (pt); } - sched.wait (target::count_executing, t.task_count); + + wg.wait (); bool e (mt == timestamp_nonexistent); const target* rt (tt != nullptr ? nullptr : &t); - for (size_t j (0); j != i; ++j) + for (const target*& pt : pts) { - const target*& pt (t.prerequisite_targets[j]); - if (pt == nullptr) continue; // If the target was already busy, wait for its completion. // - if (get_busy (pt)) - sched.wait (target::count_executed, pt->task_count, false); + if (unmark (pt)) + sched.wait ( + target::count_executed (), pt->task_count, scheduler::work_none); - target_state s (pt->synchronized_state (false)); + target_state s (pt->executed_state ()); rs |= s; - // Don't bother with the rest if we are failing. - // - if (rs == target_state::failed) - continue; - // Should we compare the timestamp to this target's? 
// if (!e && (!pf || pf (*pt))) @@ -789,9 +1093,6 @@ namespace build2 rt = pt; } - if (rs == target_state::failed) - throw failed (); - assert (rt != nullptr); return make_pair (e ? rt : nullptr, rs); } @@ -810,8 +1111,10 @@ namespace build2 // If the group is busy, we wait, similar to prerequisites. // const target& g (*t.group); + if (execute (a, g) == target_state::busy) - sched.wait (target::count_executed, g.task_count, false); + sched.wait ( + target::count_executed (), g.task_count, scheduler::work_none); // Indicate to execute() that this target's state comes from the group // (which, BTW, can be failed). @@ -839,6 +1142,7 @@ namespace build2 path ep; auto clean = [&er, &ed, &ep] (const file& f, + const path* fp, initializer_list es) { for (const char* e: es) @@ -860,7 +1164,13 @@ namespace build2 if ((d = (e[n - 1] == '/'))) --n; - p = f.path (); + if (fp == nullptr) + { + fp = &f.path (); + assert (!fp->empty ()); // Must be assigned. + } + + p = *fp; for (; *e == '-'; ++e) p = p.base (); @@ -909,28 +1219,27 @@ namespace build2 auto ei (extra.begin ()), ee (extra.end ()); if (ei != ee) - clean (ft, *ei++); + clean (ft, nullptr, *ei++); // Now clean the ad hoc group file members, if any. // for (const target* m (ft.member); m != nullptr; m = m->member) { const file* fm (dynamic_cast (m)); + const path* fp (fm != nullptr ? &fm->path () : nullptr); - if (fm == nullptr || fm->path ().empty ()) + if (fm == nullptr || fp->empty ()) continue; if (ei != ee) - clean (*fm, *ei++); - - const path& f (fm->path ()); + clean (*fm, fp, *ei++); - target_state r (rmfile (f, 3) + target_state r (rmfile (*fp, 3) ? target_state::changed : target_state::unchanged); if (r == target_state::changed && ep.empty ()) - ep = f; + ep = *fp; er |= r; } @@ -938,7 +1247,7 @@ namespace build2 // Now clean the primary target and its prerequisited in the reverse order // of update: first remove the file, then clean the prerequisites. // - target_state tr (rmfile (ft.path (), ft) + target_state tr (rmfile (ft.path (), ft) // Path must be assigned. ? 
target_state::changed : target_state::unchanged); diff --git a/build2/algorithm.ixx b/build2/algorithm.ixx index 6bbe4bb..83aa050 100644 --- a/build2/algorithm.ixx +++ b/build2/algorithm.ixx @@ -7,18 +7,29 @@ namespace build2 { - inline target& - search (prerequisite& p) + inline const target& + search (const prerequisite& p) { - assert (phase == run_phase::search_match); + assert (phase == run_phase::match); - if (p.target == nullptr) - p.target = &search (p.key ()); + const target* r (p.target.load (memory_order_consume)); - return *p.target; + if (r == nullptr) + { + r = &search (p.key ()); + + const target* e (nullptr); + if (!p.target.compare_exchange_strong ( + e, r, + memory_order_release, + memory_order_consume)) + assert (e == r); + } + + return *r; } - inline target& + inline const target& search (const target_type& t, const prerequisite_key& k) { return search ( @@ -26,7 +37,7 @@ namespace build2 k.proj, {&t, k.tk.dir, k.tk.out, k.tk.name, k.tk.ext}, k.scope}); } - inline target& + inline const target& search (const target_type& type, const dir_path& dir, const dir_path& out, @@ -49,7 +60,7 @@ namespace build2 } template - inline T& + inline const T& search (const dir_path& dir, const dir_path& out, const string& name, @@ -60,110 +71,235 @@ namespace build2 T::static_type, dir, out, name, ext, scope).template as (); } - pair - match_impl (slock&, action, target&, bool apply, const rule* skip = nullptr); + target_lock + lock_impl (action, const target&, optional); - inline bool - match (slock& ml, action a, target& t) + void + unlock_impl (target&, size_t); + + inline void target_lock:: + unlock () + { + if (target != nullptr) + { + unlock_impl (*target, offset); + target = nullptr; + } + } + + inline target* target_lock:: + release () + { + target_type* r (target); + target = nullptr; + return r; + } + + inline target_lock:: + ~target_lock () + { + unlock (); + } + + inline target_lock:: + target_lock (target_lock&& x) + : target (x.release ()), offset (x.offset) + { + } + + inline target_lock& target_lock:: + operator= (target_lock&& x) + { + if (this != &x) + { + unlock (); + target = x.release (); + offset = x.offset; + } + return *this; + } + + inline target_lock + lock (action a, const target& t) + { + // We don't allow locking a target that has already been matched. + // + target_lock r (lock_impl (a, t, scheduler::work_none)); + assert (!r || r.offset == target::offset_touched); + return r; + } + + pair>&, action> + match_impl (action, target&, const rule* skip); + + recipe + apply_impl (target&, + const pair>&, + action); + + target_state + match (action, const target&, size_t, atomic_count*); + + inline target_state + match (action a, const target& t, bool fail) { - assert (phase == run_phase::search_match); + assert (phase == run_phase::match); + + target_state s (match (a, t, 0, nullptr)); - if (!t.recipe (a)) - match_impl (ml, a, t, true); + if (fail && s == target_state::failed) + throw failed (); - dependency_count.fetch_add (1, std::memory_order_release); - bool r (t.dependents++ != 0); // Safe if someone else is also a dependent. 
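Note the bookkeeping being reworked here: each dependent that matches a target bumps both the global dependency_count and the target's own dependents count, and execute() later decrements them, postponing the action in the "last" execution mode until the final dependent arrives. A minimal sketch of that gate (names illustrative):

    #include <atomic>
    #include <cstddef>

    std::atomic<std::size_t> dependents {0};

    // Called once per dependent during match.
    //
    void
    add_dependent ()
    {
      dependents.fetch_add (1, std::memory_order_release);
    }

    // Called once per dependent during execute. In the "last" execution
    // mode (for example, clean) only the final dependent performs the
    // action; earlier calls report a postponed state.
    //
    bool
    last_dependent ()
    {
      return dependents.fetch_sub (1, std::memory_order_release) == 1;
    }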
+ dependency_count.fetch_add (1, memory_order_release); + t.dependents.fetch_add (1, memory_order_release); - // text << "M " << t << ": " << t.dependents << " " << dependency_count; + return s; + } + + inline bool + match (action a, const target& t, unmatch um) + { + assert (phase == run_phase::match); + + target_state s (match (a, t, 0, nullptr)); + + if (s == target_state::failed) + throw failed (); + + switch (um) + { + case unmatch::none: break; + case unmatch::unchanged: + { + if (s == target_state::unchanged) + return true; + + break; + } + case unmatch::safe: + { + // Safe if unchanged or someone else is also a dependent. + // + if (s == target_state::unchanged || + t.dependents.load (memory_order_consume) != 0) + return true; + + break; + } + } + + dependency_count.fetch_add (1, memory_order_release); + t.dependents.fetch_add (1, memory_order_release); + + return false; + } + + inline target_state + match_async (action a, const target& t, + size_t sc, atomic_count& tc, + bool fail) + { + assert (phase == run_phase::match); + target_state r (match (a, t, sc, &tc)); + + if (fail && !keep_going && r == target_state::failed) + throw failed (); return r; } inline void - unmatch (action, target& t) + match_recipe (target_lock& l, recipe r) { - // text << "U " << t << ": " << t.dependents << " " << dependency_count; - - assert (phase == run_phase::search_match); - -#ifndef NDEBUG - size_t td (t.dependents--); - size_t gd (dependency_count--); - assert (td != 0 && gd != 0); -#else - t.dependents.fetch_sub (1, std::memory_order_release); - dependency_count.fetch_sub (1, std::memory_order_release); -#endif + assert (phase == run_phase::match && l.target != nullptr); + + target& t (*l.target); + t.rule = nullptr; // No rule. + t.recipe (move (r)); + l.offset = target::offset_applied; } inline pair - match_delegate (slock& ml, action a, target& t, const rule& r) + match_delegate (action a, target& t, const rule& r) { - assert (phase == run_phase::search_match); - - auto rp (match_impl (ml, a, t, false, &r)); - const match_result& mr (rp.second); - return make_pair (rp.first->apply (ml, mr.recipe_action, t), - mr.recipe_action); + assert (phase == run_phase::match); + auto mr (match_impl (a, t, &r)); + return make_pair (apply_impl (t, mr.first, mr.second), mr.second); } group_view - resolve_group_members_impl (slock& ml, action, target&); + resolve_group_members_impl (action, const target&, target_lock); inline group_view - resolve_group_members (slock& ml, action a, target& g) + resolve_group_members (action a, const target& g) { - assert (phase == run_phase::search_match); + group_view r; + + // We can be called during execute though everything should have been + // already resolved. + // + switch (phase) + { + case run_phase::match: + { + // Grab a target lock to make sure the group state is synchronized. + // + target_lock l (lock_impl (a, g, scheduler::work_none)); + r = g.group_members (a); + + // If the group members are alrealy known or there is nothing else + // we can do, then unlock and return. + // + if (r.members == nullptr && l.offset != target::offset_executed) + r = resolve_group_members_impl (a, g, move (l)); + + break; + } + case run_phase::execute: r = g.group_members (a); break; + case run_phase::load: assert (false); + } - group_view r (g.group_members (a)); - return r.members != nullptr ? 
r : resolve_group_members_impl (ml, a, g); + return r; } void - search_and_match_prerequisites (slock&, action, target&, const scope*); + match_prerequisites (action, target&, const scope*); void - search_and_match_prerequisite_members (slock&, action, target&, const scope*); + match_prerequisite_members (action, target&, const scope*); inline void - search_and_match_prerequisites (slock& ml, action a, target& t) + match_prerequisites (action a, target& t) { - search_and_match_prerequisites ( - ml, + match_prerequisites ( a, t, (a.operation () != clean_id ? nullptr : &t.root_scope ())); } inline void - search_and_match_prerequisite_members (slock& ml, action a, target& t) + match_prerequisite_members (action a, target& t) { if (a.operation () != clean_id) - search_and_match_prerequisite_members (ml, a, t, nullptr); + match_prerequisite_members (a, t, nullptr); else // Note that here we don't iterate over members even for see- // through groups since the group target should clean eveything // up. A bit of an optimization. // - search_and_match_prerequisites (ml, a, t, &t.root_scope ()); + match_prerequisites (a, t, &t.root_scope ()); } inline void - search_and_match_prerequisites (slock& ml, - action a, - target& t, - const scope& s) + match_prerequisites (action a, target& t, const scope& s) { - search_and_match_prerequisites (ml, a, t, &s); + match_prerequisites (a, t, &s); } inline void - search_and_match_prerequisite_members (slock& ml, - action a, - target& t, - const scope& s) + match_prerequisite_members (action a, target& t, const scope& s) { - search_and_match_prerequisite_members (ml, a, t, &s); + match_prerequisite_members (a, t, &s); } target_state @@ -176,9 +312,16 @@ namespace build2 } inline target_state - execute_async (action a, const target& t, size_t sc, atomic_count& tc) + execute_async (action a, const target& t, + size_t sc, atomic_count& tc, + bool fail) { - return execute (a, t, sc, &tc); + target_state r (execute (a, t, sc, &tc)); + + if (fail && !keep_going && r == target_state::failed) + throw failed (); + + return r; } inline target_state @@ -190,21 +333,21 @@ namespace build2 inline target_state straight_execute_prerequisites (action a, const target& t) { - auto& p (t.prerequisite_targets); + auto& p (const_cast (t).prerequisite_targets); // MT-aware. return straight_execute_members (a, t, p.data (), p.size ()); } inline target_state reverse_execute_prerequisites (action a, const target& t) { - auto& p (t.prerequisite_targets); + auto& p (const_cast (t).prerequisite_targets); // MT-aware. return reverse_execute_members (a, t, p.data (), p.size ()); } inline target_state execute_prerequisites (action a, const target& t) { - auto& p (t.prerequisite_targets); + auto& p (const_cast (t).prerequisite_targets); // MT-aware. 
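The execute_prerequisites() helpers here (and the *_execute_members() functions earlier) all follow the same two-pass shape introduced by this commit: start every sub-task asynchronously against a shared counter, wait for the counter to drop, then harvest the results, waiting individually for anything that was already busy elsewhere. A generic, self-contained sketch of the two-pass idea using standard futures (the real code uses the build2 scheduler, task_count atomics, and the mark()/unmark() trick instead):

    #include <future>
    #include <vector>

    int process (int x) {return x * 2;} // Stand-in for executing one target.

    int
    execute_all (const std::vector<int>& inputs)
    {
      // Pass 1: start everything asynchronously.
      //
      std::vector<std::future<int>> tasks;
      tasks.reserve (inputs.size ());

      for (int x: inputs)
        tasks.push_back (std::async (std::launch::async, process, x));

      // Pass 2: harvest the results, combining them.
      //
      int r (0);
      for (std::future<int>& f: tasks)
        r += f.get ();

      return r;
    }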
return execute_members (a, t, p.data (), p.size ()); } diff --git a/build2/b-options b/build2/b-options index 9f787cf..c692cae 100644 --- a/build2/b-options +++ b/build2/b-options @@ -416,6 +416,12 @@ namespace build2 bool jobs_specified () const; + const size_t& + max_jobs () const; + + bool + max_jobs_specified () const; + const bool& serial_stop () const; @@ -487,6 +493,8 @@ namespace build2 bool verbose_specified_; size_t jobs_; bool jobs_specified_; + size_t max_jobs_; + bool max_jobs_specified_; bool serial_stop_; bool no_column_; bool no_line_; diff --git a/build2/b-options.cxx b/build2/b-options.cxx index 8a133dd..1118eb4 100644 --- a/build2/b-options.cxx +++ b/build2/b-options.cxx @@ -576,6 +576,8 @@ namespace build2 verbose_specified_ (false), jobs_ (), jobs_specified_ (false), + max_jobs_ (), + max_jobs_specified_ (false), serial_stop_ (), no_column_ (), no_line_ (), @@ -689,12 +691,18 @@ namespace build2 << " 6. Even more detailed information, including state dumps." << ::std::endl; os << std::endl - << "\033[1m--jobs\033[0m|\033[1m-j\033[0m \033[4mnum\033[0m Number of jobs to perform in parallel. This includes both" << ::std::endl - << " the number of active threads inside the build system as" << ::std::endl - << " well as the number of external commands (compilers," << ::std::endl - << " linkers, etc) started but not yet finished. If this option" << ::std::endl - << " is not specified, then the number of available hardware" << ::std::endl - << " threads is used." << ::std::endl; + << "\033[1m--jobs\033[0m|\033[1m-j\033[0m \033[4mnum\033[0m Number of active jobs to perform in parallel. This" << ::std::endl + << " includes both the number of active threads inside the" << ::std::endl + << " build system as well as the number of external commands" << ::std::endl + << " (compilers, linkers, etc) started but not yet finished. If" << ::std::endl + << " this option is not specified, then the number of available" << ::std::endl + << " hardware threads is used." << ::std::endl; + + os << std::endl + << "\033[1m--max-jobs\033[0m|\033[1m-J\033[0m \033[4mnum\033[0m Maximum number of jobs (threads) to create. The default is" << ::std::endl + << " 16x the number of active jobs (--jobs|j\033[0m) on 32-bit" << ::std::endl + << " architectures and 32x on 64-bit. See the build system" << ::std::endl + << " scheduler implementation for details." << ::std::endl; os << std::endl << "\033[1m--serial-stop\033[0m|\033[1m-s\033[0m Run serially and stop at the first error. 
This mode is" << ::std::endl @@ -783,6 +791,12 @@ namespace build2 _cli_options_map_["-j"] = &::build2::cl::thunk< options, size_t, &options::jobs_, &options::jobs_specified_ >; + _cli_options_map_["--max-jobs"] = + &::build2::cl::thunk< options, size_t, &options::max_jobs_, + &options::max_jobs_specified_ >; + _cli_options_map_["-J"] = + &::build2::cl::thunk< options, size_t, &options::max_jobs_, + &options::max_jobs_specified_ >; _cli_options_map_["--serial-stop"] = &::build2::cl::thunk< options, bool, &options::serial_stop_ >; _cli_options_map_["-s"] = diff --git a/build2/b-options.ixx b/build2/b-options.ixx index 61d0f11..c230f7c 100644 --- a/build2/b-options.ixx +++ b/build2/b-options.ixx @@ -258,6 +258,18 @@ namespace build2 return this->jobs_specified_; } + inline const size_t& options:: + max_jobs () const + { + return this->max_jobs_; + } + + inline bool options:: + max_jobs_specified () const + { + return this->max_jobs_specified_; + } + inline const bool& options:: serial_stop () const { diff --git a/build2/b.cli b/build2/b.cli index 6e6800f..d711109 100644 --- a/build2/b.cli +++ b/build2/b.cli @@ -244,11 +244,19 @@ namespace build2 size_t --jobs|-j { "", - "Number of jobs to perform in parallel. This includes both the number of - active threads inside the build system as well as the number of external - commands (compilers, linkers, etc) started but not yet finished. If this - option is not specified, then the number of available hardware threads - is used." + "Number of active jobs to perform in parallel. This includes both the + number of active threads inside the build system as well as the number + of external commands (compilers, linkers, etc) started but not yet + finished. If this option is not specified, then the number of available + hardware threads is used." + } + + size_t --max-jobs|-J + { + "", + "Maximum number of jobs (threads) to create. The default is 16x the + number of active jobs (\c{--jobs|j}) on 32-bit architectures and 32x + on 64-bit. See the build system scheduler implementation for details." } bool --serial-stop|-s diff --git a/build2/b.cxx b/build2/b.cxx index 01d9378..2ada09b 100644 --- a/build2/b.cxx +++ b/build2/b.cxx @@ -32,7 +32,6 @@ #include #include #include -#include #include @@ -294,7 +293,17 @@ main (int argc, char* argv[]) jobs = 1; } - sched.startup (jobs); + size_t max_jobs (0); + + if (ops.max_jobs_specified ()) + { + max_jobs = ops.max_jobs (); + + if (max_jobs != 0 && max_jobs < jobs) + fail << "invalid --max-jobs|-J value"; + } + + sched.startup (jobs, 1, max_jobs); variable_cache_mutex_shard_size = sched.shard_size (); variable_cache_mutex_shard.reset ( @@ -1042,7 +1051,6 @@ main (int argc, char* argv[]) mif->operation_pre (pre_oid); // Cannot be translated. set_current_oif (*pre_oif, oif); - dependency_count = 0; action a (mid, pre_oid, oid); @@ -1057,7 +1065,6 @@ main (int argc, char* argv[]) } set_current_oif (*oif); - dependency_count = 0; action a (mid, oid, 0); @@ -1073,7 +1080,6 @@ main (int argc, char* argv[]) mif->operation_pre (post_oid); // Cannot be translated. set_current_oif (*post_oif, oif); - dependency_count = 0; action a (mid, post_oid, oid); @@ -1118,6 +1124,11 @@ main (int argc, char* argv[]) // scheduler::stat st (sched.shutdown ()); + // In our world we wait for all the tasks to complete, even in case of a + // failure (see, for example, wait_guard). + // + assert (st.task_queue_remain == 0); + if (verb >= (st.thread_max_active > 1 ? 
3 : 4)) { info << "scheduler statistics:" << "\n\n" diff --git a/build2/bin/rule b/build2/bin/rule index 5fe9069..95747c2 100644 --- a/build2/bin/rule +++ b/build2/bin/rule @@ -20,10 +20,10 @@ namespace build2 obj_rule () {} virtual match_result - match (slock&, action, target&, const string& hint) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; }; class lib_rule: public rule @@ -32,10 +32,10 @@ namespace build2 lib_rule () {} virtual match_result - match (slock&, action, target&, const string& hint) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; static target_state perform (action, const target&); diff --git a/build2/bin/rule.cxx b/build2/bin/rule.cxx index 446f91b..3ea8e75 100644 --- a/build2/bin/rule.cxx +++ b/build2/bin/rule.cxx @@ -20,7 +20,7 @@ namespace build2 // obj // match_result obj_rule:: - match (slock&, action a, target& t, const string&) const + match (action a, target& t, const string&) const { fail << diag_doing (a, t) << " target group" << info << "explicitly select obje{}, obja{}, or objs{} member"; @@ -29,29 +29,20 @@ namespace build2 } recipe obj_rule:: - apply (slock&, action, target&) const {return empty_recipe;} + apply (action, target&) const {return empty_recipe;} // lib // // The whole logic is pretty much as if we had our two group members as // our prerequisites. // - - struct match_data - { - const string& type; - }; - - static_assert (sizeof (match_data) <= target::data_size, - "insufficient space"); - match_result lib_rule:: - match (slock&, action act, target& xt, const string&) const + match (action act, target& xt, const string&) const { lib& t (xt.as ()); - // Get the library type to build. If not set for a target, this - // should be configured at the project scope by init(). + // Get the library type to build. If not set for a target, this should + // be configured at the project scope by init(). // const string& type (cast (t["bin.lib"])); @@ -62,7 +53,8 @@ namespace build2 fail << "unknown library type: " << type << info << "'static', 'shared', or 'both' expected"; - t.data (match_data {type}); // Save in the target's auxilary storage. + t.a = a ? &search (t.dir, t.out, t.name) : nullptr; + t.s = s ? &search (t.dir, t.out, t.name) : nullptr; match_result mr (true); @@ -76,31 +68,12 @@ namespace build2 } recipe lib_rule:: - apply (slock& ml, action act, target& xt) const + apply (action act, target& xt) const { lib& t (xt.as ()); - const match_data& md (t.data ()); - const string& type (md.type); - - bool a (type == "static" || type == "both"); - bool s (type == "shared" || type == "both"); - - if (a) - { - if (t.a == nullptr) - t.a = &search (t.dir, t.out, t.name, nullptr, nullptr); - - build2::match (ml, act, *t.a); - } - - if (s) - { - if (t.s == nullptr) - t.s = &search (t.dir, t.out, t.name, nullptr, nullptr); - - build2::match (ml, act, *t.s); - } + const target* m[] = {t.a, t.s}; + match_members (act, t, m); return &perform; } @@ -110,13 +83,7 @@ namespace build2 { const lib& t (xt.as ()); - const match_data& md (t.data ()); - const string& type (md.type); - - bool a (type == "static" || type == "both"); - bool s (type == "shared" || type == "both"); - - const target* m[] = {a ? t.a : nullptr, s ? 
t.s : nullptr}; + const target* m[] = {t.a, t.s}; return execute_members (act, t, m); } } diff --git a/build2/bin/target b/build2/bin/target index 4157f67..35bde60 100644 --- a/build2/bin/target +++ b/build2/bin/target @@ -51,12 +51,6 @@ namespace build2 public: using target::target; - // Group members. - // - const_ptr e = nullptr; - const_ptr a = nullptr; - const_ptr s = nullptr; - public: static const target_type static_type; virtual const target_type& dynamic_type () const {return static_type;} @@ -81,26 +75,32 @@ namespace build2 public: static const target_type static_type; - virtual const target_type& dynamic_type () const {return static_type;} + + virtual const target_type& + dynamic_type () const override {return static_type;} + }; + + // Standard layout type compatible with group_view's const target*[2]. + // + struct lib_members + { + const liba* a = nullptr; + const libs* s = nullptr; }; - class lib: public target + class lib: public target, public lib_members { public: using target::target; - // Group members. - // - const_ptr a = nullptr; - const_ptr s = nullptr; + virtual group_view + group_members (action_type) const override; public: static const target_type static_type; - virtual const target_type& dynamic_type () const override - { - return static_type; - } + virtual const target_type& + dynamic_type () const override {return static_type;} }; // Windows import library. diff --git a/build2/bin/target.cxx b/build2/bin/target.cxx index ea0a1ac..cf187fa 100644 --- a/build2/bin/target.cxx +++ b/build2/bin/target.cxx @@ -10,83 +10,58 @@ namespace build2 { namespace bin { + // Note that we link groups during the load phase since this is often + // relied upon when setting target-specific variables (e.g., we may set a + // common value for lib{} and then append liba/libs-specific values to + // it). While sure inelegant, this is MT-safe since during load we are + // running serial. For the members it is also safe to set the group during + // creation. + extern const char ext_var[] = "extension"; // VC14 rejects constexpr. + template static pair> - obje_factory (const target_type&, + objx_factory (const target_type&, dir_path dir, dir_path out, string n, optional ext) { - obj* o (targets.find (dir, out, n)); - obje* e (new obje (move (dir), move (out), move (n))); + const obj* g (targets.find (dir, out, n)); - if ((e->group = o) != nullptr) - o->e = e; + T* x (new T (move (dir), move (out), move (n))); + x->group = g; - return make_pair (e, move (ext)); + return make_pair (x, move (ext)); } const target_type obje::static_type { "obje", &file::static_type, - &obje_factory, + &objx_factory, &target_extension_var, nullptr, &search_target, // Note: not _file(); don't look for an existing file. false }; - static pair> - obja_factory (const target_type&, - dir_path dir, - dir_path out, - string n, - optional ext) - { - obj* o (targets.find (dir, out, n)); - obja* a (new obja (move (dir), move (out), move (n))); - - if ((a->group = o) != nullptr) - o->a = a; - - return make_pair (a, move (ext)); - } - const target_type obja::static_type { "obja", &file::static_type, - &obja_factory, + &objx_factory, &target_extension_var, nullptr, &search_target, // Note: not _file(); don't look for an existing file. 
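
The lib_members arrangement above relies on a standard-layout struct whose two pointers can be handed to group_view as a contiguous const target*[2]. A minimal standalone sketch of that pattern (type and member names are illustrative, not build2's):

  #include <cassert>
  #include <cstddef>

  struct target {};

  // Standard-layout pair of members that we also want to expose as an array.
  //
  struct members
  {
    const target* a = nullptr;
    const target* s = nullptr;
  };

  static_assert (sizeof (members) == sizeof (const target*) * 2,
                 "member layout incompatible with array");

  struct view
  {
    const target* const* ptr;
    std::size_t count;
  };

  struct group: members
  {
    view
    as_view () const
    {
      // Present the two members as a two-element array. This relies on
      // members being standard-layout with a as its first member (checked
      // by the static_assert above).
      //
      return a != nullptr || s != nullptr
        ? view {reinterpret_cast<const target* const*> (&a), 2}
        : view {nullptr, 0};
    }
  };

  int main ()
  {
    target x;
    group g;
    g.s = &x;

    view v (g.as_view ());
    assert (v.count == 2 && v.ptr[0] == nullptr && v.ptr[1] == &x);
  }
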
false }; - static pair> - objs_factory (const target_type&, - dir_path dir, - dir_path out, - string n, - optional ext) - { - obj* o (targets.find (dir, out, n)); - objs* s (new objs (move (dir), move (out), move (n))); - - if ((s->group = o) != nullptr) - o->s = s; - - return make_pair (s, move (ext)); - } - const target_type objs::static_type { "objs", &file::static_type, - &objs_factory, + &objx_factory, &target_extension_var, nullptr, &search_target, // Note: not _file(); don't look for an existing file. @@ -100,20 +75,23 @@ namespace build2 string n, optional ext) { - obje* e (targets.find (dir, out, n)); - obja* a (targets.find (dir, out, n)); - objs* s (targets.find (dir, out, n)); + // Casts are MT-aware (during serial load). + // + obje* e (phase == run_phase::load + ? const_cast (targets.find (dir, out, n)) + : nullptr); + obja* a (phase == run_phase::load + ? const_cast (targets.find (dir, out, n)) + : nullptr); + objs* s (phase == run_phase::load + ? const_cast (targets.find (dir, out, n)) + : nullptr); obj* o (new obj (move (dir), move (out), move (n))); - if ((o->e = e) != nullptr) - e->group = o; - - if ((o->a = a)!= nullptr) - a->group = o; - - if ((o->s = s)!= nullptr) - s->group = o; + if (e != nullptr) e->group = o; + if (a != nullptr) a->group = o; + if (s != nullptr) s->group = o; return make_pair (o, move (ext)); } @@ -129,26 +107,22 @@ namespace build2 false }; + template static pair> - liba_factory (const target_type& t, - dir_path d, - dir_path o, + libx_factory (const target_type&, + dir_path dir, + dir_path out, string n, optional ext) { - // Only link-up to the group if the types match exactly. - // - lib* l (t == liba::static_type ? targets.find (d, o, n) : nullptr); - liba* a (new liba (move (d), move (o), move (n))); + const lib* g (targets.find (dir, out, n)); - if ((a->group = l) != nullptr) - l->a = a; + T* x (new T (move (dir), move (out), move (n))); + x->group = g; - return make_pair (a, move (ext)); + return make_pair (x, move (ext)); } - // @@ - // // What extensions should we use? At the outset, this is platform- // dependent. And if we consider cross-compilation, is it build or // host-dependent? Feels like it should be host-dependent so that @@ -163,36 +137,18 @@ namespace build2 { "liba", &file::static_type, - &liba_factory, + &libx_factory, &target_extension_var, nullptr, &search_file, false }; - static pair> - libs_factory (const target_type& t, - dir_path d, - dir_path o, - string n, - optional ext) - { - // Only link-up to the group if the types match exactly. - // - lib* l (t == libs::static_type ? targets.find (d, o, n) : nullptr); - libs* s (new libs (move (d), move (o), move (n))); - - if ((s->group = l) != nullptr) - l->s = s; - - return make_pair (s, move (ext)); - } - const target_type libs::static_type { "libs", &file::static_type, - &libs_factory, + &libx_factory, &target_extension_var, nullptr, &search_file, @@ -201,23 +157,37 @@ namespace build2 // lib // + group_view lib:: + group_members (action_type) const + { + static_assert (sizeof (lib_members) == sizeof (const target*) * 2, + "member layout incompatible with array"); + + return a != nullptr || s != nullptr + ? 
group_view {reinterpret_cast (&a), 2} + : group_view {nullptr, 0}; + } + static pair> lib_factory (const target_type&, - dir_path d, - dir_path o, + dir_path dir, + dir_path out, string n, optional ext) { - liba* a (targets.find (d, o, n)); - libs* s (targets.find (d, o, n)); - - lib* l (new lib (move (d), move (o), move (n))); + // Casts are MT-aware (during serial load). + // + liba* a (phase == run_phase::load + ? const_cast (targets.find (dir, out, n)) + : nullptr); + libs* s (phase == run_phase::load + ? const_cast (targets.find (dir, out, n)) + : nullptr); - if ((l->a = a) != nullptr) - a->group = l; + lib* l (new lib (move (dir), move (out), move (n))); - if ((l->s = s) != nullptr) - s->group = l; + if (a != nullptr) a->group = l; + if (s != nullptr) s->group = l; return make_pair (l, move (ext)); } diff --git a/build2/buildfile b/build2/buildfile index 80d72a2..a809a8b 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -8,7 +8,7 @@ exe{b}: \ {hxx ixx cxx}{ algorithm } \ { cxx}{ b } \ {hxx ixx cxx}{ b-options } \ - {hxx cxx}{ context } \ + {hxx ixx cxx}{ context } \ {hxx cxx}{ depdb } \ {hxx cxx}{ diagnostics } \ {hxx cxx}{ dump } \ diff --git a/build2/cc/common b/build2/cc/common index c631f38..5a459b8 100644 --- a/build2/cc/common +++ b/build2/cc/common @@ -185,6 +185,7 @@ namespace build2 public: void process_libraries ( + action, const scope&, lorder, const dir_paths&, @@ -195,44 +196,70 @@ namespace build2 const function&, bool = false) const; - target* - search_library (const dir_paths& sysd, + const target* + search_library (action act, + const dir_paths& sysd, optional& usrd, - prerequisite& p) const + const prerequisite& p) const { - if (p.target == nullptr) // First check the cache. - p.target = search_library (sysd, usrd, p.key ()); - - return p.target; + const target* r (p.target.load (memory_order_consume)); + + if (r == nullptr) + { + if ((r = search_library (act, sysd, usrd, p.key ())) != nullptr) + { + const target* e (nullptr); + if (!p.target.compare_exchange_strong ( + e, r, + memory_order_release, + memory_order_consume)) + assert (e == r); + } + } + + return r; } - private: + public: const file& - resolve_library (const scope&, + resolve_library (action, + const scope&, name, lorder, const dir_paths&, optional&) const; + template + static ulock + insert_library (T*&, + const string&, + const dir_path&, + optional, + bool, + tracer&); + target* - search_library (const dir_paths&, + search_library (action, + const dir_paths&, optional&, const prerequisite_key&, bool existing = false) const; const target* - search_library_existing (const dir_paths& sysd, + search_library_existing (action act, + const dir_paths& sysd, optional& usrd, const prerequisite_key& pk) const { - return search_library (sysd, usrd, pk, true); + return search_library (act, sysd, usrd, pk, true); } dir_paths extract_library_dirs (const scope&) const; bool - pkgconfig_extract (const scope&, + pkgconfig_extract (action, + const scope&, bin::lib&, bin::liba*, bin::libs*, diff --git a/build2/cc/common.cxx b/build2/cc/common.cxx index 7b8499c..88eb45d 100644 --- a/build2/cc/common.cxx +++ b/build2/cc/common.cxx @@ -46,6 +46,7 @@ namespace build2 // void common:: process_libraries ( + action act, const scope& top_bs, lorder top_lo, const dir_paths& top_sysd, @@ -174,13 +175,14 @@ namespace build2 // if (self && proc_lib) { - const string& p (l.path ().string ()); + const path& p (l.path ()); + assert (!p.empty ()); // Must be assigned. bool s (t != nullptr // If cc library (matched or imported). 
? cast_false (l.vars[c_system]) - : sys (top_sysd, p)); + : sys (top_sysd, p.string ())); - proc_lib (&l, p, s); + proc_lib (&l, p.string (), s); } const scope& bs (t == nullptr || cc ? top_bs : l.base_scope ()); @@ -224,7 +226,7 @@ namespace build2 if (sysd == nullptr) find_sysd (); if (!lo) find_lo (); - process_libraries (bs, *lo, *sysd, + process_libraries (act, bs, *lo, *sysd, *f, a, proc_impl, proc_lib, proc_opt, true); } @@ -262,7 +264,7 @@ namespace build2 &proc_impl, &proc_lib, &proc_opt, &sysd, &usrd, &find_sysd, &find_lo, &sys, &sys_simple, - &bs, &lo, this] (const lookup& lu) + &bs, act, &lo, this] (const lookup& lu) { const vector* ns (cast_null> (lu)); if (ns == nullptr || ns->empty ()) @@ -287,7 +289,7 @@ namespace build2 if (sysd == nullptr) find_sysd (); if (!lo) find_lo (); - const file& t (resolve_library (bs, n, *lo, *sysd, usrd)); + const file& t (resolve_library (act, bs, n, *lo, *sysd, usrd)); if (proc_lib) { @@ -300,7 +302,7 @@ namespace build2 // on Windows import-installed DLLs may legally have empty // paths. // - if (t.mtime (false) == timestamp_unknown) + if (t.mtime () == timestamp_unknown) fail << "interface dependency " << t << " is out of date" << info << "mentioned in *.export.libs of target " << l << info << "is it a prerequisite of " << l << "?"; @@ -308,7 +310,7 @@ namespace build2 // Process it recursively. // - process_libraries (bs, *lo, *sysd, + process_libraries (act, bs, *lo, *sysd, t, t.is_a (), proc_impl, proc_lib, proc_opt, true); } @@ -386,7 +388,8 @@ namespace build2 // that's the only way to guarantee it will be up-to-date. // const file& common:: - resolve_library (const scope& s, + resolve_library (action act, + const scope& s, name n, lorder lo, const dir_paths& sysd, @@ -422,7 +425,7 @@ namespace build2 // dir_path out; prerequisite_key pk {n.proj, {tt, &n.dir, &out, &n.value, ext}, &s}; - xt = search_library_existing (sysd, usrd, pk); + xt = search_library_existing (act, sysd, usrd, pk); if (xt == nullptr) { @@ -437,17 +440,41 @@ namespace build2 // If this is lib{}, pick appropriate member. // if (const lib* l = xt->is_a ()) - xt = &link_member (*l, lo); // Pick liba{} or libs{}. + xt = &link_member (*l, act, lo); // Pick liba{} or libs{}. return xt->as (); } - // Note that pk's scope should not be NULL (even if dir is absolute). If - // sys is not NULL, then store there an inidication of whether this is a - // system library. + // Insert a target verifying that it already exists if requested. Return + // the lock. + // + template + ulock common:: + insert_library (T*& r, + const string& name, + const dir_path& d, + optional ext, + bool exist, + tracer& trace) + { + auto p (targets.insert_locked (T::static_type, + d, + dir_path (), + name, + move (ext), + true, // Implied. + trace)); + + assert (!exist || !p.second.owns_lock ()); + r = &p.first.template as (); + return move (p.second); + } + + // Note that pk's scope should not be NULL (even if dir is absolute). // target* common:: - search_library (const dir_paths& sysd, + search_library (action act, + const dir_paths& sysd, optional& usrd, const prerequisite_key& p, bool exist) const @@ -546,31 +573,11 @@ namespace build2 path f; // Reuse the buffer. const dir_path* pd (nullptr); - // Insert a target verifying that it already exists if requested. 
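
The search_library() caching above is a fill-once, lock-free cache: load the atomic pointer, and on a miss publish the result with compare_exchange_strong, asserting that any concurrent filler found the same target. A standalone sketch of the idiom, assuming an expensive lookup() that always resolves to the same object:

  #include <atomic>
  #include <cassert>

  struct target {};

  target&
  lookup ()                     // Stand-in for an expensive search.
  {
    static target t;
    return t;
  }

  const target*
  cached_search (std::atomic<const target*>& cache)
  {
    // First check the cache (consume is normally promoted to acquire).
    //
    const target* r (cache.load (std::memory_order_consume));

    if (r == nullptr)
    {
      r = &lookup ();

      // Publish the result. If someone else got there first, they must
      // have found the same target.
      //
      const target* e (nullptr);
      if (!cache.compare_exchange_strong (e, r,
                                          std::memory_order_release,
                                          std::memory_order_consume))
        assert (e == r);
    }

    return r;
  }

  int main ()
  {
    std::atomic<const target*> cache {nullptr};
    const target* a (cached_search (cache));
    const target* b (cached_search (cache));
    assert (a == b);
  }
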
- // - auto insert = [&name, exist, &trace] (auto*& r, - const dir_path& d, - optional ext) - { - using T = typename std::remove_reference::type; - - auto p (targets.insert (T::static_type, - d, - dir_path (), - name, - move (ext), - true, // Implied. - trace)); - - assert (!exist || !p.second); - r = &p.first.template as (); - }; - auto search =[&a, &s, &an, &ae, &sn, &se, &name, ext, - &p, &f, &insert, exist, this] (const dir_path& d) -> bool + &p, &f, exist, &trace, this] (const dir_path& d) -> bool { timestamp mt; @@ -593,17 +600,22 @@ namespace build2 // if (tclass == "windows") { - insert (s, d, nullopt); + libi* i (nullptr); + insert_library (i, name, d, se, exist, trace); + + ulock l (insert_library (s, name, d, nullopt, exist, trace)); - if (s->member == nullptr) + if (!exist) { - libi* i; - insert (i, d, se); + if (l.owns_lock ()) + s->member = i; + else + assert (s->member == i); - if (i->path ().empty ()) - i->path (move (f)); + l.unlock (); i->mtime (mt); + i->path (move (f)); // Presumably there is a DLL somewhere, we just don't know // where (and its possible we might have to look for one if we @@ -612,17 +624,15 @@ namespace build2 // but valid timestamp (aka "trust me, it's there"). // s->mtime (mt); - s->member = i; + s->path (path ()); } } else { - insert (s, d, se); - - if (s->path ().empty ()) - s->path (move (f)); + insert_library (s, name, d, se, exist, trace); s->mtime (mt); + s->path (move (f)); } } else if (!ext && tsys == "mingw32") @@ -639,12 +649,10 @@ namespace build2 if (mt != timestamp_nonexistent) { - insert (s, d, se); - - if (s->path ().empty ()) - s->path (move (f)); + insert_library (s, name, d, se, exist, trace); s->mtime (mt); + s->path (move (f)); } } } @@ -666,12 +674,9 @@ namespace build2 // Note that this target is outside any project which we treat // as out trees. // - insert (a, d, ae); - - if (a->path ().empty ()) - a->path (move (f)); - + insert_library (a, name, d, ae, exist, trace); a->mtime (mt); + a->path (move (f)); } } @@ -727,29 +732,44 @@ namespace build2 if (pd == nullptr) return nullptr; - // Enter (or find) the lib{} target group. Note that we must be careful - // here since its possible we have already imported some of its members. + // Enter (or find) the lib{} target group. // lib* lt; - insert (lt, *pd, l ? p.tk.ext : nullopt); + insert_library (lt, name, *pd, l ? p.tk.ext : nullopt, exist, trace); - // It should automatically link-up to the members we have found. + // Result. // - assert (a == nullptr || lt->a == a); - assert (s == nullptr || lt->s == s); + target* r (l ? lt : (p.is_a () ? static_cast (a) : s)); + + // Assume the rest is already done if existing. + // + if (exist) + return r; - // Update the bin.lib variable to indicate what's available. Assume - // already done if existing. + // If we cannot acquire the lock then this mean the target has already + // been matched (though not clear by whom) and we assume all of this + // has already been done. // - if (!exist) + target_lock ll (lock (act, *lt)); + + // Set lib{} group members to indicate what's available. Note that we + // must be careful here since its possible we have already imported some + // of its members. + // + if (ll) { - const char* bl (lt->a != nullptr - ? (lt->s != nullptr ? "both" : "static") - : "shared"); - lt->assign (var_pool["bin.lib"]) = bl; + if (a != nullptr) lt->a = a; + if (s != nullptr) lt->s = s; } - target* r (l ? lt : (p.is_a () ? static_cast (a) : s)); + target_lock al (a != nullptr ? 
lock (act, *a) : target_lock ()); + target_lock sl (s != nullptr ? lock (act, *s) : target_lock ()); + + if (!al) a = nullptr; + if (!sl) s = nullptr; + + if (a != nullptr) a->group = lt; + if (s != nullptr) s->group = lt; // Mark as a "cc" library (unless already marked) and set the system // flag. @@ -769,24 +789,16 @@ namespace build2 return p.second; }; - // If the library already has cc.type, then assume it was either already - // imported or was matched by a rule. - // - // Assume already done if existing. + // If the library already has cc.type, then assume it was either + // already imported or was matched by a rule. // - if (!exist) - { - if (a != nullptr && !mark_cc (*a)) - a = nullptr; - - if (s != nullptr && !mark_cc (*s)) - s = nullptr; - } + if (a != nullptr && !mark_cc (*a)) a = nullptr; + if (s != nullptr && !mark_cc (*s)) s = nullptr; // Add the "using static/shared library" macro (used, for example, to - // handle DLL export). The absence of either of these macros would mean - // some other build system that cannot distinguish between the two (and - // no pkg-config information). + // handle DLL export). The absence of either of these macros would + // mean some other build system that cannot distinguish between the + // two (and no pkg-config information). // auto add_macro = [this] (target& t, const char* suffix) { @@ -832,17 +844,15 @@ namespace build2 } }; - // Assume already done if existing. - // - if (!exist && (a != nullptr || s != nullptr)) + if (ll && (a != nullptr || s != nullptr)) { - // Try to extract library information from pkg-config. We only add the - // default macro if we could not extract more precise information. The - // idea is that when we auto-generate .pc files, we will copy those - // macros (or custom ones) from *.export.poptions. + // Try to extract library information from pkg-config. We only add + // the default macro if we could not extract more precise + // information. The idea is that when we auto-generate .pc files, we + // will copy those macros (or custom ones) from *.export.poptions. // - if (pkgconfig == nullptr || - !pkgconfig_extract (*p.scope, *lt, a, s, p.proj, name, *pd, sysd)) + if (pkgconfig == nullptr || !pkgconfig_extract ( + act, *p.scope, *lt, a, s, p.proj, name, *pd, sysd)) { if (a != nullptr) add_macro (*a, "STATIC"); if (s != nullptr) add_macro (*s, "SHARED"); diff --git a/build2/cc/compile b/build2/cc/compile index 63ce286..2986b7d 100644 --- a/build2/cc/compile +++ b/build2/cc/compile @@ -27,10 +27,10 @@ namespace build2 compile (data&&); virtual match_result - match (slock&, action, target&, const string& hint) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; target_state perform_update (action, const target&) const; @@ -43,13 +43,13 @@ namespace build2 append_lib_options (const scope&, cstrings&, const target&, - lorder) const; + action, lorder) const; void hash_lib_options (const scope&, sha256&, const target&, - lorder) const; + action, lorder) const; // Mapping of include prefixes (e.g., foo in ) for auto- // generated headers to directories where they will be generated. 
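
The STATIC/SHARED macros added above follow the usual convention where a library's export header selects dllexport/dllimport (or nothing) depending on how the library is built and consumed. An illustrative header; the LIBFOO_* names are hypothetical and not defined by this commit:

  // libfoo/export.hxx (illustrative)
  //
  #if defined(_WIN32)
  #  if defined(LIBFOO_BUILD_SHARED)   // Building the DLL itself.
  #    define LIBFOO_EXPORT __declspec(dllexport)
  #  elif defined(LIBFOO_SHARED)       // Consuming the shared library.
  #    define LIBFOO_EXPORT __declspec(dllimport)
  #  else                              // LIBFOO_STATIC or unknown.
  #    define LIBFOO_EXPORT
  #  endif
  #else
  #  define LIBFOO_EXPORT
  #endif

  LIBFOO_EXPORT int
  foo_version ();
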
@@ -67,10 +67,13 @@ namespace build2 append_prefixes (prefix_map&, const target&, const variable&) const; void - append_lib_prefixes (const scope&, prefix_map&, target&, lorder) const; + append_lib_prefixes (const scope&, + prefix_map&, + target&, + action, lorder) const; prefix_map - build_prefix_map (const scope&, target&, lorder) const; + build_prefix_map (const scope&, target&, action, lorder) const; // Reverse-lookup target type from extension. // @@ -80,7 +83,7 @@ namespace build2 // Header dependency injection. // void - inject (slock&, action, target&, lorder, file&, depdb&) const; + inject (action, target&, lorder, const file&, depdb&) const; private: const string rule_id; diff --git a/build2/cc/compile.cxx b/build2/cc/compile.cxx index c04f0a9..f202ba1 100644 --- a/build2/cc/compile.cxx +++ b/build2/cc/compile.cxx @@ -38,13 +38,14 @@ namespace build2 struct match_data { prerequisite_member src; + timestamp dd_mtime; // depdb mtime, timestamp_nonexistent if outdated. }; static_assert (sizeof (match_data) <= target::data_size, "insufficient space"); match_result compile:: - match (slock& ml, action a, target& t, const string&) const + match (action act, target& t, const string&) const { tracer trace (x, "compile::match"); @@ -54,16 +55,23 @@ namespace build2 // - if path already assigned, verify extension? // + // Link-up to our group (this is the obj{} target group protocol which + // means this can be done whether we match or not). + // + if (t.group == nullptr) + t.group = targets.find (t.dir, t.out, t.name); + // See if we have a source file. Iterate in reverse so that a source // file specified for an obj*{} member overrides the one specified for // the group. Also "see through" groups. // - for (prerequisite_member p: - reverse_group_prerequisite_members (ml, a, t)) + for (prerequisite_member p: reverse_group_prerequisite_members (act, t)) { if (p.is_a (x_src)) { - t.data (match_data {p}); // Save in the target's auxilary storage. + // Save in the target's auxilary storage. + // + t.data (match_data {p, timestamp_nonexistent}); return true; } } @@ -79,6 +87,7 @@ namespace build2 append_lib_options (const scope& bs, cstrings& args, const target& t, + action act, lorder lo) const { auto opt = [&args, this] ( @@ -103,18 +112,20 @@ namespace build2 // Note that here we don't need to see group members (see apply()). // - for (const prerequisite& p: const_group_prerequisites (t)) + for (const prerequisite& p: group_prerequisites (t)) { - const target* pt (p.target); // Already searched and matched. + // Should be already searched and matched. + // + const target* pt (p.target.load (memory_order_consume)); bool a; if (const lib* l = pt->is_a ()) - a = (pt = &link_member (*l, lo))->is_a (); + a = (pt = &link_member (*l, act, lo))->is_a (); else if (!(a = pt->is_a ()) && !pt->is_a ()) continue; - process_libraries (bs, lo, sys_lib_dirs, + process_libraries (act, bs, lo, sys_lib_dirs, pt->as (), a, nullptr, nullptr, optf); } @@ -124,6 +135,7 @@ namespace build2 hash_lib_options (const scope& bs, sha256& cs, const target& t, + action act, lorder lo) const { auto opt = [&cs, this] ( @@ -143,18 +155,20 @@ namespace build2 // const function optf (opt); - for (const prerequisite& p: const_group_prerequisites (t)) + for (const prerequisite& p: group_prerequisites (t)) { - const target* pt (p.target); // Already searched and matched. + // Should be already searched and matched. 
+ // + const target* pt (p.target.load (memory_order_consume)); bool a; if (const lib* l = pt->is_a ()) - a = (pt = &link_member (*l, lo))->is_a (); + a = (pt = &link_member (*l, act, lo))->is_a (); else if (!(a = pt->is_a ()) && !pt->is_a ()) continue; - process_libraries (bs, lo, sys_lib_dirs, + process_libraries (act, bs, lo, sys_lib_dirs, pt->as (), a, nullptr, nullptr, optf); } @@ -167,6 +181,7 @@ namespace build2 append_lib_prefixes (const scope& bs, prefix_map& m, target& t, + action act, lorder lo) const { auto opt = [&m, this] ( @@ -186,30 +201,32 @@ namespace build2 // const function optf (opt); - for (prerequisite& p: group_prerequisites (t)) + for (const prerequisite& p: group_prerequisites (t)) { - target* pt (p.target); // Already searched and matched. + // Should be already searched and matched. + // + const target* pt (p.target.load (memory_order_consume)); bool a; - if (lib* l = pt->is_a ()) - a = (pt = &link_member (*l, lo))->is_a (); + if (const lib* l = pt->is_a ()) + a = (pt = &link_member (*l, act, lo))->is_a (); else if (!(a = pt->is_a ()) && !pt->is_a ()) continue; - process_libraries (bs, lo, sys_lib_dirs, + process_libraries (act, bs, lo, sys_lib_dirs, pt->as (), a, nullptr, nullptr, optf); } } recipe compile:: - apply (slock& ml, action a, target& xt) const + apply (action act, target& xt) const { tracer trace (x, "compile::apply"); file& t (xt.as ()); - const match_data& md (t.data ()); + match_data& md (t.data ()); const scope& bs (t.base_scope ()); const scope& rs (*bs.root_scope ()); @@ -219,71 +236,77 @@ namespace build2 // Derive file name from target name. // - if (t.path ().empty ()) - { - const char* e (nullptr); + const char* e (nullptr); - if (tsys == "win32-msvc") + if (tsys == "win32-msvc") + { + switch (ct) { - switch (ct) - { - case otype::e: e = "exe.obj"; break; - case otype::a: e = "lib.obj"; break; - case otype::s: e = "dll.obj"; break; - } + case otype::e: e = "exe.obj"; break; + case otype::a: e = "lib.obj"; break; + case otype::s: e = "dll.obj"; break; } - else if (tsys == "mingw32") + } + else if (tsys == "mingw32") + { + switch (ct) { - switch (ct) - { - case otype::e: e = "exe.o"; break; - case otype::a: e = "a.o"; break; - case otype::s: e = "dll.o"; break; - } + case otype::e: e = "exe.o"; break; + case otype::a: e = "a.o"; break; + case otype::s: e = "dll.o"; break; } - else if (tsys == "darwin") + } + else if (tsys == "darwin") + { + switch (ct) { - switch (ct) - { - case otype::e: e = "o"; break; - case otype::a: e = "a.o"; break; - case otype::s: e = "dylib.o"; break; - } + case otype::e: e = "o"; break; + case otype::a: e = "a.o"; break; + case otype::s: e = "dylib.o"; break; } - else + } + else + { + switch (ct) { - switch (ct) - { - case otype::e: e = "o"; break; - case otype::a: e = "a.o"; break; - case otype::s: e = "so.o"; break; - } + case otype::e: e = "o"; break; + case otype::a: e = "a.o"; break; + case otype::s: e = "so.o"; break; } - - t.derive_path (e); } + const path& tp (t.derive_path (e)); + // Inject dependency on the output directory. // - fsdir* dir (inject_fsdir (ml, a, t)); + const fsdir* dir (inject_fsdir (act, t)); - // Search and match all the existing prerequisites. The injection code - // takes care of the ones it is adding. + // Match all the existing prerequisites. The injection code takes care + // of the ones it is adding. // // When cleaning, ignore prerequisites that are not in the same or a // subdirectory of our project root. 
// + auto& pts (t.prerequisite_targets); optional usr_lib_dirs; // Extract lazily. - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + // Start asynchronous matching of prerequisites. Wait with unlocked + // phase to allow phase switching. + // + wait_guard wg (target::count_busy (), t.task_count, true); + + size_t start (pts.size ()); // Index of the first to be added. + for (prerequisite_member p: group_prerequisite_members (act, t)) { + const target* pt (nullptr); + // A dependency on a library is there so that we can get its // *.export.poptions. This is the "library meta-information // protocol". See also append_lib_options(). // if (p.is_a () || p.is_a () || p.is_a ()) { - if (a.operation () == update_id) + if (act.operation () == update_id) { // Handle imported libraries. We know that for such libraries we // don't need to do match() in order to get options (if any, they @@ -291,53 +314,68 @@ namespace build2 // if (p.proj ()) { - if (search_library (sys_lib_dirs, + if (search_library (act, + sys_lib_dirs, usr_lib_dirs, p.prerequisite) != nullptr) continue; } - target* pt (&p.search ()); + pt = &p.search (); - if (lib* l = pt->is_a ()) - pt = &link_member (*l, lo); - - // Making sure it is executed before us will only restrict - // parallelism. But we do need to match it in order to get its - // imports resolved and prerequisite_targets populated. So we - // match it but then unmatch if it is safe. And thanks to the - // two-pass prerequisite search & match in link::apply() it will - // be safe unless someone is building an obj?{} target directory. - // - if (build2::match (ml, a, *pt)) - unmatch (a, *pt); - else - t.prerequisite_targets.push_back (pt); + if (const lib* l = pt->is_a ()) + pt = &link_member (*l, act, lo); } continue; } + else + { + pt = &p.search (); - target& pt (p.search ()); + if (act.operation () == clean_id && !pt->dir.sub (rs.out_path ())) + continue; + } - if (a.operation () == clean_id && !pt.dir.sub (rs.out_path ())) - continue; + match_async (act, *pt, target::count_busy (), t.task_count); + pts.push_back (pt); + } + + wg.wait (); + + // Finish matching all the targets that we have started. + // + for (size_t i (start), n (pts.size ()); i != n; ++i) + { + const target*& pt (pts[i]); + + // Making sure a library is updated before us will only restrict + // parallelism. But we do need to match it in order to get its imports + // resolved and prerequisite_targets populated. So we match it but + // then unmatch if it is safe. And thanks to the two-pass prerequisite + // match in link::apply() it will be safe unless someone is building + // an obj?{} target directory. + // + if (build2::match (act, + *pt, + pt->is_a () || pt->is_a () + ? unmatch::safe + : unmatch::none)) + pt = nullptr; // Ignore in execute. - build2::match (ml, a, pt); - t.prerequisite_targets.push_back (&pt); } // Inject additional prerequisites. We only do it when performing update // since chances are we will have to update some of our prerequisites in // the process (auto-generated source code). // - if (a == perform_update_id) + if (act == perform_update_id) { // The cached prerequisite target should be the same as what is in // t.prerequisite_targets since we used standard search() and match() // above. // - file& src (*md.src.search ().is_a ()); + const file& src (*md.src.search ().is_a ()); // Make sure the output directory exists. // @@ -351,9 +389,21 @@ namespace build2 // things. 
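
The structure above — start every match asynchronously under a wait_guard, then a second "harvest" pass that completes (and possibly unmatches) each one — can be sketched with standard futures; build2's scheduler, task counts, and unmatch semantics are not reproduced here:

  #include <future>
  #include <vector>
  #include <cstdio>

  int
  match (int id)               // Stand-in for matching one prerequisite.
  {
    return id * 2;
  }

  int main ()
  {
    std::vector<std::future<int>> started;

    // Pass 1: start all matches without blocking on any of them.
    //
    for (int id (0); id != 4; ++id)
      started.push_back (std::async (std::launch::async, match, id));

    // Pass 2 ("harvest"): wait for and post-process each result. In the
    // real rule this is where a library prerequisite may be unmatched if
    // that is safe.
    //
    for (std::future<int>& f: started)
      std::printf ("matched: %d\n", f.get ());
  }
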
// if (dir != nullptr) - execute_direct (a, *dir); + { + // We can do it properly by using execute_direct(). But this means + // we will be switching to the execute phase with all the associated + // overheads. At the same time, in case of update, creation of a + // directory is not going to change the external state in any way + // that would affect any parallel efforts in building the internal + // state. So we are just going to create the directory directly. + // Note, however, that we cannot modify the fsdir{} target since + // this can very well be happening in parallel. But that's not a + // problem since fsdir{}'s update is idempotent. + // + fsdir_rule::perform_update_direct (act, t); + } - depdb dd (t.path () + ".d"); + depdb dd (tp + ".d"); // First should come the rule name/version. // @@ -379,7 +429,7 @@ namespace build2 // Hash *.export.poptions from prerequisite libraries. // - hash_lib_options (bs, cs, t, lo); + hash_lib_options (bs, cs, t, act, lo); // Extra system header dirs (last). // @@ -410,15 +460,13 @@ namespace build2 // compiler, options, or source file), or if the database is newer // than the target (interrupted update) then force the target update. // - if (dd.writing () || dd.mtime () > t.mtime ()) - t.mtime (timestamp_nonexistent); - - inject (ml, a, t, lo, src, dd); + md.dd_mtime = dd.writing () ? timestamp_nonexistent : dd.mtime (); + inject (act, t, lo, src, dd); dd.close (); } - switch (a) + switch (act) { case perform_update_id: return [this] (action a, const target& t) { @@ -466,7 +514,7 @@ namespace build2 void compile:: append_prefixes (prefix_map& m, const target& t, const variable& var) const { - tracer trace (x, "append_prefixes"); + tracer trace (x, "compile::append_prefixes"); // If this target does not belong to any project (e.g, an // "imported as installed" library), then it can't possibly @@ -558,7 +606,7 @@ namespace build2 auto compile:: build_prefix_map (const scope& bs, target& t, - lorder lo) const -> prefix_map + action act, lorder lo) const -> prefix_map { prefix_map m; @@ -569,7 +617,7 @@ namespace build2 // Then process the include directories from prerequisite libraries. // - append_lib_prefixes (bs, m, t, lo); + append_lib_prefixes (bs, m, t, act, lo); return m; } @@ -748,12 +796,7 @@ namespace build2 } void compile:: - inject (slock& ml, - action a, - target& t, - lorder lo, - file& src, - depdb& dd) const + inject (action act, target& t, lorder lo, const file& src, depdb& dd) const { tracer trace (x, "compile::inject"); @@ -762,12 +805,12 @@ namespace build2 // If things go wrong (and they often do in this area), give the user a // bit extra context. // - auto g ( - make_exception_guard ( - [&src]() - { - info << "while extracting header dependencies from " << src; - })); + auto df = make_diag_frame ( + [&src](const diag_record& dr) + { + if (verb != 0) + dr << info << "while extracting header dependencies from " << src; + }); const scope& bs (t.base_scope ()); const scope& rs (*bs.root_scope ()); @@ -777,14 +820,14 @@ namespace build2 const process_path* xc (nullptr); cstrings args; - auto init_args = [&ml, &t, lo, &src, &rs, &bs, &xc, &args, this] () + auto init_args = [&t, act, lo, &src, &rs, &bs, &xc, &args, this] () { xc = &cast (rs[x_path]); args.push_back (xc->recall_string ()); // Add *.export.poptions from prerequisite libraries. 
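
The dd_mtime bookkeeping above forces an unconditional update when the dependency database had to be rewritten or is newer than the target (an interrupted update). A simplified standalone version of that decision, assuming both mtimes are already known; the real depdb also tracks the rule name/version, option hashes, and the header list:

  #include <chrono>
  #include <cassert>

  using timestamp = std::chrono::system_clock::time_point;

  const timestamp timestamp_nonexistent {};  // "Must update" marker.

  // Return the timestamp against which prerequisites should be compared:
  // the target's mtime normally, or timestamp_nonexistent if the database
  // was rewritten (dd_mtime is nonexistent) or is newer than the target
  // (interrupted update).
  //
  timestamp
  reference_mtime (timestamp dd_mtime, timestamp target_mtime)
  {
    if (dd_mtime == timestamp_nonexistent || dd_mtime > target_mtime)
      return timestamp_nonexistent;

    return target_mtime;
  }

  int main ()
  {
    using namespace std::chrono_literals;

    timestamp t (std::chrono::system_clock::now ());

    assert (reference_mtime (timestamp_nonexistent, t) == timestamp_nonexistent);
    assert (reference_mtime (t - 1h, t) == t);
    assert (reference_mtime (t + 1h, t) == timestamp_nonexistent);
  }
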
// - append_lib_options (bs, args, t, lo); + append_lib_options (bs, args, t, act, lo); append_options (args, t, c_poptions); append_options (args, t, x_poptions); @@ -904,18 +947,35 @@ namespace build2 // (which causes the target state to be automatically set to unchanged) // if the file is known to be up to date. // - auto update = [&trace, a] (path_target& pt, timestamp ts) -> bool + auto update = [&trace, act] (const path_target& pt, timestamp ts) -> bool { - target_state os (pt.synchronized_state ()); //@@ MT? matched? + target_state os (pt.matched_state (act)); - if (os != target_state::unchanged) + if (os == target_state::unchanged) + { + if (ts == timestamp_unknown) + return false; + else + { + // We expect the timestamp to be known (i.e., existing file). + // + timestamp mt (pt.mtime ()); // @@ MT perf: know target state. + assert (mt != timestamp_unknown); + return mt > ts; + } + } + else { - // We only want to restart if our call to execute() actually - // caused an update. In particular, the target could already - // have been in target_state::changed because of a dependency - // extraction run for some other source file. + // We only want to restart if our call to execute() actually caused + // an update. In particular, the target could already have been in + // target_state::changed because of a dependency extraction run for + // some other source file. // - target_state ns (execute_direct (a, pt)); //@@ MT extenal modification sync. + // @@ MT perf: so we are going to switch the phase and execute for + // any generated header. + // + phase_switch ps (run_phase::execute); + target_state ns (execute_direct (act, pt)); if (ns != os && ns != target_state::unchanged) { @@ -924,9 +984,9 @@ namespace build2 << "; new state " << ns;}); return true; } + else + return ts != timestamp_unknown ? pt.newer (ts) : false; } - - return ts != timestamp_unknown ? pt.newer (ts) : false; }; // Update and add a header file to the list of prerequisite targets. @@ -934,12 +994,13 @@ namespace build2 // from the depdb cache or from the compiler run. Return whether the // extraction process should be restarted. // - auto add = [&trace, &ml, &update, &pm, a, &t, lo, &dd, &bs, this] + auto add = [&trace, &update, &pm, act, &t, lo, &dd, &bs, this] (path f, bool cache) -> bool { // Find or maybe insert the target. // - auto find = [&trace, this] (const path& f, bool insert) -> path_target* + auto find = [&trace, this] ( + const path& f, bool insert) -> const path_target* { // Split the name into its directory part, the name part, and // extension. Here we can assume the name part is a valid filesystem @@ -997,7 +1058,7 @@ namespace build2 // // @@ OPT: move d, out, n // - target* r; + const target* r; if (insert) r = &search (*tt, d, out, n, &e, nullptr); else @@ -1009,10 +1070,10 @@ namespace build2 r = targets.find (*tt, d, out, n, e, trace); } - return static_cast (r); + return static_cast (r); }; - path_target* pt (nullptr); + const path_target* pt (nullptr); // If it's not absolute then it does not exist. // @@ -1029,7 +1090,7 @@ namespace build2 // then we would have failed below. // if (pm.empty ()) - pm = build_prefix_map (bs, t, lo); + pm = build_prefix_map (bs, t, act, lo); // First try the whole file. Then just the directory. // @@ -1097,16 +1158,13 @@ namespace build2 pt = find (f, true); } - // Assign path. + // Cache the path. // - if (pt->path ().empty ()) - pt->path (move (f)); - else - assert (pt->path () == f); + const path& pp (pt->path (move (f))); // Match to a rule. 
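
Mapping a header path to a target, as the add lambda above does, starts by splitting the path into directory, name, and extension components. A simplified sketch using std::filesystem; the real code additionally consults the prefix map for relative headers and reverse-looks-up the target type from the extension:

  #include <filesystem>
  #include <string>
  #include <tuple>
  #include <cassert>

  namespace fs = std::filesystem;

  // Split an absolute header path into (directory, name, extension
  // without the leading dot).
  //
  std::tuple<fs::path, std::string, std::string>
  split (const fs::path& f)
  {
    fs::path d (f.parent_path ());
    std::string n (f.stem ().string ());
    std::string e (f.extension ().string ());

    if (!e.empty ())
      e.erase (0, 1); // Drop the leading dot.

    return {d, n, e};
  }

  int main ()
  {
    auto [d, n, e] = split (fs::path ("/usr/include/foo/bar.hxx"));

    assert (d == fs::path ("/usr/include/foo"));
    assert (n == "bar" && e == "hxx");
  }
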
// - build2::match (ml, a, *pt); + build2::match (act, *pt); // Update. // @@ -1121,7 +1179,7 @@ namespace build2 // update). // if (!cache) - dd.expect (pt->path ()); + dd.expect (pp); // Add to our prerequisite target list. // @@ -1419,9 +1477,11 @@ namespace build2 msvc_filter_cl (ifdstream&, const path& src); target_state compile:: - perform_update (action a, const target& xt) const + perform_update (action act, const target& xt) const { const file& t (xt.as ()); + const path& tp (t.path ()); + const match_data& md (t.data ()); // Update prerequisites and determine if any relevant ones render us // out-of-date. Note that currently we treat all the prerequisites @@ -1429,7 +1489,16 @@ namespace build2 // const file* s; { - auto p (execute_prerequisites (x_src, a, t, t.mtime ())); + timestamp mt; + + // If the depdb was overwritten or it's newer than the target, then + // do unconditional update. + // + if (md.dd_mtime == timestamp_nonexistent || + md.dd_mtime > (mt = t.load_mtime ())) + mt = timestamp_nonexistent; + + auto p (execute_prerequisites (x_src, act, t, mt)); if ((s = p.first) == nullptr) return p.second; @@ -1447,7 +1516,7 @@ namespace build2 // Translate paths to relative (to working directory) ones. This // results in easier to read diagnostics. // - path relo (relative (t.path ())); + path relo (relative (tp)); path rels (relative (s->path ())); append_options (args, t, c_poptions); @@ -1455,7 +1524,7 @@ namespace build2 // Add *.export.poptions from prerequisite libraries. // - append_lib_options (bs, args, t, lo); + append_lib_options (bs, args, t, act, lo); // Extra system header dirs (last). // @@ -1646,14 +1715,14 @@ namespace build2 } target_state compile:: - perform_clean (action a, const target& xt) const + perform_clean (action act, const target& xt) const { const file& t (xt.as ()); if (cid == "msvc") - return clean_extra (a, t, {".d", ".idb", ".pdb"}); + return clean_extra (act, t, {".d", ".idb", ".pdb"}); else - return clean_extra (a, t, {".d"}); + return clean_extra (act, t, {".d"}); } } } diff --git a/build2/cc/install b/build2/cc/install index f676d72..e229e94 100644 --- a/build2/cc/install +++ b/build2/cc/install @@ -24,14 +24,14 @@ namespace build2 public: install (data&&, const link&); - virtual target* - filter (slock&, action, target&, prerequisite_member) const override; + virtual const target* + filter (action, const target&, prerequisite_member) const override; virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; virtual void install_extra (const file&, const install_dir&) const override; diff --git a/build2/cc/install.cxx b/build2/cc/install.cxx index f022a92..6195ab7 100644 --- a/build2/cc/install.cxx +++ b/build2/cc/install.cxx @@ -22,8 +22,8 @@ namespace build2 install:: install (data&& d, const link& l): common (move (d)), link_ (l) {} - target* install:: - filter (slock& ml, action a, target& t, prerequisite_member p) const + const target* install:: + filter (action a, const target& t, prerequisite_member p) const { if (t.is_a ()) { @@ -42,37 +42,38 @@ namespace build2 if ((t.is_a () || t.is_a ()) && (p.is_a () || p.is_a ())) { - target* pt (&p.search ()); + const target* pt (&p.search ()); // If this is the lib{} group, pick a member which we would link. 
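
Picking which lib{} member to install or link, as filter() above does via link_member(), amounts to walking an order of preference and taking the first member that is actually present. A minimal sketch with a two-value preference enum (build2's lorder is richer):

  #include <cassert>

  struct target {};

  struct lib_group
  {
    const target* a = nullptr; // Static member.
    const target* s = nullptr; // Shared member.
  };

  enum class order {a_s, s_a}; // Prefer static or prefer shared.

  const target&
  link_member (const lib_group& l, order lo)
  {
    const target* r (lo == order::s_a
                     ? (l.s != nullptr ? l.s : l.a)
                     : (l.a != nullptr ? l.a : l.s));
    assert (r != nullptr); // At least one member must exist.
    return *r;
  }

  int main ()
  {
    target liba, libs;
    lib_group l;
    l.a = &liba;
    l.s = &libs;

    assert (&link_member (l, order::s_a) == &libs);
    assert (&link_member (l, order::a_s) == &liba);
  }
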
// - if (lib* l = pt->is_a ()) - pt = &link_member (*l, link_order (t.base_scope (), link_type (t))); + if (const lib* l = pt->is_a ()) + pt = &link_member ( + *l, a, link_order (t.base_scope (), link_type (t))); if (pt->is_a ()) // Can be liba{}. return pt->in (t.weak_scope ()) ? pt : nullptr; } - return file_rule::filter (ml, a, t, p); + return file_rule::filter (a, t, p); } match_result install:: - match (slock& ml, action a, target& t, const string& hint) const + match (action a, target& t, const string& hint) const { // @@ How do we split the hint between the two? // - // We only want to handle installation if we are also the - // ones building this target. So first run link's match(). + // We only want to handle installation if we are also the ones building + // this target. So first run link's match(). // - match_result r (link_.match (ml, a, t, hint)); - return r ? file_rule::match (ml, a, t, "") : r; + match_result r (link_.match (a, t, hint)); + return r ? file_rule::match (a, t, "") : r; } recipe install:: - apply (slock& s, action a, target& t) const + apply (action a, target& t) const { - recipe r (file_rule::apply (s, a, t)); + recipe r (file_rule::apply (a, t)); // Derive shared library paths and cache them in the target's aux // storage if we are (un)installing (used in *_extra() functions below). diff --git a/build2/cc/link b/build2/cc/link index c787015..cd83516 100644 --- a/build2/cc/link +++ b/build2/cc/link @@ -25,10 +25,10 @@ namespace build2 link (data&&); virtual match_result - match (slock&, action, target&, const string& hint) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; target_state perform_update (action, const target&) const; @@ -70,18 +70,17 @@ namespace build2 void append_libraries (strings&, const file&, bool, - const scope&, lorder) const; + const scope&, action, lorder) const; void hash_libraries (sha256&, const file&, bool, - const scope&, lorder) const; + const scope&, action, lorder) const; void rpath_libraries (strings&, const target&, - const scope&, - lorder, + const scope&, action, lorder, bool) const; // Windows rpath emulation (windows-rpath.cxx). @@ -98,13 +97,15 @@ namespace build2 using windows_dlls = std::set; timestamp - windows_rpath_timestamp (const file&, const scope&, lorder) const; + windows_rpath_timestamp (const file&, + const scope&, + action, lorder) const; windows_dlls - windows_rpath_dlls (const file&, const scope&, lorder) const; + windows_rpath_dlls (const file&, const scope&, action, lorder) const; void - windows_rpath_assembly (const file&, const scope&, lorder, + windows_rpath_assembly (const file&, const scope&, action, lorder, const string&, timestamp, bool) const; diff --git a/build2/cc/link.cxx b/build2/cc/link.cxx index 682b736..68f3d64 100644 --- a/build2/cc/link.cxx +++ b/build2/cc/link.cxx @@ -41,7 +41,7 @@ namespace build2 } match_result link:: - match (slock& ml, action a, target& t, const string& hint) const + match (action act, target& t, const string& hint) const { tracer trace (x, "link::match"); @@ -59,12 +59,21 @@ namespace build2 otype lt (link_type (t)); + // If this is a library, link-up to our group (this is the lib{} target + // group protocol which means this can be done whether we match or not). 
+ // + if (lt == otype::a || lt == otype::s) + { + if (t.group == nullptr) + t.group = targets.find (t.dir, t.out, t.name); + } + // Scan prerequisites and see if we can work with what we've got. Note // that X could be C. We handle this by always checking for X first. // bool seen_x (false), seen_c (false), seen_obj (false), seen_lib (false); - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + for (prerequisite_member p: group_prerequisite_members (act, t)) { if (p.is_a (x_src)) { @@ -248,7 +257,7 @@ namespace build2 lk = b; append_ext (lk); - libi& li (ls.member->as ()); + libi& li (ls.member->as ()); // Note: libi is locked. lk = li.derive_path (move (lk), tsys == "mingw32" ? "a" : "lib"); } else if (!v.empty ()) @@ -266,7 +275,7 @@ namespace build2 } recipe link:: - apply (slock& ml, action a, target& xt) const + apply (action act, target& xt) const { static_assert (sizeof (link::libs_paths) <= target::data_size, "insufficient space"); @@ -288,21 +297,31 @@ namespace build2 // Derive file name(s) and add ad hoc group members. // - auto add_adhoc = [a, &bs] (target& t, const char* type) -> file& + + // Add if necessary and lock an ad hoc group member. + // + auto add_adhoc = [act, &bs] (target& t, const char* type) -> target_lock { const target_type& tt (*bs.find_target_type (type)); - if (t.member != nullptr) // Might already be there. - assert (t.member->type () == tt); + const target& m (t.member != nullptr // Might already be there. + ? *t.member + : search (tt, t.dir, t.out, t.name)); + + target_lock l (lock (act, m)); + assert (l.target != nullptr); // Someone messing with adhoc members? + + if (t.member == nullptr) + t.member = l.target; else - t.member = &search (tt, t.dir, t.out, t.name, nullptr, nullptr); + assert (t.member->type () == tt); - file& r (t.member->as ()); - r.recipe (a, group_recipe); - return r; + return l; }; { + target_lock libi; // Have to hold until after PDB member addition. + const char* e (nullptr); // Extension. const char* p (nullptr); // Prefix. const char* s (nullptr); // Suffix. @@ -344,34 +363,41 @@ namespace build2 // DLL and we add libi{} import library as its member. // if (tclass == "windows") - add_adhoc (t, "libi"); + libi = add_adhoc (t, "libi"); t.data (derive_libs_paths (t)); // Cache in target. + + if (libi) + match_recipe (libi, group_recipe); // Set recipe and unlock. + break; } } - } - // PDB - // - if (lt != otype::a && - cid == "msvc" && - (find_option ("/DEBUG", t, c_loptions, true) || - find_option ("/DEBUG", t, x_loptions, true))) - { - // Add after the import library if any. + // PDB // - file& pdb (add_adhoc (t.member == nullptr ? t : *t.member, "pdb")); + if (lt != otype::a && + cid == "msvc" && + (find_option ("/DEBUG", t, c_loptions, true) || + find_option ("/DEBUG", t, x_loptions, true))) + { + // Add after the import library if any. + // + target_lock pdb ( + add_adhoc (t.member == nullptr ? t : *t.member, "pdb")); - // We call it foo.{exe,dll}.pdb rather than just foo.pdb because we - // can have both foo.exe and foo.dll in the same directory. - // - pdb.derive_path (t.path (), "pdb"); + // We call it foo.{exe,dll}.pdb rather than just foo.pdb because we + // can have both foo.exe and foo.dll in the same directory. + // + pdb.target->as ().derive_path (t.path (), "pdb"); + + match_recipe (pdb, group_recipe); // Set recipe and unlock. + } } // Inject dependency on the output directory. // - inject_fsdir (ml, a, t); + inject_fsdir (act, t); optional usr_lib_dirs; // Extract lazily. 
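
add_adhoc() above returns a target_lock so the caller can later assign the group recipe and unlock in one step via match_recipe(). A simplified move-only lock handle showing that shape; the real target_lock also carries the match offset and interacts with the target's task count:

  #include <cassert>
  #include <functional>
  #include <utility>

  struct target
  {
    bool locked = false;
    std::function<int ()> recipe; // Stand-in for build2's recipe.
  };

  struct lock
  {
    target* t = nullptr;

    explicit operator bool () const {return t != nullptr;}

    lock () = default;
    explicit lock (target& r): t (&r) {t->locked = true;}

    lock (lock&& x): t (x.t) {x.t = nullptr;}
    lock& operator= (lock&& x)
    {
      if (this != &x) {unlock (); t = x.t; x.t = nullptr;}
      return *this;
    }

    lock (const lock&) = delete;
    lock& operator= (const lock&) = delete;

    void unlock () {if (t != nullptr) {t->locked = false; t = nullptr;}}
    ~lock () {unlock ();}
  };

  // Set the recipe on a locked target and release the lock, analogous in
  // spirit to setting a fixed recipe on an ad hoc group member.
  //
  void
  set_recipe (lock&& l, std::function<int ()> r)
  {
    assert (l);
    l.t->recipe = std::move (r);
    l.unlock ();
  }

  int main ()
  {
    target t;
    {
      lock l (t);
      assert (t.locked);
      set_recipe (std::move (l), [] {return 0;});
    }
    assert (!t.locked && t.recipe () == 0);
  }
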
@@ -380,22 +406,23 @@ namespace build2 // // We do it first in order to indicate that we will execute these // targets before matching any of the obj?{}. This makes it safe for - // compiler::apply() to unmatch them and therefore not to hinder + // compile::apply() to unmatch them and therefore not to hinder // parallelism. // // When cleaning, we ignore prerequisites that are not in the same or a // subdirectory of our project root. // - size_t slot (t.prerequisite_targets.size ()); // Start. - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + size_t start (t.prerequisite_targets.size ()); + + for (prerequisite_member p: group_prerequisite_members (act, t)) { // We pre-allocate a NULL slot for each (potential; see clean) // prerequisite target. // t.prerequisite_targets.push_back (nullptr); - const target*& cpt (t.prerequisite_targets.back ()); + const target*& rpt (t.prerequisite_targets.back ()); - target* pt (nullptr); + const target* pt (nullptr); if (p.is_a () || p.is_a () || p.is_a ()) { @@ -405,27 +432,31 @@ namespace build2 // target in the prerequisite. // if (p.proj ()) - pt = search_library (sys_lib_dirs, usr_lib_dirs, p.prerequisite); + pt = search_library ( + act, sys_lib_dirs, usr_lib_dirs, p.prerequisite); // The rest is the same basic logic as in search_and_match(). // if (pt == nullptr) pt = &p.search (); - if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ())) + if (act.operation () == clean_id && !pt->dir.sub (rs.out_path ())) continue; // Skip. // If this is the lib{} target group, then pick the appropriate // member. // - if (lib* l = pt->is_a ()) - pt = &link_member (*l, lo); + if (const lib* l = pt->is_a ()) + pt = &link_member (*l, act, lo); - build2::match (ml, a, *pt); - cpt = pt; + rpt = pt; } } + // Match in parallel and wait for completion. + // + match_members (act, t, t.prerequisite_targets, start); + // Process prerequisites, pass 2: search and match obj{} amd do rule // chaining for C and X source files. // @@ -433,212 +464,256 @@ namespace build2 lt == otype::a ? obja::static_type : objs::static_type); - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) { - const target*& cpt (t.prerequisite_targets[slot++]); - target* pt (nullptr); + // Wait with unlocked phase to allow phase switching. + // + wait_guard wg (target::count_busy (), t.task_count, true); - if (p.is_a () || p.is_a () || p.is_a ()) - continue; // Handled on pass 1. + size_t i (start); // Parallel prerequisite_targets loop. - if (!p.is_a (x_src) && !p.is_a ()) + for (prerequisite_member p: group_prerequisite_members (act, t)) { - pt = &p.search (); + const target*& rpt (t.prerequisite_targets[i++]); + const target* pt (nullptr); - if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ())) - continue; // Skip. + if (p.is_a () || p.is_a () || p.is_a ()) + continue; // Taken care of on pass 1. - // If this is the obj{} target group, then pick the appropriate - // member. - // - if (obj* o = pt->is_a ()) + uint8_t pm (1); // Completion (1) and verfication (2) mark. + + if (!p.is_a (x_src) && !p.is_a ()) { - switch (lt) - { - case otype::e: pt = o->e; break; - case otype::a: pt = o->a; break; - case otype::s: pt = o->s; break; - } + // If this is the obj{} target group, then pick the appropriate + // member. + // + pt = p.is_a () ? &search (ott, p.key ()) : &p.search (); - if (pt == nullptr) - pt = &search (ott, p.key ()); + if (act.operation () == clean_id && !pt->dir.sub (rs.out_path ())) + continue; // Skip. + + // Fall through. 
} + else + { + // The rest is rule chaining. + // + // Which scope shall we use to resolve the root? Unlikely, but + // possible, the prerequisite is from a different project + // altogether. So we are going to use the target's project. + // - build2::match (ml, a, *pt); - cpt = pt; - continue; - } + // If the source came from the lib{} group, then create the obj{} + // group and add the source as a prerequisite of the obj{} group, + // not the obj?{} member. This way we only need one prerequisite + // for, say, both liba{} and libs{}. + // + bool group (!p.prerequisite.belongs (t)); // Group's prerequisite. + const target_type& tt (group ? obj::static_type : ott); - // The rest is rule chaining. - // + const prerequisite_key& cp (p.key ()); // C-source (X or C) key. - // Which scope shall we use to resolve the root? Unlikely, but - // possible, the prerequisite is from a different project - // altogether. So we are going to use the target's project. - // + // Come up with the obj*{} target. The source prerequisite + // directory can be relative (to the scope) or absolute. If it is + // relative, then use it as is. If absolute, then translate it to + // the corresponding directory under out_root. While the source + // directory is most likely under src_root, it is also possible it + // is under out_root (e.g., generated source). + // + dir_path d; + { + const dir_path& cpd (*cp.tk.dir); - // If the source came from the lib{} group, then create the obj{} - // group and add the source as a prerequisite of the obj{} group, - // not the obj?{} member. This way we only need one prerequisite - // for, say, both liba{} and libs{}. - // - bool group (!p.prerequisite.belongs (t)); // Group's prerequisite. + if (cpd.relative () || cpd.sub (rs.out_path ())) + d = cpd; + else + { + if (!cpd.sub (rs.src_path ())) + fail << "out of project prerequisite " << cp << + info << "specify corresponding " << tt.name << "{} " + << "target explicitly"; - const prerequisite_key& cp (p.key ()); // C-source (X or C) key. - const target_type& tt (group ? obj::static_type : ott); + d = rs.out_path () / cpd.leaf (rs.src_path ()); + } + } - // Come up with the obj*{} target. The source prerequisite directory - // can be relative (to the scope) or absolute. If it is relative, then - // use it as is. If absolute, then translate it to the corresponding - // directory under out_root. While the source directory is most likely - // under src_root, it is also possible it is under out_root (e.g., - // generated source). - // - dir_path d; - { - const dir_path& cpd (*cp.tk.dir); + // obj*{} is always in the out tree. + // + const target& ot ( + search (tt, d, dir_path (), *cp.tk.name, nullptr, cp.scope)); - if (cpd.relative () || cpd.sub (rs.out_path ())) - d = cpd; - else - { - if (!cpd.sub (rs.src_path ())) - fail << "out of project prerequisite " << cp << - info << "specify corresponding " << tt.name << "{} " - << "target explicitly"; + // If we are cleaning, check that this target is in the same or a + // subdirectory of our project root. + // + if (act.operation () == clean_id && !ot.dir.sub (rs.out_path ())) + { + // If we shouldn't clean obj{}, then it is fair to assume we + // shouldn't clean the source either (generated source will be + // in the same directory as obj{} and if not, well, go find + // yourself another build system ;-)). + // + continue; // Skip. 
+ } - d = rs.out_path () / cpd.leaf (rs.src_path ()); - } - } + // If we have created the obj{} target group, pick one of its + // members; the rest would be primarily concerned with it. + // + pt = group ? &search (ott, ot.dir, ot.out, ot.name) : &ot; - // obj*{} is always in the out tree. - // - target& ot ( - search (tt, d, dir_path (), *cp.tk.name, nullptr, cp.scope)); + // If this obj*{} already has prerequisites, then verify they are + // "compatible" with what we are doing here. Otherwise, synthesize + // the dependency. Note that we may also end up synthesizing with + // someone beating up to it. In this case also verify. + // + bool verify (true); - // If we are cleaning, check that this target is in the same or - // a subdirectory of our project root. - // - if (a.operation () == clean_id && !ot.dir.sub (rs.out_path ())) - { - // If we shouldn't clean obj{}, then it is fair to assume we - // shouldn't clean the source either (generated source will be in - // the same directory as obj{} and if not, well, go find yourself - // another build system ;-)). - // - continue; // Skip. - } + if (!pt->has_prerequisites ()) + { + prerequisites ps; + ps.push_back (p.as_prerequisite ()); // Source. - // If we have created the obj{} target group, pick one of its members; - // the rest would be primarily concerned with it. - // - if (group) - { - obj& o (ot.as ()); + // Add our lib*{} prerequisites (see the export.* machinery for + // details). + // + // Note that we don't resolve lib{} to liba{}/libs{} here + // instead leaving it to whoever (e.g., the compile rule) will + // be needing *.export.*. One reason for doing it there is that + // the object target might be specified explicitly by the user + // in which case they will have to specify the set of lib{} + // prerequisites and it's much cleaner to do as lib{} rather + // than liba{}/libs{}. + // + // Initially, we were only adding imported libraries, but there + // is a problem with this approach: the non-imported library + // might depend on the imported one(s) which we will never "see" + // unless we start with this library. + // + for (const prerequisite& p: group_prerequisites (t)) + { + if (p.is_a () || p.is_a () || p.is_a ()) + ps.emplace_back (p); + } - switch (lt) - { - case otype::e: pt = o.e; break; - case otype::a: pt = o.a; break; - case otype::s: pt = o.s; break; + // Note: add to the group, not the member. + // + verify = !ot.prerequisites (move (ps)); + } + + if (verify) + { + // This gets a bit tricky. We need to make sure the source files + // are the same which we can only do by comparing the targets to + // which they resolve. But we cannot search ot's prerequisites + // -- only the rule that matches can. Note, however, that if all + // this works out, then our next step is to match the obj*{} + // target. If things don't work out, then we fail, in which case + // searching and matching speculatively doesn't really hurt. So + // we start the async match here and finish this verification in + // the "harvest" loop below. + // + bool src (false); + for (prerequisite_member p1: + group_prerequisite_members (act, *pt)) + { + // Most of the time we will have just a single source so + // fast-path that case. + // + if (p1.is_a (x_src) || p1.is_a ()) + { + src = true; + continue; // Check the rest of the prerequisites. + } + + // Ignore some known target types (fsdir, headers, libraries). 
+ // + if (p1.is_a () || + p1.is_a () || + p1.is_a () || + p1.is_a () || + (p.is_a (x_src) && x_header (p1)) || + (p.is_a () && p1.is_a ())) + continue; + + fail << "synthesized dependency for prerequisite " << p + << " would be incompatible with existing target " << *pt << + info << "unexpected existing prerequisite type " << p1 << + info << "specify corresponding " << tt.name << "{} " + << "dependency explicitly"; + } + + if (!src) + fail << "synthesized dependency for prerequisite " << p + << " would be incompatible with existing target " << *pt << + info << "no existing c/" << x_name << " source prerequisite" << + info << "specify corresponding " << tt.name << "{} " + << "dependency explicitly"; + + pm = 2; // Needs completion and verification. + } } - if (pt == nullptr) - pt = &search (ott, o.dir, o.out, o.name, o.ext (), nullptr); + match_async (act, *pt, target::count_busy (), t.task_count); + rpt = pt; + mark (rpt, pm); // Mark for completion/verification. } - else - pt = &ot; - // If this obj*{} target already exists, then it needs to be - // "compatible" with what we are doing here. - // - // This gets a bit tricky. We need to make sure the source files - // are the same which we can only do by comparing the targets to - // which they resolve. But we cannot search the ot's prerequisites - // -- only the rule that matches can. Note, however, that if all - // this works out, then our next step is to match the obj*{} - // target. If things don't work out, then we fail, in which case - // searching and matching speculatively doesn't really hurt. - // - bool found (false); - for (prerequisite_member p1: - reverse_group_prerequisite_members (ml, a, *pt)) - { - // Most of the time we will have just a single source so fast-path - // that case. - // - if (p1.is_a (x_src) || p1.is_a ()) - { - if (!found) - { - build2::match (ml, a, *pt); // Now p1 should be resolved. + wg.wait (); + } - // Searching our own prerequisite is ok. - // - if (&p.search () != &p1.search ()) - fail << "synthesized target for prerequisite " << cp << " " - << "would be incompatible with existing target " << *pt << - info << "existing prerequisite " << p1 << " does not match " - << cp << - info << "specify corresponding " << tt.name << "{} target " - << "explicitly"; + // The "harvest" loop: finish matching the targets we have started. Note + // that we may have bailed out early (thus the parallel i/n for-loop). + // + { + size_t i (start), n (t.prerequisite_targets.size ()); - found = true; - } + for (prerequisite_member p: group_prerequisite_members (act, t)) + { + if (i == n) + break; - continue; // Check the rest of the prerequisites. - } + const target*& pt (t.prerequisite_targets[i++]); + + uint8_t m; - // Ignore some known target types (fsdir, headers, libraries). + // Skipped or not marked for completion (pass 1). // - if (p1.is_a () || - p1.is_a () || - p1.is_a () || - p1.is_a () || - (p.is_a (x_src) && x_header (p1)) || - (p.is_a () && p1.is_a ())) + if (pt == nullptr || (m = unmark (pt)) == 0) continue; - fail << "synthesized target for prerequisite " << cp - << " would be incompatible with existing target " << *pt << - info << "unexpected existing prerequisite type " << p1 << - info << "specify corresponding obj{} target explicitly"; - } + build2::match (act, *pt); - if (!found) - { - // Note: add the source to the group, not the member. + // Nothing else to do if not marked for verification. 
// - ot.prerequisites.push_back (p.as_prerequisite ()); + if (m == 1) + continue; - // Add our lib*{} prerequisites to the object file (see the export.* - // machinery for details). - // - // Note that we don't resolve lib{} to liba{}/libs{} here instead - // leaving it to whoever (e.g., the compile rule) will be needing - // *.export.*. One reason for doing it there is that the object - // target might be specified explicitly by the user in which case - // they will have to specify the set of lib{} prerequisites and it's - // much cleaner to do as lib{} rather than liba{}/libs{}. + // Finish verifying the existing dependency (which is now matched) + // compared to what we would have synthesized. // - // Initially, we were only adding imported libraries, but there is a - // problem with this approach: the non-imported library might depend - // on the imported one(s) which we will never "see" unless we start - // with this library. - // - for (prerequisite& p: group_prerequisites (t)) + bool group (!p.prerequisite.belongs (t)); // Group's prerequisite. + const target_type& tt (group ? obj::static_type : ott); + + for (prerequisite_member p1: group_prerequisite_members (act, *pt)) { - if (p.is_a () || p.is_a () || p.is_a ()) - ot.prerequisites.emplace_back (p); - } + if (p1.is_a (x_src) || p1.is_a ()) + { + // Searching our own prerequisite is ok, p1 must already be + // resolved. + // + if (&p.search () != &p1.search ()) + fail << "synthesized dependency for prerequisite " << p << " " + << "would be incompatible with existing target " << *pt << + info << "existing prerequisite " << p1 << " does not match " + << p << + info << "specify corresponding " << tt.name << "{} " + << "dependency explicitly"; - build2::match (ml, a, *pt); + break; + } + } } - - cpt = pt; } - switch (a) + switch (act) { case perform_update_id: return [this] (action a, const target& t) { @@ -655,7 +730,7 @@ namespace build2 void link:: append_libraries (strings& args, const file& l, bool la, - const scope& bs, lorder lo) const + const scope& bs, action act, lorder lo) const { // Note: lack of the "small function object" optimization will really // kill us here since we are called in a loop. @@ -686,7 +761,7 @@ namespace build2 { // If we need an interface value, then use the group (lib{}). // - if (const target* g = exp && l.is_a () ? l.group.get () : &l) + if (const target* g = exp && l.is_a () ? l.group : &l) { const variable& var ( com @@ -699,13 +774,14 @@ namespace build2 } }; - process_libraries (bs, lo, sys_lib_dirs, l, la, imp, lib, opt, true); + process_libraries ( + act, bs, lo, sys_lib_dirs, l, la, imp, lib, opt, true); } void link:: hash_libraries (sha256& cs, const file& l, bool la, - const scope& bs, lorder lo) const + const scope& bs, action act, lorder lo) const { bool win (tclass == "windows"); @@ -731,7 +807,7 @@ namespace build2 auto opt = [&cs, this] ( const file& l, const string& t, bool com, bool exp) { - if (const target* g = exp && l.is_a () ? l.group.get () : &l) + if (const target* g = exp && l.is_a () ? 
l.group : &l) { const variable& var ( com @@ -744,13 +820,15 @@ namespace build2 } }; - process_libraries (bs, lo, sys_lib_dirs, l, la, imp, lib, opt, true); + process_libraries ( + act, bs, lo, sys_lib_dirs, l, la, imp, lib, opt, true); } void link:: rpath_libraries (strings& args, const target& t, const scope& bs, + action act, lorder lo, bool for_install) const { @@ -864,7 +942,7 @@ namespace build2 "-Wl,-rpath," + f->path ().directory ().string ()); } - process_libraries (bs, lo, sys_lib_dirs, + process_libraries (act, bs, lo, sys_lib_dirs, *f, a != nullptr, impf, libf, nullptr); } @@ -882,13 +960,14 @@ namespace build2 msvc_machine (const string& cpu); // msvc.cxx target_state link:: - perform_update (action a, const target& xt) const + perform_update (action act, const target& xt) const { tracer trace (x, "link::perform_update"); const file& t (xt.as ()); + const path& tp (t.path ()); - auto oop (a.outer_operation ()); + auto oop (act.outer_operation ()); bool for_install (oop == install_id || oop == uninstall_id); const scope& bs (t.base_scope ()); @@ -901,8 +980,8 @@ namespace build2 // out-of-date manually below. // bool update (false); - timestamp mt (t.mtime ()); - target_state ts (straight_execute_prerequisites (a, t)); + timestamp mt (t.load_mtime ()); + target_state ts (straight_execute_prerequisites (act, t)); // If targeting Windows, take care of the manifest. // @@ -916,7 +995,7 @@ namespace build2 // it if we are updating for install. // if (!for_install) - rpath_timestamp = windows_rpath_timestamp (t, bs, lo); + rpath_timestamp = windows_rpath_timestamp (t, bs, act, lo); path mf ( windows_manifest ( @@ -1015,7 +1094,7 @@ namespace build2 // Check/update the dependency database. // - depdb dd (t.path () + ".d"); + depdb dd (tp + ".d"); // First should come the rule name/version. // @@ -1157,7 +1236,7 @@ namespace build2 // rpath of the imported libraries (i.e., we assume they are also // installed). But we add -rpath-link for some platforms. // - rpath_libraries (sargs, t, bs, lo, for_install); + rpath_libraries (sargs, t, bs, act, lo, for_install); if (auto l = t["bin.rpath"]) for (const dir_path& p: cast (l)) @@ -1207,7 +1286,7 @@ namespace build2 // and implementation (static), recursively. // if (a != nullptr || s != nullptr) - hash_libraries (cs, *f, a != nullptr, bs, lo); + hash_libraries (cs, *f, a != nullptr, bs, act, lo); else cs.append (f->path ().string ()); } @@ -1260,7 +1339,7 @@ namespace build2 // Translate paths to relative (to working directory) ones. This results // in easier to read diagnostics. // - path relt (relative (t.path ())); + path relt (relative (tp)); const process_path* ld (nullptr); switch (lt) @@ -1372,7 +1451,6 @@ namespace build2 // if (find_option ("/DEBUG", args, true)) { - auto& pdb ( (lt == otype::e ? t.member : t.member->member)->as ()); out1 = "/PDB:" + relative (pdb.path ()).string (); @@ -1441,7 +1519,7 @@ namespace build2 // and implementation (static), recursively. // if (a != nullptr || s != nullptr) - append_libraries (sargs, *f, a != nullptr, bs, lo); + append_libraries (sargs, *f, a != nullptr, bs, act, lo); else sargs.push_back (relative (f->path ()).string ()); // string()&& } @@ -1566,7 +1644,7 @@ namespace build2 // install). 
// if (lt == otype::e && !for_install) - windows_rpath_assembly (t, bs, lo, + windows_rpath_assembly (t, bs, act, lo, cast (rs[x_target_cpu]), rpath_timestamp, scratch); @@ -1620,7 +1698,7 @@ namespace build2 } target_state link:: - perform_clean (action a, const target& xt) const + perform_clean (action act, const target& xt) const { const file& t (xt.as ()); @@ -1634,13 +1712,13 @@ namespace build2 { if (tsys == "mingw32") return clean_extra ( - a, t, {".d", ".dlls/", ".manifest.o", ".manifest"}); + act, t, {".d", ".dlls/", ".manifest.o", ".manifest"}); else // Assuming it's VC or alike. Clean up .ilk in case the user // enabled incremental linking (note that .ilk replaces .exe). // return clean_extra ( - a, t, {".d", ".dlls/", ".manifest", "-.ilk"}); + act, t, {".d", ".dlls/", ".manifest", "-.ilk"}); } break; @@ -1655,7 +1733,7 @@ namespace build2 // versioning their bases may not be the same. // if (tsys != "mingw32") - return clean_extra (a, t, {{".d", "-.ilk"}, {"-.exp"}}); + return clean_extra (act, t, {{".d", "-.ilk"}, {"-.exp"}}); } else { @@ -1664,7 +1742,7 @@ namespace build2 // const libs_paths& paths (t.data ()); - return clean_extra (a, t, {".d", + return clean_extra (act, t, {".d", paths.link.string ().c_str (), paths.soname.string ().c_str (), paths.interm.string ().c_str ()}); @@ -1674,7 +1752,7 @@ namespace build2 } } - return clean_extra (a, t, {".d"}); + return clean_extra (act, t, {".d"}); } } } diff --git a/build2/cc/msvc.cxx b/build2/cc/msvc.cxx index 94064ca..aa9389f 100644 --- a/build2/cc/msvc.cxx +++ b/build2/cc/msvc.cxx @@ -219,18 +219,17 @@ namespace build2 template static T* - msvc_search_library (const char* mod, - const process_path& ld, + msvc_search_library (const process_path& ld, const dir_path& d, const prerequisite_key& p, otype lt, const char* pfx, const char* sfx, - bool exist) + bool exist, + tracer& trace) { // Pretty similar logic to search_library(). // - tracer trace (mod, "msvc_search_library"); const optional& ext (p.tk.ext); const string& name (*p.tk.name); @@ -268,21 +267,13 @@ namespace build2 { // Enter the target. // - auto p (targets.insert (T::static_type, - d, - dir_path (), - name, - e, - true, // Implied. - trace)); - assert (!exist || !p.second); - T& t (p.first.template as ()); - - if (t.path ().empty ()) - t.path (move (f)); - - t.mtime (mt); - return &t; + T* t; + common::insert_library (t, name, d, e, exist, trace); + + t->mtime (mt); + t->path (move (f)); + + return t; } return nullptr; @@ -294,12 +285,15 @@ namespace build2 const prerequisite_key& p, bool exist) const { + tracer trace (x, "msvc_search_static"); + liba* r (nullptr); - auto search = [&r, &ld, &d, &p, exist, this] ( + auto search = [&r, &ld, &d, &p, exist, &trace, this] ( const char* pf, const char* sf) -> bool { - r = msvc_search_library (x, ld, d, p, otype::a, pf, sf, exist); + r = msvc_search_library ( + ld, d, p, otype::a, pf, sf, exist, trace); return r != nullptr; }; @@ -324,32 +318,33 @@ namespace build2 { tracer trace (x, "msvc_search_shared"); - libs* r (nullptr); + libs* s (nullptr); - auto search = [&r, &ld, &d, &pk, &trace, exist, this] ( + auto search = [&s, &ld, &d, &pk, exist, &trace, this] ( const char* pf, const char* sf) -> bool { - if (libi* i = - msvc_search_library (x, ld, d, pk, otype::s, pf, sf, exist)) + if (libi* i = msvc_search_library ( + ld, d, pk, otype::s, pf, sf, exist, trace)) { - auto p (targets.insert (libs::static_type, - d, - dir_path (), - *pk.tk.name, - nullopt, - true, // Implied. 
- trace)); - assert (!exist || !p.second); - r = &p.first.as (); - - if (r->member == nullptr) + ulock l (insert_library (s, *pk.tk.name, d, nullopt, exist, trace)); + + if (!exist) { - r->mtime (i->mtime ()); - r->member = i; + if (l.owns_lock ()) + s->member = i; + else + assert (s->member == i); + + l.unlock (); + + // Presumably there is a DLL somewhere, we just don't know where. + // + s->mtime (i->mtime ()); + s->path (path ()); } } - return r != nullptr; + return s != nullptr; }; // Try: @@ -360,7 +355,7 @@ namespace build2 return search ("", "") || search ("lib", "") || - search ("", "dll") ? r : nullptr; + search ("", "dll") ? s : nullptr; } } } diff --git a/build2/cc/pkgconfig.cxx b/build2/cc/pkgconfig.cxx index da6614f..72ae31b 100644 --- a/build2/cc/pkgconfig.cxx +++ b/build2/cc/pkgconfig.cxx @@ -37,7 +37,8 @@ namespace build2 // search_library() POV. // bool common:: - pkgconfig_extract (const scope& s, + pkgconfig_extract (action act, + const scope& s, lib& lt, liba* at, libs* st, @@ -256,7 +257,7 @@ namespace build2 // Now parse --libs into loptions/libs (interface and implementation). // - auto parse_libs = [&s, &f, sysd, &next, this] ( + auto parse_libs = [act, &s, &f, sysd, &next, this] ( const string& lstr, target& t) { strings lops; @@ -421,7 +422,8 @@ namespace build2 prerequisite_key pk { nullopt, {&lib::static_type, &out, &out, &name, nullopt}, &s}; - if (lib* lt = static_cast (search_library (sysd, usrd, pk))) + if (lib* lt = static_cast ( + search_library (act, sysd, usrd, pk))) { // We used to pick a member but that doesn't seem right since the // same target could be used with different link orders. diff --git a/build2/cc/utility b/build2/cc/utility index b1d07b8..ee3cb81 100644 --- a/build2/cc/utility +++ b/build2/cc/utility @@ -41,14 +41,8 @@ namespace build2 // Given the link order return the library member (liba or libs) to link. // - // Note that the const version assumes you have already called non-const - // (which does the search, if necessary). - // - target& - link_member (bin::lib&, lorder); - const target& - link_member (const bin::lib&, lorder); + link_member (const bin::lib&, action, lorder); } } diff --git a/build2/cc/utility.cxx b/build2/cc/utility.cxx index 62febfa..4a931af 100644 --- a/build2/cc/utility.cxx +++ b/build2/cc/utility.cxx @@ -39,40 +39,14 @@ namespace build2 } const target& - link_member (const bin::lib& l, lorder lo) + link_member (const bin::lib& l, action a, lorder lo) { - bool ls (true); - const string& at (cast (l["bin.lib"])); // Available members. - - switch (lo) - { - case lorder::a: - case lorder::a_s: - ls = false; // Fall through. - case lorder::s: - case lorder::s_a: - { - if (ls ? at == "static" : at == "shared") - { - if (lo == lorder::a_s || lo == lorder::s_a) - ls = !ls; - else - assert (false); - } - } - } - - const target* r (ls ? static_cast (l.s) : l.a); - assert (r != nullptr); - return *r; - } + // Make sure group members are resolved. + // + group_view gv (resolve_group_members (a, l)); + assert (gv.members != nullptr); - target& - link_member (bin::lib& l, lorder lo) - { bool ls (true); - const string& at (cast (l["bin.lib"])); // Available members. - switch (lo) { case lorder::a: @@ -81,7 +55,7 @@ namespace build2 case lorder::s: case lorder::s_a: { - if (ls ? at == "static" : at == "shared") + if (ls ? l.s == nullptr : l.a == nullptr) { if (lo == lorder::a_s || lo == lorder::s_a) ls = !ls; @@ -92,13 +66,7 @@ namespace build2 } } - target* r (ls ? 
static_cast (l.s) : l.a); - - if (r == nullptr) - r = &search (ls ? libs::static_type : liba::static_type, - prerequisite_key {nullopt, l.key (), nullptr}); - - return *r; + return *(ls ? static_cast (l.s) : l.a); } } } diff --git a/build2/cc/windows-manifest.cxx b/build2/cc/windows-manifest.cxx index 0e38e7d..3b9c649 100644 --- a/build2/cc/windows-manifest.cxx +++ b/build2/cc/windows-manifest.cxx @@ -41,7 +41,7 @@ namespace build2 path link:: windows_manifest (const file& t, bool rpath_assembly) const { - tracer trace (x, "windows_manifest"); + tracer trace (x, "link::windows_manifest"); const scope& rs (t.root_scope ()); diff --git a/build2/cc/windows-rpath.cxx b/build2/cc/windows-rpath.cxx index 46a3d3a..383663f 100644 --- a/build2/cc/windows-rpath.cxx +++ b/build2/cc/windows-rpath.cxx @@ -46,7 +46,10 @@ namespace build2 // adding to the assembly or timestamp_nonexistent if there aren't any. // timestamp link:: - windows_rpath_timestamp (const file& t, const scope& bs, lorder lo) const + windows_rpath_timestamp (const file& t, + const scope& bs, + action act, + lorder lo) const { timestamp r (timestamp_nonexistent); @@ -91,7 +94,9 @@ namespace build2 // Ok, this is a DLL. // - timestamp t (l != nullptr ? l->mtime () : file_mtime (f.c_str ())); + timestamp t (l != nullptr + ? l->load_mtime () + : file_mtime (f.c_str ())); if (t > r) r = t; @@ -104,7 +109,7 @@ namespace build2 if ((f = a = pt->is_a ()) || (f = pt->is_a ())) - process_libraries (bs, lo, sys_lib_dirs, + process_libraries (act, bs, lo, sys_lib_dirs, *f, a != nullptr, imp, lib, nullptr, true); } @@ -118,6 +123,7 @@ namespace build2 auto link:: windows_rpath_dlls (const file& t, const scope& bs, + action act, lorder lo) const -> windows_dlls { windows_dlls r; @@ -185,7 +191,7 @@ namespace build2 if ((f = a = pt->is_a ()) || (f = pt->is_a ())) - process_libraries (bs, lo, sys_lib_dirs, + process_libraries (act, bs, lo, sys_lib_dirs, *f, a != nullptr, imp, lib, nullptr, true); } @@ -207,6 +213,7 @@ namespace build2 void link:: windows_rpath_assembly (const file& t, const scope& bs, + action act, lorder lo, const string& tcpu, timestamp ts, @@ -244,7 +251,7 @@ namespace build2 windows_dlls dlls; if (!empty) - dlls = windows_rpath_dlls (t, bs, lo); + dlls = windows_rpath_dlls (t, bs, act, lo); // Clean the assembly directory and make sure it exists. Maybe it would // have been faster to overwrite the existing manifest rather than diff --git a/build2/cli/rule b/build2/cli/rule index 03fd471..d783c20 100644 --- a/build2/cli/rule +++ b/build2/cli/rule @@ -22,10 +22,10 @@ namespace build2 compile () {} virtual match_result - match (slock&, action, target&, const string& hint) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; static target_state perform_update (action, const target&); diff --git a/build2/cli/rule.cxx b/build2/cli/rule.cxx index ba4963e..96eafbc 100644 --- a/build2/cli/rule.cxx +++ b/build2/cli/rule.cxx @@ -44,7 +44,7 @@ namespace build2 } match_result compile:: - match (slock& ml, action a, target& xt, const string&) const + match (action a, target& xt, const string&) const { tracer trace ("cli::compile::match"); @@ -57,7 +57,7 @@ namespace build2 // See if we have a .cli source file. 
// bool r (false); - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + for (prerequisite_member p: group_prerequisite_members (a, t)) { if (p.is_a ()) { @@ -81,25 +81,16 @@ namespace build2 return r; } - // If we still haven't figured out the member list, we can do - // that now. Specifically, at this stage, no further changes to - // cli.options are possible and we can determine whether the - // --suppress-inline option is present. + // Figure out the member list. // - if (t.h == nullptr) - { - t.h = &search (t.dir, t.out, t.name, nullptr, nullptr); - t.h->group = &t; - - t.c = &search (t.dir, t.out, t.name, nullptr, nullptr); - t.c->group = &t; - - if (!find_option ("--suppress-inline", t, "cli.options")) - { - t.i = &search (t.dir, t.out, t.name, nullptr, nullptr); - t.i->group = &t; - } - } + // At this stage, no further changes to cli.options are possible and + // we can determine whether the --suppress-inline option is present. + // + t.h = &search (t.dir, t.out, t.name); + t.c = &search (t.dir, t.out, t.name); + t.i = find_option ("--suppress-inline", t, "cli.options") + ? nullptr + : &search (t.dir, t.out, t.name); return r; } @@ -109,23 +100,17 @@ namespace build2 // target& t (xt); - // First see if we are already linked-up to the cli.cxx{} group. If - // it is some other group, then we are definitely not a match. - // - if (t.group != nullptr) - return t.group->is_a () != nullptr; - // Check if there is a corresponding cli.cxx{} group. // - cli_cxx* g (targets.find (t.dir, t.out, t.name)); + const cli_cxx* g (targets.find (t.dir, t.out, t.name)); // If not or if it has no prerequisites (happens when we use it to // set cli.options) and this target has a cli{} prerequisite, then - // synthesize the group. + // synthesize the dependency. // if (g == nullptr || !g->has_prerequisites ()) { - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + for (prerequisite_member p: prerequisite_members (a, t)) { if (p.is_a ()) { @@ -136,7 +121,7 @@ namespace build2 if (g == nullptr) g = &targets.insert (t.dir, t.out, t.name, trace); - g->prerequisites.push_back (p.as_prerequisite ()); + g->prerequisites (prerequisites {p.as_prerequisite ()}); } else l4 ([&]{trace << ".cli file stem '" << p.name () << "' " @@ -146,30 +131,23 @@ namespace build2 } } - if (g != nullptr) - { - // Resolve the group's members. This should link us up to the - // group. - // - resolve_group_members (ml, a, *g); - - // For ixx{}, verify it is part of the group. - // - if (t.is_a () && g->i == nullptr) - { - l4 ([&]{trace << "generation of inline file " << t - << " is disabled with --suppress-inline";}); - g = nullptr; - } - } + if (g == nullptr) + return false; - assert (t.group == g); - return g != nullptr; + // For ixx{}, verify it is part of the group (i.e., not disabled + // via --suppress-inline). + // + if (t.is_a () && + find_option ("--suppress-inline", *g, "cli.options")) + return false; + + t.group = g; + return true; } } recipe compile:: - apply (slock& ml, action a, target& xt) const + apply (action a, target& xt) const { if (cli_cxx* pt = xt.is_a ()) { @@ -184,11 +162,11 @@ namespace build2 // Inject dependency on the output directory. // - inject_fsdir (ml, a, t); + inject_fsdir (a, t); - // Search and match prerequisite members. + // Match prerequisite members. 
// - search_and_match_prerequisite_members (ml, a, t); + match_prerequisite_members (a, t); switch (a) { @@ -199,8 +177,8 @@ namespace build2 } else { - cli_cxx& g (xt.group->as ()); - build2::match (ml, a, g); + const cli_cxx& g (xt.group->as ()); + build2::match (a, g); return group_recipe; // Execute the group's recipe. } } @@ -239,7 +217,12 @@ namespace build2 // const cli* s; { - auto p (execute_prerequisites (a, t, t.mtime ())); + // The rule has been matched which means the members should be + // resolved and paths assigned. + // + auto p ( + execute_prerequisites ( + a, t, t.load_mtime (t.h->path ()))); if ((s = p.first) == nullptr) return p.second; diff --git a/build2/cli/target b/build2/cli/target index c10e4b8..5e51bb4 100644 --- a/build2/cli/target +++ b/build2/cli/target @@ -26,13 +26,13 @@ namespace build2 virtual const target_type& dynamic_type () const {return static_type;} }; - // Standard layout type compatible with target*[3]. + // Standard layout type compatible with group_view's const target*[3]. // struct cli_cxx_members { - const_ptr h = nullptr; - const_ptr c = nullptr; - const_ptr i = nullptr; + const cxx::hxx* h = nullptr; + const cxx::cxx* c = nullptr; + const cxx::ixx* i = nullptr; }; class cli_cxx: public mtime_target, public cli_cxx_members @@ -41,14 +41,13 @@ namespace build2 using mtime_target::mtime_target; virtual group_view - group_members (action_type); - - virtual timestamp - load_mtime () const; + group_members (action_type) const override; public: static const target_type static_type; - virtual const target_type& dynamic_type () const {return static_type;} + + virtual const target_type& + dynamic_type () const override {return static_type;} }; } } diff --git a/build2/cli/target.cxx b/build2/cli/target.cxx index 200f6f0..51f58aa 100644 --- a/build2/cli/target.cxx +++ b/build2/cli/target.cxx @@ -4,8 +4,6 @@ #include -#include // file_mtime() - using namespace std; using namespace butl; @@ -32,27 +30,17 @@ namespace build2 // cli.cxx // group_view cli_cxx:: - group_members (action_type) + group_members (action_type) const { - static_assert (offsetof (cli_cxx_members, i) - - offsetof (cli_cxx_members, h) == sizeof (target*) * 2, + static_assert (sizeof (cli_cxx_members) == sizeof (const target*) * 3, "member layout incompatible with array"); return h != nullptr - ? group_view {reinterpret_cast (&h), + ? group_view {reinterpret_cast (&h), (i != nullptr ? 3U : 2U)} : group_view {nullptr, 0}; } - timestamp cli_cxx:: - load_mtime () const - { - // The rule has been matched which means the members should - // be resolved and paths assigned. - // - return file_mtime (h->path ()); - } - static pair> cli_cxx_factory (const target_type&, dir_path d, diff --git a/build2/config/operation.cxx b/build2/config/operation.cxx index ac18d82..951da30 100644 --- a/build2/config/operation.cxx +++ b/build2/config/operation.cxx @@ -368,11 +368,9 @@ namespace build2 // target-specific. However, inside match(), things can proceed in // parallel. 
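The cli_cxx::group_members() implementation above relies on cli_cxx_members being a standard-layout struct whose size matches an array of three target pointers. A minimal, self-contained illustration of this trick (with made-up types) is:

#include <cassert>

struct target {};

struct members // Standard layout, same size as const target*[3].
{
  const target* h = nullptr;
  const target* c = nullptr;
  const target* i = nullptr;
};

int main ()
{
  static_assert (sizeof (members) == sizeof (const target*) * 3,
                 "member layout incompatible with array");

  target th, tc;
  members m;
  m.h = &th;
  m.c = &tc;

  // View the members as an array, as group_members() does above.
  //
  const target* const* v (reinterpret_cast<const target* const*> (&m.h));
  assert (v[0] == &th && v[1] == &tc && v[2] == nullptr);
}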
// - model_slock ml; - - for (void* v: ts) + for (const void* v: ts) { - target& t (*static_cast (v)); + const target& t (*static_cast (v)); const scope* rs (t.base_scope ().root_scope ()); if (rs == nullptr) @@ -387,10 +385,9 @@ namespace build2 continue; set_current_oif (*oif); - dependency_count = 0; - phase_guard pg (run_phase::search_match); - match (ml, action (configure_id, id), t); + phase_lock pl (run_phase::match); + match (action (configure_id, id), t); } configure_project (a, *rs, projects); @@ -488,7 +485,7 @@ namespace build2 { tracer trace ("disfigure_search"); l6 ([&]{trace << "collecting " << root.out_path ();}); - ts.push_back (const_cast (&root)); //@@ MT TMP action_targets + ts.push_back (&root); } static void @@ -599,7 +596,7 @@ namespace build2 // Note: doing everything in the load phase (disfigure_project () does // modify the model). // - for (void* v: ts) + for (const void* v: ts) { const scope& root (*static_cast (v)); @@ -617,7 +614,7 @@ namespace build2 true, // Implied. trace).first); - if (!quiet) + if (verb != 0 && !quiet) info << diag_done (a, t); } } diff --git a/build2/context b/build2/context index 79f04a2..bc73d5b 100644 --- a/build2/context +++ b/build2/context @@ -11,35 +11,33 @@ #include #include #include +#include namespace build2 { + // Main (and only) scheduler. Started up and shut down in main(). + // + extern scheduler sched; + // In order to perform each operation the build system goes through the // following phases: // - // load - load the buildfiles - // search & match - search prerequisites and match rules - // execute - execute the matched rule - // - // The phase can only be changed during serial or exclusive execution - // (see below). + // load - load the buildfiles + // match - search prerequisites and match rules + // execute - execute the matched rule // - extern run_phase phase; - - // The build system model (internal state) is protected at the top level by - // the model mutex. During serial execution the model mutex is unlocked. + // The build system starts with a "serial load" phase and then continues + // with parallel search and execute. Match, however, can be interrupted + // both with load and execute. // - extern shared_mutex model_mutex; - - // Parallel execution always starts with acquiring a shared model lock (by - // creating model_slock; see below). Pointers to these locks are cached in - // the model_lock TLS variable (which is NULL during serial execution). + // Match can be interrupted with "exclusive load" in order to load + // additional buildfiles. Similarly, it can be interrupted with (parallel) + // execute in order to build targetd required to complete the match (for + // example, generated source code or source code generators themselves. // - // The build system starts with a "serial load" phase and then continues - // with parallel search & match and execute. Search & match, however, can be - // interrupted with an "exclusive load" by re-locking the shared lock as - // exclusive (using model_rlock below), changing the phase, and loading - // additional buildfiles. + // Such interruptions are performed by phase change that is protected by + // phase_mutex (which is also used to synchronize the state changes between + // phases). // // Serial load can perform arbitrary changes to the model. Exclusive load, // however, can only perform "island appends". 
That is, it can create new @@ -47,150 +45,198 @@ namespace build2 // invalidate any references to such (the idea here is that one should be // able to load additional buildfiles as long as they don't interfere with // the existing build state). The "islands" are identified by the - // load_generation number (0 for serial load). It is incremented/restored by - // phase_guard and is stored in various "nodes" (variables, etc) to allow - // modifications "within the islands". - // - // @@ MT: do we really have to hold shared lock during execute? - // @@ MT: we can also interrupt load s&m with execute -- neither handled - // nor documented. + // load_generation number (0 for initial/serial load). It is incremented in + // case of a phase switch and is stored in various "nodes" (variables, etc) + // to allow modifications "within the islands". // - extern -#ifdef __cpp_thread_local - thread_local -#else - __thread -#endif - slock* model_lock; - + extern run_phase phase; extern size_t load_generation; - struct phase_guard + // A "tri-mutex" that keeps all the threads in one of the three phases. When + // a thread wants to switch a phase, it has to wait for all the other + // threads to do the same (or release their phase locks). The load phase is + // exclusive. + // + // The interleaving match and execute is interesting: during match we read + // the "external state" (e.g., filesystem entries, modifications times, etc) + // and capture it in the "internal state" (our dependency graph). During + // execute we are modifying the external state with controlled modifications + // of the internal state to reflect the changes (e.g., update mtimes). If + // you think about it, it's pretty clear that we cannot safely perform both + // of these actions simultaneously. A good example would be running a code + // generator and header dependency extraction simultaneously: the extraction + // process may pick up headers as they are being generated. As a result, we + // either have everyone treat the external state as read-only or write-only. + // + class phase_mutex { - explicit - phase_guard (run_phase p) - : o (phase) - { - phase = p; - - if (phase == run_phase::load) - ++load_generation; - } - - ~phase_guard () - { - if (phase == run_phase::load) - --load_generation; - - phase = o; - } - run_phase o; + public: + // Acquire a phase lock potentially blocking (unless already in the + // desired phase) until switching to the desired phase is possible. + // + void + lock (run_phase); + + // Release the phase lock potentially allowing (unless there are other + // locks on this phase) switching to a different phase. + // + void + unlock (run_phase); + + // Switch from one phase to another. Semantically, just unlock() followed + // by lock() but more efficient. + // + void + relock (run_phase unlock, run_phase lock); + + private: + friend struct phase_lock; + friend struct phase_unlock; + friend struct phase_switch; + + phase_mutex (): lc_ (0), mc_ (0), ec_ (0) {phase = run_phase::load;} + + static phase_mutex instance; + + private: + // We have a counter for each phase which represents the number of threads + // in or waiting for this phase. + // + // We use condition variables to wait for a phase switch. The load phase + // is exclusive so we have a separate mutex to serialize it (think of it + // as a second level locking). + // + // When the mutex is unlocked (all three counters become zero, the phase + // is always changed to load (this is also the initial state). 
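To make the tri-mutex semantics described above more concrete, here is a stripped-down, self-contained model of the idea: per-phase counters plus condition variables, with load chosen when everything is released. It is deliberately simplified and is not build2's implementation; in particular, it omits the scheduler activation/deactivation and the second-level load mutex.

#include <cstddef>
#include <mutex>
#include <thread>
#include <condition_variable>

enum class run_phase {load, match, execute};

class phase_gate
{
public:
  void
  lock (run_phase p)
  {
    std::unique_lock<std::mutex> l (m_);
    bool unlocked (count_[0] + count_[1] + count_[2] == 0);
    std::size_t i (static_cast<std::size_t> (p));
    count_[i]++;

    if (unlocked)
      phase_ = p;                                   // Switch directly.
    else
      cv_[i].wait (l, [this, p] {return phase_ == p;});
  }

  void
  unlock (run_phase p)
  {
    std::unique_lock<std::mutex> l (m_);

    if (--count_[static_cast<std::size_t> (p)] == 0)
    {
      // This phase is now unlocked: pick a phase with waiters, if any, and
      // let them in; otherwise fall back to load.
      //
      for (std::size_t j (0); j != 3; ++j)
      {
        if (count_[j] != 0)
        {
          phase_ = static_cast<run_phase> (j);
          l.unlock ();
          cv_[j].notify_all ();
          return;
        }
      }

      phase_ = run_phase::load;
    }
  }

private:
  std::mutex m_;
  std::condition_variable cv_[3];
  run_phase phase_ = run_phase::load;
  std::size_t count_[3] = {0, 0, 0};
};

int main ()
{
  phase_gate g;

  g.lock (run_phase::match);           // First locker switches to match.

  std::thread t ([&g]
  {
    g.lock (run_phase::execute);       // Waits until match is fully released.
    g.unlock (run_phase::execute);
  });

  g.unlock (run_phase::match);         // Unblocks the execute-phase thread.
  t.join ();
}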
+ // + mutex m_; + size_t lc_; + size_t mc_; + size_t ec_; + + condition_variable lv_; + condition_variable mv_; + condition_variable ev_; + + mutex lm_; }; - // A shared model lock. If there is already an instance of model_slock in - // this thread, then the new instance simply references it (asserting that - // it is locked). + // Grab a new phase lock releasing it on destruction. The lock can be + // "owning" or "referencing" (recursive). + // + // On the referencing semantics: If there is already an instance of + // phase_lock in this thread, then the new instance simply references it. // // The reason for this semantics is to support the following scheduling - // pattern: + // pattern (in actual code we use wait_guard to RAII it): // - // scheduler::atomic_count task_count (0); + // atomic_count task_count (0); // // { - // model_slock ml; // (1) + // phase_lock l (run_phase::match); // (1) // // for (...) // { // sched.async (task_count, // [] (...) // { - // model_slock ml; // (2) + // phase_lock pl (run_phase::match); // (2) // ... // }, // ...); // } // } // - // sched.wait (task_count); // (3) + // sched.wait (task_count); // (3) // // Here is what's going on here: // - // 1. We first get a shared lock "for ourselves" since after the first + // 1. We first get a phase lock "for ourselves" since after the first // iteration of the loop, things may become asynchronous (including - // attempts to relock for exclusive access and change the structure we - // are iteration upon). + // attempts to switch the phase and modify the structure we are iteration + // upon). // // 2. The task can be queued or it can be executed synchronously inside // async() (refer to the scheduler class for details on this semantics). // // If this is an async()-synchronous execution, then the task will create - // a referencing model_slock. If, however, this is a queued execution + // a referencing phase_lock. If, however, this is a queued execution // (including wait()-synchronous), then the task will create a top-level - // model_slock. + // phase_lock. // // Note that we only acquire the lock once the task starts executing // (there is no reason to hold the lock while the task is sitting in the // queue). This optimization assumes that whatever else we pass to the - // task (for example, a reference to a target) is immutable (so such a - // reference cannot become invalid). + // task (for example, a reference to a target) is stable (in other words, + // such a reference cannot become invalid). // - // 3. Before calling wait(), we release our shared lock to allow re-locking - // for exclusive access. And once wait() returns we are again running - // serially. + // 3. Before calling wait(), we release our phase lock to allow switching + // the phase. 
// - struct model_slock + struct phase_lock { - model_slock () - { - if (slock* l = model_lock) - assert (l->owns_lock ()); - else - model_lock = &(l_ = slock (model_mutex)); - } - - ~model_slock () - { - if (&l_ == model_lock) - model_lock = nullptr; - } - - operator slock& () {return *model_lock;} - operator const slock& () const {return *model_lock;} + explicit phase_lock (run_phase); + ~phase_lock (); - private: - slock l_; + phase_lock (phase_lock&&) = delete; + phase_lock (const phase_lock&) = delete; + + phase_lock& operator= (phase_lock&&) = delete; + phase_lock& operator= (const phase_lock&) = delete; + + run_phase p; + + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + phase_lock* instance; }; - // Re-lock shared to exclusive for the lifetime or rlock. + // Assuming we have a lock on the current phase, temporarily release it + // and reacquire on destruction. // - struct model_rlock + struct phase_unlock { - model_rlock () - : sl_ (model_lock) - { - if (sl_ != nullptr) - { - sl_->unlock (); - ul_ = ulock (*sl_->mutex ()); - } - } - - ~model_rlock () - { - if (sl_ != nullptr) - { - ul_.unlock (); - sl_->lock (); - } - } - - // Can be treated as const ulock. - // - operator const ulock& () const {return ul_;} + phase_unlock (bool unlock = true); + ~phase_unlock (); - private: - slock* sl_; - ulock ul_; + phase_lock* l; + }; + + // Assuming we have a lock on the current phase, temporarily switch to a + // new phase and switch back on destruction. + // + struct phase_switch + { + explicit phase_switch (run_phase); + ~phase_switch (); + + run_phase o, n; + }; + + // Wait for a task count optionally and temporarily unlocking the phase. + // + struct wait_guard + { + ~wait_guard () noexcept (false); + + explicit + wait_guard (atomic_count& task_count, + bool phase = false); + + wait_guard (size_t start_count, + atomic_count& task_count, + bool phase = false); + + void + wait (); + + size_t start_count; + atomic_count* task_count; + bool phase; }; // Cached variables. @@ -218,23 +264,36 @@ namespace build2 extern const meta_operation_info* current_mif; extern const operation_info* current_inner_oif; extern const operation_info* current_outer_oif; + extern size_t current_on; // Current operation number (1-based) in the + // meta-operation batch. + extern execution_mode current_mode; + // Total number of dependency relationships in the current action. Together + // with the target::dependents count it is incremented during the rule + // search & match phase and is decremented during execution with the + // expectation of it reaching 0. Used as a sanity check. + // + extern atomic_count dependency_count; + inline void set_current_mif (const meta_operation_info& mif) { - current_mif = &mif; current_mname = &mif.name; + current_mif = &mif; + current_on = 0; // Reset. } inline void set_current_oif (const operation_info& inner_oif, const operation_info* outer_oif = nullptr) { + current_oname = &(outer_oif == nullptr ? inner_oif : *outer_oif).name; current_inner_oif = &inner_oif; current_outer_oif = outer_oif; - current_oname = &(outer_oif == nullptr ? inner_oif : *outer_oif).name; + current_on++; current_mode = inner_oif.mode; + dependency_count.store (0, memory_order_relaxed); // Serial. } // Keep going flag. @@ -245,14 +304,6 @@ namespace build2 // extern bool keep_going; - // Total number of dependency relationships in the current action. 
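Expressed with the wait_guard and match_async interfaces introduced in this patch, the scheduling pattern documented above becomes roughly the following. This is a hedged sketch, not a complete function: a, t, and the prerequisite iteration are assumed to come from a rule's apply() running under a match phase lock.

wait_guard wg (target::count_busy (), t.task_count, true /* unlock phase */);

for (prerequisite_member p: group_prerequisite_members (a, t))
{
  const target& pt (p.search ());

  // Queue (or inline-execute) the match; the task count tracks completion.
  //
  match_async (a, pt, target::count_busy (), t.task_count);
}

wg.wait (); // Releases the phase lock while blocking, then re-acquires it.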
- // Together with the target::dependents count it is incremented - // during the rule search & match phase and is decremented during - // execution with the expectation of it reaching 0. Used as a sanity - // check. - // - extern atomic_count dependency_count; - // Reset the build state. In particular, this removes all the targets, // scopes, and variables. // @@ -343,4 +394,6 @@ namespace build2 } } +#include + #endif // BUILD2_CONTEXT diff --git a/build2/context.cxx b/build2/context.cxx index 8b4fd52..62822a3 100644 --- a/build2/context.cxx +++ b/build2/context.cxx @@ -23,18 +23,157 @@ using namespace butl; namespace build2 { - run_phase phase = run_phase::load; + scheduler sched; - shared_mutex model_mutex; + run_phase phase; + phase_mutex phase_mutex::instance; + + size_t load_generation; #ifdef __cpp_thread_local thread_local #else __thread #endif - slock* model_lock; + phase_lock* phase_lock::instance; - size_t load_generation; + void phase_mutex:: + lock (run_phase p) + { + { + mlock l (m_); + bool u (lc_ == 0 && mc_ == 0 && ec_ == 0); // Unlocked. + + // Increment the counter. + // + condition_variable* v (nullptr); + switch (p) + { + case run_phase::load: lc_++; v = &lv_; break; + case run_phase::match: mc_++; v = &mv_; break; + case run_phase::execute: ec_++; v = &ev_; break; + } + + // If unlocked, switch directly to the new phase. Otherwise wait for the + // phase switch. Note that in the unlocked case we don't need to notify + // since there is nobody waiting (all counters are zero). + // + if (u) + phase = p; + else if (phase != p) + { + sched.deactivate (); + for (; phase != p; v->wait (l)) ; + l.unlock (); // Important: activate() can block. + sched.activate (); + } + } + + // In case of load, acquire the exclusive access mutex. + // + if (p == run_phase::load) + lm_.lock (); + } + + void phase_mutex:: + unlock (run_phase p) + { + // In case of load, release the exclusive access mutex. + // + if (p == run_phase::load) + lm_.unlock (); + + { + mlock l (m_); + + // Decrement the counter and see if this phase has become unlocked. + // + bool u (false); + switch (p) + { + case run_phase::load: u = (--lc_ == 0); break; + case run_phase::match: u = (--mc_ == 0); break; + case run_phase::execute: u = (--ec_ == 0); break; + } + + // If the phase is unlocked, pick a new phase and notify the waiters. + // Note that we notify all load waiters so that they can all serialize + // behind the second-level mutex. + // + if (u) + { + condition_variable* v; + + if (lc_ != 0) {phase = run_phase::load; v = &lv_;} + else if (mc_ != 0) {phase = run_phase::match; v = &mv_;} + else if (ec_ != 0) {phase = run_phase::execute; v = &ev_;} + else {phase = run_phase::load; v = nullptr;} + + if (v != nullptr) + { + l.unlock (); + v->notify_all (); + } + } + } + } + + void phase_mutex:: + relock (run_phase o, run_phase n) + { + // Pretty much a fused unlock/lock implementation except that we always + // switch into the new phase. + // + assert (o != n); + + if (o == run_phase::load) + lm_.unlock (); + + { + mlock l (m_); + bool u (false); + + switch (o) + { + case run_phase::load: u = (--lc_ == 0); break; + case run_phase::match: u = (--mc_ == 0); break; + case run_phase::execute: u = (--ec_ == 0); break; + } + + // Set if will be waiting or notifying others. + // + condition_variable* v (nullptr); + switch (n) + { + case run_phase::load: v = lc_++ != 0 || !u ? &lv_ : nullptr; break; + case run_phase::match: v = mc_++ != 0 || !u ? &mv_ : nullptr; break; + case run_phase::execute: v = ec_++ != 0 || !u ? 
&ev_ : nullptr; break; + } + + if (u) + { + phase = n; + + // Notify others that could be waiting for this phase. + // + if (v != nullptr) + { + l.unlock (); + v->notify_all (); + } + } + else // phase != n + { + sched.deactivate (); + for (; phase != n; v->wait (l)) ; + l.unlock (); // Important: activate() can block. + sched.activate (); + } + } + + if (n == run_phase::load) + lm_.lock (); + } const variable* var_src_root; const variable* var_out_root; @@ -53,13 +192,13 @@ namespace build2 const meta_operation_info* current_mif; const operation_info* current_inner_oif; const operation_info* current_outer_oif; - + size_t current_on; execution_mode current_mode; - bool keep_going = false; - atomic_count dependency_count; + bool keep_going = false; + variable_overrides reset (const strings& cmd_vars) { diff --git a/build2/context.ixx b/build2/context.ixx new file mode 100644 index 0000000..aee7e32 --- /dev/null +++ b/build2/context.ixx @@ -0,0 +1,115 @@ +// file : build2/context.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + // phase_lock + // + inline phase_lock:: + phase_lock (run_phase p) + : p (p) + { + if (phase_lock* l = instance) + assert (l->p == p); + else + { + phase_mutex::instance.lock (p); + instance = this; + + //text << this_thread::get_id () << " phase acquire " << p; + } + } + + inline phase_lock:: + ~phase_lock () + { + if (instance == this) + { + instance = nullptr; + phase_mutex::instance.unlock (p); + + //text << this_thread::get_id () << " phase release " << p; + } + } + + // phase_unlock + // + inline phase_unlock:: + phase_unlock (bool u) + : l (u ? phase_lock::instance : nullptr) + { + if (u) + { + phase_lock::instance = nullptr; + phase_mutex::instance.unlock (l->p); + + //text << this_thread::get_id () << " phase unlock " << l->p; + } + } + + inline phase_unlock:: + ~phase_unlock () + { + if (l != nullptr) + { + phase_mutex::instance.lock (l->p); + phase_lock::instance = l; + + //text << this_thread::get_id () << " phase lock " << l->p; + } + } + + // phase_switch + // + inline phase_switch:: + phase_switch (run_phase n) + : o (phase), n (n) + { + phase_mutex::instance.relock (o, n); + phase_lock::instance->p = n; + + if (n == run_phase::load) // Note: load lock is exclusive. + load_generation++; + + //text << this_thread::get_id () << " phase switch " << o << " " << n; + } + + inline phase_switch:: + ~phase_switch () + { + phase_mutex::instance.relock (n, o); + phase_lock::instance->p = o; + + //text << this_thread::get_id () << " phase restore " << n << " " << o; + } + + // wait_guard + // + inline wait_guard:: + wait_guard (atomic_count& tc, bool p) + : wait_guard (0, tc, p) + { + } + + inline wait_guard:: + wait_guard (size_t sc, atomic_count& tc, bool p) + : start_count (sc), task_count (&tc), phase (p) + { + } + + inline wait_guard:: + ~wait_guard () noexcept (false) + { + if (task_count != nullptr) + wait (); + } + + inline void wait_guard:: + wait () + { + phase_unlock u (phase); + sched.wait (start_count, *task_count); + task_count = nullptr; + } +} diff --git a/build2/diagnostics b/build2/diagnostics index dff1bd6..b889fd4 100644 --- a/build2/diagnostics +++ b/build2/diagnostics @@ -113,6 +113,70 @@ namespace build2 using butl::diag_stream; using butl::diag_epilogue; + // Diagnostics stack. Each frame is "applied" to the fail/error/warn/info + // diag record. 
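Given the referencing semantics implemented by phase_lock above, a task body can create a phase_lock unconditionally regardless of whether it ended up being executed synchronously inside async() or from the scheduler's queue. A hedged sketch:

phase_lock outer (run_phase::match);  // Owning: acquires the phase.
{
  // On the same thread this simply references the outer lock (and asserts
  // that the phase matches); on another thread it would be an owning lock
  // of its own.
  //
  phase_lock inner (run_phase::match);

  // ... do match-phase work ...
}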
+ // + // Unfortunately most of our use-cases don't fit into the 2-pointer small + // object optimization of std::function. So we have to complicate things + // a bit here. + // + struct diag_frame + { + explicit + diag_frame (void (*f) (const diag_frame&, const diag_record&)) + : func_ (f), prev_ (stack) {stack = this;} + + // Start with an existing stack, for example, from another thread. + // + explicit + diag_frame (const diag_frame* prev) + : prev_ (stack) {stack = prev;} // Just a restore guard. + + static void + apply (const diag_record& r) + { + for (const diag_frame* f (stack); f != nullptr; f = f->prev_) + f->func_ (*f, r); + } + + ~diag_frame () {stack = prev_;} + + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + const diag_frame* stack; // Tip of the stack. + + private: + void (*func_) (const diag_frame&, const diag_record&); + const diag_frame* prev_; + }; + + template + struct diag_frame_impl: diag_frame + { + explicit + diag_frame_impl (F f): diag_frame (&thunk), func_ (move (f)) {} + + private: + static void + thunk (const diag_frame& f, const diag_record& r) + { + static_cast (f).func_ (r); + } + + const F func_; + }; + + template + inline diag_frame_impl + make_diag_frame (F f) + { + return diag_frame_impl (move (f)); + } + // Diagnostic facility, project specifics. // struct simple_prologue_base @@ -179,11 +243,11 @@ namespace build2 explicit basic_mark_base (const char* type, + diag_epilogue* epilogue = &diag_frame::apply, uint16_t (*sverb) () = &stream_verb_map, const char* mod = nullptr, const char* name = nullptr, - const void* data = nullptr, - diag_epilogue* epilogue = nullptr) + const void* data = nullptr) : sverb_ (sverb), type_ (type), mod_ (mod), name_ (name), data_ (data), epilogue_ (epilogue) {} @@ -235,6 +299,7 @@ namespace build2 const char* name, const void* data = nullptr) : basic_mark_base ("trace", + nullptr, // No diag stack. []() {return stream_verb_max;}, mod, name, @@ -251,11 +316,16 @@ namespace build2 fail_mark_base (const char* type, const void* data = nullptr) : basic_mark_base (type, + [](const diag_record& r) + { + diag_frame::apply (r); + r.flush (); + throw failed (); + }, &stream_verb_map, nullptr, nullptr, - data, - [](const diag_record&) {throw failed ();}) {} + data) {} }; using fail_mark = butl::diag_mark; diff --git a/build2/diagnostics.cxx b/build2/diagnostics.cxx index 26fd9f0..7b312a1 100644 --- a/build2/diagnostics.cxx +++ b/build2/diagnostics.cxx @@ -44,6 +44,15 @@ namespace build2 // uint16_t verb = 0; // Keep disabled until set from options. + // Diagnostics stack. + // +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + const diag_frame* diag_frame::stack; + // Diagnostic facility, project specifics. // @@ -96,7 +105,7 @@ namespace build2 const basic_mark error ("error"); const basic_mark warn ("warning"); const basic_mark info ("info"); - const basic_mark text (nullptr); + const basic_mark text (nullptr, nullptr); // No type/frame. const fail_mark fail ("error"); const fail_end endf; } diff --git a/build2/dist/operation.cxx b/build2/dist/operation.cxx index e087069..6b50c14 100644 --- a/build2/dist/operation.cxx +++ b/build2/dist/operation.cxx @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -29,7 +28,7 @@ namespace build2 // install // static void - install (const process_path& cmd, file&, const dir_path&); + install (const process_path& cmd, const file&, const dir_path&); // cd && tar|zip ... /. 
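A hedged sketch of how the diagnostics stack above is meant to be used by rules: push a frame for the duration of some work so that any fail/error record flushed on this thread (the fail marks above install an epilogue that applies the stack) gets extra context appended. Here t is assumed to be the target being processed.

auto df = make_diag_frame (
  [&t] (const diag_record& dr)
  {
    dr << info << "while applying rule to target " << t;
  });

// Any fail << ... issued while df is in scope will have the info line
// above appended to it.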
// @@ -61,7 +60,7 @@ namespace build2 // For now we assume all the targets are from the same project. // - target& t (*static_cast (ts[0])); + const target& t (*static_cast (ts[0])); const scope* rs (t.base_scope ().root_scope ()); if (rs == nullptr) @@ -98,63 +97,34 @@ namespace build2 const string& dist_package (cast (l)); const process_path& dist_cmd (cast (rs->vars["dist.cmd"])); - // Match a rule for every operation supported by this project. Skip - // default_id. + // Verify all the targets are from the same project. // - auto match = [&trace, &rs, &ts] (const operation_info& o) + for (const void* v: ts) { - // Note that we are not calling operation_pre/post() callbacks - // here since the meta operation is dist and we know what we - // are doing. - // - set_current_oif (o); - dependency_count = 0; - - action a (dist_id, o.id); - - if (verb >= 6) - dump (a); - - { - phase_guard pg (run_phase::search_match); - - scheduler::atomic_count task_count (0); - { - model_slock ml; - - for (void* v: ts) - { - target& t (*static_cast (v)); - - if (rs != t.base_scope ().root_scope ()) - fail << "target " << t << " is from a different project" << - info << "one dist() meta-operation can handle one project" << - info << "consider using several dist() meta-operations"; - - l5 ([&]{trace << diag_doing (a, t);}); - - sched.async (task_count, - [a] (target& t) - { - model_slock ml; - build2::match (ml, a, t); // @@ MT exception. - }, - ref (t)); - } - } - sched.wait (task_count); - } + const target& t (*static_cast (v)); - if (verb >= 6) - dump (a); - }; + if (rs != t.base_scope ().root_scope ()) + fail << "target " << t << " is from a different project" << + info << "one dist() meta-operation can handle one project" << + info << "consider using several dist() meta-operations"; + } + // Match a rule for every operation supported by this project. Skip + // default_id. + // for (operations::size_type id (default_id + 1); id < rs->operations.size (); ++id) { if (const operation_info* oif = rs->operations[id]) - match (*oif); + { + // Note that we are not calling operation_pre/post() callbacks here + // since the meta operation is dist and we know what we are doing. + // + set_current_oif (*oif); + + match (action (dist_id, oif->id), ts); // Standard (perform) match. + } } // Add buildfiles that are not normally loaded as part of the @@ -256,13 +226,20 @@ namespace build2 if (perform.meta_operation_pre != nullptr) perform.meta_operation_pre (); + // This is a hack since according to the rules we need to completely + // reset the state. We could have done that (i.e., saved target names + // and then re-searched them in the new tree) but that would just slow + // things down while this little cheat seems harmless (i.e., assume + // the dist mete-opreation is "compatible" with perform). + // + size_t on (current_on); set_current_mif (perform); + current_on = on + 1; if (perform.operation_pre != nullptr) perform.operation_pre (update_id); set_current_oif (update); - dependency_count = 0; action a (perform_id, update_id); @@ -289,9 +266,9 @@ namespace build2 // Copy over all the files. // - for (void* v: files) + for (const void* v: files) { - file& t (*static_cast (v)); + const file& t (*static_cast (v)); // Figure out where this file is inside the target directory. 
// @@ -369,7 +346,7 @@ namespace build2 // install // static void - install (const process_path& cmd, file& t, const dir_path& d) + install (const process_path& cmd, const file& t, const dir_path& d) { dir_path reld (relative (d)); path relf (relative (t.path ())); diff --git a/build2/dist/rule b/build2/dist/rule index 61be24f..db8e731 100644 --- a/build2/dist/rule +++ b/build2/dist/rule @@ -22,10 +22,10 @@ namespace build2 rule () {} virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; }; } } diff --git a/build2/dist/rule.cxx b/build2/dist/rule.cxx index f5c4018..bf5ab47 100644 --- a/build2/dist/rule.cxx +++ b/build2/dist/rule.cxx @@ -16,17 +16,17 @@ namespace build2 namespace dist { match_result rule:: - match (slock&, action, target&, const string&) const + match (action, target&, const string&) const { return true; // We always match. } recipe rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { const dir_path& out_root (t.root_scope ().out_path ()); - auto r (group_prerequisite_members (ml, a, t, false)); + auto r (group_prerequisite_members (a, t, false)); for (auto i (r.begin ()); i != r.end (); ++i) { prerequisite_member p (*i); @@ -43,12 +43,12 @@ namespace build2 if (p.type ().see_through && i.enter_group ()) continue; - target& pt (p.search ()); + const target& pt (p.search ()); // Don't match targets that are outside of our project. // if (pt.dir.sub (out_root)) - build2::match (ml, a, pt); + build2::match (a, pt); } return noop_recipe; // We will never be executed. diff --git a/build2/dump b/build2/dump index 9b4f529..a1d9b07 100644 --- a/build2/dump +++ b/build2/dump @@ -12,10 +12,10 @@ namespace build2 { - // Dump the state pertaining to the specified action. + // Dump the state pertaining to the current action. // void - dump (action); + dump (); } #endif // BUILD2_DUMP diff --git a/build2/dump.cxx b/build2/dump.cxx index 61739de..7055a78 100644 --- a/build2/dump.cxx +++ b/build2/dump.cxx @@ -184,11 +184,7 @@ namespace build2 } static void - dump_target (ostream& os, - string& ind, - action a, - const target& t, - const scope& s) + dump_target (ostream& os, string& ind, const target& t, const scope& s) { // Print the target and its prerequisites relative to the scope. To achieve // this we are going to temporarily lower the stream verbosity to level 1. @@ -206,14 +202,14 @@ namespace build2 os << ':'; - for (const prerequisite& p: t.prerequisites) + for (const prerequisite& p: t.prerequisites ()) { os << ' '; // Print it as target if one has been cached. // - if (p.target != nullptr) - os << *p.target; + if (const target* t = p.target.load (memory_order_relaxed)) // Serial. + os << *t; else os << p; } @@ -221,16 +217,20 @@ namespace build2 // If the target has been matched to a rule, also print resolved // prerequisite targets. // - if (t.recipe (a)) { - bool first (true); - for (const target* pt: t.prerequisite_targets) + size_t c (t.task_count.load (memory_order_relaxed)); // Running serial. + + if (c == target::count_applied () || c == target::count_executed ()) { - if (pt == nullptr) // Skipped. - continue; + bool first (true); + for (const target* pt: t.prerequisite_targets) + { + if (pt == nullptr) // Skipped. + continue; - os << (first ? " | " : " ") << *pt; - first = false; + os << (first ? 
" | " : " ") << *pt; + first = false; + } } } @@ -251,10 +251,7 @@ namespace build2 } static void - dump_scope (ostream& os, - string& ind, - action a, - scope_map::const_iterator& i) + dump_scope (ostream& os, string& ind, scope_map::const_iterator& i) { const scope& p (i->second); const dir_path& d (i->first); @@ -312,7 +309,7 @@ namespace build2 os << endl; // Extra newline between scope blocks. os << endl; - dump_scope (os, ind, a, i); + dump_scope (os, ind, i); sb = true; } @@ -332,7 +329,7 @@ namespace build2 } os << endl; - dump_target (os, ind, a, t, p); + dump_target (os, ind, t, p); } ind.resize (ind.size () - 2); @@ -343,14 +340,14 @@ namespace build2 } void - dump (action a) + dump () { auto i (scopes.cbegin ()); assert (&i->second == global_scope); string ind; ostream& os (*diag_stream); - dump_scope (os, ind, a, i); + dump_scope (os, ind, i); os << endl; } } diff --git a/build2/file b/build2/file index 482542f..2200c26 100644 --- a/build2/file +++ b/build2/file @@ -170,7 +170,7 @@ namespace build2 names import (scope& base, name, const location&); - target& + const target& import (const prerequisite_key&); // As above but only imports as an already existing target. Unlike the above diff --git a/build2/file.cxx b/build2/file.cxx index 06910dc..3d7f1b8 100644 --- a/build2/file.cxx +++ b/build2/file.cxx @@ -1142,7 +1142,7 @@ namespace build2 return names (); // Never reached. } - target* + const target* import (const prerequisite_key& pk, bool existing) { tracer trace ("import"); @@ -1174,7 +1174,7 @@ namespace build2 path& p (pp.effect); assert (!p.empty ()); // We searched for a simple name. - exe* t ( + const exe* t ( !existing ? &targets.insert (tt, p.directory (), @@ -1191,7 +1191,7 @@ namespace build2 if (t != nullptr) { - if (t->path ().empty () && !existing) + if (!existing) t->path (move (p)); else assert (t->path () == p); diff --git a/build2/file.ixx b/build2/file.ixx index be43857..15fa8dc 100644 --- a/build2/file.ixx +++ b/build2/file.ixx @@ -10,20 +10,20 @@ namespace build2 return source_once (root, base, bf, base); } - target* + const target* import (const prerequisite_key&, bool existing); - inline target& + inline const target& import (const prerequisite_key& pk) { - assert (phase == run_phase::search_match); + assert (phase == run_phase::match); return *import (pk, false); } inline const target* import_existing (const prerequisite_key& pk) { - assert (phase == run_phase::search_match || phase == run_phase::execute); + assert (phase == run_phase::match || phase == run_phase::execute); return import (pk, true); } } diff --git a/build2/install/rule b/build2/install/rule index 24111ce..c923db9 100644 --- a/build2/install/rule +++ b/build2/install/rule @@ -22,10 +22,10 @@ namespace build2 alias_rule () {} virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; }; struct install_dir; @@ -36,17 +36,17 @@ namespace build2 file_rule () {} virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; // Return NULL if this prerequisite should be ignored and pointer to its // target otherwise. The default implementation ignores prerequsites that // are outside of this target's project. 
// - virtual target* - filter (slock&, action, target&, prerequisite_member) const; + virtual const target* + filter (action, const target&, prerequisite_member) const; // Extra installation hooks. // diff --git a/build2/install/rule.cxx b/build2/install/rule.cxx index 323060d..cf12d36 100644 --- a/build2/install/rule.cxx +++ b/build2/install/rule.cxx @@ -39,19 +39,19 @@ namespace build2 // alias_rule // match_result alias_rule:: - match (slock&, action, target&, const string&) const + match (action, target&, const string&) const { return true; } recipe alias_rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { tracer trace ("install::alias_rule::apply"); - for (prerequisite& p: group_prerequisites (t)) + for (const prerequisite& p: group_prerequisites (t)) { - target& pt (search (p)); + const target& pt (search (p)); // Check if this prerequisite is explicitly "not installable", // that is, there is the 'install' variable and its value is @@ -73,7 +73,7 @@ namespace build2 continue; } - build2::match (ml, a, pt); + build2::match (a, pt); t.prerequisite_targets.push_back (&pt); } @@ -91,7 +91,7 @@ namespace build2 "insufficient space"); match_result file_rule:: - match (slock&, action a, target& t, const string&) const + match (action a, target& t, const string&) const { // First determine if this target should be installed (called // "installable" for short). @@ -118,15 +118,15 @@ namespace build2 return mr; } - target* file_rule:: - filter (slock&, action, target& t, prerequisite_member p) const + const target* file_rule:: + filter (action, const target& t, prerequisite_member p) const { - target& pt (p.search ()); + const target& pt (p.search ()); return pt.in (t.root_scope ()) ? &pt : nullptr; } recipe file_rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { match_data md (move (t.data ())); t.clear_data (); // In case delegated-to rule (or the rule that overrides @@ -150,7 +150,7 @@ namespace build2 // run standard search_and_match()? Will need an indicator // that it was forced (e.g., [install]) for filter() below. // - auto r (group_prerequisite_members (ml, a, t)); + auto r (group_prerequisite_members (a, t)); for (auto i (r.begin ()); i != r.end (); ++i) { prerequisite_member p (*i); @@ -163,7 +163,7 @@ namespace build2 // Let a customized rule have its say. // - target* pt (filter (ml, a, t, p)); + const target* pt (filter (a, t, p)); if (pt == nullptr) continue; @@ -173,17 +173,13 @@ namespace build2 if (l && cast (l).string () == "false") continue; - build2::match (ml, a, *pt); - // If the matched rule returned noop_recipe, then the target // state will be set to unchanged as an optimization. Use this // knowledge to optimize things on our side as well since this // will help a lot in case of any static installable content // (headers, documentation, etc). // - if (pt->unchanged ()) //@@ MT? - unmatch (a, *pt); // No intent to execute. - else + if (!build2::match (a, *pt, unmatch::unchanged)) t.prerequisite_targets.push_back (pt); // Skip members of ad hoc groups. We handle them explicitly below. @@ -210,7 +206,7 @@ namespace build2 // have been found if we signalled that we do not match from // match() above. // - recipe d (match_delegate (ml, a, t, *this).first); + recipe d (match_delegate (a, t, *this).first); // If we have no installable prerequisites, then simply redirect // to it. 
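For illustration, the unmatch::unchanged idiom used by install::file_rule::apply() above, condensed into a hypothetical rule's apply() (the rule name is an assumption; the build2 calls are the ones appearing in this patch): match each prerequisite but only keep it for execution if it could not be unmatched as unchanged.

  recipe example_rule::                     // Hypothetical rule.
  apply (action a, target& t) const
  {
    for (const prerequisite& p: group_prerequisites (t))
    {
      const target& pt (search (p));

      // If the prerequisite is known to be unchanged after match, unmatch
      // it (no intent to execute) and don't keep it for execution.
      //
      if (!build2::match (a, pt, unmatch::unchanged))
        t.prerequisite_targets.push_back (&pt);
    }

    return default_recipe;
  }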
diff --git a/build2/operation b/build2/operation index 544a9f9..7eb6325 100644 --- a/build2/operation +++ b/build2/operation @@ -64,9 +64,8 @@ namespace build2 operation_id outer_operation () const {return outer_id & 0xF;} - // Implicit conversion operator to action_id for the switch() - // statement, etc. Most places will only care about the inner - // operation. + // Implicit conversion operator to action_id for the switch() statement, + // etc. Most places only care about the inner operation. // operator action_id () const {return inner_id;} @@ -74,10 +73,10 @@ namespace build2 action_id outer_id; }; - // This is an "overrides" comparison, i.e., it returns true - // if the recipe for x overrides recipe for y. The idea is - // that for the same inner operation, action with an outer - // operation is "weaker" than the one without. + // This is an "overrides" comparison, i.e., it returns true if the recipe + // for x overrides recipe for y. The idea is that for the same inner + // operation, action with an outer operation is "weaker" than the one + // without. // inline bool operator> (action x, action y) @@ -86,10 +85,11 @@ namespace build2 (x.outer_id != y.outer_id && y.outer_id != 0); } - // Note that these ignore the outer operation. - // inline bool - operator== (action x, action y) {return x.inner_id == y.inner_id;} + operator== (action x, action y) + { + return x.inner_id == y.inner_id && x.outer_id == y.outer_id; + } inline bool operator!= (action x, action y) {return !(x == y);} @@ -170,10 +170,10 @@ namespace build2 // Meta-operation info. // - // Normally a list of resolved and matched targets to execute. But - // can be something else, depending on the meta-operation. + // Normally a list of resolved and matched targets to execute. But can be + // something else, depending on the meta-operation. // - typedef vector action_targets; //@@ MT TMP: make const void* + typedef vector action_targets; struct meta_operation_info { diff --git a/build2/operation.cxx b/build2/operation.cxx index c4366f7..1fe70df 100644 --- a/build2/operation.cxx +++ b/build2/operation.cxx @@ -9,7 +9,6 @@ #include #include #include -#include #include using namespace std; @@ -74,9 +73,9 @@ namespace build2 { tracer trace ("search"); - phase_guard pg (run_phase::search_match); + phase_lock pl (run_phase::match); // Never switched. - if (target* t = targets.find (tk, trace)) + if (const target* t = targets.find (tk, trace)) ts.push_back (t); else fail (l) << "unknown target " << tk; @@ -88,39 +87,89 @@ namespace build2 tracer trace ("match"); if (verb >= 6) - dump (a); + dump (); { - phase_guard pg (run_phase::search_match); + phase_lock l (run_phase::match); - if (ts.size () > 1) - sched.tune (1); //@@ MT TMP run serially. + // Start asynchronous matching of prerequisites keeping track of how + // many we have started. Wait with unlocked phase to allow phase + // switching. + // + atomic_count task_count (0); + wait_guard wg (task_count, true); - scheduler::atomic_count task_count (0); + size_t i (0), n (ts.size ()); + for (; i != n; ++i) { - model_slock ml; + const target& t (*static_cast (ts[i])); + l5 ([&]{trace << diag_doing (a, t);}); - for (void* vt: ts) + target_state s (match_async (a, t, 0, task_count, false)); + + // Bail out if the target has failed and we weren't instructed to + // keep going. + // + if (s == target_state::failed && !keep_going) + { + ++i; + break; + } + } + + wg.wait (); + + // We are now running serially. Re-examine targets that we have matched. 
+ // + bool fail (false); + for (size_t j (0); j != n; ++j) + { + const target& t (*static_cast (ts[j])); + + target_state s (j < i + ? match (a, t, false) + : target_state::postponed); + switch (s) { - target& t (*static_cast (vt)); - l5 ([&]{trace << "matching " << t;}); - - sched.async (task_count, - [a] (target& t) - { - model_slock ml; - match (ml, a, t); // @@ MT exception handling. - }, - ref (t)); + case target_state::postponed: + { + // We bailed before matching it. + // + if (verb != 0) + info << "not " << diag_did (a, t); + + break; + } + case target_state::unknown: + case target_state::unchanged: + { + break; // Matched successfully. + } + case target_state::failed: + { + // Things didn't go well for this target. + // + if (verb != 0) + info << "failed to " << diag_do (a, t); + + fail = true; + break; + } + default: + assert (false); } } - sched.wait (task_count); - sched.tune (0); //@@ MT TMP run serially restore. + if (fail) + throw failed (); } + // Phase restored to load. + // + assert (phase == run_phase::load); + if (verb >= 6) - dump (a); + dump (); } void @@ -133,8 +182,6 @@ namespace build2 if (current_mode == execution_mode::last) reverse (ts.begin (), ts.end ()); - phase_guard pg (run_phase::execute); - // Tune the scheduler. // switch (current_inner_oif->concurrency) @@ -144,40 +191,46 @@ namespace build2 default: assert (false); // Not yet supported. } + phase_lock pl (run_phase::execute); // Never switched. + // Similar logic to execute_members(): first start asynchronous execution // of all the top-level targets. // atomic_count task_count (0); + wait_guard wg (task_count); - size_t n (ts.size ()); - for (size_t i (0); i != n; ++i) + for (const void* vt: ts) { - const target& t (*static_cast (ts[i])); + const target& t (*static_cast (vt)); l5 ([&]{trace << diag_doing (a, t);}); - target_state s (execute_async (a, t, 0, task_count)); + target_state s (execute_async (a, t, 0, task_count, false)); + // Bail out if the target has failed and we weren't instructed to keep + // going. + // if (s == target_state::failed && !keep_going) break; } - sched.wait (task_count); + + wg.wait (); sched.tune (0); // Restore original scheduler settings. // We are now running serially. Re-examine them all. // bool fail (false); - for (size_t i (0); i != n; ++i) + for (const void* vt: ts) { - const target& t (*static_cast (ts[i])); + const target& t (*static_cast (vt)); - switch (t.synchronized_state (false)) + switch (t.executed_state (false)) { case target_state::unknown: { // We bailed before executing it. // - if (!quiet) + if (verb != 0 && !quiet) info << "not " << diag_did (a, t); break; @@ -186,7 +239,7 @@ namespace build2 { // Nothing had to be done. // - if (!quiet) + if (verb != 0 && !quiet) info << diag_done (a, t); break; @@ -201,7 +254,7 @@ namespace build2 { // Things didn't go well for this target. // - if (!quiet) + if (verb != 0 && !quiet) info << "failed to " << diag_do (a, t); fail = true; @@ -218,7 +271,7 @@ namespace build2 // We should have executed every target that we matched, provided we // haven't failed (in which case we could have bailed out early). 
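Condensed for readability, this is the fork/join idiom the new match() driver above follows (execute() is analogous; the target list ts, action a, and keep_going flag are assumed to be in scope): start asynchronous work for each target, wait on the task count with the phase unlocked, then re-examine the results serially.

  atomic_count task_count (0);
  wait_guard wg (task_count, true);     // Wait with unlocked phase.

  size_t i (0), n (ts.size ());
  for (; i != n; ++i)
  {
    const target& t (*static_cast<const target*> (ts[i]));

    if (match_async (a, t, 0, task_count, false) == target_state::failed &&
        !keep_going)
    {
      ++i;
      break;
    }
  }

  wg.wait ();

  // Serial again: it is now safe to query the state of every target that
  // was started above (the rest are reported as postponed).
  //
  for (size_t j (0); j != i; ++j)
  {
    const target& t (*static_cast<const target*> (ts[j]));

    if (match (a, t, false) == target_state::failed)
      ; // Diagnose and remember the failure.
  }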
// - assert (dependency_count == 0); + assert (dependency_count.load (memory_order_relaxed) == 0); } const meta_operation_info noop { diff --git a/build2/parser.cxx b/build2/parser.cxx index 154c123..a5ed0d5 100644 --- a/build2/parser.cxx +++ b/build2/parser.cxx @@ -660,7 +660,8 @@ namespace build2 if (default_target_ == nullptr) default_target_ = target_; - target_->prerequisites.reserve (pns.size ()); + target_->prerequisites_state_.store (2, memory_order_relaxed); + target_->prerequisites_.reserve (pns.size ()); tgs.push_back (*target_); } @@ -703,7 +704,10 @@ namespace build2 // one). // target& t (*i); - t.prerequisites.push_back (++i == e ? move (p) : p); + t.prerequisites_.push_back ( + ++i == e + ? move (p) + : prerequisite (p, memory_order_relaxed)); // Serial } } } @@ -3573,7 +3577,9 @@ namespace build2 false, trace).first); - ct.prerequisites.emplace_back (prerequisite (dt)); + + ct.prerequisites_state_.store (2, memory_order_relaxed); + ct.prerequisites_.emplace_back (prerequisite (dt)); } void parser:: diff --git a/build2/prerequisite b/build2/prerequisite index 9e1dbfa..e3b78a2 100644 --- a/build2/prerequisite +++ b/build2/prerequisite @@ -45,6 +45,8 @@ namespace build2 ostream& operator<< (ostream&, const prerequisite_key&); + // Note that every data member except for the target is immutable (const). + // class prerequisite { public: @@ -71,9 +73,15 @@ namespace build2 const optional ext; // Absent if unspecified. const scope_type& scope; - const_ptr target; // NULL if not yet resolved. Note that this - // should always be the "primary target", - // not a member of a target group. + // NULL if not yet resolved. Note that this should always be the "primary + // target", not a member of a target group. + // + // While normally only a matching rule should change this, if the + // prerequisite comes from the group, then it's possible that several + // rules will try to update it simultaneously. Thus the atomic. + // + mutable atomic target {nullptr}; + public: prerequisite (optional p, const target_type_type& t, @@ -88,13 +96,12 @@ namespace build2 out (move (o)), name (move (n)), ext (move (e)), - scope (s), - target (nullptr) {} + scope (s) {} // Make a prerequisite from a target. // explicit - prerequisite (target_type&); + prerequisite (const target_type&); // Note that the returned key "tracks" the prerequisite; that is, any // updates to the prerequisite's members will be reflected in the key. 
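Since prerequisite::target is now atomic, reading the cached value looks roughly like this, a sketch modeled on dump_target() above (whether search() itself stores into the cache is an implementation detail not shown in this patch):

  static const target&
  resolved (const prerequisite& p)
  {
    // When running serially (e.g., during dump) a relaxed load suffices.
    //
    if (const target* t = p.target.load (memory_order_relaxed))
      return *t;

    return search (p); // Resolve to the primary target.
  }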
@@ -124,6 +131,27 @@ namespace build2 bool is_a (const target_type_type& tt) const {return type.is_a (tt);} + + public: + prerequisite (prerequisite&& x) + : proj (move (x.proj)), + type (move (x.type)), + dir (move (x.dir)), + out (move (x.out)), + name (move (x.name)), + ext (move (x.ext)), + scope (move (x.scope)), + target (x.target.load (memory_order_relaxed)) {} + + prerequisite (const prerequisite& x, memory_order o = memory_order_consume) + : proj (x.proj), + type (x.type), + dir (x.dir), + out (x.out), + name (x.name), + ext (x.ext), + scope (x.scope), + target (x.target.load (o)) {} }; inline ostream& @@ -131,6 +159,8 @@ namespace build2 { return os << p.key (); } + + using prerequisites = vector; } #endif // BUILD2_PREREQUISITE diff --git a/build2/prerequisite.cxx b/build2/prerequisite.cxx index 47da950..b1f3cb2 100644 --- a/build2/prerequisite.cxx +++ b/build2/prerequisite.cxx @@ -57,7 +57,7 @@ namespace build2 } prerequisite:: - prerequisite (target_type& t) + prerequisite (const target_type& t) : proj (nullopt), type (t.type ()), dir (t.dir), @@ -72,7 +72,7 @@ namespace build2 bool prerequisite:: belongs (const target_type& t) const { - const auto& p (t.prerequisites); + const auto& p (t.prerequisites ()); return !(p.empty () || this < &p.front () || this > &p.back ()); } } diff --git a/build2/rule b/build2/rule index fad4316..d434d0d 100644 --- a/build2/rule +++ b/build2/rule @@ -17,7 +17,18 @@ namespace build2 { public: bool result; - action recipe_action = action (); // Used as recipe's action if set. + + // If set, then this is a recipe's action. It must override the original + // action. Normally it is "unconditional inner operation". Only + // noop_recipe can be overridden. + // + // It is passed to rule::apply() so that prerequisites are matched for + // this action. It is also passed to target::recipe() so that if someone + // is matching this target for this action, we won't end-up re-matching + // it. However, the recipe itself is executed with the original action + // so that it can adjust its logic, if necessary. + // + action recipe_action = action (); explicit operator bool () const {return result;} @@ -33,14 +44,21 @@ namespace build2 // you need to modify some state (e.g., counters or some such), then make // sure it is MT-safe. // + // Note that match() may not be followed by apply() or be called several + // times before the following apply() (see resolve_group_members()) which + // means that it should be idempotent. The target_data object in the call + // to match() may not be the same as target. + // + // match() can also be called by another rules (see cc/install). + // class rule { public: virtual match_result - match (slock&, action, target&, const string& hint) const = 0; + match (action, target&, const string& hint) const = 0; virtual recipe - apply (slock&, action, target&) const = 0; + apply (action, target&) const = 0; }; // Fallback rule that only matches if the file exists. 
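As a reference point, a minimal rule written against the new lock-less interface might look like this (hypothetical rule; only calls that appear elsewhere in this patch are used):

  class echo_rule: public rule              // Hypothetical.
  {
  public:
    virtual match_result
    match (action, target&, const string& /*hint*/) const override
    {
      return true;                          // Match anything.
    }

    virtual recipe
    apply (action a, target& t) const override
    {
      match_prerequisites (a, t);  // Was search_and_match_prerequisites().
      return noop_recipe;
    }
  };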
@@ -51,10 +69,10 @@ namespace build2 file_rule () {} virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; static const file_rule instance; }; @@ -65,10 +83,10 @@ namespace build2 alias_rule () {} virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; static const alias_rule instance; }; @@ -79,10 +97,10 @@ namespace build2 fsdir_rule () {} virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; static target_state perform_update (action, const target&); @@ -90,6 +108,12 @@ namespace build2 static target_state perform_clean (action, const target&); + // Sometimes, as an optimization, we want to emulate execute_direct() + // of fsdir{} without the overhead of switching to the execute phase. + // + static void + perform_update_direct (action, const target&); + static const fsdir_rule instance; }; @@ -101,13 +125,13 @@ namespace build2 fallback_rule () {} virtual match_result - match (slock&, action, target&, const string&) const override + match (action, target&, const string&) const override { return true; } virtual recipe - apply (slock&, action, target&) const override {return noop_recipe;} + apply (action, target&) const override {return noop_recipe;} static const fallback_rule instance; }; diff --git a/build2/rule-map b/build2/rule-map index d7748b6..9fb4056 100644 --- a/build2/rule-map +++ b/build2/rule-map @@ -18,10 +18,10 @@ namespace build2 { class rule; - using target_type_rule_map = std::map< - const target_type*, - butl::prefix_map, '.'>>; + using hint_rule_map = + butl::prefix_map, '.'>; + + using target_type_rule_map = std::map; // This is an "indexed map" with operation_id being the index. Entry // with id 0 is a wildcard. diff --git a/build2/rule.cxx b/build2/rule.cxx index c18173b..dd508d0 100644 --- a/build2/rule.cxx +++ b/build2/rule.cxx @@ -26,7 +26,7 @@ namespace build2 // use it as a guide to implement your own, normal, rules. // match_result file_rule:: - match (slock&, action a, target& t, const string&) const + match (action a, target& t, const string&) const { tracer trace ("file_rule::match"); @@ -42,29 +42,37 @@ namespace build2 { case perform_update_id: { - path_target& pt (dynamic_cast (t)); + // While normally we shouldn't do any of this in match(), no other + // rule should ever be ambiguous with the fallback one and path/mtime + // access is atomic. In other words, we know what we are doing but + // don't do this in normal rules. - // First check the timestamp. This allows for the special "trust me, - // this file exists" situations (used, for example, for installed + path_target& pt (t.as ()); + + // First check the timestamp. This takes care of the special "trust + // me, this file exists" situations (used, for example, for installed // stuff where we know it's there, just not exactly where). // timestamp ts (pt.mtime ()); if (ts == timestamp_unknown) { - // Assign the path. 
While normally we shouldn't do this in match(), - // no other rule should ever be ambiguous with the fallback one. + const path* p (&pt.path ()); + + // Assign the path. // - if (pt.path ().empty ()) + if (p->empty ()) { // Since we cannot come up with an extension, ask the target's // derivation function to treat this as prerequisite (just like // in search_existing_file()). // pt.derive_extension (nullptr, true); - pt.derive_path (); - ts = pt.mtime (); + p = &pt.derive_path (); } + + ts = file_mtime (*p); + pt.mtime (ts); } if (ts != timestamp_unknown && ts != timestamp_nonexistent) @@ -79,7 +87,7 @@ namespace build2 } recipe file_rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { // Update triggers the update of this target's prerequisites so it would // seem natural that we should also trigger their cleanup. However, this @@ -97,9 +105,9 @@ namespace build2 if (!t.has_prerequisites ()) return noop_recipe; - // Search and match all the prerequisites. + // Match all the prerequisites. // - search_and_match_prerequisites (ml, a, t); + match_prerequisites (a, t); // Note that we used to provide perform_update() which checked that this // target is not older than any of its prerequisites. However, later we @@ -115,20 +123,20 @@ namespace build2 // alias_rule // match_result alias_rule:: - match (slock&, action, target&, const string&) const + match (action, target&, const string&) const { return true; } recipe alias_rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { // Inject dependency on our directory (note: not parent) so that it is // automatically created on update and removed on clean. // - inject_fsdir (ml, a, t, false); + inject_fsdir (a, t, false); - search_and_match_prerequisites (ml, a, t); + match_prerequisites (a, t); return default_recipe; } @@ -137,22 +145,22 @@ namespace build2 // fsdir_rule // match_result fsdir_rule:: - match (slock&, action, target&, const string&) const + match (action, target&, const string&) const { return true; } recipe fsdir_rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { // Inject dependency on the parent directory. Note that we don't do it for // clean since we shouldn't (and can't possibly, since it's our parent) be - // removing it. + // removing it. It also must be first (see perform_update_direct()). // if (a.operation () != clean_id) - inject_fsdir (ml, a, t); + inject_fsdir (a, t); - search_and_match_prerequisites (ml, a, t); + match_prerequisites (a, t); switch (a) { @@ -173,6 +181,8 @@ namespace build2 if (!t.prerequisite_targets.empty ()) ts = straight_execute_prerequisites (a, t); + // The same code as in perform_update_direct() below. + // const dir_path& d (t.dir); // Everything is in t.dir. // Generally, it is probably correct to assume that in the majority @@ -202,6 +212,40 @@ namespace build2 return ts; } + void fsdir_rule:: + perform_update_direct (action a, const target& t) + { + // First create the parent directory. If present, it is always first. + // + const target* p (t.prerequisite_targets.empty () + ? nullptr + : t.prerequisite_targets[0]); + + if (p != nullptr && p->is_a ()) + perform_update_direct (a, *p); + + // The same code as in perform_update() above. 
+ // + const dir_path& d (t.dir); + + if (!exists (d)) + { + if (verb >= 2) + text << "mkdir " << d; + else if (verb) + text << "mkdir " << t; + + try + { + try_mkdir (d); + } + catch (const system_error& e) + { + fail << "unable to create directory " << d << ": " << e; + } + } + } + target_state fsdir_rule:: perform_clean (action a, const target& t) { diff --git a/build2/scheduler b/build2/scheduler index 430fdf2..ffe61df 100644 --- a/build2/scheduler +++ b/build2/scheduler @@ -5,6 +5,7 @@ #ifndef BUILD2_SCHEDULER #define BUILD2_SCHEDULER +#include #include #include #include @@ -83,8 +84,10 @@ namespace build2 return async (0, task_count, forward (f), forward (a)...); } - // Wait until the task count reaches the start count. If the scheduler is - // shutdown while waiting, throw system_error(ECANCELED). + // Wait until the task count reaches the start count or less. If the + // scheduler is shutdown while waiting, throw system_error(ECANCELED). + // Return the value of task count. Note that this is a synchronizaiton + // point (i.e., the task count is checked with memory_order_acquire). // // Note that it is valid to wait on another thread's task count (that is, // without making any async() calls in this thread). However, if the start @@ -98,20 +101,27 @@ namespace build2 // count starts before/during async() calls, then it must be "gated" with // an alternative (lower) start count. // - // Finally, if waiting on someone else's start count, it is most likely - // unsafe (from the deadlock's point of view) to continue working through - // our own queue (i.e., we may block waiting on a task that has been - // queued before us which in turn may end up waiting on "us"). + // Finally, if waiting on someone else's start count, it may be unsafe + // (from the deadlock's point of view) to continue working through our own + // queue (i.e., we may block waiting on a task that has been queued before + // us which in turn may end up waiting on "us"). // - void + enum work_queue + { + work_none, // Don't work own queue. + work_one, // Work own queue rechecking the task count after every task. + work_all // Work own queue before rechecking the task count. + }; + + size_t wait (size_t start_count, const atomic_count& task_count, - bool work_queue = true); + work_queue = work_all); - void - wait (const atomic_count& task_count, bool work_queue = true) + size_t + wait (const atomic_count& task_count, work_queue wq = work_all) { - wait (0, task_count, work_queue); + return wait (0, task_count, wq); } // Resume threads waiting on this task count. @@ -119,6 +129,17 @@ namespace build2 void resume (const atomic_count& task_count); + // An active thread that is about to wait for potentially significant time + // on something other than task_count (e.g., mutex, condition variable) + // should deactivate itself with the scheduler and then reactivate once + // done waiting. + // + void + deactivate (); + + void + activate (bool collision = false); + // Startup and shutdown. // public: @@ -196,6 +217,7 @@ namespace build2 size_t task_queue_depth = 0; // # of entries in a queue (capacity). size_t task_queue_full = 0; // # of times task queue was full. + size_t task_queue_remain = 0; // # of tasks remaining in queue. size_t wait_queue_slots = 0; // # of wait slots (buckets). size_t wait_queue_collisions = 0; // # of times slot had been occupied. @@ -266,7 +288,7 @@ namespace build2 static void helper (void*); - void + size_t suspend (size_t start_count, const atomic_count& task_count); // Task encapsulation. 
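To illustrate the deactivate()/activate() protocol described in the scheduler interface above, a sketch of a thread that needs to block on something other than a task count (the mutex, condition variable, and flag are assumptions; only the scheduler calls come from this patch):

  static void
  wait_for_event (std::mutex& m, std::condition_variable& cv, bool& ready)
  {
    sched.deactivate ();   // Give up the active slot before a long block.

    {
      std::unique_lock<std::mutex> l (m);
      cv.wait (l, [&ready] () {return ready;});
    }

    sched.activate ();     // Reacquire an active slot (may block if all
                           // the slots are currently taken).
  }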
@@ -306,7 +328,7 @@ namespace build2 // The constraints that we must maintain: // // active <= max_active - // (init_active + helpers) <= max_threads + // (init_active + helpers) <= max_threads (soft; see activate_helper()) // // Note that the first three are immutable between startup() and // shutdown() so can be accessed without a lock (but see join()). @@ -358,7 +380,7 @@ namespace build2 std::mutex mutex; std::condition_variable condv; size_t waiters = 0; - const atomic_count* tcount; + const atomic_count* task_count; bool shutdown = true; }; @@ -542,11 +564,9 @@ namespace build2 m = om; } - // Each thread has its own queue. Instead of allocating all max_threads of - // them up front, we will reserve the space but will only construct queues - // on demand. + // Each thread has its own queue which are stored in this list. // - vector> task_queues_; + std::list task_queues_; // TLS cache of thread's task queue. // @@ -561,10 +581,6 @@ namespace build2 task_queue& create_queue (); }; - - // Main (and only) scheduler. Started up and shut down in main(). - // - extern scheduler sched; } #include diff --git a/build2/scheduler.cxx b/build2/scheduler.cxx index 1793ab2..05816b8 100644 --- a/build2/scheduler.cxx +++ b/build2/scheduler.cxx @@ -10,69 +10,118 @@ using namespace std; namespace build2 { - void scheduler:: - wait (size_t start_count, const atomic_count& task_count, bool work_queue) + size_t scheduler:: + wait (size_t start_count, const atomic_count& task_count, work_queue wq) { - if (task_count <= start_count) - return; + // Note that task_count is a synchronization point. + // + size_t tc; + + if ((tc = task_count.load (memory_order_acquire)) <= start_count) + return tc; assert (max_active_ != 1); // Serial execution, nobody to wait for. // See if we can run some of our own tasks. // - if (work_queue) + if (wq != work_none) { // If we are waiting on someone else's task count then there migh still - // be no queue which is set by async(). + // be no queue (set by async()). // if (task_queue* tq = task_queue_) { for (lock ql (tq->mutex); !tq->shutdown && !empty_back (*tq); ) + { pop_back (*tq, ql); + if (wq == work_one) + { + if ((tc = task_count.load (memory_order_acquire)) <= start_count) + return tc; + } + } + // Note that empty task queue doesn't automatically mean the task // count has been decremented (some might still be executing // asynchronously). // - if (task_count <= start_count) - return; + if ((tc = task_count.load (memory_order_acquire)) <= start_count) + return tc; } } - suspend (start_count, task_count); + return suspend (start_count, task_count); } void scheduler:: - suspend (size_t start_count, const atomic_count& tc) + deactivate () { - wait_slot& s ( - wait_queue_[hash () (&tc) % wait_queue_size_]); + if (max_active_ == 1) // Serial execution. + return; - // This thread is no longer active. + lock l (mutex_); + + active_--; + waiting_++; + + if (waiting_ > stat_max_waiters_) + stat_max_waiters_ = waiting_; + + // A spare active thread has become available. If there are ready + // masters or eager helpers, wake someone up. // - { - lock l (mutex_); + if (ready_ != 0) + ready_condv_.notify_one (); + else if (queued_task_count_.load (std::memory_order_consume) != 0) + activate_helper (l); + } - active_--; - waiting_++; + void scheduler:: + activate (bool collision) + { + if (max_active_ == 1) // Serial execution. 
+ return; - if (waiting_ > stat_max_waiters_) - stat_max_waiters_ = waiting_; + lock l (mutex_); + waiting_--; - // A spare active thread has become available. If there are ready - // masters or eager helpers, wake someone up. - // - if (ready_ != 0) - ready_condv_.notify_one (); - else if (queued_task_count_ != 0) - activate_helper (l); - } + if (collision) + stat_wait_collisions_++; + + // If we have spare active threads, then become active. Otherwise it + // enters the ready queue. + // + ready_++; + + while (!shutdown_ && active_ >= max_active_) + ready_condv_.wait (l); + + ready_--; + + if (shutdown_) + throw system_error (ECANCELED, system_category ()); + + active_++; + } + + size_t scheduler:: + suspend (size_t start_count, const atomic_count& task_count) + { + wait_slot& s ( + wait_queue_[ + hash () (&task_count) % wait_queue_size_]); + + // This thread is no longer active. + // + deactivate (); // Note that the task count is checked while holding the lock. We also // have to notify while holding the lock (see resume()). The aim here // is not to end up with a notification that happens between the check // and the wait. // + size_t tc (0); bool collision; { lock l (s.mutex); @@ -80,20 +129,21 @@ namespace build2 // We have a collision if there is already a waiter for a different // task count. // - collision = (s.waiters++ != 0 && s.tcount != &tc); + collision = (s.waiters++ != 0 && s.task_count != &task_count); // This is nuanced: we want to always have the task count of the last // thread to join the queue. Otherwise, if threads are leaving and // joining the queue simultaneously, we may end up with a task count of // a thread group that is no longer waiting. // - s.tcount = &tc; + s.task_count = &task_count; // We could probably relax the atomic access since we use a mutex for // synchronization though this has a different tradeoff (calling wait // because we don't see the count). // - while (!(s.shutdown || tc <= start_count)) + while (!(s.shutdown || + (tc = task_count.load (memory_order_acquire)) <= start_count)) s.condv.wait (l); s.waiters--; @@ -101,28 +151,9 @@ namespace build2 // This thread is no longer waiting. // - { - lock l (mutex_); - waiting_--; - - if (collision) - stat_wait_collisions_++; - - // If we have spare active threads, then become active. Otherwise it - // enters the ready queue. - // - ready_++; - - while (!shutdown_ && active_ >= max_active_) - ready_condv_.wait (l); - - ready_--; - - if (shutdown_) - throw system_error (ECANCELED, system_category ()); + activate (collision); - active_++; - } + return tc; } void scheduler:: @@ -151,7 +182,7 @@ namespace build2 size_t scheduler:: shard_size (size_t mul, size_t div) const { - size_t n (max_threads_ == 1 ? 0 : max_threads_ * mul / div / 2); + size_t n (max_threads_ == 1 ? 0 : max_threads_ * mul / div / 4); // Experience shows that we want something close to 2x for small numbers, // then reduce to 1.5x in-between, and 1x for large ones. @@ -208,11 +239,11 @@ namespace build2 // lock l (mutex_); - // Use 8x max_active on 32-bit and 16x max_active on 64-bit. Unless we + // Use 16x max_active on 32-bit and 32x max_active on 64-bit. Unless we // were asked to run serially. // if (max_threads == 0) - max_threads = max_active * (max_active == 1 ? 1 : sizeof (void*) * 2); + max_threads = max_active * (max_active == 1 ? 1 : sizeof (void*) * 4); assert (shutdown_ && init_active != 0 && @@ -229,10 +260,7 @@ namespace build2 // task_queue_depth_ = queue_depth != 0 ? 
queue_depth - : max_active * sizeof (void*) * 2; - - if (max_active != 1) - task_queues_.reserve (max_threads_); + : max_active * sizeof (void*) * 4; queued_task_count_.store (0, memory_order_relaxed); @@ -289,7 +317,11 @@ namespace build2 if (!shutdown_) { - // Signal shutdown and collect statistics. + // Collect statistics. + // + r.thread_helpers = helpers_; + + // Signal shutdown. // shutdown_ = true; @@ -300,18 +332,16 @@ namespace build2 ws.shutdown = true; } - for (unique_ptr& tq: task_queues_) + for (task_queue& tq: task_queues_) { - lock ql (tq->mutex); - r.task_queue_full += tq->stat_full; - tq->shutdown = true; + lock ql (tq.mutex); + r.task_queue_full += tq.stat_full; + tq.shutdown = true; } // Wait for all the helpers to terminate waking up any thread that // sleeps. // - r.thread_helpers = helpers_; - while (helpers_ != 0) { bool i (idle_ != 0); @@ -344,6 +374,7 @@ namespace build2 r.thread_max_waiting = stat_max_waiters_; r.task_queue_depth = task_queue_depth_; + r.task_queue_remain = queued_task_count_.load (memory_order_consume); r.wait_queue_slots = wait_queue_size_; r.wait_queue_collisions = stat_wait_collisions_; @@ -359,8 +390,19 @@ namespace build2 { if (idle_ != 0) idle_condv_.notify_one (); - else if (init_active_ + helpers_ < max_threads_) + else if (init_active_ + helpers_ < max_threads_ || + // + // Ignore the max_threads value if we have queued tasks but no + // active threads. This means everyone is waiting for something + // to happen but nobody is doing anything (e.g., work the + // queues). This, for example, can happen if a thread waits for + // a task that is in its queue but is below the mark. + // + (active_ == 0 && + queued_task_count_.load (memory_order_consume) != 0)) + { create_helper (l); + } } } @@ -412,19 +454,18 @@ namespace build2 { s.active_++; - while (s.queued_task_count_ != 0) + while (s.queued_task_count_.load (memory_order_consume) != 0) { - // Queues are never removed and there shouldn't be any reallocations - // since we reserve maximum possible size upfront. Which means we - // can get the current number of queues and release the main lock - // while examining each of them. + // Queues are never removed which means we can get the current range + // and release the main lock while examining each of them. // - size_t n (s.task_queues_.size ()); + auto it (s.task_queues_.begin ()); + size_t n (s.task_queues_.size ()); // Different to end(). l.unlock (); for (size_t i (0); i != n; ++i) { - task_queue& tq (*s.task_queues_[i]); + task_queue& tq (*it++); for (lock ql (tq.mutex); !tq.shutdown && !s.empty_front (tq); ) s.pop_front (tq, ql); @@ -465,18 +506,15 @@ namespace build2 // Note that task_queue_depth is immutable between startup() and // shutdown() (but see join()). // - unique_ptr tqp (new task_queue (task_queue_depth_)); - task_queue& tq (*tqp); - + task_queue* tq; { lock l (mutex_); - tq.shutdown = shutdown_; - task_queues_.push_back (move (tqp)); + task_queues_.emplace_back (task_queue_depth_); + tq = &task_queues_.back (); + tq->shutdown = shutdown_; } - task_queue_ = &tq; - return tq; + task_queue_ = tq; + return *tq; } - - scheduler sched; } diff --git a/build2/scheduler.txx b/build2/scheduler.txx index f53c044..c9d2d14 100644 --- a/build2/scheduler.txx +++ b/build2/scheduler.txx @@ -66,7 +66,7 @@ namespace build2 // If there is a spare active thread, wake up (or create) the helper // (unless someone already snatched it). 
// - if (queued_task_count_ != 0) + if (queued_task_count_.load (std::memory_order_consume) != 0) { lock l (mutex_); @@ -91,7 +91,7 @@ namespace build2 t.thunk (std::index_sequence_for ()); atomic_count& tc (*t.task_count); - if (--tc <= t.start_count) - s.resume (tc); // Resume a waiter, if any. + if (tc.fetch_sub (1, memory_order_release) - 1 <= t.start_count) + s.resume (tc); // Resume waiters, if any. } } diff --git a/build2/scope b/build2/scope index b172965..c15d973 100644 --- a/build2/scope +++ b/build2/scope @@ -246,10 +246,14 @@ namespace build2 loaded_module_map modules; // Only on root scope. public: - // Proof of lock for RW access. + // RW access. // scope& - rw (const ulock&) const {return const_cast (*this);} + rw () const + { + assert (phase == run_phase::load); + return const_cast (*this); + } // RW access to global scope (RO via global global_scope below). // @@ -340,11 +344,15 @@ namespace build2 return find (path_cast (p)); } - // Proof of lock for RW access. + // RW access. // public: scope_map& - rw (ulock&) const {return const_cast (*this);} + rw () const + { + assert (phase == run_phase::load); + return const_cast (*this); + } scope_map& rw (scope&) const {return const_cast (*this);} diff --git a/build2/search b/build2/search index 85b4d85..92521bc 100644 --- a/build2/search +++ b/build2/search @@ -15,7 +15,7 @@ namespace build2 // Search for an existing target in this prerequisite's scope. // - target* + const target* search_existing_target (const prerequisite_key&); // Search for an existing file in the scope's src directory. Prerequisite @@ -24,12 +24,12 @@ namespace build2 // Originally the plan was to have a target-type specific variable that // contains the search paths. But there wasn't any need for this yet. // - target* + const target* search_existing_file (const prerequisite_key&); // Create a new target in this prerequisite's scope. // - target& + const target& create_new_target (const prerequisite_key&); } diff --git a/build2/search.cxx b/build2/search.cxx index 7c658e0..f7864e9 100644 --- a/build2/search.cxx +++ b/build2/search.cxx @@ -17,7 +17,7 @@ using namespace butl; namespace build2 { - target* + const target* search_existing_target (const prerequisite_key& pk) { tracer trace ("search_existing_target"); @@ -71,16 +71,16 @@ namespace build2 o.clear (); } - target* t (targets.find (*tk.type, d, o, *tk.name, tk.ext, trace)); + const target* t (targets.find (*tk.type, d, o, *tk.name, tk.ext, trace)); if (t != nullptr) - l5 ([&]{trace << "existing target " << t + l5 ([&]{trace << "existing target " << *t << " for prerequisite " << pk;}); return t; } - target* + const target* search_existing_file (const prerequisite_key& cpk) { tracer trace ("search_existing_file"); @@ -177,19 +177,18 @@ namespace build2 // Has to be a file_target. // - file& t (dynamic_cast (r.first)); + const file& t (dynamic_cast (r.first)); l5 ([&]{trace << (r.second ? "new" : "existing") << " target " << t << " for prerequisite " << cpk;}); - if (t.path ().empty ()) - t.path (move (f)); - t.mtime (mt); + t.path (move (f)); + return &t; } - target& + const target& create_new_target (const prerequisite_key& pk) { tracer trace ("create_new_target"); @@ -219,11 +218,10 @@ namespace build2 auto r ( targets.insert ( *tk.type, move (d), *tk.out, *tk.name, tk.ext, true, trace)); - assert (r.second); - target& t (r.first); - - l5 ([&]{trace << "new target " << t << " for prerequisite " << pk;}); + const target& t (r.first); + l5 ([&]{trace << (r.second ? 
"new" : "existing") << " target " << t + << " for prerequisite " << pk;}); return t; } } diff --git a/build2/target b/build2/target index 27d6947..563e32f 100644 --- a/build2/target +++ b/build2/target @@ -23,11 +23,14 @@ namespace build2 { + class rule; class scope; class target; - target& - search (prerequisite&); // From . + extern size_t current_on; // From . + + const target& + search (const prerequisite&); // From . // Target state. // @@ -106,7 +109,7 @@ namespace build2 // struct group_view { - target* const* members; // NULL means not yet known. + const target* const* members; // NULL means not yet known. size_t count; }; @@ -182,7 +185,7 @@ namespace build2 // special target_state::group state. You would normally also use the // group_recipe for group members. // - const_ptr group = nullptr; + const target* group = nullptr; // What has been described above is a "normal" group. That is, there is @@ -249,7 +252,7 @@ namespace build2 // resolve_group_members() from . // virtual group_view - group_members (action_type); + group_members (action_type) const; // Note that the returned key "tracks" the target (except for the // extension). @@ -293,35 +296,47 @@ namespace build2 // Prerequisites. // + // We use an atomic-empty semantics that allows one to "swap in" a set of + // prerequisites if none were specified. This is used to implement + // "synthesized" dependencies. + // public: - using prerequisites_type = small_vector; - prerequisites_type prerequisites; + using prerequisites_type = build2::prerequisites; - // Targets to which prerequisites resolve for this recipe. Note - // that unlike prerequisite::target, these can be resolved to - // group members. NULL means the target should be skipped (or - // the rule may simply not add such a target to the list). - // - // Note also that it is possible the target can vary from - // action to action, just like recipes. We don't need to keep - // track of the action here since the targets will be updated - // if the recipe is updated, normally as part of rule::apply(). - // - // Note that the recipe may modify (mutable) this list. + const prerequisites_type& + prerequisites () const; + + // Swap-in a list of prerequisites. Return false if unsuccessful (i.e., + // some beat us to it). Note that it can be called on const target. // - using prerequisite_targets_type = vector; - mutable prerequisite_targets_type prerequisite_targets; + bool + prerequisites (prerequisites_type&&) const; - // Check if there are any prerequisites, taking into account - // group prerequisites. + // Check if there are any prerequisites, taking into account group + // prerequisites. // bool has_prerequisites () const { - return !prerequisites.empty () || - (group != nullptr && !group->prerequisites.empty ()); + return !prerequisites ().empty () || + (group != nullptr && !group->prerequisites ().empty ()); } + private: + friend class parser; + + // Note that the state is also used to synchronize the prerequisites + // value so we use the release-acquire ordering. + // + // 0 - absent + // 1 - being set + // 2 - present + // + atomic prerequisites_state_ {0}; + prerequisites_type prerequisites_; + + static const prerequisites_type empty_prerequisites_; + // Target-specific variables. // public: @@ -391,7 +406,7 @@ namespace build2 // variable assignment, dependency extraction, etc) is called implied. // // The implied flag should only be cleared during the load phase via the - // target_set::insert(). + // MT-safe target_set::insert(). 
// public: bool implied; @@ -399,68 +414,136 @@ namespace build2 // Target state. // public: - // Atomic task count that is used during execution to (atomically) track a - // subset of the target's state as well as the number of its sub-tasks - // (execution of prerequisites). + // Atomic task count that is used during match and execution to track the + // target's "meta-state" as well as the number of its sub-tasks (e.g., + // busy+1, busy+2, and so on, for instance, number of prerequisites + // being matched or executed). + // + // For each operation in a meta-operation batch (current_on) we have a + // "band" of counts, [touched, executed], that represent the target + // meta-state. Once the next operation is started, this band "moves" thus + // automatically resetting the target to "not yet touched" state for this + // operation. // - // The count starts unexecuted then transitions executing. Executing - // transitions (via a decrement) to executed. Once it is executed, then - // state_ becomes immutable. + // For match we have a further complication in that we may re-match the + // target and override with a "stronger" recipe thus re-setting the state + // from, say, applied back to touched. // // The target is said to be synchronized (in this thread) if we have - // either observed the task count to reach count_executed or we have - // successfully changed it (via compare_exchange) to count_executing. - // If the target is synchronized, then we can access and modify (second - // case) its state, mtime, etc. + // either observed the task count to reach applied or executed or we have + // successfully changed it (via compare_exchange) to locked or busy. If + // the target is synchronized, then we can access and modify (second case) + // its state etc. // - static const size_t count_unexecuted = 0; - static const size_t count_executed = 1; - static const size_t count_executing = 2; + static const size_t offset_touched = 1; // Has been locked. + static const size_t offset_matched = 2; // Rule has been matched. + static const size_t offset_applied = 3; // Rule has been applied. + static const size_t offset_executed = 4; // Recipe has been executed. + static const size_t offset_locked = 5; // Fast (spin) lock. + static const size_t offset_busy = 6; // Slow (wait) lock. - mutable atomic_count task_count; + static size_t count_base () {return 4 * (current_on - 1);} - // Return the "stapshot" of the target state. That is, unless the target - // has been executed, its state can change asynchronously. If fail is - // true then translate target_state::failed to the failed exception. + static size_t count_touched () {return offset_touched + count_base ();} + static size_t count_matched () {return offset_matched + count_base ();} + static size_t count_applied () {return offset_applied + count_base ();} + static size_t count_executed () {return offset_executed + count_base ();} + static size_t count_locked () {return offset_locked + count_base ();} + static size_t count_busy () {return offset_busy + count_base ();} + + mutable atomic_count task_count {0}; // Start offset_touched - 1. + + // This function can only be called during match if we have observed + // (synchronization-wise) that the this target has been matched (i.e., + // the rule has been applied). 
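To make the moving band concrete: for the first operation in a batch current_on is 1, so count_base() is 0 and the band runs from count_touched() == 1 to count_executed() == 4. When the second operation starts, current_on becomes 2 and count_base() becomes 4, so the band moves to [5, 8]; a task count of 4 left over from the previous operation now reads as "not yet touched" for the new one, without any explicit resetting of the targets.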
// target_state - atomic_state (bool fail = true) const; + matched_state (action a, bool fail = true) const; - // During execution this function can only be called if we have observed + // This function can only be called during execution if we have observed // (synchronization-wise) that this target has been executed. // target_state - synchronized_state (bool fail = true) const; + executed_state (bool fail = true) const; // Number of direct targets that depend on this target in the current - // action. It is incremented during the match phase and then decremented - // during execution, before running the recipe. As a result, the recipe - // can detect the last chance (i.e., last dependent) to execute the - // command (see also the first/last execution modes in ). + // operation. It is incremented during match and then decremented during + // execution, before running the recipe. As a result, the recipe can + // detect the last chance (i.e., last dependent) to execute the command + // (see also the first/last execution modes in ). // - // Note that setting a new recipe (which happens when we match the rule - // and which in turn is triggered by the first dependent) clears this - // counter. However, if the previous action was the same as the current, - // then the existing recipe is reused. In this case, however, the counter - // should have been decremented to 0 naturally, as part of the previous - // action execution. - // - atomic_count dependents; + mutable atomic_count dependents; protected: - friend target_state execute_impl (action, target&) noexcept; - - target_state state_ = target_state::unknown; - // Return fail-untranslated (but group-translated) state assuming the - // target is synchronized. + // target is executed and synchronized. // target_state state () const; - // Auxilary data storage. + // Version that should be used during search & match after the target has + // been matched for this action (see the recipe override). + // + target_state + state (action a) const; + + // Return true if the state comes from the group. Target must be at least + // matched. + // + bool + group_state () const; + + public: + target_state state_; // Raw state, normally not accessed directly. + + // Recipe. // public: + using recipe_type = build2::recipe; + using rule_type = build2::rule; + + action_type action; // Action the rule/recipe is for. + + // Matched rule (pointer to hint_rule_map element). Note that in case of a + // direct recipe assignment we may not have a rule. + // + const pair>* rule; + + // Applied recipe. + // + recipe_type recipe_; + + // Note that the target must be locked in order to set the recipe. + // + void + recipe (recipe_type); + + // After the target has been matched and synchronized, check if the target + // is known to be unchanged. Used for optimizations during search & match. + // + bool + unchanged (action_type a) const + { + return state (a) == target_state::unchanged; + } + + // Targets to which prerequisites resolve for this recipe. Note that + // unlike prerequisite::target, these can be resolved to group members. + // NULL means the target should be skipped (or the rule may simply not add + // such a target to the list). + // + // Note also that it is possible the target can vary from action to + // action, just like recipes. We don't need to keep track of the action + // here since the targets will be updated if the recipe is updated, + // normally as part of rule::apply(). + // + // Note that the recipe may modify this list. 
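Returning to the prerequisites swap-in interface introduced earlier in this header, a minimal sketch of how a rule might synthesize a dependency (the source target src is an assumption):

  if (t.prerequisites ().empty ())
  {
    target::prerequisites_type ps;
    ps.push_back (prerequisite (src));   // src: an already-known target.

    // May return false if another rule swapped its own list in first; in
    // that case use whatever is now there.
    //
    if (!t.prerequisites (move (ps)))
      assert (!t.prerequisites ().empty ());
  }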
+ // + using prerequisite_targets_type = vector; + mutable prerequisite_targets_type prerequisite_targets; + + // Auxilary data storage. + // // A rule that matches (i.e., returns true from its match() function) may // use this pad to pass data between its match and apply functions as well // as the recipe. After the recipe is executed, the data is destroyed by @@ -478,17 +561,18 @@ namespace build2 // // Currenly the data is not destroyed until the next match. // - // Note that the recipe may modify (mutable) the data. + // Note that the recipe may modify the data. // static constexpr size_t data_size = sizeof (string) * 8; mutable std::aligned_storage::type data_pad; - mutable void (*data_dtor) (void*) = nullptr; + + mutable void (*data_dtor) (void*) = nullptr; template ::type>::type> typename std::enable_if::value,T&>::type - data (R&& d) + data (R&& d) const { assert (sizeof (T) <= data_size && data_dtor == nullptr); return *new (&data_pad) T (forward (d)); @@ -498,7 +582,7 @@ namespace build2 typename T = typename std::remove_cv< typename std::remove_reference::type>::type> typename std::enable_if::value,T&>::type - data (R&& d) + data (R&& d) const { assert (sizeof (T) <= data_size && data_dtor == nullptr); T& r (*new (&data_pad) T (forward (d))); @@ -520,29 +604,6 @@ namespace build2 } } - // Recipe. - // - public: - using recipe_type = build2::recipe; - - action_type action; // Action this recipe is for. - - const recipe_type& - recipe (action_type a) const {return a > action ? empty_recipe : recipe_;} - - void - recipe (action_type, recipe_type); - - // After the recipe has been set (and target synchronized), check if the - // target is known to be unchanged. Used for various optimizations during - // search & match. - // - bool - unchanged () {return state () == target_state::unchanged;} - - private: - recipe_type recipe_; - // Target type info and casting. // public: @@ -609,6 +670,17 @@ namespace build2 inline ostream& operator<< (ostream& os, const target& t) {return os << t.key ();} + // Sometimes it is handy to "mark" a pointer to a target (for example, in + // prerequisite_targets). We use the last 2 bits in a pointer for that (aka + // the "bit stealing" technique). Note that the pointer needs to be unmarked + // before it can be usable so care must be taken in the face of exceptions, + // etc. + // + void + mark (const target*&, uint8_t = 1); + + uint8_t + unmark (const target*&); // A "range" that presents the prerequisites of a group and one of // its members as one continuous sequence, or, in other words, as @@ -626,35 +698,40 @@ namespace build2 // // For constant iteration use const_group_prerequisites(). // - template - class group_prerequisites_impl + class group_prerequisites { public: explicit - group_prerequisites_impl (T& t) + group_prerequisites (const target& t) : t_ (t), - g_ (t_.group == nullptr || - t_.group->member != nullptr || // Ad hoc group member. - t_.group->prerequisites.empty () + g_ (t_.group == nullptr || + t_.group->member != nullptr || // Ad hoc group member. + t_.group->prerequisites ().empty () ? 
nullptr : t_.group) {} + using prerequisites_type = target::prerequisites_type; + using base_iterator = prerequisites_type::const_iterator; + struct iterator { - using value_type = typename I::value_type; - using pointer = typename I::pointer; - using reference = typename I::reference; - using difference_type = typename I::difference_type; + using value_type = base_iterator::value_type; + using pointer = base_iterator::pointer; + using reference = base_iterator::reference; + using difference_type = base_iterator::difference_type; using iterator_category = std::bidirectional_iterator_tag; iterator () {} - iterator (T* t, T* g, P* c, I i): t_ (t), g_ (g), c_ (c), i_ (i) {} + iterator (const target* t, + const target* g, + const prerequisites_type* c, + base_iterator i): t_ (t), g_ (g), c_ (c), i_ (i) {} iterator& operator++ () { - if (++i_ == c_->end () && c_ != &t_->prerequisites) + if (++i_ == c_->end () && c_ != &t_->prerequisites ()) { - c_ = &t_->prerequisites; + c_ = &t_->prerequisites (); i_ = c_->begin (); } return *this; @@ -666,9 +743,9 @@ namespace build2 iterator& operator-- () { - if (i_ == c_->begin () && c_ == &t_->prerequisites) + if (i_ == c_->begin () && c_ == &t_->prerequisites ()) { - c_ = &g_->prerequisites; + c_ = &g_->prerequisites (); i_ = c_->end (); } @@ -692,10 +769,10 @@ namespace build2 operator!= (const iterator& x, const iterator& y) {return !(x == y);} private: - T* t_ = nullptr; - T* g_ = nullptr; - P* c_ = nullptr; - I i_; + const target* t_ = nullptr; + const target* g_ = nullptr; + const prerequisites_type* c_ = nullptr; + base_iterator i_; }; using reverse_iterator = std::reverse_iterator; @@ -703,14 +780,14 @@ namespace build2 iterator begin () const { - P& c ((g_ != nullptr ? *g_ : t_).prerequisites); + auto& c ((g_ != nullptr ? *g_ : t_).prerequisites ()); return iterator (&t_, g_, &c, c.begin ()); } iterator end () const { - P& c (t_.prerequisites); + auto& c (t_.prerequisites ()); return iterator (&t_, g_, &c, c.end ()); } @@ -723,25 +800,15 @@ namespace build2 size_t size () const { - return t_.prerequisites.size () + - (g_ != nullptr ? g_->prerequisites.size () : 0); + return t_.prerequisites ().size () + + (g_ != nullptr ? g_->prerequisites ().size () : 0); } private: - T& t_; - T* g_; + const target& t_; + const target* g_; }; - using group_prerequisites = group_prerequisites_impl< - target, - target::prerequisites_type, - target::prerequisites_type::iterator>; - - using const_group_prerequisites = group_prerequisites_impl< - const target, - const target::prerequisites_type, - target::prerequisites_type::const_iterator>; - // A member of a prerequisite. If 'target' is NULL, then this is the // prerequisite itself. Otherwise, it is its member. In this case // 'prerequisite' still refers to the prerequisite. @@ -752,8 +819,8 @@ namespace build2 using prerequisite_type = build2::prerequisite; using target_type_type = build2::target_type; - prerequisite_type& prerequisite; - target_type* target; + const prerequisite_type& prerequisite; + const target_type* target; template bool @@ -800,7 +867,7 @@ namespace build2 : prerequisite.proj; } - target_type& + const target_type& search () const { return target != nullptr ? 
*target : build2::search (prerequisite); @@ -848,32 +915,32 @@ namespace build2 // group_prerequisite_members (a, t) // reverse_group_prerequisite_members (a, t) // - template + template class prerequisite_members_range; - template - inline prerequisite_members_range - prerequisite_members (slock& ml, action a, T&& x, bool members = true) + template + inline prerequisite_members_range + prerequisite_members (action a, R&& r, bool members = true) { - return prerequisite_members_range (ml, a, forward (x), members); + return prerequisite_members_range (a, forward (r), members); } - template + template class prerequisite_members_range { public: - prerequisite_members_range (slock& l, action a, T&& r, bool m) - : l_ (l), a_ (a), members_ (m), r_ (forward (r)), e_ (r_.end ()) {} + prerequisite_members_range (action a, R&& r, bool m) + : a_ (a), members_ (m), r_ (forward (r)), e_ (r_.end ()) {} - using base_iterator = decltype (declval ().begin ()); + using base_iterator = decltype (declval ().begin ()); struct iterator { - typedef prerequisite_member value_type; - typedef const value_type* pointer; - typedef const value_type& reference; - typedef typename base_iterator::difference_type difference_type; - typedef std::forward_iterator_tag iterator_category; + using value_type = prerequisite_member; + using pointer = const value_type*; + using reference = const value_type&; + using difference_type = typename base_iterator::difference_type; + using iterator_category = std::forward_iterator_tag; iterator (): r_ (nullptr) {} iterator (const prerequisite_members_range* r, const base_iterator& i) @@ -904,8 +971,8 @@ namespace build2 value_type operator* () const { - target* t (k_ != nullptr ? k_: - g_.count != 0 ? g_.members[j_ - 1] : nullptr); + const target* t (k_ != nullptr ? k_: + g_.count != 0 ? g_.members[j_ - 1] : nullptr); return value_type {*i_, t}; } @@ -913,11 +980,11 @@ namespace build2 pointer operator-> () const { static_assert ( - std::is_trivially_destructible::value, + std::is_trivially_destructible::value, "prerequisite_member is not trivially destructible"); - target* t (k_ != nullptr ? k_: - g_.count != 0 ? g_.members[j_ - 1] : nullptr); + const target* t (k_ != nullptr ? k_: + g_.count != 0 ? g_.members[j_ - 1] : nullptr); return new (&m_) value_type {*i_, t}; } @@ -955,10 +1022,10 @@ namespace build2 const prerequisite_members_range* r_; base_iterator i_; group_view g_; - size_t j_; // 1-based index, to support enter_group(). - target* k_; // Current member of ad hoc group or NULL. - mutable std::aligned_storage::type m_; + size_t j_; // 1-based index, to support enter_group(). + const target* k_; // Current member of ad hoc group or NULL. + mutable typename std::aligned_storage::type m_; }; iterator @@ -968,48 +1035,68 @@ namespace build2 end () const {return iterator (this, e_);} private: - slock& l_; action a_; bool members_; // Go into group members by default? 
- T r_; + R r_; base_iterator e_; }; // prerequisite_members(t.prerequisites) // inline auto - prerequisite_members (slock& ml, action a, target& t, bool members = true) + prerequisite_members (action a, target& t, bool members = true) + { + return prerequisite_members (a, t.prerequisites (), members); + } + + inline auto + prerequisite_members (action a, const target& t, bool members = true) { - return prerequisite_members (ml, a, t.prerequisites, members); + return prerequisite_members (a, t.prerequisites (), members); } // prerequisite_members(reverse_iterate(t.prerequisites)) // inline auto - reverse_prerequisite_members ( - slock& ml, action a, target& t, bool members = true) + reverse_prerequisite_members (action a, target& t, bool m = true) { - return prerequisite_members ( - ml, a, reverse_iterate (t.prerequisites), members); + return prerequisite_members (a, reverse_iterate (t.prerequisites ()), m); + } + + inline auto + reverse_prerequisite_members (action a, const target& t, bool m = true) + { + return prerequisite_members (a, reverse_iterate (t.prerequisites ()), m); } // prerequisite_members(group_prerequisites (t)) // inline auto - group_prerequisite_members ( - slock& ml, action a, target& t, bool members = true) + group_prerequisite_members (action a, target& t, bool m = true) + { + return prerequisite_members (a, group_prerequisites (t), m); + } + + inline auto + group_prerequisite_members (action a, const target& t, bool m = true) { - return prerequisite_members (ml, a, group_prerequisites (t), members); + return prerequisite_members (a, group_prerequisites (t), m); } // prerequisite_members(reverse_iterate (group_prerequisites (t))) // inline auto - reverse_group_prerequisite_members ( - slock& ml, action a, target& t, bool members = true) + reverse_group_prerequisite_members (action a, target& t, bool m = true) { return prerequisite_members ( - ml, a, reverse_iterate (group_prerequisites (t)), members); + a, reverse_iterate (group_prerequisites (t)), m); + } + + inline auto + reverse_group_prerequisite_members (action a, const target& t, bool m = true) + { + return prerequisite_members ( + a, reverse_iterate (group_prerequisites (t)), m); } // A target with an unspecified extension is considered equal to the one @@ -1028,10 +1115,10 @@ namespace build2 // Return existing target or NULL. // - target* + const target* find (const target_key& k, tracer& trace) const; - target* + const target* find (const target_type& type, const dir_path& dir, const dir_path& out, @@ -1043,7 +1130,7 @@ namespace build2 } template - T* + const T* find (const target_type& type, const dir_path& dir, const dir_path& out, @@ -1051,30 +1138,58 @@ namespace build2 const optional& ext, tracer& trace) const { - return static_cast (find (type, dir, out, name, ext, trace)); + return static_cast (find (type, dir, out, name, ext, trace)); } // As above but ignore the extension. // template - T* + const T* find (const dir_path& dir, const dir_path& out, const string& name) const { slock l (mutex_); + auto i ( map_.find ( target_key {&T::static_type, &dir, &out, &name, nullopt})); - return i != map_.end () ? static_cast (i->second.get ()) : nullptr; + + return i != map_.end () + ? static_cast (i->second.get ()) + : nullptr; } + // If the target was inserted, keep the map exclusive-locked and return + // the lock. In this case, the target is effectively still being created + // since nobody can see it until the lock is released. 
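
To illustrate the insert_locked() idea described in the comment above (not part of the patch): the insert operation hands back the exclusive lock together with the new element so that the caller can finish initializing it before any other thread can observe it. A minimal standalone model with hypothetical registry/entry names; the real target_set uses a shared mutex so that find() only takes a reader lock.

#include <map>
#include <mutex>
#include <string>
#include <utility>
#include <cassert>

struct entry {std::string path; bool ready = false;};

class registry
{
public:
  // Insert (if necessary) and return the entry together with the lock that
  // keeps it invisible to find() until released.
  //
  std::pair<entry&, std::unique_lock<std::mutex>>
  insert_locked (const std::string& name)
  {
    std::unique_lock<std::mutex> l (mutex_);
    entry& e (map_[name]);
    return std::pair<entry&, std::unique_lock<std::mutex>> (e, std::move (l));
  }

  const entry*
  find (const std::string& name)
  {
    std::unique_lock<std::mutex> l (mutex_); // Shared lock in the real thing.
    auto i (map_.find (name));
    return i != map_.end () ? &i->second : nullptr;
  }

private:
  std::mutex mutex_;
  std::map<std::string, entry> map_;
};

int
main ()
{
  registry r;

  auto p (r.insert_locked ("hello"));
  p.first.path = "hello.cxx"; // Finish setting up while still locked.
  p.first.ready = true;
  p.second.unlock ();         // Only now can other threads see it.

  assert (r.find ("hello")->ready);
}
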
+ // + pair + insert_locked (const target_type&, + dir_path dir, + dir_path out, + string name, + optional ext, + bool implied, + tracer&); + pair - insert (const target_type&, + insert (const target_type& tt, dir_path dir, dir_path out, string name, optional ext, bool implied, - tracer&); + tracer& t) + { + auto p (insert_locked (tt, + move (dir), + move (out), + move (name), + move (ext), + implied, + t)); + + return pair (p.first, p.second.owns_lock ()); + } // Note that the following versions always enter implied targets. // @@ -1144,67 +1259,65 @@ namespace build2 public: using target::target; - // Generally, modification time for a target can only be queried after a - // rule has been matched since that's where the path is normally gets - // assigned (if the path was not assigned and no timestamp was set - // manually, this function will return timestamp_unknown). Normally, - // however, it would make sense to first execute the rule to get the - // "updated" timestamp. + // Modification time is an "atomic cash". That is, it can be set at any + // time and we assume everything will be ok regardless of the order in + // which racing updates happen because we do not modify the external state + // (which is the source of timestemps) while updating the internal. + // + // The rule for groups that utilize target_state::group is as follows: if + // it has any members that are mtime_targets, then the group should be + // mtime_target and the members get the mtime from it. // - // The rule for groups that utilize the group state is as follows: - // if it has any members that are mtime_targets, then the group - // should be mtime_target and the members get the mtime from it. + // Note that this function can be called before the target is matched in + // which case the value always comes from the target itself. In other + // words, that group logic only kicks in once the target is matched. // timestamp - mtime (bool load = true) const - { - const mtime_target& t (state_ == target_state::group - ? group->as () - : *this); - - if (load && t.mtime_ == timestamp_unknown) - t.mtime_ = t.load_mtime (); + mtime () const; - return t.mtime_; - } - - // Note that while we can cache the mtime at any time, it may be ignored - // if the target state is group (see the mtime() accessor). + // Note also that while we can cache the mtime, it may be ignored if the + // target state is set to group (see above). // void - mtime (timestamp mt) const - { - mtime_ = mt; - } + mtime (timestamp) const; + + // If the mtime is unknown, then load it from the filesystem also caching + // the result. + // + // Note: can only be called during executing and must not be used if the + // target state is group. + // + timestamp + load_mtime (const path&) const; // Return true if this target is newer than the specified timestamp. // - // Note: can only be called on a synchronized target. + // Note: can only be called during execute on a synchronized target. // bool - newer (timestamp mt) const - { - timestamp mp (mtime ()); + newer (timestamp) const; - // What do we do if timestamps are equal? This can happen, for example, - // on filesystems that don't have subsecond resolution. There is not - // much we can do here except detect the case where the target was - // changed on this run. - // - return mt < mp || (mt == mp && state () == target_state::changed); - } + public: + static const target_type static_type; protected: - // Return timestamp_unknown if the mtime cannot be loaded. 
+ // C++17: // - virtual timestamp - load_mtime () const = 0; + // static_assert (atomic::is_always_lock_free, + // "timestamp is not lock-free on this architecture"); - public: - static const target_type static_type; +#if !defined(ATOMIC_LLONG_LOCK_FREE) || ATOMIC_LLONG_LOCK_FREE != 2 +# error timestamp is not lock-free on this architecture +#endif - private: - mutable timestamp mtime_ {timestamp_unknown}; + // Note that the value is not used to synchronize any other state so we + // use the release-consume ordering (i.e., we are only interested in the + // mtime value being synchronized). + // + // Store it as an underlying representation (normally int64_t) since + // timestamp is not usable with atomic (non-noexcept default ctor). + // + mutable atomic mtime_ {timestamp_unknown_rep}; }; // Filesystem path-based target. @@ -1216,11 +1329,27 @@ namespace build2 typedef build2::path path_type; + // Target path is an "atomic consistent cash". That is, it can be set at + // any time but any subsequent updates must set the same path. Or, in + // other words, once the path is set, it never changes. + // + // A set empty path may signify special unknown/undetermined location (for + // example an installed import library -- we know it's there, just not + // exactly where). In this case you would also normally set its mtime. We + // used to return a pointer to properly distinguish between not set and + // empty but that proved too tedious. Note that this means there could be + // a race between path and mtime (unless you lock the target in some other + // way; see file_rule) so for this case it makes sense to set the + // timestamp first. + // const path_type& - path () const {return path_;} + path () const; - void - path (path_type p) {assert (path_.empty ()); path_ = move (p);} + const path_type& + path (path_type) const; + + timestamp + load_mtime () const {return mtime_target::load_mtime (path ());} // Derive a path from target's dir, name, and, if set, ext. If ext is not // set, try to derive it using the target type extension function and @@ -1254,11 +1383,30 @@ namespace build2 const string& derive_extension (const char* default_ext = nullptr, bool search = false); + // Const versions of the above that can be used on unlocked targets. Note + // that here we don't allow providing any defaults since you probably + // should only use this version if everything comes from the target itself + // (and is therefore atomic). + // + const path_type& + derive_path () const + { + return const_cast (this)->derive_path (); // MT-aware. + } + public: static const target_type static_type; private: - path_type path_; + // Note that the state is also used to synchronize the path value so + // we use the release-acquire ordering. + // + // 0 - absent + // 1 - being set + // 2 - present + // + mutable atomic path_state_ {0}; + mutable path_type path_; }; // File target. @@ -1268,13 +1416,6 @@ namespace build2 public: using path_target::path_target; - protected: - // Note that it is final in order to be consistent with file_rule, - // search_existing_file(). - // - virtual timestamp - load_mtime () const final; - public: static const target_type static_type; virtual const target_type& dynamic_type () const {return static_type;} @@ -1469,13 +1610,13 @@ namespace build2 // The default behavior, that is, look for an existing target in the // prerequisite's directory scope. // - target* + const target* search_target (const prerequisite_key&); // First look for an existing target as above. 
If not found, then look // for an existing file in the target-type-specific list of paths. // - target* + const target* search_file (const prerequisite_key&); } diff --git a/build2/target-type b/build2/target-type index 81f6916..aa4bbf7 100644 --- a/build2/target-type +++ b/build2/target-type @@ -55,7 +55,7 @@ namespace build2 void (*print) (ostream&, const target_key&); - target* (*search) (const prerequisite_key&); + const target* (*search) (const prerequisite_key&); bool see_through; // A group with the default "see through" semantics. diff --git a/build2/target.cxx b/build2/target.cxx index 9f5d94b..dacf534 100644 --- a/build2/target.cxx +++ b/build2/target.cxx @@ -57,6 +57,7 @@ namespace build2 // target // + const target::prerequisites_type target::empty_prerequisites_; target:: ~target () @@ -89,50 +90,8 @@ namespace build2 return *e; } - void target:: - recipe (action_type a, recipe_type r) - { - assert (a > action || !recipe_); - - bool override (a == action && recipe_); // See action::operator<. - - // Only noop_recipe can be overridden. - // - if (override) - { - recipe_function** f (recipe_.target ()); - assert (f != nullptr && *f == &noop_action); - } - - action = a; - recipe_ = move (r); - - state_ = target_state::unknown; - - // If this is a noop recipe, then mark the target unchanged so that we - // don't waste time executing the recipe. - // - if (recipe_function** f = recipe_.target ()) - { - if (*f == &noop_action) - state_ = target_state::unchanged; - } - - //@@ MT can this be a relaxed save? - // - task_count = state_ == target_state::unknown - ? count_unexecuted - : count_executed; - - // This one is tricky: we don't want to reset the dependents count - // if we are merely overriding with a "stronger" recipe. - // - if (!override) - dependents = 0; //@@ MT: either relax or use as match flag? - } - group_view target:: - group_members (action_type) + group_members (action_type) const { assert (false); // Not a group or doesn't expose its members. return group_view {nullptr, 0}; @@ -157,6 +116,78 @@ namespace build2 return *r; } + target_state target:: + state (action_type a) const + { + assert (phase == run_phase::match); + + // The tricky aspect here is that we my be re-matching the target for + // another (overriding action). Since it is only valid to call this + // function after the target has been matched (for this action), we assume + // that if the target is busy, then it is being overriden (note that it + // cannot be being executed since we are in the match phase). + // + // But that's not the end of it: in case of a re-match the task count + // might have been reset to, say, applied. The only way to know for sure + // that there isn't another match "underneath" is to compare actions. But + // that can only be done safely if we lock the target. At least we will be + // quick (and we don't need to wait since if it's busy, we know it is a + // re-match). This logic is similar to lock_impl(). + // + size_t b (target::count_base ()); + size_t e (task_count.load (memory_order_acquire)); + + size_t exec (b + target::offset_executed); + size_t lock (b + target::offset_locked); + size_t busy (b + target::offset_busy); + + for (;;) + { + for (; e == lock; e = task_count.load (memory_order_acquire)) + this_thread::yield (); + + if (e >= busy) + return target_state::unchanged; // Override in progress. + + if (e == exec) + { + // Sanity check: we better not be overriding a recipe for an already + // executed target. + // + assert (action == a); + + return group_state () ? 
group->state_ : state_; + } + + // Try to grab the spin-lock. + // + if (task_count.compare_exchange_strong ( + e, + lock, + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. + { + break; + } + + // Task count changed, try again. + } + + // We have the spin-lock. Quickly get the matched action and unlock. + // + action_type ma (action); + task_count.store (e, memory_order_release); + + if (ma > a) + return target_state::unchanged; // Overriden. + + // Otherwise we should have a matched target. + // + assert (ma == a && (e == b + target::offset_applied || e == exec)); + + return group_state () ? group->state_ : state_; + } + pair target:: find_original (const variable& var, bool target_only) const { @@ -230,7 +261,7 @@ namespace build2 // target_set targets; - target* target_set:: + const target* target_set:: find (const target_key& k, tracer& trace) const { slock sl (mutex_); @@ -239,7 +270,7 @@ namespace build2 if (i == map_.end ()) return nullptr; - target& t (*i->second); + const target& t (*i->second); optional& ext (i->first.ext); if (ext != k.ext) @@ -283,17 +314,17 @@ namespace build2 return &t; } - pair target_set:: - insert (const target_type& tt, - dir_path dir, - dir_path out, - string name, - optional ext, - bool implied, - tracer& trace) + pair target_set:: + insert_locked (const target_type& tt, + dir_path dir, + dir_path out, + string name, + optional ext, + bool implied, + tracer& trace) { target_key tk {&tt, &dir, &out, &name, move (ext)}; - target* t (find (tk, trace)); + target* t (const_cast (find (tk, trace))); if (t == nullptr) { @@ -326,7 +357,7 @@ namespace build2 { t->ext_ = &i->first.ext; t->implied = implied; - return pair (*t, true); + return pair (*t, move (ul)); } // The "tail" of find(). @@ -369,7 +400,7 @@ namespace build2 t->implied = false; } - return pair (*t, false); + return pair (*t, ulock ()); } ostream& @@ -449,6 +480,50 @@ namespace build2 return os; } + // mtime_target + // + timestamp mtime_target:: + mtime () const + { + // Figure out from which target we should get the value. + // + const mtime_target* t (this); + + switch (phase) + { + case run_phase::load: break; + case run_phase::match: + { + // Similar logic to state(action) except here we don't distinguish + // between original/overridden actions (an overridable action is by + // definition a noop and should never need to query the mtime). + // + size_t c (task_count.load (memory_order_acquire)); // For group_state() + + // Wait out the spin lock to get the meaningful count. + // + for (size_t lock (target::count_locked ()); + c == lock; + c = task_count.load (memory_order_acquire)) + this_thread::yield (); + + if (c != target::count_applied () && c != target::count_executed ()) + break; + + // Fall through. + } + case run_phase::execute: + { + if (group_state ()) + t = &group->as (); + + break; + } + } + + return timestamp (duration (t->mtime_.load (memory_order_consume))); + } + // path_target // const string& path_target:: @@ -521,31 +596,14 @@ namespace build2 } } - const path_type& ep (path ()); - - if (ep.empty ()) - path (move (p)); - else if (p != ep) - fail << "path mismatch for target " << *this << - info << "existing '" << ep << "'" << - info << "derived '" << p << "'"; - - return path (); - } - - // file_target - // - timestamp file:: - load_mtime () const - { - const path_type& f (path ()); - return f.empty () ? timestamp_unknown : file_mtime (f); + path (move (p)); + return path_; } // Search functions. 
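
The state(action) and mtime() logic above briefly "locks" the target by swinging its task_count to a dedicated locked value with compare_exchange and spinning while somebody else holds it. Below is a condensed standalone version of that micro-lock, not part of the patch; the offset names echo target::offset_* but the surrounding code and peek_guarded() are invented.

#include <atomic>
#include <thread>
#include <cstddef>
#include <cassert>

enum : std::size_t
{
  offset_matched  = 2,
  offset_applied  = 3,
  offset_executed = 4,
  offset_locked   = 5,
  offset_busy     = 6
};

std::atomic<std::size_t> task_count {offset_applied};
std::size_t guarded_value = 0; // State protected by the micro-lock.

// Briefly lock the counter, read the guarded state, and unlock by restoring
// the previous counter value.
//
std::size_t
peek_guarded ()
{
  std::size_t e (task_count.load (std::memory_order_acquire));

  for (;;)
  {
    // Wait out somebody else's lock.
    //
    for (; e == offset_locked; e = task_count.load (std::memory_order_acquire))
      std::this_thread::yield ();

    // Try to grab the spin-lock ourselves.
    //
    if (task_count.compare_exchange_strong (e,
                                            offset_locked,
                                            std::memory_order_acq_rel,
                                            std::memory_order_acquire))
      break;

    // The count changed under us, try again.
  }

  std::size_t r (guarded_value);
  task_count.store (e, std::memory_order_release); // Unlock (restore).
  return r;
}

int
main ()
{
  guarded_value = 42;
  assert (peek_guarded () == 42);
  assert (task_count.load () == offset_applied); // Lock released.
}
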
// - target* + const target* search_target (const prerequisite_key& pk) { // The default behavior is to look for an existing target in the @@ -554,12 +612,12 @@ namespace build2 return search_existing_target (pk); } - target* + const target* search_file (const prerequisite_key& pk) { // First see if there is an existing target. // - if (target* t = search_existing_target (pk)) + if (const target* t = search_existing_target (pk)) return t; // Then look for an existing file in the src tree. @@ -664,13 +722,13 @@ namespace build2 false }; - static target* + static const target* search_alias (const prerequisite_key& pk) { // For an alias we don't want to silently create a target since it will do // nothing and it most likely not what the user intended. // - target* t (search_existing_target (pk)); + const target* t (search_existing_target (pk)); if (t == nullptr || t->implied) fail << "no explicit target for prerequisite " << pk; @@ -689,14 +747,14 @@ namespace build2 false }; - static target* + static const target* search_dir (const prerequisite_key& pk) { tracer trace ("search_dir"); // The first step is like in search_alias(): looks for an existing target. // - target* t (search_existing_target (pk)); + const target* t (search_existing_target (pk)); if (t != nullptr && !t->implied) return t; @@ -734,14 +792,15 @@ namespace build2 // is so. And so it is. // bool retest (false); + + assert (phase == run_phase::match); { - // Relock for exclusive access and change to the load phase. + // Switch the phase to load. // - model_rlock rl; - phase_guard pg (run_phase::load); + phase_switch ps (run_phase::load); pair sp ( - switch_scope (*s.rw (rl).root_scope (), out_base)); + switch_scope (*s.rw ().root_scope (), out_base)); if (sp.second != nullptr) // Ignore scopes out of any project. { @@ -757,6 +816,7 @@ namespace build2 } } } + assert (phase == run_phase::match); // If we loaded the buildfile, examine the target again. // diff --git a/build2/target.ixx b/build2/target.ixx index b73acd7..661321d 100644 --- a/build2/target.ixx +++ b/build2/target.ixx @@ -2,6 +2,8 @@ // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file +#include // memcpy() + namespace build2 { // target @@ -25,31 +27,72 @@ namespace build2 e != nullptr ? optional (*e) : nullopt}; } + inline auto target:: + prerequisites () const -> const prerequisites_type& + { + return prerequisites_state_.load (memory_order_acquire) == 2 + ? prerequisites_ + : empty_prerequisites_; + } + + inline bool target:: + prerequisites (prerequisites_type&& p) const + { + target& x (const_cast (*this)); // MT-aware. + + uint8_t e (0); + if (x.prerequisites_state_.compare_exchange_strong ( + e, + 1, + memory_order_acq_rel, + memory_order_acquire)) + { + x.prerequisites_ = move (p); + x.prerequisites_state_.fetch_add (1, memory_order_release); + return true; + } + else + { + // Spin the transition out so that prerequisites() doesn't return empty. + // + for (; e == 1; e = prerequisites_state_.load (memory_order_acquire)) + /*this_thread::yield ()*/ ; + + return false; + } + } + inline target_state target:: state () const { + assert (phase == run_phase::execute); + return group_state () ? group->state_ : state_; + } + + inline bool target:: + group_state () const + { // We go an extra step and short-circuit to the target state even if the // raw state is not group provided the recipe is group_recipe. 
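
Both prerequisites() above and path_target::path() later in the patch publish a value at most once through a small three-state atomic (0 = absent, 1 = being set, 2 = present), spinning out the middle state so that readers never observe a half-constructed value. Here is the same pattern, standalone and not part of the patch, with a string payload; once_string and set_once() are hypothetical names.

#include <atomic>
#include <string>
#include <thread>
#include <cstdint>
#include <cassert>

class once_string
{
public:
  // Return true if we stored the value, false if somebody else already did
  // (in which case their value wins).
  //
  bool
  set_once (std::string v)
  {
    std::uint8_t e (0);
    if (state_.compare_exchange_strong (e,
                                        1,
                                        std::memory_order_acq_rel,
                                        std::memory_order_acquire))
    {
      value_ = std::move (v);
      state_.fetch_add (1, std::memory_order_release); // 1 -> 2, publish.
      return true;
    }

    // Spin the 0 -> 2 transition out so that get() below never returns a
    // value that is still being constructed.
    //
    for (; e == 1; e = state_.load (std::memory_order_acquire))
      std::this_thread::yield ();

    return false;
  }

  // Empty result means "not set yet".
  //
  const std::string&
  get () const
  {
    static const std::string empty;
    return state_.load (std::memory_order_acquire) == 2 ? value_ : empty;
  }

private:
  std::atomic<std::uint8_t> state_ {0};
  std::string value_;
};

int
main ()
{
  once_string s;
  assert (s.get ().empty ());
  assert (s.set_once ("hello"));
  assert (!s.set_once ("world")); // Second attempt loses.
  assert (s.get () == "hello");
}
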
if (state_ == target_state::group) - return group->state_; - - if (group == nullptr) - return state_; + return true; - if (recipe_function* const* f = recipe_.target ()) + if (group != nullptr) { - if (*f == &group_action) - return group->state_; + if (recipe_function* const* f = recipe_.target ()) + return *f == &group_action; } - return state_; + return false; } inline target_state target:: - synchronized_state (bool fail) const + matched_state (action_type a, bool fail) const { - target_state r (state ()); + // Note that the target could be being asynchronously re-matched. + // + target_state r (state (a)); if (fail && r == target_state::failed) throw failed (); @@ -58,16 +101,63 @@ namespace build2 } inline target_state target:: - atomic_state (bool fail) const + executed_state (bool fail) const { - switch (task_count) + target_state r (state ()); + + if (fail && r == target_state::failed) + throw failed (); + + return r; + } + + inline void target:: + recipe (recipe_type r) + { + recipe_ = move (r); + + // If this is a noop recipe, then mark the target unchanged to allow for + // some optimizations. + // + state_ = target_state::unknown; + + if (recipe_function** f = recipe_.target ()) { - case target::count_unexecuted: return target_state::unknown; - case target::count_executed: return synchronized_state (fail); - default: return target_state::busy; + if (*f == &noop_action) + state_ = target_state::unchanged; } } + // mark()/unmark() + // + + // VC15 doesn't like if we use (abstract) target here. + // + static_assert (alignof (file) % 4 == 0, "unexpected target alignment"); + + inline void + mark (const target*& p, uint8_t m) + { + uintptr_t i (reinterpret_cast (p)); + i |= m & 0x03; + p = reinterpret_cast (i); + } + + inline uint8_t + unmark (const target*& p) + { + uintptr_t i (reinterpret_cast (p)); + uint8_t m (i & 0x03); + + if (m != 0) + { + i &= ~uintptr_t (0x03); + p = reinterpret_cast (i); + } + + return m; + } + // prerequisite_member // inline prerequisite prerequisite_member:: @@ -87,7 +177,7 @@ namespace build2 // prerequisite_members // group_view - resolve_group_members (slock&, action, target&); // + resolve_group_members (action, const target&); // template inline auto prerequisite_members_range::iterator:: @@ -100,9 +190,11 @@ namespace build2 // Get the target if one has been resolved and see if it's an ad hoc // group. If so, switch to the ad hoc mode. // - target* t (g_.count != 0 - ? j_ != 0 ? g_.members[j_ - 1] : nullptr // enter_group() - : i_->target); + const target* t ( + g_.count != 0 + ? j_ != 0 ? g_.members[j_ - 1] : nullptr // enter_group() + : i_->target.load (memory_order_consume)); + if (t != nullptr && t->member != nullptr) k_ = t->member; } @@ -132,9 +224,9 @@ namespace build2 // First see if we are about to enter an ad hoc group (the same code as in // operator++() above). // - target* t (g_.count != 0 - ? j_ != 0 ? g_.members[j_ - 1] : nullptr - : i_->target); + const target* t (g_.count != 0 + ? j_ != 0 ? g_.members[j_ - 1] : nullptr + : i_->target.load (memory_order_consume)); if (t != nullptr && t->member != nullptr) k_ = t->member; @@ -142,7 +234,7 @@ namespace build2 { // Otherwise assume it is a normal group. // - g_ = resolve_group_members (r_->l_, r_->a_, search (*i_)); + g_ = resolve_group_members (r_->a_, search (*i_)); if (g_.members == nullptr) // Members are not know. { @@ -167,9 +259,9 @@ namespace build2 // if (k_ == nullptr) { - target* t (g_.count != 0 - ? j_ != 0 ? 
g_.members[j_ - 1] : nullptr - : i_->target); + const target* t (g_.count != 0 + ? j_ != 0 ? g_.members[j_ - 1] : nullptr + : i_->target.load (memory_order_consume)); if (t != nullptr && t->member != nullptr) k_ = t->member; } @@ -189,4 +281,78 @@ namespace build2 g_.members = nullptr; // Ugly "special case signal" for operator++. } } + + // mtime_target + // + inline void mtime_target:: + mtime (timestamp mt) const + { + mtime_.store (mt.time_since_epoch ().count (), memory_order_release); + } + + inline timestamp mtime_target:: + load_mtime (const path& p) const + { + assert (phase == run_phase::execute && !group_state ()); + + duration::rep r (mtime_.load (memory_order_consume)); + if (r == timestamp_unknown_rep) + { + assert (!p.empty ()); + + r = file_mtime (p).time_since_epoch ().count (); + mtime_.store (r, memory_order_release); + } + + return timestamp (duration (r)); + } + + inline bool mtime_target:: + newer (timestamp mt) const + { + assert (phase == run_phase::execute); + + timestamp mp (mtime ()); + + // What do we do if timestamps are equal? This can happen, for example, + // on filesystems that don't have subsecond resolution. There is not + // much we can do here except detect the case where the target was + // changed on this run. + // + return mt < mp || (mt == mp && state () == target_state::changed); + } + + // path_target + // + inline const path& path_target:: + path () const + { + return path_state_.load (memory_order_acquire) == 2 ? path_ : empty_path; + } + + inline const path& path_target:: + path (path_type p) const + { + uint8_t e (0); + if (path_state_.compare_exchange_strong ( + e, + 1, + memory_order_acq_rel, + memory_order_acquire)) + { + path_ = move (p); + path_state_.fetch_add (1, memory_order_release); + } + else + { + // Spin the transition out. + // + for (; e == 1; e = path_state_.load (memory_order_acquire)) + /*this_thread::yield ()*/ ; + + assert (path_ == p); + } + + return path_; + } } diff --git a/build2/target.txx b/build2/target.txx index ea7bca7..7a222d7 100644 --- a/build2/target.txx +++ b/build2/target.txx @@ -18,7 +18,7 @@ namespace build2 // do { - g_ = resolve_group_members (r_->l_, r_->a_, search (*i_)); + g_ = resolve_group_members (r_->a_, search (*i_)); assert (g_.members != nullptr); // Group could not be resolved. if (g_.count != 0) // Skip empty see through groups. 
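
The mark()/unmark() helpers added by this patch stash a two-bit flag in the low alignment bits of a target pointer, which is what the alignment static_assert guards; a marked pointer must be unmarked before it can be dereferenced. The same trick applied to an ordinary, suitably aligned type (node is an invented stand-in for target):

#include <cstdint>
#include <cassert>

struct alignas (4) node {int value;};

// Store a 2-bit mark in the low bits of the pointer.
//
inline void
mark (const node*& p, std::uint8_t m = 1)
{
  std::uintptr_t i (reinterpret_cast<std::uintptr_t> (p));
  i |= m & 0x03;
  p = reinterpret_cast<const node*> (i);
}

// Remove and return the mark, restoring a dereferenceable pointer.
//
inline std::uint8_t
unmark (const node*& p)
{
  std::uintptr_t i (reinterpret_cast<std::uintptr_t> (p));
  std::uint8_t m (i & 0x03);

  if (m != 0)
  {
    i &= ~std::uintptr_t (0x03);
    p = reinterpret_cast<const node*> (i);
  }

  return m;
}

int
main ()
{
  node n {7};
  const node* p (&n);

  mark (p, 2);              // Tagged: must not be dereferenced as is.
  assert (unmark (p) == 2); // Tag recovered, pointer restored.
  assert (p->value == 7);
}
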
diff --git a/build2/test/rule b/build2/test/rule index da55173..7de2e24 100644 --- a/build2/test/rule +++ b/build2/test/rule @@ -21,7 +21,7 @@ namespace build2 { public: virtual match_result - match (slock&, action, target&, const string&) const override; + match (action, target&, const string&) const override; target_state perform_script (action, const target&) const; @@ -31,7 +31,7 @@ namespace build2 { public: virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; static target_state perform_test (action, const target&); @@ -41,7 +41,7 @@ namespace build2 { public: virtual recipe - apply (slock&, action, target&) const override; + apply (action, target&) const override; target_state perform_test (action, const target&) const; diff --git a/build2/test/rule.cxx b/build2/test/rule.cxx index eac9203..06ffc9f 100644 --- a/build2/test/rule.cxx +++ b/build2/test/rule.cxx @@ -35,7 +35,7 @@ namespace build2 "insufficient space"); match_result rule_common:: - match (slock& ml, action a, target& t, const string&) const + match (action a, target& t, const string&) const { // The (admittedly twisted) logic of this rule tries to achieve the // following: If the target is testable, then we want both first update @@ -63,7 +63,7 @@ namespace build2 // If we have any prerequisites of the test{} type, then this is the // testscript case. // - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + for (prerequisite_member p: group_prerequisite_members (a, t)) { if (p.is_a ()) { @@ -155,7 +155,7 @@ namespace build2 } recipe alias_rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { match_data md (move (t.data ())); t.clear_data (); // In case delegated-to rule also uses aux storage. @@ -171,7 +171,7 @@ namespace build2 // standard alias rule. // if (a.operation () == update_id) - return match_delegate (ml, a, t, *this).first; + return match_delegate (a, t, *this).first; // For the test operation we have to implement our own search and match // because we need to ignore prerequisites that are outside of our @@ -181,7 +181,7 @@ namespace build2 // not ours seems right. Note that we still want to make sure they are // up to date (via the above delegate) since our tests might use them. // - search_and_match_prerequisites (ml, a, t, t.root_scope ()); + match_prerequisites (a, t, t.root_scope ()); // If not a test then also redirect to the alias rule. // @@ -191,7 +191,7 @@ namespace build2 } recipe rule:: - apply (slock& ml, action a, target& t) const + apply (action a, target& t) const { tracer trace ("test::rule::apply"); @@ -208,11 +208,11 @@ namespace build2 if (md.script) { if (a.operation () == update_id) - return match_delegate (ml, a, t, *this).first; + return match_delegate (a, t, *this).first; // Collect all the testscript targets in prerequisite_targets. // - for (prerequisite_member p: group_prerequisite_members (ml, a, t)) + for (prerequisite_member p: group_prerequisite_members (a, t)) { if (p.is_a ()) t.prerequisite_targets.push_back (&p.search ()); @@ -277,10 +277,10 @@ namespace build2 // @@ OUT: what if this is a @-qualified pair or names? // - target* it (in != nullptr ? &search (*in, bs) : nullptr); - target* ot (on != nullptr - ? in == on ? it : &search (*on, bs) - : nullptr); + const target* it (in != nullptr ? &search (*in, bs) : nullptr); + const target* ot (on != nullptr + ? in == on ? 
it : &search (*on, bs) + : nullptr); if (a.operation () == update_id) { @@ -289,26 +289,16 @@ namespace build2 // if (it != nullptr) { - build2::match (ml, a, *it); - - if (it->unchanged ()) //@@ TM? - { - unmatch (a, *it); + if (build2::match (a, *it, unmatch::unchanged)) it = nullptr; - } } if (ot != nullptr) { if (in != on) { - build2::match (ml, a, *ot); - - if (ot->unchanged ()) //@@ MT? - { - unmatch (a, *ot); + if (build2::match (a, *ot, unmatch::unchanged)) ot = nullptr; - } } else ot = it; @@ -318,7 +308,7 @@ namespace build2 // been found if we signalled that we do not match from match() // above. // - recipe d (match_delegate (ml, a, t, *this).first); + recipe d (match_delegate (a, t, *this).first); // If we have no input/output that needs updating, then simply // redirect to it. @@ -636,8 +626,9 @@ namespace build2 if (pts.size () != 0 && pts[0] != nullptr) { const file& it (pts[0]->as ()); - assert (!it.path ().empty ()); // Should have been assigned by update. - args.push_back (it.path ().string ().c_str ()); + const path& ip (it.path ()); + assert (!ip.empty ()); // Should have been assigned by update. + args.push_back (ip.string ().c_str ()); } // Maybe arguments then? // @@ -656,7 +647,8 @@ namespace build2 if (pts.size () != 0 && pts[1] != nullptr) { const file& ot (pts[1]->as ()); - assert (!ot.path ().empty ()); // Should have been assigned by update. + const path& op (ot.path ()); + assert (!op.empty ()); // Should have been assigned by update. dpp = run_search (dp, true); @@ -668,7 +660,7 @@ namespace build2 if (cast (tt["test.target"]).class_ == "windows") args.push_back ("--strip-trailing-cr"); - args.push_back (ot.path ().string ().c_str ()); + args.push_back (op.string ().c_str ()); args.push_back ("-"); args.push_back (nullptr); } diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index 2ea42b5..4e6759f 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -6,8 +6,7 @@ #include -#include // keep_going -#include +#include // sched, keep_going #include #include @@ -2838,16 +2837,14 @@ namespace build2 { exec_lines (g->setup_.begin (), g->setup_.end (), li, false); - scheduler::atomic_count task_count (0); + atomic_count task_count (0); + wait_guard wg (task_count); // Start asynchronous execution of inner scopes keeping track of how // many we have handled. // - auto i (g->scopes.begin ()); - for (auto e (g->scopes.end ()); i != e; ++i) + for (unique_ptr& chain: g->scopes) { - unique_ptr& chain (*i); - // Check if this scope is ignored (e.g., via config.test). // if (!runner_->test (*chain)) @@ -2932,12 +2929,18 @@ namespace build2 // exec_scope_body (); // scope_ = os; + // Pass our diagnostics stack (this is safe since we are going + // to wait for completion before unwinding the diag stack). + // // If the scope was executed synchronously, check the status and // bail out if we weren't asked to keep going. // if (!sched.async (task_count, - [] (scope& s, script& scr, runner& r) + [] (scope& s, script& scr, runner& r, + const diag_frame* ds) { + diag_frame df (ds); + try { parser p; @@ -2951,24 +2954,24 @@ namespace build2 }, ref (*chain), ref (*script_), - ref (*runner_))) + ref (*runner_), + diag_frame::stack)) { + // Bail out if the scope has failed and we weren't instructed + // to keep going. + // if (chain->state == scope_state::failed && !keep_going) - { - ++i; - break; - } + throw failed (); } } } - sched.wait (task_count); + + wg.wait (); // Re-examine the scopes we have executed collecting their state. 
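
The parser change above starts each scope chain asynchronously, tracks the outstanding tasks with an atomic counter, and only re-examines the results once wait_guard::wait() returns (also forwarding the diagnostics stack into the task). The following standalone sketch, which is not part of the patch, keeps only the counter-plus-guard control flow and uses plain std::thread; build2's real scheduler is more involved and, as the surrounding comments note, may also run a task synchronously.

#include <atomic>
#include <thread>
#include <vector>
#include <cstddef>
#include <cassert>

using atomic_count = std::atomic<std::size_t>;

// Block until the count drops back to zero (the real wait_guard is more
// capable; this is just the blocking part).
//
struct wait_guard
{
  atomic_count& count;

  explicit wait_guard (atomic_count& c): count (c) {}

  void
  wait ()
  {
    while (count.load (std::memory_order_acquire) != 0)
      std::this_thread::yield ();
  }
};

int
main ()
{
  atomic_count task_count (0);
  wait_guard wg (task_count);

  std::vector<int> results (8, 0);
  std::vector<std::thread> threads;

  for (std::size_t i (0); i != results.size (); ++i)
  {
    task_count.fetch_add (1, std::memory_order_acq_rel);

    // In build2 this is sched.async (task_count, ...) which may also decide
    // to run the task right here, in this thread.
    //
    threads.emplace_back ([&results, &task_count, i] ()
                          {
                            results[i] = static_cast<int> (i) * 2;
                            task_count.fetch_sub (1, std::memory_order_acq_rel);
                          });
  }

  wg.wait (); // Only now is it safe to examine the results.

  for (std::size_t i (0); i != results.size (); ++i)
    assert (results[i] == static_cast<int> (i) * 2);

  for (std::thread& t: threads)
    t.join ();
}
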
// - for (auto j (g->scopes.begin ()); j != i; ++j) + for (const unique_ptr& chain: g->scopes) { - const unique_ptr& chain (*j); - if (chain == nullptr) continue; diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx index 0586377..c2b13ca 100644 --- a/build2/test/script/script.cxx +++ b/build2/test/script/script.cxx @@ -567,17 +567,17 @@ namespace build2 // if (t != nullptr) { - if (auto* p = t->is_a ()) + if (auto* pt = t->is_a ()) { // Do some sanity checks: the target better be up-to-date with // an assigned path. // - if (p->path ().empty ()) - fail << "target " << *p << " specified in the test variable " + v = pt->path (); + + if (v.empty ()) + fail << "target " << *pt << " specified in the test variable " << "is out of date" << info << "consider specifying it as a prerequisite of " << tt; - - v = p->path (); } else if (t->is_a ()) v = path (t->dir); diff --git a/build2/types b/build2/types index 82ee889..de0ae3f 100644 --- a/build2/types +++ b/build2/types @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex) @@ -82,9 +84,21 @@ namespace build2 // Concurrency. // - using atomic_count = std::atomic; // Matches scheduler::atomic_count. + using std::atomic; + using std::memory_order; + using std::memory_order_relaxed; + using std::memory_order_consume; + using std::memory_order_acquire; + using std::memory_order_release; + using std::memory_order_acq_rel; + using std::memory_order_seq_cst; - using std::future; + using atomic_count = atomic; // Matches scheduler::atomic_count. + + using std::mutex; + using mlock = std::unique_lock; + + using std::condition_variable; #if defined(__cpp_lib_shared_mutex) using shared_mutex = std::shared_mutex; @@ -98,7 +112,15 @@ namespace build2 // Because we have this fallback, we need to be careful not to create // multiple shared locks in the same thread. // - using shared_mutex = std::mutex; + struct shared_mutex: mutex + { + using mutex::mutex; + + void lock_shared () { lock (); } + void try_lock_shared () { try_lock (); } + void unlock_shared () { unlock (); } + }; + using ulock = std::unique_lock; using slock = ulock; #endif @@ -106,6 +128,9 @@ namespace build2 using std::defer_lock; using std::adopt_lock; + using std::future; + namespace this_thread = std::this_thread; + // Exceptions. // // While is included, there is no using for std::exception -- @@ -162,6 +187,7 @@ namespace build2 using butl::timestamp; using butl::duration; using butl::timestamp_unknown; + using butl::timestamp_unknown_rep; using butl::timestamp_nonexistent; using butl::operator<<; @@ -186,7 +212,12 @@ namespace build2 // See context. 
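
The slock/ulock aliases introduced above pick std::shared_mutex (or shared_timed_mutex) where the standard library provides one and quietly degrade to exclusive locking otherwise, hence the comment that the fallback must never take two shared locks in the same thread. A condensed standalone version of that selection, not part of the patch, with a hypothetical guarded counter; the patch itself wraps std::mutex in a small shared_mutex shim rather than using a bare alias as done here.

#include <mutex>

#if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex)
#  include <shared_mutex>
#endif

#if defined(__cpp_lib_shared_mutex)
using shared_mutex = std::shared_mutex;
using ulock        = std::unique_lock<shared_mutex>;
using slock        = std::shared_lock<shared_mutex>;
#elif defined(__cpp_lib_shared_timed_mutex)
using shared_mutex = std::shared_timed_mutex;
using ulock        = std::unique_lock<shared_mutex>;
using slock        = std::shared_lock<shared_mutex>;
#else
// No shared mutex: degrade to exclusive locking. With this fallback taking
// two shared locks in the same thread would deadlock.
//
using shared_mutex = std::mutex;
using ulock        = std::unique_lock<shared_mutex>;
using slock        = ulock;
#endif

shared_mutex m;
int counter = 0; // Hypothetical guarded state.

int
read_counter ()
{
  slock l (m); // Shared where available, exclusive otherwise.
  return counter;
}

void
bump_counter ()
{
  ulock l (m); // Always exclusive.
  ++counter;
}

int
main ()
{
  bump_counter ();
  return read_counter () == 1 ? 0 : 1;
}
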
// - enum class run_phase {load, search_match, execute}; + enum class run_phase {load, match, execute}; + + ostream& + operator<< (ostream&, run_phase); // utility.cxx + + extern run_phase phase; } // In order to be found (via ADL) these have to be either in std:: or in diff --git a/build2/utility.cxx b/build2/utility.cxx index 76d3367..4bd4583 100644 --- a/build2/utility.cxx +++ b/build2/utility.cxx @@ -18,6 +18,16 @@ using namespace std; // // // +namespace build2 +{ + static const char* const run_phase_[] = {"load", "match", "execute"}; + + ostream& + operator<< (ostream& os, run_phase p) + { + return os << run_phase_[static_cast (p)]; + } +} namespace std { diff --git a/build2/variable b/build2/variable index af7cbef..690c45d 100644 --- a/build2/variable +++ b/build2/variable @@ -898,10 +898,14 @@ namespace build2 variable_pool (): variable_pool (false) {} - // Proof of lock for RW access. + // RW access. // variable_pool& - rw (const ulock&) const {return const_cast (*this);} + rw () const + { + assert (phase == run_phase::load); + return const_cast (*this); + } variable_pool& rw (scope&) const {return const_cast (*this);} diff --git a/build2/variable.ixx b/build2/variable.ixx index 8d5d710..dfe5c84 100644 --- a/build2/variable.ixx +++ b/build2/variable.ixx @@ -639,8 +639,6 @@ namespace build2 // variable_map::iterator_adapter // - extern run_phase phase; // context - template inline typename I::reference variable_map::iterator_adapter:: operator* () const diff --git a/tests/common.test b/tests/common.test index 250948e..b8d6148 100644 --- a/tests/common.test +++ b/tests/common.test @@ -11,7 +11,7 @@ project = test amalgamation = EOI -test.options += --jobs 1 --quiet --buildfile - +test.options += --serial-stop --quiet --buildfile - # By default just load the buildfile. # diff --git a/tests/search/dir/testscript b/tests/search/dir/testscript index 49c964e..d580c4f 100644 --- a/tests/search/dir/testscript +++ b/tests/search/dir/testscript @@ -22,7 +22,6 @@ EOI $* <'./: foo/' 2>>/EOE != 0 error: no explicit target for prerequisite ../:dir{foo/} info: did you forget to include the corresponding buildfile? -info: while applying rule alias to update dir{../} EOE : basic diff --git a/tests/test/common.test b/tests/test/common.test index 7e3aa67..fc3cf9f 100644 --- a/tests/test/common.test +++ b/tests/test/common.test @@ -20,7 +20,7 @@ if ($null($test.options)) test.options = --buildfile - end -test.options += --jobs 1 --quiet +test.options += --serial-stop --quiet # By default perform test. # diff --git a/tests/test/config-test/testscript b/tests/test/config-test/testscript index 833a33a..d918a3b 100644 --- a/tests/test/config-test/testscript +++ b/tests/test/config-test/testscript @@ -5,7 +5,7 @@ # Setup a realistic test project that we will then exercise. # -test.options = --jobs 1 --quiet +test.options = --serial-stop --quiet test.arguments = 'test(../proj/@./)' # Test out-of-src (for parallel). test.cleanups = &**/ # Cleanup out directory structure. diff --git a/tests/test/script/common.test b/tests/test/script/common.test index 781be30..cc93a8d 100644 --- a/tests/test/script/common.test +++ b/tests/test/script/common.test @@ -32,5 +32,5 @@ end # automatically becoming dir{./}'s prerequisite. 
# c = cat >=testscript -b = $0 --jobs 1 --quiet --buildfile - test <"'test{testscript}: \$target'" \ +b = $0 --serial-stop --quiet --buildfile - test <"'test{testscript}: \$target'" \ &?test/*** diff --git a/tests/test/script/runner/redirect.test b/tests/test/script/runner/redirect.test index b316011..4e0bc69 100644 --- a/tests/test/script/runner/redirect.test +++ b/tests/test/script/runner/redirect.test @@ -16,7 +16,7 @@ psr = ($cxx.target.class != 'windows' ? '/' : '\\') # Path separator in regex. cat <=buildfile; test{testscript}: $target EOI - $0 --jobs 1 --quiet test foo 2>bar + $0 --serial-stop --quiet test foo 2>bar } : null diff --git a/unit-tests/test/script/parser/driver.cxx b/unit-tests/test/script/parser/driver.cxx index c054669..5872fb8 100644 --- a/unit-tests/test/script/parser/driver.cxx +++ b/unit-tests/test/script/parser/driver.cxx @@ -182,8 +182,6 @@ namespace build2 // be absolute. However, the testscript implementation doesn't // really care. // - ulock ml (model_mutex); - file& tt ( targets.insert (work, dir_path (), @@ -193,7 +191,7 @@ namespace build2 value& v ( tt.assign ( - var_pool.rw (ml).insert ( + var_pool.rw ().insert ( "test.target", variable_visibility::project))); v = cast ((*global_scope)["build.host"]); @@ -205,8 +203,6 @@ namespace build2 name.leaf ().extension (), trace)); - ml.unlock (); - tt.path (path ("driver")); st.path (name); -- cgit v1.1