Parallel implementation of update_during_match_prerequisites()

author: Boris Kolpackov <boris@codesynthesis.com> 2022-03-09 11:12:12 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2022-03-09 11:12:12 +0200
commit: 2e6c3bf33ab1cd75b9936e65568a39571f279fc3 (patch)
tree: 2c5a0b7554ffbfb47060f20fb1d3c412ac7edb1f
parent: 2ede341d59b4ab259caf808dfa65c0ac380ba347 (diff)
5 files changed, 150 insertions, 34 deletions
diff --git a/libbuild2/adhoc-rule-buildscript.cxx b/libbuild2/adhoc-rule-buildscript.cxx
index 25ef1b7..d91be65 100644
--- a/libbuild2/adhoc-rule-buildscript.cxx
+++ b/libbuild2/adhoc-rule-buildscript.cxx
@@ -459,10 +459,11 @@ namespace build2
     // Note that we ignore the result and whether it renders us out of date,
     // leaving it to the common execute logic in perform_update_*().
     //
-    // Note also that update_during_match() spoils prerequisite_target::data.
+    // Note also that update_during_match_prerequisites() spoils
+    // prerequisite_target::data.
     //
     if (a == perform_update_id)
-      update_during_match (trace, a, pts, 2 /* mask */);
+      update_during_match_prerequisites (trace, a, xt, 2 /* mask */);
 
     // See if this is not update or not on a file-based target.
     //
diff --git a/libbuild2/algorithm.cxx b/libbuild2/algorithm.cxx
index d72e95e..c101117 100644
--- a/libbuild2/algorithm.cxx
+++ b/libbuild2/algorithm.cxx
@@ -1922,7 +1922,6 @@ namespace build2
     size_t gd (ctx.dependency_count.fetch_sub (1, memory_order_relaxed));
     size_t td (s.dependents.fetch_sub (1, memory_order_release));
     assert (td != 0 && gd != 0);
-    td--;
 
     // Handle the "last" execution mode.
     //
@@ -1945,7 +1944,7 @@ namespace build2
     // thread. For other threads the state will still be unknown (until they
     // try to execute it).
     //
-    if (ctx.current_mode == execution_mode::last && td != 0)
+    if (ctx.current_mode == execution_mode::last && --td != 0)
       return target_state::postponed;
 
     // Try to atomically change applied to busy.
@@ -2007,14 +2006,17 @@ namespace build2
   }
 
   target_state
-  execute_direct (action a, const target& ct)
+  execute_direct (action a,
+                  const target& ct,
+                  size_t start_count,
+                  atomic_count* task_count)
   {
     context& ctx (ct.ctx);
 
     target& t (const_cast<target&> (ct)); // MT-aware.
     target::opstate& s (t[a]);
 
-    // Similar logic to match() above except we execute synchronously.
+    // Similar logic to execute() above.
     //
     size_t tc (ctx.count_applied ());
 
@@ -2028,7 +2030,23 @@ namespace build2
           memory_order_acquire)) // Synchronize on failure.
     {
       if (s.state == target_state::unknown)
-        execute_impl (a, t);
+      {
+        if (task_count == nullptr)
+          return execute_impl (a, t);
+
+        if (ctx.sched.async (start_count,
+                             *task_count,
+                             [a] (const diag_frame* ds, target& t)
+                             {
+                               diag_frame::stack_guard dsg (ds);
+                               execute_impl (a, t);
+                             },
+                             diag_frame::stack (),
+                             ref (t)))
+          return target_state::unknown; // Queued.
+
+        // Executed synchronously, fall through.
+      }
       else
       {
         assert (s.state == target_state::unchanged ||
@@ -2046,15 +2064,13 @@ namespace build2
     }
     else
     {
-        // If the target is busy, wait for it.
-        //
-        if (tc >= busy)
-          ctx.sched.wait (exec, s.task_count, scheduler::work_none);
-        else
-          assert (tc == exec);
+      // Either busy or already executed.
+      //
+      if (tc >= busy) return target_state::busy;
+      else            assert (tc == exec);
     }
 
-    return t.executed_state (a);
+    return t.executed_state (a, false);
   }
 
   bool
@@ -2120,16 +2136,17 @@ namespace build2
   }
 
   bool
-  update_during_match (tracer& trace,
-                       action a,
-                       prerequisite_targets& pts,
-                       uintptr_t mask)
+  update_during_match_prerequisites (tracer& trace,
+                                     action a, target& t,
+                                     uintptr_t mask)
   {
+    prerequisite_targets& pts (t.prerequisite_targets[a]);
+
     // On the first pass detect and handle unchanged tragets. Note that we
     // have to do it in a separate pass since we cannot call matched_state()
     // once we've switched the phase.
     //
-    context* ctx (nullptr);
+    size_t n (0);
 
     for (prerequisite_target& p: pts)
     {
@@ -2143,9 +2160,7 @@ namespace build2
 
           if (os != target_state::unchanged)
           {
-            if (ctx == nullptr)
-              ctx = &pt.ctx;
-
+            ++n;
             p.data = static_cast<uintptr_t> (os);
             continue;
           }
@@ -2157,13 +2172,19 @@ namespace build2
 
     // If all unchanged, we are done.
     //
-    if (ctx == nullptr)
+    if (n == 0)
       return false;
 
-    phase_switch ps (*ctx, run_phase::execute);
+    context& ctx (t.ctx);
+
+    phase_switch ps (ctx, run_phase::execute);
 
     bool r (false);
 
+    // @@ Maybe we should optimize for n == 1?
+    //
+
+#if 0
     for (prerequisite_target& p: pts)
     {
       if ((p.include & mask) != 0 && p.data != 0)
@@ -2184,6 +2205,59 @@ namespace build2
         p.data = 0;
       }
     }
+#else
+
+    // Start asynchronous execution of prerequisites. Similar logic to
+    // straight_execute_members().
+    //
+    // Note that the target's task count is expected to be busy (since this
+    // function is called during match). And there don't seem to be any
+    // problems in using it for execute.
+    //
+    atomic_count& tc (t[a].task_count);
+
+    size_t busy (ctx.count_busy ());
+    size_t exec (ctx.count_executed ());
+
+    wait_guard wg (ctx, busy, tc);
+
+    for (prerequisite_target& p: pts)
+    {
+      if ((p.include & mask) != 0 && p.data != 0)
+      {
+        execute_direct_async (a, *p.target, busy, tc);
+      }
+    }
+
+    wg.wait ();
+
+    // Finish execution and process the result.
+    //
+    for (prerequisite_target& p: pts)
+    {
+      if ((p.include & mask) != 0 && p.data != 0)
+      {
+        const target& pt (*p.target);
+
+        // If the target is still busy, wait for its completion.
+        //
+        ctx.sched.wait (exec, pt[a].task_count, scheduler::work_none);
+
+        target_state os (static_cast<target_state> (p.data));
+        target_state ns (pt.executed_state (a));
+
+        if (ns != os && ns != target_state::unchanged)
+        {
+          l6 ([&]{trace << "updated " << pt
+                        << "; old state " << os
+                        << "; new state " << ns;});
+          r = true;
+        }
+
+        p.data = 0;
+      }
+    }
+#endif
 
     return r;
   }
diff --git a/libbuild2/algorithm.hxx b/libbuild2/algorithm.hxx
index 788138b..f6c296e 100644
--- a/libbuild2/algorithm.hxx
+++ b/libbuild2/algorithm.hxx
@@ -589,12 +589,17 @@ namespace build2
   // relationship (so no dependents count is decremented) and execution order
   // (so this function never returns the postponed target state).
   //
-  // Note: waits for the completion if the target is busy and translates
-  // target_state::failed to the failed exception.
+  // Note: the first version waits for the completion if the target is busy
+  // and translates target_state::failed to the failed exception.
   //
-  LIBBUILD2_SYMEXPORT target_state
+  target_state
   execute_direct (action, const target&);
 
+  target_state
+  execute_direct_async (action, const target&,
+                        size_t start_count, atomic_count& task_count,
+                        bool fail = true);
+
   // Update the target during the match phase (by switching the phase and
   // calling execute_direct()). Return true if the target has changed or, if
   // the passed timestamp is not timestamp_unknown, it is older than the
@@ -617,10 +622,7 @@ namespace build2
   // for temporary storage). But it resets data to 0 once done.
   //
   LIBBUILD2_SYMEXPORT bool
-  update_during_match (tracer&,
-                       action,
-                       prerequisite_targets&,
-                       uintptr_t mask);
+  update_during_match_prerequisites (tracer&, action, target&, uintptr_t mask);
 
   // The default prerequisite execute implementation. Call execute_async() on
   // each non-ignored (non-NULL) prerequisite target in a loop and then wait
diff --git a/libbuild2/algorithm.ixx b/libbuild2/algorithm.ixx
index bdf0815..29a4b59 100644
--- a/libbuild2/algorithm.ixx
+++ b/libbuild2/algorithm.ixx
@@ -688,6 +688,8 @@ namespace build2
   inline target_state
   execute_wait (action a, const target& t)
   {
+    //@@ redo
+
     if (execute (a, t) == target_state::busy)
       t.ctx.sched.wait (t.ctx.count_executed (),
                         t[a].task_count,
@@ -703,7 +705,43 @@ namespace build2
   {
     target_state r (execute (a, t, sc, &tc));
 
-    if (fail && !t.ctx.keep_going && r == target_state::failed)
+    if (r == target_state::failed && fail && !t.ctx.keep_going)
+      throw failed ();
+
+    return r;
+  }
+
+  LIBBUILD2_SYMEXPORT target_state
+  execute_direct (action, const target&, size_t, atomic_count*);
+
+  inline target_state
+  execute_direct (action a, const target& t)
+  {
+    target_state r (execute_direct (a, t, 0, nullptr));
+
+    if (r == target_state::busy)
+    {
+      t.ctx.sched.wait (t.ctx.count_executed (),
+                        t[a].task_count,
+                        scheduler::work_none);
+
+      r = t.executed_state (a, false);
+    }
+
+    if (r == target_state::failed)
+      throw failed ();
+
+    return r;
+  }
+
+  inline target_state
+  execute_direct_async (action a, const target& t,
+                        size_t sc, atomic_count& tc,
+                        bool fail)
+  {
+    target_state r (execute_direct (a, t, sc, &tc));
+
+    if (r == target_state::failed && fail && !t.ctx.keep_going)
       throw failed ();
 
     return r;
diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx
index b72e5a0..97834fd 100644
--- a/libbuild2/cc/link-rule.cxx
+++ b/libbuild2/cc/link-rule.cxx
@@ -1229,10 +1229,11 @@ namespace build2
       // Note that we ignore the result and whether it renders us out of date,
       // leaving it to the common execute logic in perform_update().
       //
-      // Note also that update_during_match() spoils prerequisite_target::data.
+      // Note also that update_during_match_prerequisites() spoils
+      // prerequisite_target::data.
       //
       if (update_match)
-        update_during_match (trace, a, pts, 2 /* mask */);
+        update_during_match_prerequisites (trace, a, t, 2 /* mask */);
 
       // Check if we have any binful utility libraries.
       //
author	Boris Kolpackov <boris@codesynthesis.com>	2022-03-09 11:12:12 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2022-03-09 11:12:12 +0200
commit	2e6c3bf33ab1cd75b9936e65568a39571f279fc3 (patch)
tree	2c5a0b7554ffbfb47060f20fb1d3c412ac7edb1f
parent	2ede341d59b4ab259caf808dfa65c0ac380ba347 (diff)