1 files changed, 81 insertions, 27 deletions
diff --git a/libbuild2/scheduler.hxx b/libbuild2/scheduler.hxx
index dcde79b..3cc206e 100644
--- a/libbuild2/scheduler.hxx
+++ b/libbuild2/scheduler.hxx
@@ -5,11 +5,10 @@
 #define LIBBUILD2_SCHEDULER_HXX
 
 #include <list>
-#include <mutex>
 #include <tuple>
 #include <atomic>
-#include <type_traits>        // aligned_storage, etc
-#include <condition_variable>
+#include <cstddef>     // max_align_t
+#include <type_traits> // decay, etc
 
 #include <libbuild2/types.hxx>
 #include <libbuild2/utility.hxx>
@@ -193,13 +192,15 @@ namespace build2
     //
     // The external flag indicates whether the wait is for an event external
     // to the scheduler, that is, triggered by something other than one of the
-    // threads managed by the scheduler.
+    // threads managed by the scheduler. This is used to suspend deadlock
+    // detection (which is progress-based, and progress cannot be measured
+    // for external events).
     //
     void
     deactivate (bool external);
 
     void
-    activate (bool external, bool = false);
+    activate (bool external);
 
     // Sleep for the specified duration, deactivating the thread before going
     // to sleep and re-activating it after waking up (which means this
@@ -218,7 +219,7 @@ namespace build2
     // Allocate additional active thread count to the current active thread,
     // for example, to be "passed" to an external program:
     //
-    // scheduler::alloc_guard ag (ctx.sched, ctx.sched.max_active () / 2);
+    // scheduler::alloc_guard ag (*ctx.sched, ctx.sched->max_active () / 2);
     // args.push_back ("-flto=" + to_string (1 + ag.n));
     // run (args);
     // ag.deallocate ();
@@ -243,14 +244,38 @@ namespace build2
     void
     deallocate (size_t);
 
+    // Similar to allocate() but reserve all the available threads, blocking
+    // until this becomes possible. Call unlock() on the specified lock before
+    // deactivating and lock() after activating (can be used to unlock the
+    // phase). Typical usage:
+    //
+    // scheduler::alloc_guard ag (*ctx.sched,
+    //                            phase_unlock (ctx, true /* delay */));
+    //
+    // Or, without unlocking the phase:
+    //
+    // scheduler::alloc_guard ag (*ctx.sched, phase_unlock (nullptr));
+    //
+    template <typename L>
+    size_t
+    serialize (L& lock);
+
     struct alloc_guard
     {
       size_t n;
 
       alloc_guard (): n (0), s_ (nullptr) {}
       alloc_guard (scheduler& s, size_t m): n (s.allocate (m)), s_ (&s) {}
-      alloc_guard (alloc_guard&& x): n (x.n), s_ (x.s_) {x.s_ = nullptr;}
-      alloc_guard& operator= (alloc_guard&& x)
+
+      // Serializing overload; decay L so integral lvalues don't match it.
+      template <typename L, typename std::enable_if<!std::is_integral<
+                  typename std::decay<L>::type>::value, int>::type = 0>
+      alloc_guard (scheduler& s, L&& l): n (s.serialize (l)), s_ (&s) {}
+      alloc_guard (alloc_guard&& x) noexcept
+        : n (x.n), s_ (x.s_) {x.s_ = nullptr;}
+
+      alloc_guard&
+      operator= (alloc_guard&& x) noexcept
       {
         if (&x != this)
         {
@@ -301,14 +326,25 @@ namespace build2
     // If the maximum threads or task queue depth arguments are unspecified,
     // then appropriate defaults are used.
     //
+    // Passing non-zero orig_max_active (normally the real max active) allows
+    // starting up a pre-tuned scheduler. In particular, starting a scheduler
+    // pre-tuned to serial is relatively cheap since starting the deadlock
+    // detection thread is delayed until the scheduler is re-tuned.
+    //
     explicit
     scheduler (size_t max_active,
                size_t init_active = 1,
                size_t max_threads = 0,
                size_t queue_depth = 0,
-               optional<size_t> max_stack = nullopt)
+               optional<size_t> max_stack = nullopt,
+               size_t orig_max_active = 0)
     {
-      startup (max_active, init_active, max_threads, queue_depth, max_stack);
+      startup (max_active,
+               init_active,
+               max_threads,
+               queue_depth,
+               max_stack,
+               orig_max_active);
     }
 
     // Start the scheduler.
@@ -318,7 +354,8 @@ namespace build2
              size_t init_active = 1,
              size_t max_threads = 0,
              size_t queue_depth = 0,
-             optional<size_t> max_stack = nullopt);
+             optional<size_t> max_stack = nullopt,
+             size_t orig_max_active = 0);
 
     // Return true if the scheduler was started up.
     //
@@ -343,12 +380,19 @@ namespace build2
     size_t
     tune (size_t max_active);
 
+    bool
+    tuned () const {return max_active_ != orig_max_active_;}
+
     struct tune_guard
     {
       tune_guard (): s_ (nullptr), o_ (0) {}
       tune_guard (scheduler& s, size_t ma): s_ (&s), o_ (s_->tune (ma)) {}
-      tune_guard (tune_guard&& x): s_ (x.s_), o_ (x.o_) {x.s_ = nullptr;}
-      tune_guard& operator= (tune_guard&& x)
+
+      tune_guard (tune_guard&& x) noexcept
+        : s_ (x.s_), o_ (x.o_) {x.s_ = nullptr;}
+
+      tune_guard&
+      operator= (tune_guard&& x) noexcept
       {
         if (&x != this)
         {
@@ -416,8 +460,8 @@ namespace build2
     {
       explicit
       monitor_guard (scheduler* s = nullptr): s_ (s) {}
-      monitor_guard (monitor_guard&& x): s_ (x.s_) {x.s_ = nullptr;}
-      monitor_guard& operator= (monitor_guard&& x)
+      monitor_guard (monitor_guard&& x) noexcept: s_ (x.s_) {x.s_ = nullptr;}
+      monitor_guard& operator= (monitor_guard&& x) noexcept
       {
         if (&x != this)
         {
@@ -480,7 +524,7 @@ namespace build2
     static size_t
     hardware_concurrency ()
     {
-      return std::thread::hardware_concurrency ();
+      return build2::thread::hardware_concurrency ();
     }
 
     // Return a prime number that can be used as a lock shard size that's
@@ -497,7 +541,7 @@ namespace build2
     // to become idle. Return the lock over the scheduler mutex. Normally you
     // don't need to call this function directly.
     //
-    using lock = std::unique_lock<std::mutex>;
+    using lock = build2::mlock;
 
     lock
     wait_idle ();
@@ -533,8 +577,8 @@ namespace build2
 
       atomic_count* task_count;
       size_t start_count;
-      func_type func;
       args_type args;
+      func_type func;
 
       template <size_t... i>
       void
@@ -559,7 +603,7 @@ namespace build2
     size_t                     monitor_init_;             // Initial count.
     function<size_t (size_t)>  monitor_func_;
 
-    std::mutex mutex_;
+    build2::mutex mutex_;
     bool shutdown_ = true;  // Shutdown flag.
 
     optional<size_t> max_stack_;
@@ -599,8 +643,8 @@ namespace build2
     //
     size_t orig_max_active_ = 0;
 
-    std::condition_variable idle_condv_;  // Idle helpers queue.
-    std::condition_variable ready_condv_; // Ready masters queue.
+    build2::condition_variable idle_condv_;  // Idle helpers queue.
+    build2::condition_variable ready_condv_; // Ready masters queue.
 
     // Statistics counters.
     //
@@ -619,8 +663,8 @@ namespace build2
 
     // Deadlock detection.
     //
-    std::thread             dead_thread_;
-    std::condition_variable dead_condv_;
+    build2::thread             dead_thread_;
+    build2::condition_variable dead_condv_;
 
     static void*
     deadlock_monitor (void*);
@@ -641,8 +685,8 @@ namespace build2
     //
     struct wait_slot
     {
-      std::mutex mutex;
-      std::condition_variable condv;
+      build2::mutex mutex;
+      build2::condition_variable condv;
       size_t waiters = 0;
       const atomic_count* task_count;
       bool shutdown = true;
@@ -663,7 +707,11 @@ namespace build2
     //
     struct task_data
     {
-      std::aligned_storage<sizeof (void*) * 8>::type data;
+      // Payload size: 16 pointers if pointers are 4 bytes, otherwise 8.
+      static const size_t data_size =
+        sizeof (void*) * (sizeof (void*) == 4 ? 16 : 8);
+
+      alignas (std::max_align_t) unsigned char data[data_size];
       void (*thunk) (scheduler&, lock&, void*);
     };
 
@@ -714,7 +762,7 @@ namespace build2
 
     struct task_queue: task_queue_data
     {
-      std::mutex mutex;
+      build2::mutex mutex;
       bool shutdown = false;
 
       size_t stat_full = 0; // Number of times push() returned NULL.
@@ -913,6 +961,12 @@ namespace build2
   private:
     optional<size_t>
     wait_impl (size_t, const atomic_count&, work_queue);
+
+    void
+    deactivate_impl (bool, lock&&);
+
+    lock
+    activate_impl (bool, bool);
   };
 }