From ade1111af0e0d253418c0707ad4e15b71a191348 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Tue, 6 Aug 2019 16:23:59 +0200
Subject: Implement general deadlock detection via monitoring thread

---
 libbuild2/scheduler.cxx | 146 ++++++++++++++++++++++++++++++------------------
 1 file changed, 91 insertions(+), 55 deletions(-)

(limited to 'libbuild2/scheduler.cxx')

diff --git a/libbuild2/scheduler.cxx b/libbuild2/scheduler.cxx
index 8ac2b97..437f904 100644
--- a/libbuild2/scheduler.cxx
+++ b/libbuild2/scheduler.cxx
@@ -106,7 +106,7 @@ namespace build2
 
     active_--;
     waiting_++;
-    progress_++;
+    progress_.fetch_add (1, memory_order_relaxed);
 
     if (waiting_ > stat_max_waiters_)
       stat_max_waiters_ = waiting_;
@@ -122,60 +122,16 @@ namespace build2
     {
       activate_helper (l);
     }
-    // @@ TODO: Redo as a separate "monitoring" thread.
-    //
-    // This still doesn't work for the phase lock case where we call
-    // deactivate and then go wait on a condition variable: we are doing
-    // deadlock detection while holding the lock that prevents other
-    // threads from making progress!
-    //
-#if 0
     else if (active_ == 0)
     {
-      // We may have a deadlock which can happen because of dependency cycles.
+      // Note that we tried to handle this directly in this thread but that
+      // wouldn't work for the phase lock case where we call deactivate and
+      // then go wait on a condition variable: we would be doing deadlock
+      // detection while holding the lock that prevents other threads from
+      // making progress! So it has to be a separate monitoring thread.
       //
-      // Relying on the active_ count alone is not precise enough, however:
-      // some threads might be transitioning between the active/waiting/ready
-      // states. Carefully accounting for this is not trivial, to say the
-      // least (especially in the face of spurious wakeups). So we are going
-      // to do a "fuzzy" deadlock detection by measuring "progress". The idea
-      // is that those transitions should be pretty short-lived and so if we
-      // wait for a couple of hundred context switches, then we should be
-      // able to distinguish a real deadlock from the transition case.
-      //
-      size_t p (progress_);
-
-      for (size_t i (0); i != 100; ++i)
-      {
-        l.unlock ();
-        this_thread::yield (); // Is not enough.
-        l.lock ();
-
-        if (p != progress_)
-          break;
-      }
-
-      if (p == progress_)
-      {
-        // Reactivate and fail.
-        //
-        waiting_--;
-        active_++;
-
-        // Shutting things down cleanly is tricky: we could have handled it in
-        // the scheduler (e.g., by setting a flag and then waking everyone up,
-        // similar to shutdown). But there could also be "external waiters"
-        // that have called deactivate() -- we have no way to wake those up.
-        // So for now we are going to abort (the nice thing about abort is if
-        // this is not a dependency cycle, then we have a core to examine).
-        //
-        error << "deadlock detected, can be caused by a dependency cycle" <<
-          info << "re-run with -s to diagnose dependency cycles";
-
-        std::terminate ();
-      }
+      dead_condv_.notify_one ();
     }
-#endif
   }
 
   void scheduler::
@@ -194,14 +150,14 @@ namespace build2
     //
     waiting_--;
     ready_++;
-    progress_++;
+    progress_.fetch_add (1, memory_order_relaxed);
 
     while (!shutdown_ && active_ >= max_active_)
       ready_condv_.wait (l);
 
     ready_--;
     active_++;
-    progress_++;
+    progress_.fetch_add (1, memory_order_relaxed);
 
     if (shutdown_)
       throw_generic_error (ECANCELED);
@@ -419,17 +375,23 @@ namespace build2
     stat_max_waiters_ = 0;
     stat_wait_collisions_ = 0;
 
-    progress_ = 0;
+    progress_.store (0, memory_order_relaxed);
 
     for (size_t i (0); i != wait_queue_size_; ++i)
       wait_queue_[i].shutdown = false;
 
     shutdown_ = false;
+
+    if (max_active_ != 1)
+      dead_thread_ = thread (deadlock_monitor, this);
   }
 
   void scheduler::
   tune (size_t max_active)
   {
+    // Note that if we tune a parallel scheduler to run serially, we will
+    // still have the deadlock monitoring thread running.
+
     if (max_active == 0)
       max_active = orig_max_active_;
 
@@ -503,6 +465,15 @@ namespace build2
       l.lock ();
     }
 
+    // Wait for the deadlock monitor (the only remaining thread).
+    //
+    if (orig_max_active_ != 1) // See tune() for why not max_active_.
+    {
+      l.unlock ();
+      dead_condv_.notify_one ();
+      dead_thread_.join ();
+    }
+
     // Free the memory.
     //
     wait_queue_.reset ();
@@ -783,10 +754,13 @@ namespace build2
 
         s.active_--;
 
-        // While executing the tasks a thread might have become ready.
+        // While executing the tasks a thread might have become ready
+        // (equivalent logic to deactivate()).
         //
         if (s.ready_ != 0)
           s.ready_condv_.notify_one ();
+        else if (s.active_ == 0)
+          s.dead_condv_.notify_one ();
       }
 
       // Become idle and wait for a notification.
@@ -817,4 +791,66 @@ namespace build2
     queue (tq);
     return *tq;
   }
+
+  void* scheduler::
+  deadlock_monitor (void* d)
+  {
+    scheduler& s (*static_cast<scheduler*> (d));
+
+    lock l (s.mutex_);
+    while (!s.shutdown_)
+    {
+      s.dead_condv_.wait (l);
+
+      while (s.active_ == 0 && !s.shutdown_)
+      {
+        // We may have a deadlock which can happen because of dependency
+        // cycles.
+        //
+        // Relying on the active_ count alone is not precise enough, however:
+        // some threads might be transitioning between active/waiting/ready
+        // states. Carefully accounting for this is not trivial, to say the
+        // least (especially in the face of spurious wakeups). So we are going
+        // to do a "fuzzy" deadlock detection by measuring "progress". The
+        // idea is that those transitions should be pretty short-lived and so
+        // if we wait for a few thousand context switches, then we should be
+        // able to distinguish a real deadlock from the transition case.
+        //
+        size_t op (s.progress_.load (memory_order_relaxed)), np (op);
+
+        l.unlock ();
+        for (size_t i (0); op == np && i != 10000; ++i)
+        {
+          // We don't really need consume, but let's keep it to slow things
+          // down in case yield() is a noop.
+          //
+          this_thread::yield ();
+          np = s.progress_.load (memory_order_consume);
+        }
+        l.lock ();
+
+        // Re-check active count for good measure (maybe spinning too fast).
+        //
+        if (np == op && s.active_ == 0 && !s.shutdown_)
+        {
+          // Shutting things down cleanly is tricky: we could have handled it
+          // in the scheduler (e.g., by setting a flag and then waking
+          // everyone up, similar to shutdown). But there could also be
+          // "external waiters" that have called deactivate() -- we have no
+          // way to wake those up. So for now we are going to abort (the nice
+          // thing about abort is if this is not a dependency cycle, then we
+          // have a core to examine).
+          //
+          error << "detected deadlock that could be caused by a dependency "
+                << "cycle" <<
+            info << "re-run with -s to diagnose dependency cycles" <<
+            info << "if not a dependency cycle, please report";
+
+          std::terminate ();
+        }
+      }
+    }
+
+    return nullptr;
+  }
 }
--
cgit v1.1
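
As a companion to the patch above, here is a minimal standalone sketch of the
scheme it implements: every worker state transition bumps an atomic "progress"
counter, a worker that drops the active count to zero pokes a dedicated
monitoring thread, and that monitor only declares a deadlock if the counter
stays unchanged across many context switches. The names below (toy_scheduler,
deactivate(), activate(), monitor()) are simplified stand-ins for
illustration, not the build2 scheduler API, and shutdown signaling is left
out; in the actual change this logic lives in scheduler::deactivate(), the
helper threads, and the new scheduler::deadlock_monitor().

#include <atomic>
#include <condition_variable>
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <thread>

// Toy stand-in for the scheduler; only the pieces relevant to deadlock
// detection are modeled.
//
class toy_scheduler
{
public:
  // A worker calls this before blocking on some event (task queue, phase
  // lock, etc).
  //
  void
  deactivate ()
  {
    std::unique_lock<std::mutex> l (mutex_);

    active_--;
    waiting_++;
    progress_.fetch_add (1, std::memory_order_relaxed); // State transition.

    if (active_ == 0)
      dead_condv_.notify_one (); // Let the monitor take a look.
  }

  // A worker calls this once the event it was blocked on has happened.
  //
  void
  activate ()
  {
    std::unique_lock<std::mutex> l (mutex_);

    waiting_--;
    active_++;
    progress_.fetch_add (1, std::memory_order_relaxed); // State transition.
  }

  // Body of the monitoring thread (started and joined by scheduler setup and
  // shutdown logic, which is not modeled here).
  //
  void
  monitor ()
  {
    std::unique_lock<std::mutex> l (mutex_);

    while (!shutdown_)
    {
      // The predicate form avoids a lost wakeup; the patch uses a plain
      // wait() driven by the loop re-checks.
      //
      dead_condv_.wait (l, [this] {return shutdown_ || active_ == 0;});

      while (active_ == 0 && !shutdown_)
      {
        // active_ == 0 alone is not conclusive: threads may be mid-transition
        // between states. So also require the progress counter to stay put
        // over many context switches ("fuzzy" detection).
        //
        std::size_t op (progress_.load (std::memory_order_relaxed)), np (op);

        l.unlock ();
        for (std::size_t i (0); op == np && i != 10000; ++i)
        {
          std::this_thread::yield ();
          np = progress_.load (std::memory_order_relaxed);
        }
        l.lock ();

        if (np == op && active_ == 0 && !shutdown_)
        {
          std::cerr << "deadlock suspected: no progress observed" << std::endl;
          std::abort (); // The real code prints a proper diagnostic first.
        }
      }
    }
  }

private:
  std::mutex mutex_;
  std::condition_variable dead_condv_;
  std::size_t active_ = 1;  // Pretend one worker starts out active.
  std::size_t waiting_ = 0;
  bool shutdown_ = false;
  std::atomic<std::size_t> progress_ {0};
};

Spawning std::thread (&toy_scheduler::monitor, &s) plus a single worker that
calls s.deactivate() and never activates again will trip the abort once the
sampling loop sees no movement. The relaxed increments mirror the patch's own
choice: the counter is only a heuristic "has anything changed" signal sampled
by the monitor (the new code even notes the consume load is kept merely to
slow the sampling loop down), so stronger ordering buys nothing here, while
the scheduler mutex already orders the counts the monitor inspects.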