Add scheduler::shard_size() helper function

author: Boris Kolpackov <boris@codesynthesis.com> 2017-02-13 09:26:11 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2017-02-13 12:42:42 +0200
commit: 38290dacd8faab166774d757a1e09807e57e9ba5 (patch)
tree: bff156e0bf4c906828800b35c32042f69a5ebd1b /build2
parent: 62f962ec68435f6acade5769335153ffb90aad69 (diff)
2 files changed, 61 insertions, 46 deletions
diff --git a/build2/scheduler b/build2/scheduler
index c487b88..430fdf2 100644
--- a/build2/scheduler
+++ b/build2/scheduler
@@ -241,6 +241,16 @@ namespace build2
       return std::thread::hardware_concurrency ();
     }
 
+    // Return a lock shard size appropriate for the scheduler's concurrency:
+    // a prime for common thread counts, max_threads * mul / div / 2 - 1 for
+    // the rest. Use power of two values for mul for higher-contention shards
+    // and for div for lower-contention ones. Return 1 for serial execution.
+    //
+    // Note: can only be called from threads that have observed startup.
+    //
+    size_t
+    shard_size (size_t mul = 1, size_t div = 1) const;
+
   private:
     using lock = std::unique_lock<std::mutex>;
 
@@ -352,7 +362,7 @@ namespace build2
       bool shutdown = true;
     };
 
-    size_t wait_queue_size_; // Multiple of max_threads.
+    size_t wait_queue_size_; // Proportional to max_threads.
     unique_ptr<wait_slot[]> wait_queue_;
 
     // Task queue.
diff --git a/build2/scheduler.cxx b/build2/scheduler.cxx
index 308310e..1793ab2 100644
--- a/build2/scheduler.cxx
+++ b/build2/scheduler.cxx
@@ -148,6 +148,55 @@ namespace build2
     try { shutdown (); } catch (system_error&) {}
   }
 
+  size_t scheduler::
+  shard_size (size_t mul, size_t div) const
+  {
+    size_t n (max_threads_ == 1 ? 0 : max_threads_ * mul / div / 2);
+
+    // Pick a nice prime for common values of n (zero means serial execution).
+    // Experience shows that we want something close to 2x for small numbers,
+    // then reduce to 1.5x in-between, and 1x for large ones. Note that Intel
+    // Xeons are all over the map when it comes to cores (6, 8, 10, 12, 14,
+    // 16, 18, 20, 22).
+    //
+    return              // HW threads x arch-bits (see max_threads below)
+      n ==   0 ?    1 : // serial
+      //
+      // 2x
+      //
+      n ==   1 ?    3 :
+      n ==   2 ?    5 :
+      n ==   4 ?   11 :
+      n ==   6 ?   13 :
+      n ==   8 ?   17 : // 2 x 4
+      n ==  16 ?   31 : // 4 x 4, 2 x 8
+      //
+      // 1.5x
+      //
+      n ==  32 ?   47 : // 4 x 8
+      n ==  48 ?   53 : // 6 x 8
+      n ==  64 ?   67 : // 8 x 8
+      n ==  80 ?   89 : // 10 x 8
+      //
+      // 1x
+      //
+      n ==  96 ?  101 : // 12 x 8
+      n == 112 ?  127 : // 14 x 8
+      n == 128 ?  131 : // 16 x 8
+      n == 144 ?  139 : // 18 x 8
+      n == 160 ?  157 : // 20 x 8
+      n == 176 ?  173 : // 22 x 8
+      n == 192 ?  191 : // 24 x 8
+      n == 224 ?  223 : // 28 x 8
+      n == 256 ?  251 : // 32 x 8
+      n == 288 ?  271 : // 36 x 8
+      n == 320 ?  313 : // 40 x 8
+      n == 352 ?  331 : // 44 x 8
+      n == 384 ?  367 : // 48 x 8
+      n == 512 ?  499 : // 64 x 8
+      n - 1;            // Unlisted n: assumed even; may not yield a prime.
+  }
+
   void scheduler::
   startup (size_t max_active,
            size_t init_active,
@@ -187,51 +236,7 @@ namespace build2
 
     queued_task_count_.store (0, memory_order_relaxed);
 
-    // Pick a nice prime for common max_threads/2 numbers. Experience shows
-    // that we want something close to 2x for small numbers, then reduce to
-    // 1.5x in-between, and 1x for large ones.
-    //
-    // Note that Intel Xeons are all over the map when it comes to cores (6,
-    // 8, 10, 12, 14, 16, 18, 20, 22).
-    //
-    {
-      size_t n (max_threads / 2);
-
-      wait_queue_size_ =            // HW threads x bits
-        n == 0  ?     0 : // serial
-        //
-        // 2x
-        //
-        n ==   8 ?   17 : // 2 x 4
-        n ==  16 ?   31 : // 4 x 4, 2 x 8
-        //
-        // 1.5x
-        //
-        n ==  32 ?   47 : // 4 x 8
-        n ==  48 ?   53 : // 6 x 8
-        n ==  64 ?   67 : // 8 x 8
-        n ==  80 ?   89 : // 10 x 8
-        //
-        // 1x
-        //
-        n ==  96 ?  101 : // 12 x 8
-        n == 112 ?  127 : // 14 x 8
-        n == 128 ?  131 : // 16 x 8
-        n == 144 ?  139 : // 18 x 8
-        n == 160 ?  157 : // 20 x 8
-        n == 176 ?  173 : // 22 x 8
-        n == 192 ?  191 : // 24 x 8
-        n == 224 ?  223 : // 28 x 8
-        n == 256 ?  251 : // 32 x 8
-        n == 288 ?  271 : // 36 x 8
-        n == 320 ?  313 : // 40 x 8
-        n == 352 ?  331 : // 44 x 8
-        n == 384 ?  367 : // 48 x 8
-        n == 512 ?  499 : // 64 x 8
-        n - 1;            // Assume it is even.
-    }
-
-    if (wait_queue_size_ != 0)
+    if ((wait_queue_size_ = max_threads == 1 ? 0 : shard_size ()) != 0)
       wait_queue_.reset (new wait_slot[wait_queue_size_]);
 
     // Reset stats counters.
author	Boris Kolpackov <boris@codesynthesis.com>	2017-02-13 09:26:11 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2017-02-13 12:42:42 +0200
commit	38290dacd8faab166774d757a1e09807e57e9ba5 (patch)
tree	bff156e0bf4c906828800b35c32042f69a5ebd1b /build2
parent	62f962ec68435f6acade5769335153ffb90aad69 (diff)