From 052dc48939a063b19a13c10cb2c735b4b06a4c4b Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Tue, 13 Dec 2016 12:30:14 +0200
Subject: Various scheduler improvements and fixes

---
 build2/scheduler.cxx | 129 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 83 insertions(+), 46 deletions(-)

diff --git a/build2/scheduler.cxx b/build2/scheduler.cxx
index b62d2d4..4619e85 100644
--- a/build2/scheduler.cxx
+++ b/build2/scheduler.cxx
@@ -4,6 +4,8 @@
 
 #include 
 
+#include 
+
 using namespace std;
 
 namespace build2
@@ -16,27 +18,14 @@ namespace build2
 
     // See if we can run some of our own tasks.
     //
-    task_queue& tq (*task_queue_); // Must have been initializied by async().
+    task_queue& tq (*task_queue_); // Must have been set by async() or task
+                                   // would have been 0.
 
     for (lock ql (tq.mutex); !tq.shutdown && !empty_back (tq); )
-    {
-      // Save the old stop point and set the new one in case the task we are
-      // about to run adds sub-tasks.
-      //
-      size_t stop (tq.stop);
-      tq.stop = tq.tail - 1; // Index of the first sub-task to be added (-1
-                             // is for the following pop_back()).
-
-      pop_back (tq, ql); // Releases the lock.
-      ql.lock ();
-
-      // Restore the old stop point which we might have to adjust.
-      //
-      tq.stop = tq.head > stop ? tq.head : tq.tail < stop ? tq.tail : stop;
-    }
+      pop_back (tq, ql);
 
-    // Note that empty task queue doesn't automatically mean the task count is
-    // zero (some might still be executing asynchronously).
+    // Note that empty task queue doesn't automatically mean the task count
+    // is zero (some might still be executing asynchronously).
     //
     if (task_count == 0)
       return;
@@ -135,8 +124,16 @@ namespace build2
   }
 
   void scheduler::
-  startup (size_t max_active, size_t init_active, size_t max_threads)
+  startup (size_t max_active,
+           size_t init_active,
+           size_t max_threads,
+           size_t queue_depth)
   {
+    // Lock the mutex to make sure our changes are visible in (other) active
+    // threads.
+    //
+    lock l (mutex_);
+
     // Use 4x max_active on 32-bit and 8x max_active on 64-bit. Unless we were
     // asked to run serially.
     //
@@ -153,30 +150,53 @@ namespace build2
     max_threads_ = max_threads;
 
     // This value should be proportional to the amount of hardware concurrency
-    // we have. Note that the queue entry is quite sizable.
+    // we have (no use queing things if helpers cannot keep up). Note that the
+    // queue entry is quite sizable.
     //
-    task_queue_depth_ = max_active * sizeof (void*) * 4;
+    task_queue_depth_ = queue_depth != 0
+      ? queue_depth
+      : max_active * sizeof (void*) * 2;
 
-    task_queues_.clear ();
     task_queues_.reserve (max_threads_);
 
-    // Pick a nice prime for common max_threads numbers. Though Intel Xeons
-    // are all over the map when it comes to cores (6, 8, 10, 12, 14, 16,
-    // 18, 20, 22).
+    // Pick a nice prime for common max_threads numbers. Experience shows that
+    // we want something close to 2x for small numbers, then reduce to 1.5x
+    // in-between, and 1x for large ones.
+    //
+    // Note that Intel Xeons are all over the map when it comes to cores (6,
+    // 8, 10, 12, 14, 16, 18, 20, 22).
     //
     wait_queue_size_ =            // HW threads x bits
+      //
+      // 2x
+      //
       max_threads ==   8 ?  17 :  // 2 x 4
       max_threads ==  16 ?  31 :  // 4 x 4, 2 x 8
-      max_threads ==  32 ?  63 :  // 4 x 8
-      max_threads ==  48 ?  97 :  // 6 x 8
-      max_threads ==  64 ? 127 :  // 8 x 8
-      max_threads ==  96 ? 191 :  // 12 x 8
-      max_threads == 128 ? 257 :  // 16 x 8
-      max_threads == 192 ? 383 :  // 24 x 8
-      max_threads == 256 ? 509 :  // 32 x 8
-      max_threads == 384 ? 769 :  // 48 x 8
-      max_threads == 512 ? 1021 : // 64 x 8
-      2 * max_threads - 1;
+      //
+      // 1.5x
+      //
+      max_threads ==  32 ?  47 :  // 4 x 8
+      max_threads ==  48 ?  53 :  // 6 x 8
+      max_threads ==  64 ?  67 :  // 8 x 8
+      max_threads ==  80 ?  89 :  // 10 x 8
+      //
+      // 1x
+      //
+      max_threads ==  96 ? 101 :  // 12 x 8
+      max_threads == 112 ? 127 :  // 14 x 8
+      max_threads == 128 ? 131 :  // 16 x 8
+      max_threads == 144 ? 139 :  // 18 x 8
+      max_threads == 160 ? 157 :  // 20 x 8
+      max_threads == 176 ? 173 :  // 22 x 8
+      max_threads == 192 ? 191 :  // 24 x 8
+      max_threads == 224 ? 223 :  // 28 x 8
+      max_threads == 256 ? 251 :  // 32 x 8
+      max_threads == 288 ? 271 :  // 36 x 8
+      max_threads == 320 ? 313 :  // 40 x 8
+      max_threads == 352 ? 331 :  // 44 x 8
+      max_threads == 384 ? 367 :  // 48 x 8
+      max_threads == 512 ? 499 :  // 64 x 8
+      max_threads - 1; // Assume max_threads is even.
 
     wait_queue_.reset (new wait_slot[wait_queue_size_]);
 
@@ -249,6 +269,11 @@ namespace build2
       l.lock ();
     }
 
+    // Free the memory.
+    //
+    wait_queue_.reset ();
+    task_queues_.clear ();
+
     r.thread_max_active = max_active_;
     r.thread_max_total = max_threads_;
     r.thread_max_waiting = stat_max_waiters_;
@@ -281,7 +306,7 @@ namespace build2
     starting_++;
     l.unlock ();
 
-    // Restore the counter if the thread creation fails.
+    // Restore the counters if the thread creation fails.
     //
     struct guard
     {
@@ -338,10 +363,8 @@ namespace build2
       {
         task_queue& tq (*s.task_queues_[i]);
 
-        for (lock ql (tq.mutex);
-             !tq.shutdown && !s.empty_front (tq);
-             ql.lock ())
-          s.pop_front (tq, ql); // Releases the lock.
+        for (lock ql (tq.mutex); !tq.shutdown && !s.empty_front (tq); )
+          s.pop_front (tq, ql);
       }
 
       l.lock ();
@@ -367,16 +390,30 @@ namespace build2
     s.helpers_--;
   }
 
-  thread_local scheduler::task_queue* scheduler::task_queue_ = nullptr;
+#if defined(__apple_build_version__) && __apple_build_version__ < 8000000
+  __thread
+#else
+  thread_local
+#endif
+  scheduler::task_queue* scheduler::task_queue_ = nullptr;
 
   auto scheduler::
   create_queue () -> task_queue&
   {
-    lock l (mutex_);
-    task_queues_.push_back (make_unique<task_queue> (task_queue_depth_));
-    task_queue_ = task_queues_.back ().get ();
-    task_queue_->shutdown = shutdown_;
-    return *task_queue_;
+    // Note that task_queue_depth is immutable between startup() and
+    // shutdown() (but see join()).
+    //
+    unique_ptr<task_queue> tqp (new task_queue (task_queue_depth_));
+    task_queue& tq (*tqp);
+
+    {
+      lock l (mutex_);
+      tq.shutdown = shutdown_;
+      task_queues_.push_back (move (tqp));
+    }
+
+    task_queue_ = &tq;
+    return tq;
   }
 
   scheduler sched;
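
Note on the reworked create_queue() above: the queue is now allocated outside the
critical section, only its registration in task_queues_ happens under mutex_, and
the calling thread's thread-local pointer is set last. The following is a minimal,
self-contained sketch of that pattern; the member names mirror the patch, but the
task_queue type, the default queue depth, and the lock type used here are
simplified stand-ins, not the actual build2 scheduler interface:

  #include <cstddef>
  #include <memory>
  #include <mutex>
  #include <vector>

  // Simplified stand-in for the real task_queue (the actual one holds a ring
  // buffer of task slots plus head/tail indexes).
  //
  struct task_queue
  {
    explicit task_queue (std::size_t depth): data (new char[depth]) {}

    bool shutdown = false;
    std::unique_ptr<char[]> data;
  };

  class scheduler
  {
  public:
    task_queue&
    create_queue ()
    {
      // Allocate outside the lock: the queue depth is immutable between
      // startup() and shutdown(), so it can be read without synchronization.
      //
      std::unique_ptr<task_queue> tqp (new task_queue (task_queue_depth_));
      task_queue& tq (*tqp);

      {
        std::lock_guard<std::mutex> l (mutex_);
        tq.shutdown = shutdown_;                // Pick up a pending shutdown.
        task_queues_.push_back (std::move (tqp));
      }

      task_queue_ = &tq; // Cache in the calling thread's thread-local slot.
      return tq;
    }

  private:
    std::mutex mutex_;
    bool shutdown_ = false;
    std::size_t task_queue_depth_ = 256;        // Illustrative default.
    std::vector<std::unique_ptr<task_queue>> task_queues_;

    static thread_local task_queue* task_queue_;
  };

  thread_local task_queue* scheduler::task_queue_ = nullptr;

Regarding the preprocessor workaround in the patch: older Apple Clang (before the
build version corresponding to Xcode 8) does not implement C++11 thread_local, so
the definition falls back to the GCC/Clang __thread extension. That substitution
works here only because the variable is a raw pointer with a constant initializer;
__thread does not support types with non-trivial construction or destruction.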