From b37f1aa6398065be806e6605a023189685669885 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 15 Feb 2017 03:55:15 +0200 Subject: Implement parallel match --- build2/context | 309 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 181 insertions(+), 128 deletions(-) (limited to 'build2/context') diff --git a/build2/context b/build2/context index 79f04a2..bc73d5b 100644 --- a/build2/context +++ b/build2/context @@ -11,35 +11,33 @@ #include #include #include +#include namespace build2 { + // Main (and only) scheduler. Started up and shut down in main(). + // + extern scheduler sched; + // In order to perform each operation the build system goes through the // following phases: // - // load - load the buildfiles - // search & match - search prerequisites and match rules - // execute - execute the matched rule - // - // The phase can only be changed during serial or exclusive execution - // (see below). + // load - load the buildfiles + // match - search prerequisites and match rules + // execute - execute the matched rule // - extern run_phase phase; - - // The build system model (internal state) is protected at the top level by - // the model mutex. During serial execution the model mutex is unlocked. + // The build system starts with a "serial load" phase and then continues + // with parallel search and execute. Match, however, can be interrupted + // both with load and execute. // - extern shared_mutex model_mutex; - - // Parallel execution always starts with acquiring a shared model lock (by - // creating model_slock; see below). Pointers to these locks are cached in - // the model_lock TLS variable (which is NULL during serial execution). + // Match can be interrupted with "exclusive load" in order to load + // additional buildfiles. Similarly, it can be interrupted with (parallel) + // execute in order to build targetd required to complete the match (for + // example, generated source code or source code generators themselves. // - // The build system starts with a "serial load" phase and then continues - // with parallel search & match and execute. Search & match, however, can be - // interrupted with an "exclusive load" by re-locking the shared lock as - // exclusive (using model_rlock below), changing the phase, and loading - // additional buildfiles. + // Such interruptions are performed by phase change that is protected by + // phase_mutex (which is also used to synchronize the state changes between + // phases). // // Serial load can perform arbitrary changes to the model. Exclusive load, // however, can only perform "island appends". That is, it can create new @@ -47,150 +45,198 @@ namespace build2 // invalidate any references to such (the idea here is that one should be // able to load additional buildfiles as long as they don't interfere with // the existing build state). The "islands" are identified by the - // load_generation number (0 for serial load). It is incremented/restored by - // phase_guard and is stored in various "nodes" (variables, etc) to allow - // modifications "within the islands". - // - // @@ MT: do we really have to hold shared lock during execute? - // @@ MT: we can also interrupt load s&m with execute -- neither handled - // nor documented. + // load_generation number (0 for initial/serial load). It is incremented in + // case of a phase switch and is stored in various "nodes" (variables, etc) + // to allow modifications "within the islands". // - extern -#ifdef __cpp_thread_local - thread_local -#else - __thread -#endif - slock* model_lock; - + extern run_phase phase; extern size_t load_generation; - struct phase_guard + // A "tri-mutex" that keeps all the threads in one of the three phases. When + // a thread wants to switch a phase, it has to wait for all the other + // threads to do the same (or release their phase locks). The load phase is + // exclusive. + // + // The interleaving match and execute is interesting: during match we read + // the "external state" (e.g., filesystem entries, modifications times, etc) + // and capture it in the "internal state" (our dependency graph). During + // execute we are modifying the external state with controlled modifications + // of the internal state to reflect the changes (e.g., update mtimes). If + // you think about it, it's pretty clear that we cannot safely perform both + // of these actions simultaneously. A good example would be running a code + // generator and header dependency extraction simultaneously: the extraction + // process may pick up headers as they are being generated. As a result, we + // either have everyone treat the external state as read-only or write-only. + // + class phase_mutex { - explicit - phase_guard (run_phase p) - : o (phase) - { - phase = p; - - if (phase == run_phase::load) - ++load_generation; - } - - ~phase_guard () - { - if (phase == run_phase::load) - --load_generation; - - phase = o; - } - run_phase o; + public: + // Acquire a phase lock potentially blocking (unless already in the + // desired phase) until switching to the desired phase is possible. + // + void + lock (run_phase); + + // Release the phase lock potentially allowing (unless there are other + // locks on this phase) switching to a different phase. + // + void + unlock (run_phase); + + // Switch from one phase to another. Semantically, just unlock() followed + // by lock() but more efficient. + // + void + relock (run_phase unlock, run_phase lock); + + private: + friend struct phase_lock; + friend struct phase_unlock; + friend struct phase_switch; + + phase_mutex (): lc_ (0), mc_ (0), ec_ (0) {phase = run_phase::load;} + + static phase_mutex instance; + + private: + // We have a counter for each phase which represents the number of threads + // in or waiting for this phase. + // + // We use condition variables to wait for a phase switch. The load phase + // is exclusive so we have a separate mutex to serialize it (think of it + // as a second level locking). + // + // When the mutex is unlocked (all three counters become zero, the phase + // is always changed to load (this is also the initial state). + // + mutex m_; + size_t lc_; + size_t mc_; + size_t ec_; + + condition_variable lv_; + condition_variable mv_; + condition_variable ev_; + + mutex lm_; }; - // A shared model lock. If there is already an instance of model_slock in - // this thread, then the new instance simply references it (asserting that - // it is locked). + // Grab a new phase lock releasing it on destruction. The lock can be + // "owning" or "referencing" (recursive). + // + // On the referencing semantics: If there is already an instance of + // phase_lock in this thread, then the new instance simply references it. // // The reason for this semantics is to support the following scheduling - // pattern: + // pattern (in actual code we use wait_guard to RAII it): // - // scheduler::atomic_count task_count (0); + // atomic_count task_count (0); // // { - // model_slock ml; // (1) + // phase_lock l (run_phase::match); // (1) // // for (...) // { // sched.async (task_count, // [] (...) // { - // model_slock ml; // (2) + // phase_lock pl (run_phase::match); // (2) // ... // }, // ...); // } // } // - // sched.wait (task_count); // (3) + // sched.wait (task_count); // (3) // // Here is what's going on here: // - // 1. We first get a shared lock "for ourselves" since after the first + // 1. We first get a phase lock "for ourselves" since after the first // iteration of the loop, things may become asynchronous (including - // attempts to relock for exclusive access and change the structure we - // are iteration upon). + // attempts to switch the phase and modify the structure we are iteration + // upon). // // 2. The task can be queued or it can be executed synchronously inside // async() (refer to the scheduler class for details on this semantics). // // If this is an async()-synchronous execution, then the task will create - // a referencing model_slock. If, however, this is a queued execution + // a referencing phase_lock. If, however, this is a queued execution // (including wait()-synchronous), then the task will create a top-level - // model_slock. + // phase_lock. // // Note that we only acquire the lock once the task starts executing // (there is no reason to hold the lock while the task is sitting in the // queue). This optimization assumes that whatever else we pass to the - // task (for example, a reference to a target) is immutable (so such a - // reference cannot become invalid). + // task (for example, a reference to a target) is stable (in other words, + // such a reference cannot become invalid). // - // 3. Before calling wait(), we release our shared lock to allow re-locking - // for exclusive access. And once wait() returns we are again running - // serially. + // 3. Before calling wait(), we release our phase lock to allow switching + // the phase. // - struct model_slock + struct phase_lock { - model_slock () - { - if (slock* l = model_lock) - assert (l->owns_lock ()); - else - model_lock = &(l_ = slock (model_mutex)); - } - - ~model_slock () - { - if (&l_ == model_lock) - model_lock = nullptr; - } - - operator slock& () {return *model_lock;} - operator const slock& () const {return *model_lock;} + explicit phase_lock (run_phase); + ~phase_lock (); - private: - slock l_; + phase_lock (phase_lock&&) = delete; + phase_lock (const phase_lock&) = delete; + + phase_lock& operator= (phase_lock&&) = delete; + phase_lock& operator= (const phase_lock&) = delete; + + run_phase p; + + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + phase_lock* instance; }; - // Re-lock shared to exclusive for the lifetime or rlock. + // Assuming we have a lock on the current phase, temporarily release it + // and reacquire on destruction. // - struct model_rlock + struct phase_unlock { - model_rlock () - : sl_ (model_lock) - { - if (sl_ != nullptr) - { - sl_->unlock (); - ul_ = ulock (*sl_->mutex ()); - } - } - - ~model_rlock () - { - if (sl_ != nullptr) - { - ul_.unlock (); - sl_->lock (); - } - } - - // Can be treated as const ulock. - // - operator const ulock& () const {return ul_;} + phase_unlock (bool unlock = true); + ~phase_unlock (); - private: - slock* sl_; - ulock ul_; + phase_lock* l; + }; + + // Assuming we have a lock on the current phase, temporarily switch to a + // new phase and switch back on destruction. + // + struct phase_switch + { + explicit phase_switch (run_phase); + ~phase_switch (); + + run_phase o, n; + }; + + // Wait for a task count optionally and temporarily unlocking the phase. + // + struct wait_guard + { + ~wait_guard () noexcept (false); + + explicit + wait_guard (atomic_count& task_count, + bool phase = false); + + wait_guard (size_t start_count, + atomic_count& task_count, + bool phase = false); + + void + wait (); + + size_t start_count; + atomic_count* task_count; + bool phase; }; // Cached variables. @@ -218,23 +264,36 @@ namespace build2 extern const meta_operation_info* current_mif; extern const operation_info* current_inner_oif; extern const operation_info* current_outer_oif; + extern size_t current_on; // Current operation number (1-based) in the + // meta-operation batch. + extern execution_mode current_mode; + // Total number of dependency relationships in the current action. Together + // with the target::dependents count it is incremented during the rule + // search & match phase and is decremented during execution with the + // expectation of it reaching 0. Used as a sanity check. + // + extern atomic_count dependency_count; + inline void set_current_mif (const meta_operation_info& mif) { - current_mif = &mif; current_mname = &mif.name; + current_mif = &mif; + current_on = 0; // Reset. } inline void set_current_oif (const operation_info& inner_oif, const operation_info* outer_oif = nullptr) { + current_oname = &(outer_oif == nullptr ? inner_oif : *outer_oif).name; current_inner_oif = &inner_oif; current_outer_oif = outer_oif; - current_oname = &(outer_oif == nullptr ? inner_oif : *outer_oif).name; + current_on++; current_mode = inner_oif.mode; + dependency_count.store (0, memory_order_relaxed); // Serial. } // Keep going flag. @@ -245,14 +304,6 @@ namespace build2 // extern bool keep_going; - // Total number of dependency relationships in the current action. - // Together with the target::dependents count it is incremented - // during the rule search & match phase and is decremented during - // execution with the expectation of it reaching 0. Used as a sanity - // check. - // - extern atomic_count dependency_count; - // Reset the build state. In particular, this removes all the targets, // scopes, and variables. // @@ -343,4 +394,6 @@ namespace build2 } } +#include + #endif // BUILD2_CONTEXT -- cgit v1.1