From 23cbbc7f7cdcb7721d8d1cf4e70ae58184f8cc7e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 9 Jun 2020 08:19:33 +0200 Subject: Factor ad hoc C++ and Buildscript rules into separate files --- libbuild2/parser.cxx | 5 +- libbuild2/rule-adhoc-buildscript.cxx | 616 +++++++++++++++++ libbuild2/rule-adhoc-buildscript.hxx | 56 ++ libbuild2/rule-adhoc-cxx.cxx | 640 ++++++++++++++++++ libbuild2/rule-adhoc-cxx.hxx | 83 +++ libbuild2/rule.cxx | 1225 ---------------------------------- libbuild2/rule.hxx | 105 --- 7 files changed, 1399 insertions(+), 1331 deletions(-) create mode 100644 libbuild2/rule-adhoc-buildscript.cxx create mode 100644 libbuild2/rule-adhoc-buildscript.hxx create mode 100644 libbuild2/rule-adhoc-cxx.cxx create mode 100644 libbuild2/rule-adhoc-cxx.hxx diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index f22f13a..e6a1c6a 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -20,6 +20,9 @@ #include #include +#include +#include + #include // lookup_config using namespace std; @@ -1134,7 +1137,7 @@ namespace build2 { // Buildscript // - ar.reset (new adhoc_script_rule (loc, st.value.size ())); + ar.reset (new adhoc_buildscript_rule (loc, st.value.size ())); } else if (icasecmp (*lang, "c++") == 0) { diff --git a/libbuild2/rule-adhoc-buildscript.cxx b/libbuild2/rule-adhoc-buildscript.cxx new file mode 100644 index 0000000..1555c71 --- /dev/null +++ b/libbuild2/rule-adhoc-buildscript.cxx @@ -0,0 +1,616 @@ +// file : libbuild2/rule-adhoc-buildscript.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include // attributes + +#include +#include + +using namespace std; + +namespace build2 +{ + bool adhoc_buildscript_rule:: + recipe_text (context& ctx, const target& tg, string&& t, attributes& as) + { + // Handle and erase recipe-specific attributes. + // + optional diag; + for (auto i (as.begin ()); i != as.end (); ) + { + attribute& a (*i); + const string& n (a.name); + + if (n == "diag") + try + { + diag = convert (move (a.value)); + } + catch (const invalid_argument& e) + { + fail (as.loc) << "invalid " << n << " attribute value: " << e; + } + else + { + ++i; + continue; + } + + i = as.erase (i); + } + + checksum = sha256 (t).string (); + + istringstream is (move (t)); + build::script::parser p (ctx); + + script = p.pre_parse (tg, + is, loc.file, loc.line + 1, + move (diag), as.loc); + + return false; + } + + void adhoc_buildscript_rule:: + dump_attributes (ostream& os) const + { + // For now we dump it as an attribute whether it was specified or derived + // from the script. Maybe that's ok (we use this in tests)? 
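+    // For illustration (the name 'gen' here is just an example), this would
+    // produce something along the lines of:
+    //
+    //   [diag=gen]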
+ // + if (script.diag_name) + { + os << " ["; + os << "diag="; + to_stream (os, name (*script.diag_name), true /* quote */, '@'); + os << ']'; + } + } + + void adhoc_buildscript_rule:: + dump_text (ostream& os, string& ind) const + { + os << ind << string (braces, '{') << endl; + ind += " "; + + if (script.depdb_clear) + os << ind << "depdb clear" << endl; + + script::dump (os, ind, script.depdb_lines); + + if (script.diag_line) + { + os << ind; script::dump (os, *script.diag_line, true /* newline */); + } + + script::dump (os, ind, script.lines); + ind.resize (ind.size () - 2); + os << ind << string (braces, '}'); + } + + bool adhoc_buildscript_rule:: + match (action a, target& t, const string&, optional fb) const + { + if (!fb) + ; + // If this is clean for a file target and we are supplying the update, + // then we will also supply the standard clean. + // + else if (a == perform_clean_id && + *fb == perform_update_id && + t.is_a ()) + ; + else + return false; + + // It's unfortunate we have to resort to this but we need to remember this + // in apply(). + // + t.data (fb.has_value ()); + + return true; + } + + recipe adhoc_buildscript_rule:: + apply (action a, target& t) const + { + // If this is an outer operation (e.g., update-for-test), then delegate to + // the inner. + // + if (a.outer ()) + { + match_inner (a, t); + return execute_inner; + } + + // Derive file names for the target and its ad hoc group members, if any. + // + if (a == perform_update_id || a == perform_clean_id) + { + for (target* m (&t); m != nullptr; m = m->adhoc_member) + { + if (auto* p = m->is_a ()) + p->derive_path (); + } + } + + // Inject dependency on the output directory. + // + // We do it always instead of only if one of the targets is path-based in + // case the recipe creates temporary files or some such. + // + inject_fsdir (a, t); + + // Match prerequisites. + // + match_prerequisite_members (a, t); + + // See if we are providing the standard clean as a fallback. + // + if (t.data ()) + return &perform_clean_depdb; + + if (a == perform_update_id && t.is_a ()) + { + return [this] (action a, const target& t) + { + return perform_update_file (a, t); + }; + } + else + { + return [this] (action a, const target& t) + { + return default_action (a, t); + }; + } + } + + target_state adhoc_buildscript_rule:: + perform_update_file (action a, const target& xt) const + { + tracer trace ("adhoc_buildscript_rule::perform_update_file"); + + context& ctx (xt.ctx); + + const file& t (xt.as ()); + const path& tp (t.path ()); + + // How should we hash target and prerequisite sets ($> and $<)? We could + // hash them as target names (i.e., the same as the $>/< content) or as + // paths (only for path-based targets). While names feel more general, + // they are also more expensive to compute. And for path-based targets, + // path is generally a good proxy for the target name. Since the bulk of + // the ad hoc recipes will presumably be operating exclusively on + // path-based targets, let's do it both ways. + // + auto hash_target = [ns = names ()] (sha256& cs, const target& t) mutable + { + if (const path_target* pt = t.is_a ()) + cs.append (pt->path ().string ()); + else + { + ns.clear (); + t.as_name (ns); + for (const name& n: ns) + to_checksum (cs, n); + } + }; + + // Update prerequisites and determine if any of them render this target + // out-of-date. 
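+    // (If none of them do, ps ends up with their cumulative state which we
+    // return as-is below; if at least one does, ps stays nullopt, which is
+    // what forces the update.)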
+ // + timestamp mt (t.load_mtime ()); + optional ps; + + sha256 pcs, ecs; + { + // This is essentially ps=execute_prerequisites(a, t, mt) which we + // cannot use because we need to see ad hoc prerequisites. + // + size_t busy (ctx.count_busy ()); + size_t exec (ctx.count_executed ()); + + target_state rs (target_state::unchanged); + + wait_guard wg (ctx, busy, t[a].task_count); + + for (const target*& pt: t.prerequisite_targets[a]) + { + if (pt == nullptr) // Skipped. + continue; + + target_state s (execute_async (a, *pt, busy, t[a].task_count)); + + if (s == target_state::postponed) + { + rs |= s; + pt = nullptr; + } + } + + wg.wait (); + + bool e (mt == timestamp_nonexistent); + for (prerequisite_target& p: t.prerequisite_targets[a]) + { + if (p == nullptr) + continue; + + const target& pt (*p.target); + + const auto& tc (pt[a].task_count); + if (tc.load (memory_order_acquire) >= busy) + ctx.sched.wait (exec, tc, scheduler::work_none); + + target_state s (pt.executed_state (a)); + rs |= s; + + // Compare our timestamp to this prerequisite's. + // + if (!e) + { + // If this is an mtime-based target, then compare timestamps. + // + if (const mtime_target* mpt = pt.is_a ()) + { + if (mpt->newer (mt, s)) + e = true; + } + else + { + // Otherwise we assume the prerequisite is newer if it was + // changed. + // + if (s == target_state::changed) + e = true; + } + } + + if (p.adhoc) + p.target = nullptr; // Blank out. + + // As part of this loop calculate checksums that need to include ad + // hoc prerequisites (unless the script tracks changes itself). + // + if (script.depdb_clear) + continue; + + hash_target (pcs, pt); + + // The script can reference a program in one of four ways: + // + // 1. As an (imported) target (e.g., $cli) + // + // 2. As a process_path_ex (e.g., $cxx.path). + // + // 3. As a builtin (e.g., sed) + // + // 4. As a program path/name. + // + // When it comes to change tracking, there is nothing we can do for + // (4) and there is nothing to do for (3) (assuming builtin semantics + // is stable/backwards-compatible). The (2) case is handled + // automatically by hashing all the variable values referenced by the + // script (see below), which in case of process_path_ex includes the + // checksum, if available. + // + // This leaves the (1) case, which itself splits into two sub-cases: + // the target comes with the dependency information (e.g., imported + // from a project via an export stub) or it does not (e.g., imported + // as installed). We don't need to do anything extra for the first + // sub-case since the target's state/mtime can be relied upon like any + // other prerequisite. Which cannot be said about the second sub-case, + // where we reply on checksum that may be included as part of the + // target metadata. + // + // So what we are going to do is hash checksum metadata of every + // executable prerequisite target that has it (we do it here in order + // to include ad hoc prerequisites, which feels like the right thing + // to do; the user may mark tools as ad hoc in order to omit them from + // $<). + // + if (auto* e = pt.is_a ()) + { + if (auto* c = e->lookup_metadata ("checksum")) + { + ecs.append (*c); + } + } + } + + if (!e) + ps = rs; + } + + bool update (!ps); + + // We use depdb to track changes to the script itself, input/output file + // names, tools, etc. + // + depdb dd (tp + ".d"); + + // First should come the rule name/version. 
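+    // For illustration, the part of the depdb we maintain would look along
+    // these lines (the rule name/version line, then the various checksums
+    // computed below; the exact rule name is elided here):
+    //
+    //   <rule name> 1
+    //   2f0ab31c9e...
+    //   ...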
+ // + if (dd.expect (" 1") != nullptr) + l4 ([&]{trace << "rule mismatch forcing update of " << t;}); + + // Then the script checksum. + // + // Ideally, to detect changes to the script semantics, we would hash the + // text with all the variables expanded but without executing any + // commands. In practice, this is easier said than done (think the set + // builtin that receives output of a command that modifies the + // filesystem). + // + // So as the next best thing we are going to hash the unexpanded text as + // well as values of all the variables expanded in it (which we get as a + // side effect of pre-parsing the script). This approach has a number of + // drawbacks: + // + // - We can't handle computed variable names (e.g., $($x ? X : Y)). + // + // - We may "overhash" by including variables that are actually + // script-local. + // + // - There are functions like $install.resolve() with result based on + // external (to the script) information. + // + if (dd.expect (checksum) != nullptr) + l4 ([&]{trace << "recipe text change forcing update of " << t;}); + + // Track the variables, targets, and prerequisites changes, unless the + // script doesn't track the dependency changes itself. + // + + // For each variable hash its name, undefined/null/non-null indicator, + // and the value if non-null. + // + // Note that this excludes the special $< and $> variables which we + // handle below. + // + if (!script.depdb_clear) + { + sha256 cs; + names storage; + + for (const string& n: script.vars) + { + cs.append (n); + + lookup l; + + if (const variable* var = ctx.var_pool.find (n)) + l = t[var]; + + cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + + if (l) + { + storage.clear (); + names_view ns (reverse (*l, storage)); + + for (const name& n: ns) + to_checksum (cs, n); + } + } + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "recipe variable change forcing update of " << t;}); + } + + // Target and prerequisite sets ($> and $<). + // + if (!script.depdb_clear) + { + auto hash = [ns = names ()] (sha256& cs, const target& t) mutable + { + if (const path_target* pt = t.is_a ()) + cs.append (pt->path ().string ()); + else + { + ns.clear (); + t.as_name (ns); + for (const name& n: ns) + to_checksum (cs, n); + } + }; + + sha256 tcs; + for (const target* m (&t); m != nullptr; m = m->adhoc_member) + hash_target (tcs, *m); + + if (dd.expect (tcs.string ()) != nullptr) + l4 ([&]{trace << "target set change forcing update of " << t;}); + + if (dd.expect (pcs.string ()) != nullptr) + l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); + } + + // Finally the programs checksum. + // + if (!script.depdb_clear) + { + if (dd.expect (ecs.string ()) != nullptr) + l4 ([&]{trace << "program checksum change forcing update of " << t;}); + } + + const scope* bs (nullptr); + const scope* rs (nullptr); + + // Execute the custom dependency change tracking commands, if present. + // + if (!script.depdb_lines.empty ()) + { + bs = &t.base_scope (); + rs = bs->root_scope (); + + // While it would have been nice to reuse the environment for both + // dependency tracking and execution, there are complications (creating + // temporary directory, etc). + // + build::script::environment e (a, t, false /* temp_dir */); + build::script::parser p (ctx); + + for (const script::line& l: script.depdb_lines) + { + names ns (p.execute_special (*rs, *bs, e, l)); + + // These should have been enforced during pre-parsing. + // + assert (!ns.empty ()); // ... 
+ assert (l.tokens.size () > 2); // 'depdb' ... + + const string& cmd (ns[0].value); + + location loc (l.tokens[0].location ()); + + if (cmd == "hash") + { + sha256 cs; + for (auto i (ns.begin () + 1); i != ns.end (); ++i) // Skip . + to_checksum (cs, *i); + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&] { + diag_record dr (trace); + dr << "'depdb hash' argument change forcing update of " << t << + info (loc); script::dump (dr.os, l); + }); + } + else if (cmd == "string") + { + string s; + try + { + s = convert (names (make_move_iterator (ns.begin () + 1), + make_move_iterator (ns.end ()))); + } + catch (const invalid_argument& e) + { + fail (l.tokens[2].location ()) + << "invalid 'depdb string' argument: " << e; + } + + if (dd.expect (s) != nullptr) + l4 ([&] { + diag_record dr (trace); + dr << "'depdb string' argument change forcing update of " + << t << + info (loc); script::dump (dr.os, l); + }); + } + else + assert (false); + } + } + + // Update if depdb mismatch. + // + if (dd.writing () || dd.mtime > mt) + update = true; + + dd.close (); + + // If nothing changed, then we are done. + // + if (!update) + return *ps; + + if (!ctx.dry_run || verb != 0) + { + if (bs == nullptr) + { + bs = &t.base_scope (); + rs = bs->root_scope (); + } + + build::script::environment e (a, t, script.temp_dir); + build::script::parser p (ctx); + + if (verb == 1) + { + if (script.diag_line) + { + text << p.execute_special (*rs, *bs, e, *script.diag_line); + } + else + { + // @@ TODO (and below): + // + // - we are printing target, not source (like in most other places) + // + // - printing of ad hoc target group (the {hxx cxx}{foo} idea) + // + // - if we are printing prerequisites, should we print all of them + // (including tools)? + // + text << *script.diag_name << ' ' << t; + } + } + + if (!ctx.dry_run || verb >= 2) + { + build::script::default_runner r; + p.execute (*rs, *bs, e, script, r); + + if (!ctx.dry_run) + dd.check_mtime (tp); + } + } + + t.mtime (system_clock::now ()); + return target_state::changed; + } + + target_state adhoc_buildscript_rule:: + default_action (action a, const target& t) const + { + tracer trace ("adhoc_buildscript_rule::default_action"); + + context& ctx (t.ctx); + + execute_prerequisites (a, t); + + if (!ctx.dry_run || verb != 0) + { + const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); + + build::script::environment e (a, t, script.temp_dir); + build::script::parser p (ctx); + + if (verb == 1) + { + if (script.diag_line) + { + text << p.execute_special (rs, bs, e, *script.diag_line); + } + else + { + // @@ TODO: as above + // + text << *script.diag_name << ' ' << t; + } + } + + if (!ctx.dry_run || verb >= 2) + { + build::script::default_runner r; + p.execute (rs, bs, e, script, r); + } + } + + return target_state::changed; + } +} diff --git a/libbuild2/rule-adhoc-buildscript.hxx b/libbuild2/rule-adhoc-buildscript.hxx new file mode 100644 index 0000000..5f10ef4 --- /dev/null +++ b/libbuild2/rule-adhoc-buildscript.hxx @@ -0,0 +1,56 @@ +// file : libbuild2/rule-adhoc-buildscript.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_RULE_ADHOC_BUILDSCRIPT_HXX +#define LIBBUILD2_RULE_ADHOC_BUILDSCRIPT_HXX + +#include +#include +#include + +#include + +#include + +namespace build2 +{ + // Ad hoc buildscript rule. + // + // Note: not exported and should not be used directly (i.e., registered). 
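+  // For reference, a minimal example of the kind of recipe this rule ends
+  // up implementing (illustrative only):
+  //
+  //   file{hello.out}: file{hello.in}
+  //   {{
+  //      cp $path($<) $path($>)
+  //   }}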
+ // + class adhoc_buildscript_rule: public adhoc_rule + { + public: + virtual bool + match (action, target&, const string&, optional) const override; + + virtual recipe + apply (action, target&) const override; + + target_state + perform_update_file (action, const target&) const; + + target_state + default_action (action, const target&) const; + + adhoc_buildscript_rule (const location& l, size_t b) + : adhoc_rule ("", l, b) {} + + virtual bool + recipe_text (context&, const target&, string&&, attributes&) override; + + virtual void + dump_attributes (ostream&) const override; + + virtual void + dump_text (ostream&, string&) const override; + + public: + using script_type = build::script::script; + + script_type script; + string checksum; // Script text hash. + }; +} + +#endif // LIBBUILD2_RULE_ADHOC_BUILDSCRIPT_HXX diff --git a/libbuild2/rule-adhoc-cxx.cxx b/libbuild2/rule-adhoc-cxx.cxx new file mode 100644 index 0000000..098cf4f --- /dev/null +++ b/libbuild2/rule-adhoc-cxx.cxx @@ -0,0 +1,640 @@ +// file : libbuild2/rule-adhoc-cxx.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // file_time() + +#include +#include +#include +#include +#include +#include + +using namespace butl; + +namespace build2 +{ + // cxx_rule_v1 + // + bool cxx_rule_v1:: + match (action, target&, const string&) const + { + return true; + } + + // adhoc_cxx_rule + // + adhoc_cxx_rule:: + adhoc_cxx_rule (const location& l, size_t b, uint64_t v, optional s) + : adhoc_rule ("", l, b), + version (v), + separator (move (s)), + impl (nullptr) + { + if (v != 1) + fail (l) << "unsupported c++ recipe version " << v; + } + + bool adhoc_cxx_rule:: + recipe_text (context&, const target&, string&& t, attributes&) + { + code = move (t); + return true; + } + + adhoc_cxx_rule:: + ~adhoc_cxx_rule () + { + delete impl.load (memory_order_relaxed); // Serial execution. + } + + void adhoc_cxx_rule:: + dump_text (ostream& os, string& ind) const + { + // @@ TODO: indentation is multi-line recipes is off (would need to insert + // indentation after every newline). + // + os << ind << string (braces, '{') << " c++ " << version << endl + << ind << code + << ind << string (braces, '}'); + } + + // From module.cxx. + // + void + create_module_context (context&, const location&); + + const target& + update_in_module_context (context&, const scope&, names tgt, + const location&, const path& bf); + + pair + load_module_library (const path& lib, const string& sym, string& err); + + bool adhoc_cxx_rule:: + match (action a, target& t, const string& hint) const + { + tracer trace ("adhoc_cxx_rule::match"); + + context& ctx (t.ctx); + const scope& rs (t.root_scope ()); + + // The plan is to reduce this to the build system module case as much as + // possible. Specifically, we switch to the load phase, create a module- + // like library with the recipe text as a rule implementation, then build + // and load it. + // + // Since the recipe can be shared among multiple targets, several threads + // can all be trying to do this in parallel. + // + // We use the relaxed memory order here because any change must go through + // the serial load phase. In other words, all we need here is atomicity + // with ordering/visibility provided by the phase mutex. + // + cxx_rule* impl (this->impl.load (memory_order_relaxed)); + + while (impl == nullptr) // Breakout loop. + { + // Switch the phase to (serial) load and re-check. 
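+      // (In effect this is the double-checked locking pattern, with the
+      // serial load phase standing in for the lock.)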
+ // + phase_switch ps (ctx, run_phase::load); + + if ((impl = this->impl.load (memory_order_relaxed)) != nullptr) + break; + + using create_function = cxx_rule_v1* (const location&, target_state); + using load_function = create_function* (); + + // The only way to guarantee that the name of our module matches its + // implementation is to based the name on the implementation hash (plus + // the language, in case we support other compiled implementations in + // the future). + // + // Unfortunately, this means we will be creating a new project (and + // leaving behind the old one as garbage) for every change to the + // recipe. On the other hand, if the recipe is moved around unchanged, + // we will reuse the same project. In fact, two different recipes (e.g., + // in different buildfiles) with the same text will share the project. + // + // The fact that we don't incorporate the recipe location into the hash + // but include it in the source (in the form of the #line directive; see + // below) has its own problems. If we do nothing extra here, then if a + // "moved" but otherwise unchanged recipe is updated (for example, + // because of changes in the build system core), then we may end up with + // bogus location in the diagnostics. + // + // The straightforward solution would be to just update the location in + // the source code if it has changed. This, however, will lead to + // unnecessary and probably surprising recompilations since any line + // count change before the recipe will trigger this update. One key + // observation here is that we need accurate location information only + // if we are going to recompile the recipe but the change to location + // itself does not render the recipe out of date. So what we going to do + // is factor the location information into its own small header and then + // keep it up-to-date without changing its modification time. + // + // This works well if the project is not shared by multiple recipes. + // However, if we have recipes in several buildfiles with identical + // text, then the location information may end up yo-yo'ing depending on + // which recipe got here first. + // + // There doesn't seem to be much we can do about it without incurring + // other drawbacks/overheads. So the answer is for the user to use an ad + // hoc rule with the common implementation instead of a bunch of + // duplicate recipes. + // + string id; + { + sha256 cs; + cs.append ("c++"); + cs.append (separator ? *separator : ""); + cs.append (code); + id = cs.abbreviated_string (12); + } + + dir_path pd (rs.out_path () / + rs.root_extra->build_dir / + recipes_build_dir /= id); + + path bf (pd / std_buildfile_file); + + string sym ("load_" + id); + + // Check whether the file exists and its last line matches the specified + // signature. + // + // Note: we use the last instead of the first line for extra protection + // against incomplete writes. + // + auto check_sig = [] (const path& f, const string& s) -> bool + { + try + { + if (!file_exists (f)) + return false; + + ifdstream ifs (f); + + string l; + while (ifs.peek () != ifdstream::traits_type::eof ()) + getline (ifs, l); + + return l == s; + } + catch (const io_error& e) + { + fail << "unable to read " << f << ": " << e << endf; + } + catch (const system_error& e) + { + fail << "unable to access " << f << ": " << e << endf; + } + }; + + // Calculate (and cache) the global/local fragments split. + // + struct fragments + { + size_t global_p; // Start position. 
+ size_t global_n; // Length (0 if no global fragment). + location global_l; // Position. + + size_t local_p; + size_t local_n; + location local_l; + }; + + auto split = [this, f = optional ()] () mutable -> + const fragments& + { + if (f) + return *f; + + // Note that the code starts from the next line thus +1. + // + location gl (loc.file, loc.line + 1, 1); + + if (!separator) + { + f = fragments {0, 0, location (), 0, code.size (), gl}; + return *f; + } + + // Iterate over lines (keeping track of the current line) looking + // for the separator. + // + uint64_t l (gl.line); + for (size_t b (0), e (b), n (code.size ()); b < n; b = e + 1, l++) + { + if ((e = code.find ('\n', b)) == string::npos) + e = n; + + // Trim the line. + // + size_t tb (b), te (e); + auto ws = [] (char c) {return c == ' ' || c == '\t' || c == '\r';}; + for (; tb != te && ws (code[tb ]); ++tb) ; + for (; te != tb && ws (code[te - 1]); --te) ; + + // text << "'" << string (code, tb, te - tb) << "'"; + + if (code.compare (tb, te - tb, *separator) == 0) + { + // End the global fragment at the previous newline and start the + // local fragment at the beginning of the next line. + // + location ll (loc.file, l + 1, 1); + + if (++e >= n) + fail (ll) << "empty c++ recipe local fragment"; + + f = fragments {0, b, gl, e, n - e, ll}; + return *f; + } + } + + fail (loc) << "c++ recipe fragment separator '" << *separator + << "' not found" << endf; + }; + + bool nested (ctx.module_context == &ctx); + + // Create the build context if necessary. + // + if (ctx.module_context == nullptr) + { + if (!ctx.module_context_storage) + fail (loc) << "unable to update ad hoc recipe for target " << t << + info << "building of ad hoc recipes is disabled"; + + create_module_context (ctx, loc); + } + + // "Switch" to the module context. + // + context& ctx (*t.ctx.module_context); + + const uint16_t verbosity (3); // Project creation command verbosity. + + // Project and location signatures. + // + // Specifically, we update the project version when changing anything + // which would make the already existing projects unusable. + // + const string& lf (!loc.file.path.empty () + ? loc.file.path.string () + : loc.file.name ? *loc.file.name : string ()); + + const string psig ("# c++ " + to_string (version)); + const string lsig ("// " + lf + ':' + to_string (loc.line)); + + // Check whether we need to (re)create the project. + // + optional altn (false); // Standard naming scheme. + bool create (!is_src_root (pd, altn)); + + if (!create && (create = !check_sig (bf, psig))) + rmdir_r (ctx, pd, false, verbosity); // Never dry-run. + + path of; + ofdstream ofs; + + if (create) + try + { + const fragments& frag (split ()); + + // Write ad hoc config.build that loads the ~build2 configuration. + // This way the configuration will be always in sync with ~build2 + // and we can update the recipe manually (e.g., for debugging). + // + create_project ( + pd, + dir_path (), /* amalgamation */ + {}, /* boot_modules */ + "cxx.std = latest", /* root_pre */ + {"cxx."}, /* root_modules */ + "", /* root_post */ + string ("config"), /* config_module */ + string ("config.config.load = ~build2"), /* config_file */ + false, /* buildfile */ + "build2 core", /* who */ + verbosity); /* verbosity */ + + + // Write the rule source file. + // + of = path (pd / "rule.cxx"); + + if (verb >= verbosity) + text << (verb >= 2 ? 
"cat >" : "save ") << of; + + ofs.open (of); + + ofs << "#include \"location.hxx\"" << '\n' + << '\n'; + + // Include every header that can plausibly be needed by a rule. + // + // @@ TMP: any new headers to add? [Keep this note for review.] + // + ofs << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << "#include " << '\n' + << '\n'; + + // Write the global fragment, if any. Note that it always includes the + // trailing newline. + // + if (frag.global_n != 0) + { + // Use the #line directive to point diagnostics to the code in the + // buildfile. Note that there is no easy way to restore things to + // point back to the source file (other than another #line with a + // line and a file). Let's not bother for now. + // + ofs << "#line RECIPE_GLOBAL_LINE RECIPE_FILE" << '\n'; + ofs.write (code.c_str () + frag.global_p, frag.global_n); + ofs << '\n'; + } + + // Normally the recipe code will have one level of indentation so + // let's not indent the namespace level to match. + // + ofs << "namespace build2" << '\n' + << "{" << '\n' + << '\n'; + + // If we want the user to be able to supply a custom constuctor, then + // we have to give the class a predictable name (i.e., we cannot use + // id as part of its name) and put it into an unnamed namespace. One + // clever idea is to call the class `constructor` but the name could + // also be used for a custom destructor (still could work) or for name + // qualification (would definitely look bizarre). + // + // In this light the most natural name is probable `rule`. The issue + // is we already have this name in the build2 namespace (and its our + // indirect base). In fact, any name that we choose could in the + // future conflict with something in that namespace so maybe it makes + // sense to bite the bullet and pick a name that is least likely to be + // used by the user directly (can always use cxx_rule instead). + // + ofs << "namespace" << '\n' + << "{" << '\n' + << "class rule: public cxx_rule_v1" << '\n' + << "{" << '\n' + << "public:" << '\n' + << '\n'; + + // Inherit base constructor. This way the user may provide their own + // but don't have to. + // + ofs << " using cxx_rule_v1::cxx_rule_v1;" << '\n' + << '\n'; + + // An extern "C" function cannot throw which can happen in case of a + // user-defined constructor. So we need an extra level of indirection. + // We incorporate id to make sure it doesn't conflict with anything + // user-defined. + // + ofs << " static cxx_rule_v1*" << '\n' + << " create_" << id << " (const location& l, target_state s)" << '\n' + << " {" << '\n' + << " return new rule (l, s);" << '\n' + << " }" << '\n' + << '\n'; + + // Use the #line directive to point diagnostics to the code in the + // buildfile similar to the global fragment above. + // + ofs << "#line RECIPE_LOCAL_LINE RECIPE_FILE" << '\n'; + + // Note that the local fragment always includes the trailing newline. + // + ofs.write (code.c_str () + frag.local_p, frag.local_n); + ofs << "};" << '\n' + << '\n'; + + // Add an alias that we can use unambiguously in the load function. + // + ofs << "using rule_" << id << " = rule;" << '\n' + << "}" << '\n' + << '\n'; + + // Entry point. 
+ // + ofs << "extern \"C\"" << '\n' + << "#ifdef _WIN32" << '\n' + << "__declspec(dllexport)" << '\n' + << "#endif" << '\n' + << "cxx_rule_v1* (*" << sym << " ()) (const location&, target_state)" << '\n' + << "{" << '\n' + << " return &rule_" << id << "::create_" << id << ";" << '\n' + << "}" << '\n' + << '\n'; + + ofs << "}" << '\n'; + + ofs.close (); + + + // Write buildfile. + // + of = bf; + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; + + ofs.open (of); + + ofs << "import imp_libs += build2%lib{build2}" << '\n' + << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs" << '\n' + << '\n' + << psig << '\n'; + + ofs.close (); + } + catch (const io_error& e) + { + fail << "unable to write to " << of << ": " << e; + } + + // Update the library target in the module context. + // + const target* l (nullptr); + do // Breakout loop. + { + // Load the project in the module context. + // + // Note that it's possible it has already been loaded (see above about + // the id calculation). + // + scope& rs (load_project (ctx, pd, pd, false /* forwarded */)); + + auto find_target = [&ctx, &rs, &pd, &id] () + { + const target_type* tt (rs.find_target_type ("libs")); + assert (tt != nullptr); + + const target* t ( + ctx.targets.find (*tt, pd, dir_path () /* out */, id)); + assert (t != nullptr); + + return t; + }; + + // If the project has already been loaded then, as an optimization, + // check if the target has already been updated (this will make a + // difference we if we have identical recipes in several buildfiles, + // especially to the location update that comes next). + // + if (!source_once (rs, rs, bf)) + { + l = find_target (); + + if (l->executed_state (perform_update_id) != target_state::unknown) + break; + } + + // Create/update the recipe location header. + // + // For update, preserve the file timestamp in order not to render the + // recipe out of date. + // + of = path (pd / "location.hxx"); + if (!check_sig (of, lsig)) + try + { + const fragments& frag (split ()); + + entry_time et (file_time (of)); + + if (verb >= verbosity) + text << (verb >= 2 ? "cat >" : "save ") << of; + + ofs.open (of); + + // Recipe file and line for the #line directive above. We also need + // to escape backslashes (Windows paths). + // + ofs << "#define RECIPE_FILE \"" << sanitize_strlit (lf) << '"'<< '\n'; + + if (frag.global_n != 0) + ofs << "#define RECIPE_GLOBAL_LINE " << frag.global_l.line << '\n'; + + ofs << "#define RECIPE_LOCAL_LINE " << frag.local_l.line << '\n' + << '\n' + << lsig << '\n'; + + ofs.close (); + + if (et.modification != timestamp_nonexistent) + file_time (of, et); + } + catch (const io_error& e) + { + fail << "unable to write to " << of << ": " << e; + } + catch (const system_error& e) + { + fail << "unable to get/set timestamp for " << of << ": " << e; + } + + if (nested) + { + // This means there is a perform update action already in progress + // in this context. So we are going to switch the phase and + // perform direct match and update (similar how we do this for + // generated headers). + // + // Note that since neither match nor execute are serial phases, it + // means other targets in this context can be matched and executed + // in paralellel with us. 
+ // + if (l == nullptr) + l = find_target (); + + phase_switch mp (ctx, run_phase::match); + if (build2::match (perform_update_id, *l) != target_state::unchanged) + { + phase_switch ep (ctx, run_phase::execute); + execute (a, *l); + } + } + else + { + // Cutoff the existing diagnostics stack and push our own entry. + // + diag_frame::stack_guard diag_cutoff (nullptr); + + auto df = make_diag_frame ( + [this, &t] (const diag_record& dr) + { + dr << info (loc) << "while updating ad hoc recipe for target " + << t; + }); + + l = &update_in_module_context ( + ctx, rs, names {name (pd, "libs", id)}, + loc, bf); + } + } while (false); + + // Load the library. + // + const path& lib (l->as ().path ()); + + // Note again that it's possible the library has already been loaded + // (see above about the id calculation). + // + string err; + pair hs (load_module_library (lib, sym, err)); + + // These normally shouldn't happen unless something is seriously broken. + // + if (hs.first == nullptr) + fail (loc) << "unable to load recipe library " << lib << ": " << err; + + if (hs.second == nullptr) + fail (loc) << "unable to lookup " << sym << " in recipe library " + << lib << ": " << err; + + { + auto df = make_diag_frame ( + [this](const diag_record& dr) + { + if (verb != 0) + dr << info (loc) << "while initializing ad hoc recipe"; + }); + + load_function* lf (function_cast (hs.second)); + create_function* cf (lf ()); + + impl = cf (loc, l->executed_state (perform_update_id)); + this->impl.store (impl, memory_order_relaxed); // Still in load phase. + } + } + + return impl->match (a, t, hint); + } + + recipe adhoc_cxx_rule:: + apply (action a, target& t) const + { + return impl.load (memory_order_relaxed)->apply (a, t); + } +} diff --git a/libbuild2/rule-adhoc-cxx.hxx b/libbuild2/rule-adhoc-cxx.hxx new file mode 100644 index 0000000..00ed279 --- /dev/null +++ b/libbuild2/rule-adhoc-cxx.hxx @@ -0,0 +1,83 @@ +// file : libbuild2/rule-adhoc-cxx.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_RULE_ADHOC_CXX_HXX +#define LIBBUILD2_RULE_ADHOC_CXX_HXX + +#include +#include +#include + +#include + +#include + +namespace build2 +{ + // Ad hoc C++ rule. + // + // Note: exported but should not be used directly (i.e., registered). + // + class LIBBUILD2_SYMEXPORT cxx_rule: public rule + { + // For now this class is provided purely as an alias for rule in case the + // implementation (which is also called rule) needs to refer to something + // in its base. + }; + + class LIBBUILD2_SYMEXPORT cxx_rule_v1: public cxx_rule + { + public: + // A robust recipe may want to incorporate the recipe_state into its + // up-to-date decision as if the recipe library was a prerequisite (it + // cannot be injected as a real prerequisite since it's from a different + // build context). + // + const location recipe_loc; // Buildfile location of the recipe. + const target_state recipe_state; // State of recipe library target. + + cxx_rule_v1 (const location& l, target_state s) + : recipe_loc (l), recipe_state (s) {} + + // Return true by default. + // + virtual bool + match (action, target&, const string&) const override; + }; + + // Note: not exported. 
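+  // For reference, a minimal sketch of the kind of recipe this rule
+  // compiles and loads (illustrative only; the body below becomes the
+  // local fragment of the generated rule class):
+  //
+  //   file{output}: file{input}
+  //   {{ c++ 1
+  //     recipe
+  //     apply (action, target&) const override
+  //     {
+  //       return [] (action, const target&)
+  //       {
+  //         // ...
+  //         return target_state::changed;
+  //       };
+  //     }
+  //   }}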
+ // + class adhoc_cxx_rule: public adhoc_rule + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + adhoc_cxx_rule (const location&, size_t, + uint64_t ver, + optional sep); + + virtual bool + recipe_text (context&, const target&, string&& t, attributes&) override; + + virtual + ~adhoc_cxx_rule () override; + + virtual void + dump_text (ostream&, string&) const override; + + public: + // Note that this recipe (rule instance) can be shared between multiple + // targets which could all be matched in parallel. + // + uint64_t version; + optional separator; + string code; + mutable atomic impl; + }; +} + +#endif // LIBBUILD2_RULE_ADHOC_CXX_HXX diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx index ea7186c..86a6b4d 100644 --- a/libbuild2/rule.cxx +++ b/libbuild2/rule.cxx @@ -3,10 +3,6 @@ #include -#include - -#include -#include #include #include #include @@ -14,11 +10,6 @@ #include #include -#include // attributes - -#include -#include - using namespace std; using namespace butl; @@ -366,1220 +357,4 @@ namespace build2 return target_state::unchanged; } - - // adhoc_script_rule - // - bool adhoc_script_rule:: - recipe_text (context& ctx, const target& tg, string&& t, attributes& as) - { - // Handle and erase recipe-specific attributes. - // - optional diag; - for (auto i (as.begin ()); i != as.end (); ) - { - attribute& a (*i); - const string& n (a.name); - - if (n == "diag") - try - { - diag = convert (move (a.value)); - } - catch (const invalid_argument& e) - { - fail (as.loc) << "invalid " << n << " attribute value: " << e; - } - else - { - ++i; - continue; - } - - i = as.erase (i); - } - - checksum = sha256 (t).string (); - - istringstream is (move (t)); - build::script::parser p (ctx); - - script = p.pre_parse (tg, - is, loc.file, loc.line + 1, - move (diag), as.loc); - - return false; - } - - void adhoc_script_rule:: - dump_attributes (ostream& os) const - { - // For now we dump it as an attribute whether it was specified or derived - // from the script. Maybe that's ok (we use this in tests)? - // - if (script.diag_name) - { - os << " ["; - os << "diag="; - to_stream (os, name (*script.diag_name), true /* quote */, '@'); - os << ']'; - } - } - - void adhoc_script_rule:: - dump_text (ostream& os, string& ind) const - { - os << ind << string (braces, '{') << endl; - ind += " "; - - if (script.depdb_clear) - os << ind << "depdb clear" << endl; - - script::dump (os, ind, script.depdb_lines); - - if (script.diag_line) - { - os << ind; script::dump (os, *script.diag_line, true /* newline */); - } - - script::dump (os, ind, script.lines); - ind.resize (ind.size () - 2); - os << ind << string (braces, '}'); - } - - bool adhoc_script_rule:: - match (action a, target& t, const string&, optional fb) const - { - if (!fb) - ; - // If this is clean for a file target and we are supplying the update, - // then we will also supply the standard clean. - // - else if (a == perform_clean_id && - *fb == perform_update_id && - t.is_a ()) - ; - else - return false; - - // It's unfortunate we have to resort to this but we need to remember this - // in apply(). - // - t.data (fb.has_value ()); - - return true; - } - - recipe adhoc_script_rule:: - apply (action a, target& t) const - { - // If this is an outer operation (e.g., update-for-test), then delegate to - // the inner. 
- // - if (a.outer ()) - { - match_inner (a, t); - return execute_inner; - } - - // Derive file names for the target and its ad hoc group members, if any. - // - if (a == perform_update_id || a == perform_clean_id) - { - for (target* m (&t); m != nullptr; m = m->adhoc_member) - { - if (auto* p = m->is_a ()) - p->derive_path (); - } - } - - // Inject dependency on the output directory. - // - // We do it always instead of only if one of the targets is path-based in - // case the recipe creates temporary files or some such. - // - inject_fsdir (a, t); - - // Match prerequisites. - // - match_prerequisite_members (a, t); - - // See if we are providing the standard clean as a fallback. - // - if (t.data ()) - return &perform_clean_depdb; - - if (a == perform_update_id && t.is_a ()) - { - return [this] (action a, const target& t) - { - return perform_update_file (a, t); - }; - } - else - { - return [this] (action a, const target& t) - { - return default_action (a, t); - }; - } - } - - target_state adhoc_script_rule:: - perform_update_file (action a, const target& xt) const - { - tracer trace ("adhoc_script_rule::perform_update_file"); - - context& ctx (xt.ctx); - - const file& t (xt.as ()); - const path& tp (t.path ()); - - // How should we hash target and prerequisite sets ($> and $<)? We could - // hash them as target names (i.e., the same as the $>/< content) or as - // paths (only for path-based targets). While names feel more general, - // they are also more expensive to compute. And for path-based targets, - // path is generally a good proxy for the target name. Since the bulk of - // the ad hoc recipes will presumably be operating exclusively on - // path-based targets, let's do it both ways. - // - auto hash_target = [ns = names ()] (sha256& cs, const target& t) mutable - { - if (const path_target* pt = t.is_a ()) - cs.append (pt->path ().string ()); - else - { - ns.clear (); - t.as_name (ns); - for (const name& n: ns) - to_checksum (cs, n); - } - }; - - // Update prerequisites and determine if any of them render this target - // out-of-date. - // - timestamp mt (t.load_mtime ()); - optional ps; - - sha256 pcs, ecs; - { - // This is essentially ps=execute_prerequisites(a, t, mt) which we - // cannot use because we need to see ad hoc prerequisites. - // - size_t busy (ctx.count_busy ()); - size_t exec (ctx.count_executed ()); - - target_state rs (target_state::unchanged); - - wait_guard wg (ctx, busy, t[a].task_count); - - for (const target*& pt: t.prerequisite_targets[a]) - { - if (pt == nullptr) // Skipped. - continue; - - target_state s (execute_async (a, *pt, busy, t[a].task_count)); - - if (s == target_state::postponed) - { - rs |= s; - pt = nullptr; - } - } - - wg.wait (); - - bool e (mt == timestamp_nonexistent); - for (prerequisite_target& p: t.prerequisite_targets[a]) - { - if (p == nullptr) - continue; - - const target& pt (*p.target); - - const auto& tc (pt[a].task_count); - if (tc.load (memory_order_acquire) >= busy) - ctx.sched.wait (exec, tc, scheduler::work_none); - - target_state s (pt.executed_state (a)); - rs |= s; - - // Compare our timestamp to this prerequisite's. - // - if (!e) - { - // If this is an mtime-based target, then compare timestamps. - // - if (const mtime_target* mpt = pt.is_a ()) - { - if (mpt->newer (mt, s)) - e = true; - } - else - { - // Otherwise we assume the prerequisite is newer if it was - // changed. - // - if (s == target_state::changed) - e = true; - } - } - - if (p.adhoc) - p.target = nullptr; // Blank out. 
- - // As part of this loop calculate checksums that need to include ad - // hoc prerequisites (unless the script tracks changes itself). - // - if (script.depdb_clear) - continue; - - hash_target (pcs, pt); - - // The script can reference a program in one of four ways: - // - // 1. As an (imported) target (e.g., $cli) - // - // 2. As a process_path_ex (e.g., $cxx.path). - // - // 3. As a builtin (e.g., sed) - // - // 4. As a program path/name. - // - // When it comes to change tracking, there is nothing we can do for - // (4) and there is nothing to do for (3) (assuming builtin semantics - // is stable/backwards-compatible). The (2) case is handled - // automatically by hashing all the variable values referenced by the - // script (see below), which in case of process_path_ex includes the - // checksum, if available. - // - // This leaves the (1) case, which itself splits into two sub-cases: - // the target comes with the dependency information (e.g., imported - // from a project via an export stub) or it does not (e.g., imported - // as installed). We don't need to do anything extra for the first - // sub-case since the target's state/mtime can be relied upon like any - // other prerequisite. Which cannot be said about the second sub-case, - // where we reply on checksum that may be included as part of the - // target metadata. - // - // So what we are going to do is hash checksum metadata of every - // executable prerequisite target that has it (we do it here in order - // to include ad hoc prerequisites, which feels like the right thing - // to do; the user may mark tools as ad hoc in order to omit them from - // $<). - // - if (auto* e = pt.is_a ()) - { - if (auto* c = e->lookup_metadata ("checksum")) - { - ecs.append (*c); - } - } - } - - if (!e) - ps = rs; - } - - bool update (!ps); - - // We use depdb to track changes to the script itself, input/output file - // names, tools, etc. - // - depdb dd (tp + ".d"); - - // First should come the rule name/version. - // - if (dd.expect (" 1") != nullptr) - l4 ([&]{trace << "rule mismatch forcing update of " << t;}); - - // Then the script checksum. - // - // Ideally, to detect changes to the script semantics, we would hash the - // text with all the variables expanded but without executing any - // commands. In practice, this is easier said than done (think the set - // builtin that receives output of a command that modifies the - // filesystem). - // - // So as the next best thing we are going to hash the unexpanded text as - // well as values of all the variables expanded in it (which we get as a - // side effect of pre-parsing the script). This approach has a number of - // drawbacks: - // - // - We can't handle computed variable names (e.g., $($x ? X : Y)). - // - // - We may "overhash" by including variables that are actually - // script-local. - // - // - There are functions like $install.resolve() with result based on - // external (to the script) information. - // - if (dd.expect (checksum) != nullptr) - l4 ([&]{trace << "recipe text change forcing update of " << t;}); - - // Track the variables, targets, and prerequisites changes, unless the - // script doesn't track the dependency changes itself. - // - - // For each variable hash its name, undefined/null/non-null indicator, - // and the value if non-null. - // - // Note that this excludes the special $< and $> variables which we - // handle below. 
- // - if (!script.depdb_clear) - { - sha256 cs; - names storage; - - for (const string& n: script.vars) - { - cs.append (n); - - lookup l; - - if (const variable* var = ctx.var_pool.find (n)) - l = t[var]; - - cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); - - if (l) - { - storage.clear (); - names_view ns (reverse (*l, storage)); - - for (const name& n: ns) - to_checksum (cs, n); - } - } - - if (dd.expect (cs.string ()) != nullptr) - l4 ([&]{trace << "recipe variable change forcing update of " << t;}); - } - - // Target and prerequisite sets ($> and $<). - // - if (!script.depdb_clear) - { - auto hash = [ns = names ()] (sha256& cs, const target& t) mutable - { - if (const path_target* pt = t.is_a ()) - cs.append (pt->path ().string ()); - else - { - ns.clear (); - t.as_name (ns); - for (const name& n: ns) - to_checksum (cs, n); - } - }; - - sha256 tcs; - for (const target* m (&t); m != nullptr; m = m->adhoc_member) - hash_target (tcs, *m); - - if (dd.expect (tcs.string ()) != nullptr) - l4 ([&]{trace << "target set change forcing update of " << t;}); - - if (dd.expect (pcs.string ()) != nullptr) - l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); - } - - // Finally the programs checksum. - // - if (!script.depdb_clear) - { - if (dd.expect (ecs.string ()) != nullptr) - l4 ([&]{trace << "program checksum change forcing update of " << t;}); - } - - const scope* bs (nullptr); - const scope* rs (nullptr); - - // Execute the custom dependency change tracking commands, if present. - // - if (!script.depdb_lines.empty ()) - { - bs = &t.base_scope (); - rs = bs->root_scope (); - - // While it would have been nice to reuse the environment for both - // dependency tracking and execution, there are complications (creating - // temporary directory, etc). - // - build::script::environment e (a, t, false /* temp_dir */); - build::script::parser p (ctx); - - for (const script::line& l: script.depdb_lines) - { - names ns (p.execute_special (*rs, *bs, e, l)); - - // These should have been enforced during pre-parsing. - // - assert (!ns.empty ()); // ... - assert (l.tokens.size () > 2); // 'depdb' ... - - const string& cmd (ns[0].value); - - location loc (l.tokens[0].location ()); - - if (cmd == "hash") - { - sha256 cs; - for (auto i (ns.begin () + 1); i != ns.end (); ++i) // Skip . - to_checksum (cs, *i); - - if (dd.expect (cs.string ()) != nullptr) - l4 ([&] { - diag_record dr (trace); - dr << "'depdb hash' argument change forcing update of " << t << - info (loc); script::dump (dr.os, l); - }); - } - else if (cmd == "string") - { - string s; - try - { - s = convert (names (make_move_iterator (ns.begin () + 1), - make_move_iterator (ns.end ()))); - } - catch (const invalid_argument& e) - { - fail (l.tokens[2].location ()) - << "invalid 'depdb string' argument: " << e; - } - - if (dd.expect (s) != nullptr) - l4 ([&] { - diag_record dr (trace); - dr << "'depdb string' argument change forcing update of " - << t << - info (loc); script::dump (dr.os, l); - }); - } - else - assert (false); - } - } - - // Update if depdb mismatch. - // - if (dd.writing () || dd.mtime > mt) - update = true; - - dd.close (); - - // If nothing changed, then we are done. 
- // - if (!update) - return *ps; - - if (!ctx.dry_run || verb != 0) - { - if (bs == nullptr) - { - bs = &t.base_scope (); - rs = bs->root_scope (); - } - - build::script::environment e (a, t, script.temp_dir); - build::script::parser p (ctx); - - if (verb == 1) - { - if (script.diag_line) - { - text << p.execute_special (*rs, *bs, e, *script.diag_line); - } - else - { - // @@ TODO (and below): - // - // - we are printing target, not source (like in most other places) - // - // - printing of ad hoc target group (the {hxx cxx}{foo} idea) - // - // - if we are printing prerequisites, should we print all of them - // (including tools)? - // - text << *script.diag_name << ' ' << t; - } - } - - if (!ctx.dry_run || verb >= 2) - { - build::script::default_runner r; - p.execute (*rs, *bs, e, script, r); - - if (!ctx.dry_run) - dd.check_mtime (tp); - } - } - - t.mtime (system_clock::now ()); - return target_state::changed; - } - - target_state adhoc_script_rule:: - default_action (action a, const target& t) const - { - tracer trace ("adhoc_script_rule::default_action"); - - context& ctx (t.ctx); - - execute_prerequisites (a, t); - - if (!ctx.dry_run || verb != 0) - { - const scope& bs (t.base_scope ()); - const scope& rs (*bs.root_scope ()); - - build::script::environment e (a, t, script.temp_dir); - build::script::parser p (ctx); - - if (verb == 1) - { - if (script.diag_line) - { - text << p.execute_special (rs, bs, e, *script.diag_line); - } - else - { - // @@ TODO: as above - // - text << *script.diag_name << ' ' << t; - } - } - - if (!ctx.dry_run || verb >= 2) - { - build::script::default_runner r; - p.execute (rs, bs, e, script, r); - } - } - - return target_state::changed; - } - - // cxx_rule_v1 - // - bool cxx_rule_v1:: - match (action, target&, const string&) const - { - return true; - } - - // adhoc_cxx_rule - // - adhoc_cxx_rule:: - adhoc_cxx_rule (const location& l, size_t b, uint64_t v, optional s) - : adhoc_rule ("", l, b), - version (v), - separator (move (s)), - impl (nullptr) - { - if (v != 1) - fail (l) << "unsupported c++ recipe version " << v; - } - - bool adhoc_cxx_rule:: - recipe_text (context&, const target&, string&& t, attributes&) - { - code = move (t); - return true; - } - - adhoc_cxx_rule:: - ~adhoc_cxx_rule () - { - delete impl.load (memory_order_relaxed); // Serial execution. - } - - void adhoc_cxx_rule:: - dump_text (ostream& os, string& ind) const - { - // @@ TODO: indentation is multi-line recipes is off (would need to insert - // indentation after every newline). - // - os << ind << string (braces, '{') << " c++ " << version << endl - << ind << code - << ind << string (braces, '}'); - } - - // From module.cxx. - // - void - create_module_context (context&, const location&); - - const target& - update_in_module_context (context&, const scope&, names tgt, - const location&, const path& bf); - - pair - load_module_library (const path& lib, const string& sym, string& err); - - bool adhoc_cxx_rule:: - match (action a, target& t, const string& hint) const - { - tracer trace ("adhoc_cxx_rule::match"); - - context& ctx (t.ctx); - const scope& rs (t.root_scope ()); - - // The plan is to reduce this to the build system module case as much as - // possible. Specifically, we switch to the load phase, create a module- - // like library with the recipe text as a rule implementation, then build - // and load it. - // - // Since the recipe can be shared among multiple targets, several threads - // can all be trying to do this in parallel. 
- // - // We use the relaxed memory order here because any change must go through - // the serial load phase. In other words, all we need here is atomicity - // with ordering/visibility provided by the phase mutex. - // - cxx_rule* impl (this->impl.load (memory_order_relaxed)); - - while (impl == nullptr) // Breakout loop. - { - // Switch the phase to (serial) load and re-check. - // - phase_switch ps (ctx, run_phase::load); - - if ((impl = this->impl.load (memory_order_relaxed)) != nullptr) - break; - - using create_function = cxx_rule_v1* (const location&, target_state); - using load_function = create_function* (); - - // The only way to guarantee that the name of our module matches its - // implementation is to based the name on the implementation hash (plus - // the language, in case we support other compiled implementations in - // the future). - // - // Unfortunately, this means we will be creating a new project (and - // leaving behind the old one as garbage) for every change to the - // recipe. On the other hand, if the recipe is moved around unchanged, - // we will reuse the same project. In fact, two different recipes (e.g., - // in different buildfiles) with the same text will share the project. - // - // The fact that we don't incorporate the recipe location into the hash - // but include it in the source (in the form of the #line directive; see - // below) has its own problems. If we do nothing extra here, then if a - // "moved" but otherwise unchanged recipe is updated (for example, - // because of changes in the build system core), then we may end up with - // bogus location in the diagnostics. - // - // The straightforward solution would be to just update the location in - // the source code if it has changed. This, however, will lead to - // unnecessary and probably surprising recompilations since any line - // count change before the recipe will trigger this update. One key - // observation here is that we need accurate location information only - // if we are going to recompile the recipe but the change to location - // itself does not render the recipe out of date. So what we going to do - // is factor the location information into its own small header and then - // keep it up-to-date without changing its modification time. - // - // This works well if the project is not shared by multiple recipes. - // However, if we have recipes in several buildfiles with identical - // text, then the location information may end up yo-yo'ing depending on - // which recipe got here first. - // - // There doesn't seem to be much we can do about it without incurring - // other drawbacks/overheads. So the answer is for the user to use an ad - // hoc rule with the common implementation instead of a bunch of - // duplicate recipes. - // - string id; - { - sha256 cs; - cs.append ("c++"); - cs.append (separator ? *separator : ""); - cs.append (code); - id = cs.abbreviated_string (12); - } - - dir_path pd (rs.out_path () / - rs.root_extra->build_dir / - recipes_build_dir /= id); - - path bf (pd / std_buildfile_file); - - string sym ("load_" + id); - - // Check whether the file exists and its last line matches the specified - // signature. - // - // Note: we use the last instead of the first line for extra protection - // against incomplete writes. 
- // - auto check_sig = [] (const path& f, const string& s) -> bool - { - try - { - if (!file_exists (f)) - return false; - - ifdstream ifs (f); - - string l; - while (ifs.peek () != ifdstream::traits_type::eof ()) - getline (ifs, l); - - return l == s; - } - catch (const io_error& e) - { - fail << "unable to read " << f << ": " << e << endf; - } - catch (const system_error& e) - { - fail << "unable to access " << f << ": " << e << endf; - } - }; - - // Calculate (and cache) the global/local fragments split. - // - struct fragments - { - size_t global_p; // Start position. - size_t global_n; // Length (0 if no global fragment). - location global_l; // Position. - - size_t local_p; - size_t local_n; - location local_l; - }; - - auto split = [this, f = optional ()] () mutable -> - const fragments& - { - if (f) - return *f; - - // Note that the code starts from the next line thus +1. - // - location gl (loc.file, loc.line + 1, 1); - - if (!separator) - { - f = fragments {0, 0, location (), 0, code.size (), gl}; - return *f; - } - - // Iterate over lines (keeping track of the current line) looking - for the separator. - // - uint64_t l (gl.line); - for (size_t b (0), e (b), n (code.size ()); b < n; b = e + 1, l++) - { - if ((e = code.find ('\n', b)) == string::npos) - e = n; - - // Trim the line. - // - size_t tb (b), te (e); - auto ws = [] (char c) {return c == ' ' || c == '\t' || c == '\r';}; - for (; tb != te && ws (code[tb ]); ++tb) ; - for (; te != tb && ws (code[te - 1]); --te) ; - - // text << "'" << string (code, tb, te - tb) << "'"; - - if (code.compare (tb, te - tb, *separator) == 0) - { - // End the global fragment at the previous newline and start the - // local fragment at the beginning of the next line. - // - location ll (loc.file, l + 1, 1); - - if (++e >= n) - fail (ll) << "empty c++ recipe local fragment"; - - f = fragments {0, b, gl, e, n - e, ll}; - return *f; - } - } - - fail (loc) << "c++ recipe fragment separator '" << *separator - << "' not found" << endf; - }; - - bool nested (ctx.module_context == &ctx); - - // Create the build context if necessary. - // - if (ctx.module_context == nullptr) - { - if (!ctx.module_context_storage) - fail (loc) << "unable to update ad hoc recipe for target " << t << - info << "building of ad hoc recipes is disabled"; - - create_module_context (ctx, loc); - } - - // "Switch" to the module context. - // - context& ctx (*t.ctx.module_context); - - const uint16_t verbosity (3); // Project creation command verbosity. - - // Project and location signatures. - // - // Specifically, we update the project version when changing anything - which would make the already existing projects unusable. - // - const string& lf (!loc.file.path.empty () - ? loc.file.path.string () - : loc.file.name ? *loc.file.name : string ()); - - const string psig ("# c++ " + to_string (version)); - const string lsig ("// " + lf + ':' + to_string (loc.line)); - - // Check whether we need to (re)create the project. - // - optional altn (false); // Standard naming scheme. - bool create (!is_src_root (pd, altn)); - - if (!create && (create = !check_sig (bf, psig))) - rmdir_r (ctx, pd, false, verbosity); // Never dry-run. - - path of; - ofdstream ofs; - - if (create) - try - { - const fragments& frag (split ()); - - // Write ad hoc config.build that loads the ~build2 configuration. - // This way the configuration will always be in sync with ~build2 - and we can update the recipe manually (e.g., for debugging).
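For reference, the staleness decision just above boils down to "recreate the project unless the buildfile's last line still carries the expected signature". A rough standard-library analogue of that check (a hypothetical helper using std::ifstream instead of build2's file_exists()/ifdstream, with all errors folded into "recreate"):

#include <fstream>
#include <string>

// Return true if the last line of f is exactly sig (false if the file is
// missing or unreadable, in which case the caller recreates the project).
//
static bool
last_line_matches (const std::string& f, const std::string& sig)
{
  std::ifstream ifs (f);

  if (!ifs.is_open ())
    return false;

  std::string l, last;
  while (std::getline (ifs, l))
    last = l;

  return last == sig;
}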
- // - create_project ( - pd, - dir_path (), /* amalgamation */ - {}, /* boot_modules */ - "cxx.std = latest", /* root_pre */ - {"cxx."}, /* root_modules */ - "", /* root_post */ - string ("config"), /* config_module */ - string ("config.config.load = ~build2"), /* config_file */ - false, /* buildfile */ - "build2 core", /* who */ - verbosity); /* verbosity */ - - - // Write the rule source file. - // - of = path (pd / "rule.cxx"); - - if (verb >= verbosity) - text << (verb >= 2 ? "cat >" : "save ") << of; - - ofs.open (of); - - ofs << "#include \"location.hxx\"" << '\n' - << '\n'; - - // Include every header that can plausibly be needed by a rule. - // - // @@ TMP: any new headers to add? - // - ofs << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << "#include " << '\n' - << '\n'; - - // Write the global fragment, if any. Note that it always includes the - trailing newline. - // - if (frag.global_n != 0) - { - // Use the #line directive to point diagnostics to the code in the - // buildfile. Note that there is no easy way to restore things to - point back to the source file (other than another #line with a - line and a file). Let's not bother for now. - // - ofs << "#line RECIPE_GLOBAL_LINE RECIPE_FILE" << '\n'; - ofs.write (code.c_str () + frag.global_p, frag.global_n); - ofs << '\n'; - } - - // Normally the recipe code will have one level of indentation so - let's not indent the namespace level to match. - // - ofs << "namespace build2" << '\n' - << "{" << '\n' - << '\n'; - - // If we want the user to be able to supply a custom constructor, then - we have to give the class a predictable name (i.e., we cannot use - id as part of its name) and put it into an unnamed namespace. One - clever idea is to call the class `constructor` but the name could - also be used for a custom destructor (still could work) or for name - qualification (would definitely look bizarre). - // - // In this light the most natural name is probably `rule`. The issue - is we already have this name in the build2 namespace (and it's our - indirect base). In fact, any name that we choose could in the - future conflict with something in that namespace so maybe it makes - sense to bite the bullet and pick a name that is least likely to be - used by the user directly (can always use cxx_rule instead). - // - ofs << "namespace" << '\n' - << "{" << '\n' - << "class rule: public cxx_rule_v1" << '\n' - << "{" << '\n' - << "public:" << '\n' - << '\n'; - - // Inherit base constructor. This way the user may provide their own - but doesn't have to. - // - ofs << " using cxx_rule_v1::cxx_rule_v1;" << '\n' - << '\n'; - - // An extern "C" function cannot throw, which can happen in case of a - user-defined constructor. So we need an extra level of indirection. - We incorporate id to make sure it doesn't conflict with anything - user-defined. - // - ofs << " static cxx_rule_v1*" << '\n' - << " create_" << id << " (const location& l, target_state s)" << '\n' - << " {" << '\n' - << " return new rule (l, s);" << '\n' - << " }" << '\n' - << '\n'; - - // Use the #line directive to point diagnostics to the code in the - buildfile similar to the global fragment above.
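The indirection being generated here can be seen in isolation in the reduced sketch below (hypothetical names, not the generated code itself): the extern "C" entry point never constructs anything and so never throws; it only hands back a pointer to an ordinary static member function, and it is that function which performs the potentially-throwing construction on the caller's side.

#include <stdexcept>

struct widget
{
  explicit
  widget (int v) {if (v < 0) throw std::invalid_argument ("v");}

  // Ordinary C++: exceptions from the constructor may propagate to whoever
  // invokes this function pointer later on.
  //
  static widget*
  create (int v) {return new widget (v);}
};

// The extern "C" entry point itself cannot fail: it only returns &create.
//
extern "C" widget* (*load_widget ()) (int)
{
  return &widget::create;
}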
- // - ofs << "#line RECIPE_LOCAL_LINE RECIPE_FILE" << '\n'; - - // Note that the local fragment always includes the trailing newline. - // - ofs.write (code.c_str () + frag.local_p, frag.local_n); - ofs << "};" << '\n' - << '\n'; - - // Add an alias that we can use unambiguously in the load function. - // - ofs << "using rule_" << id << " = rule;" << '\n' - << "}" << '\n' - << '\n'; - - // Entry point. - // - ofs << "extern \"C\"" << '\n' - << "#ifdef _WIN32" << '\n' - << "__declspec(dllexport)" << '\n' - << "#endif" << '\n' - << "cxx_rule_v1* (*" << sym << " ()) (const location&, target_state)" << '\n' - << "{" << '\n' - << " return &rule_" << id << "::create_" << id << ";" << '\n' - << "}" << '\n' - << '\n'; - - ofs << "}" << '\n'; - - ofs.close (); - - - // Write buildfile. - // - of = bf; - - if (verb >= verbosity) - text << (verb >= 2 ? "cat >" : "save ") << of; - - ofs.open (of); - - ofs << "import imp_libs += build2%lib{build2}" << '\n' - << "libs{" << id << "}: cxx{rule} hxx{location} $imp_libs" << '\n' - << '\n' - << psig << '\n'; - - ofs.close (); - } - catch (const io_error& e) - { - fail << "unable to write to " << of << ": " << e; - } - - // Update the library target in the module context. - // - const target* l (nullptr); - do // Breakout loop. - { - // Load the project in the module context. - // - // Note that it's possible it has already been loaded (see above about - the id calculation). - // - scope& rs (load_project (ctx, pd, pd, false /* forwarded */)); - - auto find_target = [&ctx, &rs, &pd, &id] () - { - const target_type* tt (rs.find_target_type ("libs")); - assert (tt != nullptr); - - const target* t ( - ctx.targets.find (*tt, pd, dir_path () /* out */, id)); - assert (t != nullptr); - - return t; - }; - - // If the project has already been loaded then, as an optimization, - check if the target has already been updated (this will make a - difference if we have identical recipes in several buildfiles, - especially to the location update that comes next). - // - if (!source_once (rs, rs, bf)) - { - l = find_target (); - - if (l->executed_state (perform_update_id) != target_state::unknown) - break; - } - - // Create/update the recipe location header. - // - // For update, preserve the file timestamp in order not to render the - recipe out of date. - // - of = path (pd / "location.hxx"); - if (!check_sig (of, lsig)) - try - { - const fragments& frag (split ()); - - entry_time et (file_time (of)); - - if (verb >= verbosity) - text << (verb >= 2 ? "cat >" : "save ") << of; - - ofs.open (of); - - // Recipe file and line for the #line directive above. We also need - to escape backslashes (Windows paths). - // - ofs << "#define RECIPE_FILE \"" << sanitize_strlit (lf) << '"' << '\n'; - - if (frag.global_n != 0) - ofs << "#define RECIPE_GLOBAL_LINE " << frag.global_l.line << '\n'; - - ofs << "#define RECIPE_LOCAL_LINE " << frag.local_l.line << '\n' - << '\n' - << lsig << '\n'; - - ofs.close (); - - if (et.modification != timestamp_nonexistent) - file_time (of, et); - } - catch (const io_error& e) - { - fail << "unable to write to " << of << ": " << e; - } - catch (const system_error& e) - { - fail << "unable to get/set timestamp for " << of << ": " << e; - } - - if (nested) - { - // This means there is a perform update action already in progress - in this context. So we are going to switch the phase and - perform direct match and update (similar to how we do this for - generated headers).
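The "rewrite location.hxx but keep its timestamp" step above relies on build2's file_time()/entry_time helpers. As a self-contained illustration of the same idea (a sketch using std::filesystem, with assumed names; not the code in this patch):

#include <string>
#include <fstream>
#include <filesystem>

namespace fs = std::filesystem;

// Rewrite f with new content but restore its original modification time so
// that consumers which only look at the timestamp do not consider it changed.
//
static void
rewrite_preserving_mtime (const fs::path& f, const std::string& content)
{
  std::error_code ec;
  fs::file_time_type t (fs::last_write_time (f, ec)); // Saved if f exists.

  {
    std::ofstream ofs (f, std::ios::trunc);
    ofs << content;
  } // Close (and flush) before restoring the timestamp.

  if (!ec)
    fs::last_write_time (f, t);
}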
- // - // Note that since neither match nor execute are serial phases, it - means other targets in this context can be matched and executed - in parallel with us. - // - if (l == nullptr) - l = find_target (); - - phase_switch mp (ctx, run_phase::match); - if (build2::match (perform_update_id, *l) != target_state::unchanged) - { - phase_switch ep (ctx, run_phase::execute); - execute (a, *l); - } - } - else - { - // Cut off the existing diagnostics stack and push our own entry. - // - diag_frame::stack_guard diag_cutoff (nullptr); - - auto df = make_diag_frame ( - [this, &t] (const diag_record& dr) - { - dr << info (loc) << "while updating ad hoc recipe for target " - << t; - }); - - l = &update_in_module_context ( - ctx, rs, names {name (pd, "libs", id)}, - loc, bf); - } - } while (false); - - // Load the library. - // - const path& lib (l->as ().path ()); - - // Note again that it's possible the library has already been loaded - (see above about the id calculation). - // - string err; - pair hs (load_module_library (lib, sym, err)); - - // These normally shouldn't happen unless something is seriously broken. - // - if (hs.first == nullptr) - fail (loc) << "unable to load recipe library " << lib << ": " << err; - - if (hs.second == nullptr) - fail (loc) << "unable to lookup " << sym << " in recipe library " - << lib << ": " << err; - - { - auto df = make_diag_frame ( - [this](const diag_record& dr) - { - if (verb != 0) - dr << info (loc) << "while initializing ad hoc recipe"; - }); - - load_function* lf (function_cast (hs.second)); - create_function* cf (lf ()); - - impl = cf (loc, l->executed_state (perform_update_id)); - this->impl.store (impl, memory_order_relaxed); // Still in load phase. - } - } - - return impl->match (a, t, hint); - } - - recipe adhoc_cxx_rule:: - apply (action a, target& t) const - { - return impl.load (memory_order_relaxed)->apply (a, t); - } } diff --git a/libbuild2/rule.hxx b/libbuild2/rule.hxx index f3581c8..8390ea7 100644 --- a/libbuild2/rule.hxx +++ b/libbuild2/rule.hxx @@ -12,8 +12,6 @@ #include #include -#include - #include namespace build2 @@ -178,109 +176,6 @@ namespace build2 static target_state clean_recipes_build (action, const scope&, const dir&); }; - - // Ad hoc script rule. - // - // Note: not exported and should not be used directly (i.e., registered). - // - class adhoc_script_rule: public adhoc_rule - { - public: - virtual bool - match (action, target&, const string&, optional) const override; - - virtual recipe - apply (action, target&) const override; - - target_state - perform_update_file (action, const target&) const; - - target_state - default_action (action, const target&) const; - - adhoc_script_rule (const location& l, size_t b) - : adhoc_rule ("", l, b) {} - - virtual bool - recipe_text (context&, const target&, string&&, attributes&) override; - - virtual void - dump_attributes (ostream&) const override; - - virtual void - dump_text (ostream&, string&) const override; - - public: - using script_type = build::script::script; - - script_type script; - string checksum; // Script text hash. - }; - - // Ad hoc C++ rule. - // - // Note: exported but should not be used directly (i.e., registered). - // - class LIBBUILD2_SYMEXPORT cxx_rule: public rule - { - // For now this class is provided purely as an alias for rule in case the - implementation (which is also called rule) needs to refer to something - in its base.
- }; - - class LIBBUILD2_SYMEXPORT cxx_rule_v1: public cxx_rule - { - public: - // A robust recipe may want to incorporate the recipe_state into its - // up-to-date decision as if the recipe library was a prerequisite (it - // cannot be injected as a real prerequisite since it's from a different - // build context). - // - const location recipe_loc; // Buildfile location of the recipe. - const target_state recipe_state; // State of recipe library target. - - cxx_rule_v1 (const location& l, target_state s) - : recipe_loc (l), recipe_state (s) {} - - // Return true by default. - // - virtual bool - match (action, target&, const string&) const override; - }; - - // Note: not exported. - // - class adhoc_cxx_rule: public adhoc_rule - { - public: - virtual bool - match (action, target&, const string&) const override; - - virtual recipe - apply (action, target&) const override; - - adhoc_cxx_rule (const location&, size_t, - uint64_t ver, - optional sep); - - virtual bool - recipe_text (context&, const target&, string&& t, attributes&) override; - - virtual - ~adhoc_cxx_rule () override; - - virtual void - dump_text (ostream&, string&) const override; - - public: - // Note that this recipe (rule instance) can be shared between multiple - // targets which could all be matched in parallel. - // - uint64_t version; - optional separator; - string code; - mutable atomic impl; - }; } #endif // LIBBUILD2_RULE_HXX -- cgit v1.1
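To put the cxx_rule_v1 interface being removed from this header (and moved to rule-adhoc-cxx.hxx) in context: a compiled c++ recipe ends up as a class of roughly the following shape. This is a hypothetical, minimal sketch (the hello_rule name, the printed message, and the exact set of includes are assumptions, not something defined by this commit).

#include <libbuild2/types.hxx>
#include <libbuild2/target.hxx>
#include <libbuild2/algorithm.hxx>
#include <libbuild2/diagnostics.hxx>
#include <libbuild2/rule-adhoc-cxx.hxx> // cxx_rule_v1 (added by this commit).

namespace build2
{
  class hello_rule: public cxx_rule_v1
  {
  public:
    using cxx_rule_v1::cxx_rule_v1; // (location, target_state) constructor.

    // match() is inherited and returns true by default.
    //
    virtual recipe
    apply (action a, target& t) const override
    {
      match_prerequisite_members (a, t); // Match prerequisites as usual.

      return [] (action, const target& t)
      {
        text << "hello " << t; // Trivial action: print and report a change.
        return target_state::changed;
      };
    }
  };
}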